aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power20
-rw-r--r--Documentation/DocBook/filesystems.tmpl5
-rw-r--r--Documentation/RCU/whatisRCU.txt31
-rw-r--r--Documentation/devicetree/bindings/i2c/ce4100-i2c.txt93
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-cmos.txt28
-rw-r--r--Documentation/devicetree/bindings/x86/ce4100.txt38
-rw-r--r--Documentation/devicetree/bindings/x86/interrupt.txt26
-rw-r--r--Documentation/devicetree/bindings/x86/timer.txt6
-rw-r--r--Documentation/devicetree/booting-without-of.txt20
-rw-r--r--Documentation/hwmon/jc4221
-rw-r--r--Documentation/hwmon/k10temp8
-rw-r--r--Documentation/kernel-parameters.txt28
-rw-r--r--Documentation/keys-request-key.txt9
-rw-r--r--Documentation/keys.txt28
-rw-r--r--Documentation/memory-barriers.txt58
-rw-r--r--Documentation/networking/00-INDEX6
-rw-r--r--Documentation/networking/Makefile2
-rw-r--r--Documentation/networking/dns_resolver.txt9
-rw-r--r--Documentation/power/devices.txt94
-rw-r--r--Documentation/power/runtime_pm.txt13
-rw-r--r--Documentation/power/states.txt12
-rw-r--r--Documentation/rtc.txt29
-rw-r--r--Documentation/spinlocks.txt24
-rw-r--r--Documentation/trace/ftrace-design.txt7
-rw-r--r--Documentation/trace/ftrace.txt151
-rw-r--r--Documentation/trace/kprobetrace.txt16
-rw-r--r--Documentation/workqueue.txt4
-rw-r--r--MAINTAINERS41
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/alpha/include/asm/fcntl.h2
-rw-r--r--arch/alpha/include/asm/futex.h29
-rw-r--r--arch/alpha/include/asm/rwsem.h36
-rw-r--r--arch/alpha/kernel/irq.c13
-rw-r--r--arch/alpha/kernel/irq_alpha.c11
-rw-r--r--arch/alpha/kernel/irq_i8259.c18
-rw-r--r--arch/alpha/kernel/irq_impl.h8
-rw-r--r--arch/alpha/kernel/irq_pyxis.c20
-rw-r--r--arch/alpha/kernel/irq_srm.c16
-rw-r--r--arch/alpha/kernel/osf_sys.c36
-rw-r--r--arch/alpha/kernel/sys_alcor.c28
-rw-r--r--arch/alpha/kernel/sys_cabriolet.c16
-rw-r--r--arch/alpha/kernel/sys_dp264.c52
-rw-r--r--arch/alpha/kernel/sys_eb64p.c18
-rw-r--r--arch/alpha/kernel/sys_eiger.c14
-rw-r--r--arch/alpha/kernel/sys_jensen.c24
-rw-r--r--arch/alpha/kernel/sys_marvel.c42
-rw-r--r--arch/alpha/kernel/sys_mikasa.c16
-rw-r--r--arch/alpha/kernel/sys_noritake.c16
-rw-r--r--arch/alpha/kernel/sys_rawhide.c17
-rw-r--r--arch/alpha/kernel/sys_rx164.c16
-rw-r--r--arch/alpha/kernel/sys_sable.c20
-rw-r--r--arch/alpha/kernel/sys_takara.c14
-rw-r--r--arch/alpha/kernel/sys_titan.c22
-rw-r--r--arch/alpha/kernel/sys_wildfire.c32
-rw-r--r--arch/alpha/kernel/time.c8
-rw-r--r--arch/alpha/kernel/vmlinux.lds.S5
-rw-r--r--arch/arm/Kconfig25
-rw-r--r--arch/arm/Makefile2
-rw-r--r--arch/arm/boot/compressed/.gitignore6
-rw-r--r--arch/arm/common/Kconfig2
-rw-r--r--arch/arm/include/asm/futex.h29
-rw-r--r--arch/arm/include/asm/hardware/cache-l2x0.h1
-rw-r--r--arch/arm/include/asm/hardware/sp810.h3
-rw-r--r--arch/arm/include/asm/mach/arch.h4
-rw-r--r--arch/arm/include/asm/pgalloc.h2
-rw-r--r--arch/arm/include/asm/tlb.h105
-rw-r--r--arch/arm/include/asm/tlbflush.h7
-rw-r--r--arch/arm/kernel/hw_breakpoint.c26
-rw-r--r--arch/arm/kernel/kprobes-decode.c2
-rw-r--r--arch/arm/kernel/pmu.c22
-rw-r--r--arch/arm/kernel/ptrace.c6
-rw-r--r--arch/arm/kernel/setup.c4
-rw-r--r--arch/arm/kernel/signal.c4
-rw-r--r--arch/arm/kernel/time.c4
-rw-r--r--arch/arm/kernel/vmlinux.lds.S13
-rw-r--r--arch/arm/mach-clps711x/include/mach/time.h2
-rw-r--r--arch/arm/mach-davinci/cpufreq.c2
-rw-r--r--arch/arm/mach-davinci/devices-da8xx.c7
-rw-r--r--arch/arm/mach-davinci/gpio-tnetv107x.c18
-rw-r--r--arch/arm/mach-davinci/include/mach/clkdev.h2
-rw-r--r--arch/arm/mach-omap2/clkt_dpll.c2
-rw-r--r--arch/arm/mach-omap2/mailbox.c12
-rw-r--r--arch/arm/mach-omap2/mux.c2
-rw-r--r--arch/arm/mach-omap2/pm-debug.c8
-rw-r--r--arch/arm/mach-omap2/prcm_mpu44xx.h4
-rw-r--r--arch/arm/mach-omap2/smartreflex.c37
-rw-r--r--arch/arm/mach-omap2/timer-gp.c13
-rw-r--r--arch/arm/mach-pxa/pxa25x.c1
-rw-r--r--arch/arm/mach-pxa/tosa-bt.c2
-rw-r--r--arch/arm/mach-pxa/tosa.c6
-rw-r--r--arch/arm/mach-s3c2440/Kconfig1
-rw-r--r--arch/arm/mach-s3c2440/include/mach/gta02.h26
-rw-r--r--arch/arm/mach-s3c64xx/clock.c6
-rw-r--r--arch/arm/mach-s3c64xx/dma.c11
-rw-r--r--arch/arm/mach-s3c64xx/gpiolib.c4
-rw-r--r--arch/arm/mach-s3c64xx/mach-smdk6410.c13
-rw-r--r--arch/arm/mach-s3c64xx/setup-keypad.c2
-rw-r--r--arch/arm/mach-s3c64xx/setup-sdhci.c2
-rw-r--r--arch/arm/mach-s5p6442/include/mach/map.h69
-rw-r--r--arch/arm/mach-s5p64x0/include/mach/gpio.h4
-rw-r--r--arch/arm/mach-s5p64x0/include/mach/map.h83
-rw-r--r--arch/arm/mach-s5pc100/include/mach/map.h193
-rw-r--r--arch/arm/mach-s5pv210/include/mach/map.h168
-rw-r--r--arch/arm/mach-s5pv210/mach-aquila.c15
-rw-r--r--arch/arm/mach-s5pv210/mach-goni.c15
-rw-r--r--arch/arm/mach-s5pv310/include/mach/map.h149
-rw-r--r--arch/arm/mach-shmobile/board-ag5evm.c1
-rw-r--r--arch/arm/mach-shmobile/board-ap4evb.c2
-rw-r--r--arch/arm/mach-shmobile/board-mackerel.c2
-rw-r--r--arch/arm/mach-shmobile/clock-sh73a0.c17
-rw-r--r--arch/arm/mach-shmobile/include/mach/head-ap4evb.txt10
-rw-r--r--arch/arm/mach-shmobile/include/mach/head-mackerel.txt10
-rw-r--r--arch/arm/mach-spear3xx/include/mach/spear320.h2
-rw-r--r--arch/arm/mach-tegra/include/mach/kbc.h1
-rw-r--r--arch/arm/mm/cache-l2x0.c6
-rw-r--r--arch/arm/mm/proc-v7.S6
-rw-r--r--arch/arm/plat-omap/mailbox.c21
-rw-r--r--arch/arm/plat-s5p/dev-uart.c12
-rw-r--r--arch/arm/plat-samsung/dev-ts.c1
-rw-r--r--arch/arm/plat-samsung/dev-uart.c2
-rw-r--r--arch/arm/plat-spear/include/plat/uncompress.h4
-rw-r--r--arch/arm/plat-spear/include/plat/vmalloc.h2
-rw-r--r--arch/blackfin/kernel/time.c6
-rw-r--r--arch/blackfin/kernel/vmlinux.lds.S2
-rw-r--r--arch/blackfin/lib/outs.S16
-rw-r--r--arch/blackfin/mach-common/cache.S2
-rw-r--r--arch/cris/arch-v10/kernel/time.c4
-rw-r--r--arch/cris/arch-v32/kernel/smp.c4
-rw-r--r--arch/cris/arch-v32/kernel/time.c6
-rw-r--r--arch/cris/kernel/vmlinux.lds.S7
-rw-r--r--arch/frv/include/asm/futex.h5
-rw-r--r--arch/frv/kernel/futex.c14
-rw-r--r--arch/frv/kernel/time.c14
-rw-r--r--arch/frv/kernel/vmlinux.lds.S2
-rw-r--r--arch/h8300/kernel/time.c4
-rw-r--r--arch/h8300/kernel/timer/timer8.c2
-rw-r--r--arch/ia64/include/asm/futex.h15
-rw-r--r--arch/ia64/include/asm/rwsem.h37
-rw-r--r--arch/ia64/include/asm/xen/hypercall.h2
-rw-r--r--arch/ia64/kernel/time.c19
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S2
-rw-r--r--arch/ia64/xen/suspend.c9
-rw-r--r--arch/ia64/xen/time.c13
-rw-r--r--arch/m32r/kernel/time.c5
-rw-r--r--arch/m32r/kernel/vmlinux.lds.S2
-rw-r--r--arch/m68k/bvme6000/config.c4
-rw-r--r--arch/m68k/kernel/time.c4
-rw-r--r--arch/m68k/mvme147/config.c4
-rw-r--r--arch/m68k/mvme16x/config.c4
-rw-r--r--arch/m68k/sun3/sun3ints.c2
-rw-r--r--arch/m68knommu/kernel/time.c8
-rw-r--r--arch/microblaze/include/asm/futex.h31
-rw-r--r--arch/microblaze/include/asm/pci-bridge.h12
-rw-r--r--arch/microblaze/include/asm/prom.h15
-rw-r--r--arch/microblaze/kernel/prom_parse.c77
-rw-r--r--arch/microblaze/pci/pci-common.c1
-rw-r--r--arch/mips/Kconfig4
-rw-r--r--arch/mips/alchemy/mtx-1/board_setup.c4
-rw-r--r--arch/mips/alchemy/mtx-1/platform.c9
-rw-r--r--arch/mips/alchemy/xxs1500/board_setup.c4
-rw-r--r--arch/mips/include/asm/futex.h39
-rw-r--r--arch/mips/include/asm/perf_event.h12
-rw-r--r--arch/mips/kernel/ftrace.c179
-rw-r--r--arch/mips/kernel/perf_event.c345
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c4
-rw-r--r--arch/mips/kernel/signal.c2
-rw-r--r--arch/mips/kernel/signal32.c2
-rw-r--r--arch/mips/kernel/smp.c31
-rw-r--r--arch/mips/kernel/syscall.c5
-rw-r--r--arch/mips/kernel/vmlinux.lds.S2
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mips/loongson/Kconfig5
-rw-r--r--arch/mips/loongson/common/cmdline.c5
-rw-r--r--arch/mips/loongson/common/machtype.c3
-rw-r--r--arch/mips/math-emu/ieee754int.h4
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/mips/mm/tlbex.c2
-rw-r--r--arch/mips/pci/ops-pmcmsp.c4
-rw-r--r--arch/mips/pmc-sierra/Kconfig4
-rw-r--r--arch/mips/pmc-sierra/msp71xx/msp_time.c2
-rw-r--r--arch/mn10300/include/asm/atomic.h2
-rw-r--r--arch/mn10300/include/asm/uaccess.h5
-rw-r--r--arch/mn10300/kernel/time.c6
-rw-r--r--arch/mn10300/kernel/vmlinux.lds.S2
-rw-r--r--arch/mn10300/mm/cache-inv-icache.c4
-rw-r--r--arch/parisc/hpux/sys_hpux.c65
-rw-r--r--arch/parisc/include/asm/fcntl.h2
-rw-r--r--arch/parisc/include/asm/futex.h24
-rw-r--r--arch/parisc/kernel/time.c7
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/include/asm/futex.h27
-rw-r--r--arch/powerpc/include/asm/lppaca.h16
-rw-r--r--arch/powerpc/include/asm/machdep.h6
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h10
-rw-r--r--arch/powerpc/include/asm/prom.h15
-rw-r--r--arch/powerpc/include/asm/rwsem.h51
-rw-r--r--arch/powerpc/kernel/machine_kexec.c5
-rw-r--r--arch/powerpc/kernel/paca.c14
-rw-r--r--arch/powerpc/kernel/pci-common.c1
-rw-r--r--arch/powerpc/kernel/process.c8
-rw-r--r--arch/powerpc/kernel/prom_parse.c84
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/mm/numa.c3
-rw-r--r--arch/powerpc/mm/tlb_hash64.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c2
-rw-r--r--arch/powerpc/platforms/iseries/dt.c6
-rw-r--r--arch/powerpc/platforms/iseries/setup.c1
-rw-r--r--arch/s390/boot/compressed/misc.c5
-rw-r--r--arch/s390/crypto/sha_common.c1
-rw-r--r--arch/s390/include/asm/atomic.h26
-rw-r--r--arch/s390/include/asm/cache.h1
-rw-r--r--arch/s390/include/asm/futex.h12
-rw-r--r--arch/s390/include/asm/rwsem.h63
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/kernel/vmlinux.lds.S2
-rw-r--r--arch/s390/lib/uaccess.h8
-rw-r--r--arch/s390/lib/uaccess_pt.c17
-rw-r--r--arch/s390/lib/uaccess_std.c8
-rw-r--r--arch/sh/include/asm/futex-irq.h24
-rw-r--r--arch/sh/include/asm/futex.h11
-rw-r--r--arch/sh/include/asm/rwsem.h56
-rw-r--r--arch/sh/include/asm/sections.h2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7750.c13
-rw-r--r--arch/sh/kernel/vmlinux.lds.S2
-rw-r--r--arch/sh/lib/delay.c10
-rw-r--r--arch/sh/mm/cache.c3
-rw-r--r--arch/sparc/include/asm/fcntl.h2
-rw-r--r--arch/sparc/include/asm/futex_64.h20
-rw-r--r--arch/sparc/include/asm/pcr.h2
-rw-r--r--arch/sparc/include/asm/rwsem.h46
-rw-r--r--arch/sparc/kernel/iommu.c5
-rw-r--r--arch/sparc/kernel/pcic.c4
-rw-r--r--arch/sparc/kernel/pcr.c2
-rw-r--r--arch/sparc/kernel/smp_64.c2
-rw-r--r--arch/sparc/kernel/time_32.c9
-rw-r--r--arch/sparc/kernel/una_asm_32.S4
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S2
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/sparc/lib/bitext.c5
-rw-r--r--arch/tile/include/asm/futex.h27
-rw-r--r--arch/tile/kernel/vmlinux.lds.S2
-rw-r--r--arch/um/Kconfig.common1
-rw-r--r--arch/um/Kconfig.x865
-rw-r--r--arch/um/drivers/mconsole_kern.c21
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/um/include/asm/common.lds.S2
-rw-r--r--arch/um/kernel/irq.c31
-rw-r--r--arch/x86/Kconfig35
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c7
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c24
-rw-r--r--arch/x86/ia32/ia32entry.S32
-rw-r--r--arch/x86/include/asm/acpi.h11
-rw-r--r--arch/x86/include/asm/amd_nb.h14
-rw-r--r--arch/x86/include/asm/apic.h42
-rw-r--r--arch/x86/include/asm/apicdef.h12
-rw-r--r--arch/x86/include/asm/bootparam.h1
-rw-r--r--arch/x86/include/asm/ce4100.h6
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/e820.h2
-rw-r--r--arch/x86/include/asm/entry_arch.h5
-rw-r--r--arch/x86/include/asm/frame.h6
-rw-r--r--arch/x86/include/asm/futex.h22
-rw-r--r--arch/x86/include/asm/hw_irq.h24
-rw-r--r--arch/x86/include/asm/init.h6
-rw-r--r--arch/x86/include/asm/io_apic.h44
-rw-r--r--arch/x86/include/asm/ipi.h8
-rw-r--r--arch/x86/include/asm/irq.h3
-rw-r--r--arch/x86/include/asm/irq_controller.h12
-rw-r--r--arch/x86/include/asm/irq_vectors.h45
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/mpspec.h3
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/include/asm/nmi.h1
-rw-r--r--arch/x86/include/asm/numa.h52
-rw-r--r--arch/x86/include/asm/numa_32.h7
-rw-r--r--arch/x86/include/asm/numa_64.h23
-rw-r--r--arch/x86/include/asm/olpc_ofw.h14
-rw-r--r--arch/x86/include/asm/page_types.h9
-rw-r--r--arch/x86/include/asm/percpu.h48
-rw-r--r--arch/x86/include/asm/perf_event_p4.h1
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/prom.h70
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/smp.h20
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h2
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/include/asm/topology.h19
-rw-r--r--arch/x86/include/asm/unistd_32.h5
-rw-r--r--arch/x86/include/asm/unistd_64.h6
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h15
-rw-r--r--arch/x86/include/asm/xen/page.h47
-rw-r--r--arch/x86/include/asm/xen/pci.h8
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c22
-rw-r--r--arch/x86/kernel/amd_nb.c84
-rw-r--r--arch/x86/kernel/apb_timer.c62
-rw-r--r--arch/x86/kernel/aperture_64.c33
-rw-r--r--arch/x86/kernel/apic/apic.c150
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/apic_noop.c26
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c34
-rw-r--r--arch/x86/kernel/apic/es7000_32.c35
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c388
-rw-r--r--arch/x86/kernel/apic/ipi.c12
-rw-r--r--arch/x86/kernel/apic/numaq_32.c21
-rw-r--r--arch/x86/kernel/apic/probe_32.c10
-rw-r--r--arch/x86/kernel/apic/summit_32.c47
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/apm_32.c5
-rw-r--r--arch/x86/kernel/asm-offsets.c65
-rw-r--r--arch/x86/kernel/asm-offsets_32.c69
-rw-r--r--arch/x86/kernel/asm-offsets_64.c90
-rw-r--r--arch/x86/kernel/check.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c61
-rw-r--r--arch/x86/kernel/cpu/common.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c13
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c80
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c170
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c175
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c417
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c97
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c19
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/devicetree.c441
-rw-r--r--arch/x86/kernel/dumpstack.c25
-rw-r--r--arch/x86/kernel/e820.c18
-rw-r--r--arch/x86/kernel/early-quirks.c16
-rw-r--r--arch/x86/kernel/entry_32.S11
-rw-r--r--arch/x86/kernel/entry_64.S13
-rw-r--r--arch/x86/kernel/ftrace.c15
-rw-r--r--arch/x86/kernel/head_32.S10
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/ioport.c20
-rw-r--r--arch/x86/kernel/irq.c91
-rw-r--r--arch/x86/kernel/irqinit.c92
-rw-r--r--arch/x86/kernel/kgdb.c9
-rw-r--r--arch/x86/kernel/kprobes.c8
-rw-r--r--arch/x86/kernel/microcode_amd.c188
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/rtc.c3
-rw-r--r--arch/x86/kernel/setup.c76
-rw-r--r--arch/x86/kernel/setup_percpu.c11
-rw-r--r--arch/x86/kernel/smpboot.c124
-rw-r--r--arch/x86/kernel/syscall_table_32.S3
-rw-r--r--arch/x86/kernel/vmlinux.lds.S5
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/kvm/svm.c2
-rw-r--r--arch/x86/kvm/trace.h8
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S6
-rw-r--r--arch/x86/lib/checksum_32.S63
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S59
-rw-r--r--arch/x86/lib/memmove_64.S197
-rw-r--r--arch/x86/lib/memmove_64.c192
-rw-r--r--arch/x86/lib/rwsem_64.S56
-rw-r--r--arch/x86/lib/semaphore_32.S38
-rw-r--r--arch/x86/lib/thunk_32.S18
-rw-r--r--arch/x86/lib/thunk_64.S27
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/amdtopology_64.c142
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/init.c56
-rw-r--r--arch/x86/mm/init_32.c11
-rw-r--r--arch/x86/mm/init_64.c78
-rw-r--r--arch/x86/mm/numa.c212
-rw-r--r--arch/x86/mm/numa_32.c10
-rw-r--r--arch/x86/mm/numa_64.c988
-rw-r--r--arch/x86/mm/numa_emulation.c494
-rw-r--r--arch/x86/mm/numa_internal.h31
-rw-r--r--arch/x86/mm/pageattr.c18
-rw-r--r--arch/x86/mm/pgtable.c11
-rw-r--r--arch/x86/mm/srat_32.c6
-rw-r--r--arch/x86/mm/srat_64.c367
-rw-r--r--arch/x86/mm/tlb.c14
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/ce4100.c9
-rw-r--r--arch/x86/pci/xen.c159
-rw-r--r--arch/x86/platform/ce4100/ce4100.c26
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts428
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/mrst/vrtc.c16
-rw-r--r--arch/x86/platform/olpc/Makefile4
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c3
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/platform/uv/uv_irq.c4
-rw-r--r--arch/x86/platform/visws/visws_quirks.c4
-rw-r--r--arch/x86/xen/Kconfig10
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c84
-rw-r--r--arch/x86/xen/p2m.c330
-rw-r--r--arch/x86/xen/setup.c68
-rw-r--r--arch/x86/xen/smp.c38
-rw-r--r--arch/x86/xen/suspend.c8
-rw-r--r--arch/x86/xen/time.c4
-rw-r--r--arch/x86/xen/xen-head.S4
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--arch/xtensa/include/asm/rwsem.h37
-rw-r--r--arch/xtensa/kernel/time.c6
-rw-r--r--arch/xtensa/kernel/vmlinux.lds.S2
-rw-r--r--block/blk-core.c18
-rw-r--r--block/blk-flush.c8
-rw-r--r--block/blk-lib.c21
-rw-r--r--block/blk-throttle.c29
-rw-r--r--block/cfq-iosched.c6
-rw-r--r--block/elevator.c4
-rw-r--r--block/genhd.c2
-rw-r--r--block/ioctl.c8
-rw-r--r--crypto/ablkcipher.c3
-rw-r--r--crypto/tcrypt.c3
-rw-r--r--crypto/testmgr.c2
-rw-r--r--crypto/testmgr.h30
-rw-r--r--drivers/acpi/Kconfig1
-rw-r--r--drivers/acpi/acpica/aclocal.h7
-rw-r--r--drivers/acpi/acpica/evgpe.c17
-rw-r--r--drivers/acpi/acpica/evxfgpe.c42
-rw-r--r--drivers/acpi/bus.c23
-rw-r--r--drivers/acpi/debugfs.c20
-rw-r--r--drivers/acpi/numa.c9
-rw-r--r--drivers/acpi/osl.c6
-rw-r--r--drivers/acpi/sleep.c4
-rw-r--r--drivers/ata/Kconfig18
-rw-r--r--drivers/ata/Makefile1
-rw-r--r--drivers/ata/ahci.c8
-rw-r--r--drivers/ata/ahci.h6
-rw-r--r--drivers/ata/ata_generic.c2
-rw-r--r--drivers/ata/ata_piix.c2
-rw-r--r--drivers/ata/libata-acpi.c3
-rw-r--r--drivers/ata/libata-core.c54
-rw-r--r--drivers/ata/libata-eh.c60
-rw-r--r--drivers/ata/libata-scsi.c15
-rw-r--r--drivers/ata/libata-sff.c17
-rw-r--r--drivers/ata/libata.h1
-rw-r--r--drivers/ata/pata_acpi.c2
-rw-r--r--drivers/ata/pata_arasan_cf.c983
-rw-r--r--drivers/ata/pata_at32.c2
-rw-r--r--drivers/ata/pata_bf54x.c4
-rw-r--r--drivers/ata/pata_hpt366.c7
-rw-r--r--drivers/ata/pata_hpt37x.c23
-rw-r--r--drivers/ata/pata_hpt3x2n.c13
-rw-r--r--drivers/ata/pata_hpt3x3.c2
-rw-r--r--drivers/ata/pata_it821x.c4
-rw-r--r--drivers/ata/pata_ixp4xx_cf.c2
-rw-r--r--drivers/ata/pata_macio.c3
-rw-r--r--drivers/ata/pata_marvell.c2
-rw-r--r--drivers/ata/pata_ninja32.c2
-rw-r--r--drivers/ata/pata_octeon_cf.c3
-rw-r--r--drivers/ata/pata_palmld.c2
-rw-r--r--drivers/ata/pata_pcmcia.c2
-rw-r--r--drivers/ata/pata_pdc2027x.c6
-rw-r--r--drivers/ata/pata_pxa.c1
-rw-r--r--drivers/ata/pata_rb532_cf.c1
-rw-r--r--drivers/ata/pata_samsung_cf.c1
-rw-r--r--drivers/ata/pata_scc.c2
-rw-r--r--drivers/ata/pata_sis.c2
-rw-r--r--drivers/ata/pdc_adma.c4
-rw-r--r--drivers/ata/sata_dwc_460ex.c75
-rw-r--r--drivers/ata/sata_fsl.c22
-rw-r--r--drivers/ata/sata_mv.c3
-rw-r--r--drivers/ata/sata_nv.c14
-rw-r--r--drivers/ata/sata_promise.c4
-rw-r--r--drivers/ata/sata_qstor.c3
-rw-r--r--drivers/ata/sata_sil.c3
-rw-r--r--drivers/ata/sata_sil24.c3
-rw-r--r--drivers/ata/sata_sis.c2
-rw-r--r--drivers/ata/sata_svw.c12
-rw-r--r--drivers/ata/sata_sx4.c5
-rw-r--r--drivers/ata/sata_uli.c3
-rw-r--r--drivers/ata/sata_via.c9
-rw-r--r--drivers/ata/sata_vsc.c3
-rw-r--r--drivers/atm/solos-pci.c5
-rw-r--r--drivers/base/Makefile2
-rw-r--r--drivers/base/power/Makefile3
-rw-r--r--drivers/base/power/main.c175
-rw-r--r--drivers/base/power/opp.c2
-rw-r--r--drivers/base/power/power.h21
-rw-r--r--drivers/base/power/runtime.c37
-rw-r--r--drivers/base/power/sysfs.c78
-rw-r--r--drivers/base/power/trace.c6
-rw-r--r--drivers/base/power/wakeup.c109
-rw-r--r--drivers/base/syscore.c117
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--drivers/block/loop.c5
-rw-r--r--drivers/block/xen-blkfront.c87
-rw-r--r--drivers/bluetooth/ath3k.c5
-rw-r--r--drivers/bluetooth/btusb.c12
-rw-r--r--drivers/char/agp/amd64-agp.c9
-rw-r--r--drivers/char/agp/intel-agp.h1
-rw-r--r--drivers/char/agp/intel-gtt.c56
-rw-r--r--drivers/char/hw_random/Kconfig12
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/omap-rng.c14
-rw-r--r--drivers/char/hw_random/picoxcell-rng.c208
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c8
-rw-r--r--drivers/char/mmtimer.c30
-rw-r--r--drivers/char/pcmcia/cm4000_cs.c3
-rw-r--r--drivers/char/pcmcia/ipwireless/main.c52
-rw-r--r--drivers/char/random.c13
-rw-r--r--drivers/char/tpm/tpm.c28
-rw-r--r--drivers/char/tpm/tpm.h2
-rw-r--r--drivers/char/tpm/tpm_tis.c4
-rw-r--r--drivers/char/virtio_console.c8
-rw-r--r--drivers/cpufreq/cpufreq.c27
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c22
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c20
-rw-r--r--drivers/crypto/Kconfig17
-rw-r--r--drivers/crypto/Makefile2
-rw-r--r--drivers/crypto/omap-aes.c4
-rw-r--r--drivers/crypto/omap-sham.c4
-rw-r--r--drivers/crypto/picoxcell_crypto.c1867
-rw-r--r--drivers/crypto/picoxcell_crypto_regs.h128
-rw-r--r--drivers/gpio/ml_ioh_gpio.c1
-rw-r--r--drivers/gpio/pch_gpio.c1
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c4
-rw-r--r--drivers/gpu/drm/drm_irq.c29
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c4
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c11
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c17
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h24
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gem_tiling.c21
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c6
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h2
-rw-r--r--drivers/gpu/drm/i915/intel_display.c103
-rw-r--r--drivers/gpu/drm/i915/intel_panel.c9
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h13
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c18
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_notifier.c11
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_pm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nv04_dfp.c12
-rw-r--r--drivers/gpu/drm/nouveau/nv40_graph.c46
-rw-r--r--drivers/gpu/drm/nouveau/nv50_instmem.c8
-rw-r--r--drivers/gpu/drm/nouveau/nv50_vm.c4
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c4
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c3
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/r100.c26
-rw-r--r--drivers/gpu/drm/radeon/r300.c2
-rw-r--r--drivers/gpu/drm/radeon/r600.c3
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c14
-rw-r--r--drivers/gpu/drm/radeon/rs600.c1
-rw-r--r--drivers/gpu/drm/radeon/rs690.c1
-rw-r--r--drivers/gpu/drm/radeon/rv770.c3
-rw-r--r--drivers/hwmon/Kconfig19
-rw-r--r--drivers/hwmon/ad7414.c1
-rw-r--r--drivers/hwmon/adt7411.c1
-rw-r--r--drivers/hwmon/f71882fg.c4
-rw-r--r--drivers/hwmon/jc42.c35
-rw-r--r--drivers/hwmon/k10temp.c5
-rw-r--r--drivers/hwmon/lm85.c23
-rw-r--r--drivers/i2c/busses/i2c-eg20t.c1
-rw-r--r--drivers/i2c/busses/i2c-ocores.c16
-rw-r--r--drivers/i2c/busses/i2c-omap.c39
-rw-r--r--drivers/i2c/busses/i2c-stu300.c2
-rw-r--r--drivers/i2c/i2c-core.c2
-rw-r--r--drivers/idle/intel_idle.c24
-rw-r--r--drivers/infiniband/core/cm.c20
-rw-r--r--drivers/infiniband/core/cma.c58
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c6
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c24
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c4
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c1
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c32
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c13
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c5
-rw-r--r--drivers/input/gameport/gameport.c2
-rw-r--r--drivers/input/keyboard/tegra-kbc.c62
-rw-r--r--drivers/input/mouse/synaptics.h23
-rw-r--r--drivers/input/serio/serio.c2
-rw-r--r--drivers/input/touchscreen/tps6507x-ts.c12
-rw-r--r--drivers/isdn/hardware/eicon/istream.c2
-rw-r--r--drivers/isdn/hisax/isdnl2.c28
-rw-r--r--drivers/md/linear.c1
-rw-r--r--drivers/md/md.c33
-rw-r--r--drivers/md/md.h2
-rw-r--r--drivers/md/multipath.c1
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/md/raid1.c6
-rw-r--r--drivers/md/raid10.c7
-rw-r--r--drivers/md/raid5.c1
-rw-r--r--drivers/media/common/tuners/tda8290.c14
-rw-r--r--drivers/media/dvb/dvb-usb/dib0700_devices.c21
-rw-r--r--drivers/media/dvb/dvb-usb/lmedm04.c6
-rw-r--r--drivers/media/dvb/frontends/dib7000m.c19
-rw-r--r--drivers/media/dvb/frontends/dib7000m.h15
-rw-r--r--drivers/media/dvb/mantis/mantis_pci.c1
-rw-r--r--drivers/media/rc/ir-raw.c3
-rw-r--r--drivers/media/rc/mceusb.c27
-rw-r--r--drivers/media/rc/nuvoton-cir.c5
-rw-r--r--drivers/media/rc/nuvoton-cir.h7
-rw-r--r--drivers/media/rc/rc-main.c2
-rw-r--r--drivers/media/video/au0828/au0828-video.c28
-rw-r--r--drivers/media/video/cx18/cx18-cards.c50
-rw-r--r--drivers/media/video/cx18/cx18-driver.c25
-rw-r--r--drivers/media/video/cx18/cx18-driver.h3
-rw-r--r--drivers/media/video/cx18/cx18-dvb.c38
-rw-r--r--drivers/media/video/cx23885/cx23885-i2c.c10
-rw-r--r--drivers/media/video/cx25840/cx25840-core.c3
-rw-r--r--drivers/media/video/ivtv/ivtv-irq.c58
-rw-r--r--drivers/media/video/mem2mem_testdev.c1
-rw-r--r--drivers/media/video/s2255drv.c10
-rw-r--r--drivers/memstick/core/memstick.c2
-rw-r--r--drivers/message/fusion/mptbase.h4
-rw-r--r--drivers/message/fusion/mptctl.c8
-rw-r--r--drivers/message/fusion/mptscsih.c7
-rw-r--r--drivers/message/i2o/driver.c3
-rw-r--r--drivers/mfd/asic3.c4
-rw-r--r--drivers/mfd/davinci_voicecodec.c4
-rw-r--r--drivers/mfd/tps6586x.c10
-rw-r--r--drivers/mfd/ucb1x00-ts.c12
-rw-r--r--drivers/mfd/wm8994-core.c18
-rw-r--r--drivers/misc/bmp085.c1
-rw-r--r--drivers/misc/iwmc3200top/iwmc3200top.h4
-rw-r--r--drivers/misc/iwmc3200top/main.c14
-rw-r--r--drivers/misc/tifm_core.c2
-rw-r--r--drivers/misc/vmw_balloon.c2
-rw-r--r--drivers/mmc/core/core.c2
-rw-r--r--drivers/mmc/core/sdio.c3
-rw-r--r--drivers/mmc/host/mmc_spi.c4
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0001.c43
-rw-r--r--drivers/mtd/chips/jedec_probe.c35
-rw-r--r--drivers/mtd/maps/amd76xrom.c1
-rw-r--r--drivers/mtd/mtd_blkdevs.c1
-rw-r--r--drivers/mtd/nand/omap2.c2
-rw-r--r--drivers/mtd/nand/r852.c2
-rw-r--r--drivers/mtd/onenand/generic.c2
-rw-r--r--drivers/mtd/onenand/omap2.c2
-rw-r--r--drivers/mtd/sm_ftl.c2
-rw-r--r--drivers/net/ariadne.c5
-rw-r--r--drivers/net/bnx2x/bnx2x.h31
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.c87
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.h29
-rw-r--r--drivers/net/bnx2x/bnx2x_ethtool.c39
-rw-r--r--drivers/net/bnx2x/bnx2x_init.h2
-rw-r--r--drivers/net/bnx2x/bnx2x_main.c37
-rw-r--r--drivers/net/bnx2x/bnx2x_stats.c4
-rw-r--r--drivers/net/bonding/bond_3ad.c32
-rw-r--r--drivers/net/bonding/bond_3ad.h3
-rw-r--r--drivers/net/can/mcp251x.c2
-rw-r--r--drivers/net/can/softing/Kconfig2
-rw-r--r--drivers/net/can/softing/softing_main.c1
-rw-r--r--drivers/net/cnic.c33
-rw-r--r--drivers/net/cxgb4/t4_msg.h1
-rw-r--r--drivers/net/cxgb4vf/cxgb4vf_main.c80
-rw-r--r--drivers/net/cxgb4vf/t4vf_hw.c2
-rw-r--r--drivers/net/davinci_emac.c2
-rw-r--r--drivers/net/dm9000.c9
-rw-r--r--drivers/net/dnet.c3
-rw-r--r--drivers/net/e1000/e1000_osdep.h3
-rw-r--r--drivers/net/e1000e/netdev.c63
-rw-r--r--drivers/net/ethoc.c8
-rw-r--r--drivers/net/fec.c3
-rw-r--r--drivers/net/forcedeth.c2
-rw-r--r--drivers/net/igbvf/vf.c2
-rw-r--r--drivers/net/ixgbe/ixgbe_fcoe.c51
-rw-r--r--drivers/net/ixgbe/ixgbe_fcoe.h2
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c6
-rw-r--r--drivers/net/macb.c2
-rw-r--r--drivers/net/macvtap.c3
-rw-r--r--drivers/net/pch_gbe/pch_gbe.h2
-rw-r--r--drivers/net/pch_gbe/pch_gbe_main.c106
-rw-r--r--drivers/net/pcmcia/fmvj18x_cs.c1
-rw-r--r--drivers/net/r6040.c115
-rw-r--r--drivers/net/r8169.c50
-rw-r--r--drivers/net/sfc/ethtool.c22
-rw-r--r--drivers/net/skge.c3
-rw-r--r--drivers/net/smsc911x.c5
-rw-r--r--drivers/net/stmmac/stmmac_main.c4
-rw-r--r--drivers/net/tg3.c8
-rw-r--r--drivers/net/usb/dm9601.c4
-rw-r--r--drivers/net/usb/hso.c12
-rw-r--r--drivers/net/usb/usbnet.c4
-rw-r--r--drivers/net/wireless/ath/ath5k/phy.c143
-rw-r--r--drivers/net/wireless/ath/ath9k/ath9k.h6
-rw-r--r--drivers/net/wireless/ath/ath9k/hif_usb.c9
-rw-r--r--drivers/net/wireless/ath/ath9k/init.c8
-rw-r--r--drivers/net/wireless/ath/ath9k/mac.c5
-rw-r--r--drivers/net/wireless/ath/ath9k/main.c8
-rw-r--r--drivers/net/wireless/ath/carl9170/usb.c2
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2100.c70
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2100.h1
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2200.c196
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2200.h2
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-3945.c67
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-5000.c2
-rw-r--r--drivers/net/wireless/p54/p54pci.c14
-rw-r--r--drivers/net/wireless/p54/p54usb.c1
-rw-r--r--drivers/net/wireless/rndis_wlan.c3
-rw-r--r--drivers/net/wireless/rt2x00/rt2800pci.c8
-rw-r--r--drivers/net/wireless/rt2x00/rt2800usb.c6
-rw-r--r--drivers/nfc/Kconfig2
-rw-r--r--drivers/nfc/pn544.c4
-rw-r--r--drivers/of/Kconfig6
-rw-r--r--drivers/of/Makefile1
-rw-r--r--drivers/of/of_pci.c92
-rw-r--r--drivers/of/pdt.c112
-rw-r--r--drivers/pci/pci-driver.c4
-rw-r--r--drivers/pci/xen-pcifront.c31
-rw-r--r--drivers/pcmcia/pcmcia_resource.c2
-rw-r--r--drivers/pcmcia/pxa2xx_base.c2
-rw-r--r--drivers/pcmcia/pxa2xx_base.h1
-rw-r--r--drivers/pcmcia/pxa2xx_colibri.c3
-rw-r--r--drivers/pcmcia/pxa2xx_lubbock.c1
-rw-r--r--drivers/platform/x86/Kconfig2
-rw-r--r--drivers/platform/x86/acer-wmi.c4
-rw-r--r--drivers/platform/x86/asus_acpi.c8
-rw-r--r--drivers/platform/x86/dell-laptop.c24
-rw-r--r--drivers/platform/x86/intel_pmic_gpio.c116
-rw-r--r--drivers/platform/x86/tc1100-wmi.c2
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c8
-rw-r--r--drivers/pps/generators/Kconfig2
-rw-r--r--drivers/pps/kapi.c2
-rw-r--r--drivers/rapidio/rio-sysfs.c12
-rw-r--r--drivers/regulator/mc13xxx-regulator-core.c2
-rw-r--r--drivers/regulator/wm831x-dcdc.c1
-rw-r--r--drivers/rtc/Kconfig12
-rw-r--r--drivers/rtc/class.c7
-rw-r--r--drivers/rtc/interface.c203
-rw-r--r--drivers/rtc/rtc-at91rm9200.c28
-rw-r--r--drivers/rtc/rtc-at91sam9.c30
-rw-r--r--drivers/rtc/rtc-bfin.c27
-rw-r--r--drivers/rtc/rtc-cmos.c111
-rw-r--r--drivers/rtc/rtc-davinci.c55
-rw-r--r--drivers/rtc/rtc-dev.c104
-rw-r--r--drivers/rtc/rtc-ds1511.c17
-rw-r--r--drivers/rtc/rtc-ds1553.c17
-rw-r--r--drivers/rtc/rtc-ds3232.c32
-rw-r--r--drivers/rtc/rtc-jz4740.c7
-rw-r--r--drivers/rtc/rtc-mc13xxx.c7
-rw-r--r--drivers/rtc/rtc-mpc5121.c20
-rw-r--r--drivers/rtc/rtc-mrst.c33
-rw-r--r--drivers/rtc/rtc-mxc.c7
-rw-r--r--drivers/rtc/rtc-nuc900.c15
-rw-r--r--drivers/rtc/rtc-omap.c39
-rw-r--r--drivers/rtc/rtc-pcap.c6
-rw-r--r--drivers/rtc/rtc-pcf50633.c22
-rw-r--r--drivers/rtc/rtc-pl030.c6
-rw-r--r--drivers/rtc/rtc-pl031.c55
-rw-r--r--drivers/rtc/rtc-proc.c8
-rw-r--r--drivers/rtc/rtc-pxa.c44
-rw-r--r--drivers/rtc/rtc-rs5c372.c52
-rw-r--r--drivers/rtc/rtc-rx8025.c25
-rw-r--r--drivers/rtc/rtc-s3c.c41
-rw-r--r--drivers/rtc/rtc-sa1100.c160
-rw-r--r--drivers/rtc/rtc-sh.c24
-rw-r--r--drivers/rtc/rtc-stmp3xxx.c15
-rw-r--r--drivers/rtc/rtc-test.c13
-rw-r--r--drivers/rtc/rtc-twl.c13
-rw-r--r--drivers/rtc/rtc-vr41xx.c32
-rw-r--r--drivers/rtc/rtc-wm831x.c16
-rw-r--r--drivers/rtc/rtc-wm8350.c21
-rw-r--r--drivers/s390/block/dasd_eckd.c2
-rw-r--r--drivers/s390/block/xpram.c4
-rw-r--r--drivers/s390/char/keyboard.c3
-rw-r--r--drivers/s390/char/tape.h8
-rw-r--r--drivers/s390/char/tape_34xx.c59
-rw-r--r--drivers/s390/char/tape_3590.c83
-rw-r--r--drivers/scsi/Makefile2
-rw-r--r--drivers/scsi/be2iscsi/be_main.c2
-rw-r--r--drivers/scsi/ipr.c9
-rw-r--r--drivers/scsi/libsas/sas_ata.c94
-rw-r--r--drivers/scsi/libsas/sas_scsi_host.c14
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c5
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c10
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c12
-rw-r--r--drivers/scsi/scsi_debug.c2
-rw-r--r--drivers/scsi/scsi_lib.c2
-rw-r--r--drivers/scsi/scsi_priv.h2
-rw-r--r--drivers/scsi/scsi_sysfs.c2
-rw-r--r--drivers/scsi/scsi_tgt_lib.c2
-rw-r--r--drivers/scsi/scsi_transport_fc.c2
-rw-r--r--drivers/spi/pxa2xx_spi.c2
-rw-r--r--drivers/spi/pxa2xx_spi_pci.c63
-rw-r--r--drivers/spi/xilinx_spi.c6
-rw-r--r--drivers/target/Makefile3
-rw-r--r--drivers/target/target_core_configfs.c155
-rw-r--r--drivers/target/target_core_device.c13
-rw-r--r--drivers/target/target_core_fabric_configfs.c92
-rw-r--r--drivers/target/target_core_iblock.c8
-rw-r--r--drivers/target/target_core_mib.c1078
-rw-r--r--drivers/target/target_core_mib.h28
-rw-r--r--drivers/target/target_core_pscsi.c4
-rw-r--r--drivers/target/target_core_tmr.c5
-rw-r--r--drivers/target/target_core_tpg.c29
-rw-r--r--drivers/target/target_core_transport.c56
-rw-r--r--drivers/thermal/Kconfig1
-rw-r--r--drivers/thermal/thermal_sys.c40
-rw-r--r--drivers/tty/serial/max3100.c2
-rw-r--r--drivers/tty/serial/max3107.c2
-rw-r--r--drivers/tty/serial/serial_cs.c1
-rw-r--r--drivers/usb/core/hcd-pci.c4
-rw-r--r--drivers/usb/core/hub.c28
-rw-r--r--drivers/usb/core/quirks.c8
-rw-r--r--drivers/usb/gadget/f_phonet.c15
-rw-r--r--drivers/usb/host/ehci-xilinx-of.c1
-rw-r--r--drivers/usb/host/xhci-dbg.c9
-rw-r--r--drivers/usb/host/xhci-mem.c10
-rw-r--r--drivers/usb/host/xhci-ring.c40
-rw-r--r--drivers/usb/host/xhci.c14
-rw-r--r--drivers/usb/host/xhci.h2
-rw-r--r--drivers/usb/musb/musb_core.c1
-rw-r--r--drivers/usb/musb/musb_core.h17
-rw-r--r--drivers/usb/musb/omap2430.c1
-rw-r--r--drivers/usb/serial/sierra.c3
-rw-r--r--drivers/usb/serial/usb_wwan.c15
-rw-r--r--drivers/usb/serial/visor.c12
-rw-r--r--drivers/video/backlight/ltv350qv.c9
-rw-r--r--drivers/watchdog/cpwd.c2
-rw-r--r--drivers/watchdog/hpwdt.c4
-rw-r--r--drivers/watchdog/sbc_fitpc2_wdt.c7
-rw-r--r--drivers/watchdog/sch311x_wdt.c2
-rw-r--r--drivers/watchdog/w83697ug_wdt.c2
-rw-r--r--drivers/xen/balloon.c16
-rw-r--r--drivers/xen/events.c342
-rw-r--r--drivers/xen/manage.c143
-rw-r--r--drivers/xen/platform-pci.c3
-rw-r--r--fs/9p/acl.c28
-rw-r--r--fs/9p/cache.c204
-rw-r--r--fs/9p/cache.h64
-rw-r--r--fs/9p/fid.c114
-rw-r--r--fs/9p/fid.h5
-rw-r--r--fs/9p/v9fs.c108
-rw-r--r--fs/9p/v9fs.h53
-rw-r--r--fs/9p/v9fs_vfs.h26
-rw-r--r--fs/9p/vfs_addr.c194
-rw-r--r--fs/9p/vfs_dentry.c47
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/9p/vfs_file.c316
-rw-r--r--fs/9p/vfs_inode.c307
-rw-r--r--fs/9p/vfs_inode_dotl.c198
-rw-r--r--fs/9p/vfs_super.c65
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c56
-rw-r--r--fs/block_dev.c30
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c44
-rw-r--r--fs/btrfs/extent_io.c165
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c148
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c13
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/btrfs/xattr.c6
-rw-r--r--fs/btrfs/xattr.h3
-rw-r--r--fs/cachefiles/namei.c52
-rw-r--r--fs/ceph/dir.c27
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/snap.c14
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/netmisc.c8
-rw-r--r--fs/cifs/sess.c8
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c121
-rw-r--r--fs/ecryptfs/dentry.c22
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h3
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ecryptfs/inode.c138
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/eventpoll.c95
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/namei.c17
-rw-r--r--fs/ext2/xattr.h6
-rw-r--r--fs/ext2/xattr_security.c5
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/namei.c15
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext3/xattr.h4
-rw-r--r--fs/ext3/xattr_security.c5
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/ext4/xattr.h4
-rw-r--r--fs/ext4/xattr_security.c5
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c60
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/acl.c7
-rw-r--r--fs/gfs2/aops.c1
-rw-r--r--fs/gfs2/bmap.c20
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/file.c77
-rw-r--r--fs/gfs2/glock.c414
-rw-r--r--fs/gfs2/glock.h39
-rw-r--r--fs/gfs2/glops.c33
-rw-r--r--fs/gfs2/incore.h7
-rw-r--r--fs/gfs2/inode.c7
-rw-r--r--fs/gfs2/lock_dlm.c14
-rw-r--r--fs/gfs2/log.c32
-rw-r--r--fs/gfs2/lops.c10
-rw-r--r--fs/gfs2/main.c17
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/ops_inode.c10
-rw-r--r--fs/gfs2/quota.c14
-rw-r--r--fs/gfs2/rgrp.c34
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/inode.c63
-rw-r--r--fs/internal.h15
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jffs2/dir.c9
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/security.c5
-rw-r--r--fs/jffs2/write.c18
-rw-r--r--fs/jffs2/xattr.h5
-rw-r--r--fs/jfs/jfs_xattr.h5
-rw-r--r--fs/jfs/namei.c13
-rw-r--r--fs/jfs/xattr.c6
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c1494
-rw-r--r--fs/namespace.c22
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c131
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c12
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/quota.h3
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/super.c35
-rw-r--r--fs/ocfs2/xattr.c10
-rw-r--r--fs/ocfs2/xattr.h4
-rw-r--r--fs/open.c137
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/mac.c17
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c8
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c15
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_security.c3
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c18
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c9
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--include/acpi/acpi_bus.h2
-rw-r--r--include/asm-generic/cputime.h3
-rw-r--r--include/asm-generic/fcntl.h4
-rw-r--r--include/asm-generic/futex.h7
-rw-r--r--include/asm-generic/pgtable.h2
-rw-r--r--include/asm-generic/sections.h1
-rw-r--r--include/asm-generic/unistd.h6
-rw-r--r--include/asm-generic/vmlinux.lds.h41
-rw-r--r--include/drm/drmP.h2
-rw-r--r--include/keys/rxrpc-type.h1
-rw-r--r--include/linux/ata.h163
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/blktrace_api.h1
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/cgroup.h4
-rw-r--r--include/linux/cgroup_subsys.h4
-rw-r--r--include/linux/dcbnl.h2
-rw-r--r--include/linux/debugobjects.h5
-rw-r--r--include/linux/device.h8
-rw-r--r--include/linux/exportfs.h9
-rw-r--r--include/linux/ext3_fs.h3
-rw-r--r--include/linux/fcntl.h1
-rw-r--r--include/linux/file.h2
-rw-r--r--include/linux/freezer.h2
-rw-r--r--include/linux/fs.h46
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/ftrace_event.h2
-rw-r--r--include/linux/gfp.h11
-rw-r--r--include/linux/hrtimer.h24
-rw-r--r--include/linux/i2c.h2
-rw-r--r--include/linux/ima.h6
-rw-r--r--include/linux/interrupt.h85
-rw-r--r--include/linux/irq.h368
-rw-r--r--include/linux/irqdesc.h78
-rw-r--r--include/linux/jiffies.h1
-rw-r--r--include/linux/key-type.h14
-rw-r--r--include/linux/key.h5
-rw-r--r--include/linux/keyctl.h2
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/libata.h10
-rw-r--r--include/linux/list.h12
-rw-r--r--include/linux/mfd/wm8994/core.h1
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/module.h2
-rw-r--r--include/linux/namei.h7
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/nfs_fs_sb.h10
-rw-r--r--include/linux/of.h16
-rw-r--r--include/linux/of_pci.h9
-rw-r--r--include/linux/pata_arasan_cf_data.h49
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/percpu.h128
-rw-r--r--include/linux/perf_event.h50
-rw-r--r--include/linux/plist.h47
-rw-r--r--include/linux/pm.h21
-rw-r--r--include/linux/pm_runtime.h6
-rw-r--r--include/linux/pm_wakeup.h33
-rw-r--r--include/linux/posix-clock.h150
-rw-r--r--include/linux/posix-timers.h44
-rw-r--r--include/linux/ptrace.h3
-rw-r--r--include/linux/reiserfs_xattr.h2
-rw-r--r--include/linux/ring_buffer.h2
-rw-r--r--include/linux/rio_regs.h4
-rw-r--r--include/linux/rtc.h20
-rw-r--r--include/linux/rwlock_types.h8
-rw-r--r--include/linux/rwsem-spinlock.h31
-rw-r--r--include/linux/rwsem.h53
-rw-r--r--include/linux/sched.h24
-rw-r--r--include/linux/security.h44
-rw-r--r--include/linux/spinlock_types.h8
-rw-r--r--include/linux/sunrpc/sched.h1
-rw-r--r--include/linux/syscalls.h20
-rw-r--r--include/linux/syscore_ops.h29
-rw-r--r--include/linux/sysctl.h14
-rw-r--r--include/linux/thermal.h8
-rw-r--r--include/linux/thread_info.h3
-rw-r--r--include/linux/time.h14
-rw-r--r--include/linux/timex.h3
-rw-r--r--include/linux/workqueue.h12
-rw-r--r--include/linux/xattr.h2
-rw-r--r--include/net/9p/9p.h12
-rw-r--r--include/net/9p/client.h1
-rw-r--r--include/net/9p/transport.h9
-rw-r--r--include/net/ipv6.h12
-rw-r--r--include/net/netfilter/nf_tproxy_core.h12
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/pcmcia/ds.h1
-rw-r--r--include/scsi/sas_ata.h22
-rw-r--r--include/sound/wm8903.h10
-rw-r--r--include/target/target_core_base.h28
-rw-r--r--include/target/target_core_transport.h4
-rw-r--r--include/trace/events/block.h6
-rw-r--r--include/trace/events/mce.h8
-rw-r--r--include/trace/events/module.h5
-rw-r--r--include/trace/events/skb.h4
-rw-r--r--include/xen/events.h8
-rw-r--r--include/xen/interface/io/blkif.h37
-rw-r--r--include/xen/interface/xen.h4
-rw-r--r--include/xen/xen-ops.h6
-rw-r--r--init/Kconfig22
-rw-r--r--kernel/audit_watch.c85
-rw-r--r--kernel/cgroup.c54
-rw-r--r--kernel/compat.c136
-rw-r--r--kernel/cpuset.c7
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/futex.c147
-rw-r--r--kernel/hrtimer.c90
-rw-r--r--kernel/irq/Kconfig39
-rw-r--r--kernel/irq/autoprobe.c54
-rw-r--r--kernel/irq/chip.c483
-rw-r--r--kernel/irq/compat.h72
-rw-r--r--kernel/irq/debug.h40
-rw-r--r--kernel/irq/handle.c144
-rw-r--r--kernel/irq/internals.h173
-rw-r--r--kernel/irq/irqdesc.c79
-rw-r--r--kernel/irq/manage.c604
-rw-r--r--kernel/irq/migration.c38
-rw-r--r--kernel/irq/pm.c30
-rw-r--r--kernel/irq/proc.c70
-rw-r--r--kernel/irq/resend.c19
-rw-r--r--kernel/irq/settings.h138
-rw-r--r--kernel/irq/spurious.c163
-rw-r--r--kernel/perf_event.c1023
-rw-r--r--kernel/pm_qos_params.c24
-rw-r--r--kernel/posix-cpu-timers.c110
-rw-r--r--kernel/posix-timers.c342
-rw-r--r--kernel/power/Kconfig237
-rw-r--r--kernel/power/hibernate.c9
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/power/process.c6
-rw-r--r--kernel/power/snapshot.c15
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/ptrace.c6
-rw-r--r--kernel/rcupdate.c10
-rw-r--r--kernel/rcutiny_plugin.h2
-rw-r--r--kernel/rcutorture.c1
-rw-r--r--kernel/rtmutex-debug.c1
-rw-r--r--kernel/rtmutex-tester.c40
-rw-r--r--kernel/rtmutex.c318
-rw-r--r--kernel/rtmutex_common.h16
-rw-r--r--kernel/sched.c339
-rw-r--r--kernel/sched_autogroup.c15
-rw-r--r--kernel/sched_autogroup.h5
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/sched_fair.c397
-rw-r--r--kernel/sched_idletask.c26
-rw-r--r--kernel/sched_rt.c33
-rw-r--r--kernel/sched_stoptask.c7
-rw-r--r--kernel/softirq.c24
-rw-r--r--kernel/sys.c4
-rw-r--r--kernel/sys_ni.c5
-rw-r--r--kernel/sysctl.c31
-rw-r--r--kernel/sysctl_binary.c19
-rw-r--r--kernel/time.c35
-rw-r--r--kernel/time/Makefile3
-rw-r--r--kernel/time/clockevents.c1
-rw-r--r--kernel/time/jiffies.c20
-rw-r--r--kernel/time/ntp.c13
-rw-r--r--kernel/time/posix-clock.c451
-rw-r--r--kernel/time/tick-broadcast.c11
-rw-r--r--kernel/time/tick-common.c7
-rw-r--r--kernel/time/tick-internal.h12
-rw-r--r--kernel/time/tick-oneshot.c1
-rw-r--r--kernel/time/tick-sched.c1
-rw-r--r--kernel/time/timekeeping.c141
-rw-r--r--kernel/timer.c42
-rw-r--r--kernel/trace/blktrace.c16
-rw-r--r--kernel/trace/ftrace.c52
-rw-r--r--kernel/trace/ring_buffer.c24
-rw-r--r--kernel/trace/trace.c38
-rw-r--r--kernel/trace/trace.h41
-rw-r--r--kernel/trace/trace_entries.h6
-rw-r--r--kernel/trace/trace_events.c2
-rw-r--r--kernel/trace/trace_events_filter.c885
-rw-r--r--kernel/trace/trace_kprobe.c111
-rw-r--r--kernel/trace/trace_output.c36
-rw-r--r--kernel/trace/trace_sched_switch.c48
-rw-r--r--kernel/trace/trace_syscalls.c42
-rw-r--r--kernel/workqueue.c49
-rw-r--r--lib/debugobjects.c9
-rw-r--r--lib/list_debug.c39
-rw-r--r--lib/nlattr.c2
-rw-r--r--lib/plist.c135
-rw-r--r--lib/rwsem.c10
-rw-r--r--lib/swiotlb.c6
-rw-r--r--mm/Makefile8
-rw-r--r--mm/bootmem.c180
-rw-r--r--mm/huge_memory.c34
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mempolicy.c16
-rw-r--r--mm/migrate.c6
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nobootmem.c435
-rw-r--r--mm/page_alloc.c76
-rw-r--r--mm/rmap.c54
-rw-r--r--mm/shmem.c13
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/truncate.c2
-rw-r--r--mm/vmscan.c32
-rw-r--r--net/9p/Makefile1
-rw-r--r--net/9p/client.c166
-rw-r--r--net/9p/protocol.c44
-rw-r--r--net/9p/trans_common.c97
-rw-r--r--net/9p/trans_common.h32
-rw-r--r--net/9p/trans_fd.c52
-rw-r--r--net/9p/trans_virtio.c129
-rw-r--r--net/Makefile4
-rw-r--r--net/bluetooth/l2cap.c1
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bridge/Kconfig1
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_multicast.c42
-rw-r--r--net/bridge/br_private.h3
-rw-r--r--net/ceph/messenger.c133
-rw-r--r--net/ceph/pagevec.c18
-rw-r--r--net/core/dev.c21
-rw-r--r--net/core/dev_addr_lists.c2
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/scm.c2
-rw-r--r--net/dcb/dcbnl.c11
-rw-r--r--net/dccp/input.c7
-rw-r--r--net/dns_resolver/dns_key.c20
-rw-r--r--net/ipv4/devinet.c32
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_gre.c3
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/route.c1
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c2
-rw-r--r--net/ipv6/route.c22
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/mac80211/iface.c1
-rw-r--r--net/mac80211/mlme.c6
-rw-r--r--net/mac80211/util.c2
-rw-r--r--net/netfilter/core.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nf_log.c4
-rw-r--r--net/netfilter/nf_tproxy_core.c27
-rw-r--r--net/netfilter/xt_TPROXY.c22
-rw-r--r--net/netfilter/xt_socket.c13
-rw-r--r--net/netlink/af_netlink.c18
-rw-r--r--net/rds/ib.c9
-rw-r--r--net/rds/ib.h2
-rw-r--r--net/rds/ib_rdma.c27
-rw-r--r--net/rds/ib_send.c5
-rw-r--r--net/rds/loop.c11
-rw-r--r--net/rxrpc/ar-input.c1
-rw-r--r--net/rxrpc/ar-key.c27
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sctp/sm_make_chunk.c10
-rw-r--r--net/sunrpc/sched.c77
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/unix/af_unix.c19
-rw-r--r--net/unix/garbage.c2
-rw-r--r--net/wireless/wext-compat.c4
-rw-r--r--net/xfrm/xfrm_policy.c7
-rw-r--r--scripts/basic/fixdep.c21
-rwxr-xr-xscripts/checkpatch.pl5
-rw-r--r--scripts/kconfig/streamline_config.pl2
-rw-r--r--scripts/mod/sumversion.c19
-rw-r--r--scripts/recordmcount.c3
-rwxr-xr-xscripts/recordmcount.pl1
-rw-r--r--scripts/rt-tester/rt-tester.py2
-rw-r--r--scripts/rt-tester/t2-l1-2rt-sameprio.tst5
-rw-r--r--scripts/rt-tester/t2-l1-pi.tst5
-rw-r--r--scripts/rt-tester/t2-l1-signal.tst5
-rw-r--r--scripts/rt-tester/t2-l2-2rt-deadlock.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-1rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-2rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-3rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-signal.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-steal.tst5
-rw-r--r--scripts/rt-tester/t3-l2-pi.tst5
-rw-r--r--scripts/rt-tester/t4-l2-pi-deboost.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost.tst5
-rw-r--r--scripts/selinux/genheaders/genheaders.c20
-rw-r--r--security/apparmor/Makefile38
-rw-r--r--security/apparmor/lsm.c2
-rw-r--r--security/capability.c15
-rw-r--r--security/commoncap.c2
-rw-r--r--security/integrity/ima/ima.h3
-rw-r--r--security/integrity/ima/ima_api.c13
-rw-r--r--security/integrity/ima/ima_iint.c5
-rw-r--r--security/integrity/ima/ima_main.c136
-rw-r--r--security/keys/compat.c50
-rw-r--r--security/keys/encrypted.c3
-rw-r--r--security/keys/internal.h8
-rw-r--r--security/keys/key.c27
-rw-r--r--security/keys/keyctl.c143
-rw-r--r--security/keys/keyring.c4
-rw-r--r--security/keys/request_key.c2
-rw-r--r--security/keys/trusted.c3
-rw-r--r--security/keys/user_defined.c3
-rw-r--r--security/security.c21
-rw-r--r--security/selinux/hooks.c350
-rw-r--r--security/selinux/include/classmap.h7
-rw-r--r--security/selinux/include/security.h8
-rw-r--r--security/selinux/ss/avtab.h23
-rw-r--r--security/selinux/ss/ebitmap.h1
-rw-r--r--security/selinux/ss/mls.c5
-rw-r--r--security/selinux/ss/mls.h3
-rw-r--r--security/selinux/ss/policydb.c130
-rw-r--r--security/selinux/ss/policydb.h14
-rw-r--r--security/selinux/ss/services.c73
-rw-r--r--security/selinux/xfrm.c2
-rw-r--r--security/smack/smack.h17
-rw-r--r--security/smack/smack_access.c52
-rw-r--r--security/smack/smack_lsm.c287
-rw-r--r--security/smack/smackfs.c370
-rw-r--r--security/tomoyo/file.c5
-rw-r--r--sound/core/jack.c1
-rw-r--r--sound/pci/au88x0/au88x0_core.c14
-rw-r--r--sound/pci/hda/hda_intel.c1
-rw-r--r--sound/pci/hda/patch_cirrus.c2
-rw-r--r--sound/pci/hda/patch_conexant.c68
-rw-r--r--sound/pci/hda/patch_hdmi.c5
-rw-r--r--sound/pci/hda/patch_realtek.c9
-rw-r--r--sound/pci/hda/patch_sigmatel.c15
-rw-r--r--sound/pci/hda/patch_via.c2
-rw-r--r--sound/soc/codecs/cx20442.c2
-rw-r--r--sound/soc/codecs/wm8903.c2
-rw-r--r--sound/soc/codecs/wm8903.h2
-rw-r--r--sound/soc/codecs/wm8978.c14
-rw-r--r--sound/soc/codecs/wm8994.c249
-rw-r--r--sound/soc/codecs/wm9081.c5
-rw-r--r--sound/soc/codecs/wm_hubs.c3
-rw-r--r--sound/soc/imx/eukrea-tlv320.c2
-rw-r--r--sound/soc/omap/am3517evm.c2
-rw-r--r--sound/soc/pxa/e740_wm9705.c4
-rw-r--r--sound/soc/pxa/e750_wm9705.c4
-rw-r--r--sound/soc/pxa/e800_wm9712.c4
-rw-r--r--sound/soc/pxa/em-x270.c4
-rw-r--r--sound/soc/pxa/mioa701_wm9713.c4
-rw-r--r--sound/soc/pxa/palm27x.c4
-rw-r--r--sound/soc/pxa/tosa.c4
-rw-r--r--sound/soc/pxa/zylonite.c4
-rw-r--r--sound/soc/soc-dapm.c25
-rw-r--r--sound/usb/caiaq/audio.c2
-rw-r--r--sound/usb/caiaq/midi.c2
-rw-r--r--sound/usb/card.c4
-rw-r--r--sound/usb/pcm.c7
-rw-r--r--sound/usb/usbaudio.h1
-rw-r--r--tools/perf/.gitignore1
-rw-r--r--tools/perf/Documentation/Makefile19
-rw-r--r--tools/perf/Documentation/perf-list.txt23
-rw-r--r--tools/perf/Documentation/perf-lock.txt12
-rw-r--r--tools/perf/Documentation/perf-probe.txt26
-rw-r--r--tools/perf/Documentation/perf-record.txt11
-rw-r--r--tools/perf/Documentation/perf-stat.txt11
-rw-r--r--tools/perf/Makefile649
-rw-r--r--tools/perf/bench/sched-pipe.c2
-rw-r--r--tools/perf/builtin-annotate.c351
-rw-r--r--tools/perf/builtin-diff.c16
-rw-r--r--tools/perf/builtin-inject.c82
-rw-r--r--tools/perf/builtin-kmem.c10
-rw-r--r--tools/perf/builtin-list.c43
-rw-r--r--tools/perf/builtin-lock.c8
-rw-r--r--tools/perf/builtin-probe.c70
-rw-r--r--tools/perf/builtin-record.c450
-rw-r--r--tools/perf/builtin-report.c225
-rw-r--r--tools/perf/builtin-sched.c27
-rw-r--r--tools/perf/builtin-script.c17
-rw-r--r--tools/perf/builtin-stat.c118
-rw-r--r--tools/perf/builtin-test.c184
-rw-r--r--tools/perf/builtin-timechart.c19
-rw-r--r--tools/perf/builtin-top.c1029
-rw-r--r--tools/perf/perf.h26
-rwxr-xr-xtools/perf/python/twatch.py41
-rw-r--r--tools/perf/util/annotate.c605
-rw-r--r--tools/perf/util/annotate.h103
-rw-r--r--tools/perf/util/build-id.c21
-rw-r--r--tools/perf/util/cache.h7
-rw-r--r--tools/perf/util/callchain.c227
-rw-r--r--tools/perf/util/callchain.h76
-rw-r--r--tools/perf/util/cgroup.c178
-rw-r--r--tools/perf/util/cgroup.h17
-rw-r--r--tools/perf/util/cpumap.c5
-rw-r--r--tools/perf/util/cpumap.h2
-rw-r--r--tools/perf/util/debug.c2
-rw-r--r--tools/perf/util/debug.h2
-rw-r--r--tools/perf/util/event.c374
-rw-r--r--tools/perf/util/event.h78
-rw-r--r--tools/perf/util/evlist.c394
-rw-r--r--tools/perf/util/evlist.h68
-rw-r--r--tools/perf/util/evsel.c234
-rw-r--r--tools/perf/util/evsel.h47
-rw-r--r--tools/perf/util/exec_cmd.c19
-rw-r--r--tools/perf/util/header.c546
-rw-r--r--tools/perf/util/header.h95
-rw-r--r--tools/perf/util/hist.c257
-rw-r--r--tools/perf/util/hist.h60
-rw-r--r--tools/perf/util/include/linux/list.h1
-rw-r--r--tools/perf/util/parse-events.c175
-rw-r--r--tools/perf/util/parse-events.h12
-rw-r--r--tools/perf/util/probe-event.c159
-rw-r--r--tools/perf/util/probe-event.h4
-rw-r--r--tools/perf/util/probe-finder.c533
-rw-r--r--tools/perf/util/python.c896
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c3
-rw-r--r--tools/perf/util/session.c266
-rw-r--r--tools/perf/util/session.h41
-rw-r--r--tools/perf/util/setup.py19
-rw-r--r--tools/perf/util/strfilter.c199
-rw-r--r--tools/perf/util/strfilter.h48
-rw-r--r--tools/perf/util/svghelper.c6
-rw-r--r--tools/perf/util/symbol.c7
-rw-r--r--tools/perf/util/symbol.h1
-rw-r--r--tools/perf/util/thread.c55
-rw-r--r--tools/perf/util/thread.h14
-rw-r--r--tools/perf/util/thread_map.c64
-rw-r--r--tools/perf/util/thread_map.h15
-rw-r--r--tools/perf/util/top.c238
-rw-r--r--tools/perf/util/top.h66
-rw-r--r--tools/perf/util/trace-event-parse.c2
-rw-r--r--tools/perf/util/ui/browser.c25
-rw-r--r--tools/perf/util/ui/browser.h3
-rw-r--r--tools/perf/util/ui/browsers/annotate.c178
-rw-r--r--tools/perf/util/ui/browsers/hists.c197
-rw-r--r--tools/perf/util/ui/browsers/map.c2
-rw-r--r--tools/perf/util/ui/browsers/top.c213
-rw-r--r--tools/perf/util/ui/helpline.c5
-rw-r--r--tools/perf/util/ui/libslang.h6
-rw-r--r--tools/perf/util/ui/setup.c8
-rw-r--r--tools/perf/util/ui/ui.h8
-rw-r--r--tools/perf/util/ui/util.c7
-rw-r--r--tools/perf/util/util.h26
-rwxr-xr-xtools/testing/ktest/ktest.pl2
1452 files changed, 36362 insertions, 20929 deletions
diff --git a/.gitignore b/.gitignore
index 8faa6c02b39e..5d56a3fd0de6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ modules.builtin
28*.gz 28*.gz
29*.bz2 29*.bz2
30*.lzma 30*.lzma
31*.xz
31*.lzo 32*.lzo
32*.patch 33*.patch
33*.gcno 34*.gcno
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 7628cd1bc36a..8ffbc25376a0 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -29,9 +29,8 @@ Description:
29 "disabled" to it. 29 "disabled" to it.
30 30
31 For the devices that are not capable of generating system wakeup 31 For the devices that are not capable of generating system wakeup
32 events this file contains "\n". In that cases the user space 32 events this file is not present. In that case the device cannot
33 cannot modify the contents of this file and the device cannot be 33 be enabled to wake up the system from sleep states.
34 enabled to wake up the system.
35 34
36What: /sys/devices/.../power/control 35What: /sys/devices/.../power/control
37Date: January 2009 36Date: January 2009
@@ -85,7 +84,7 @@ Description:
85 The /sys/devices/.../wakeup_count attribute contains the number 84 The /sys/devices/.../wakeup_count attribute contains the number
86 of signaled wakeup events associated with the device. This 85 of signaled wakeup events associated with the device. This
87 attribute is read-only. If the device is not enabled to wake up 86 attribute is read-only. If the device is not enabled to wake up
88 the system from sleep states, this attribute is empty. 87 the system from sleep states, this attribute is not present.
89 88
90What: /sys/devices/.../power/wakeup_active_count 89What: /sys/devices/.../power/wakeup_active_count
91Date: September 2010 90Date: September 2010
@@ -95,7 +94,7 @@ Description:
95 number of times the processing of wakeup events associated with 94 number of times the processing of wakeup events associated with
96 the device was completed (at the kernel level). This attribute 95 the device was completed (at the kernel level). This attribute
97 is read-only. If the device is not enabled to wake up the 96 is read-only. If the device is not enabled to wake up the
98 system from sleep states, this attribute is empty. 97 system from sleep states, this attribute is not present.
99 98
100What: /sys/devices/.../power/wakeup_hit_count 99What: /sys/devices/.../power/wakeup_hit_count
101Date: September 2010 100Date: September 2010
@@ -105,7 +104,8 @@ Description:
105 number of times the processing of a wakeup event associated with 104 number of times the processing of a wakeup event associated with
106 the device might prevent the system from entering a sleep state. 105 the device might prevent the system from entering a sleep state.
107 This attribute is read-only. If the device is not enabled to 106 This attribute is read-only. If the device is not enabled to
108 wake up the system from sleep states, this attribute is empty. 107 wake up the system from sleep states, this attribute is not
108 present.
109 109
110What: /sys/devices/.../power/wakeup_active 110What: /sys/devices/.../power/wakeup_active
111Date: September 2010 111Date: September 2010
@@ -115,7 +115,7 @@ Description:
115 or 0, depending on whether or not a wakeup event associated with 115 or 0, depending on whether or not a wakeup event associated with
116 the device is being processed (1). This attribute is read-only. 116 the device is being processed (1). This attribute is read-only.
117 If the device is not enabled to wake up the system from sleep 117 If the device is not enabled to wake up the system from sleep
118 states, this attribute is empty. 118 states, this attribute is not present.
119 119
120What: /sys/devices/.../power/wakeup_total_time_ms 120What: /sys/devices/.../power/wakeup_total_time_ms
121Date: September 2010 121Date: September 2010
@@ -125,7 +125,7 @@ Description:
125 the total time of processing wakeup events associated with the 125 the total time of processing wakeup events associated with the
126 device, in milliseconds. This attribute is read-only. If the 126 device, in milliseconds. This attribute is read-only. If the
127 device is not enabled to wake up the system from sleep states, 127 device is not enabled to wake up the system from sleep states,
128 this attribute is empty. 128 this attribute is not present.
129 129
130What: /sys/devices/.../power/wakeup_max_time_ms 130What: /sys/devices/.../power/wakeup_max_time_ms
131Date: September 2010 131Date: September 2010
@@ -135,7 +135,7 @@ Description:
135 the maximum time of processing a single wakeup event associated 135 the maximum time of processing a single wakeup event associated
136 with the device, in milliseconds. This attribute is read-only. 136 with the device, in milliseconds. This attribute is read-only.
137 If the device is not enabled to wake up the system from sleep 137 If the device is not enabled to wake up the system from sleep
138 states, this attribute is empty. 138 states, this attribute is not present.
139 139
140What: /sys/devices/.../power/wakeup_last_time_ms 140What: /sys/devices/.../power/wakeup_last_time_ms
141Date: September 2010 141Date: September 2010
@@ -146,7 +146,7 @@ Description:
146 signaling the last wakeup event associated with the device, in 146 signaling the last wakeup event associated with the device, in
147 milliseconds. This attribute is read-only. If the device is 147 milliseconds. This attribute is read-only. If the device is
148 not enabled to wake up the system from sleep states, this 148 not enabled to wake up the system from sleep states, this
149 attribute is empty. 149 attribute is not present.
150 150
151What: /sys/devices/.../power/autosuspend_delay_ms 151What: /sys/devices/.../power/autosuspend_delay_ms
152Date: September 2010 152Date: September 2010
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
index 5e87ad58c0b5..f51f28531b8d 100644
--- a/Documentation/DocBook/filesystems.tmpl
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -82,6 +82,11 @@
82 </sect1> 82 </sect1>
83 </chapter> 83 </chapter>
84 84
85 <chapter id="fs_events">
86 <title>Events based on file descriptors</title>
87!Efs/eventfd.c
88 </chapter>
89
85 <chapter id="sysfs"> 90 <chapter id="sysfs">
86 <title>The Filesystem for Exporting Kernel Objects</title> 91 <title>The Filesystem for Exporting Kernel Objects</title>
87!Efs/sysfs/file.c 92!Efs/sysfs/file.c
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index cfaac34c4557..6ef692667e2f 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -849,6 +849,37 @@ All: lockdep-checked RCU-protected pointer access
849See the comment headers in the source code (or the docbook generated 849See the comment headers in the source code (or the docbook generated
850from them) for more information. 850from them) for more information.
851 851
852However, given that there are no fewer than four families of RCU APIs
853in the Linux kernel, how do you choose which one to use? The following
854list can be helpful:
855
856a. Will readers need to block? If so, you need SRCU.
857
858b. What about the -rt patchset? If readers would need to block
859 in an non-rt kernel, you need SRCU. If readers would block
860 in a -rt kernel, but not in a non-rt kernel, SRCU is not
861 necessary.
862
863c. Do you need to treat NMI handlers, hardirq handlers,
864 and code segments with preemption disabled (whether
865 via preempt_disable(), local_irq_save(), local_bh_disable(),
866 or some other mechanism) as if they were explicit RCU readers?
867 If so, you need RCU-sched.
868
869d. Do you need RCU grace periods to complete even in the face
870 of softirq monopolization of one or more of the CPUs? For
871 example, is your code subject to network-based denial-of-service
872 attacks? If so, you need RCU-bh.
873
874e. Is your workload too update-intensive for normal use of
875 RCU, but inappropriate for other synchronization mechanisms?
876 If so, consider SLAB_DESTROY_BY_RCU. But please be careful!
877
878f. Otherwise, use RCU.
879
880Of course, this all assumes that you have determined that RCU is in fact
881the right tool for your job.
882
852 883
8538. ANSWERS TO QUICK QUIZZES 8848. ANSWERS TO QUICK QUIZZES
854 885
diff --git a/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt b/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt
new file mode 100644
index 000000000000..569b16248514
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt
@@ -0,0 +1,93 @@
1CE4100 I2C
2----------
3
4CE4100 has one PCI device which is described as the I2C-Controller. This
5PCI device has three PCI-bars, each bar contains a complete I2C
6controller. So we have a total of three independent I2C-Controllers
7which share only an interrupt line.
8The driver is probed via the PCI-ID and is gathering the information of
9attached devices from the devices tree.
10Grant Likely recommended to use the ranges property to map the PCI-Bar
11number to its physical address and to use this to find the child nodes
12of the specific I2C controller. This were his exact words:
13
14 Here's where the magic happens. Each entry in
15 ranges describes how the parent pci address space
16 (middle group of 3) is translated to the local
17 address space (first group of 2) and the size of
18 each range (last cell). In this particular case,
19 the first cell of the local address is chosen to be
20 1:1 mapped to the BARs, and the second is the
21 offset from be base of the BAR (which would be
22 non-zero if you had 2 or more devices mapped off
23 the same BAR)
24
25 ranges allows the address mapping to be described
26 in a way that the OS can interpret without
27 requiring custom device driver code.
28
29This is an example which is used on FalconFalls:
30------------------------------------------------
31 i2c-controller@b,2 {
32 #address-cells = <2>;
33 #size-cells = <1>;
34 compatible = "pci8086,2e68.2",
35 "pci8086,2e68",
36 "pciclass,ff0000",
37 "pciclass,ff00";
38
39 reg = <0x15a00 0x0 0x0 0x0 0x0>;
40 interrupts = <16 1>;
41
42 /* as described by Grant, the first number in the group of
43 * three is the bar number followed by the 64bit bar address
44 * followed by size of the mapping. The bar address
45 * requires also a valid translation in parents ranges
46 * property.
47 */
48 ranges = <0 0 0x02000000 0 0xdffe0500 0x100
49 1 0 0x02000000 0 0xdffe0600 0x100
50 2 0 0x02000000 0 0xdffe0700 0x100>;
51
52 i2c@0 {
53 #address-cells = <1>;
54 #size-cells = <0>;
55 compatible = "intel,ce4100-i2c-controller";
56
57 /* The first number in the reg property is the
58 * number of the bar
59 */
60 reg = <0 0 0x100>;
61
62 /* This I2C controller has no devices */
63 };
64
65 i2c@1 {
66 #address-cells = <1>;
67 #size-cells = <0>;
68 compatible = "intel,ce4100-i2c-controller";
69 reg = <1 0 0x100>;
70
71 /* This I2C controller has one gpio controller */
72 gpio@26 {
73 #gpio-cells = <2>;
74 compatible = "ti,pcf8575";
75 reg = <0x26>;
76 gpio-controller;
77 };
78 };
79
80 i2c@2 {
81 #address-cells = <1>;
82 #size-cells = <0>;
83 compatible = "intel,ce4100-i2c-controller";
84 reg = <2 0 0x100>;
85
86 gpio@26 {
87 #gpio-cells = <2>;
88 compatible = "ti,pcf8575";
89 reg = <0x26>;
90 gpio-controller;
91 };
92 };
93 };
diff --git a/Documentation/devicetree/bindings/rtc/rtc-cmos.txt b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt
new file mode 100644
index 000000000000..7382989b3052
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt
@@ -0,0 +1,28 @@
1 Motorola mc146818 compatible RTC
2~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3
4Required properties:
5 - compatible : "motorola,mc146818"
6 - reg : should contain registers location and length.
7
8Optional properties:
9 - interrupts : should contain interrupt.
10 - interrupt-parent : interrupt source phandle.
11 - ctrl-reg : Contains the initial value of the control register also
12 called "Register B".
13 - freq-reg : Contains the initial value of the frequency register also
14 called "Regsiter A".
15
16"Register A" and "B" are usually initialized by the firmware (BIOS for
17instance). If this is not done, it can be performed by the driver.
18
19ISA Example:
20
21 rtc@70 {
22 compatible = "motorola,mc146818";
23 interrupts = <8 3>;
24 interrupt-parent = <&ioapic1>;
25 ctrl-reg = <2>;
26 freq-reg = <0x26>;
27 reg = <1 0x70 2>;
28 };
diff --git a/Documentation/devicetree/bindings/x86/ce4100.txt b/Documentation/devicetree/bindings/x86/ce4100.txt
new file mode 100644
index 000000000000..b49ae593a60b
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/ce4100.txt
@@ -0,0 +1,38 @@
1CE4100 Device Tree Bindings
2---------------------------
3
4The CE4100 SoC uses for in core peripherals the following compatible
5format: <vendor>,<chip>-<device>.
6Many of the "generic" devices like HPET or IO APIC have the ce4100
7name in their compatible property because they first appeared in this
8SoC.
9
10The CPU node
11------------
12 cpu@0 {
13 device_type = "cpu";
14 compatible = "intel,ce4100";
15 reg = <0>;
16 lapic = <&lapic0>;
17 };
18
19The reg property describes the CPU number. The lapic property points to
20the local APIC timer.
21
22The SoC node
23------------
24
25This node describes the in-core peripherals. Required property:
26 compatible = "intel,ce4100-cp";
27
28The PCI node
29------------
30This node describes the PCI bus on the SoC. Its property should be
31 compatible = "intel,ce4100-pci", "pci";
32
33If the OS is using the IO-APIC for interrupt routing then the reported
34interrupt numbers for devices is no longer true. In order to obtain the
35correct interrupt number, the child node which represents the device has
36to contain the interrupt property. Besides the interrupt property it has
37to contain at least the reg property containing the PCI bus address and
38compatible property according to "PCI Bus Binding Revision 2.1".
diff --git a/Documentation/devicetree/bindings/x86/interrupt.txt b/Documentation/devicetree/bindings/x86/interrupt.txt
new file mode 100644
index 000000000000..7d19f494f19a
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/interrupt.txt
@@ -0,0 +1,26 @@
1Interrupt chips
2---------------
3
4* Intel I/O Advanced Programmable Interrupt Controller (IO APIC)
5
6 Required properties:
7 --------------------
8 compatible = "intel,ce4100-ioapic";
9 #interrupt-cells = <2>;
10
11 Device's interrupt property:
12
13 interrupts = <P S>;
14
15 The first number (P) represents the interrupt pin which is wired to the
16 IO APIC. The second number (S) represents the sense of interrupt which
17 should be configured and can be one of:
18 0 - Edge Rising
19 1 - Level Low
20 2 - Level High
21 3 - Edge Falling
22
23* Local APIC
24 Required property:
25
26 compatible = "intel,ce4100-lapic";
diff --git a/Documentation/devicetree/bindings/x86/timer.txt b/Documentation/devicetree/bindings/x86/timer.txt
new file mode 100644
index 000000000000..c688af58e3bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/timer.txt
@@ -0,0 +1,6 @@
1Timers
2------
3
4* High Precision Event Timer (HPET)
5 Required property:
6 compatible = "intel,ce4100-hpet";
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index 28b1c9d3d351..55fd2623445b 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -13,6 +13,7 @@ Table of Contents
13 13
14 I - Introduction 14 I - Introduction
15 1) Entry point for arch/powerpc 15 1) Entry point for arch/powerpc
16 2) Entry point for arch/x86
16 17
17 II - The DT block format 18 II - The DT block format
18 1) Header 19 1) Header
@@ -225,6 +226,25 @@ it with special cases.
225 cannot support both configurations with Book E and configurations 226 cannot support both configurations with Book E and configurations
226 with classic Powerpc architectures. 227 with classic Powerpc architectures.
227 228
2292) Entry point for arch/x86
230-------------------------------
231
232 There is one single 32bit entry point to the kernel at code32_start,
233 the decompressor (the real mode entry point goes to the same 32bit
234 entry point once it switched into protected mode). That entry point
235 supports one calling convention which is documented in
236 Documentation/x86/boot.txt
237 The physical pointer to the device-tree block (defined in chapter II)
238 is passed via setup_data which requires at least boot protocol 2.09.
239 The type filed is defined as
240
241 #define SETUP_DTB 2
242
243 This device-tree is used as an extension to the "boot page". As such it
244 does not parse / consider data which is already covered by the boot
245 page. This includes memory size, reserved ranges, command line arguments
246 or initrd address. It simply holds information which can not be retrieved
247 otherwise like interrupt routing or a list of devices behind an I2C bus.
228 248
229II - The DT block format 249II - The DT block format
230======================== 250========================
diff --git a/Documentation/hwmon/jc42 b/Documentation/hwmon/jc42
index 0e76ef12e4c6..a22ecf48f255 100644
--- a/Documentation/hwmon/jc42
+++ b/Documentation/hwmon/jc42
@@ -51,7 +51,8 @@ Supported chips:
51 * JEDEC JC 42.4 compliant temperature sensor chips 51 * JEDEC JC 42.4 compliant temperature sensor chips
52 Prefix: 'jc42' 52 Prefix: 'jc42'
53 Addresses scanned: I2C 0x18 - 0x1f 53 Addresses scanned: I2C 0x18 - 0x1f
54 Datasheet: - 54 Datasheet:
55 http://www.jedec.org/sites/default/files/docs/4_01_04R19.pdf
55 56
56Author: 57Author:
57 Guenter Roeck <guenter.roeck@ericsson.com> 58 Guenter Roeck <guenter.roeck@ericsson.com>
@@ -60,7 +61,11 @@ Author:
60Description 61Description
61----------- 62-----------
62 63
63This driver implements support for JEDEC JC 42.4 compliant temperature sensors. 64This driver implements support for JEDEC JC 42.4 compliant temperature sensors,
65which are used on many DDR3 memory modules for mobile devices and servers. Some
66systems use the sensor to prevent memory overheating by automatically throttling
67the memory controller.
68
64The driver auto-detects the chips listed above, but can be manually instantiated 69The driver auto-detects the chips listed above, but can be manually instantiated
65to support other JC 42.4 compliant chips. 70to support other JC 42.4 compliant chips.
66 71
@@ -81,15 +86,19 @@ limits. The chip supports only a single register to configure the hysteresis,
81which applies to all limits. This register can be written by writing into 86which applies to all limits. This register can be written by writing into
82temp1_crit_hyst. Other hysteresis attributes are read-only. 87temp1_crit_hyst. Other hysteresis attributes are read-only.
83 88
89If the BIOS has configured the sensor for automatic temperature management, it
90is likely that it has locked the registers, i.e., that the temperature limits
91cannot be changed.
92
84Sysfs entries 93Sysfs entries
85------------- 94-------------
86 95
87temp1_input Temperature (RO) 96temp1_input Temperature (RO)
88temp1_min Minimum temperature (RW) 97temp1_min Minimum temperature (RO or RW)
89temp1_max Maximum temperature (RW) 98temp1_max Maximum temperature (RO or RW)
90temp1_crit Critical high temperature (RW) 99temp1_crit Critical high temperature (RO or RW)
91 100
92temp1_crit_hyst Critical hysteresis temperature (RW) 101temp1_crit_hyst Critical hysteresis temperature (RO or RW)
93temp1_max_hyst Maximum hysteresis temperature (RO) 102temp1_max_hyst Maximum hysteresis temperature (RO)
94 103
95temp1_min_alarm Temperature low alarm 104temp1_min_alarm Temperature low alarm
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp
index 6526eee525a6..d2b56a4fd1f5 100644
--- a/Documentation/hwmon/k10temp
+++ b/Documentation/hwmon/k10temp
@@ -9,6 +9,8 @@ Supported chips:
9 Socket S1G3: Athlon II, Sempron, Turion II 9 Socket S1G3: Athlon II, Sempron, Turion II
10* AMD Family 11h processors: 10* AMD Family 11h processors:
11 Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra) 11 Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra)
12* AMD Family 12h processors: "Llano"
13* AMD Family 14h processors: "Brazos" (C/E/G-Series)
12 14
13 Prefix: 'k10temp' 15 Prefix: 'k10temp'
14 Addresses scanned: PCI space 16 Addresses scanned: PCI space
@@ -17,10 +19,14 @@ Supported chips:
17 http://support.amd.com/us/Processor_TechDocs/31116.pdf 19 http://support.amd.com/us/Processor_TechDocs/31116.pdf
18 BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors: 20 BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors:
19 http://support.amd.com/us/Processor_TechDocs/41256.pdf 21 http://support.amd.com/us/Processor_TechDocs/41256.pdf
22 BIOS and Kernel Developer's Guide (BKDG) for AMD Family 14h Models 00h-0Fh Processors:
23 http://support.amd.com/us/Processor_TechDocs/43170.pdf
20 Revision Guide for AMD Family 10h Processors: 24 Revision Guide for AMD Family 10h Processors:
21 http://support.amd.com/us/Processor_TechDocs/41322.pdf 25 http://support.amd.com/us/Processor_TechDocs/41322.pdf
22 Revision Guide for AMD Family 11h Processors: 26 Revision Guide for AMD Family 11h Processors:
23 http://support.amd.com/us/Processor_TechDocs/41788.pdf 27 http://support.amd.com/us/Processor_TechDocs/41788.pdf
28 Revision Guide for AMD Family 14h Models 00h-0Fh Processors:
29 http://support.amd.com/us/Processor_TechDocs/47534.pdf
24 AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks: 30 AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks:
25 http://support.amd.com/us/Processor_TechDocs/43373.pdf 31 http://support.amd.com/us/Processor_TechDocs/43373.pdf
26 AMD Family 10h Server and Workstation Processor Power and Thermal Data Sheet: 32 AMD Family 10h Server and Workstation Processor Power and Thermal Data Sheet:
@@ -34,7 +40,7 @@ Description
34----------- 40-----------
35 41
36This driver permits reading of the internal temperature sensor of AMD 42This driver permits reading of the internal temperature sensor of AMD
37Family 10h and 11h processors. 43Family 10h/11h/12h/14h processors.
38 44
39All these processors have a sensor, but on those for Socket F or AM2+, 45All these processors have a sensor, but on those for Socket F or AM2+,
40the sensor may return inconsistent values (erratum 319). The driver 46the sensor may return inconsistent values (erratum 319). The driver
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 89835a4766a6..738c6fda3fb0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -144,6 +144,11 @@ a fixed number of characters. This limit depends on the architecture
144and is between 256 and 4096 characters. It is defined in the file 144and is between 256 and 4096 characters. It is defined in the file
145./include/asm/setup.h as COMMAND_LINE_SIZE. 145./include/asm/setup.h as COMMAND_LINE_SIZE.
146 146
147Finally, the [KMG] suffix is commonly described after a number of kernel
148parameter values. These 'K', 'M', and 'G' letters represent the _binary_
149multipliers 'Kilo', 'Mega', and 'Giga', equalling 2^10, 2^20, and 2^30
150bytes respectively. Such letter suffixes can also be entirely omitted.
151
147 152
148 acpi= [HW,ACPI,X86] 153 acpi= [HW,ACPI,X86]
149 Advanced Configuration and Power Interface 154 Advanced Configuration and Power Interface
@@ -545,16 +550,20 @@ and is between 256 and 4096 characters. It is defined in the file
545 Format: 550 Format:
546 <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>] 551 <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
547 552
548 crashkernel=nn[KMG]@ss[KMG] 553 crashkernel=size[KMG][@offset[KMG]]
549 [KNL] Reserve a chunk of physical memory to 554 [KNL] Using kexec, Linux can switch to a 'crash kernel'
550 hold a kernel to switch to with kexec on panic. 555 upon panic. This parameter reserves the physical
556 memory region [offset, offset + size] for that kernel
557 image. If '@offset' is omitted, then a suitable offset
558 is selected automatically. Check
559 Documentation/kdump/kdump.txt for further details.
551 560
552 crashkernel=range1:size1[,range2:size2,...][@offset] 561 crashkernel=range1:size1[,range2:size2,...][@offset]
553 [KNL] Same as above, but depends on the memory 562 [KNL] Same as above, but depends on the memory
554 in the running system. The syntax of range is 563 in the running system. The syntax of range is
555 start-[end] where start and end are both 564 start-[end] where start and end are both
556 a memory unit (amount[KMG]). See also 565 a memory unit (amount[KMG]). See also
557 Documentation/kdump/kdump.txt for a example. 566 Documentation/kdump/kdump.txt for an example.
558 567
559 cs89x0_dma= [HW,NET] 568 cs89x0_dma= [HW,NET]
560 Format: <dma> 569 Format: <dma>
@@ -1262,10 +1271,9 @@ and is between 256 and 4096 characters. It is defined in the file
1262 6 (KERN_INFO) informational 1271 6 (KERN_INFO) informational
1263 7 (KERN_DEBUG) debug-level messages 1272 7 (KERN_DEBUG) debug-level messages
1264 1273
1265 log_buf_len=n Sets the size of the printk ring buffer, in bytes. 1274 log_buf_len=n[KMG] Sets the size of the printk ring buffer,
1266 Format: { n | nk | nM } 1275 in bytes. n must be a power of two. The default
1267 n must be a power of two. The default size 1276 size is set in the kernel config file.
1268 is set in the kernel config file.
1269 1277
1270 logo.nologo [FB] Disables display of the built-in Linux logo. 1278 logo.nologo [FB] Disables display of the built-in Linux logo.
1271 This may be used to provide more screen space for 1279 This may be used to provide more screen space for
@@ -2436,6 +2444,10 @@ and is between 256 and 4096 characters. It is defined in the file
2436 <deci-seconds>: poll all this frequency 2444 <deci-seconds>: poll all this frequency
2437 0: no polling (default) 2445 0: no polling (default)
2438 2446
2447 threadirqs [KNL]
2448 Force threading of all interrupt handlers except those
2449 marked explicitely IRQF_NO_THREAD.
2450
2439 topology= [S390] 2451 topology= [S390]
2440 Format: {off | on} 2452 Format: {off | on}
2441 Specify if the kernel should make use of the cpu 2453 Specify if the kernel should make use of the cpu
diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt
index 09b55e461740..69686ad12c66 100644
--- a/Documentation/keys-request-key.txt
+++ b/Documentation/keys-request-key.txt
@@ -127,14 +127,15 @@ This is because process A's keyrings can't simply be attached to
127of them, and (b) it requires the same UID/GID/Groups all the way through. 127of them, and (b) it requires the same UID/GID/Groups all the way through.
128 128
129 129
130====================== 130====================================
131NEGATIVE INSTANTIATION 131NEGATIVE INSTANTIATION AND REJECTION
132====================== 132====================================
133 133
134Rather than instantiating a key, it is possible for the possessor of an 134Rather than instantiating a key, it is possible for the possessor of an
135authorisation key to negatively instantiate a key that's under construction. 135authorisation key to negatively instantiate a key that's under construction.
136This is a short duration placeholder that causes any attempt at re-requesting 136This is a short duration placeholder that causes any attempt at re-requesting
137the key whilst it exists to fail with error ENOKEY. 137the key whilst it exists to fail with error ENOKEY if negated or the specified
138error if rejected.
138 139
139This is provided to prevent excessive repeated spawning of /sbin/request-key 140This is provided to prevent excessive repeated spawning of /sbin/request-key
140processes for a key that will never be obtainable. 141processes for a key that will never be obtainable.
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index e4dbbdb1bd96..6523a9e6f293 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -637,6 +637,9 @@ The keyctl syscall functions are:
637 long keyctl(KEYCTL_INSTANTIATE, key_serial_t key, 637 long keyctl(KEYCTL_INSTANTIATE, key_serial_t key,
638 const void *payload, size_t plen, 638 const void *payload, size_t plen,
639 key_serial_t keyring); 639 key_serial_t keyring);
640 long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key,
641 const struct iovec *payload_iov, unsigned ioc,
642 key_serial_t keyring);
640 643
641 If the kernel calls back to userspace to complete the instantiation of a 644 If the kernel calls back to userspace to complete the instantiation of a
642 key, userspace should use this call to supply data for the key before the 645 key, userspace should use this call to supply data for the key before the
@@ -652,11 +655,16 @@ The keyctl syscall functions are:
652 655
653 The payload and plen arguments describe the payload data as for add_key(). 656 The payload and plen arguments describe the payload data as for add_key().
654 657
658 The payload_iov and ioc arguments describe the payload data in an iovec
659 array instead of a single buffer.
660
655 661
656 (*) Negatively instantiate a partially constructed key. 662 (*) Negatively instantiate a partially constructed key.
657 663
658 long keyctl(KEYCTL_NEGATE, key_serial_t key, 664 long keyctl(KEYCTL_NEGATE, key_serial_t key,
659 unsigned timeout, key_serial_t keyring); 665 unsigned timeout, key_serial_t keyring);
666 long keyctl(KEYCTL_REJECT, key_serial_t key,
667 unsigned timeout, unsigned error, key_serial_t keyring);
660 668
661 If the kernel calls back to userspace to complete the instantiation of a 669 If the kernel calls back to userspace to complete the instantiation of a
662 key, userspace should use this call mark the key as negative before the 670 key, userspace should use this call mark the key as negative before the
@@ -669,6 +677,10 @@ The keyctl syscall functions are:
669 that keyring, however all the constraints applying in KEYCTL_LINK apply in 677 that keyring, however all the constraints applying in KEYCTL_LINK apply in
670 this case too. 678 this case too.
671 679
680 If the key is rejected, future searches for it will return the specified
681 error code until the rejected key expires. Negating the key is the same
682 as rejecting the key with ENOKEY as the error code.
683
672 684
673 (*) Set the default request-key destination keyring. 685 (*) Set the default request-key destination keyring.
674 686
@@ -1062,6 +1074,13 @@ The structure has a number of fields, some of which are mandatory:
1062 viable. 1074 viable.
1063 1075
1064 1076
1077 (*) int (*vet_description)(const char *description);
1078
1079 This optional method is called to vet a key description. If the key type
1080 doesn't approve of the key description, it may return an error, otherwise
1081 it should return 0.
1082
1083
1065 (*) int (*instantiate)(struct key *key, const void *data, size_t datalen); 1084 (*) int (*instantiate)(struct key *key, const void *data, size_t datalen);
1066 1085
1067 This method is called to attach a payload to a key during construction. 1086 This method is called to attach a payload to a key during construction.
@@ -1231,10 +1250,11 @@ hand the request off to (perhaps a path held in placed in another key by, for
1231example, the KDE desktop manager). 1250example, the KDE desktop manager).
1232 1251
1233The program (or whatever it calls) should finish construction of the key by 1252The program (or whatever it calls) should finish construction of the key by
1234calling KEYCTL_INSTANTIATE, which also permits it to cache the key in one of 1253calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to
1235the keyrings (probably the session ring) before returning. Alternatively, the 1254cache the key in one of the keyrings (probably the session ring) before
1236key can be marked as negative with KEYCTL_NEGATE; this also permits the key to 1255returning. Alternatively, the key can be marked as negative with KEYCTL_NEGATE
1237be cached in one of the keyrings. 1256or KEYCTL_REJECT; this also permits the key to be cached in one of the
1257keyrings.
1238 1258
1239If it returns with the key remaining in the unconstructed state, the key will 1259If it returns with the key remaining in the unconstructed state, the key will
1240be marked as being negative, it will be added to the session keyring, and an 1260be marked as being negative, it will be added to the session keyring, and an
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 631ad2f1b229..f0d3a8026a56 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -21,6 +21,7 @@ Contents:
21 - SMP barrier pairing. 21 - SMP barrier pairing.
22 - Examples of memory barrier sequences. 22 - Examples of memory barrier sequences.
23 - Read memory barriers vs load speculation. 23 - Read memory barriers vs load speculation.
24 - Transitivity
24 25
25 (*) Explicit kernel barriers. 26 (*) Explicit kernel barriers.
26 27
@@ -959,6 +960,63 @@ the speculation will be cancelled and the value reloaded:
959 retrieved : : +-------+ 960 retrieved : : +-------+
960 961
961 962
963TRANSITIVITY
964------------
965
966Transitivity is a deeply intuitive notion about ordering that is not
967always provided by real computer systems. The following example
968demonstrates transitivity (also called "cumulativity"):
969
970 CPU 1 CPU 2 CPU 3
971 ======================= ======================= =======================
972 { X = 0, Y = 0 }
973 STORE X=1 LOAD X STORE Y=1
974 <general barrier> <general barrier>
975 LOAD Y LOAD X
976
977Suppose that CPU 2's load from X returns 1 and its load from Y returns 0.
978This indicates that CPU 2's load from X in some sense follows CPU 1's
979store to X and that CPU 2's load from Y in some sense preceded CPU 3's
980store to Y. The question is then "Can CPU 3's load from X return 0?"
981
982Because CPU 2's load from X in some sense came after CPU 1's store, it
983is natural to expect that CPU 3's load from X must therefore return 1.
984This expectation is an example of transitivity: if a load executing on
985CPU A follows a load from the same variable executing on CPU B, then
986CPU A's load must either return the same value that CPU B's load did,
987or must return some later value.
988
989In the Linux kernel, use of general memory barriers guarantees
990transitivity. Therefore, in the above example, if CPU 2's load from X
991returns 1 and its load from Y returns 0, then CPU 3's load from X must
992also return 1.
993
994However, transitivity is -not- guaranteed for read or write barriers.
995For example, suppose that CPU 2's general barrier in the above example
996is changed to a read barrier as shown below:
997
998 CPU 1 CPU 2 CPU 3
999 ======================= ======================= =======================
1000 { X = 0, Y = 0 }
1001 STORE X=1 LOAD X STORE Y=1
1002 <read barrier> <general barrier>
1003 LOAD Y LOAD X
1004
1005This substitution destroys transitivity: in this example, it is perfectly
1006legal for CPU 2's load from X to return 1, its load from Y to return 0,
1007and CPU 3's load from X to return 0.
1008
1009The key point is that although CPU 2's read barrier orders its pair
1010of loads, it does not guarantee to order CPU 1's store. Therefore, if
1011this example runs on a system where CPUs 1 and 2 share a store buffer
1012or a level of cache, CPU 2 might have early access to CPU 1's writes.
1013General barriers are therefore required to ensure that all CPUs agree
1014on the combined order of CPU 1's and CPU 2's accesses.
1015
1016To reiterate, if your code requires transitivity, use general barriers
1017throughout.
1018
1019
962======================== 1020========================
963EXPLICIT KERNEL BARRIERS 1021EXPLICIT KERNEL BARRIERS
964======================== 1022========================
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index fe5c099b8fc8..4edd78dfb362 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -40,8 +40,6 @@ decnet.txt
40 - info on using the DECnet networking layer in Linux. 40 - info on using the DECnet networking layer in Linux.
41depca.txt 41depca.txt
42 - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver 42 - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver
43dgrs.txt
44 - the Digi International RightSwitch SE-X Ethernet driver
45dmfe.txt 43dmfe.txt
46 - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver. 44 - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver.
47e100.txt 45e100.txt
@@ -50,8 +48,6 @@ e1000.txt
50 - info on Intel's E1000 line of gigabit ethernet boards 48 - info on Intel's E1000 line of gigabit ethernet boards
51eql.txt 49eql.txt
52 - serial IP load balancing 50 - serial IP load balancing
53ethertap.txt
54 - the Ethertap user space packet reception and transmission driver
55ewrk3.txt 51ewrk3.txt
56 - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver 52 - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver
57filter.txt 53filter.txt
@@ -104,8 +100,6 @@ tuntap.txt
104 - TUN/TAP device driver, allowing user space Rx/Tx of packets. 100 - TUN/TAP device driver, allowing user space Rx/Tx of packets.
105vortex.txt 101vortex.txt
106 - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards. 102 - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
107wavelan.txt
108 - AT&T GIS (nee NCR) WaveLAN card: An Ethernet-like radio transceiver
109x25.txt 103x25.txt
110 - general info on X.25 development. 104 - general info on X.25 development.
111x25-iface.txt 105x25-iface.txt
diff --git a/Documentation/networking/Makefile b/Documentation/networking/Makefile
index 5aba7a33aeeb..24c308dd3fd1 100644
--- a/Documentation/networking/Makefile
+++ b/Documentation/networking/Makefile
@@ -4,6 +4,8 @@ obj- := dummy.o
4# List of programs to build 4# List of programs to build
5hostprogs-y := ifenslave 5hostprogs-y := ifenslave
6 6
7HOSTCFLAGS_ifenslave.o += -I$(objtree)/usr/include
8
7# Tell kbuild to always build the programs 9# Tell kbuild to always build the programs
8always := $(hostprogs-y) 10always := $(hostprogs-y)
9 11
diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt
index aefd1e681804..04ca06325b08 100644
--- a/Documentation/networking/dns_resolver.txt
+++ b/Documentation/networking/dns_resolver.txt
@@ -61,7 +61,6 @@ before the more general line given above as the first match is the one taken.
61 create dns_resolver foo:* * /usr/sbin/dns.foo %k 61 create dns_resolver foo:* * /usr/sbin/dns.foo %k
62 62
63 63
64
65===== 64=====
66USAGE 65USAGE
67===== 66=====
@@ -104,6 +103,14 @@ implemented in the module can be called after doing:
104 returned also. 103 returned also.
105 104
106 105
106===============================
107READING DNS KEYS FROM USERSPACE
108===============================
109
110Keys of dns_resolver type can be read from userspace using keyctl_read() or
111"keyctl read/print/pipe".
112
113
107========= 114=========
108MECHANISM 115MECHANISM
109========= 116=========
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 57080cd74575..f023ba6bba62 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -1,6 +1,6 @@
1Device Power Management 1Device Power Management
2 2
3Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. 3Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
4Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu> 4Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
5 5
6 6
@@ -159,18 +159,18 @@ matter, and the kernel is responsible for keeping track of it. By contrast,
159whether or not a wakeup-capable device should issue wakeup events is a policy 159whether or not a wakeup-capable device should issue wakeup events is a policy
160decision, and it is managed by user space through a sysfs attribute: the 160decision, and it is managed by user space through a sysfs attribute: the
161power/wakeup file. User space can write the strings "enabled" or "disabled" to 161power/wakeup file. User space can write the strings "enabled" or "disabled" to
162set or clear the should_wakeup flag, respectively. Reads from the file will 162set or clear the "should_wakeup" flag, respectively. This file is only present
163return the corresponding string if can_wakeup is true, but if can_wakeup is 163for wakeup-capable devices (i.e. devices whose "can_wakeup" flags are set)
164false then reads will return an empty string, to indicate that the device 164and is created (or removed) by device_set_wakeup_capable(). Reads from the
165doesn't support wakeup events. (But even though the file appears empty, writes 165file will return the corresponding string.
166will still affect the should_wakeup flag.)
167 166
168The device_may_wakeup() routine returns true only if both flags are set. 167The device_may_wakeup() routine returns true only if both flags are set.
169Drivers should check this routine when putting devices in a low-power state 168This information is used by subsystems, like the PCI bus type code, to see
170during a system sleep transition, to see whether or not to enable the devices' 169whether or not to enable the devices' wakeup mechanisms. If device wakeup
171wakeup mechanisms. However for runtime power management, wakeup events should 170mechanisms are enabled or disabled directly by drivers, they also should use
172be enabled whenever the device and driver both support them, regardless of the 171device_may_wakeup() to decide what to do during a system sleep transition.
173should_wakeup flag. 172However for runtime power management, wakeup events should be enabled whenever
173the device and driver both support them, regardless of the should_wakeup flag.
174 174
175 175
176/sys/devices/.../power/control files 176/sys/devices/.../power/control files
@@ -249,23 +249,18 @@ various phases always run after tasks have been frozen and before they are
249unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have 249unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have
250been disabled (except for those marked with the IRQ_WAKEUP flag). 250been disabled (except for those marked with the IRQ_WAKEUP flag).
251 251
252Most phases use bus, type, and class callbacks (that is, methods defined in 252All phases use bus, type, or class callbacks (that is, methods defined in
253dev->bus->pm, dev->type->pm, and dev->class->pm). The prepare and complete 253dev->bus->pm, dev->type->pm, or dev->class->pm). These callbacks are mutually
254phases are exceptions; they use only bus callbacks. When multiple callbacks 254exclusive, so if the device type provides a struct dev_pm_ops object pointed to
255are used in a phase, they are invoked in the order: <class, type, bus> during 255by its pm field (i.e. both dev->type and dev->type->pm are defined), the
256power-down transitions and in the opposite order during power-up transitions. 256callbacks included in that object (i.e. dev->type->pm) will be used. Otherwise,
257For example, during the suspend phase the PM core invokes 257if the class provides a struct dev_pm_ops object pointed to by its pm field
258 258(i.e. both dev->class and dev->class->pm are defined), the PM core will use the
259 dev->class->pm.suspend(dev); 259callbacks from that object (i.e. dev->class->pm). Finally, if the pm fields of
260 dev->type->pm.suspend(dev); 260both the device type and class objects are NULL (or those objects do not exist),
261 dev->bus->pm.suspend(dev); 261the callbacks provided by the bus (that is, the callbacks from dev->bus->pm)
262 262will be used (this allows device types to override callbacks provided by bus
263before moving on to the next device, whereas during the resume phase the core 263types or classes if necessary).
264invokes
265
266 dev->bus->pm.resume(dev);
267 dev->type->pm.resume(dev);
268 dev->class->pm.resume(dev);
269 264
270These callbacks may in turn invoke device- or driver-specific methods stored in 265These callbacks may in turn invoke device- or driver-specific methods stored in
271dev->driver->pm, but they don't have to. 266dev->driver->pm, but they don't have to.
@@ -507,6 +502,49 @@ routines. Nevertheless, different callback pointers are used in case there is a
507situation where it actually matters. 502situation where it actually matters.
508 503
509 504
505Device Power Domains
506--------------------
507Sometimes devices share reference clocks or other power resources. In those
508cases it generally is not possible to put devices into low-power states
509individually. Instead, a set of devices sharing a power resource can be put
510into a low-power state together at the same time by turning off the shared
511power resource. Of course, they also need to be put into the full-power state
512together, by turning the shared power resource on. A set of devices with this
513property is often referred to as a power domain.
514
515Support for power domains is provided through the pwr_domain field of struct
516device. This field is a pointer to an object of type struct dev_power_domain,
517defined in include/linux/pm.h, providing a set of power management callbacks
518analogous to the subsystem-level and device driver callbacks that are executed
519for the given device during all power transitions, in addition to the respective
520subsystem-level callbacks. Specifically, the power domain "suspend" callbacks
521(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are
522executed after the analogous subsystem-level callbacks, while the power domain
523"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore,
524etc.) are executed before the analogous subsystem-level callbacks. Error codes
525returned by the "suspend" and "resume" power domain callbacks are ignored.
526
527Power domain ->runtime_idle() callback is executed before the subsystem-level
528->runtime_idle() callback and the result returned by it is not ignored. Namely,
529if it returns error code, the subsystem-level ->runtime_idle() callback will not
530be called and the helper function rpm_idle() executing it will return error
531code. This mechanism is intended to help platforms where saving device state
532is a time consuming operation and should only be carried out if all devices
533in the power domain are idle, before turning off the shared power resource(s).
534Namely, the power domain ->runtime_idle() callback may return error code until
535the pm_runtime_idle() helper (or its asychronous version) has been called for
536all devices in the power domain (it is recommended that the returned error code
537be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle()
538callback from being run prematurely.
539
540The support for device power domains is only relevant to platforms needing to
541use the same subsystem-level (e.g. platform bus type) and device driver power
542management callbacks in many different power domain configurations and wanting
543to avoid incorporating the support for power domains into the subsystem-level
544callbacks. The other platforms need not implement it or take it into account
545in any way.
546
547
510System Devices 548System Devices
511-------------- 549--------------
512System devices (sysdevs) follow a slightly different API, which can be found in 550System devices (sysdevs) follow a slightly different API, which can be found in
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index ffe55ffa540a..654097b130b4 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -1,6 +1,6 @@
1Run-time Power Management Framework for I/O Devices 1Run-time Power Management Framework for I/O Devices
2 2
3(C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. 3(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
4(C) 2010 Alan Stern <stern@rowland.harvard.edu> 4(C) 2010 Alan Stern <stern@rowland.harvard.edu>
5 5
61. Introduction 61. Introduction
@@ -44,11 +44,12 @@ struct dev_pm_ops {
44}; 44};
45 45
46The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks are 46The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks are
47executed by the PM core for either the bus type, or device type (if the bus 47executed by the PM core for either the device type, or the class (if the device
48type's callback is not defined), or device class (if the bus type's and device 48type's struct dev_pm_ops object does not exist), or the bus type (if the
49type's callbacks are not defined) of given device. The bus type, device type 49device type's and class' struct dev_pm_ops objects do not exist) of the given
50and device class callbacks are referred to as subsystem-level callbacks in what 50device (this allows device types to override callbacks provided by bus types or
51follows. 51classes if necessary). The bus type, device type and class callbacks are
52referred to as subsystem-level callbacks in what follows.
52 53
53By default, the callbacks are always invoked in process context with interrupts 54By default, the callbacks are always invoked in process context with interrupts
54enabled. However, subsystems can use the pm_runtime_irq_safe() helper function 55enabled. However, subsystems can use the pm_runtime_irq_safe() helper function
diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt
index 34800cc521bf..4416b28630df 100644
--- a/Documentation/power/states.txt
+++ b/Documentation/power/states.txt
@@ -62,12 +62,12 @@ setup via another operating system for it to use. Despite the
62inconvenience, this method requires minimal work by the kernel, since 62inconvenience, this method requires minimal work by the kernel, since
63the firmware will also handle restoring memory contents on resume. 63the firmware will also handle restoring memory contents on resume.
64 64
65For suspend-to-disk, a mechanism called swsusp called 'swsusp' (Swap 65For suspend-to-disk, a mechanism called 'swsusp' (Swap Suspend) is used
66Suspend) is used to write memory contents to free swap space. 66to write memory contents to free swap space. swsusp has some restrictive
67swsusp has some restrictive requirements, but should work in most 67requirements, but should work in most cases. Some, albeit outdated,
68cases. Some, albeit outdated, documentation can be found in 68documentation can be found in Documentation/power/swsusp.txt.
69Documentation/power/swsusp.txt. Alternatively, userspace can do most 69Alternatively, userspace can do most of the actual suspend to disk work,
70of the actual suspend to disk work, see userland-swsusp.txt. 70see userland-swsusp.txt.
71 71
72Once memory state is written to disk, the system may either enter a 72Once memory state is written to disk, the system may either enter a
73low-power state (like ACPI S4), or it may simply power down. Powering 73low-power state (like ACPI S4), or it may simply power down. Powering
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 9104c1062084..250160469d83 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -178,38 +178,29 @@ RTC class framework, but can't be supported by the older driver.
178 setting the longer alarm time and enabling its IRQ using a single 178 setting the longer alarm time and enabling its IRQ using a single
179 request (using the same model as EFI firmware). 179 request (using the same model as EFI firmware).
180 180
181 * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, it probably 181 * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, the RTC framework
182 also offers update IRQs whenever the "seconds" counter changes. 182 will emulate this mechanism.
183 If needed, the RTC framework can emulate this mechanism.
184 183
185 * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... another 184 * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... these icotls
186 feature often accessible with an IRQ line is a periodic IRQ, issued 185 are emulated via a kernel hrtimer.
187 at settable frequencies (usually 2^N Hz).
188 186
189In many cases, the RTC alarm can be a system wake event, used to force 187In many cases, the RTC alarm can be a system wake event, used to force
190Linux out of a low power sleep state (or hibernation) back to a fully 188Linux out of a low power sleep state (or hibernation) back to a fully
191operational state. For example, a system could enter a deep power saving 189operational state. For example, a system could enter a deep power saving
192state until it's time to execute some scheduled tasks. 190state until it's time to execute some scheduled tasks.
193 191
194Note that many of these ioctls need not actually be implemented by your 192Note that many of these ioctls are handled by the common rtc-dev interface.
195driver. The common rtc-dev interface handles many of these nicely if your 193Some common examples:
196driver returns ENOIOCTLCMD. Some common examples:
197 194
198 * RTC_RD_TIME, RTC_SET_TIME: the read_time/set_time functions will be 195 * RTC_RD_TIME, RTC_SET_TIME: the read_time/set_time functions will be
199 called with appropriate values. 196 called with appropriate values.
200 197
201 * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: the 198 * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: gets or sets
202 set_alarm/read_alarm functions will be called. 199 the alarm rtc_timer. May call the set_alarm driver function.
203 200
204 * RTC_IRQP_SET, RTC_IRQP_READ: the irq_set_freq function will be called 201 * RTC_IRQP_SET, RTC_IRQP_READ: These are emulated by the generic code.
205 to set the frequency while the framework will handle the read for you
206 since the frequency is stored in the irq_freq member of the rtc_device
207 structure. Your driver needs to initialize the irq_freq member during
208 init. Make sure you check the requested frequency is in range of your
209 hardware in the irq_set_freq function. If it isn't, return -EINVAL. If
210 you cannot actually change the frequency, do not define irq_set_freq.
211 202
212 * RTC_PIE_ON, RTC_PIE_OFF: the irq_set_state function will be called. 203 * RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code.
213 204
214If all else fails, check out the rtc-test.c driver! 205If all else fails, check out the rtc-test.c driver!
215 206
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 178c831b907d..2e3c64b1a6a5 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -86,7 +86,7 @@ to change the variables it has to get an exclusive write lock.
86 86
87The routines look the same as above: 87The routines look the same as above:
88 88
89 rwlock_t xxx_lock = RW_LOCK_UNLOCKED; 89 rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
90 90
91 unsigned long flags; 91 unsigned long flags;
92 92
@@ -196,25 +196,3 @@ appropriate:
196 196
197For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or 197For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
198__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate. 198__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
199
200SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. These interfere
201with lockdep state tracking.
202
203Most of the time, you can simply turn:
204 static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
205into:
206 static DEFINE_SPINLOCK(xxx_lock);
207
208Static structure member variables go from:
209
210 struct foo bar {
211 .lock = SPIN_LOCK_UNLOCKED;
212 };
213
214to:
215
216 struct foo bar {
217 .lock = __SPIN_LOCK_UNLOCKED(bar.lock);
218 };
219
220Declaration of static rw_locks undergo a similar transformation.
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index dc52bd442c92..79fcafc7fd64 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -247,6 +247,13 @@ You need very few things to get the syscalls tracing in an arch.
247- Support the TIF_SYSCALL_TRACEPOINT thread flags. 247- Support the TIF_SYSCALL_TRACEPOINT thread flags.
248- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace 248- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace
249 in the ptrace syscalls tracing path. 249 in the ptrace syscalls tracing path.
250- If the system call table on this arch is more complicated than a simple array
251 of addresses of the system calls, implement an arch_syscall_addr to return
252 the address of a given system call.
253- If the symbol names of the system calls do not match the function names on
254 this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and
255 implement arch_syscall_match_sym_name with the appropriate logic to return
256 true if the function name corresponds with the symbol name.
250- Tag this arch as HAVE_SYSCALL_TRACEPOINTS. 257- Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
251 258
252 259
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 557c1edeccaf..1ebc24cf9a55 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -80,11 +80,11 @@ of ftrace. Here is a list of some of the key files:
80 tracers listed here can be configured by 80 tracers listed here can be configured by
81 echoing their name into current_tracer. 81 echoing their name into current_tracer.
82 82
83 tracing_enabled: 83 tracing_on:
84 84
85 This sets or displays whether the current_tracer 85 This sets or displays whether writing to the trace
86 is activated and tracing or not. Echo 0 into this 86 ring buffer is enabled. Echo 0 into this file to disable
87 file to disable the tracer or 1 to enable it. 87 the tracer or 1 to enable it.
88 88
89 trace: 89 trace:
90 90
@@ -202,10 +202,6 @@ Here is the list of current tracers that may be configured.
202 to draw a graph of function calls similar to C code 202 to draw a graph of function calls similar to C code
203 source. 203 source.
204 204
205 "sched_switch"
206
207 Traces the context switches and wakeups between tasks.
208
209 "irqsoff" 205 "irqsoff"
210 206
211 Traces the areas that disable interrupts and saves 207 Traces the areas that disable interrupts and saves
@@ -273,39 +269,6 @@ format, the function name that was traced "path_put" and the
273parent function that called this function "path_walk". The 269parent function that called this function "path_walk". The
274timestamp is the time at which the function was entered. 270timestamp is the time at which the function was entered.
275 271
276The sched_switch tracer also includes tracing of task wakeups
277and context switches.
278
279 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S
280 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S
281 ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R
282 events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R
283 kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R
284 ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R
285
286Wake ups are represented by a "+" and the context switches are
287shown as "==>". The format is:
288
289 Context switches:
290
291 Previous task Next Task
292
293 <pid>:<prio>:<state> ==> <pid>:<prio>:<state>
294
295 Wake ups:
296
297 Current task Task waking up
298
299 <pid>:<prio>:<state> + <pid>:<prio>:<state>
300
301The prio is the internal kernel priority, which is the inverse
302of the priority that is usually displayed by user-space tools.
303Zero represents the highest priority (99). Prio 100 starts the
304"nice" priorities with 100 being equal to nice -20 and 139 being
305nice 19. The prio "140" is reserved for the idle task which is
306the lowest priority thread (pid 0).
307
308
309Latency trace format 272Latency trace format
310-------------------- 273--------------------
311 274
@@ -491,78 +454,10 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
491 latencies, as described in "Latency 454 latencies, as described in "Latency
492 trace format". 455 trace format".
493 456
494sched_switch 457 overwrite - This controls what happens when the trace buffer is
495------------ 458 full. If "1" (default), the oldest events are
496 459 discarded and overwritten. If "0", then the newest
497This tracer simply records schedule switches. Here is an example 460 events are discarded.
498of how to use it.
499
500 # echo sched_switch > current_tracer
501 # echo 1 > tracing_enabled
502 # sleep 1
503 # echo 0 > tracing_enabled
504 # cat trace
505
506# tracer: sched_switch
507#
508# TASK-PID CPU# TIMESTAMP FUNCTION
509# | | | | |
510 bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R
511 bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R
512 sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R
513 bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S
514 bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R
515 sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R
516 bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D
517 bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R
518 <idle>-0 [00] 240.132589: 0:140:R + 4:115:S
519 <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R
520 ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R
521 <idle>-0 [00] 240.132598: 0:140:R + 4:115:S
522 <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R
523 ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R
524 sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R
525 [...]
526
527
528As we have discussed previously about this format, the header
529shows the name of the trace and points to the options. The
530"FUNCTION" is a misnomer since here it represents the wake ups
531and context switches.
532
533The sched_switch file only lists the wake ups (represented with
534'+') and context switches ('==>') with the previous task or
535current task first followed by the next task or task waking up.
536The format for both of these is PID:KERNEL-PRIO:TASK-STATE.
537Remember that the KERNEL-PRIO is the inverse of the actual
538priority with zero (0) being the highest priority and the nice
539values starting at 100 (nice -20). Below is a quick chart to map
540the kernel priority to user land priorities.
541
542 Kernel Space User Space
543 ===============================================================
544 0(high) to 98(low) user RT priority 99(high) to 1(low)
545 with SCHED_RR or SCHED_FIFO
546 ---------------------------------------------------------------
547 99 sched_priority is not used in scheduling
548 decisions(it must be specified as 0)
549 ---------------------------------------------------------------
550 100(high) to 139(low) user nice -20(high) to 19(low)
551 ---------------------------------------------------------------
552 140 idle task priority
553 ---------------------------------------------------------------
554
555The task states are:
556
557 R - running : wants to run, may not actually be running
558 S - sleep : process is waiting to be woken up (handles signals)
559 D - disk sleep (uninterruptible sleep) : process must be woken up
560 (ignores signals)
561 T - stopped : process suspended
562 t - traced : process is being traced (with something like gdb)
563 Z - zombie : process waiting to be cleaned up
564 X - unknown
565
566 461
567ftrace_enabled 462ftrace_enabled
568-------------- 463--------------
@@ -607,10 +502,10 @@ an example:
607 # echo irqsoff > current_tracer 502 # echo irqsoff > current_tracer
608 # echo latency-format > trace_options 503 # echo latency-format > trace_options
609 # echo 0 > tracing_max_latency 504 # echo 0 > tracing_max_latency
610 # echo 1 > tracing_enabled 505 # echo 1 > tracing_on
611 # ls -ltr 506 # ls -ltr
612 [...] 507 [...]
613 # echo 0 > tracing_enabled 508 # echo 0 > tracing_on
614 # cat trace 509 # cat trace
615# tracer: irqsoff 510# tracer: irqsoff
616# 511#
@@ -715,10 +610,10 @@ is much like the irqsoff tracer.
715 # echo preemptoff > current_tracer 610 # echo preemptoff > current_tracer
716 # echo latency-format > trace_options 611 # echo latency-format > trace_options
717 # echo 0 > tracing_max_latency 612 # echo 0 > tracing_max_latency
718 # echo 1 > tracing_enabled 613 # echo 1 > tracing_on
719 # ls -ltr 614 # ls -ltr
720 [...] 615 [...]
721 # echo 0 > tracing_enabled 616 # echo 0 > tracing_on
722 # cat trace 617 # cat trace
723# tracer: preemptoff 618# tracer: preemptoff
724# 619#
@@ -863,10 +758,10 @@ tracers.
863 # echo preemptirqsoff > current_tracer 758 # echo preemptirqsoff > current_tracer
864 # echo latency-format > trace_options 759 # echo latency-format > trace_options
865 # echo 0 > tracing_max_latency 760 # echo 0 > tracing_max_latency
866 # echo 1 > tracing_enabled 761 # echo 1 > tracing_on
867 # ls -ltr 762 # ls -ltr
868 [...] 763 [...]
869 # echo 0 > tracing_enabled 764 # echo 0 > tracing_on
870 # cat trace 765 # cat trace
871# tracer: preemptirqsoff 766# tracer: preemptirqsoff
872# 767#
@@ -1026,9 +921,9 @@ Instead of performing an 'ls', we will run 'sleep 1' under
1026 # echo wakeup > current_tracer 921 # echo wakeup > current_tracer
1027 # echo latency-format > trace_options 922 # echo latency-format > trace_options
1028 # echo 0 > tracing_max_latency 923 # echo 0 > tracing_max_latency
1029 # echo 1 > tracing_enabled 924 # echo 1 > tracing_on
1030 # chrt -f 5 sleep 1 925 # chrt -f 5 sleep 1
1031 # echo 0 > tracing_enabled 926 # echo 0 > tracing_on
1032 # cat trace 927 # cat trace
1033# tracer: wakeup 928# tracer: wakeup
1034# 929#
@@ -1140,9 +1035,9 @@ ftrace_enabled is set; otherwise this tracer is a nop.
1140 1035
1141 # sysctl kernel.ftrace_enabled=1 1036 # sysctl kernel.ftrace_enabled=1
1142 # echo function > current_tracer 1037 # echo function > current_tracer
1143 # echo 1 > tracing_enabled 1038 # echo 1 > tracing_on
1144 # usleep 1 1039 # usleep 1
1145 # echo 0 > tracing_enabled 1040 # echo 0 > tracing_on
1146 # cat trace 1041 # cat trace
1147# tracer: function 1042# tracer: function
1148# 1043#
@@ -1180,7 +1075,7 @@ int trace_fd;
1180[...] 1075[...]
1181int main(int argc, char *argv[]) { 1076int main(int argc, char *argv[]) {
1182 [...] 1077 [...]
1183 trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY); 1078 trace_fd = open(tracing_file("tracing_on"), O_WRONLY);
1184 [...] 1079 [...]
1185 if (condition_hit()) { 1080 if (condition_hit()) {
1186 write(trace_fd, "0", 1); 1081 write(trace_fd, "0", 1);
@@ -1631,9 +1526,9 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt:
1631 # echo sys_nanosleep hrtimer_interrupt \ 1526 # echo sys_nanosleep hrtimer_interrupt \
1632 > set_ftrace_filter 1527 > set_ftrace_filter
1633 # echo function > current_tracer 1528 # echo function > current_tracer
1634 # echo 1 > tracing_enabled 1529 # echo 1 > tracing_on
1635 # usleep 1 1530 # usleep 1
1636 # echo 0 > tracing_enabled 1531 # echo 0 > tracing_on
1637 # cat trace 1532 # cat trace
1638# tracer: ftrace 1533# tracer: ftrace
1639# 1534#
@@ -1879,9 +1774,9 @@ different. The trace is live.
1879 # echo function > current_tracer 1774 # echo function > current_tracer
1880 # cat trace_pipe > /tmp/trace.out & 1775 # cat trace_pipe > /tmp/trace.out &
1881[1] 4153 1776[1] 4153
1882 # echo 1 > tracing_enabled 1777 # echo 1 > tracing_on
1883 # usleep 1 1778 # usleep 1
1884 # echo 0 > tracing_enabled 1779 # echo 0 > tracing_on
1885 # cat trace 1780 # cat trace
1886# tracer: function 1781# tracer: function
1887# 1782#
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 5f77d94598dd..6d27ab8d6e9f 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,11 +42,25 @@ Synopsis of kprobe_events
42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) 42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG. 43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types 44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
45 (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported. 45 (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
46 are supported.
46 47
47 (*) only for return probe. 48 (*) only for return probe.
48 (**) this is useful for fetching a field of data structures. 49 (**) this is useful for fetching a field of data structures.
49 50
51Types
52-----
53Several types are supported for fetch-args. Kprobe tracer will access memory
54by given type. Prefix 's' and 'u' means those types are signed and unsigned
55respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
56String type is a special type, which fetches a "null-terminated" string from
57kernel space. This means it will fail and store NULL if the string container
58has been paged out.
59Bitfield is another special type, which takes 3 parameters, bit-width, bit-
60offset, and container-size (usually 32). The syntax is;
61
62 b<bit-width>@<bit-offset>/<container-size>
63
50 64
51Per-Probe Event Filtering 65Per-Probe Event Filtering
52------------------------- 66-------------------------
diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt
index 996a27d9b8db..01c513fac40e 100644
--- a/Documentation/workqueue.txt
+++ b/Documentation/workqueue.txt
@@ -190,9 +190,9 @@ resources, scheduled and executed.
190 * Long running CPU intensive workloads which can be better 190 * Long running CPU intensive workloads which can be better
191 managed by the system scheduler. 191 managed by the system scheduler.
192 192
193 WQ_FREEZEABLE 193 WQ_FREEZABLE
194 194
195 A freezeable wq participates in the freeze phase of the system 195 A freezable wq participates in the freeze phase of the system
196 suspend operations. Work items on the wq are drained and no 196 suspend operations. Work items on the wq are drained and no
197 new work item starts execution until thawed. 197 new work item starts execution until thawed.
198 198
diff --git a/MAINTAINERS b/MAINTAINERS
index 5dd6c751e6a6..2a2cddd2f88c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -557,6 +557,13 @@ S: Maintained
557F: drivers/net/appletalk/ 557F: drivers/net/appletalk/
558F: net/appletalk/ 558F: net/appletalk/
559 559
560ARASAN COMPACT FLASH PATA CONTROLLER
561M: Viresh Kumar <viresh.kumar@st.com>
562L: linux-ide@vger.kernel.org
563S: Maintained
564F: include/linux/pata_arasan_cf_data.h
565F: drivers/ata/pata_arasan_cf.c
566
560ARC FRAMEBUFFER DRIVER 567ARC FRAMEBUFFER DRIVER
561M: Jaya Kumar <jayalk@intworks.biz> 568M: Jaya Kumar <jayalk@intworks.biz>
562S: Maintained 569S: Maintained
@@ -885,7 +892,7 @@ S: Supported
885 892
886ARM/QUALCOMM MSM MACHINE SUPPORT 893ARM/QUALCOMM MSM MACHINE SUPPORT
887M: David Brown <davidb@codeaurora.org> 894M: David Brown <davidb@codeaurora.org>
888M: Daniel Walker <dwalker@codeaurora.org> 895M: Daniel Walker <dwalker@fifo99.com>
889M: Bryan Huntsman <bryanh@codeaurora.org> 896M: Bryan Huntsman <bryanh@codeaurora.org>
890L: linux-arm-msm@vger.kernel.org 897L: linux-arm-msm@vger.kernel.org
891F: arch/arm/mach-msm/ 898F: arch/arm/mach-msm/
@@ -1010,6 +1017,15 @@ L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
1010S: Maintained 1017S: Maintained
1011F: arch/arm/mach-s5p*/ 1018F: arch/arm/mach-s5p*/
1012 1019
1020ARM/SAMSUNG MOBILE MACHINE SUPPORT
1021M: Kyungmin Park <kyungmin.park@samsung.com>
1022L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
1023S: Maintained
1024F: arch/arm/mach-s5pv210/mach-aquila.c
1025F: arch/arm/mach-s5pv210/mach-goni.c
1026F: arch/arm/mach-exynos4/mach-universal_c210.c
1027F: arch/arm/mach-exynos4/mach-nuri.c
1028
1013ARM/SAMSUNG S5P SERIES FIMC SUPPORT 1029ARM/SAMSUNG S5P SERIES FIMC SUPPORT
1014M: Kyungmin Park <kyungmin.park@samsung.com> 1030M: Kyungmin Park <kyungmin.park@samsung.com>
1015M: Sylwester Nawrocki <s.nawrocki@samsung.com> 1031M: Sylwester Nawrocki <s.nawrocki@samsung.com>
@@ -1467,6 +1483,7 @@ F: include/net/bluetooth/
1467 1483
1468BONDING DRIVER 1484BONDING DRIVER
1469M: Jay Vosburgh <fubar@us.ibm.com> 1485M: Jay Vosburgh <fubar@us.ibm.com>
1486M: Andy Gospodarek <andy@greyhouse.net>
1470L: netdev@vger.kernel.org 1487L: netdev@vger.kernel.org
1471W: http://sourceforge.net/projects/bonding/ 1488W: http://sourceforge.net/projects/bonding/
1472S: Supported 1489S: Supported
@@ -1692,6 +1709,13 @@ M: Andy Whitcroft <apw@canonical.com>
1692S: Supported 1709S: Supported
1693F: scripts/checkpatch.pl 1710F: scripts/checkpatch.pl
1694 1711
1712CHINESE DOCUMENTATION
1713M: Harry Wei <harryxiyou@gmail.com>
1714L: xiyoulinuxkernelgroup@googlegroups.com
1715L: linux-kernel@zh-kernel.org (moderated for non-subscribers)
1716S: Maintained
1717F: Documentation/zh_CN/
1718
1695CISCO VIC ETHERNET NIC DRIVER 1719CISCO VIC ETHERNET NIC DRIVER
1696M: Vasanthy Kolluri <vkolluri@cisco.com> 1720M: Vasanthy Kolluri <vkolluri@cisco.com>
1697M: Roopa Prabhu <roprabhu@cisco.com> 1721M: Roopa Prabhu <roprabhu@cisco.com>
@@ -2026,7 +2050,7 @@ F: Documentation/scsi/dc395x.txt
2026F: drivers/scsi/dc395x.* 2050F: drivers/scsi/dc395x.*
2027 2051
2028DCCP PROTOCOL 2052DCCP PROTOCOL
2029M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> 2053M: Gerrit Renker <gerrit@erg.abdn.ac.uk>
2030L: dccp@vger.kernel.org 2054L: dccp@vger.kernel.org
2031W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp 2055W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp
2032S: Maintained 2056S: Maintained
@@ -2873,7 +2897,6 @@ M: Guenter Roeck <guenter.roeck@ericsson.com>
2873L: lm-sensors@lm-sensors.org 2897L: lm-sensors@lm-sensors.org
2874W: http://www.lm-sensors.org/ 2898W: http://www.lm-sensors.org/
2875T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/ 2899T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/
2876T: quilt kernel.org/pub/linux/kernel/people/groeck/linux-staging/
2877T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git 2900T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
2878S: Maintained 2901S: Maintained
2879F: Documentation/hwmon/ 2902F: Documentation/hwmon/
@@ -3513,7 +3536,7 @@ F: drivers/hwmon/jc42.c
3513F: Documentation/hwmon/jc42 3536F: Documentation/hwmon/jc42
3514 3537
3515JFS FILESYSTEM 3538JFS FILESYSTEM
3516M: Dave Kleikamp <shaggy@linux.vnet.ibm.com> 3539M: Dave Kleikamp <shaggy@kernel.org>
3517L: jfs-discussion@lists.sourceforge.net 3540L: jfs-discussion@lists.sourceforge.net
3518W: http://jfs.sourceforge.net/ 3541W: http://jfs.sourceforge.net/
3519T: git git://git.kernel.org/pub/scm/linux/kernel/git/shaggy/jfs-2.6.git 3542T: git git://git.kernel.org/pub/scm/linux/kernel/git/shaggy/jfs-2.6.git
@@ -4276,10 +4299,7 @@ S: Maintained
4276F: net/sched/sch_netem.c 4299F: net/sched/sch_netem.c
4277 4300
4278NETERION 10GbE DRIVERS (s2io/vxge) 4301NETERION 10GbE DRIVERS (s2io/vxge)
4279M: Ramkrishna Vepa <ramkrishna.vepa@exar.com> 4302M: Jon Mason <jdmason@kudzu.us>
4280M: Sivakumar Subramani <sivakumar.subramani@exar.com>
4281M: Sreenivasa Honnur <sreenivasa.honnur@exar.com>
4282M: Jon Mason <jon.mason@exar.com>
4283L: netdev@vger.kernel.org 4303L: netdev@vger.kernel.org
4284W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous 4304W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous
4285W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous 4305W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous
@@ -5165,6 +5185,7 @@ F: drivers/char/random.c
5165 5185
5166RAPIDIO SUBSYSTEM 5186RAPIDIO SUBSYSTEM
5167M: Matt Porter <mporter@kernel.crashing.org> 5187M: Matt Porter <mporter@kernel.crashing.org>
5188M: Alexandre Bounine <alexandre.bounine@idt.com>
5168S: Maintained 5189S: Maintained
5169F: drivers/rapidio/ 5190F: drivers/rapidio/
5170 5191
@@ -5267,7 +5288,7 @@ S: Maintained
5267F: drivers/net/wireless/rtl818x/rtl8180/ 5288F: drivers/net/wireless/rtl818x/rtl8180/
5268 5289
5269RTL8187 WIRELESS DRIVER 5290RTL8187 WIRELESS DRIVER
5270M: Herton Ronaldo Krzesinski <herton@mandriva.com.br> 5291M: Herton Ronaldo Krzesinski <herton@canonical.com>
5271M: Hin-Tak Leung <htl10@users.sourceforge.net> 5292M: Hin-Tak Leung <htl10@users.sourceforge.net>
5272M: Larry Finger <Larry.Finger@lwfinger.net> 5293M: Larry Finger <Larry.Finger@lwfinger.net>
5273L: linux-wireless@vger.kernel.org 5294L: linux-wireless@vger.kernel.org
@@ -6105,7 +6126,7 @@ S: Maintained
6105F: security/tomoyo/ 6126F: security/tomoyo/
6106 6127
6107TOPSTAR LAPTOP EXTRAS DRIVER 6128TOPSTAR LAPTOP EXTRAS DRIVER
6108M: Herton Ronaldo Krzesinski <herton@mandriva.com.br> 6129M: Herton Ronaldo Krzesinski <herton@canonical.com>
6109L: platform-driver-x86@vger.kernel.org 6130L: platform-driver-x86@vger.kernel.org
6110S: Maintained 6131S: Maintained
6111F: drivers/platform/x86/topstar-laptop.c 6132F: drivers/platform/x86/topstar-laptop.c
diff --git a/Makefile b/Makefile
index 5e40aa2acbff..d6592b63c8cb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 38 3SUBLEVEL = 38
4EXTRAVERSION = -rc5 4EXTRAVERSION =
5NAME = Flesh-Eating Bats with Fangs 5NAME = Flesh-Eating Bats with Fangs
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 47f63d480141..cc31bec2e316 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -11,6 +11,7 @@ config ALPHA
11 select HAVE_GENERIC_HARDIRQS 11 select HAVE_GENERIC_HARDIRQS
12 select GENERIC_IRQ_PROBE 12 select GENERIC_IRQ_PROBE
13 select AUTO_IRQ_AFFINITY if SMP 13 select AUTO_IRQ_AFFINITY if SMP
14 select GENERIC_HARDIRQS_NO_DEPRECATED
14 help 15 help
15 The Alpha is a 64-bit general-purpose processor designed and 16 The Alpha is a 64-bit general-purpose processor designed and
16 marketed by the Digital Equipment Corporation of blessed memory, 17 marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h
index 70145cbb21cb..1b71ca70c9f6 100644
--- a/arch/alpha/include/asm/fcntl.h
+++ b/arch/alpha/include/asm/fcntl.h
@@ -31,6 +31,8 @@
31#define __O_SYNC 020000000 31#define __O_SYNC 020000000
32#define O_SYNC (__O_SYNC|O_DSYNC) 32#define O_SYNC (__O_SYNC|O_DSYNC)
33 33
34#define O_PATH 040000000
35
34#define F_GETLK 7 36#define F_GETLK 7
35#define F_SETLK 8 37#define F_SETLK 8
36#define F_SETLKW 9 38#define F_SETLKW 9
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index 945de222ab91..e8a761aee088 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -29,7 +29,7 @@
29 : "r" (uaddr), "r"(oparg) \ 29 : "r" (uaddr), "r"(oparg) \
30 : "memory") 30 : "memory")
31 31
32static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 32static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
33{ 33{
34 int op = (encoded_op >> 28) & 7; 34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15; 35 int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg; 40 oparg = 1 << oparg;
41 41
42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT; 43 return -EFAULT;
44 44
45 pagefault_disable(); 45 pagefault_disable();
@@ -81,21 +81,23 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
81} 81}
82 82
83static inline int 83static inline int
84futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 84futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
85 u32 oldval, u32 newval)
85{ 86{
86 int prev, cmp; 87 int ret = 0, cmp;
88 u32 prev;
87 89
88 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 90 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
89 return -EFAULT; 91 return -EFAULT;
90 92
91 __asm__ __volatile__ ( 93 __asm__ __volatile__ (
92 __ASM_SMP_MB 94 __ASM_SMP_MB
93 "1: ldl_l %0,0(%2)\n" 95 "1: ldl_l %1,0(%3)\n"
94 " cmpeq %0,%3,%1\n" 96 " cmpeq %1,%4,%2\n"
95 " beq %1,3f\n" 97 " beq %2,3f\n"
96 " mov %4,%1\n" 98 " mov %5,%2\n"
97 "2: stl_c %1,0(%2)\n" 99 "2: stl_c %2,0(%3)\n"
98 " beq %1,4f\n" 100 " beq %2,4f\n"
99 "3: .subsection 2\n" 101 "3: .subsection 2\n"
100 "4: br 1b\n" 102 "4: br 1b\n"
101 " .previous\n" 103 " .previous\n"
@@ -105,11 +107,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
105 " .long 2b-.\n" 107 " .long 2b-.\n"
106 " lda $31,3b-2b(%0)\n" 108 " lda $31,3b-2b(%0)\n"
107 " .previous\n" 109 " .previous\n"
108 : "=&r"(prev), "=&r"(cmp) 110 : "+r"(ret), "=&r"(prev), "=&r"(cmp)
109 : "r"(uaddr), "r"((long)oldval), "r"(newval) 111 : "r"(uaddr), "r"((long)oldval), "r"(newval)
110 : "memory"); 112 : "memory");
111 113
112 return prev; 114 *uval = prev;
115 return ret;
113} 116}
114 117
115#endif /* __KERNEL__ */ 118#endif /* __KERNEL__ */
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 1570c0b54336..a83bbea62c67 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -13,44 +13,13 @@
13#ifdef __KERNEL__ 13#ifdef __KERNEL__
14 14
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/list.h>
17#include <linux/spinlock.h>
18 16
19struct rwsem_waiter;
20
21extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
22extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
23extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
24extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
25
26/*
27 * the semaphore definition
28 */
29struct rw_semaphore {
30 long count;
31#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L 17#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
32#define RWSEM_ACTIVE_BIAS 0x0000000000000001L 18#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
33#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL 19#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
34#define RWSEM_WAITING_BIAS (-0x0000000100000000L) 20#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
35#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 21#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
36#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 22#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
37 spinlock_t wait_lock;
38 struct list_head wait_list;
39};
40
41#define __RWSEM_INITIALIZER(name) \
42 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
43 LIST_HEAD_INIT((name).wait_list) }
44
45#define DECLARE_RWSEM(name) \
46 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
47
48static inline void init_rwsem(struct rw_semaphore *sem)
49{
50 sem->count = RWSEM_UNLOCKED_VALUE;
51 spin_lock_init(&sem->wait_lock);
52 INIT_LIST_HEAD(&sem->wait_list);
53}
54 23
55static inline void __down_read(struct rw_semaphore *sem) 24static inline void __down_read(struct rw_semaphore *sem)
56{ 25{
@@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
250#endif 219#endif
251} 220}
252 221
253static inline int rwsem_is_locked(struct rw_semaphore *sem)
254{
255 return (sem->count != 0);
256}
257
258#endif /* __KERNEL__ */ 222#endif /* __KERNEL__ */
259#endif /* _ALPHA_RWSEM_H */ 223#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 9ab234f48dd8..a19d60082299 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -44,11 +44,16 @@ static char irq_user_affinity[NR_IRQS];
44 44
45int irq_select_affinity(unsigned int irq) 45int irq_select_affinity(unsigned int irq)
46{ 46{
47 struct irq_desc *desc = irq_to_desc[irq]; 47 struct irq_data *data = irq_get_irq_data(irq);
48 struct irq_chip *chip;
48 static int last_cpu; 49 static int last_cpu;
49 int cpu = last_cpu + 1; 50 int cpu = last_cpu + 1;
50 51
51 if (!desc || !get_irq_desc_chip(desc)->set_affinity || irq_user_affinity[irq]) 52 if (!data)
53 return 1;
54 chip = irq_data_get_irq_chip(data);
55
56 if (!chip->irq_set_affinity || irq_user_affinity[irq])
52 return 1; 57 return 1;
53 58
54 while (!cpu_possible(cpu) || 59 while (!cpu_possible(cpu) ||
@@ -56,8 +61,8 @@ int irq_select_affinity(unsigned int irq)
56 cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); 61 cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
57 last_cpu = cpu; 62 last_cpu = cpu;
58 63
59 cpumask_copy(desc->affinity, cpumask_of(cpu)); 64 cpumask_copy(data->affinity, cpumask_of(cpu));
60 get_irq_desc_chip(desc)->set_affinity(irq, cpumask_of(cpu)); 65 chip->irq_set_affinity(data, cpumask_of(cpu), false);
61 return 0; 66 return 0;
62} 67}
63#endif /* CONFIG_SMP */ 68#endif /* CONFIG_SMP */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 2d0679b60939..411ca11d0a18 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -228,14 +228,9 @@ struct irqaction timer_irqaction = {
228void __init 228void __init
229init_rtc_irq(void) 229init_rtc_irq(void)
230{ 230{
231 struct irq_desc *desc = irq_to_desc(RTC_IRQ); 231 set_irq_chip_and_handler_name(RTC_IRQ, &no_irq_chip,
232 232 handle_simple_irq, "RTC");
233 if (desc) { 233 setup_irq(RTC_IRQ, &timer_irqaction);
234 desc->status |= IRQ_DISABLED;
235 set_irq_chip_and_handler_name(RTC_IRQ, &no_irq_chip,
236 handle_simple_irq, "RTC");
237 setup_irq(RTC_IRQ, &timer_irqaction);
238 }
239} 234}
240 235
241/* Dummy irqactions. */ 236/* Dummy irqactions. */
diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c
index 956ea0ed1694..c7cc9813e45f 100644
--- a/arch/alpha/kernel/irq_i8259.c
+++ b/arch/alpha/kernel/irq_i8259.c
@@ -33,10 +33,10 @@ i8259_update_irq_hw(unsigned int irq, unsigned long mask)
33} 33}
34 34
35inline void 35inline void
36i8259a_enable_irq(unsigned int irq) 36i8259a_enable_irq(struct irq_data *d)
37{ 37{
38 spin_lock(&i8259_irq_lock); 38 spin_lock(&i8259_irq_lock);
39 i8259_update_irq_hw(irq, cached_irq_mask &= ~(1 << irq)); 39 i8259_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq));
40 spin_unlock(&i8259_irq_lock); 40 spin_unlock(&i8259_irq_lock);
41} 41}
42 42
@@ -47,16 +47,18 @@ __i8259a_disable_irq(unsigned int irq)
47} 47}
48 48
49void 49void
50i8259a_disable_irq(unsigned int irq) 50i8259a_disable_irq(struct irq_data *d)
51{ 51{
52 spin_lock(&i8259_irq_lock); 52 spin_lock(&i8259_irq_lock);
53 __i8259a_disable_irq(irq); 53 __i8259a_disable_irq(d->irq);
54 spin_unlock(&i8259_irq_lock); 54 spin_unlock(&i8259_irq_lock);
55} 55}
56 56
57void 57void
58i8259a_mask_and_ack_irq(unsigned int irq) 58i8259a_mask_and_ack_irq(struct irq_data *d)
59{ 59{
60 unsigned int irq = d->irq;
61
60 spin_lock(&i8259_irq_lock); 62 spin_lock(&i8259_irq_lock);
61 __i8259a_disable_irq(irq); 63 __i8259a_disable_irq(irq);
62 64
@@ -71,9 +73,9 @@ i8259a_mask_and_ack_irq(unsigned int irq)
71 73
72struct irq_chip i8259a_irq_type = { 74struct irq_chip i8259a_irq_type = {
73 .name = "XT-PIC", 75 .name = "XT-PIC",
74 .unmask = i8259a_enable_irq, 76 .irq_unmask = i8259a_enable_irq,
75 .mask = i8259a_disable_irq, 77 .irq_mask = i8259a_disable_irq,
76 .mask_ack = i8259a_mask_and_ack_irq, 78 .irq_mask_ack = i8259a_mask_and_ack_irq,
77}; 79};
78 80
79void __init 81void __init
diff --git a/arch/alpha/kernel/irq_impl.h b/arch/alpha/kernel/irq_impl.h
index b63ccd7386f1..d507a234b05d 100644
--- a/arch/alpha/kernel/irq_impl.h
+++ b/arch/alpha/kernel/irq_impl.h
@@ -31,11 +31,9 @@ extern void init_rtc_irq(void);
31 31
32extern void common_init_isa_dma(void); 32extern void common_init_isa_dma(void);
33 33
34extern void i8259a_enable_irq(unsigned int); 34extern void i8259a_enable_irq(struct irq_data *d);
35extern void i8259a_disable_irq(unsigned int); 35extern void i8259a_disable_irq(struct irq_data *d);
36extern void i8259a_mask_and_ack_irq(unsigned int); 36extern void i8259a_mask_and_ack_irq(struct irq_data *d);
37extern unsigned int i8259a_startup_irq(unsigned int);
38extern void i8259a_end_irq(unsigned int);
39extern struct irq_chip i8259a_irq_type; 37extern struct irq_chip i8259a_irq_type;
40extern void init_i8259a_irqs(void); 38extern void init_i8259a_irqs(void);
41 39
diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c
index 2863458c853e..b30227fa7f5f 100644
--- a/arch/alpha/kernel/irq_pyxis.c
+++ b/arch/alpha/kernel/irq_pyxis.c
@@ -29,21 +29,21 @@ pyxis_update_irq_hw(unsigned long mask)
29} 29}
30 30
31static inline void 31static inline void
32pyxis_enable_irq(unsigned int irq) 32pyxis_enable_irq(struct irq_data *d)
33{ 33{
34 pyxis_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16)); 34 pyxis_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
35} 35}
36 36
37static void 37static void
38pyxis_disable_irq(unsigned int irq) 38pyxis_disable_irq(struct irq_data *d)
39{ 39{
40 pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); 40 pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
41} 41}
42 42
43static void 43static void
44pyxis_mask_and_ack_irq(unsigned int irq) 44pyxis_mask_and_ack_irq(struct irq_data *d)
45{ 45{
46 unsigned long bit = 1UL << (irq - 16); 46 unsigned long bit = 1UL << (d->irq - 16);
47 unsigned long mask = cached_irq_mask &= ~bit; 47 unsigned long mask = cached_irq_mask &= ~bit;
48 48
49 /* Disable the interrupt. */ 49 /* Disable the interrupt. */
@@ -58,9 +58,9 @@ pyxis_mask_and_ack_irq(unsigned int irq)
58 58
59static struct irq_chip pyxis_irq_type = { 59static struct irq_chip pyxis_irq_type = {
60 .name = "PYXIS", 60 .name = "PYXIS",
61 .mask_ack = pyxis_mask_and_ack_irq, 61 .irq_mask_ack = pyxis_mask_and_ack_irq,
62 .mask = pyxis_disable_irq, 62 .irq_mask = pyxis_disable_irq,
63 .unmask = pyxis_enable_irq, 63 .irq_unmask = pyxis_enable_irq,
64}; 64};
65 65
66void 66void
@@ -103,7 +103,7 @@ init_pyxis_irqs(unsigned long ignore_mask)
103 if ((ignore_mask >> i) & 1) 103 if ((ignore_mask >> i) & 1)
104 continue; 104 continue;
105 set_irq_chip_and_handler(i, &pyxis_irq_type, handle_level_irq); 105 set_irq_chip_and_handler(i, &pyxis_irq_type, handle_level_irq);
106 irq_to_desc(i)->status |= IRQ_LEVEL; 106 irq_set_status_flags(i, IRQ_LEVEL);
107 } 107 }
108 108
109 setup_irq(16+7, &isa_cascade_irqaction); 109 setup_irq(16+7, &isa_cascade_irqaction);
diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c
index 0e57e828b413..82a47bba41c4 100644
--- a/arch/alpha/kernel/irq_srm.c
+++ b/arch/alpha/kernel/irq_srm.c
@@ -18,27 +18,27 @@
18DEFINE_SPINLOCK(srm_irq_lock); 18DEFINE_SPINLOCK(srm_irq_lock);
19 19
20static inline void 20static inline void
21srm_enable_irq(unsigned int irq) 21srm_enable_irq(struct irq_data *d)
22{ 22{
23 spin_lock(&srm_irq_lock); 23 spin_lock(&srm_irq_lock);
24 cserve_ena(irq - 16); 24 cserve_ena(d->irq - 16);
25 spin_unlock(&srm_irq_lock); 25 spin_unlock(&srm_irq_lock);
26} 26}
27 27
28static void 28static void
29srm_disable_irq(unsigned int irq) 29srm_disable_irq(struct irq_data *d)
30{ 30{
31 spin_lock(&srm_irq_lock); 31 spin_lock(&srm_irq_lock);
32 cserve_dis(irq - 16); 32 cserve_dis(d->irq - 16);
33 spin_unlock(&srm_irq_lock); 33 spin_unlock(&srm_irq_lock);
34} 34}
35 35
36/* Handle interrupts from the SRM, assuming no additional weirdness. */ 36/* Handle interrupts from the SRM, assuming no additional weirdness. */
37static struct irq_chip srm_irq_type = { 37static struct irq_chip srm_irq_type = {
38 .name = "SRM", 38 .name = "SRM",
39 .unmask = srm_enable_irq, 39 .irq_unmask = srm_enable_irq,
40 .mask = srm_disable_irq, 40 .irq_mask = srm_disable_irq,
41 .mask_ack = srm_disable_irq, 41 .irq_mask_ack = srm_disable_irq,
42}; 42};
43 43
44void __init 44void __init
@@ -52,7 +52,7 @@ init_srm_irqs(long max, unsigned long ignore_mask)
52 if (i < 64 && ((ignore_mask >> i) & 1)) 52 if (i < 64 && ((ignore_mask >> i) & 1))
53 continue; 53 continue;
54 set_irq_chip_and_handler(i, &srm_irq_type, handle_level_irq); 54 set_irq_chip_and_handler(i, &srm_irq_type, handle_level_irq);
55 irq_to_desc(i)->status |= IRQ_LEVEL; 55 irq_set_status_flags(i, IRQ_LEVEL);
56 } 56 }
57} 57}
58 58
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index fe698b5045e9..376f22130791 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -230,44 +230,24 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st
230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0; 230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0;
231} 231}
232 232
233static int 233SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
234do_osf_statfs(struct path *path, struct osf_statfs __user *buffer, 234 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
235 unsigned long bufsiz)
236{ 235{
237 struct kstatfs linux_stat; 236 struct kstatfs linux_stat;
238 int error = vfs_statfs(path, &linux_stat); 237 int error = user_statfs(pathname, &linux_stat);
239 if (!error) 238 if (!error)
240 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); 239 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
241 return error; 240 return error;
242} 241}
243 242
244SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
245 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
246{
247 struct path path;
248 int retval;
249
250 retval = user_path(pathname, &path);
251 if (!retval) {
252 retval = do_osf_statfs(&path, buffer, bufsiz);
253 path_put(&path);
254 }
255 return retval;
256}
257
258SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, 243SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd,
259 struct osf_statfs __user *, buffer, unsigned long, bufsiz) 244 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
260{ 245{
261 struct file *file; 246 struct kstatfs linux_stat;
262 int retval; 247 int error = fd_statfs(fd, &linux_stat);
263 248 if (!error)
264 retval = -EBADF; 249 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
265 file = fget(fd); 250 return error;
266 if (file) {
267 retval = do_osf_statfs(&file->f_path, buffer, bufsiz);
268 fput(file);
269 }
270 return retval;
271} 251}
272 252
273/* 253/*
diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c
index 7bef61768236..88d95e872f55 100644
--- a/arch/alpha/kernel/sys_alcor.c
+++ b/arch/alpha/kernel/sys_alcor.c
@@ -44,31 +44,31 @@ alcor_update_irq_hw(unsigned long mask)
44} 44}
45 45
46static inline void 46static inline void
47alcor_enable_irq(unsigned int irq) 47alcor_enable_irq(struct irq_data *d)
48{ 48{
49 alcor_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16)); 49 alcor_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
50} 50}
51 51
52static void 52static void
53alcor_disable_irq(unsigned int irq) 53alcor_disable_irq(struct irq_data *d)
54{ 54{
55 alcor_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); 55 alcor_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
56} 56}
57 57
58static void 58static void
59alcor_mask_and_ack_irq(unsigned int irq) 59alcor_mask_and_ack_irq(struct irq_data *d)
60{ 60{
61 alcor_disable_irq(irq); 61 alcor_disable_irq(d);
62 62
63 /* On ALCOR/XLT, need to dismiss interrupt via GRU. */ 63 /* On ALCOR/XLT, need to dismiss interrupt via GRU. */
64 *(vuip)GRU_INT_CLEAR = 1 << (irq - 16); mb(); 64 *(vuip)GRU_INT_CLEAR = 1 << (d->irq - 16); mb();
65 *(vuip)GRU_INT_CLEAR = 0; mb(); 65 *(vuip)GRU_INT_CLEAR = 0; mb();
66} 66}
67 67
68static void 68static void
69alcor_isa_mask_and_ack_irq(unsigned int irq) 69alcor_isa_mask_and_ack_irq(struct irq_data *d)
70{ 70{
71 i8259a_mask_and_ack_irq(irq); 71 i8259a_mask_and_ack_irq(d);
72 72
73 /* On ALCOR/XLT, need to dismiss interrupt via GRU. */ 73 /* On ALCOR/XLT, need to dismiss interrupt via GRU. */
74 *(vuip)GRU_INT_CLEAR = 0x80000000; mb(); 74 *(vuip)GRU_INT_CLEAR = 0x80000000; mb();
@@ -77,9 +77,9 @@ alcor_isa_mask_and_ack_irq(unsigned int irq)
77 77
78static struct irq_chip alcor_irq_type = { 78static struct irq_chip alcor_irq_type = {
79 .name = "ALCOR", 79 .name = "ALCOR",
80 .unmask = alcor_enable_irq, 80 .irq_unmask = alcor_enable_irq,
81 .mask = alcor_disable_irq, 81 .irq_mask = alcor_disable_irq,
82 .mask_ack = alcor_mask_and_ack_irq, 82 .irq_mask_ack = alcor_mask_and_ack_irq,
83}; 83};
84 84
85static void 85static void
@@ -126,9 +126,9 @@ alcor_init_irq(void)
126 if (i >= 16+20 && i <= 16+30) 126 if (i >= 16+20 && i <= 16+30)
127 continue; 127 continue;
128 set_irq_chip_and_handler(i, &alcor_irq_type, handle_level_irq); 128 set_irq_chip_and_handler(i, &alcor_irq_type, handle_level_irq);
129 irq_to_desc(i)->status |= IRQ_LEVEL; 129 irq_set_status_flags(i, IRQ_LEVEL);
130 } 130 }
131 i8259a_irq_type.ack = alcor_isa_mask_and_ack_irq; 131 i8259a_irq_type.irq_ack = alcor_isa_mask_and_ack_irq;
132 132
133 init_i8259a_irqs(); 133 init_i8259a_irqs();
134 common_init_isa_dma(); 134 common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c
index b0c916493aea..57eb6307bc27 100644
--- a/arch/alpha/kernel/sys_cabriolet.c
+++ b/arch/alpha/kernel/sys_cabriolet.c
@@ -46,22 +46,22 @@ cabriolet_update_irq_hw(unsigned int irq, unsigned long mask)
46} 46}
47 47
48static inline void 48static inline void
49cabriolet_enable_irq(unsigned int irq) 49cabriolet_enable_irq(struct irq_data *d)
50{ 50{
51 cabriolet_update_irq_hw(irq, cached_irq_mask &= ~(1UL << irq)); 51 cabriolet_update_irq_hw(d->irq, cached_irq_mask &= ~(1UL << d->irq));
52} 52}
53 53
54static void 54static void
55cabriolet_disable_irq(unsigned int irq) 55cabriolet_disable_irq(struct irq_data *d)
56{ 56{
57 cabriolet_update_irq_hw(irq, cached_irq_mask |= 1UL << irq); 57 cabriolet_update_irq_hw(d->irq, cached_irq_mask |= 1UL << d->irq);
58} 58}
59 59
60static struct irq_chip cabriolet_irq_type = { 60static struct irq_chip cabriolet_irq_type = {
61 .name = "CABRIOLET", 61 .name = "CABRIOLET",
62 .unmask = cabriolet_enable_irq, 62 .irq_unmask = cabriolet_enable_irq,
63 .mask = cabriolet_disable_irq, 63 .irq_mask = cabriolet_disable_irq,
64 .mask_ack = cabriolet_disable_irq, 64 .irq_mask_ack = cabriolet_disable_irq,
65}; 65};
66 66
67static void 67static void
@@ -107,7 +107,7 @@ common_init_irq(void (*srm_dev_int)(unsigned long v))
107 for (i = 16; i < 35; ++i) { 107 for (i = 16; i < 35; ++i) {
108 set_irq_chip_and_handler(i, &cabriolet_irq_type, 108 set_irq_chip_and_handler(i, &cabriolet_irq_type,
109 handle_level_irq); 109 handle_level_irq);
110 irq_to_desc(i)->status |= IRQ_LEVEL; 110 irq_set_status_flags(i, IRQ_LEVEL);
111 } 111 }
112 } 112 }
113 113
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index edad5f759ccd..481df4ecb651 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -98,37 +98,37 @@ tsunami_update_irq_hw(unsigned long mask)
98} 98}
99 99
100static void 100static void
101dp264_enable_irq(unsigned int irq) 101dp264_enable_irq(struct irq_data *d)
102{ 102{
103 spin_lock(&dp264_irq_lock); 103 spin_lock(&dp264_irq_lock);
104 cached_irq_mask |= 1UL << irq; 104 cached_irq_mask |= 1UL << d->irq;
105 tsunami_update_irq_hw(cached_irq_mask); 105 tsunami_update_irq_hw(cached_irq_mask);
106 spin_unlock(&dp264_irq_lock); 106 spin_unlock(&dp264_irq_lock);
107} 107}
108 108
109static void 109static void
110dp264_disable_irq(unsigned int irq) 110dp264_disable_irq(struct irq_data *d)
111{ 111{
112 spin_lock(&dp264_irq_lock); 112 spin_lock(&dp264_irq_lock);
113 cached_irq_mask &= ~(1UL << irq); 113 cached_irq_mask &= ~(1UL << d->irq);
114 tsunami_update_irq_hw(cached_irq_mask); 114 tsunami_update_irq_hw(cached_irq_mask);
115 spin_unlock(&dp264_irq_lock); 115 spin_unlock(&dp264_irq_lock);
116} 116}
117 117
118static void 118static void
119clipper_enable_irq(unsigned int irq) 119clipper_enable_irq(struct irq_data *d)
120{ 120{
121 spin_lock(&dp264_irq_lock); 121 spin_lock(&dp264_irq_lock);
122 cached_irq_mask |= 1UL << (irq - 16); 122 cached_irq_mask |= 1UL << (d->irq - 16);
123 tsunami_update_irq_hw(cached_irq_mask); 123 tsunami_update_irq_hw(cached_irq_mask);
124 spin_unlock(&dp264_irq_lock); 124 spin_unlock(&dp264_irq_lock);
125} 125}
126 126
127static void 127static void
128clipper_disable_irq(unsigned int irq) 128clipper_disable_irq(struct irq_data *d)
129{ 129{
130 spin_lock(&dp264_irq_lock); 130 spin_lock(&dp264_irq_lock);
131 cached_irq_mask &= ~(1UL << (irq - 16)); 131 cached_irq_mask &= ~(1UL << (d->irq - 16));
132 tsunami_update_irq_hw(cached_irq_mask); 132 tsunami_update_irq_hw(cached_irq_mask);
133 spin_unlock(&dp264_irq_lock); 133 spin_unlock(&dp264_irq_lock);
134} 134}
@@ -149,10 +149,11 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
149} 149}
150 150
151static int 151static int
152dp264_set_affinity(unsigned int irq, const struct cpumask *affinity) 152dp264_set_affinity(struct irq_data *d, const struct cpumask *affinity,
153{ 153 bool force)
154{
154 spin_lock(&dp264_irq_lock); 155 spin_lock(&dp264_irq_lock);
155 cpu_set_irq_affinity(irq, *affinity); 156 cpu_set_irq_affinity(d->irq, *affinity);
156 tsunami_update_irq_hw(cached_irq_mask); 157 tsunami_update_irq_hw(cached_irq_mask);
157 spin_unlock(&dp264_irq_lock); 158 spin_unlock(&dp264_irq_lock);
158 159
@@ -160,10 +161,11 @@ dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
160} 161}
161 162
162static int 163static int
163clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) 164clipper_set_affinity(struct irq_data *d, const struct cpumask *affinity,
164{ 165 bool force)
166{
165 spin_lock(&dp264_irq_lock); 167 spin_lock(&dp264_irq_lock);
166 cpu_set_irq_affinity(irq - 16, *affinity); 168 cpu_set_irq_affinity(d->irq - 16, *affinity);
167 tsunami_update_irq_hw(cached_irq_mask); 169 tsunami_update_irq_hw(cached_irq_mask);
168 spin_unlock(&dp264_irq_lock); 170 spin_unlock(&dp264_irq_lock);
169 171
@@ -171,19 +173,19 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
171} 173}
172 174
173static struct irq_chip dp264_irq_type = { 175static struct irq_chip dp264_irq_type = {
174 .name = "DP264", 176 .name = "DP264",
175 .unmask = dp264_enable_irq, 177 .irq_unmask = dp264_enable_irq,
176 .mask = dp264_disable_irq, 178 .irq_mask = dp264_disable_irq,
177 .mask_ack = dp264_disable_irq, 179 .irq_mask_ack = dp264_disable_irq,
178 .set_affinity = dp264_set_affinity, 180 .irq_set_affinity = dp264_set_affinity,
179}; 181};
180 182
181static struct irq_chip clipper_irq_type = { 183static struct irq_chip clipper_irq_type = {
182 .name = "CLIPPER", 184 .name = "CLIPPER",
183 .unmask = clipper_enable_irq, 185 .irq_unmask = clipper_enable_irq,
184 .mask = clipper_disable_irq, 186 .irq_mask = clipper_disable_irq,
185 .mask_ack = clipper_disable_irq, 187 .irq_mask_ack = clipper_disable_irq,
186 .set_affinity = clipper_set_affinity, 188 .irq_set_affinity = clipper_set_affinity,
187}; 189};
188 190
189static void 191static void
@@ -268,8 +270,8 @@ init_tsunami_irqs(struct irq_chip * ops, int imin, int imax)
268{ 270{
269 long i; 271 long i;
270 for (i = imin; i <= imax; ++i) { 272 for (i = imin; i <= imax; ++i) {
271 irq_to_desc(i)->status |= IRQ_LEVEL;
272 set_irq_chip_and_handler(i, ops, handle_level_irq); 273 set_irq_chip_and_handler(i, ops, handle_level_irq);
274 irq_set_status_flags(i, IRQ_LEVEL);
273 } 275 }
274} 276}
275 277
diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c
index ae5f29d127b0..402e908ffb3e 100644
--- a/arch/alpha/kernel/sys_eb64p.c
+++ b/arch/alpha/kernel/sys_eb64p.c
@@ -44,22 +44,22 @@ eb64p_update_irq_hw(unsigned int irq, unsigned long mask)
44} 44}
45 45
46static inline void 46static inline void
47eb64p_enable_irq(unsigned int irq) 47eb64p_enable_irq(struct irq_data *d)
48{ 48{
49 eb64p_update_irq_hw(irq, cached_irq_mask &= ~(1 << irq)); 49 eb64p_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq));
50} 50}
51 51
52static void 52static void
53eb64p_disable_irq(unsigned int irq) 53eb64p_disable_irq(struct irq_data *d)
54{ 54{
55 eb64p_update_irq_hw(irq, cached_irq_mask |= 1 << irq); 55 eb64p_update_irq_hw(d->irq, cached_irq_mask |= 1 << d->irq);
56} 56}
57 57
58static struct irq_chip eb64p_irq_type = { 58static struct irq_chip eb64p_irq_type = {
59 .name = "EB64P", 59 .name = "EB64P",
60 .unmask = eb64p_enable_irq, 60 .irq_unmask = eb64p_enable_irq,
61 .mask = eb64p_disable_irq, 61 .irq_mask = eb64p_disable_irq,
62 .mask_ack = eb64p_disable_irq, 62 .irq_mask_ack = eb64p_disable_irq,
63}; 63};
64 64
65static void 65static void
@@ -118,9 +118,9 @@ eb64p_init_irq(void)
118 init_i8259a_irqs(); 118 init_i8259a_irqs();
119 119
120 for (i = 16; i < 32; ++i) { 120 for (i = 16; i < 32; ++i) {
121 irq_to_desc(i)->status |= IRQ_LEVEL;
122 set_irq_chip_and_handler(i, &eb64p_irq_type, handle_level_irq); 121 set_irq_chip_and_handler(i, &eb64p_irq_type, handle_level_irq);
123 } 122 irq_set_status_flags(i, IRQ_LEVEL);
123 }
124 124
125 common_init_isa_dma(); 125 common_init_isa_dma();
126 setup_irq(16+5, &isa_cascade_irqaction); 126 setup_irq(16+5, &isa_cascade_irqaction);
diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c
index 1121bc5c6c6c..0b44a54c1522 100644
--- a/arch/alpha/kernel/sys_eiger.c
+++ b/arch/alpha/kernel/sys_eiger.c
@@ -51,16 +51,18 @@ eiger_update_irq_hw(unsigned long irq, unsigned long mask)
51} 51}
52 52
53static inline void 53static inline void
54eiger_enable_irq(unsigned int irq) 54eiger_enable_irq(struct irq_data *d)
55{ 55{
56 unsigned int irq = d->irq;
56 unsigned long mask; 57 unsigned long mask;
57 mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63))); 58 mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63)));
58 eiger_update_irq_hw(irq, mask); 59 eiger_update_irq_hw(irq, mask);
59} 60}
60 61
61static void 62static void
62eiger_disable_irq(unsigned int irq) 63eiger_disable_irq(struct irq_data *d)
63{ 64{
65 unsigned int irq = d->irq;
64 unsigned long mask; 66 unsigned long mask;
65 mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63)); 67 mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63));
66 eiger_update_irq_hw(irq, mask); 68 eiger_update_irq_hw(irq, mask);
@@ -68,9 +70,9 @@ eiger_disable_irq(unsigned int irq)
68 70
69static struct irq_chip eiger_irq_type = { 71static struct irq_chip eiger_irq_type = {
70 .name = "EIGER", 72 .name = "EIGER",
71 .unmask = eiger_enable_irq, 73 .irq_unmask = eiger_enable_irq,
72 .mask = eiger_disable_irq, 74 .irq_mask = eiger_disable_irq,
73 .mask_ack = eiger_disable_irq, 75 .irq_mask_ack = eiger_disable_irq,
74}; 76};
75 77
76static void 78static void
@@ -136,8 +138,8 @@ eiger_init_irq(void)
136 init_i8259a_irqs(); 138 init_i8259a_irqs();
137 139
138 for (i = 16; i < 128; ++i) { 140 for (i = 16; i < 128; ++i) {
139 irq_to_desc(i)->status |= IRQ_LEVEL;
140 set_irq_chip_and_handler(i, &eiger_irq_type, handle_level_irq); 141 set_irq_chip_and_handler(i, &eiger_irq_type, handle_level_irq);
142 irq_set_status_flags(i, IRQ_LEVEL);
141 } 143 }
142} 144}
143 145
diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c
index 34f55e03d331..00341b75c8b2 100644
--- a/arch/alpha/kernel/sys_jensen.c
+++ b/arch/alpha/kernel/sys_jensen.c
@@ -63,34 +63,34 @@
63 */ 63 */
64 64
65static void 65static void
66jensen_local_enable(unsigned int irq) 66jensen_local_enable(struct irq_data *d)
67{ 67{
68 /* the parport is really hw IRQ 1, silly Jensen. */ 68 /* the parport is really hw IRQ 1, silly Jensen. */
69 if (irq == 7) 69 if (d->irq == 7)
70 i8259a_enable_irq(1); 70 i8259a_enable_irq(d);
71} 71}
72 72
73static void 73static void
74jensen_local_disable(unsigned int irq) 74jensen_local_disable(struct irq_data *d)
75{ 75{
76 /* the parport is really hw IRQ 1, silly Jensen. */ 76 /* the parport is really hw IRQ 1, silly Jensen. */
77 if (irq == 7) 77 if (d->irq == 7)
78 i8259a_disable_irq(1); 78 i8259a_disable_irq(d);
79} 79}
80 80
81static void 81static void
82jensen_local_mask_ack(unsigned int irq) 82jensen_local_mask_ack(struct irq_data *d)
83{ 83{
84 /* the parport is really hw IRQ 1, silly Jensen. */ 84 /* the parport is really hw IRQ 1, silly Jensen. */
85 if (irq == 7) 85 if (d->irq == 7)
86 i8259a_mask_and_ack_irq(1); 86 i8259a_mask_and_ack_irq(d);
87} 87}
88 88
89static struct irq_chip jensen_local_irq_type = { 89static struct irq_chip jensen_local_irq_type = {
90 .name = "LOCAL", 90 .name = "LOCAL",
91 .unmask = jensen_local_enable, 91 .irq_unmask = jensen_local_enable,
92 .mask = jensen_local_disable, 92 .irq_mask = jensen_local_disable,
93 .mask_ack = jensen_local_mask_ack, 93 .irq_mask_ack = jensen_local_mask_ack,
94}; 94};
95 95
96static void 96static void
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 2bfc9f1b1ddc..e61910734e41 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -104,9 +104,10 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7)
104} 104}
105 105
106static void 106static void
107io7_enable_irq(unsigned int irq) 107io7_enable_irq(struct irq_data *d)
108{ 108{
109 volatile unsigned long *ctl; 109 volatile unsigned long *ctl;
110 unsigned int irq = d->irq;
110 struct io7 *io7; 111 struct io7 *io7;
111 112
112 ctl = io7_get_irq_ctl(irq, &io7); 113 ctl = io7_get_irq_ctl(irq, &io7);
@@ -115,7 +116,7 @@ io7_enable_irq(unsigned int irq)
115 __func__, irq); 116 __func__, irq);
116 return; 117 return;
117 } 118 }
118 119
119 spin_lock(&io7->irq_lock); 120 spin_lock(&io7->irq_lock);
120 *ctl |= 1UL << 24; 121 *ctl |= 1UL << 24;
121 mb(); 122 mb();
@@ -124,9 +125,10 @@ io7_enable_irq(unsigned int irq)
124} 125}
125 126
126static void 127static void
127io7_disable_irq(unsigned int irq) 128io7_disable_irq(struct irq_data *d)
128{ 129{
129 volatile unsigned long *ctl; 130 volatile unsigned long *ctl;
131 unsigned int irq = d->irq;
130 struct io7 *io7; 132 struct io7 *io7;
131 133
132 ctl = io7_get_irq_ctl(irq, &io7); 134 ctl = io7_get_irq_ctl(irq, &io7);
@@ -135,7 +137,7 @@ io7_disable_irq(unsigned int irq)
135 __func__, irq); 137 __func__, irq);
136 return; 138 return;
137 } 139 }
138 140
139 spin_lock(&io7->irq_lock); 141 spin_lock(&io7->irq_lock);
140 *ctl &= ~(1UL << 24); 142 *ctl &= ~(1UL << 24);
141 mb(); 143 mb();
@@ -144,35 +146,29 @@ io7_disable_irq(unsigned int irq)
144} 146}
145 147
146static void 148static void
147marvel_irq_noop(unsigned int irq) 149marvel_irq_noop(struct irq_data *d)
148{ 150{
149 return; 151 return;
150}
151
152static unsigned int
153marvel_irq_noop_return(unsigned int irq)
154{
155 return 0;
156} 152}
157 153
158static struct irq_chip marvel_legacy_irq_type = { 154static struct irq_chip marvel_legacy_irq_type = {
159 .name = "LEGACY", 155 .name = "LEGACY",
160 .mask = marvel_irq_noop, 156 .irq_mask = marvel_irq_noop,
161 .unmask = marvel_irq_noop, 157 .irq_unmask = marvel_irq_noop,
162}; 158};
163 159
164static struct irq_chip io7_lsi_irq_type = { 160static struct irq_chip io7_lsi_irq_type = {
165 .name = "LSI", 161 .name = "LSI",
166 .unmask = io7_enable_irq, 162 .irq_unmask = io7_enable_irq,
167 .mask = io7_disable_irq, 163 .irq_mask = io7_disable_irq,
168 .mask_ack = io7_disable_irq, 164 .irq_mask_ack = io7_disable_irq,
169}; 165};
170 166
171static struct irq_chip io7_msi_irq_type = { 167static struct irq_chip io7_msi_irq_type = {
172 .name = "MSI", 168 .name = "MSI",
173 .unmask = io7_enable_irq, 169 .irq_unmask = io7_enable_irq,
174 .mask = io7_disable_irq, 170 .irq_mask = io7_disable_irq,
175 .ack = marvel_irq_noop, 171 .irq_ack = marvel_irq_noop,
176}; 172};
177 173
178static void 174static void
@@ -280,8 +276,8 @@ init_io7_irqs(struct io7 *io7,
280 276
281 /* Set up the lsi irqs. */ 277 /* Set up the lsi irqs. */
282 for (i = 0; i < 128; ++i) { 278 for (i = 0; i < 128; ++i) {
283 irq_to_desc(base + i)->status |= IRQ_LEVEL;
284 set_irq_chip_and_handler(base + i, lsi_ops, handle_level_irq); 279 set_irq_chip_and_handler(base + i, lsi_ops, handle_level_irq);
280 irq_set_status_flags(i, IRQ_LEVEL);
285 } 281 }
286 282
287 /* Disable the implemented irqs in hardware. */ 283 /* Disable the implemented irqs in hardware. */
@@ -294,8 +290,8 @@ init_io7_irqs(struct io7 *io7,
294 290
295 /* Set up the msi irqs. */ 291 /* Set up the msi irqs. */
296 for (i = 128; i < (128 + 512); ++i) { 292 for (i = 128; i < (128 + 512); ++i) {
297 irq_to_desc(base + i)->status |= IRQ_LEVEL;
298 set_irq_chip_and_handler(base + i, msi_ops, handle_level_irq); 293 set_irq_chip_and_handler(base + i, msi_ops, handle_level_irq);
294 irq_set_status_flags(i, IRQ_LEVEL);
299 } 295 }
300 296
301 for (i = 0; i < 16; ++i) 297 for (i = 0; i < 16; ++i)
diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c
index bcc1639e8efb..cf7f43dd3147 100644
--- a/arch/alpha/kernel/sys_mikasa.c
+++ b/arch/alpha/kernel/sys_mikasa.c
@@ -43,22 +43,22 @@ mikasa_update_irq_hw(int mask)
43} 43}
44 44
45static inline void 45static inline void
46mikasa_enable_irq(unsigned int irq) 46mikasa_enable_irq(struct irq_data *d)
47{ 47{
48 mikasa_update_irq_hw(cached_irq_mask |= 1 << (irq - 16)); 48 mikasa_update_irq_hw(cached_irq_mask |= 1 << (d->irq - 16));
49} 49}
50 50
51static void 51static void
52mikasa_disable_irq(unsigned int irq) 52mikasa_disable_irq(struct irq_data *d)
53{ 53{
54 mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (irq - 16))); 54 mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (d->irq - 16)));
55} 55}
56 56
57static struct irq_chip mikasa_irq_type = { 57static struct irq_chip mikasa_irq_type = {
58 .name = "MIKASA", 58 .name = "MIKASA",
59 .unmask = mikasa_enable_irq, 59 .irq_unmask = mikasa_enable_irq,
60 .mask = mikasa_disable_irq, 60 .irq_mask = mikasa_disable_irq,
61 .mask_ack = mikasa_disable_irq, 61 .irq_mask_ack = mikasa_disable_irq,
62}; 62};
63 63
64static void 64static void
@@ -98,8 +98,8 @@ mikasa_init_irq(void)
98 mikasa_update_irq_hw(0); 98 mikasa_update_irq_hw(0);
99 99
100 for (i = 16; i < 32; ++i) { 100 for (i = 16; i < 32; ++i) {
101 irq_to_desc(i)->status |= IRQ_LEVEL;
102 set_irq_chip_and_handler(i, &mikasa_irq_type, handle_level_irq); 101 set_irq_chip_and_handler(i, &mikasa_irq_type, handle_level_irq);
102 irq_set_status_flags(i, IRQ_LEVEL);
103 } 103 }
104 104
105 init_i8259a_irqs(); 105 init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c
index e88f4ae1260e..92bc188e94a9 100644
--- a/arch/alpha/kernel/sys_noritake.c
+++ b/arch/alpha/kernel/sys_noritake.c
@@ -48,22 +48,22 @@ noritake_update_irq_hw(int irq, int mask)
48} 48}
49 49
50static void 50static void
51noritake_enable_irq(unsigned int irq) 51noritake_enable_irq(struct irq_data *d)
52{ 52{
53 noritake_update_irq_hw(irq, cached_irq_mask |= 1 << (irq - 16)); 53 noritake_update_irq_hw(d->irq, cached_irq_mask |= 1 << (d->irq - 16));
54} 54}
55 55
56static void 56static void
57noritake_disable_irq(unsigned int irq) 57noritake_disable_irq(struct irq_data *d)
58{ 58{
59 noritake_update_irq_hw(irq, cached_irq_mask &= ~(1 << (irq - 16))); 59 noritake_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << (d->irq - 16)));
60} 60}
61 61
62static struct irq_chip noritake_irq_type = { 62static struct irq_chip noritake_irq_type = {
63 .name = "NORITAKE", 63 .name = "NORITAKE",
64 .unmask = noritake_enable_irq, 64 .irq_unmask = noritake_enable_irq,
65 .mask = noritake_disable_irq, 65 .irq_mask = noritake_disable_irq,
66 .mask_ack = noritake_disable_irq, 66 .irq_mask_ack = noritake_disable_irq,
67}; 67};
68 68
69static void 69static void
@@ -127,8 +127,8 @@ noritake_init_irq(void)
127 outw(0, 0x54c); 127 outw(0, 0x54c);
128 128
129 for (i = 16; i < 48; ++i) { 129 for (i = 16; i < 48; ++i) {
130 irq_to_desc(i)->status |= IRQ_LEVEL;
131 set_irq_chip_and_handler(i, &noritake_irq_type, handle_level_irq); 130 set_irq_chip_and_handler(i, &noritake_irq_type, handle_level_irq);
131 irq_set_status_flags(i, IRQ_LEVEL);
132 } 132 }
133 133
134 init_i8259a_irqs(); 134 init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c
index 6a51364dd1cc..936d4140ed5f 100644
--- a/arch/alpha/kernel/sys_rawhide.c
+++ b/arch/alpha/kernel/sys_rawhide.c
@@ -56,9 +56,10 @@ rawhide_update_irq_hw(int hose, int mask)
56 (((h) < MCPCIA_MAX_HOSES) && (cached_irq_masks[(h)] != 0)) 56 (((h) < MCPCIA_MAX_HOSES) && (cached_irq_masks[(h)] != 0))
57 57
58static inline void 58static inline void
59rawhide_enable_irq(unsigned int irq) 59rawhide_enable_irq(struct irq_data *d)
60{ 60{
61 unsigned int mask, hose; 61 unsigned int mask, hose;
62 unsigned int irq = d->irq;
62 63
63 irq -= 16; 64 irq -= 16;
64 hose = irq / 24; 65 hose = irq / 24;
@@ -76,9 +77,10 @@ rawhide_enable_irq(unsigned int irq)
76} 77}
77 78
78static void 79static void
79rawhide_disable_irq(unsigned int irq) 80rawhide_disable_irq(struct irq_data *d)
80{ 81{
81 unsigned int mask, hose; 82 unsigned int mask, hose;
83 unsigned int irq = d->irq;
82 84
83 irq -= 16; 85 irq -= 16;
84 hose = irq / 24; 86 hose = irq / 24;
@@ -96,9 +98,10 @@ rawhide_disable_irq(unsigned int irq)
96} 98}
97 99
98static void 100static void
99rawhide_mask_and_ack_irq(unsigned int irq) 101rawhide_mask_and_ack_irq(struct irq_data *d)
100{ 102{
101 unsigned int mask, mask1, hose; 103 unsigned int mask, mask1, hose;
104 unsigned int irq = d->irq;
102 105
103 irq -= 16; 106 irq -= 16;
104 hose = irq / 24; 107 hose = irq / 24;
@@ -123,9 +126,9 @@ rawhide_mask_and_ack_irq(unsigned int irq)
123 126
124static struct irq_chip rawhide_irq_type = { 127static struct irq_chip rawhide_irq_type = {
125 .name = "RAWHIDE", 128 .name = "RAWHIDE",
126 .unmask = rawhide_enable_irq, 129 .irq_unmask = rawhide_enable_irq,
127 .mask = rawhide_disable_irq, 130 .irq_mask = rawhide_disable_irq,
128 .mask_ack = rawhide_mask_and_ack_irq, 131 .irq_mask_ack = rawhide_mask_and_ack_irq,
129}; 132};
130 133
131static void 134static void
@@ -177,8 +180,8 @@ rawhide_init_irq(void)
177 } 180 }
178 181
179 for (i = 16; i < 128; ++i) { 182 for (i = 16; i < 128; ++i) {
180 irq_to_desc(i)->status |= IRQ_LEVEL;
181 set_irq_chip_and_handler(i, &rawhide_irq_type, handle_level_irq); 183 set_irq_chip_and_handler(i, &rawhide_irq_type, handle_level_irq);
184 irq_set_status_flags(i, IRQ_LEVEL);
182 } 185 }
183 186
184 init_i8259a_irqs(); 187 init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c
index 89e7e37ec84c..cea22a62913b 100644
--- a/arch/alpha/kernel/sys_rx164.c
+++ b/arch/alpha/kernel/sys_rx164.c
@@ -47,22 +47,22 @@ rx164_update_irq_hw(unsigned long mask)
47} 47}
48 48
49static inline void 49static inline void
50rx164_enable_irq(unsigned int irq) 50rx164_enable_irq(struct irq_data *d)
51{ 51{
52 rx164_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16)); 52 rx164_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
53} 53}
54 54
55static void 55static void
56rx164_disable_irq(unsigned int irq) 56rx164_disable_irq(struct irq_data *d)
57{ 57{
58 rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); 58 rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
59} 59}
60 60
61static struct irq_chip rx164_irq_type = { 61static struct irq_chip rx164_irq_type = {
62 .name = "RX164", 62 .name = "RX164",
63 .unmask = rx164_enable_irq, 63 .irq_unmask = rx164_enable_irq,
64 .mask = rx164_disable_irq, 64 .irq_mask = rx164_disable_irq,
65 .mask_ack = rx164_disable_irq, 65 .irq_mask_ack = rx164_disable_irq,
66}; 66};
67 67
68static void 68static void
@@ -99,8 +99,8 @@ rx164_init_irq(void)
99 99
100 rx164_update_irq_hw(0); 100 rx164_update_irq_hw(0);
101 for (i = 16; i < 40; ++i) { 101 for (i = 16; i < 40; ++i) {
102 irq_to_desc(i)->status |= IRQ_LEVEL;
103 set_irq_chip_and_handler(i, &rx164_irq_type, handle_level_irq); 102 set_irq_chip_and_handler(i, &rx164_irq_type, handle_level_irq);
103 irq_set_status_flags(i, IRQ_LEVEL);
104 } 104 }
105 105
106 init_i8259a_irqs(); 106 init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 5c4423d1b06c..a349538aabc9 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -443,11 +443,11 @@ lynx_swizzle(struct pci_dev *dev, u8 *pinp)
443/* GENERIC irq routines */ 443/* GENERIC irq routines */
444 444
445static inline void 445static inline void
446sable_lynx_enable_irq(unsigned int irq) 446sable_lynx_enable_irq(struct irq_data *d)
447{ 447{
448 unsigned long bit, mask; 448 unsigned long bit, mask;
449 449
450 bit = sable_lynx_irq_swizzle->irq_to_mask[irq]; 450 bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
451 spin_lock(&sable_lynx_irq_lock); 451 spin_lock(&sable_lynx_irq_lock);
452 mask = sable_lynx_irq_swizzle->shadow_mask &= ~(1UL << bit); 452 mask = sable_lynx_irq_swizzle->shadow_mask &= ~(1UL << bit);
453 sable_lynx_irq_swizzle->update_irq_hw(bit, mask); 453 sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -459,11 +459,11 @@ sable_lynx_enable_irq(unsigned int irq)
459} 459}
460 460
461static void 461static void
462sable_lynx_disable_irq(unsigned int irq) 462sable_lynx_disable_irq(struct irq_data *d)
463{ 463{
464 unsigned long bit, mask; 464 unsigned long bit, mask;
465 465
466 bit = sable_lynx_irq_swizzle->irq_to_mask[irq]; 466 bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
467 spin_lock(&sable_lynx_irq_lock); 467 spin_lock(&sable_lynx_irq_lock);
468 mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit; 468 mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit;
469 sable_lynx_irq_swizzle->update_irq_hw(bit, mask); 469 sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -475,11 +475,11 @@ sable_lynx_disable_irq(unsigned int irq)
475} 475}
476 476
477static void 477static void
478sable_lynx_mask_and_ack_irq(unsigned int irq) 478sable_lynx_mask_and_ack_irq(struct irq_data *d)
479{ 479{
480 unsigned long bit, mask; 480 unsigned long bit, mask;
481 481
482 bit = sable_lynx_irq_swizzle->irq_to_mask[irq]; 482 bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
483 spin_lock(&sable_lynx_irq_lock); 483 spin_lock(&sable_lynx_irq_lock);
484 mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit; 484 mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit;
485 sable_lynx_irq_swizzle->update_irq_hw(bit, mask); 485 sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -489,9 +489,9 @@ sable_lynx_mask_and_ack_irq(unsigned int irq)
489 489
490static struct irq_chip sable_lynx_irq_type = { 490static struct irq_chip sable_lynx_irq_type = {
491 .name = "SABLE/LYNX", 491 .name = "SABLE/LYNX",
492 .unmask = sable_lynx_enable_irq, 492 .irq_unmask = sable_lynx_enable_irq,
493 .mask = sable_lynx_disable_irq, 493 .irq_mask = sable_lynx_disable_irq,
494 .mask_ack = sable_lynx_mask_and_ack_irq, 494 .irq_mask_ack = sable_lynx_mask_and_ack_irq,
495}; 495};
496 496
497static void 497static void
@@ -518,9 +518,9 @@ sable_lynx_init_irq(int nr_of_irqs)
518 long i; 518 long i;
519 519
520 for (i = 0; i < nr_of_irqs; ++i) { 520 for (i = 0; i < nr_of_irqs; ++i) {
521 irq_to_desc(i)->status |= IRQ_LEVEL;
522 set_irq_chip_and_handler(i, &sable_lynx_irq_type, 521 set_irq_chip_and_handler(i, &sable_lynx_irq_type,
523 handle_level_irq); 522 handle_level_irq);
523 irq_set_status_flags(i, IRQ_LEVEL);
524 } 524 }
525 525
526 common_init_isa_dma(); 526 common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c
index f8a1e8a862fb..42a5331f13c4 100644
--- a/arch/alpha/kernel/sys_takara.c
+++ b/arch/alpha/kernel/sys_takara.c
@@ -45,16 +45,18 @@ takara_update_irq_hw(unsigned long irq, unsigned long mask)
45} 45}
46 46
47static inline void 47static inline void
48takara_enable_irq(unsigned int irq) 48takara_enable_irq(struct irq_data *d)
49{ 49{
50 unsigned int irq = d->irq;
50 unsigned long mask; 51 unsigned long mask;
51 mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63))); 52 mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63)));
52 takara_update_irq_hw(irq, mask); 53 takara_update_irq_hw(irq, mask);
53} 54}
54 55
55static void 56static void
56takara_disable_irq(unsigned int irq) 57takara_disable_irq(struct irq_data *d)
57{ 58{
59 unsigned int irq = d->irq;
58 unsigned long mask; 60 unsigned long mask;
59 mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63)); 61 mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63));
60 takara_update_irq_hw(irq, mask); 62 takara_update_irq_hw(irq, mask);
@@ -62,9 +64,9 @@ takara_disable_irq(unsigned int irq)
62 64
63static struct irq_chip takara_irq_type = { 65static struct irq_chip takara_irq_type = {
64 .name = "TAKARA", 66 .name = "TAKARA",
65 .unmask = takara_enable_irq, 67 .irq_unmask = takara_enable_irq,
66 .mask = takara_disable_irq, 68 .irq_mask = takara_disable_irq,
67 .mask_ack = takara_disable_irq, 69 .irq_mask_ack = takara_disable_irq,
68}; 70};
69 71
70static void 72static void
@@ -136,8 +138,8 @@ takara_init_irq(void)
136 takara_update_irq_hw(i, -1); 138 takara_update_irq_hw(i, -1);
137 139
138 for (i = 16; i < 128; ++i) { 140 for (i = 16; i < 128; ++i) {
139 irq_to_desc(i)->status |= IRQ_LEVEL;
140 set_irq_chip_and_handler(i, &takara_irq_type, handle_level_irq); 141 set_irq_chip_and_handler(i, &takara_irq_type, handle_level_irq);
142 irq_set_status_flags(i, IRQ_LEVEL);
141 } 143 }
142 144
143 common_init_isa_dma(); 145 common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index e02494bf5ef3..8c13a0c77830 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -112,8 +112,9 @@ titan_update_irq_hw(unsigned long mask)
112} 112}
113 113
114static inline void 114static inline void
115titan_enable_irq(unsigned int irq) 115titan_enable_irq(struct irq_data *d)
116{ 116{
117 unsigned int irq = d->irq;
117 spin_lock(&titan_irq_lock); 118 spin_lock(&titan_irq_lock);
118 titan_cached_irq_mask |= 1UL << (irq - 16); 119 titan_cached_irq_mask |= 1UL << (irq - 16);
119 titan_update_irq_hw(titan_cached_irq_mask); 120 titan_update_irq_hw(titan_cached_irq_mask);
@@ -121,8 +122,9 @@ titan_enable_irq(unsigned int irq)
121} 122}
122 123
123static inline void 124static inline void
124titan_disable_irq(unsigned int irq) 125titan_disable_irq(struct irq_data *d)
125{ 126{
127 unsigned int irq = d->irq;
126 spin_lock(&titan_irq_lock); 128 spin_lock(&titan_irq_lock);
127 titan_cached_irq_mask &= ~(1UL << (irq - 16)); 129 titan_cached_irq_mask &= ~(1UL << (irq - 16));
128 titan_update_irq_hw(titan_cached_irq_mask); 130 titan_update_irq_hw(titan_cached_irq_mask);
@@ -144,8 +146,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
144} 146}
145 147
146static int 148static int
147titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) 149titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
150 bool force)
148{ 151{
152 unsigned int irq = d->irq;
149 spin_lock(&titan_irq_lock); 153 spin_lock(&titan_irq_lock);
150 titan_cpu_set_irq_affinity(irq - 16, *affinity); 154 titan_cpu_set_irq_affinity(irq - 16, *affinity);
151 titan_update_irq_hw(titan_cached_irq_mask); 155 titan_update_irq_hw(titan_cached_irq_mask);
@@ -175,17 +179,17 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax)
175{ 179{
176 long i; 180 long i;
177 for (i = imin; i <= imax; ++i) { 181 for (i = imin; i <= imax; ++i) {
178 irq_to_desc(i)->status |= IRQ_LEVEL;
179 set_irq_chip_and_handler(i, ops, handle_level_irq); 182 set_irq_chip_and_handler(i, ops, handle_level_irq);
183 irq_set_status_flags(i, IRQ_LEVEL);
180 } 184 }
181} 185}
182 186
183static struct irq_chip titan_irq_type = { 187static struct irq_chip titan_irq_type = {
184 .name = "TITAN", 188 .name = "TITAN",
185 .unmask = titan_enable_irq, 189 .irq_unmask = titan_enable_irq,
186 .mask = titan_disable_irq, 190 .irq_mask = titan_disable_irq,
187 .mask_ack = titan_disable_irq, 191 .irq_mask_ack = titan_disable_irq,
188 .set_affinity = titan_set_irq_affinity, 192 .irq_set_affinity = titan_set_irq_affinity,
189}; 193};
190 194
191static irqreturn_t 195static irqreturn_t
diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c
index eec52594d410..ca60a387ef0a 100644
--- a/arch/alpha/kernel/sys_wildfire.c
+++ b/arch/alpha/kernel/sys_wildfire.c
@@ -104,10 +104,12 @@ wildfire_init_irq_hw(void)
104} 104}
105 105
106static void 106static void
107wildfire_enable_irq(unsigned int irq) 107wildfire_enable_irq(struct irq_data *d)
108{ 108{
109 unsigned int irq = d->irq;
110
109 if (irq < 16) 111 if (irq < 16)
110 i8259a_enable_irq(irq); 112 i8259a_enable_irq(d);
111 113
112 spin_lock(&wildfire_irq_lock); 114 spin_lock(&wildfire_irq_lock);
113 set_bit(irq, &cached_irq_mask); 115 set_bit(irq, &cached_irq_mask);
@@ -116,10 +118,12 @@ wildfire_enable_irq(unsigned int irq)
116} 118}
117 119
118static void 120static void
119wildfire_disable_irq(unsigned int irq) 121wildfire_disable_irq(struct irq_data *d)
120{ 122{
123 unsigned int irq = d->irq;
124
121 if (irq < 16) 125 if (irq < 16)
122 i8259a_disable_irq(irq); 126 i8259a_disable_irq(d);
123 127
124 spin_lock(&wildfire_irq_lock); 128 spin_lock(&wildfire_irq_lock);
125 clear_bit(irq, &cached_irq_mask); 129 clear_bit(irq, &cached_irq_mask);
@@ -128,10 +132,12 @@ wildfire_disable_irq(unsigned int irq)
128} 132}
129 133
130static void 134static void
131wildfire_mask_and_ack_irq(unsigned int irq) 135wildfire_mask_and_ack_irq(struct irq_data *d)
132{ 136{
137 unsigned int irq = d->irq;
138
133 if (irq < 16) 139 if (irq < 16)
134 i8259a_mask_and_ack_irq(irq); 140 i8259a_mask_and_ack_irq(d);
135 141
136 spin_lock(&wildfire_irq_lock); 142 spin_lock(&wildfire_irq_lock);
137 clear_bit(irq, &cached_irq_mask); 143 clear_bit(irq, &cached_irq_mask);
@@ -141,9 +147,9 @@ wildfire_mask_and_ack_irq(unsigned int irq)
141 147
142static struct irq_chip wildfire_irq_type = { 148static struct irq_chip wildfire_irq_type = {
143 .name = "WILDFIRE", 149 .name = "WILDFIRE",
144 .unmask = wildfire_enable_irq, 150 .irq_unmask = wildfire_enable_irq,
145 .mask = wildfire_disable_irq, 151 .irq_mask = wildfire_disable_irq,
146 .mask_ack = wildfire_mask_and_ack_irq, 152 .irq_mask_ack = wildfire_mask_and_ack_irq,
147}; 153};
148 154
149static void __init 155static void __init
@@ -177,21 +183,21 @@ wildfire_init_irq_per_pca(int qbbno, int pcano)
177 for (i = 0; i < 16; ++i) { 183 for (i = 0; i < 16; ++i) {
178 if (i == 2) 184 if (i == 2)
179 continue; 185 continue;
180 irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL;
181 set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type, 186 set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type,
182 handle_level_irq); 187 handle_level_irq);
188 irq_set_status_flags(i + irq_bias, IRQ_LEVEL);
183 } 189 }
184 190
185 irq_to_desc(36+irq_bias)->status |= IRQ_LEVEL;
186 set_irq_chip_and_handler(36+irq_bias, &wildfire_irq_type, 191 set_irq_chip_and_handler(36+irq_bias, &wildfire_irq_type,
187 handle_level_irq); 192 handle_level_irq);
193 irq_set_status_flags(36 + irq_bias, IRQ_LEVEL);
188 for (i = 40; i < 64; ++i) { 194 for (i = 40; i < 64; ++i) {
189 irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL;
190 set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type, 195 set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type,
191 handle_level_irq); 196 handle_level_irq);
197 irq_set_status_flags(i + irq_bias, IRQ_LEVEL);
192 } 198 }
193 199
194 setup_irq(32+irq_bias, &isa_enable); 200 setup_irq(32+irq_bias, &isa_enable);
195} 201}
196 202
197static void __init 203static void __init
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index c1f3e7cb82a4..a58e84f1a63b 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -159,7 +159,7 @@ void read_persistent_clock(struct timespec *ts)
159 159
160/* 160/*
161 * timer_interrupt() needs to keep up the real-time clock, 161 * timer_interrupt() needs to keep up the real-time clock,
162 * as well as call the "do_timer()" routine every clocktick 162 * as well as call the "xtime_update()" routine every clocktick
163 */ 163 */
164irqreturn_t timer_interrupt(int irq, void *dev) 164irqreturn_t timer_interrupt(int irq, void *dev)
165{ 165{
@@ -172,8 +172,6 @@ irqreturn_t timer_interrupt(int irq, void *dev)
172 profile_tick(CPU_PROFILING); 172 profile_tick(CPU_PROFILING);
173#endif 173#endif
174 174
175 write_seqlock(&xtime_lock);
176
177 /* 175 /*
178 * Calculate how many ticks have passed since the last update, 176 * Calculate how many ticks have passed since the last update,
179 * including any previous partial leftover. Save any resulting 177 * including any previous partial leftover. Save any resulting
@@ -187,9 +185,7 @@ irqreturn_t timer_interrupt(int irq, void *dev)
187 nticks = delta >> FIX_SHIFT; 185 nticks = delta >> FIX_SHIFT;
188 186
189 if (nticks) 187 if (nticks)
190 do_timer(nticks); 188 xtime_update(nticks);
191
192 write_sequnlock(&xtime_lock);
193 189
194 if (test_irq_work_pending()) { 190 if (test_irq_work_pending()) {
195 clear_irq_work_pending(); 191 clear_irq_work_pending();
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 003ef4c02585..433be2a24f31 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -1,5 +1,6 @@
1#include <asm-generic/vmlinux.lds.h> 1#include <asm-generic/vmlinux.lds.h>
2#include <asm/thread_info.h> 2#include <asm/thread_info.h>
3#include <asm/cache.h>
3#include <asm/page.h> 4#include <asm/page.h>
4 5
5OUTPUT_FORMAT("elf64-alpha") 6OUTPUT_FORMAT("elf64-alpha")
@@ -38,7 +39,7 @@ SECTIONS
38 __init_begin = ALIGN(PAGE_SIZE); 39 __init_begin = ALIGN(PAGE_SIZE);
39 INIT_TEXT_SECTION(PAGE_SIZE) 40 INIT_TEXT_SECTION(PAGE_SIZE)
40 INIT_DATA_SECTION(16) 41 INIT_DATA_SECTION(16)
41 PERCPU(PAGE_SIZE) 42 PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
42 /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page 43 /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page
43 needed for the THREAD_SIZE aligned init_task gets freed after init */ 44 needed for the THREAD_SIZE aligned init_task gets freed after init */
44 . = ALIGN(THREAD_SIZE); 45 . = ALIGN(THREAD_SIZE);
@@ -46,7 +47,7 @@ SECTIONS
46 /* Freed after init ends here */ 47 /* Freed after init ends here */
47 48
48 _data = .; 49 _data = .;
49 RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) 50 RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
50 51
51 .got : { 52 .got : {
52 *(.got) 53 *(.got)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 26d45e5b636b..166efa2a19cd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1177,6 +1177,31 @@ config ARM_ERRATA_743622
1177 visible impact on the overall performance or power consumption of the 1177 visible impact on the overall performance or power consumption of the
1178 processor. 1178 processor.
1179 1179
1180config ARM_ERRATA_751472
1181 bool "ARM errata: Interrupted ICIALLUIS may prevent completion of broadcasted operation"
1182 depends on CPU_V7 && SMP
1183 help
1184 This option enables the workaround for the 751472 Cortex-A9 (prior
1185 to r3p0) erratum. An interrupted ICIALLUIS operation may prevent the
1186 completion of a following broadcasted operation if the second
1187 operation is received by a CPU before the ICIALLUIS has completed,
1188 potentially leading to corrupted entries in the cache or TLB.
1189
1190config ARM_ERRATA_753970
1191 bool "ARM errata: cache sync operation may be faulty"
1192 depends on CACHE_PL310
1193 help
1194 This option enables the workaround for the 753970 PL310 (r3p0) erratum.
1195
1196 Under some condition the effect of cache sync operation on
1197 the store buffer still remains when the operation completes.
1198 This means that the store buffer is always asked to drain and
1199 this prevents it from merging any further writes. The workaround
1200 is to replace the normal offset of cache sync operation (0x730)
1201 by another offset targeting an unmapped PL310 register 0x740.
1202 This has the same effect as the cache sync operation: store buffer
1203 drain and waiting for all buffers empty.
1204
1180endmenu 1205endmenu
1181 1206
1182source "arch/arm/common/Kconfig" 1207source "arch/arm/common/Kconfig"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index c22c1adfedd6..6f7b29294c80 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -15,7 +15,7 @@ ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
15LDFLAGS_vmlinux += --be8 15LDFLAGS_vmlinux += --be8
16endif 16endif
17 17
18OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S 18OBJCOPYFLAGS :=-O binary -R .comment -S
19GZFLAGS :=-9 19GZFLAGS :=-9
20#KBUILD_CFLAGS +=-pipe 20#KBUILD_CFLAGS +=-pipe
21# Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: 21# Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb:
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index ab204db594d3..c6028967d336 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -1,3 +1,7 @@
1font.c 1font.c
2piggy.gz 2lib1funcs.S
3piggy.gzip
4piggy.lzo
5piggy.lzma
6vmlinux
3vmlinux.lds 7vmlinux.lds
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 778655f0257a..ea5ee4d067f3 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -6,6 +6,8 @@ config ARM_VIC
6 6
7config ARM_VIC_NR 7config ARM_VIC_NR
8 int 8 int
9 default 4 if ARCH_S5PV210
10 default 3 if ARCH_S5P6442 || ARCH_S5PC100
9 default 2 11 default 2
10 depends on ARM_VIC 12 depends on ARM_VIC
11 help 13 help
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index b33fe7065b38..199a6b6de7f4 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -35,7 +35,7 @@
35 : "cc", "memory") 35 : "cc", "memory")
36 36
37static inline int 37static inline int
38futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 38futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
39{ 39{
40 int op = (encoded_op >> 28) & 7; 40 int op = (encoded_op >> 28) & 7;
41 int cmp = (encoded_op >> 24) & 15; 41 int cmp = (encoded_op >> 24) & 15;
@@ -46,7 +46,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
46 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 46 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
47 oparg = 1 << oparg; 47 oparg = 1 << oparg;
48 48
49 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 49 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
50 return -EFAULT; 50 return -EFAULT;
51 51
52 pagefault_disable(); /* implies preempt_disable() */ 52 pagefault_disable(); /* implies preempt_disable() */
@@ -88,36 +88,35 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
88} 88}
89 89
90static inline int 90static inline int
91futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 91futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
92 u32 oldval, u32 newval)
92{ 93{
93 int val; 94 int ret = 0;
95 u32 val;
94 96
95 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 97 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
96 return -EFAULT; 98 return -EFAULT;
97 99
98 pagefault_disable(); /* implies preempt_disable() */
99
100 __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" 100 __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
101 "1: " T(ldr) " %0, [%3]\n" 101 "1: " T(ldr) " %1, [%4]\n"
102 " teq %0, %1\n" 102 " teq %1, %2\n"
103 " it eq @ explicit IT needed for the 2b label\n" 103 " it eq @ explicit IT needed for the 2b label\n"
104 "2: " T(streq) " %2, [%3]\n" 104 "2: " T(streq) " %3, [%4]\n"
105 "3:\n" 105 "3:\n"
106 " .pushsection __ex_table,\"a\"\n" 106 " .pushsection __ex_table,\"a\"\n"
107 " .align 3\n" 107 " .align 3\n"
108 " .long 1b, 4f, 2b, 4f\n" 108 " .long 1b, 4f, 2b, 4f\n"
109 " .popsection\n" 109 " .popsection\n"
110 " .pushsection .fixup,\"ax\"\n" 110 " .pushsection .fixup,\"ax\"\n"
111 "4: mov %0, %4\n" 111 "4: mov %0, %5\n"
112 " b 3b\n" 112 " b 3b\n"
113 " .popsection" 113 " .popsection"
114 : "=&r" (val) 114 : "+r" (ret), "=&r" (val)
115 : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) 115 : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT)
116 : "cc", "memory"); 116 : "cc", "memory");
117 117
118 pagefault_enable(); /* subsumes preempt_enable() */ 118 *uval = val;
119 119 return ret;
120 return val;
121} 120}
122 121
123#endif /* !SMP */ 122#endif /* !SMP */
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 5aeec1e1735c..16bd48031583 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -36,6 +36,7 @@
36#define L2X0_RAW_INTR_STAT 0x21C 36#define L2X0_RAW_INTR_STAT 0x21C
37#define L2X0_INTR_CLEAR 0x220 37#define L2X0_INTR_CLEAR 0x220
38#define L2X0_CACHE_SYNC 0x730 38#define L2X0_CACHE_SYNC 0x730
39#define L2X0_DUMMY_REG 0x740
39#define L2X0_INV_LINE_PA 0x770 40#define L2X0_INV_LINE_PA 0x770
40#define L2X0_INV_WAY 0x77C 41#define L2X0_INV_WAY 0x77C
41#define L2X0_CLEAN_LINE_PA 0x7B0 42#define L2X0_CLEAN_LINE_PA 0x7B0
diff --git a/arch/arm/include/asm/hardware/sp810.h b/arch/arm/include/asm/hardware/sp810.h
index 721847dc68ab..e0d1c0cfa548 100644
--- a/arch/arm/include/asm/hardware/sp810.h
+++ b/arch/arm/include/asm/hardware/sp810.h
@@ -58,6 +58,9 @@
58 58
59static inline void sysctl_soft_reset(void __iomem *base) 59static inline void sysctl_soft_reset(void __iomem *base)
60{ 60{
61 /* switch to slow mode */
62 writel(0x2, base + SCCTRL);
63
61 /* writing any value to SCSYSSTAT reg will reset system */ 64 /* writing any value to SCSYSSTAT reg will reset system */
62 writel(0, base + SCSYSSTAT); 65 writel(0, base + SCSYSSTAT);
63} 66}
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 3a0893a76a3b..bf13b814c1b8 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -15,10 +15,6 @@ struct meminfo;
15struct sys_timer; 15struct sys_timer;
16 16
17struct machine_desc { 17struct machine_desc {
18 /*
19 * Note! The first two elements are used
20 * by assembler code in head.S, head-common.S
21 */
22 unsigned int nr; /* architecture number */ 18 unsigned int nr; /* architecture number */
23 const char *name; /* architecture name */ 19 const char *name; /* architecture name */
24 unsigned long boot_params; /* tagged list */ 20 unsigned long boot_params; /* tagged list */
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 9763be04f77e..22de005f159c 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
10#ifndef _ASMARM_PGALLOC_H 10#ifndef _ASMARM_PGALLOC_H
11#define _ASMARM_PGALLOC_H 11#define _ASMARM_PGALLOC_H
12 12
13#include <linux/pagemap.h>
14
13#include <asm/domain.h> 15#include <asm/domain.h>
14#include <asm/pgtable-hwdef.h> 16#include <asm/pgtable-hwdef.h>
15#include <asm/processor.h> 17#include <asm/processor.h>
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index f41a6f57cd12..82dfe5d0c41e 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -18,16 +18,34 @@
18#define __ASMARM_TLB_H 18#define __ASMARM_TLB_H
19 19
20#include <asm/cacheflush.h> 20#include <asm/cacheflush.h>
21#include <asm/tlbflush.h>
22 21
23#ifndef CONFIG_MMU 22#ifndef CONFIG_MMU
24 23
25#include <linux/pagemap.h> 24#include <linux/pagemap.h>
25
26#define tlb_flush(tlb) ((void) tlb)
27
26#include <asm-generic/tlb.h> 28#include <asm-generic/tlb.h>
27 29
28#else /* !CONFIG_MMU */ 30#else /* !CONFIG_MMU */
29 31
32#include <linux/swap.h>
30#include <asm/pgalloc.h> 33#include <asm/pgalloc.h>
34#include <asm/tlbflush.h>
35
36/*
37 * We need to delay page freeing for SMP as other CPUs can access pages
38 * which have been removed but not yet had their TLB entries invalidated.
39 * Also, as ARMv7 speculative prefetch can drag new entries into the TLB,
40 * we need to apply this same delaying tactic to ensure correct operation.
41 */
42#if defined(CONFIG_SMP) || defined(CONFIG_CPU_32v7)
43#define tlb_fast_mode(tlb) 0
44#define FREE_PTE_NR 500
45#else
46#define tlb_fast_mode(tlb) 1
47#define FREE_PTE_NR 0
48#endif
31 49
32/* 50/*
33 * TLB handling. This allows us to remove pages from the page 51 * TLB handling. This allows us to remove pages from the page
@@ -36,12 +54,58 @@
36struct mmu_gather { 54struct mmu_gather {
37 struct mm_struct *mm; 55 struct mm_struct *mm;
38 unsigned int fullmm; 56 unsigned int fullmm;
57 struct vm_area_struct *vma;
39 unsigned long range_start; 58 unsigned long range_start;
40 unsigned long range_end; 59 unsigned long range_end;
60 unsigned int nr;
61 struct page *pages[FREE_PTE_NR];
41}; 62};
42 63
43DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); 64DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
44 65
66/*
67 * This is unnecessarily complex. There's three ways the TLB shootdown
68 * code is used:
69 * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region().
70 * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
71 * tlb->vma will be non-NULL.
72 * 2. Unmapping all vmas. See exit_mmap().
73 * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
74 * tlb->vma will be non-NULL. Additionally, page tables will be freed.
75 * 3. Unmapping argument pages. See shift_arg_pages().
76 * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
77 * tlb->vma will be NULL.
78 */
79static inline void tlb_flush(struct mmu_gather *tlb)
80{
81 if (tlb->fullmm || !tlb->vma)
82 flush_tlb_mm(tlb->mm);
83 else if (tlb->range_end > 0) {
84 flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end);
85 tlb->range_start = TASK_SIZE;
86 tlb->range_end = 0;
87 }
88}
89
90static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
91{
92 if (!tlb->fullmm) {
93 if (addr < tlb->range_start)
94 tlb->range_start = addr;
95 if (addr + PAGE_SIZE > tlb->range_end)
96 tlb->range_end = addr + PAGE_SIZE;
97 }
98}
99
100static inline void tlb_flush_mmu(struct mmu_gather *tlb)
101{
102 tlb_flush(tlb);
103 if (!tlb_fast_mode(tlb)) {
104 free_pages_and_swap_cache(tlb->pages, tlb->nr);
105 tlb->nr = 0;
106 }
107}
108
45static inline struct mmu_gather * 109static inline struct mmu_gather *
46tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) 110tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
47{ 111{
@@ -49,6 +113,8 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
49 113
50 tlb->mm = mm; 114 tlb->mm = mm;
51 tlb->fullmm = full_mm_flush; 115 tlb->fullmm = full_mm_flush;
116 tlb->vma = NULL;
117 tlb->nr = 0;
52 118
53 return tlb; 119 return tlb;
54} 120}
@@ -56,8 +122,7 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
56static inline void 122static inline void
57tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) 123tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
58{ 124{
59 if (tlb->fullmm) 125 tlb_flush_mmu(tlb);
60 flush_tlb_mm(tlb->mm);
61 126
62 /* keep the page table cache within bounds */ 127 /* keep the page table cache within bounds */
63 check_pgt_cache(); 128 check_pgt_cache();
@@ -71,12 +136,7 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
71static inline void 136static inline void
72tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr) 137tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
73{ 138{
74 if (!tlb->fullmm) { 139 tlb_add_flush(tlb, addr);
75 if (addr < tlb->range_start)
76 tlb->range_start = addr;
77 if (addr + PAGE_SIZE > tlb->range_end)
78 tlb->range_end = addr + PAGE_SIZE;
79 }
80} 140}
81 141
82/* 142/*
@@ -89,6 +149,7 @@ tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
89{ 149{
90 if (!tlb->fullmm) { 150 if (!tlb->fullmm) {
91 flush_cache_range(vma, vma->vm_start, vma->vm_end); 151 flush_cache_range(vma, vma->vm_start, vma->vm_end);
152 tlb->vma = vma;
92 tlb->range_start = TASK_SIZE; 153 tlb->range_start = TASK_SIZE;
93 tlb->range_end = 0; 154 tlb->range_end = 0;
94 } 155 }
@@ -97,12 +158,30 @@ tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
97static inline void 158static inline void
98tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) 159tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
99{ 160{
100 if (!tlb->fullmm && tlb->range_end > 0) 161 if (!tlb->fullmm)
101 flush_tlb_range(vma, tlb->range_start, tlb->range_end); 162 tlb_flush(tlb);
163}
164
165static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
166{
167 if (tlb_fast_mode(tlb)) {
168 free_page_and_swap_cache(page);
169 } else {
170 tlb->pages[tlb->nr++] = page;
171 if (tlb->nr >= FREE_PTE_NR)
172 tlb_flush_mmu(tlb);
173 }
174}
175
176static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
177 unsigned long addr)
178{
179 pgtable_page_dtor(pte);
180 tlb_add_flush(tlb, addr);
181 tlb_remove_page(tlb, pte);
102} 182}
103 183
104#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) 184#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
105#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep)
106#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp) 185#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp)
107 186
108#define tlb_migrate_finish(mm) do { } while (0) 187#define tlb_migrate_finish(mm) do { } while (0)
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index ce7378ea15a2..d2005de383b8 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -10,12 +10,7 @@
10#ifndef _ASMARM_TLBFLUSH_H 10#ifndef _ASMARM_TLBFLUSH_H
11#define _ASMARM_TLBFLUSH_H 11#define _ASMARM_TLBFLUSH_H
12 12
13 13#ifdef CONFIG_MMU
14#ifndef CONFIG_MMU
15
16#define tlb_flush(tlb) ((void) tlb)
17
18#else /* CONFIG_MMU */
19 14
20#include <asm/glue.h> 15#include <asm/glue.h>
21 16
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index d600bd350704..44b84fe6e1b0 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -836,9 +836,11 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
836/* 836/*
837 * One-time initialisation. 837 * One-time initialisation.
838 */ 838 */
839static void reset_ctrl_regs(void *unused) 839static void reset_ctrl_regs(void *info)
840{ 840{
841 int i; 841 int i, cpu = smp_processor_id();
842 u32 dbg_power;
843 cpumask_t *cpumask = info;
842 844
843 /* 845 /*
844 * v7 debug contains save and restore registers so that debug state 846 * v7 debug contains save and restore registers so that debug state
@@ -850,6 +852,17 @@ static void reset_ctrl_regs(void *unused)
850 */ 852 */
851 if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) { 853 if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
852 /* 854 /*
855 * Ensure sticky power-down is clear (i.e. debug logic is
856 * powered up).
857 */
858 asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
859 if ((dbg_power & 0x1) == 0) {
860 pr_warning("CPU %d debug is powered down!\n", cpu);
861 cpumask_or(cpumask, cpumask, cpumask_of(cpu));
862 return;
863 }
864
865 /*
853 * Unconditionally clear the lock by writing a value 866 * Unconditionally clear the lock by writing a value
854 * other than 0xC5ACCE55 to the access register. 867 * other than 0xC5ACCE55 to the access register.
855 */ 868 */
@@ -887,6 +900,7 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = {
887static int __init arch_hw_breakpoint_init(void) 900static int __init arch_hw_breakpoint_init(void)
888{ 901{
889 u32 dscr; 902 u32 dscr;
903 cpumask_t cpumask = { CPU_BITS_NONE };
890 904
891 debug_arch = get_debug_arch(); 905 debug_arch = get_debug_arch();
892 906
@@ -911,7 +925,13 @@ static int __init arch_hw_breakpoint_init(void)
911 * Reset the breakpoint resources. We assume that a halting 925 * Reset the breakpoint resources. We assume that a halting
912 * debugger will leave the world in a nice state for us. 926 * debugger will leave the world in a nice state for us.
913 */ 927 */
914 on_each_cpu(reset_ctrl_regs, NULL, 1); 928 on_each_cpu(reset_ctrl_regs, &cpumask, 1);
929 if (!cpumask_empty(&cpumask)) {
930 core_num_brps = 0;
931 core_num_reserved_brps = 0;
932 core_num_wrps = 0;
933 return 0;
934 }
915 935
916 ARM_DBG_READ(c1, 0, dscr); 936 ARM_DBG_READ(c1, 0, dscr);
917 if (dscr & ARM_DSCR_HDBGEN) { 937 if (dscr & ARM_DSCR_HDBGEN) {
diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 2c1f0050c9c4..8f6ed43861f1 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1437,7 +1437,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
1437 1437
1438 return space_cccc_1100_010x(insn, asi); 1438 return space_cccc_1100_010x(insn, asi);
1439 1439
1440 } else if ((insn & 0x0e000000) == 0x0c400000) { 1440 } else if ((insn & 0x0e000000) == 0x0c000000) {
1441 1441
1442 return space_cccc_110x(insn, asi); 1442 return space_cccc_110x(insn, asi);
1443 1443
diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c
index b8af96ea62e6..2c79eec19262 100644
--- a/arch/arm/kernel/pmu.c
+++ b/arch/arm/kernel/pmu.c
@@ -97,28 +97,34 @@ set_irq_affinity(int irq,
97 irq, cpu); 97 irq, cpu);
98 return err; 98 return err;
99#else 99#else
100 return 0; 100 return -EINVAL;
101#endif 101#endif
102} 102}
103 103
104static int 104static int
105init_cpu_pmu(void) 105init_cpu_pmu(void)
106{ 106{
107 int i, err = 0; 107 int i, irqs, err = 0;
108 struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU]; 108 struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU];
109 109
110 if (!pdev) { 110 if (!pdev)
111 err = -ENODEV; 111 return -ENODEV;
112 goto out; 112
113 } 113 irqs = pdev->num_resources;
114
115 /*
116 * If we have a single PMU interrupt that we can't shift, assume that
117 * we're running on a uniprocessor machine and continue.
118 */
119 if (irqs == 1 && !irq_can_set_affinity(platform_get_irq(pdev, 0)))
120 return 0;
114 121
115 for (i = 0; i < pdev->num_resources; ++i) { 122 for (i = 0; i < irqs; ++i) {
116 err = set_irq_affinity(platform_get_irq(pdev, i), i); 123 err = set_irq_affinity(platform_get_irq(pdev, i), i);
117 if (err) 124 if (err)
118 break; 125 break;
119 } 126 }
120 127
121out:
122 return err; 128 return err;
123} 129}
124 130
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 19c6816db61e..b13e70f63d71 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -996,10 +996,10 @@ static int ptrace_gethbpregs(struct task_struct *tsk, long num,
996 while (!(arch_ctrl.len & 0x1)) 996 while (!(arch_ctrl.len & 0x1))
997 arch_ctrl.len >>= 1; 997 arch_ctrl.len >>= 1;
998 998
999 if (idx & 0x1) 999 if (num & 0x1)
1000 reg = encode_ctrl_reg(arch_ctrl);
1001 else
1002 reg = bp->attr.bp_addr; 1000 reg = bp->attr.bp_addr;
1001 else
1002 reg = encode_ctrl_reg(arch_ctrl);
1003 } 1003 }
1004 1004
1005put: 1005put:
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 420b8d6485d6..5ea4fb718b97 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -226,8 +226,8 @@ int cpu_architecture(void)
226 * Register 0 and check for VMSAv7 or PMSAv7 */ 226 * Register 0 and check for VMSAv7 or PMSAv7 */
227 asm("mrc p15, 0, %0, c0, c1, 4" 227 asm("mrc p15, 0, %0, c0, c1, 4"
228 : "=r" (mmfr0)); 228 : "=r" (mmfr0));
229 if ((mmfr0 & 0x0000000f) == 0x00000003 || 229 if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
230 (mmfr0 & 0x000000f0) == 0x00000030) 230 (mmfr0 & 0x000000f0) >= 0x00000030)
231 cpu_arch = CPU_ARCH_ARMv7; 231 cpu_arch = CPU_ARCH_ARMv7;
232 else if ((mmfr0 & 0x0000000f) == 0x00000002 || 232 else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
233 (mmfr0 & 0x000000f0) == 0x00000020) 233 (mmfr0 & 0x000000f0) == 0x00000020)
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 907d5a620bca..abaf8445ce25 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -474,7 +474,9 @@ setup_return(struct pt_regs *regs, struct k_sigaction *ka,
474 unsigned long handler = (unsigned long)ka->sa.sa_handler; 474 unsigned long handler = (unsigned long)ka->sa.sa_handler;
475 unsigned long retcode; 475 unsigned long retcode;
476 int thumb = 0; 476 int thumb = 0;
477 unsigned long cpsr = regs->ARM_cpsr & ~PSR_f; 477 unsigned long cpsr = regs->ARM_cpsr & ~(PSR_f | PSR_E_BIT);
478
479 cpsr |= PSR_ENDSTATE;
478 480
479 /* 481 /*
480 * Maybe we need to deliver a 32-bit signal to a 26-bit task. 482 * Maybe we need to deliver a 32-bit signal to a 26-bit task.
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 3d76bf233734..1ff46cabc7ef 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -107,9 +107,7 @@ void timer_tick(void)
107{ 107{
108 profile_tick(CPU_PROFILING); 108 profile_tick(CPU_PROFILING);
109 do_leds(); 109 do_leds();
110 write_seqlock(&xtime_lock); 110 xtime_update(1);
111 do_timer(1);
112 write_sequnlock(&xtime_lock);
113#ifndef CONFIG_SMP 111#ifndef CONFIG_SMP
114 update_process_times(user_mode(get_irq_regs())); 112 update_process_times(user_mode(get_irq_regs()));
115#endif 113#endif
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 86b66f3f2031..28fea9b2d129 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -21,6 +21,12 @@
21#define ARM_CPU_KEEP(x) 21#define ARM_CPU_KEEP(x)
22#endif 22#endif
23 23
24#if defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)
25#define ARM_EXIT_KEEP(x) x
26#else
27#define ARM_EXIT_KEEP(x)
28#endif
29
24OUTPUT_ARCH(arm) 30OUTPUT_ARCH(arm)
25ENTRY(stext) 31ENTRY(stext)
26 32
@@ -43,6 +49,7 @@ SECTIONS
43 _sinittext = .; 49 _sinittext = .;
44 HEAD_TEXT 50 HEAD_TEXT
45 INIT_TEXT 51 INIT_TEXT
52 ARM_EXIT_KEEP(EXIT_TEXT)
46 _einittext = .; 53 _einittext = .;
47 ARM_CPU_DISCARD(PROC_INFO) 54 ARM_CPU_DISCARD(PROC_INFO)
48 __arch_info_begin = .; 55 __arch_info_begin = .;
@@ -67,10 +74,11 @@ SECTIONS
67#ifndef CONFIG_XIP_KERNEL 74#ifndef CONFIG_XIP_KERNEL
68 __init_begin = _stext; 75 __init_begin = _stext;
69 INIT_DATA 76 INIT_DATA
77 ARM_EXIT_KEEP(EXIT_DATA)
70#endif 78#endif
71 } 79 }
72 80
73 PERCPU(PAGE_SIZE) 81 PERCPU(32, PAGE_SIZE)
74 82
75#ifndef CONFIG_XIP_KERNEL 83#ifndef CONFIG_XIP_KERNEL
76 . = ALIGN(PAGE_SIZE); 84 . = ALIGN(PAGE_SIZE);
@@ -162,6 +170,7 @@ SECTIONS
162 . = ALIGN(PAGE_SIZE); 170 . = ALIGN(PAGE_SIZE);
163 __init_begin = .; 171 __init_begin = .;
164 INIT_DATA 172 INIT_DATA
173 ARM_EXIT_KEEP(EXIT_DATA)
165 . = ALIGN(PAGE_SIZE); 174 . = ALIGN(PAGE_SIZE);
166 __init_end = .; 175 __init_end = .;
167#endif 176#endif
@@ -247,6 +256,8 @@ SECTIONS
247 } 256 }
248#endif 257#endif
249 258
259 NOTES
260
250 BSS_SECTION(0, 0, 0) 261 BSS_SECTION(0, 0, 0)
251 _end = .; 262 _end = .;
252 263
diff --git a/arch/arm/mach-clps711x/include/mach/time.h b/arch/arm/mach-clps711x/include/mach/time.h
index 8fe283ccd1f3..61fef9129c6a 100644
--- a/arch/arm/mach-clps711x/include/mach/time.h
+++ b/arch/arm/mach-clps711x/include/mach/time.h
@@ -30,7 +30,7 @@ p720t_timer_interrupt(int irq, void *dev_id)
30{ 30{
31 struct pt_regs *regs = get_irq_regs(); 31 struct pt_regs *regs = get_irq_regs();
32 do_leds(); 32 do_leds();
33 do_timer(1); 33 xtime_update(1);
34#ifndef CONFIG_SMP 34#ifndef CONFIG_SMP
35 update_process_times(user_mode(regs)); 35 update_process_times(user_mode(regs));
36#endif 36#endif
diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c
index 343de73161fa..4a68c2b1ec11 100644
--- a/arch/arm/mach-davinci/cpufreq.c
+++ b/arch/arm/mach-davinci/cpufreq.c
@@ -132,7 +132,7 @@ out:
132 return ret; 132 return ret;
133} 133}
134 134
135static int __init davinci_cpu_init(struct cpufreq_policy *policy) 135static int davinci_cpu_init(struct cpufreq_policy *policy)
136{ 136{
137 int result = 0; 137 int result = 0;
138 struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data; 138 struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data;
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 9eec63070e0c..beda8a4133a0 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -480,8 +480,15 @@ static struct platform_device da850_mcasp_device = {
480 .resource = da850_mcasp_resources, 480 .resource = da850_mcasp_resources,
481}; 481};
482 482
483struct platform_device davinci_pcm_device = {
484 .name = "davinci-pcm-audio",
485 .id = -1,
486};
487
483void __init da8xx_register_mcasp(int id, struct snd_platform_data *pdata) 488void __init da8xx_register_mcasp(int id, struct snd_platform_data *pdata)
484{ 489{
490 platform_device_register(&davinci_pcm_device);
491
485 /* DA830/OMAP-L137 has 3 instances of McASP */ 492 /* DA830/OMAP-L137 has 3 instances of McASP */
486 if (cpu_is_davinci_da830() && id == 1) { 493 if (cpu_is_davinci_da830() && id == 1) {
487 da830_mcasp1_device.dev.platform_data = pdata; 494 da830_mcasp1_device.dev.platform_data = pdata;
diff --git a/arch/arm/mach-davinci/gpio-tnetv107x.c b/arch/arm/mach-davinci/gpio-tnetv107x.c
index d10298620e2c..3fa3e2867e19 100644
--- a/arch/arm/mach-davinci/gpio-tnetv107x.c
+++ b/arch/arm/mach-davinci/gpio-tnetv107x.c
@@ -58,7 +58,7 @@ static int tnetv107x_gpio_request(struct gpio_chip *chip, unsigned offset)
58 58
59 spin_lock_irqsave(&ctlr->lock, flags); 59 spin_lock_irqsave(&ctlr->lock, flags);
60 60
61 gpio_reg_set_bit(&regs->enable, gpio); 61 gpio_reg_set_bit(regs->enable, gpio);
62 62
63 spin_unlock_irqrestore(&ctlr->lock, flags); 63 spin_unlock_irqrestore(&ctlr->lock, flags);
64 64
@@ -74,7 +74,7 @@ static void tnetv107x_gpio_free(struct gpio_chip *chip, unsigned offset)
74 74
75 spin_lock_irqsave(&ctlr->lock, flags); 75 spin_lock_irqsave(&ctlr->lock, flags);
76 76
77 gpio_reg_clear_bit(&regs->enable, gpio); 77 gpio_reg_clear_bit(regs->enable, gpio);
78 78
79 spin_unlock_irqrestore(&ctlr->lock, flags); 79 spin_unlock_irqrestore(&ctlr->lock, flags);
80} 80}
@@ -88,7 +88,7 @@ static int tnetv107x_gpio_dir_in(struct gpio_chip *chip, unsigned offset)
88 88
89 spin_lock_irqsave(&ctlr->lock, flags); 89 spin_lock_irqsave(&ctlr->lock, flags);
90 90
91 gpio_reg_set_bit(&regs->direction, gpio); 91 gpio_reg_set_bit(regs->direction, gpio);
92 92
93 spin_unlock_irqrestore(&ctlr->lock, flags); 93 spin_unlock_irqrestore(&ctlr->lock, flags);
94 94
@@ -106,11 +106,11 @@ static int tnetv107x_gpio_dir_out(struct gpio_chip *chip,
106 spin_lock_irqsave(&ctlr->lock, flags); 106 spin_lock_irqsave(&ctlr->lock, flags);
107 107
108 if (value) 108 if (value)
109 gpio_reg_set_bit(&regs->data_out, gpio); 109 gpio_reg_set_bit(regs->data_out, gpio);
110 else 110 else
111 gpio_reg_clear_bit(&regs->data_out, gpio); 111 gpio_reg_clear_bit(regs->data_out, gpio);
112 112
113 gpio_reg_clear_bit(&regs->direction, gpio); 113 gpio_reg_clear_bit(regs->direction, gpio);
114 114
115 spin_unlock_irqrestore(&ctlr->lock, flags); 115 spin_unlock_irqrestore(&ctlr->lock, flags);
116 116
@@ -124,7 +124,7 @@ static int tnetv107x_gpio_get(struct gpio_chip *chip, unsigned offset)
124 unsigned gpio = chip->base + offset; 124 unsigned gpio = chip->base + offset;
125 int ret; 125 int ret;
126 126
127 ret = gpio_reg_get_bit(&regs->data_in, gpio); 127 ret = gpio_reg_get_bit(regs->data_in, gpio);
128 128
129 return ret ? 1 : 0; 129 return ret ? 1 : 0;
130} 130}
@@ -140,9 +140,9 @@ static void tnetv107x_gpio_set(struct gpio_chip *chip,
140 spin_lock_irqsave(&ctlr->lock, flags); 140 spin_lock_irqsave(&ctlr->lock, flags);
141 141
142 if (value) 142 if (value)
143 gpio_reg_set_bit(&regs->data_out, gpio); 143 gpio_reg_set_bit(regs->data_out, gpio);
144 else 144 else
145 gpio_reg_clear_bit(&regs->data_out, gpio); 145 gpio_reg_clear_bit(regs->data_out, gpio);
146 146
147 spin_unlock_irqrestore(&ctlr->lock, flags); 147 spin_unlock_irqrestore(&ctlr->lock, flags);
148} 148}
diff --git a/arch/arm/mach-davinci/include/mach/clkdev.h b/arch/arm/mach-davinci/include/mach/clkdev.h
index 730c49d1ebd8..14a504887189 100644
--- a/arch/arm/mach-davinci/include/mach/clkdev.h
+++ b/arch/arm/mach-davinci/include/mach/clkdev.h
@@ -1,6 +1,8 @@
1#ifndef __MACH_CLKDEV_H 1#ifndef __MACH_CLKDEV_H
2#define __MACH_CLKDEV_H 2#define __MACH_CLKDEV_H
3 3
4struct clk;
5
4static inline int __clk_get(struct clk *clk) 6static inline int __clk_get(struct clk *clk)
5{ 7{
6 return 1; 8 return 1;
diff --git a/arch/arm/mach-omap2/clkt_dpll.c b/arch/arm/mach-omap2/clkt_dpll.c
index 337392c3f549..acb7ae5b0a25 100644
--- a/arch/arm/mach-omap2/clkt_dpll.c
+++ b/arch/arm/mach-omap2/clkt_dpll.c
@@ -77,7 +77,7 @@ static int _dpll_test_fint(struct clk *clk, u8 n)
77 dd = clk->dpll_data; 77 dd = clk->dpll_data;
78 78
79 /* DPLL divider must result in a valid jitter correction val */ 79 /* DPLL divider must result in a valid jitter correction val */
80 fint = clk->parent->rate / (n + 1); 80 fint = clk->parent->rate / n;
81 if (fint < DPLL_FINT_BAND1_MIN) { 81 if (fint < DPLL_FINT_BAND1_MIN) {
82 82
83 pr_debug("rejecting n=%d due to Fint failure, " 83 pr_debug("rejecting n=%d due to Fint failure, "
diff --git a/arch/arm/mach-omap2/mailbox.c b/arch/arm/mach-omap2/mailbox.c
index 394413dc7deb..24b88504df0f 100644
--- a/arch/arm/mach-omap2/mailbox.c
+++ b/arch/arm/mach-omap2/mailbox.c
@@ -193,10 +193,12 @@ static void omap2_mbox_disable_irq(struct omap_mbox *mbox,
193 omap_mbox_type_t irq) 193 omap_mbox_type_t irq)
194{ 194{
195 struct omap_mbox2_priv *p = mbox->priv; 195 struct omap_mbox2_priv *p = mbox->priv;
196 u32 l, bit = (irq == IRQ_TX) ? p->notfull_bit : p->newmsg_bit; 196 u32 bit = (irq == IRQ_TX) ? p->notfull_bit : p->newmsg_bit;
197 l = mbox_read_reg(p->irqdisable); 197
198 l &= ~bit; 198 if (!cpu_is_omap44xx())
199 mbox_write_reg(l, p->irqdisable); 199 bit = mbox_read_reg(p->irqdisable) & ~bit;
200
201 mbox_write_reg(bit, p->irqdisable);
200} 202}
201 203
202static void omap2_mbox_ack_irq(struct omap_mbox *mbox, 204static void omap2_mbox_ack_irq(struct omap_mbox *mbox,
@@ -334,7 +336,7 @@ static struct omap_mbox mbox_iva_info = {
334 .priv = &omap2_mbox_iva_priv, 336 .priv = &omap2_mbox_iva_priv,
335}; 337};
336 338
337struct omap_mbox *omap2_mboxes[] = { &mbox_iva_info, &mbox_dsp_info, NULL }; 339struct omap_mbox *omap2_mboxes[] = { &mbox_dsp_info, &mbox_iva_info, NULL };
338#endif 340#endif
339 341
340#if defined(CONFIG_ARCH_OMAP4) 342#if defined(CONFIG_ARCH_OMAP4)
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index 98148b6c36e9..6c84659cf846 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -605,7 +605,7 @@ static void __init omap_mux_dbg_create_entry(
605 list_for_each_entry(e, &partition->muxmodes, node) { 605 list_for_each_entry(e, &partition->muxmodes, node) {
606 struct omap_mux *m = &e->mux; 606 struct omap_mux *m = &e->mux;
607 607
608 (void)debugfs_create_file(m->muxnames[0], S_IWUGO, mux_dbg_dir, 608 (void)debugfs_create_file(m->muxnames[0], S_IWUSR, mux_dbg_dir,
609 m, &omap_mux_dbg_signal_fops); 609 m, &omap_mux_dbg_signal_fops);
610 } 610 }
611} 611}
diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c
index 125f56591fb5..a5a83b358ddd 100644
--- a/arch/arm/mach-omap2/pm-debug.c
+++ b/arch/arm/mach-omap2/pm-debug.c
@@ -637,14 +637,14 @@ static int __init pm_dbg_init(void)
637 637
638 } 638 }
639 639
640 (void) debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUGO, d, 640 (void) debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUSR, d,
641 &enable_off_mode, &pm_dbg_option_fops); 641 &enable_off_mode, &pm_dbg_option_fops);
642 (void) debugfs_create_file("sleep_while_idle", S_IRUGO | S_IWUGO, d, 642 (void) debugfs_create_file("sleep_while_idle", S_IRUGO | S_IWUSR, d,
643 &sleep_while_idle, &pm_dbg_option_fops); 643 &sleep_while_idle, &pm_dbg_option_fops);
644 (void) debugfs_create_file("wakeup_timer_seconds", S_IRUGO | S_IWUGO, d, 644 (void) debugfs_create_file("wakeup_timer_seconds", S_IRUGO | S_IWUSR, d,
645 &wakeup_timer_seconds, &pm_dbg_option_fops); 645 &wakeup_timer_seconds, &pm_dbg_option_fops);
646 (void) debugfs_create_file("wakeup_timer_milliseconds", 646 (void) debugfs_create_file("wakeup_timer_milliseconds",
647 S_IRUGO | S_IWUGO, d, &wakeup_timer_milliseconds, 647 S_IRUGO | S_IWUSR, d, &wakeup_timer_milliseconds,
648 &pm_dbg_option_fops); 648 &pm_dbg_option_fops);
649 pm_dbg_init_done = 1; 649 pm_dbg_init_done = 1;
650 650
diff --git a/arch/arm/mach-omap2/prcm_mpu44xx.h b/arch/arm/mach-omap2/prcm_mpu44xx.h
index 729a644ce852..3300ff6e3cfe 100644
--- a/arch/arm/mach-omap2/prcm_mpu44xx.h
+++ b/arch/arm/mach-omap2/prcm_mpu44xx.h
@@ -38,8 +38,8 @@
38#define OMAP4430_PRCM_MPU_CPU1_INST 0x0800 38#define OMAP4430_PRCM_MPU_CPU1_INST 0x0800
39 39
40/* PRCM_MPU clockdomain register offsets (from instance start) */ 40/* PRCM_MPU clockdomain register offsets (from instance start) */
41#define OMAP4430_PRCM_MPU_CPU0_MPU_CDOFFS 0x0000 41#define OMAP4430_PRCM_MPU_CPU0_MPU_CDOFFS 0x0018
42#define OMAP4430_PRCM_MPU_CPU1_MPU_CDOFFS 0x0000 42#define OMAP4430_PRCM_MPU_CPU1_MPU_CDOFFS 0x0018
43 43
44 44
45/* 45/*
diff --git a/arch/arm/mach-omap2/smartreflex.c b/arch/arm/mach-omap2/smartreflex.c
index c37e823266d3..1a777e34d0c2 100644
--- a/arch/arm/mach-omap2/smartreflex.c
+++ b/arch/arm/mach-omap2/smartreflex.c
@@ -282,6 +282,7 @@ error:
282 dev_err(&sr_info->pdev->dev, "%s: ERROR in registering" 282 dev_err(&sr_info->pdev->dev, "%s: ERROR in registering"
283 "interrupt handler. Smartreflex will" 283 "interrupt handler. Smartreflex will"
284 "not function as desired\n", __func__); 284 "not function as desired\n", __func__);
285 kfree(name);
285 kfree(sr_info); 286 kfree(sr_info);
286 return ret; 287 return ret;
287} 288}
@@ -879,7 +880,7 @@ static int __init omap_sr_probe(struct platform_device *pdev)
879 ret = sr_late_init(sr_info); 880 ret = sr_late_init(sr_info);
880 if (ret) { 881 if (ret) {
881 pr_warning("%s: Error in SR late init\n", __func__); 882 pr_warning("%s: Error in SR late init\n", __func__);
882 return ret; 883 goto err_release_region;
883 } 884 }
884 } 885 }
885 886
@@ -890,17 +891,20 @@ static int __init omap_sr_probe(struct platform_device *pdev)
890 * not try to create rest of the debugfs entries. 891 * not try to create rest of the debugfs entries.
891 */ 892 */
892 vdd_dbg_dir = omap_voltage_get_dbgdir(sr_info->voltdm); 893 vdd_dbg_dir = omap_voltage_get_dbgdir(sr_info->voltdm);
893 if (!vdd_dbg_dir) 894 if (!vdd_dbg_dir) {
894 return -EINVAL; 895 ret = -EINVAL;
896 goto err_release_region;
897 }
895 898
896 dbg_dir = debugfs_create_dir("smartreflex", vdd_dbg_dir); 899 dbg_dir = debugfs_create_dir("smartreflex", vdd_dbg_dir);
897 if (IS_ERR(dbg_dir)) { 900 if (IS_ERR(dbg_dir)) {
898 dev_err(&pdev->dev, "%s: Unable to create debugfs directory\n", 901 dev_err(&pdev->dev, "%s: Unable to create debugfs directory\n",
899 __func__); 902 __func__);
900 return PTR_ERR(dbg_dir); 903 ret = PTR_ERR(dbg_dir);
904 goto err_release_region;
901 } 905 }
902 906
903 (void) debugfs_create_file("autocomp", S_IRUGO | S_IWUGO, dbg_dir, 907 (void) debugfs_create_file("autocomp", S_IRUGO | S_IWUSR, dbg_dir,
904 (void *)sr_info, &pm_sr_fops); 908 (void *)sr_info, &pm_sr_fops);
905 (void) debugfs_create_x32("errweight", S_IRUGO, dbg_dir, 909 (void) debugfs_create_x32("errweight", S_IRUGO, dbg_dir,
906 &sr_info->err_weight); 910 &sr_info->err_weight);
@@ -913,7 +917,8 @@ static int __init omap_sr_probe(struct platform_device *pdev)
913 if (IS_ERR(nvalue_dir)) { 917 if (IS_ERR(nvalue_dir)) {
914 dev_err(&pdev->dev, "%s: Unable to create debugfs directory" 918 dev_err(&pdev->dev, "%s: Unable to create debugfs directory"
915 "for n-values\n", __func__); 919 "for n-values\n", __func__);
916 return PTR_ERR(nvalue_dir); 920 ret = PTR_ERR(nvalue_dir);
921 goto err_release_region;
917 } 922 }
918 923
919 omap_voltage_get_volttable(sr_info->voltdm, &volt_data); 924 omap_voltage_get_volttable(sr_info->voltdm, &volt_data);
@@ -922,24 +927,16 @@ static int __init omap_sr_probe(struct platform_device *pdev)
922 " corresponding vdd vdd_%s. Cannot create debugfs" 927 " corresponding vdd vdd_%s. Cannot create debugfs"
923 "entries for n-values\n", 928 "entries for n-values\n",
924 __func__, sr_info->voltdm->name); 929 __func__, sr_info->voltdm->name);
925 return -ENODATA; 930 ret = -ENODATA;
931 goto err_release_region;
926 } 932 }
927 933
928 for (i = 0; i < sr_info->nvalue_count; i++) { 934 for (i = 0; i < sr_info->nvalue_count; i++) {
929 char *name; 935 char name[NVALUE_NAME_LEN + 1];
930 char volt_name[32];
931
932 name = kzalloc(NVALUE_NAME_LEN + 1, GFP_KERNEL);
933 if (!name) {
934 dev_err(&pdev->dev, "%s: Unable to allocate memory"
935 " for n-value directory name\n", __func__);
936 return -ENOMEM;
937 }
938 936
939 strcpy(name, "volt_"); 937 snprintf(name, sizeof(name), "volt_%d",
940 sprintf(volt_name, "%d", volt_data[i].volt_nominal); 938 volt_data[i].volt_nominal);
941 strcat(name, volt_name); 939 (void) debugfs_create_x32(name, S_IRUGO | S_IWUSR, nvalue_dir,
942 (void) debugfs_create_x32(name, S_IRUGO | S_IWUGO, nvalue_dir,
943 &(sr_info->nvalue_table[i].nvalue)); 940 &(sr_info->nvalue_table[i].nvalue));
944 } 941 }
945 942
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 7b7c2683ae7b..0fc550e7e482 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -39,6 +39,7 @@
39#include <asm/mach/time.h> 39#include <asm/mach/time.h>
40#include <plat/dmtimer.h> 40#include <plat/dmtimer.h>
41#include <asm/localtimer.h> 41#include <asm/localtimer.h>
42#include <asm/sched_clock.h>
42 43
43#include "timer-gp.h" 44#include "timer-gp.h"
44 45
@@ -190,6 +191,7 @@ static void __init omap2_gp_clocksource_init(void)
190/* 191/*
191 * clocksource 192 * clocksource
192 */ 193 */
194static DEFINE_CLOCK_DATA(cd);
193static struct omap_dm_timer *gpt_clocksource; 195static struct omap_dm_timer *gpt_clocksource;
194static cycle_t clocksource_read_cycles(struct clocksource *cs) 196static cycle_t clocksource_read_cycles(struct clocksource *cs)
195{ 197{
@@ -204,6 +206,15 @@ static struct clocksource clocksource_gpt = {
204 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 206 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
205}; 207};
206 208
209static void notrace dmtimer_update_sched_clock(void)
210{
211 u32 cyc;
212
213 cyc = omap_dm_timer_read_counter(gpt_clocksource);
214
215 update_sched_clock(&cd, cyc, (u32)~0);
216}
217
207/* Setup free-running counter for clocksource */ 218/* Setup free-running counter for clocksource */
208static void __init omap2_gp_clocksource_init(void) 219static void __init omap2_gp_clocksource_init(void)
209{ 220{
@@ -224,6 +235,8 @@ static void __init omap2_gp_clocksource_init(void)
224 235
225 omap_dm_timer_set_load_start(gpt, 1, 0); 236 omap_dm_timer_set_load_start(gpt, 1, 0);
226 237
238 init_sched_clock(&cd, dmtimer_update_sched_clock, 32, tick_rate);
239
227 if (clocksource_register_hz(&clocksource_gpt, tick_rate)) 240 if (clocksource_register_hz(&clocksource_gpt, tick_rate))
228 printk(err2, clocksource_gpt.name); 241 printk(err2, clocksource_gpt.name);
229} 242}
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index fbc5b775f895..b166b1d845d7 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -347,6 +347,7 @@ static struct platform_device *pxa25x_devices[] __initdata = {
347 &pxa25x_device_assp, 347 &pxa25x_device_assp,
348 &pxa25x_device_pwm0, 348 &pxa25x_device_pwm0,
349 &pxa25x_device_pwm1, 349 &pxa25x_device_pwm1,
350 &pxa_device_asoc_platform,
350}; 351};
351 352
352static struct sys_device pxa25x_sysdev[] = { 353static struct sys_device pxa25x_sysdev[] = {
diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c
index c31e601eb49c..b9b1e5c2b290 100644
--- a/arch/arm/mach-pxa/tosa-bt.c
+++ b/arch/arm/mach-pxa/tosa-bt.c
@@ -81,8 +81,6 @@ static int tosa_bt_probe(struct platform_device *dev)
81 goto err_rfk_alloc; 81 goto err_rfk_alloc;
82 } 82 }
83 83
84 rfkill_set_led_trigger_name(rfk, "tosa-bt");
85
86 rc = rfkill_register(rfk); 84 rc = rfkill_register(rfk);
87 if (rc) 85 if (rc)
88 goto err_rfkill; 86 goto err_rfkill;
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index af152e70cfcf..f2582ec300d9 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -875,6 +875,11 @@ static struct platform_device sharpsl_rom_device = {
875 .dev.platform_data = &sharpsl_rom_data, 875 .dev.platform_data = &sharpsl_rom_data,
876}; 876};
877 877
878static struct platform_device wm9712_device = {
879 .name = "wm9712-codec",
880 .id = -1,
881};
882
878static struct platform_device *devices[] __initdata = { 883static struct platform_device *devices[] __initdata = {
879 &tosascoop_device, 884 &tosascoop_device,
880 &tosascoop_jc_device, 885 &tosascoop_jc_device,
@@ -885,6 +890,7 @@ static struct platform_device *devices[] __initdata = {
885 &tosaled_device, 890 &tosaled_device,
886 &tosa_bt_device, 891 &tosa_bt_device,
887 &sharpsl_rom_device, 892 &sharpsl_rom_device,
893 &wm9712_device,
888}; 894};
889 895
890static void tosa_poweroff(void) 896static void tosa_poweroff(void)
diff --git a/arch/arm/mach-s3c2440/Kconfig b/arch/arm/mach-s3c2440/Kconfig
index a0cb2581894f..50825a3f91cc 100644
--- a/arch/arm/mach-s3c2440/Kconfig
+++ b/arch/arm/mach-s3c2440/Kconfig
@@ -99,6 +99,7 @@ config MACH_NEO1973_GTA02
99 select POWER_SUPPLY 99 select POWER_SUPPLY
100 select MACH_NEO1973 100 select MACH_NEO1973
101 select S3C2410_PWM 101 select S3C2410_PWM
102 select S3C_DEV_USB_HOST
102 help 103 help
103 Say Y here if you are using the Openmoko GTA02 / Freerunner GSM Phone 104 Say Y here if you are using the Openmoko GTA02 / Freerunner GSM Phone
104 105
diff --git a/arch/arm/mach-s3c2440/include/mach/gta02.h b/arch/arm/mach-s3c2440/include/mach/gta02.h
index 953331d8d56a..3a56a229cac6 100644
--- a/arch/arm/mach-s3c2440/include/mach/gta02.h
+++ b/arch/arm/mach-s3c2440/include/mach/gta02.h
@@ -44,19 +44,19 @@
44#define GTA02v3_GPIO_nUSB_FLT S3C2410_GPG(10) /* v3 + v4 only */ 44#define GTA02v3_GPIO_nUSB_FLT S3C2410_GPG(10) /* v3 + v4 only */
45#define GTA02v3_GPIO_nGSM_OC S3C2410_GPG(11) /* v3 + v4 only */ 45#define GTA02v3_GPIO_nGSM_OC S3C2410_GPG(11) /* v3 + v4 only */
46 46
47#define GTA02_GPIO_AMP_SHUT S3C2440_GPJ1 /* v2 + v3 + v4 only */ 47#define GTA02_GPIO_AMP_SHUT S3C2410_GPJ(1) /* v2 + v3 + v4 only */
48#define GTA02v1_GPIO_WLAN_GPIO10 S3C2440_GPJ2 48#define GTA02v1_GPIO_WLAN_GPIO10 S3C2410_GPJ(2)
49#define GTA02_GPIO_HP_IN S3C2440_GPJ2 /* v2 + v3 + v4 only */ 49#define GTA02_GPIO_HP_IN S3C2410_GPJ(2) /* v2 + v3 + v4 only */
50#define GTA02_GPIO_INT0 S3C2440_GPJ3 /* v2 + v3 + v4 only */ 50#define GTA02_GPIO_INT0 S3C2410_GPJ(3) /* v2 + v3 + v4 only */
51#define GTA02_GPIO_nGSM_EN S3C2440_GPJ4 51#define GTA02_GPIO_nGSM_EN S3C2410_GPJ(4)
52#define GTA02_GPIO_3D_RESET S3C2440_GPJ5 52#define GTA02_GPIO_3D_RESET S3C2410_GPJ(5)
53#define GTA02_GPIO_nDL_GSM S3C2440_GPJ6 /* v4 + v5 only */ 53#define GTA02_GPIO_nDL_GSM S3C2410_GPJ(6) /* v4 + v5 only */
54#define GTA02_GPIO_WLAN_GPIO0 S3C2440_GPJ7 54#define GTA02_GPIO_WLAN_GPIO0 S3C2410_GPJ(7)
55#define GTA02v1_GPIO_BAT_ID S3C2440_GPJ8 55#define GTA02v1_GPIO_BAT_ID S3C2410_GPJ(8)
56#define GTA02_GPIO_KEEPACT S3C2440_GPJ8 56#define GTA02_GPIO_KEEPACT S3C2410_GPJ(8)
57#define GTA02v1_GPIO_HP_IN S3C2440_GPJ10 57#define GTA02v1_GPIO_HP_IN S3C2410_GPJ(10)
58#define GTA02_CHIP_PWD S3C2440_GPJ11 /* v2 + v3 + v4 only */ 58#define GTA02_CHIP_PWD S3C2410_GPJ(11) /* v2 + v3 + v4 only */
59#define GTA02_GPIO_nWLAN_RESET S3C2440_GPJ12 /* v2 + v3 + v4 only */ 59#define GTA02_GPIO_nWLAN_RESET S3C2410_GPJ(12) /* v2 + v3 + v4 only */
60 60
61#define GTA02_IRQ_GSENSOR_1 IRQ_EINT0 61#define GTA02_IRQ_GSENSOR_1 IRQ_EINT0
62#define GTA02_IRQ_MODEM IRQ_EINT1 62#define GTA02_IRQ_MODEM IRQ_EINT1
diff --git a/arch/arm/mach-s3c64xx/clock.c b/arch/arm/mach-s3c64xx/clock.c
index dd3782064508..fdfc4d5e37a1 100644
--- a/arch/arm/mach-s3c64xx/clock.c
+++ b/arch/arm/mach-s3c64xx/clock.c
@@ -151,6 +151,12 @@ static struct clk init_clocks_off[] = {
151 .enable = s3c64xx_pclk_ctrl, 151 .enable = s3c64xx_pclk_ctrl,
152 .ctrlbit = S3C_CLKCON_PCLK_IIC, 152 .ctrlbit = S3C_CLKCON_PCLK_IIC,
153 }, { 153 }, {
154 .name = "i2c",
155 .id = 1,
156 .parent = &clk_p,
157 .enable = s3c64xx_pclk_ctrl,
158 .ctrlbit = S3C6410_CLKCON_PCLK_I2C1,
159 }, {
154 .name = "iis", 160 .name = "iis",
155 .id = 0, 161 .id = 0,
156 .parent = &clk_p, 162 .parent = &clk_p,
diff --git a/arch/arm/mach-s3c64xx/dma.c b/arch/arm/mach-s3c64xx/dma.c
index 135db1b41252..c35585cf8c4f 100644
--- a/arch/arm/mach-s3c64xx/dma.c
+++ b/arch/arm/mach-s3c64xx/dma.c
@@ -690,12 +690,12 @@ static int s3c64xx_dma_init1(int chno, enum dma_ch chbase,
690 690
691 regptr = regs + PL080_Cx_BASE(0); 691 regptr = regs + PL080_Cx_BASE(0);
692 692
693 for (ch = 0; ch < 8; ch++, chno++, chptr++) { 693 for (ch = 0; ch < 8; ch++, chptr++) {
694 printk(KERN_INFO "%s: registering DMA %d (%p)\n", 694 pr_debug("%s: registering DMA %d (%p)\n",
695 __func__, chno, regptr); 695 __func__, chno + ch, regptr);
696 696
697 chptr->bit = 1 << ch; 697 chptr->bit = 1 << ch;
698 chptr->number = chno; 698 chptr->number = chno + ch;
699 chptr->dmac = dmac; 699 chptr->dmac = dmac;
700 chptr->regs = regptr; 700 chptr->regs = regptr;
701 regptr += PL080_Cx_STRIDE; 701 regptr += PL080_Cx_STRIDE;
@@ -704,7 +704,8 @@ static int s3c64xx_dma_init1(int chno, enum dma_ch chbase,
704 /* for the moment, permanently enable the controller */ 704 /* for the moment, permanently enable the controller */
705 writel(PL080_CONFIG_ENABLE, regs + PL080_CONFIG); 705 writel(PL080_CONFIG_ENABLE, regs + PL080_CONFIG);
706 706
707 printk(KERN_INFO "PL080: IRQ %d, at %p\n", irq, regs); 707 printk(KERN_INFO "PL080: IRQ %d, at %p, channels %d..%d\n",
708 irq, regs, chno, chno+8);
708 709
709 return 0; 710 return 0;
710 711
diff --git a/arch/arm/mach-s3c64xx/gpiolib.c b/arch/arm/mach-s3c64xx/gpiolib.c
index fd99a82e82c4..92b09085caaa 100644
--- a/arch/arm/mach-s3c64xx/gpiolib.c
+++ b/arch/arm/mach-s3c64xx/gpiolib.c
@@ -72,7 +72,7 @@ static struct s3c_gpio_cfg gpio_4bit_cfg_eint0011 = {
72 .get_pull = s3c_gpio_getpull_updown, 72 .get_pull = s3c_gpio_getpull_updown,
73}; 73};
74 74
75int s3c64xx_gpio2int_gpm(struct gpio_chip *chip, unsigned pin) 75static int s3c64xx_gpio2int_gpm(struct gpio_chip *chip, unsigned pin)
76{ 76{
77 return pin < 5 ? IRQ_EINT(23) + pin : -ENXIO; 77 return pin < 5 ? IRQ_EINT(23) + pin : -ENXIO;
78} 78}
@@ -138,7 +138,7 @@ static struct s3c_gpio_chip gpio_4bit[] = {
138 }, 138 },
139}; 139};
140 140
141int s3c64xx_gpio2int_gpl(struct gpio_chip *chip, unsigned pin) 141static int s3c64xx_gpio2int_gpl(struct gpio_chip *chip, unsigned pin)
142{ 142{
143 return pin >= 8 ? IRQ_EINT(16) + pin - 8 : -ENXIO; 143 return pin >= 8 ? IRQ_EINT(16) + pin - 8 : -ENXIO;
144} 144}
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
index e85192a86fbe..a80a3163dd30 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
@@ -28,6 +28,7 @@
28#include <linux/delay.h> 28#include <linux/delay.h>
29#include <linux/smsc911x.h> 29#include <linux/smsc911x.h>
30#include <linux/regulator/fixed.h> 30#include <linux/regulator/fixed.h>
31#include <linux/regulator/machine.h>
31 32
32#ifdef CONFIG_SMDK6410_WM1190_EV1 33#ifdef CONFIG_SMDK6410_WM1190_EV1
33#include <linux/mfd/wm8350/core.h> 34#include <linux/mfd/wm8350/core.h>
@@ -351,7 +352,7 @@ static struct regulator_init_data smdk6410_vddpll = {
351/* VDD_UH_MMC, LDO5 on J5 */ 352/* VDD_UH_MMC, LDO5 on J5 */
352static struct regulator_init_data smdk6410_vdduh_mmc = { 353static struct regulator_init_data smdk6410_vdduh_mmc = {
353 .constraints = { 354 .constraints = {
354 .name = "PVDD_UH/PVDD_MMC", 355 .name = "PVDD_UH+PVDD_MMC",
355 .always_on = 1, 356 .always_on = 1,
356 }, 357 },
357}; 358};
@@ -417,7 +418,7 @@ static struct regulator_init_data smdk6410_vddaudio = {
417/* S3C64xx internal logic & PLL */ 418/* S3C64xx internal logic & PLL */
418static struct regulator_init_data wm8350_dcdc1_data = { 419static struct regulator_init_data wm8350_dcdc1_data = {
419 .constraints = { 420 .constraints = {
420 .name = "PVDD_INT/PVDD_PLL", 421 .name = "PVDD_INT+PVDD_PLL",
421 .min_uV = 1200000, 422 .min_uV = 1200000,
422 .max_uV = 1200000, 423 .max_uV = 1200000,
423 .always_on = 1, 424 .always_on = 1,
@@ -452,7 +453,7 @@ static struct regulator_consumer_supply wm8350_dcdc4_consumers[] = {
452 453
453static struct regulator_init_data wm8350_dcdc4_data = { 454static struct regulator_init_data wm8350_dcdc4_data = {
454 .constraints = { 455 .constraints = {
455 .name = "PVDD_HI/PVDD_EXT/PVDD_SYS/PVCCM2MTV", 456 .name = "PVDD_HI+PVDD_EXT+PVDD_SYS+PVCCM2MTV",
456 .min_uV = 3000000, 457 .min_uV = 3000000,
457 .max_uV = 3000000, 458 .max_uV = 3000000,
458 .always_on = 1, 459 .always_on = 1,
@@ -464,7 +465,7 @@ static struct regulator_init_data wm8350_dcdc4_data = {
464/* OTGi/1190-EV1 HPVDD & AVDD */ 465/* OTGi/1190-EV1 HPVDD & AVDD */
465static struct regulator_init_data wm8350_ldo4_data = { 466static struct regulator_init_data wm8350_ldo4_data = {
466 .constraints = { 467 .constraints = {
467 .name = "PVDD_OTGI/HPVDD/AVDD", 468 .name = "PVDD_OTGI+HPVDD+AVDD",
468 .min_uV = 1200000, 469 .min_uV = 1200000,
469 .max_uV = 1200000, 470 .max_uV = 1200000,
470 .apply_uV = 1, 471 .apply_uV = 1,
@@ -552,7 +553,7 @@ static struct wm831x_backlight_pdata wm1192_backlight_pdata = {
552 553
553static struct regulator_init_data wm1192_dcdc3 = { 554static struct regulator_init_data wm1192_dcdc3 = {
554 .constraints = { 555 .constraints = {
555 .name = "PVDD_MEM/PVDD_GPS", 556 .name = "PVDD_MEM+PVDD_GPS",
556 .always_on = 1, 557 .always_on = 1,
557 }, 558 },
558}; 559};
@@ -563,7 +564,7 @@ static struct regulator_consumer_supply wm1192_ldo1_consumers[] = {
563 564
564static struct regulator_init_data wm1192_ldo1 = { 565static struct regulator_init_data wm1192_ldo1 = {
565 .constraints = { 566 .constraints = {
566 .name = "PVDD_LCD/PVDD_EXT", 567 .name = "PVDD_LCD+PVDD_EXT",
567 .always_on = 1, 568 .always_on = 1,
568 }, 569 },
569 .consumer_supplies = wm1192_ldo1_consumers, 570 .consumer_supplies = wm1192_ldo1_consumers,
diff --git a/arch/arm/mach-s3c64xx/setup-keypad.c b/arch/arm/mach-s3c64xx/setup-keypad.c
index f8ed0d22db70..1d4d0ee9e870 100644
--- a/arch/arm/mach-s3c64xx/setup-keypad.c
+++ b/arch/arm/mach-s3c64xx/setup-keypad.c
@@ -17,7 +17,7 @@
17void samsung_keypad_cfg_gpio(unsigned int rows, unsigned int cols) 17void samsung_keypad_cfg_gpio(unsigned int rows, unsigned int cols)
18{ 18{
19 /* Set all the necessary GPK pins to special-function 3: KP_ROW[x] */ 19 /* Set all the necessary GPK pins to special-function 3: KP_ROW[x] */
20 s3c_gpio_cfgrange_nopull(S3C64XX_GPK(8), 8 + rows, S3C_GPIO_SFN(3)); 20 s3c_gpio_cfgrange_nopull(S3C64XX_GPK(8), rows, S3C_GPIO_SFN(3));
21 21
22 /* Set all the necessary GPL pins to special-function 3: KP_COL[x] */ 22 /* Set all the necessary GPL pins to special-function 3: KP_COL[x] */
23 s3c_gpio_cfgrange_nopull(S3C64XX_GPL(0), cols, S3C_GPIO_SFN(3)); 23 s3c_gpio_cfgrange_nopull(S3C64XX_GPL(0), cols, S3C_GPIO_SFN(3));
diff --git a/arch/arm/mach-s3c64xx/setup-sdhci.c b/arch/arm/mach-s3c64xx/setup-sdhci.c
index 1a942037c4ef..f344a222bc84 100644
--- a/arch/arm/mach-s3c64xx/setup-sdhci.c
+++ b/arch/arm/mach-s3c64xx/setup-sdhci.c
@@ -56,7 +56,7 @@ void s3c6400_setup_sdhci_cfg_card(struct platform_device *dev,
56 else 56 else
57 ctrl3 = (S3C_SDHCI_CTRL3_FCSEL1 | S3C_SDHCI_CTRL3_FCSEL0); 57 ctrl3 = (S3C_SDHCI_CTRL3_FCSEL1 | S3C_SDHCI_CTRL3_FCSEL0);
58 58
59 printk(KERN_INFO "%s: CTRL 2=%08x, 3=%08x\n", __func__, ctrl2, ctrl3); 59 pr_debug("%s: CTRL 2=%08x, 3=%08x\n", __func__, ctrl2, ctrl3);
60 writel(ctrl2, r + S3C_SDHCI_CONTROL2); 60 writel(ctrl2, r + S3C_SDHCI_CONTROL2);
61 writel(ctrl3, r + S3C_SDHCI_CONTROL3); 61 writel(ctrl3, r + S3C_SDHCI_CONTROL3);
62} 62}
diff --git a/arch/arm/mach-s5p6442/include/mach/map.h b/arch/arm/mach-s5p6442/include/mach/map.h
index 203dd5a18bd5..058dab4482a1 100644
--- a/arch/arm/mach-s5p6442/include/mach/map.h
+++ b/arch/arm/mach-s5p6442/include/mach/map.h
@@ -1,6 +1,6 @@
1/* linux/arch/arm/mach-s5p6442/include/mach/map.h 1/* linux/arch/arm/mach-s5p6442/include/mach/map.h
2 * 2 *
3 * Copyright (c) 2010 Samsung Electronics Co., Ltd. 3 * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
4 * http://www.samsung.com/ 4 * http://www.samsung.com/
5 * 5 *
6 * S5P6442 - Memory map definitions 6 * S5P6442 - Memory map definitions
@@ -16,56 +16,61 @@
16#include <plat/map-base.h> 16#include <plat/map-base.h>
17#include <plat/map-s5p.h> 17#include <plat/map-s5p.h>
18 18
19#define S5P6442_PA_CHIPID (0xE0000000) 19#define S5P6442_PA_SDRAM 0x20000000
20#define S5P_PA_CHIPID S5P6442_PA_CHIPID
21 20
22#define S5P6442_PA_SYSCON (0xE0100000) 21#define S5P6442_PA_I2S0 0xC0B00000
23#define S5P_PA_SYSCON S5P6442_PA_SYSCON 22#define S5P6442_PA_I2S1 0xF2200000
24 23
25#define S5P6442_PA_GPIO (0xE0200000) 24#define S5P6442_PA_CHIPID 0xE0000000
26 25
27#define S5P6442_PA_VIC0 (0xE4000000) 26#define S5P6442_PA_SYSCON 0xE0100000
28#define S5P6442_PA_VIC1 (0xE4100000)
29#define S5P6442_PA_VIC2 (0xE4200000)
30 27
31#define S5P6442_PA_SROMC (0xE7000000) 28#define S5P6442_PA_GPIO 0xE0200000
32#define S5P_PA_SROMC S5P6442_PA_SROMC
33 29
34#define S5P6442_PA_MDMA 0xE8000000 30#define S5P6442_PA_VIC0 0xE4000000
35#define S5P6442_PA_PDMA 0xE9000000 31#define S5P6442_PA_VIC1 0xE4100000
32#define S5P6442_PA_VIC2 0xE4200000
36 33
37#define S5P6442_PA_TIMER (0xEA000000) 34#define S5P6442_PA_SROMC 0xE7000000
38#define S5P_PA_TIMER S5P6442_PA_TIMER
39 35
40#define S5P6442_PA_SYSTIMER (0xEA100000) 36#define S5P6442_PA_MDMA 0xE8000000
37#define S5P6442_PA_PDMA 0xE9000000
41 38
42#define S5P6442_PA_WATCHDOG (0xEA200000) 39#define S5P6442_PA_TIMER 0xEA000000
43 40
44#define S5P6442_PA_UART (0xEC000000) 41#define S5P6442_PA_SYSTIMER 0xEA100000
45 42
46#define S5P_PA_UART0 (S5P6442_PA_UART + 0x0) 43#define S5P6442_PA_WATCHDOG 0xEA200000
47#define S5P_PA_UART1 (S5P6442_PA_UART + 0x400)
48#define S5P_PA_UART2 (S5P6442_PA_UART + 0x800)
49#define S5P_SZ_UART SZ_256
50 44
51#define S5P6442_PA_IIC0 (0xEC100000) 45#define S5P6442_PA_UART 0xEC000000
52 46
53#define S5P6442_PA_SDRAM (0x20000000) 47#define S5P6442_PA_IIC0 0xEC100000
54#define S5P_PA_SDRAM S5P6442_PA_SDRAM
55 48
56#define S5P6442_PA_SPI 0xEC300000 49#define S5P6442_PA_SPI 0xEC300000
57 50
58/* I2S */
59#define S5P6442_PA_I2S0 0xC0B00000
60#define S5P6442_PA_I2S1 0xF2200000
61
62/* PCM */
63#define S5P6442_PA_PCM0 0xF2400000 51#define S5P6442_PA_PCM0 0xF2400000
64#define S5P6442_PA_PCM1 0xF2500000 52#define S5P6442_PA_PCM1 0xF2500000
65 53
66/* compatibiltiy defines. */ 54/* Compatibiltiy Defines */
55
56#define S3C_PA_IIC S5P6442_PA_IIC0
67#define S3C_PA_WDT S5P6442_PA_WATCHDOG 57#define S3C_PA_WDT S5P6442_PA_WATCHDOG
58
59#define S5P_PA_CHIPID S5P6442_PA_CHIPID
60#define S5P_PA_SDRAM S5P6442_PA_SDRAM
61#define S5P_PA_SROMC S5P6442_PA_SROMC
62#define S5P_PA_SYSCON S5P6442_PA_SYSCON
63#define S5P_PA_TIMER S5P6442_PA_TIMER
64
65/* UART */
66
68#define S3C_PA_UART S5P6442_PA_UART 67#define S3C_PA_UART S5P6442_PA_UART
69#define S3C_PA_IIC S5P6442_PA_IIC0 68
69#define S5P_PA_UART(x) (S3C_PA_UART + ((x) * S3C_UART_OFFSET))
70#define S5P_PA_UART0 S5P_PA_UART(0)
71#define S5P_PA_UART1 S5P_PA_UART(1)
72#define S5P_PA_UART2 S5P_PA_UART(2)
73
74#define S5P_SZ_UART SZ_256
70 75
71#endif /* __ASM_ARCH_MAP_H */ 76#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s5p64x0/include/mach/gpio.h b/arch/arm/mach-s5p64x0/include/mach/gpio.h
index 5486c8f01f1d..adb5f298ead8 100644
--- a/arch/arm/mach-s5p64x0/include/mach/gpio.h
+++ b/arch/arm/mach-s5p64x0/include/mach/gpio.h
@@ -23,7 +23,7 @@
23#define S5P6440_GPIO_A_NR (6) 23#define S5P6440_GPIO_A_NR (6)
24#define S5P6440_GPIO_B_NR (7) 24#define S5P6440_GPIO_B_NR (7)
25#define S5P6440_GPIO_C_NR (8) 25#define S5P6440_GPIO_C_NR (8)
26#define S5P6440_GPIO_F_NR (2) 26#define S5P6440_GPIO_F_NR (16)
27#define S5P6440_GPIO_G_NR (7) 27#define S5P6440_GPIO_G_NR (7)
28#define S5P6440_GPIO_H_NR (10) 28#define S5P6440_GPIO_H_NR (10)
29#define S5P6440_GPIO_I_NR (16) 29#define S5P6440_GPIO_I_NR (16)
@@ -36,7 +36,7 @@
36#define S5P6450_GPIO_B_NR (7) 36#define S5P6450_GPIO_B_NR (7)
37#define S5P6450_GPIO_C_NR (8) 37#define S5P6450_GPIO_C_NR (8)
38#define S5P6450_GPIO_D_NR (8) 38#define S5P6450_GPIO_D_NR (8)
39#define S5P6450_GPIO_F_NR (2) 39#define S5P6450_GPIO_F_NR (16)
40#define S5P6450_GPIO_G_NR (14) 40#define S5P6450_GPIO_G_NR (14)
41#define S5P6450_GPIO_H_NR (10) 41#define S5P6450_GPIO_H_NR (10)
42#define S5P6450_GPIO_I_NR (16) 42#define S5P6450_GPIO_I_NR (16)
diff --git a/arch/arm/mach-s5p64x0/include/mach/map.h b/arch/arm/mach-s5p64x0/include/mach/map.h
index a9365e5ba614..95c91257c7ca 100644
--- a/arch/arm/mach-s5p64x0/include/mach/map.h
+++ b/arch/arm/mach-s5p64x0/include/mach/map.h
@@ -1,6 +1,6 @@
1/* linux/arch/arm/mach-s5p64x0/include/mach/map.h 1/* linux/arch/arm/mach-s5p64x0/include/mach/map.h
2 * 2 *
3 * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. 3 * Copyright (c) 2009-2011 Samsung Electronics Co., Ltd.
4 * http://www.samsung.com 4 * http://www.samsung.com
5 * 5 *
6 * S5P64X0 - Memory map definitions 6 * S5P64X0 - Memory map definitions
@@ -16,64 +16,46 @@
16#include <plat/map-base.h> 16#include <plat/map-base.h>
17#include <plat/map-s5p.h> 17#include <plat/map-s5p.h>
18 18
19#define S5P64X0_PA_SDRAM (0x20000000) 19#define S5P64X0_PA_SDRAM 0x20000000
20 20
21#define S5P64X0_PA_CHIPID (0xE0000000) 21#define S5P64X0_PA_CHIPID 0xE0000000
22#define S5P_PA_CHIPID S5P64X0_PA_CHIPID
23
24#define S5P64X0_PA_SYSCON (0xE0100000)
25#define S5P_PA_SYSCON S5P64X0_PA_SYSCON
26
27#define S5P64X0_PA_GPIO (0xE0308000)
28
29#define S5P64X0_PA_VIC0 (0xE4000000)
30#define S5P64X0_PA_VIC1 (0xE4100000)
31 22
32#define S5P64X0_PA_SROMC (0xE7000000) 23#define S5P64X0_PA_SYSCON 0xE0100000
33#define S5P_PA_SROMC S5P64X0_PA_SROMC
34
35#define S5P64X0_PA_PDMA (0xE9000000)
36
37#define S5P64X0_PA_TIMER (0xEA000000)
38#define S5P_PA_TIMER S5P64X0_PA_TIMER
39 24
40#define S5P64X0_PA_RTC (0xEA100000) 25#define S5P64X0_PA_GPIO 0xE0308000
41 26
42#define S5P64X0_PA_WDT (0xEA200000) 27#define S5P64X0_PA_VIC0 0xE4000000
28#define S5P64X0_PA_VIC1 0xE4100000
43 29
44#define S5P6440_PA_UART(x) (0xEC000000 + ((x) * S3C_UART_OFFSET)) 30#define S5P64X0_PA_SROMC 0xE7000000
45#define S5P6450_PA_UART(x) ((x < 5) ? (0xEC800000 + ((x) * S3C_UART_OFFSET)) : (0xEC000000))
46 31
47#define S5P_PA_UART0 S5P6450_PA_UART(0) 32#define S5P64X0_PA_PDMA 0xE9000000
48#define S5P_PA_UART1 S5P6450_PA_UART(1)
49#define S5P_PA_UART2 S5P6450_PA_UART(2)
50#define S5P_PA_UART3 S5P6450_PA_UART(3)
51#define S5P_PA_UART4 S5P6450_PA_UART(4)
52#define S5P_PA_UART5 S5P6450_PA_UART(5)
53 33
54#define S5P_SZ_UART SZ_256 34#define S5P64X0_PA_TIMER 0xEA000000
35#define S5P64X0_PA_RTC 0xEA100000
36#define S5P64X0_PA_WDT 0xEA200000
55 37
56#define S5P6440_PA_IIC0 (0xEC104000) 38#define S5P6440_PA_IIC0 0xEC104000
57#define S5P6440_PA_IIC1 (0xEC20F000) 39#define S5P6440_PA_IIC1 0xEC20F000
58#define S5P6450_PA_IIC0 (0xEC100000) 40#define S5P6450_PA_IIC0 0xEC100000
59#define S5P6450_PA_IIC1 (0xEC200000) 41#define S5P6450_PA_IIC1 0xEC200000
60 42
61#define S5P64X0_PA_SPI0 (0xEC400000) 43#define S5P64X0_PA_SPI0 0xEC400000
62#define S5P64X0_PA_SPI1 (0xEC500000) 44#define S5P64X0_PA_SPI1 0xEC500000
63 45
64#define S5P64X0_PA_HSOTG (0xED100000) 46#define S5P64X0_PA_HSOTG 0xED100000
65 47
66#define S5P64X0_PA_HSMMC(x) (0xED800000 + ((x) * 0x100000)) 48#define S5P64X0_PA_HSMMC(x) (0xED800000 + ((x) * 0x100000))
67 49
68#define S5P64X0_PA_I2S (0xF2000000) 50#define S5P64X0_PA_I2S 0xF2000000
69#define S5P6450_PA_I2S1 0xF2800000 51#define S5P6450_PA_I2S1 0xF2800000
70#define S5P6450_PA_I2S2 0xF2900000 52#define S5P6450_PA_I2S2 0xF2900000
71 53
72#define S5P64X0_PA_PCM (0xF2100000) 54#define S5P64X0_PA_PCM 0xF2100000
73 55
74#define S5P64X0_PA_ADC (0xF3000000) 56#define S5P64X0_PA_ADC 0xF3000000
75 57
76/* compatibiltiy defines. */ 58/* Compatibiltiy Defines */
77 59
78#define S3C_PA_HSMMC0 S5P64X0_PA_HSMMC(0) 60#define S3C_PA_HSMMC0 S5P64X0_PA_HSMMC(0)
79#define S3C_PA_HSMMC1 S5P64X0_PA_HSMMC(1) 61#define S3C_PA_HSMMC1 S5P64X0_PA_HSMMC(1)
@@ -83,6 +65,25 @@
83#define S3C_PA_RTC S5P64X0_PA_RTC 65#define S3C_PA_RTC S5P64X0_PA_RTC
84#define S3C_PA_WDT S5P64X0_PA_WDT 66#define S3C_PA_WDT S5P64X0_PA_WDT
85 67
68#define S5P_PA_CHIPID S5P64X0_PA_CHIPID
69#define S5P_PA_SROMC S5P64X0_PA_SROMC
70#define S5P_PA_SYSCON S5P64X0_PA_SYSCON
71#define S5P_PA_TIMER S5P64X0_PA_TIMER
72
86#define SAMSUNG_PA_ADC S5P64X0_PA_ADC 73#define SAMSUNG_PA_ADC S5P64X0_PA_ADC
87 74
75/* UART */
76
77#define S5P6440_PA_UART(x) (0xEC000000 + ((x) * S3C_UART_OFFSET))
78#define S5P6450_PA_UART(x) ((x < 5) ? (0xEC800000 + ((x) * S3C_UART_OFFSET)) : (0xEC000000))
79
80#define S5P_PA_UART0 S5P6450_PA_UART(0)
81#define S5P_PA_UART1 S5P6450_PA_UART(1)
82#define S5P_PA_UART2 S5P6450_PA_UART(2)
83#define S5P_PA_UART3 S5P6450_PA_UART(3)
84#define S5P_PA_UART4 S5P6450_PA_UART(4)
85#define S5P_PA_UART5 S5P6450_PA_UART(5)
86
87#define S5P_SZ_UART SZ_256
88
88#endif /* __ASM_ARCH_MAP_H */ 89#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s5pc100/include/mach/map.h b/arch/arm/mach-s5pc100/include/mach/map.h
index 328467b346aa..ccbe6b767f7d 100644
--- a/arch/arm/mach-s5pc100/include/mach/map.h
+++ b/arch/arm/mach-s5pc100/include/mach/map.h
@@ -1,5 +1,8 @@
1/* linux/arch/arm/mach-s5pc100/include/mach/map.h 1/* linux/arch/arm/mach-s5pc100/include/mach/map.h
2 * 2 *
3 * Copyright (c) 2011 Samsung Electronics Co., Ltd.
4 * http://www.samsung.com/
5 *
3 * Copyright 2009 Samsung Electronics Co. 6 * Copyright 2009 Samsung Electronics Co.
4 * Byungho Min <bhmin@samsung.com> 7 * Byungho Min <bhmin@samsung.com>
5 * 8 *
@@ -16,145 +19,115 @@
16#include <plat/map-base.h> 19#include <plat/map-base.h>
17#include <plat/map-s5p.h> 20#include <plat/map-s5p.h>
18 21
19/* 22#define S5PC100_PA_SDRAM 0x20000000
20 * map-base.h has already defined virtual memory address 23
21 * S3C_VA_IRQ S3C_ADDR(0x00000000) irq controller(s) 24#define S5PC100_PA_ONENAND 0xE7100000
22 * S3C_VA_SYS S3C_ADDR(0x00100000) system control 25#define S5PC100_PA_ONENAND_BUF 0xB0000000
23 * S3C_VA_MEM S3C_ADDR(0x00200000) system control (not used) 26
24 * S3C_VA_TIMER S3C_ADDR(0x00300000) timer block 27#define S5PC100_PA_CHIPID 0xE0000000
25 * S3C_VA_WATCHDOG S3C_ADDR(0x00400000) watchdog
26 * S3C_VA_UART S3C_ADDR(0x01000000) UART
27 *
28 * S5PC100 specific virtual memory address can be defined here
29 * S5PC1XX_VA_GPIO S3C_ADDR(0x00500000) GPIO
30 *
31 */
32 28
33#define S5PC100_PA_ONENAND_BUF (0xB0000000) 29#define S5PC100_PA_SYSCON 0xE0100000
34#define S5PC100_SZ_ONENAND_BUF (SZ_256M - SZ_32M)
35 30
36/* Chip ID */ 31#define S5PC100_PA_OTHERS 0xE0200000
37 32
38#define S5PC100_PA_CHIPID (0xE0000000) 33#define S5PC100_PA_GPIO 0xE0300000
39#define S5P_PA_CHIPID S5PC100_PA_CHIPID
40 34
41#define S5PC100_PA_SYSCON (0xE0100000) 35#define S5PC100_PA_VIC0 0xE4000000
42#define S5P_PA_SYSCON S5PC100_PA_SYSCON 36#define S5PC100_PA_VIC1 0xE4100000
37#define S5PC100_PA_VIC2 0xE4200000
43 38
44#define S5PC100_PA_OTHERS (0xE0200000) 39#define S5PC100_PA_SROMC 0xE7000000
45#define S5PC100_VA_OTHERS (S3C_VA_SYS + 0x10000)
46 40
47#define S5PC100_PA_GPIO (0xE0300000) 41#define S5PC100_PA_CFCON 0xE7800000
48#define S5PC1XX_VA_GPIO S3C_ADDR(0x00500000)
49 42
50/* Interrupt */ 43#define S5PC100_PA_MDMA 0xE8100000
51#define S5PC100_PA_VIC0 (0xE4000000) 44#define S5PC100_PA_PDMA0 0xE9000000
52#define S5PC100_PA_VIC1 (0xE4100000) 45#define S5PC100_PA_PDMA1 0xE9200000
53#define S5PC100_PA_VIC2 (0xE4200000)
54#define S5PC100_VA_VIC S3C_VA_IRQ
55#define S5PC100_VA_VIC_OFFSET 0x10000
56#define S5PC1XX_VA_VIC(x) (S5PC100_VA_VIC + ((x) * S5PC100_VA_VIC_OFFSET))
57 46
58#define S5PC100_PA_SROMC (0xE7000000) 47#define S5PC100_PA_TIMER 0xEA000000
59#define S5P_PA_SROMC S5PC100_PA_SROMC 48#define S5PC100_PA_SYSTIMER 0xEA100000
49#define S5PC100_PA_WATCHDOG 0xEA200000
50#define S5PC100_PA_RTC 0xEA300000
60 51
61#define S5PC100_PA_ONENAND (0xE7100000) 52#define S5PC100_PA_UART 0xEC000000
62 53
63#define S5PC100_PA_CFCON (0xE7800000) 54#define S5PC100_PA_IIC0 0xEC100000
55#define S5PC100_PA_IIC1 0xEC200000
64 56
65/* DMA */ 57#define S5PC100_PA_SPI0 0xEC300000
66#define S5PC100_PA_MDMA (0xE8100000) 58#define S5PC100_PA_SPI1 0xEC400000
67#define S5PC100_PA_PDMA0 (0xE9000000) 59#define S5PC100_PA_SPI2 0xEC500000
68#define S5PC100_PA_PDMA1 (0xE9200000)
69 60
70/* Timer */ 61#define S5PC100_PA_USB_HSOTG 0xED200000
71#define S5PC100_PA_TIMER (0xEA000000) 62#define S5PC100_PA_USB_HSPHY 0xED300000
72#define S5P_PA_TIMER S5PC100_PA_TIMER
73 63
74#define S5PC100_PA_SYSTIMER (0xEA100000) 64#define S5PC100_PA_HSMMC(x) (0xED800000 + ((x) * 0x100000))
75 65
76#define S5PC100_PA_WATCHDOG (0xEA200000) 66#define S5PC100_PA_FB 0xEE000000
77#define S5PC100_PA_RTC (0xEA300000)
78 67
79#define S5PC100_PA_UART (0xEC000000) 68#define S5PC100_PA_FIMC0 0xEE200000
69#define S5PC100_PA_FIMC1 0xEE300000
70#define S5PC100_PA_FIMC2 0xEE400000
80 71
81#define S5P_PA_UART0 (S5PC100_PA_UART + 0x0) 72#define S5PC100_PA_I2S0 0xF2000000
82#define S5P_PA_UART1 (S5PC100_PA_UART + 0x400) 73#define S5PC100_PA_I2S1 0xF2100000
83#define S5P_PA_UART2 (S5PC100_PA_UART + 0x800) 74#define S5PC100_PA_I2S2 0xF2200000
84#define S5P_PA_UART3 (S5PC100_PA_UART + 0xC00)
85#define S5P_SZ_UART SZ_256
86 75
87#define S5PC100_PA_IIC0 (0xEC100000) 76#define S5PC100_PA_AC97 0xF2300000
88#define S5PC100_PA_IIC1 (0xEC200000)
89 77
90/* SPI */ 78#define S5PC100_PA_PCM0 0xF2400000
91#define S5PC100_PA_SPI0 0xEC300000 79#define S5PC100_PA_PCM1 0xF2500000
92#define S5PC100_PA_SPI1 0xEC400000
93#define S5PC100_PA_SPI2 0xEC500000
94 80
95/* USB HS OTG */ 81#define S5PC100_PA_SPDIF 0xF2600000
96#define S5PC100_PA_USB_HSOTG (0xED200000)
97#define S5PC100_PA_USB_HSPHY (0xED300000)
98 82
99#define S5PC100_PA_FB (0xEE000000) 83#define S5PC100_PA_TSADC 0xF3000000
100 84
101#define S5PC100_PA_FIMC0 (0xEE200000) 85#define S5PC100_PA_KEYPAD 0xF3100000
102#define S5PC100_PA_FIMC1 (0xEE300000)
103#define S5PC100_PA_FIMC2 (0xEE400000)
104 86
105#define S5PC100_PA_I2S0 (0xF2000000) 87/* Compatibiltiy Defines */
106#define S5PC100_PA_I2S1 (0xF2100000)
107#define S5PC100_PA_I2S2 (0xF2200000)
108 88
109#define S5PC100_PA_AC97 0xF2300000 89#define S3C_PA_FB S5PC100_PA_FB
90#define S3C_PA_HSMMC0 S5PC100_PA_HSMMC(0)
91#define S3C_PA_HSMMC1 S5PC100_PA_HSMMC(1)
92#define S3C_PA_HSMMC2 S5PC100_PA_HSMMC(2)
93#define S3C_PA_IIC S5PC100_PA_IIC0
94#define S3C_PA_IIC1 S5PC100_PA_IIC1
95#define S3C_PA_KEYPAD S5PC100_PA_KEYPAD
96#define S3C_PA_ONENAND S5PC100_PA_ONENAND
97#define S3C_PA_ONENAND_BUF S5PC100_PA_ONENAND_BUF
98#define S3C_PA_RTC S5PC100_PA_RTC
99#define S3C_PA_TSADC S5PC100_PA_TSADC
100#define S3C_PA_USB_HSOTG S5PC100_PA_USB_HSOTG
101#define S3C_PA_USB_HSPHY S5PC100_PA_USB_HSPHY
102#define S3C_PA_WDT S5PC100_PA_WATCHDOG
110 103
111/* PCM */ 104#define S5P_PA_CHIPID S5PC100_PA_CHIPID
112#define S5PC100_PA_PCM0 0xF2400000 105#define S5P_PA_FIMC0 S5PC100_PA_FIMC0
113#define S5PC100_PA_PCM1 0xF2500000 106#define S5P_PA_FIMC1 S5PC100_PA_FIMC1
107#define S5P_PA_FIMC2 S5PC100_PA_FIMC2
108#define S5P_PA_SDRAM S5PC100_PA_SDRAM
109#define S5P_PA_SROMC S5PC100_PA_SROMC
110#define S5P_PA_SYSCON S5PC100_PA_SYSCON
111#define S5P_PA_TIMER S5PC100_PA_TIMER
114 112
115#define S5PC100_PA_SPDIF 0xF2600000 113#define SAMSUNG_PA_ADC S5PC100_PA_TSADC
114#define SAMSUNG_PA_CFCON S5PC100_PA_CFCON
115#define SAMSUNG_PA_KEYPAD S5PC100_PA_KEYPAD
116 116
117#define S5PC100_PA_TSADC (0xF3000000) 117#define S5PC100_VA_OTHERS (S3C_VA_SYS + 0x10000)
118 118
119/* KEYPAD */ 119#define S3C_SZ_ONENAND_BUF (SZ_256M - SZ_32M)
120#define S5PC100_PA_KEYPAD (0xF3100000)
121 120
122#define S5PC100_PA_HSMMC(x) (0xED800000 + ((x) * 0x100000)) 121/* UART */
123 122
124#define S5PC100_PA_SDRAM (0x20000000) 123#define S3C_PA_UART S5PC100_PA_UART
125#define S5P_PA_SDRAM S5PC100_PA_SDRAM
126 124
127/* compatibiltiy defines. */ 125#define S5P_PA_UART(x) (S3C_PA_UART + ((x) * S3C_UART_OFFSET))
128#define S3C_PA_UART S5PC100_PA_UART 126#define S5P_PA_UART0 S5P_PA_UART(0)
129#define S3C_PA_IIC S5PC100_PA_IIC0 127#define S5P_PA_UART1 S5P_PA_UART(1)
130#define S3C_PA_IIC1 S5PC100_PA_IIC1 128#define S5P_PA_UART2 S5P_PA_UART(2)
131#define S3C_PA_FB S5PC100_PA_FB 129#define S5P_PA_UART3 S5P_PA_UART(3)
132#define S3C_PA_G2D S5PC100_PA_G2D
133#define S3C_PA_G3D S5PC100_PA_G3D
134#define S3C_PA_JPEG S5PC100_PA_JPEG
135#define S3C_PA_ROTATOR S5PC100_PA_ROTATOR
136#define S5P_VA_VIC0 S5PC1XX_VA_VIC(0)
137#define S5P_VA_VIC1 S5PC1XX_VA_VIC(1)
138#define S5P_VA_VIC2 S5PC1XX_VA_VIC(2)
139#define S3C_PA_USB_HSOTG S5PC100_PA_USB_HSOTG
140#define S3C_PA_USB_HSPHY S5PC100_PA_USB_HSPHY
141#define S3C_PA_HSMMC0 S5PC100_PA_HSMMC(0)
142#define S3C_PA_HSMMC1 S5PC100_PA_HSMMC(1)
143#define S3C_PA_HSMMC2 S5PC100_PA_HSMMC(2)
144#define S3C_PA_KEYPAD S5PC100_PA_KEYPAD
145#define S3C_PA_WDT S5PC100_PA_WATCHDOG
146#define S3C_PA_TSADC S5PC100_PA_TSADC
147#define S3C_PA_ONENAND S5PC100_PA_ONENAND
148#define S3C_PA_ONENAND_BUF S5PC100_PA_ONENAND_BUF
149#define S3C_SZ_ONENAND_BUF S5PC100_SZ_ONENAND_BUF
150#define S3C_PA_RTC S5PC100_PA_RTC
151
152#define SAMSUNG_PA_ADC S5PC100_PA_TSADC
153#define SAMSUNG_PA_CFCON S5PC100_PA_CFCON
154#define SAMSUNG_PA_KEYPAD S5PC100_PA_KEYPAD
155 130
156#define S5P_PA_FIMC0 S5PC100_PA_FIMC0 131#define S5P_SZ_UART SZ_256
157#define S5P_PA_FIMC1 S5PC100_PA_FIMC1
158#define S5P_PA_FIMC2 S5PC100_PA_FIMC2
159 132
160#endif /* __ASM_ARCH_C100_MAP_H */ 133#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s5pv210/include/mach/map.h b/arch/arm/mach-s5pv210/include/mach/map.h
index 3611492ad681..1dd58836fd4f 100644
--- a/arch/arm/mach-s5pv210/include/mach/map.h
+++ b/arch/arm/mach-s5pv210/include/mach/map.h
@@ -1,6 +1,6 @@
1/* linux/arch/arm/mach-s5pv210/include/mach/map.h 1/* linux/arch/arm/mach-s5pv210/include/mach/map.h
2 * 2 *
3 * Copyright (c) 2010 Samsung Electronics Co., Ltd. 3 * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
4 * http://www.samsung.com/ 4 * http://www.samsung.com/
5 * 5 *
6 * S5PV210 - Memory map definitions 6 * S5PV210 - Memory map definitions
@@ -16,122 +16,120 @@
16#include <plat/map-base.h> 16#include <plat/map-base.h>
17#include <plat/map-s5p.h> 17#include <plat/map-s5p.h>
18 18
19#define S5PV210_PA_SROM_BANK5 (0xA8000000) 19#define S5PV210_PA_SDRAM 0x20000000
20 20
21#define S5PC110_PA_ONENAND (0xB0000000) 21#define S5PV210_PA_SROM_BANK5 0xA8000000
22#define S5P_PA_ONENAND S5PC110_PA_ONENAND
23 22
24#define S5PC110_PA_ONENAND_DMA (0xB0600000) 23#define S5PC110_PA_ONENAND 0xB0000000
25#define S5P_PA_ONENAND_DMA S5PC110_PA_ONENAND_DMA 24#define S5PC110_PA_ONENAND_DMA 0xB0600000
26 25
27#define S5PV210_PA_CHIPID (0xE0000000) 26#define S5PV210_PA_CHIPID 0xE0000000
28#define S5P_PA_CHIPID S5PV210_PA_CHIPID
29 27
30#define S5PV210_PA_SYSCON (0xE0100000) 28#define S5PV210_PA_SYSCON 0xE0100000
31#define S5P_PA_SYSCON S5PV210_PA_SYSCON
32 29
33#define S5PV210_PA_GPIO (0xE0200000) 30#define S5PV210_PA_GPIO 0xE0200000
34 31
35/* SPI */ 32#define S5PV210_PA_SPDIF 0xE1100000
36#define S5PV210_PA_SPI0 0xE1300000
37#define S5PV210_PA_SPI1 0xE1400000
38 33
39#define S5PV210_PA_KEYPAD (0xE1600000) 34#define S5PV210_PA_SPI0 0xE1300000
35#define S5PV210_PA_SPI1 0xE1400000
40 36
41#define S5PV210_PA_IIC0 (0xE1800000) 37#define S5PV210_PA_KEYPAD 0xE1600000
42#define S5PV210_PA_IIC1 (0xFAB00000)
43#define S5PV210_PA_IIC2 (0xE1A00000)
44 38
45#define S5PV210_PA_TIMER (0xE2500000) 39#define S5PV210_PA_ADC 0xE1700000
46#define S5P_PA_TIMER S5PV210_PA_TIMER
47 40
48#define S5PV210_PA_SYSTIMER (0xE2600000) 41#define S5PV210_PA_IIC0 0xE1800000
42#define S5PV210_PA_IIC1 0xFAB00000
43#define S5PV210_PA_IIC2 0xE1A00000
49 44
50#define S5PV210_PA_WATCHDOG (0xE2700000) 45#define S5PV210_PA_AC97 0xE2200000
51 46
52#define S5PV210_PA_RTC (0xE2800000) 47#define S5PV210_PA_PCM0 0xE2300000
53#define S5PV210_PA_UART (0xE2900000) 48#define S5PV210_PA_PCM1 0xE1200000
49#define S5PV210_PA_PCM2 0xE2B00000
54 50
55#define S5P_PA_UART0 (S5PV210_PA_UART + 0x0) 51#define S5PV210_PA_TIMER 0xE2500000
56#define S5P_PA_UART1 (S5PV210_PA_UART + 0x400) 52#define S5PV210_PA_SYSTIMER 0xE2600000
57#define S5P_PA_UART2 (S5PV210_PA_UART + 0x800) 53#define S5PV210_PA_WATCHDOG 0xE2700000
58#define S5P_PA_UART3 (S5PV210_PA_UART + 0xC00) 54#define S5PV210_PA_RTC 0xE2800000
59 55
60#define S5P_SZ_UART SZ_256 56#define S5PV210_PA_UART 0xE2900000
61 57
62#define S3C_VA_UARTx(x) (S3C_VA_UART + ((x) * S3C_UART_OFFSET)) 58#define S5PV210_PA_SROMC 0xE8000000
63 59
64#define S5PV210_PA_SROMC (0xE8000000) 60#define S5PV210_PA_CFCON 0xE8200000
65#define S5P_PA_SROMC S5PV210_PA_SROMC
66 61
67#define S5PV210_PA_CFCON (0xE8200000) 62#define S5PV210_PA_HSMMC(x) (0xEB000000 + ((x) * 0x100000))
68 63
69#define S5PV210_PA_MDMA 0xFA200000 64#define S5PV210_PA_HSOTG 0xEC000000
70#define S5PV210_PA_PDMA0 0xE0900000 65#define S5PV210_PA_HSPHY 0xEC100000
71#define S5PV210_PA_PDMA1 0xE0A00000
72 66
73#define S5PV210_PA_FB (0xF8000000) 67#define S5PV210_PA_IIS0 0xEEE30000
68#define S5PV210_PA_IIS1 0xE2100000
69#define S5PV210_PA_IIS2 0xE2A00000
74 70
75#define S5PV210_PA_FIMC0 (0xFB200000) 71#define S5PV210_PA_DMC0 0xF0000000
76#define S5PV210_PA_FIMC1 (0xFB300000) 72#define S5PV210_PA_DMC1 0xF1400000
77#define S5PV210_PA_FIMC2 (0xFB400000)
78 73
79#define S5PV210_PA_HSMMC(x) (0xEB000000 + ((x) * 0x100000)) 74#define S5PV210_PA_VIC0 0xF2000000
75#define S5PV210_PA_VIC1 0xF2100000
76#define S5PV210_PA_VIC2 0xF2200000
77#define S5PV210_PA_VIC3 0xF2300000
80 78
81#define S5PV210_PA_HSOTG (0xEC000000) 79#define S5PV210_PA_FB 0xF8000000
82#define S5PV210_PA_HSPHY (0xEC100000)
83 80
84#define S5PV210_PA_VIC0 (0xF2000000) 81#define S5PV210_PA_MDMA 0xFA200000
85#define S5PV210_PA_VIC1 (0xF2100000) 82#define S5PV210_PA_PDMA0 0xE0900000
86#define S5PV210_PA_VIC2 (0xF2200000) 83#define S5PV210_PA_PDMA1 0xE0A00000
87#define S5PV210_PA_VIC3 (0xF2300000)
88 84
89#define S5PV210_PA_SDRAM (0x20000000) 85#define S5PV210_PA_MIPI_CSIS 0xFA600000
90#define S5P_PA_SDRAM S5PV210_PA_SDRAM
91 86
92/* S/PDIF */ 87#define S5PV210_PA_FIMC0 0xFB200000
93#define S5PV210_PA_SPDIF 0xE1100000 88#define S5PV210_PA_FIMC1 0xFB300000
89#define S5PV210_PA_FIMC2 0xFB400000
94 90
95/* I2S */ 91/* Compatibiltiy Defines */
96#define S5PV210_PA_IIS0 0xEEE30000
97#define S5PV210_PA_IIS1 0xE2100000
98#define S5PV210_PA_IIS2 0xE2A00000
99 92
100/* PCM */ 93#define S3C_PA_FB S5PV210_PA_FB
101#define S5PV210_PA_PCM0 0xE2300000 94#define S3C_PA_HSMMC0 S5PV210_PA_HSMMC(0)
102#define S5PV210_PA_PCM1 0xE1200000 95#define S3C_PA_HSMMC1 S5PV210_PA_HSMMC(1)
103#define S5PV210_PA_PCM2 0xE2B00000 96#define S3C_PA_HSMMC2 S5PV210_PA_HSMMC(2)
97#define S3C_PA_HSMMC3 S5PV210_PA_HSMMC(3)
98#define S3C_PA_IIC S5PV210_PA_IIC0
99#define S3C_PA_IIC1 S5PV210_PA_IIC1
100#define S3C_PA_IIC2 S5PV210_PA_IIC2
101#define S3C_PA_RTC S5PV210_PA_RTC
102#define S3C_PA_USB_HSOTG S5PV210_PA_HSOTG
103#define S3C_PA_WDT S5PV210_PA_WATCHDOG
104 104
105/* AC97 */ 105#define S5P_PA_CHIPID S5PV210_PA_CHIPID
106#define S5PV210_PA_AC97 0xE2200000 106#define S5P_PA_FIMC0 S5PV210_PA_FIMC0
107#define S5P_PA_FIMC1 S5PV210_PA_FIMC1
108#define S5P_PA_FIMC2 S5PV210_PA_FIMC2
109#define S5P_PA_MIPI_CSIS0 S5PV210_PA_MIPI_CSIS
110#define S5P_PA_ONENAND S5PC110_PA_ONENAND
111#define S5P_PA_ONENAND_DMA S5PC110_PA_ONENAND_DMA
112#define S5P_PA_SDRAM S5PV210_PA_SDRAM
113#define S5P_PA_SROMC S5PV210_PA_SROMC
114#define S5P_PA_SYSCON S5PV210_PA_SYSCON
115#define S5P_PA_TIMER S5PV210_PA_TIMER
107 116
108#define S5PV210_PA_ADC (0xE1700000) 117#define SAMSUNG_PA_ADC S5PV210_PA_ADC
118#define SAMSUNG_PA_CFCON S5PV210_PA_CFCON
119#define SAMSUNG_PA_KEYPAD S5PV210_PA_KEYPAD
109 120
110#define S5PV210_PA_DMC0 (0xF0000000) 121/* UART */
111#define S5PV210_PA_DMC1 (0xF1400000)
112 122
113#define S5PV210_PA_MIPI_CSIS 0xFA600000 123#define S3C_VA_UARTx(x) (S3C_VA_UART + ((x) * S3C_UART_OFFSET))
114 124
115/* compatibiltiy defines. */ 125#define S3C_PA_UART S5PV210_PA_UART
116#define S3C_PA_UART S5PV210_PA_UART
117#define S3C_PA_HSMMC0 S5PV210_PA_HSMMC(0)
118#define S3C_PA_HSMMC1 S5PV210_PA_HSMMC(1)
119#define S3C_PA_HSMMC2 S5PV210_PA_HSMMC(2)
120#define S3C_PA_HSMMC3 S5PV210_PA_HSMMC(3)
121#define S3C_PA_IIC S5PV210_PA_IIC0
122#define S3C_PA_IIC1 S5PV210_PA_IIC1
123#define S3C_PA_IIC2 S5PV210_PA_IIC2
124#define S3C_PA_FB S5PV210_PA_FB
125#define S3C_PA_RTC S5PV210_PA_RTC
126#define S3C_PA_WDT S5PV210_PA_WATCHDOG
127#define S3C_PA_USB_HSOTG S5PV210_PA_HSOTG
128#define S5P_PA_FIMC0 S5PV210_PA_FIMC0
129#define S5P_PA_FIMC1 S5PV210_PA_FIMC1
130#define S5P_PA_FIMC2 S5PV210_PA_FIMC2
131#define S5P_PA_MIPI_CSIS0 S5PV210_PA_MIPI_CSIS
132 126
133#define SAMSUNG_PA_ADC S5PV210_PA_ADC 127#define S5P_PA_UART(x) (S3C_PA_UART + ((x) * S3C_UART_OFFSET))
134#define SAMSUNG_PA_CFCON S5PV210_PA_CFCON 128#define S5P_PA_UART0 S5P_PA_UART(0)
135#define SAMSUNG_PA_KEYPAD S5PV210_PA_KEYPAD 129#define S5P_PA_UART1 S5P_PA_UART(1)
130#define S5P_PA_UART2 S5P_PA_UART(2)
131#define S5P_PA_UART3 S5P_PA_UART(3)
132
133#define S5P_SZ_UART SZ_256
136 134
137#endif /* __ASM_ARCH_MAP_H */ 135#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s5pv210/mach-aquila.c b/arch/arm/mach-s5pv210/mach-aquila.c
index 461aa035afc0..557add4fc56c 100644
--- a/arch/arm/mach-s5pv210/mach-aquila.c
+++ b/arch/arm/mach-s5pv210/mach-aquila.c
@@ -149,7 +149,7 @@ static struct regulator_init_data aquila_ldo2_data = {
149 149
150static struct regulator_init_data aquila_ldo3_data = { 150static struct regulator_init_data aquila_ldo3_data = {
151 .constraints = { 151 .constraints = {
152 .name = "VUSB/MIPI_1.1V", 152 .name = "VUSB+MIPI_1.1V",
153 .min_uV = 1100000, 153 .min_uV = 1100000,
154 .max_uV = 1100000, 154 .max_uV = 1100000,
155 .apply_uV = 1, 155 .apply_uV = 1,
@@ -197,7 +197,7 @@ static struct regulator_init_data aquila_ldo7_data = {
197 197
198static struct regulator_init_data aquila_ldo8_data = { 198static struct regulator_init_data aquila_ldo8_data = {
199 .constraints = { 199 .constraints = {
200 .name = "VUSB/VADC_3.3V", 200 .name = "VUSB+VADC_3.3V",
201 .min_uV = 3300000, 201 .min_uV = 3300000,
202 .max_uV = 3300000, 202 .max_uV = 3300000,
203 .apply_uV = 1, 203 .apply_uV = 1,
@@ -207,7 +207,7 @@ static struct regulator_init_data aquila_ldo8_data = {
207 207
208static struct regulator_init_data aquila_ldo9_data = { 208static struct regulator_init_data aquila_ldo9_data = {
209 .constraints = { 209 .constraints = {
210 .name = "VCC/VCAM_2.8V", 210 .name = "VCC+VCAM_2.8V",
211 .min_uV = 2800000, 211 .min_uV = 2800000,
212 .max_uV = 2800000, 212 .max_uV = 2800000,
213 .apply_uV = 1, 213 .apply_uV = 1,
@@ -381,9 +381,12 @@ static struct max8998_platform_data aquila_max8998_pdata = {
381 .buck1_set1 = S5PV210_GPH0(3), 381 .buck1_set1 = S5PV210_GPH0(3),
382 .buck1_set2 = S5PV210_GPH0(4), 382 .buck1_set2 = S5PV210_GPH0(4),
383 .buck2_set3 = S5PV210_GPH0(5), 383 .buck2_set3 = S5PV210_GPH0(5),
384 .buck1_max_voltage1 = 1200000, 384 .buck1_voltage1 = 1200000,
385 .buck1_max_voltage2 = 1200000, 385 .buck1_voltage2 = 1200000,
386 .buck2_max_voltage = 1200000, 386 .buck1_voltage3 = 1200000,
387 .buck1_voltage4 = 1200000,
388 .buck2_voltage1 = 1200000,
389 .buck2_voltage2 = 1200000,
387}; 390};
388#endif 391#endif
389 392
diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index e22d5112fd44..056f5c769b0a 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -288,7 +288,7 @@ static struct regulator_init_data goni_ldo2_data = {
288 288
289static struct regulator_init_data goni_ldo3_data = { 289static struct regulator_init_data goni_ldo3_data = {
290 .constraints = { 290 .constraints = {
291 .name = "VUSB/MIPI_1.1V", 291 .name = "VUSB+MIPI_1.1V",
292 .min_uV = 1100000, 292 .min_uV = 1100000,
293 .max_uV = 1100000, 293 .max_uV = 1100000,
294 .apply_uV = 1, 294 .apply_uV = 1,
@@ -337,7 +337,7 @@ static struct regulator_init_data goni_ldo7_data = {
337 337
338static struct regulator_init_data goni_ldo8_data = { 338static struct regulator_init_data goni_ldo8_data = {
339 .constraints = { 339 .constraints = {
340 .name = "VUSB/VADC_3.3V", 340 .name = "VUSB+VADC_3.3V",
341 .min_uV = 3300000, 341 .min_uV = 3300000,
342 .max_uV = 3300000, 342 .max_uV = 3300000,
343 .apply_uV = 1, 343 .apply_uV = 1,
@@ -347,7 +347,7 @@ static struct regulator_init_data goni_ldo8_data = {
347 347
348static struct regulator_init_data goni_ldo9_data = { 348static struct regulator_init_data goni_ldo9_data = {
349 .constraints = { 349 .constraints = {
350 .name = "VCC/VCAM_2.8V", 350 .name = "VCC+VCAM_2.8V",
351 .min_uV = 2800000, 351 .min_uV = 2800000,
352 .max_uV = 2800000, 352 .max_uV = 2800000,
353 .apply_uV = 1, 353 .apply_uV = 1,
@@ -521,9 +521,12 @@ static struct max8998_platform_data goni_max8998_pdata = {
521 .buck1_set1 = S5PV210_GPH0(3), 521 .buck1_set1 = S5PV210_GPH0(3),
522 .buck1_set2 = S5PV210_GPH0(4), 522 .buck1_set2 = S5PV210_GPH0(4),
523 .buck2_set3 = S5PV210_GPH0(5), 523 .buck2_set3 = S5PV210_GPH0(5),
524 .buck1_max_voltage1 = 1200000, 524 .buck1_voltage1 = 1200000,
525 .buck1_max_voltage2 = 1200000, 525 .buck1_voltage2 = 1200000,
526 .buck2_max_voltage = 1200000, 526 .buck1_voltage3 = 1200000,
527 .buck1_voltage4 = 1200000,
528 .buck2_voltage1 = 1200000,
529 .buck2_voltage2 = 1200000,
527}; 530};
528#endif 531#endif
529 532
diff --git a/arch/arm/mach-s5pv310/include/mach/map.h b/arch/arm/mach-s5pv310/include/mach/map.h
index 3060f78e12ab..901657fa7a12 100644
--- a/arch/arm/mach-s5pv310/include/mach/map.h
+++ b/arch/arm/mach-s5pv310/include/mach/map.h
@@ -1,6 +1,6 @@
1/* linux/arch/arm/mach-s5pv310/include/mach/map.h 1/* linux/arch/arm/mach-s5pv310/include/mach/map.h
2 * 2 *
3 * Copyright (c) 2010 Samsung Electronics Co., Ltd. 3 * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
4 * http://www.samsung.com/ 4 * http://www.samsung.com/
5 * 5 *
6 * S5PV310 - Memory map definitions 6 * S5PV310 - Memory map definitions
@@ -23,90 +23,43 @@
23 23
24#include <plat/map-s5p.h> 24#include <plat/map-s5p.h>
25 25
26#define S5PV310_PA_SYSRAM (0x02025000) 26#define S5PV310_PA_SYSRAM 0x02025000
27 27
28#define S5PV310_PA_SROM_BANK(x) (0x04000000 + ((x) * 0x01000000)) 28#define S5PV310_PA_I2S0 0x03830000
29 29#define S5PV310_PA_I2S1 0xE3100000
30#define S5PC210_PA_ONENAND (0x0C000000) 30#define S5PV310_PA_I2S2 0xE2A00000
31#define S5P_PA_ONENAND S5PC210_PA_ONENAND
32
33#define S5PC210_PA_ONENAND_DMA (0x0C600000)
34#define S5P_PA_ONENAND_DMA S5PC210_PA_ONENAND_DMA
35
36#define S5PV310_PA_CHIPID (0x10000000)
37#define S5P_PA_CHIPID S5PV310_PA_CHIPID
38
39#define S5PV310_PA_SYSCON (0x10010000)
40#define S5P_PA_SYSCON S5PV310_PA_SYSCON
41 31
42#define S5PV310_PA_PMU (0x10020000) 32#define S5PV310_PA_PCM0 0x03840000
33#define S5PV310_PA_PCM1 0x13980000
34#define S5PV310_PA_PCM2 0x13990000
43 35
44#define S5PV310_PA_CMU (0x10030000) 36#define S5PV310_PA_SROM_BANK(x) (0x04000000 + ((x) * 0x01000000))
45
46#define S5PV310_PA_WATCHDOG (0x10060000)
47#define S5PV310_PA_RTC (0x10070000)
48
49#define S5PV310_PA_DMC0 (0x10400000)
50
51#define S5PV310_PA_COMBINER (0x10448000)
52
53#define S5PV310_PA_COREPERI (0x10500000)
54#define S5PV310_PA_GIC_CPU (0x10500100)
55#define S5PV310_PA_TWD (0x10500600)
56#define S5PV310_PA_GIC_DIST (0x10501000)
57#define S5PV310_PA_L2CC (0x10502000)
58
59/* DMA */
60#define S5PV310_PA_MDMA 0x10810000
61#define S5PV310_PA_PDMA0 0x12680000
62#define S5PV310_PA_PDMA1 0x12690000
63
64#define S5PV310_PA_GPIO1 (0x11400000)
65#define S5PV310_PA_GPIO2 (0x11000000)
66#define S5PV310_PA_GPIO3 (0x03860000)
67
68#define S5PV310_PA_MIPI_CSIS0 0x11880000
69#define S5PV310_PA_MIPI_CSIS1 0x11890000
70 37
71#define S5PV310_PA_HSMMC(x) (0x12510000 + ((x) * 0x10000)) 38#define S5PC210_PA_ONENAND 0x0C000000
39#define S5PC210_PA_ONENAND_DMA 0x0C600000
72 40
73#define S5PV310_PA_SROMC (0x12570000) 41#define S5PV310_PA_CHIPID 0x10000000
74#define S5P_PA_SROMC S5PV310_PA_SROMC
75 42
76/* S/PDIF */ 43#define S5PV310_PA_SYSCON 0x10010000
77#define S5PV310_PA_SPDIF 0xE1100000 44#define S5PV310_PA_PMU 0x10020000
45#define S5PV310_PA_CMU 0x10030000
78 46
79/* I2S */ 47#define S5PV310_PA_WATCHDOG 0x10060000
80#define S5PV310_PA_I2S0 0x03830000 48#define S5PV310_PA_RTC 0x10070000
81#define S5PV310_PA_I2S1 0xE3100000
82#define S5PV310_PA_I2S2 0xE2A00000
83 49
84/* PCM */ 50#define S5PV310_PA_DMC0 0x10400000
85#define S5PV310_PA_PCM0 0x03840000
86#define S5PV310_PA_PCM1 0x13980000
87#define S5PV310_PA_PCM2 0x13990000
88 51
89/* AC97 */ 52#define S5PV310_PA_COMBINER 0x10448000
90#define S5PV310_PA_AC97 0x139A0000
91 53
92#define S5PV310_PA_UART (0x13800000) 54#define S5PV310_PA_COREPERI 0x10500000
55#define S5PV310_PA_GIC_CPU 0x10500100
56#define S5PV310_PA_TWD 0x10500600
57#define S5PV310_PA_GIC_DIST 0x10501000
58#define S5PV310_PA_L2CC 0x10502000
93 59
94#define S5P_PA_UART(x) (S5PV310_PA_UART + ((x) * S3C_UART_OFFSET)) 60#define S5PV310_PA_MDMA 0x10810000
95#define S5P_PA_UART0 S5P_PA_UART(0) 61#define S5PV310_PA_PDMA0 0x12680000
96#define S5P_PA_UART1 S5P_PA_UART(1) 62#define S5PV310_PA_PDMA1 0x12690000
97#define S5P_PA_UART2 S5P_PA_UART(2)
98#define S5P_PA_UART3 S5P_PA_UART(3)
99#define S5P_PA_UART4 S5P_PA_UART(4)
100
101#define S5P_SZ_UART SZ_256
102
103#define S5PV310_PA_IIC(x) (0x13860000 + ((x) * 0x10000))
104
105#define S5PV310_PA_TIMER (0x139D0000)
106#define S5P_PA_TIMER S5PV310_PA_TIMER
107
108#define S5PV310_PA_SDRAM (0x40000000)
109#define S5P_PA_SDRAM S5PV310_PA_SDRAM
110 63
111#define S5PV310_PA_SYSMMU_MDMA 0x10A40000 64#define S5PV310_PA_SYSMMU_MDMA 0x10A40000
112#define S5PV310_PA_SYSMMU_SSS 0x10A50000 65#define S5PV310_PA_SYSMMU_SSS 0x10A50000
@@ -125,8 +78,31 @@
125#define S5PV310_PA_SYSMMU_MFC_L 0x13620000 78#define S5PV310_PA_SYSMMU_MFC_L 0x13620000
126#define S5PV310_PA_SYSMMU_MFC_R 0x13630000 79#define S5PV310_PA_SYSMMU_MFC_R 0x13630000
127 80
128/* compatibiltiy defines. */ 81#define S5PV310_PA_GPIO1 0x11400000
129#define S3C_PA_UART S5PV310_PA_UART 82#define S5PV310_PA_GPIO2 0x11000000
83#define S5PV310_PA_GPIO3 0x03860000
84
85#define S5PV310_PA_MIPI_CSIS0 0x11880000
86#define S5PV310_PA_MIPI_CSIS1 0x11890000
87
88#define S5PV310_PA_HSMMC(x) (0x12510000 + ((x) * 0x10000))
89
90#define S5PV310_PA_SROMC 0x12570000
91
92#define S5PV310_PA_UART 0x13800000
93
94#define S5PV310_PA_IIC(x) (0x13860000 + ((x) * 0x10000))
95
96#define S5PV310_PA_AC97 0x139A0000
97
98#define S5PV310_PA_TIMER 0x139D0000
99
100#define S5PV310_PA_SDRAM 0x40000000
101
102#define S5PV310_PA_SPDIF 0xE1100000
103
104/* Compatibiltiy Defines */
105
130#define S3C_PA_HSMMC0 S5PV310_PA_HSMMC(0) 106#define S3C_PA_HSMMC0 S5PV310_PA_HSMMC(0)
131#define S3C_PA_HSMMC1 S5PV310_PA_HSMMC(1) 107#define S3C_PA_HSMMC1 S5PV310_PA_HSMMC(1)
132#define S3C_PA_HSMMC2 S5PV310_PA_HSMMC(2) 108#define S3C_PA_HSMMC2 S5PV310_PA_HSMMC(2)
@@ -141,7 +117,28 @@
141#define S3C_PA_IIC7 S5PV310_PA_IIC(7) 117#define S3C_PA_IIC7 S5PV310_PA_IIC(7)
142#define S3C_PA_RTC S5PV310_PA_RTC 118#define S3C_PA_RTC S5PV310_PA_RTC
143#define S3C_PA_WDT S5PV310_PA_WATCHDOG 119#define S3C_PA_WDT S5PV310_PA_WATCHDOG
120
121#define S5P_PA_CHIPID S5PV310_PA_CHIPID
144#define S5P_PA_MIPI_CSIS0 S5PV310_PA_MIPI_CSIS0 122#define S5P_PA_MIPI_CSIS0 S5PV310_PA_MIPI_CSIS0
145#define S5P_PA_MIPI_CSIS1 S5PV310_PA_MIPI_CSIS1 123#define S5P_PA_MIPI_CSIS1 S5PV310_PA_MIPI_CSIS1
124#define S5P_PA_ONENAND S5PC210_PA_ONENAND
125#define S5P_PA_ONENAND_DMA S5PC210_PA_ONENAND_DMA
126#define S5P_PA_SDRAM S5PV310_PA_SDRAM
127#define S5P_PA_SROMC S5PV310_PA_SROMC
128#define S5P_PA_SYSCON S5PV310_PA_SYSCON
129#define S5P_PA_TIMER S5PV310_PA_TIMER
130
131/* UART */
132
133#define S3C_PA_UART S5PV310_PA_UART
134
135#define S5P_PA_UART(x) (S3C_PA_UART + ((x) * S3C_UART_OFFSET))
136#define S5P_PA_UART0 S5P_PA_UART(0)
137#define S5P_PA_UART1 S5P_PA_UART(1)
138#define S5P_PA_UART2 S5P_PA_UART(2)
139#define S5P_PA_UART3 S5P_PA_UART(3)
140#define S5P_PA_UART4 S5P_PA_UART(4)
141
142#define S5P_SZ_UART SZ_256
146 143
147#endif /* __ASM_ARCH_MAP_H */ 144#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
index 2123b96b5638..4303a86e6e38 100644
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -454,6 +454,7 @@ static void __init ag5evm_init(void)
454 gpio_direction_output(GPIO_PORT217, 0); 454 gpio_direction_output(GPIO_PORT217, 0);
455 mdelay(1); 455 mdelay(1);
456 gpio_set_value(GPIO_PORT217, 1); 456 gpio_set_value(GPIO_PORT217, 1);
457 mdelay(100);
457 458
458 /* LCD backlight controller */ 459 /* LCD backlight controller */
459 gpio_request(GPIO_PORT235, NULL); /* RESET */ 460 gpio_request(GPIO_PORT235, NULL); /* RESET */
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index 3cf0951caa2d..81d6536552a9 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -1303,7 +1303,7 @@ static void __init ap4evb_init(void)
1303 1303
1304 lcdc_info.clock_source = LCDC_CLK_BUS; 1304 lcdc_info.clock_source = LCDC_CLK_BUS;
1305 lcdc_info.ch[0].interface_type = RGB18; 1305 lcdc_info.ch[0].interface_type = RGB18;
1306 lcdc_info.ch[0].clock_divider = 2; 1306 lcdc_info.ch[0].clock_divider = 3;
1307 lcdc_info.ch[0].flags = 0; 1307 lcdc_info.ch[0].flags = 0;
1308 lcdc_info.ch[0].lcd_size_cfg.width = 152; 1308 lcdc_info.ch[0].lcd_size_cfg.width = 152;
1309 lcdc_info.ch[0].lcd_size_cfg.height = 91; 1309 lcdc_info.ch[0].lcd_size_cfg.height = 91;
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index fb4213a4e15a..1657eac5dde2 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -303,7 +303,7 @@ static struct sh_mobile_lcdc_info lcdc_info = {
303 .lcd_cfg = mackerel_lcdc_modes, 303 .lcd_cfg = mackerel_lcdc_modes,
304 .num_cfg = ARRAY_SIZE(mackerel_lcdc_modes), 304 .num_cfg = ARRAY_SIZE(mackerel_lcdc_modes),
305 .interface_type = RGB24, 305 .interface_type = RGB24,
306 .clock_divider = 2, 306 .clock_divider = 3,
307 .flags = 0, 307 .flags = 0,
308 .lcd_size_cfg.width = 152, 308 .lcd_size_cfg.width = 152,
309 .lcd_size_cfg.height = 91, 309 .lcd_size_cfg.height = 91,
diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c
index ddd4a1b775f0..7e58904c1c8c 100644
--- a/arch/arm/mach-shmobile/clock-sh73a0.c
+++ b/arch/arm/mach-shmobile/clock-sh73a0.c
@@ -263,7 +263,7 @@ static struct clk div6_clks[DIV6_NR] = {
263}; 263};
264 264
265enum { MSTP001, 265enum { MSTP001,
266 MSTP125, MSTP118, MSTP116, MSTP100, 266 MSTP129, MSTP128, MSTP127, MSTP126, MSTP125, MSTP118, MSTP116, MSTP100,
267 MSTP219, 267 MSTP219,
268 MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, 268 MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
269 MSTP331, MSTP329, MSTP325, MSTP323, MSTP312, 269 MSTP331, MSTP329, MSTP325, MSTP323, MSTP312,
@@ -275,6 +275,10 @@ enum { MSTP001,
275 275
276static struct clk mstp_clks[MSTP_NR] = { 276static struct clk mstp_clks[MSTP_NR] = {
277 [MSTP001] = MSTP(&div4_clks[DIV4_HP], SMSTPCR0, 1, 0), /* IIC2 */ 277 [MSTP001] = MSTP(&div4_clks[DIV4_HP], SMSTPCR0, 1, 0), /* IIC2 */
278 [MSTP129] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 29, 0), /* CEU1 */
279 [MSTP128] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 28, 0), /* CSI2-RX1 */
280 [MSTP127] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 27, 0), /* CEU0 */
281 [MSTP126] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 26, 0), /* CSI2-RX0 */
278 [MSTP125] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR1, 25, 0), /* TMU0 */ 282 [MSTP125] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR1, 25, 0), /* TMU0 */
279 [MSTP118] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 18, 0), /* DSITX0 */ 283 [MSTP118] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 18, 0), /* DSITX0 */
280 [MSTP116] = MSTP(&div4_clks[DIV4_HP], SMSTPCR1, 16, 0), /* IIC0 */ 284 [MSTP116] = MSTP(&div4_clks[DIV4_HP], SMSTPCR1, 16, 0), /* IIC0 */
@@ -306,6 +310,9 @@ static struct clk_lookup lookups[] = {
306 CLKDEV_CON_ID("r_clk", &r_clk), 310 CLKDEV_CON_ID("r_clk", &r_clk),
307 311
308 /* DIV6 clocks */ 312 /* DIV6 clocks */
313 CLKDEV_CON_ID("vck1_clk", &div6_clks[DIV6_VCK1]),
314 CLKDEV_CON_ID("vck2_clk", &div6_clks[DIV6_VCK2]),
315 CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]),
309 CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSIT]), 316 CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSIT]),
310 CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.1", &div6_clks[DIV6_DSIT]), 317 CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.1", &div6_clks[DIV6_DSIT]),
311 CLKDEV_ICK_ID("dsi0p_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSI0P]), 318 CLKDEV_ICK_ID("dsi0p_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSI0P]),
@@ -313,11 +320,15 @@ static struct clk_lookup lookups[] = {
313 320
314 /* MSTP32 clocks */ 321 /* MSTP32 clocks */
315 CLKDEV_DEV_ID("i2c-sh_mobile.2", &mstp_clks[MSTP001]), /* I2C2 */ 322 CLKDEV_DEV_ID("i2c-sh_mobile.2", &mstp_clks[MSTP001]), /* I2C2 */
316 CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */ 323 CLKDEV_DEV_ID("sh_mobile_ceu.1", &mstp_clks[MSTP129]), /* CEU1 */
324 CLKDEV_DEV_ID("sh-mobile-csi2.1", &mstp_clks[MSTP128]), /* CSI2-RX1 */
325 CLKDEV_DEV_ID("sh_mobile_ceu.0", &mstp_clks[MSTP127]), /* CEU0 */
326 CLKDEV_DEV_ID("sh-mobile-csi2.0", &mstp_clks[MSTP126]), /* CSI2-RX0 */
317 CLKDEV_DEV_ID("sh_tmu.0", &mstp_clks[MSTP125]), /* TMU00 */ 327 CLKDEV_DEV_ID("sh_tmu.0", &mstp_clks[MSTP125]), /* TMU00 */
318 CLKDEV_DEV_ID("sh_tmu.1", &mstp_clks[MSTP125]), /* TMU01 */ 328 CLKDEV_DEV_ID("sh_tmu.1", &mstp_clks[MSTP125]), /* TMU01 */
319 CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), /* I2C0 */
320 CLKDEV_DEV_ID("sh-mipi-dsi.0", &mstp_clks[MSTP118]), /* DSITX */ 329 CLKDEV_DEV_ID("sh-mipi-dsi.0", &mstp_clks[MSTP118]), /* DSITX */
330 CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), /* I2C0 */
331 CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */
321 CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP219]), /* SCIFA7 */ 332 CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP219]), /* SCIFA7 */
322 CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */ 333 CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */
323 CLKDEV_DEV_ID("sh-sci.8", &mstp_clks[MSTP206]), /* SCIFB */ 334 CLKDEV_DEV_ID("sh-sci.8", &mstp_clks[MSTP206]), /* SCIFB */
diff --git a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
index efd3687ba190..3029aba38688 100644
--- a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
+++ b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
@@ -6,13 +6,10 @@ LIST "RWT Setting"
6EW 0xE6020004, 0xA500 6EW 0xE6020004, 0xA500
7EW 0xE6030004, 0xA500 7EW 0xE6030004, 0xA500
8 8
9DD 0x01001000, 0x01001000
10
11LIST "GPIO Setting" 9LIST "GPIO Setting"
12EB 0xE6051013, 0xA2 10EB 0xE6051013, 0xA2
13 11
14LIST "CPG" 12LIST "CPG"
15ED 0xE6150080, 0x00000180
16ED 0xE61500C0, 0x00000002 13ED 0xE61500C0, 0x00000002
17 14
18WAIT 1, 0xFE40009C 15WAIT 1, 0xFE40009C
@@ -37,6 +34,9 @@ ED 0xE615002C, 0x93000040
37 34
38WAIT 1, 0xFE40009C 35WAIT 1, 0xFE40009C
39 36
37LIST "SUB/USBClk"
38ED 0xE6150080, 0x00000180
39
40LIST "BSC" 40LIST "BSC"
41ED 0xFEC10000, 0x00E0001B 41ED 0xFEC10000, 0x00E0001B
42 42
@@ -53,7 +53,7 @@ ED 0xFE400048, 0x20C18505
53ED 0xFE40004C, 0x00110209 53ED 0xFE40004C, 0x00110209
54ED 0xFE400010, 0x00000087 54ED 0xFE400010, 0x00000087
55 55
56WAIT 10, 0xFE40009C 56WAIT 30, 0xFE40009C
57 57
58ED 0xFE400084, 0x0000003F 58ED 0xFE400084, 0x0000003F
59EB 0xFE500000, 0x00 59EB 0xFE500000, 0x00
@@ -84,7 +84,7 @@ ED 0xE6150004, 0x80331050
84 84
85WAIT 1, 0xFE40009C 85WAIT 1, 0xFE40009C
86 86
87ED 0xE6150354, 0x00000002 87ED 0xFE400354, 0x01AD8002
88 88
89LIST "SCIF0 - Serial port for earlyprintk" 89LIST "SCIF0 - Serial port for earlyprintk"
90EB 0xE6053098, 0x11 90EB 0xE6053098, 0x11
diff --git a/arch/arm/mach-shmobile/include/mach/head-mackerel.txt b/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
index efd3687ba190..3029aba38688 100644
--- a/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
+++ b/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
@@ -6,13 +6,10 @@ LIST "RWT Setting"
6EW 0xE6020004, 0xA500 6EW 0xE6020004, 0xA500
7EW 0xE6030004, 0xA500 7EW 0xE6030004, 0xA500
8 8
9DD 0x01001000, 0x01001000
10
11LIST "GPIO Setting" 9LIST "GPIO Setting"
12EB 0xE6051013, 0xA2 10EB 0xE6051013, 0xA2
13 11
14LIST "CPG" 12LIST "CPG"
15ED 0xE6150080, 0x00000180
16ED 0xE61500C0, 0x00000002 13ED 0xE61500C0, 0x00000002
17 14
18WAIT 1, 0xFE40009C 15WAIT 1, 0xFE40009C
@@ -37,6 +34,9 @@ ED 0xE615002C, 0x93000040
37 34
38WAIT 1, 0xFE40009C 35WAIT 1, 0xFE40009C
39 36
37LIST "SUB/USBClk"
38ED 0xE6150080, 0x00000180
39
40LIST "BSC" 40LIST "BSC"
41ED 0xFEC10000, 0x00E0001B 41ED 0xFEC10000, 0x00E0001B
42 42
@@ -53,7 +53,7 @@ ED 0xFE400048, 0x20C18505
53ED 0xFE40004C, 0x00110209 53ED 0xFE40004C, 0x00110209
54ED 0xFE400010, 0x00000087 54ED 0xFE400010, 0x00000087
55 55
56WAIT 10, 0xFE40009C 56WAIT 30, 0xFE40009C
57 57
58ED 0xFE400084, 0x0000003F 58ED 0xFE400084, 0x0000003F
59EB 0xFE500000, 0x00 59EB 0xFE500000, 0x00
@@ -84,7 +84,7 @@ ED 0xE6150004, 0x80331050
84 84
85WAIT 1, 0xFE40009C 85WAIT 1, 0xFE40009C
86 86
87ED 0xE6150354, 0x00000002 87ED 0xFE400354, 0x01AD8002
88 88
89LIST "SCIF0 - Serial port for earlyprintk" 89LIST "SCIF0 - Serial port for earlyprintk"
90EB 0xE6053098, 0x11 90EB 0xE6053098, 0x11
diff --git a/arch/arm/mach-spear3xx/include/mach/spear320.h b/arch/arm/mach-spear3xx/include/mach/spear320.h
index cacf17a958cd..53677e464d4b 100644
--- a/arch/arm/mach-spear3xx/include/mach/spear320.h
+++ b/arch/arm/mach-spear3xx/include/mach/spear320.h
@@ -62,7 +62,7 @@
62#define SPEAR320_SMII1_BASE 0xAB000000 62#define SPEAR320_SMII1_BASE 0xAB000000
63#define SPEAR320_SMII1_SIZE 0x01000000 63#define SPEAR320_SMII1_SIZE 0x01000000
64 64
65#define SPEAR320_SOC_CONFIG_BASE 0xB4000000 65#define SPEAR320_SOC_CONFIG_BASE 0xB3000000
66#define SPEAR320_SOC_CONFIG_SIZE 0x00000070 66#define SPEAR320_SOC_CONFIG_SIZE 0x00000070
67/* Interrupt registers offsets and masks */ 67/* Interrupt registers offsets and masks */
68#define INT_STS_MASK_REG 0x04 68#define INT_STS_MASK_REG 0x04
diff --git a/arch/arm/mach-tegra/include/mach/kbc.h b/arch/arm/mach-tegra/include/mach/kbc.h
index 66ad2760c621..04c779832c78 100644
--- a/arch/arm/mach-tegra/include/mach/kbc.h
+++ b/arch/arm/mach-tegra/include/mach/kbc.h
@@ -57,5 +57,6 @@ struct tegra_kbc_platform_data {
57 const struct matrix_keymap_data *keymap_data; 57 const struct matrix_keymap_data *keymap_data;
58 58
59 bool wakeup; 59 bool wakeup;
60 bool use_fn_map;
60}; 61};
61#endif 62#endif
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 170c9bb95866..f2ce38e085d2 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -49,7 +49,13 @@ static inline void cache_wait(void __iomem *reg, unsigned long mask)
49static inline void cache_sync(void) 49static inline void cache_sync(void)
50{ 50{
51 void __iomem *base = l2x0_base; 51 void __iomem *base = l2x0_base;
52
53#ifdef CONFIG_ARM_ERRATA_753970
54 /* write to an unmmapped register */
55 writel_relaxed(0, base + L2X0_DUMMY_REG);
56#else
52 writel_relaxed(0, base + L2X0_CACHE_SYNC); 57 writel_relaxed(0, base + L2X0_CACHE_SYNC);
58#endif
53 cache_wait(base + L2X0_CACHE_SYNC, 1); 59 cache_wait(base + L2X0_CACHE_SYNC, 1);
54} 60}
55 61
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 0c1172b56b4e..8e3356239136 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -264,6 +264,12 @@ __v7_setup:
264 orreq r10, r10, #1 << 6 @ set bit #6 264 orreq r10, r10, #1 << 6 @ set bit #6
265 mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register 265 mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
266#endif 266#endif
267#ifdef CONFIG_ARM_ERRATA_751472
268 cmp r6, #0x30 @ present prior to r3p0
269 mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register
270 orrlt r10, r10, #1 << 11 @ set bit #11
271 mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register
272#endif
267 273
2683: mov r10, #0 2743: mov r10, #0
269#ifdef HARVARD_CACHE 275#ifdef HARVARD_CACHE
diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 459b319a9fad..69ddc9f76c13 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -32,7 +32,6 @@
32 32
33#include <plat/mailbox.h> 33#include <plat/mailbox.h>
34 34
35static struct workqueue_struct *mboxd;
36static struct omap_mbox **mboxes; 35static struct omap_mbox **mboxes;
37 36
38static int mbox_configured; 37static int mbox_configured;
@@ -197,7 +196,7 @@ static void __mbox_rx_interrupt(struct omap_mbox *mbox)
197 /* no more messages in the fifo. clear IRQ source. */ 196 /* no more messages in the fifo. clear IRQ source. */
198 ack_mbox_irq(mbox, IRQ_RX); 197 ack_mbox_irq(mbox, IRQ_RX);
199nomem: 198nomem:
200 queue_work(mboxd, &mbox->rxq->work); 199 schedule_work(&mbox->rxq->work);
201} 200}
202 201
203static irqreturn_t mbox_interrupt(int irq, void *p) 202static irqreturn_t mbox_interrupt(int irq, void *p)
@@ -307,7 +306,7 @@ static void omap_mbox_fini(struct omap_mbox *mbox)
307 if (!--mbox->use_count) { 306 if (!--mbox->use_count) {
308 free_irq(mbox->irq, mbox); 307 free_irq(mbox->irq, mbox);
309 tasklet_kill(&mbox->txq->tasklet); 308 tasklet_kill(&mbox->txq->tasklet);
310 flush_work(&mbox->rxq->work); 309 flush_work_sync(&mbox->rxq->work);
311 mbox_queue_free(mbox->txq); 310 mbox_queue_free(mbox->txq);
312 mbox_queue_free(mbox->rxq); 311 mbox_queue_free(mbox->rxq);
313 } 312 }
@@ -322,15 +321,18 @@ static void omap_mbox_fini(struct omap_mbox *mbox)
322 321
323struct omap_mbox *omap_mbox_get(const char *name, struct notifier_block *nb) 322struct omap_mbox *omap_mbox_get(const char *name, struct notifier_block *nb)
324{ 323{
325 struct omap_mbox *mbox; 324 struct omap_mbox *_mbox, *mbox = NULL;
326 int ret; 325 int i, ret;
327 326
328 if (!mboxes) 327 if (!mboxes)
329 return ERR_PTR(-EINVAL); 328 return ERR_PTR(-EINVAL);
330 329
331 for (mbox = *mboxes; mbox; mbox++) 330 for (i = 0; (_mbox = mboxes[i]); i++) {
332 if (!strcmp(mbox->name, name)) 331 if (!strcmp(_mbox->name, name)) {
332 mbox = _mbox;
333 break; 333 break;
334 }
335 }
334 336
335 if (!mbox) 337 if (!mbox)
336 return ERR_PTR(-ENOENT); 338 return ERR_PTR(-ENOENT);
@@ -406,10 +408,6 @@ static int __init omap_mbox_init(void)
406 if (err) 408 if (err)
407 return err; 409 return err;
408 410
409 mboxd = create_workqueue("mboxd");
410 if (!mboxd)
411 return -ENOMEM;
412
413 /* kfifo size sanity check: alignment and minimal size */ 411 /* kfifo size sanity check: alignment and minimal size */
414 mbox_kfifo_size = ALIGN(mbox_kfifo_size, sizeof(mbox_msg_t)); 412 mbox_kfifo_size = ALIGN(mbox_kfifo_size, sizeof(mbox_msg_t));
415 mbox_kfifo_size = max_t(unsigned int, mbox_kfifo_size, 413 mbox_kfifo_size = max_t(unsigned int, mbox_kfifo_size,
@@ -421,7 +419,6 @@ subsys_initcall(omap_mbox_init);
421 419
422static void __exit omap_mbox_exit(void) 420static void __exit omap_mbox_exit(void)
423{ 421{
424 destroy_workqueue(mboxd);
425 class_unregister(&omap_mbox_class); 422 class_unregister(&omap_mbox_class);
426} 423}
427module_exit(omap_mbox_exit); 424module_exit(omap_mbox_exit);
diff --git a/arch/arm/plat-s5p/dev-uart.c b/arch/arm/plat-s5p/dev-uart.c
index 6a7342886171..afaf87fdb93e 100644
--- a/arch/arm/plat-s5p/dev-uart.c
+++ b/arch/arm/plat-s5p/dev-uart.c
@@ -28,7 +28,7 @@
28static struct resource s5p_uart0_resource[] = { 28static struct resource s5p_uart0_resource[] = {
29 [0] = { 29 [0] = {
30 .start = S5P_PA_UART0, 30 .start = S5P_PA_UART0,
31 .end = S5P_PA_UART0 + S5P_SZ_UART, 31 .end = S5P_PA_UART0 + S5P_SZ_UART - 1,
32 .flags = IORESOURCE_MEM, 32 .flags = IORESOURCE_MEM,
33 }, 33 },
34 [1] = { 34 [1] = {
@@ -51,7 +51,7 @@ static struct resource s5p_uart0_resource[] = {
51static struct resource s5p_uart1_resource[] = { 51static struct resource s5p_uart1_resource[] = {
52 [0] = { 52 [0] = {
53 .start = S5P_PA_UART1, 53 .start = S5P_PA_UART1,
54 .end = S5P_PA_UART1 + S5P_SZ_UART, 54 .end = S5P_PA_UART1 + S5P_SZ_UART - 1,
55 .flags = IORESOURCE_MEM, 55 .flags = IORESOURCE_MEM,
56 }, 56 },
57 [1] = { 57 [1] = {
@@ -74,7 +74,7 @@ static struct resource s5p_uart1_resource[] = {
74static struct resource s5p_uart2_resource[] = { 74static struct resource s5p_uart2_resource[] = {
75 [0] = { 75 [0] = {
76 .start = S5P_PA_UART2, 76 .start = S5P_PA_UART2,
77 .end = S5P_PA_UART2 + S5P_SZ_UART, 77 .end = S5P_PA_UART2 + S5P_SZ_UART - 1,
78 .flags = IORESOURCE_MEM, 78 .flags = IORESOURCE_MEM,
79 }, 79 },
80 [1] = { 80 [1] = {
@@ -98,7 +98,7 @@ static struct resource s5p_uart3_resource[] = {
98#if CONFIG_SERIAL_SAMSUNG_UARTS > 3 98#if CONFIG_SERIAL_SAMSUNG_UARTS > 3
99 [0] = { 99 [0] = {
100 .start = S5P_PA_UART3, 100 .start = S5P_PA_UART3,
101 .end = S5P_PA_UART3 + S5P_SZ_UART, 101 .end = S5P_PA_UART3 + S5P_SZ_UART - 1,
102 .flags = IORESOURCE_MEM, 102 .flags = IORESOURCE_MEM,
103 }, 103 },
104 [1] = { 104 [1] = {
@@ -123,7 +123,7 @@ static struct resource s5p_uart4_resource[] = {
123#if CONFIG_SERIAL_SAMSUNG_UARTS > 4 123#if CONFIG_SERIAL_SAMSUNG_UARTS > 4
124 [0] = { 124 [0] = {
125 .start = S5P_PA_UART4, 125 .start = S5P_PA_UART4,
126 .end = S5P_PA_UART4 + S5P_SZ_UART, 126 .end = S5P_PA_UART4 + S5P_SZ_UART - 1,
127 .flags = IORESOURCE_MEM, 127 .flags = IORESOURCE_MEM,
128 }, 128 },
129 [1] = { 129 [1] = {
@@ -148,7 +148,7 @@ static struct resource s5p_uart5_resource[] = {
148#if CONFIG_SERIAL_SAMSUNG_UARTS > 5 148#if CONFIG_SERIAL_SAMSUNG_UARTS > 5
149 [0] = { 149 [0] = {
150 .start = S5P_PA_UART5, 150 .start = S5P_PA_UART5,
151 .end = S5P_PA_UART5 + S5P_SZ_UART, 151 .end = S5P_PA_UART5 + S5P_SZ_UART - 1,
152 .flags = IORESOURCE_MEM, 152 .flags = IORESOURCE_MEM,
153 }, 153 },
154 [1] = { 154 [1] = {
diff --git a/arch/arm/plat-samsung/dev-ts.c b/arch/arm/plat-samsung/dev-ts.c
index 236ef8427d7d..3e4bd8147bf4 100644
--- a/arch/arm/plat-samsung/dev-ts.c
+++ b/arch/arm/plat-samsung/dev-ts.c
@@ -58,4 +58,3 @@ void __init s3c24xx_ts_set_platdata(struct s3c2410_ts_mach_info *pd)
58 58
59 s3c_device_ts.dev.platform_data = npd; 59 s3c_device_ts.dev.platform_data = npd;
60} 60}
61EXPORT_SYMBOL(s3c24xx_ts_set_platdata);
diff --git a/arch/arm/plat-samsung/dev-uart.c b/arch/arm/plat-samsung/dev-uart.c
index 3776cd952450..5928105490fa 100644
--- a/arch/arm/plat-samsung/dev-uart.c
+++ b/arch/arm/plat-samsung/dev-uart.c
@@ -15,6 +15,8 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/platform_device.h> 16#include <linux/platform_device.h>
17 17
18#include <plat/devs.h>
19
18/* uart devices */ 20/* uart devices */
19 21
20static struct platform_device s3c24xx_uart_device0 = { 22static struct platform_device s3c24xx_uart_device0 = {
diff --git a/arch/arm/plat-spear/include/plat/uncompress.h b/arch/arm/plat-spear/include/plat/uncompress.h
index 99ba6789cc97..6dd455bafdfd 100644
--- a/arch/arm/plat-spear/include/plat/uncompress.h
+++ b/arch/arm/plat-spear/include/plat/uncompress.h
@@ -24,10 +24,10 @@ static inline void putc(int c)
24{ 24{
25 void __iomem *base = (void __iomem *)SPEAR_DBG_UART_BASE; 25 void __iomem *base = (void __iomem *)SPEAR_DBG_UART_BASE;
26 26
27 while (readl(base + UART01x_FR) & UART01x_FR_TXFF) 27 while (readl_relaxed(base + UART01x_FR) & UART01x_FR_TXFF)
28 barrier(); 28 barrier();
29 29
30 writel(c, base + UART01x_DR); 30 writel_relaxed(c, base + UART01x_DR);
31} 31}
32 32
33static inline void flush(void) 33static inline void flush(void)
diff --git a/arch/arm/plat-spear/include/plat/vmalloc.h b/arch/arm/plat-spear/include/plat/vmalloc.h
index 09e9372aea21..8c8b24d07046 100644
--- a/arch/arm/plat-spear/include/plat/vmalloc.h
+++ b/arch/arm/plat-spear/include/plat/vmalloc.h
@@ -14,6 +14,6 @@
14#ifndef __PLAT_VMALLOC_H 14#ifndef __PLAT_VMALLOC_H
15#define __PLAT_VMALLOC_H 15#define __PLAT_VMALLOC_H
16 16
17#define VMALLOC_END 0xF0000000 17#define VMALLOC_END 0xF0000000UL
18 18
19#endif /* __PLAT_VMALLOC_H */ 19#endif /* __PLAT_VMALLOC_H */
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index c9113619029f..8d73724c0092 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -114,16 +114,14 @@ u32 arch_gettimeoffset(void)
114 114
115/* 115/*
116 * timer_interrupt() needs to keep up the real-time clock, 116 * timer_interrupt() needs to keep up the real-time clock,
117 * as well as call the "do_timer()" routine every clocktick 117 * as well as call the "xtime_update()" routine every clocktick
118 */ 118 */
119#ifdef CONFIG_CORE_TIMER_IRQ_L1 119#ifdef CONFIG_CORE_TIMER_IRQ_L1
120__attribute__((l1_text)) 120__attribute__((l1_text))
121#endif 121#endif
122irqreturn_t timer_interrupt(int irq, void *dummy) 122irqreturn_t timer_interrupt(int irq, void *dummy)
123{ 123{
124 write_seqlock(&xtime_lock); 124 xtime_update(1);
125 do_timer(1);
126 write_sequnlock(&xtime_lock);
127 125
128#ifdef CONFIG_IPIPE 126#ifdef CONFIG_IPIPE
129 update_root_process_times(get_irq_regs()); 127 update_root_process_times(get_irq_regs());
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index 4122678529c0..c40d07f708e8 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -136,7 +136,7 @@ SECTIONS
136 136
137 . = ALIGN(16); 137 . = ALIGN(16);
138 INIT_DATA_SECTION(16) 138 INIT_DATA_SECTION(16)
139 PERCPU(4) 139 PERCPU(32, 4)
140 140
141 .exit.data : 141 .exit.data :
142 { 142 {
diff --git a/arch/blackfin/lib/outs.S b/arch/blackfin/lib/outs.S
index 250f4d4b9436..06a5e674401f 100644
--- a/arch/blackfin/lib/outs.S
+++ b/arch/blackfin/lib/outs.S
@@ -13,6 +13,8 @@
13.align 2 13.align 2
14 14
15ENTRY(_outsl) 15ENTRY(_outsl)
16 CC = R2 == 0;
17 IF CC JUMP 1f;
16 P0 = R0; /* P0 = port */ 18 P0 = R0; /* P0 = port */
17 P1 = R1; /* P1 = address */ 19 P1 = R1; /* P1 = address */
18 P2 = R2; /* P2 = count */ 20 P2 = R2; /* P2 = count */
@@ -20,10 +22,12 @@ ENTRY(_outsl)
20 LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2; 22 LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
21.Llong_loop_s: R0 = [P1++]; 23.Llong_loop_s: R0 = [P1++];
22.Llong_loop_e: [P0] = R0; 24.Llong_loop_e: [P0] = R0;
23 RTS; 251: RTS;
24ENDPROC(_outsl) 26ENDPROC(_outsl)
25 27
26ENTRY(_outsw) 28ENTRY(_outsw)
29 CC = R2 == 0;
30 IF CC JUMP 1f;
27 P0 = R0; /* P0 = port */ 31 P0 = R0; /* P0 = port */
28 P1 = R1; /* P1 = address */ 32 P1 = R1; /* P1 = address */
29 P2 = R2; /* P2 = count */ 33 P2 = R2; /* P2 = count */
@@ -31,10 +35,12 @@ ENTRY(_outsw)
31 LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2; 35 LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
32.Lword_loop_s: R0 = W[P1++]; 36.Lword_loop_s: R0 = W[P1++];
33.Lword_loop_e: W[P0] = R0; 37.Lword_loop_e: W[P0] = R0;
34 RTS; 381: RTS;
35ENDPROC(_outsw) 39ENDPROC(_outsw)
36 40
37ENTRY(_outsb) 41ENTRY(_outsb)
42 CC = R2 == 0;
43 IF CC JUMP 1f;
38 P0 = R0; /* P0 = port */ 44 P0 = R0; /* P0 = port */
39 P1 = R1; /* P1 = address */ 45 P1 = R1; /* P1 = address */
40 P2 = R2; /* P2 = count */ 46 P2 = R2; /* P2 = count */
@@ -42,10 +48,12 @@ ENTRY(_outsb)
42 LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2; 48 LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
43.Lbyte_loop_s: R0 = B[P1++]; 49.Lbyte_loop_s: R0 = B[P1++];
44.Lbyte_loop_e: B[P0] = R0; 50.Lbyte_loop_e: B[P0] = R0;
45 RTS; 511: RTS;
46ENDPROC(_outsb) 52ENDPROC(_outsb)
47 53
48ENTRY(_outsw_8) 54ENTRY(_outsw_8)
55 CC = R2 == 0;
56 IF CC JUMP 1f;
49 P0 = R0; /* P0 = port */ 57 P0 = R0; /* P0 = port */
50 P1 = R1; /* P1 = address */ 58 P1 = R1; /* P1 = address */
51 P2 = R2; /* P2 = count */ 59 P2 = R2; /* P2 = count */
@@ -56,5 +64,5 @@ ENTRY(_outsw_8)
56 R0 = R0 << 8; 64 R0 = R0 << 8;
57 R0 = R0 + R1; 65 R0 = R0 + R1;
58.Lword8_loop_e: W[P0] = R0; 66.Lword8_loop_e: W[P0] = R0;
59 RTS; 671: RTS;
60ENDPROC(_outsw_8) 68ENDPROC(_outsw_8)
diff --git a/arch/blackfin/mach-common/cache.S b/arch/blackfin/mach-common/cache.S
index 790c767ca95a..ab4a925a443e 100644
--- a/arch/blackfin/mach-common/cache.S
+++ b/arch/blackfin/mach-common/cache.S
@@ -58,6 +58,8 @@
581: 581:
59.ifeqs "\flushins", BROK_FLUSH_INST 59.ifeqs "\flushins", BROK_FLUSH_INST
60 \flushins [P0++]; 60 \flushins [P0++];
61 nop;
62 nop;
612: nop; 632: nop;
62.else 64.else
632: \flushins [P0++]; 652: \flushins [P0++];
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 00eb36f8debf..20c85b5dc7d0 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -140,7 +140,7 @@ stop_watchdog(void)
140 140
141/* 141/*
142 * timer_interrupt() needs to keep up the real-time clock, 142 * timer_interrupt() needs to keep up the real-time clock,
143 * as well as call the "do_timer()" routine every clocktick 143 * as well as call the "xtime_update()" routine every clocktick
144 */ 144 */
145 145
146//static unsigned short myjiff; /* used by our debug routine print_timestamp */ 146//static unsigned short myjiff; /* used by our debug routine print_timestamp */
@@ -176,7 +176,7 @@ timer_interrupt(int irq, void *dev_id)
176 176
177 /* call the real timer interrupt handler */ 177 /* call the real timer interrupt handler */
178 178
179 do_timer(1); 179 xtime_update(1);
180 180
181 cris_do_profile(regs); /* Save profiling information */ 181 cris_do_profile(regs); /* Save profiling information */
182 return IRQ_HANDLED; 182 return IRQ_HANDLED;
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 84fed3b4b079..4c9e3e1ba5d1 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -26,7 +26,9 @@
26#define FLUSH_ALL (void*)0xffffffff 26#define FLUSH_ALL (void*)0xffffffff
27 27
28/* Vector of locks used for various atomic operations */ 28/* Vector of locks used for various atomic operations */
29spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; 29spinlock_t cris_atomic_locks[] = {
30 [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks)
31};
30 32
31/* CPU masks */ 33/* CPU masks */
32cpumask_t phys_cpu_present_map = CPU_MASK_NONE; 34cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index a545211e999d..bb978ede8985 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -183,7 +183,7 @@ void handle_watchdog_bite(struct pt_regs *regs)
183 183
184/* 184/*
185 * timer_interrupt() needs to keep up the real-time clock, 185 * timer_interrupt() needs to keep up the real-time clock,
186 * as well as call the "do_timer()" routine every clocktick. 186 * as well as call the "xtime_update()" routine every clocktick.
187 */ 187 */
188extern void cris_do_profile(struct pt_regs *regs); 188extern void cris_do_profile(struct pt_regs *regs);
189 189
@@ -216,9 +216,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
216 return IRQ_HANDLED; 216 return IRQ_HANDLED;
217 217
218 /* Call the real timer interrupt handler */ 218 /* Call the real timer interrupt handler */
219 write_seqlock(&xtime_lock); 219 xtime_update(1);
220 do_timer(1);
221 write_sequnlock(&xtime_lock);
222 return IRQ_HANDLED; 220 return IRQ_HANDLED;
223} 221}
224 222
diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S
index 442218980db0..728bbd9e7d4c 100644
--- a/arch/cris/kernel/vmlinux.lds.S
+++ b/arch/cris/kernel/vmlinux.lds.S
@@ -72,11 +72,6 @@ SECTIONS
72 INIT_TEXT_SECTION(PAGE_SIZE) 72 INIT_TEXT_SECTION(PAGE_SIZE)
73 .init.data : { INIT_DATA } 73 .init.data : { INIT_DATA }
74 .init.setup : { INIT_SETUP(16) } 74 .init.setup : { INIT_SETUP(16) }
75#ifdef CONFIG_ETRAX_ARCH_V32
76 __start___param = .;
77 __param : { *(__param) }
78 __stop___param = .;
79#endif
80 .initcall.init : { 75 .initcall.init : {
81 INIT_CALLS 76 INIT_CALLS
82 } 77 }
@@ -107,7 +102,7 @@ SECTIONS
107#endif 102#endif
108 __vmlinux_end = .; /* Last address of the physical file. */ 103 __vmlinux_end = .; /* Last address of the physical file. */
109#ifdef CONFIG_ETRAX_ARCH_V32 104#ifdef CONFIG_ETRAX_ARCH_V32
110 PERCPU(PAGE_SIZE) 105 PERCPU(32, PAGE_SIZE)
111 106
112 .init.ramfs : { 107 .init.ramfs : {
113 INIT_RAM_FS 108 INIT_RAM_FS
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 08b3d1da3583..4bea27f50a7a 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -7,10 +7,11 @@
7#include <asm/errno.h> 7#include <asm/errno.h>
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr); 10extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
11 11
12static inline int 12static inline int
13futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 13futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
14 u32 oldval, u32 newval)
14{ 15{
15 return -ENOSYS; 16 return -ENOSYS;
16} 17}
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index 14f64b054c7e..d155ca9e5098 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -18,7 +18,7 @@
18 * the various futex operations; MMU fault checking is ignored under no-MMU 18 * the various futex operations; MMU fault checking is ignored under no-MMU
19 * conditions 19 * conditions
20 */ 20 */
21static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_oldval) 21static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr, int *_oldval)
22{ 22{
23 int oldval, ret; 23 int oldval, ret;
24 24
@@ -50,7 +50,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_o
50 return ret; 50 return ret;
51} 51}
52 52
53static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_oldval) 53static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr, int *_oldval)
54{ 54{
55 int oldval, ret; 55 int oldval, ret;
56 56
@@ -83,7 +83,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_o
83 return ret; 83 return ret;
84} 84}
85 85
86static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_oldval) 86static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr, int *_oldval)
87{ 87{
88 int oldval, ret; 88 int oldval, ret;
89 89
@@ -116,7 +116,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_ol
116 return ret; 116 return ret;
117} 117}
118 118
119static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_oldval) 119static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr, int *_oldval)
120{ 120{
121 int oldval, ret; 121 int oldval, ret;
122 122
@@ -149,7 +149,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_o
149 return ret; 149 return ret;
150} 150}
151 151
152static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_oldval) 152static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_oldval)
153{ 153{
154 int oldval, ret; 154 int oldval, ret;
155 155
@@ -186,7 +186,7 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_o
186/* 186/*
187 * do the futex operations 187 * do the futex operations
188 */ 188 */
189int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 189int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
190{ 190{
191 int op = (encoded_op >> 28) & 7; 191 int op = (encoded_op >> 28) & 7;
192 int cmp = (encoded_op >> 24) & 15; 192 int cmp = (encoded_op >> 24) & 15;
@@ -197,7 +197,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
197 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 197 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
198 oparg = 1 << oparg; 198 oparg = 1 << oparg;
199 199
200 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 200 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
201 return -EFAULT; 201 return -EFAULT;
202 202
203 pagefault_disable(); 203 pagefault_disable();
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 0ddbbae83cb2..b457de496b70 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -50,21 +50,13 @@ static struct irqaction timer_irq = {
50 50
51/* 51/*
52 * timer_interrupt() needs to keep up the real-time clock, 52 * timer_interrupt() needs to keep up the real-time clock,
53 * as well as call the "do_timer()" routine every clocktick 53 * as well as call the "xtime_update()" routine every clocktick
54 */ 54 */
55static irqreturn_t timer_interrupt(int irq, void *dummy) 55static irqreturn_t timer_interrupt(int irq, void *dummy)
56{ 56{
57 profile_tick(CPU_PROFILING); 57 profile_tick(CPU_PROFILING);
58 /*
59 * Here we are in the timer irq handler. We just have irqs locally
60 * disabled but we don't know if the timer_bh is running on the other
61 * CPU. We need to avoid to SMP race with it. NOTE: we don't need
62 * the irq version of write_lock because as just said we have irq
63 * locally disabled. -arca
64 */
65 write_seqlock(&xtime_lock);
66 58
67 do_timer(1); 59 xtime_update(1);
68 60
69#ifdef CONFIG_HEARTBEAT 61#ifdef CONFIG_HEARTBEAT
70 static unsigned short n; 62 static unsigned short n;
@@ -72,8 +64,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy)
72 __set_LEDS(n); 64 __set_LEDS(n);
73#endif /* CONFIG_HEARTBEAT */ 65#endif /* CONFIG_HEARTBEAT */
74 66
75 write_sequnlock(&xtime_lock);
76
77 update_process_times(user_mode(get_irq_regs())); 67 update_process_times(user_mode(get_irq_regs()));
78 68
79 return IRQ_HANDLED; 69 return IRQ_HANDLED;
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S
index 8b973f3cc90e..0daae8af5787 100644
--- a/arch/frv/kernel/vmlinux.lds.S
+++ b/arch/frv/kernel/vmlinux.lds.S
@@ -37,7 +37,7 @@ SECTIONS
37 _einittext = .; 37 _einittext = .;
38 38
39 INIT_DATA_SECTION(8) 39 INIT_DATA_SECTION(8)
40 PERCPU(4096) 40 PERCPU(L1_CACHE_BYTES, 4096)
41 41
42 . = ALIGN(PAGE_SIZE); 42 . = ALIGN(PAGE_SIZE);
43 __init_end = .; 43 __init_end = .;
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
index 165005aff9df..32263a138aa6 100644
--- a/arch/h8300/kernel/time.c
+++ b/arch/h8300/kernel/time.c
@@ -35,9 +35,7 @@ void h8300_timer_tick(void)
35{ 35{
36 if (current->pid) 36 if (current->pid)
37 profile_tick(CPU_PROFILING); 37 profile_tick(CPU_PROFILING);
38 write_seqlock(&xtime_lock); 38 xtime_update(1);
39 do_timer(1);
40 write_sequnlock(&xtime_lock);
41 update_process_times(user_mode(get_irq_regs())); 39 update_process_times(user_mode(get_irq_regs()));
42} 40}
43 41
diff --git a/arch/h8300/kernel/timer/timer8.c b/arch/h8300/kernel/timer/timer8.c
index 3946c0fa8374..7a1533fad47d 100644
--- a/arch/h8300/kernel/timer/timer8.c
+++ b/arch/h8300/kernel/timer/timer8.c
@@ -61,7 +61,7 @@
61 61
62/* 62/*
63 * timer_interrupt() needs to keep up the real-time clock, 63 * timer_interrupt() needs to keep up the real-time clock,
64 * as well as call the "do_timer()" routine every clocktick 64 * as well as call the "xtime_update()" routine every clocktick
65 */ 65 */
66 66
67static irqreturn_t timer_interrupt(int irq, void *dev_id) 67static irqreturn_t timer_interrupt(int irq, void *dev_id)
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index c7f0f062239c..8428525ddb22 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -46,7 +46,7 @@ do { \
46} while (0) 46} while (0)
47 47
48static inline int 48static inline int
49futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 49futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
50{ 50{
51 int op = (encoded_op >> 28) & 7; 51 int op = (encoded_op >> 28) & 7;
52 int cmp = (encoded_op >> 24) & 15; 52 int cmp = (encoded_op >> 24) & 15;
@@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
56 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 56 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
57 oparg = 1 << oparg; 57 oparg = 1 << oparg;
58 58
59 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 59 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
60 return -EFAULT; 60 return -EFAULT;
61 61
62 pagefault_disable(); 62 pagefault_disable();
@@ -100,23 +100,26 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
100} 100}
101 101
102static inline int 102static inline int
103futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 103futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
104 u32 oldval, u32 newval)
104{ 105{
105 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 106 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
106 return -EFAULT; 107 return -EFAULT;
107 108
108 { 109 {
109 register unsigned long r8 __asm ("r8"); 110 register unsigned long r8 __asm ("r8") = 0;
111 unsigned long prev;
110 __asm__ __volatile__( 112 __asm__ __volatile__(
111 " mf;; \n" 113 " mf;; \n"
112 " mov ar.ccv=%3;; \n" 114 " mov ar.ccv=%3;; \n"
113 "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n" 115 "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n"
114 " .xdata4 \"__ex_table\", 1b-., 2f-. \n" 116 " .xdata4 \"__ex_table\", 1b-., 2f-. \n"
115 "[2:]" 117 "[2:]"
116 : "=r" (r8) 118 : "=r" (prev)
117 : "r" (uaddr), "r" (newval), 119 : "r" (uaddr), "r" (newval),
118 "rO" ((long) (unsigned) oldval) 120 "rO" ((long) (unsigned) oldval)
119 : "memory"); 121 : "memory");
122 *uval = prev;
120 return r8; 123 return r8;
121 } 124 }
122} 125}
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 215d5454c7d3..3027e7516d85 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -25,20 +25,8 @@
25#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." 25#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
26#endif 26#endif
27 27
28#include <linux/list.h>
29#include <linux/spinlock.h>
30
31#include <asm/intrinsics.h> 28#include <asm/intrinsics.h>
32 29
33/*
34 * the semaphore definition
35 */
36struct rw_semaphore {
37 signed long count;
38 spinlock_t wait_lock;
39 struct list_head wait_list;
40};
41
42#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) 30#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
43#define RWSEM_ACTIVE_BIAS (1L) 31#define RWSEM_ACTIVE_BIAS (1L)
44#define RWSEM_ACTIVE_MASK (0xffffffffL) 32#define RWSEM_ACTIVE_MASK (0xffffffffL)
@@ -46,26 +34,6 @@ struct rw_semaphore {
46#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 34#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
47#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 35#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
48 36
49#define __RWSEM_INITIALIZER(name) \
50 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
51 LIST_HEAD_INIT((name).wait_list) }
52
53#define DECLARE_RWSEM(name) \
54 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
55
56extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
57extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
58extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
59extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
60
61static inline void
62init_rwsem (struct rw_semaphore *sem)
63{
64 sem->count = RWSEM_UNLOCKED_VALUE;
65 spin_lock_init(&sem->wait_lock);
66 INIT_LIST_HEAD(&sem->wait_list);
67}
68
69/* 37/*
70 * lock for reading 38 * lock for reading
71 */ 39 */
@@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem)
174#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) 142#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
175#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) 143#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
176 144
177static inline int rwsem_is_locked(struct rw_semaphore *sem)
178{
179 return (sem->count != 0);
180}
181
182#endif /* _ASM_IA64_RWSEM_H */ 145#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
index 96fc62366aa4..ed28bcd5bb85 100644
--- a/arch/ia64/include/asm/xen/hypercall.h
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
107static inline int 107static inline int
108xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg) 108xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
109{ 109{
110 return _hypercall2(int, sched_op_new, cmd, arg); 110 return _hypercall2(int, sched_op, cmd, arg);
111} 111}
112 112
113static inline long 113static inline long
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 9702fa92489e..156ad803d5b7 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -190,19 +190,10 @@ timer_interrupt (int irq, void *dev_id)
190 190
191 new_itm += local_cpu_data->itm_delta; 191 new_itm += local_cpu_data->itm_delta;
192 192
193 if (smp_processor_id() == time_keeper_id) { 193 if (smp_processor_id() == time_keeper_id)
194 /* 194 xtime_update(1);
195 * Here we are in the timer irq handler. We have irqs locally 195
196 * disabled, but we don't know if the timer_bh is running on 196 local_cpu_data->itm_next = new_itm;
197 * another CPU. We need to avoid to SMP race by acquiring the
198 * xtime_lock.
199 */
200 write_seqlock(&xtime_lock);
201 do_timer(1);
202 local_cpu_data->itm_next = new_itm;
203 write_sequnlock(&xtime_lock);
204 } else
205 local_cpu_data->itm_next = new_itm;
206 197
207 if (time_after(new_itm, ia64_get_itc())) 198 if (time_after(new_itm, ia64_get_itc()))
208 break; 199 break;
@@ -222,7 +213,7 @@ skip_process_time_accounting:
222 * comfort, we increase the safety margin by 213 * comfort, we increase the safety margin by
223 * intentionally dropping the next tick(s). We do NOT 214 * intentionally dropping the next tick(s). We do NOT
224 * update itm.next because that would force us to call 215 * update itm.next because that would force us to call
225 * do_timer() which in turn would let our clock run 216 * xtime_update() which in turn would let our clock run
226 * too fast (with the potentially devastating effect 217 * too fast (with the potentially devastating effect
227 * of losing monotony of time). 218 * of losing monotony of time).
228 */ 219 */
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 5a4d044dcb1c..787de4a77d82 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -198,7 +198,7 @@ SECTIONS {
198 198
199 /* Per-cpu data: */ 199 /* Per-cpu data: */
200 . = ALIGN(PERCPU_PAGE_SIZE); 200 . = ALIGN(PERCPU_PAGE_SIZE);
201 PERCPU_VADDR(PERCPU_ADDR, :percpu) 201 PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
202 __phys_per_cpu_start = __per_cpu_load; 202 __phys_per_cpu_start = __per_cpu_load;
203 /* 203 /*
204 * ensure percpu data fits 204 * ensure percpu data fits
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
index fd66b048c6fa..419c8620945a 100644
--- a/arch/ia64/xen/suspend.c
+++ b/arch/ia64/xen/suspend.c
@@ -37,19 +37,14 @@ xen_mm_unpin_all(void)
37 /* nothing */ 37 /* nothing */
38} 38}
39 39
40void xen_pre_device_suspend(void)
41{
42 /* nothing */
43}
44
45void 40void
46xen_pre_suspend() 41xen_arch_pre_suspend()
47{ 42{
48 /* nothing */ 43 /* nothing */
49} 44}
50 45
51void 46void
52xen_post_suspend(int suspend_cancelled) 47xen_arch_post_suspend(int suspend_cancelled)
53{ 48{
54 if (suspend_cancelled) 49 if (suspend_cancelled)
55 return; 50 return;
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index c1c544513e8d..1f8244a78bee 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -139,14 +139,11 @@ consider_steal_time(unsigned long new_itm)
139 run_posix_cpu_timers(p); 139 run_posix_cpu_timers(p);
140 delta_itm += local_cpu_data->itm_delta * (stolen + blocked); 140 delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
141 141
142 if (cpu == time_keeper_id) { 142 if (cpu == time_keeper_id)
143 write_seqlock(&xtime_lock); 143 xtime_update(stolen + blocked);
144 do_timer(stolen + blocked); 144
145 local_cpu_data->itm_next = delta_itm + new_itm; 145 local_cpu_data->itm_next = delta_itm + new_itm;
146 write_sequnlock(&xtime_lock); 146
147 } else {
148 local_cpu_data->itm_next = delta_itm + new_itm;
149 }
150 per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen; 147 per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
151 per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked; 148 per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
152 } 149 }
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index bda86820bffd..84dd04048db9 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -107,15 +107,14 @@ u32 arch_gettimeoffset(void)
107 107
108/* 108/*
109 * timer_interrupt() needs to keep up the real-time clock, 109 * timer_interrupt() needs to keep up the real-time clock,
110 * as well as call the "do_timer()" routine every clocktick 110 * as well as call the "xtime_update()" routine every clocktick
111 */ 111 */
112static irqreturn_t timer_interrupt(int irq, void *dev_id) 112static irqreturn_t timer_interrupt(int irq, void *dev_id)
113{ 113{
114#ifndef CONFIG_SMP 114#ifndef CONFIG_SMP
115 profile_tick(CPU_PROFILING); 115 profile_tick(CPU_PROFILING);
116#endif 116#endif
117 /* XXX FIXME. Uh, the xtime_lock should be held here, no? */ 117 xtime_update(1);
118 do_timer(1);
119 118
120#ifndef CONFIG_SMP 119#ifndef CONFIG_SMP
121 update_process_times(user_mode(get_irq_regs())); 120 update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index 7da94eaa082b..c194d64cdbb9 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -53,7 +53,7 @@ SECTIONS
53 __init_begin = .; 53 __init_begin = .;
54 INIT_TEXT_SECTION(PAGE_SIZE) 54 INIT_TEXT_SECTION(PAGE_SIZE)
55 INIT_DATA_SECTION(16) 55 INIT_DATA_SECTION(16)
56 PERCPU(PAGE_SIZE) 56 PERCPU(32, PAGE_SIZE)
57 . = ALIGN(PAGE_SIZE); 57 . = ALIGN(PAGE_SIZE);
58 __init_end = .; 58 __init_end = .;
59 /* freed after init ends here */ 59 /* freed after init ends here */
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 9fe6fefb5e14..1edd95095cb4 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -45,8 +45,8 @@ extern int bvme6000_set_clock_mmss (unsigned long);
45extern void bvme6000_reset (void); 45extern void bvme6000_reset (void);
46void bvme6000_set_vectors (void); 46void bvme6000_set_vectors (void);
47 47
48/* Save tick handler routine pointer, will point to do_timer() in 48/* Save tick handler routine pointer, will point to xtime_update() in
49 * kernel/sched.c, called via bvme6000_process_int() */ 49 * kernel/timer/timekeeping.c, called via bvme6000_process_int() */
50 50
51static irq_handler_t tick_handler; 51static irq_handler_t tick_handler;
52 52
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 06438dac08ff..18b34ee5db3b 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -37,11 +37,11 @@ static inline int set_rtc_mmss(unsigned long nowtime)
37 37
38/* 38/*
39 * timer_interrupt() needs to keep up the real-time clock, 39 * timer_interrupt() needs to keep up the real-time clock,
40 * as well as call the "do_timer()" routine every clocktick 40 * as well as call the "xtime_update()" routine every clocktick
41 */ 41 */
42static irqreturn_t timer_interrupt(int irq, void *dummy) 42static irqreturn_t timer_interrupt(int irq, void *dummy)
43{ 43{
44 do_timer(1); 44 xtime_update(1);
45 update_process_times(user_mode(get_irq_regs())); 45 update_process_times(user_mode(get_irq_regs()));
46 profile_tick(CPU_PROFILING); 46 profile_tick(CPU_PROFILING);
47 47
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 100baaa692a1..6cb9c3a9b6c9 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -46,8 +46,8 @@ extern void mvme147_reset (void);
46 46
47static int bcd2int (unsigned char b); 47static int bcd2int (unsigned char b);
48 48
49/* Save tick handler routine pointer, will point to do_timer() in 49/* Save tick handler routine pointer, will point to xtime_update() in
50 * kernel/sched.c, called via mvme147_process_int() */ 50 * kernel/time/timekeeping.c, called via mvme147_process_int() */
51 51
52irq_handler_t tick_handler; 52irq_handler_t tick_handler;
53 53
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 11edf61cc2c4..0b28e2621653 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -51,8 +51,8 @@ extern void mvme16x_reset (void);
51 51
52int bcd2int (unsigned char b); 52int bcd2int (unsigned char b);
53 53
54/* Save tick handler routine pointer, will point to do_timer() in 54/* Save tick handler routine pointer, will point to xtime_update() in
55 * kernel/sched.c, called via mvme16x_process_int() */ 55 * kernel/time/timekeeping.c, called via mvme16x_process_int() */
56 56
57static irq_handler_t tick_handler; 57static irq_handler_t tick_handler;
58 58
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 2d9e21bd313a..6464ad3ae3e6 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -66,7 +66,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id)
66#ifdef CONFIG_SUN3 66#ifdef CONFIG_SUN3
67 intersil_clear(); 67 intersil_clear();
68#endif 68#endif
69 do_timer(1); 69 xtime_update(1);
70 update_process_times(user_mode(get_irq_regs())); 70 update_process_times(user_mode(get_irq_regs()));
71 if (!(kstat_cpu(0).irqs[irq] % 20)) 71 if (!(kstat_cpu(0).irqs[irq] % 20))
72 sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]); 72 sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]);
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index d6ac2a43453c..6623909f70e6 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -36,7 +36,7 @@ static inline int set_rtc_mmss(unsigned long nowtime)
36#ifndef CONFIG_GENERIC_CLOCKEVENTS 36#ifndef CONFIG_GENERIC_CLOCKEVENTS
37/* 37/*
38 * timer_interrupt() needs to keep up the real-time clock, 38 * timer_interrupt() needs to keep up the real-time clock,
39 * as well as call the "do_timer()" routine every clocktick 39 * as well as call the "xtime_update()" routine every clocktick
40 */ 40 */
41irqreturn_t arch_timer_interrupt(int irq, void *dummy) 41irqreturn_t arch_timer_interrupt(int irq, void *dummy)
42{ 42{
@@ -44,11 +44,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
44 if (current->pid) 44 if (current->pid)
45 profile_tick(CPU_PROFILING); 45 profile_tick(CPU_PROFILING);
46 46
47 write_seqlock(&xtime_lock); 47 xtime_update(1);
48
49 do_timer(1);
50
51 write_sequnlock(&xtime_lock);
52 48
53 update_process_times(user_mode(get_irq_regs())); 49 update_process_times(user_mode(get_irq_regs()));
54 50
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h
index ad3fd61b2fe7..b0526d2716fa 100644
--- a/arch/microblaze/include/asm/futex.h
+++ b/arch/microblaze/include/asm/futex.h
@@ -29,7 +29,7 @@
29}) 29})
30 30
31static inline int 31static inline int
32futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 32futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
33{ 33{
34 int op = (encoded_op >> 28) & 7; 34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15; 35 int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg; 40 oparg = 1 << oparg;
41 41
42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT; 43 return -EFAULT;
44 44
45 pagefault_disable(); 45 pagefault_disable();
@@ -94,31 +94,34 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
94} 94}
95 95
96static inline int 96static inline int
97futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 97futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
98 u32 oldval, u32 newval)
98{ 99{
99 int prev, cmp; 100 int ret = 0, cmp;
101 u32 prev;
100 102
101 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 103 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
102 return -EFAULT; 104 return -EFAULT;
103 105
104 __asm__ __volatile__ ("1: lwx %0, %2, r0; \ 106 __asm__ __volatile__ ("1: lwx %1, %3, r0; \
105 cmp %1, %0, %3; \ 107 cmp %2, %1, %4; \
106 beqi %1, 3f; \ 108 beqi %2, 3f; \
107 2: swx %4, %2, r0; \ 109 2: swx %5, %3, r0; \
108 addic %1, r0, 0; \ 110 addic %2, r0, 0; \
109 bnei %1, 1b; \ 111 bnei %2, 1b; \
110 3: \ 112 3: \
111 .section .fixup,\"ax\"; \ 113 .section .fixup,\"ax\"; \
112 4: brid 3b; \ 114 4: brid 3b; \
113 addik %0, r0, %5; \ 115 addik %0, r0, %6; \
114 .previous; \ 116 .previous; \
115 .section __ex_table,\"a\"; \ 117 .section __ex_table,\"a\"; \
116 .word 1b,4b,2b,4b; \ 118 .word 1b,4b,2b,4b; \
117 .previous;" \ 119 .previous;" \
118 : "=&r" (prev), "=&r"(cmp) \ 120 : "+r" (ret), "=&r" (prev), "=&r"(cmp) \
119 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)); 121 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT));
120 122
121 return prev; 123 *uval = prev;
124 return ret;
122} 125}
123 126
124#endif /* __KERNEL__ */ 127#endif /* __KERNEL__ */
diff --git a/arch/microblaze/include/asm/pci-bridge.h b/arch/microblaze/include/asm/pci-bridge.h
index 0c68764ab547..10717669e0c2 100644
--- a/arch/microblaze/include/asm/pci-bridge.h
+++ b/arch/microblaze/include/asm/pci-bridge.h
@@ -104,11 +104,22 @@ struct pci_controller {
104 int global_number; /* PCI domain number */ 104 int global_number; /* PCI domain number */
105}; 105};
106 106
107#ifdef CONFIG_PCI
107static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus) 108static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus)
108{ 109{
109 return bus->sysdata; 110 return bus->sysdata;
110} 111}
111 112
113static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
114{
115 struct pci_controller *host;
116
117 if (bus->self)
118 return pci_device_to_OF_node(bus->self);
119 host = pci_bus_to_host(bus);
120 return host ? host->dn : NULL;
121}
122
112static inline int isa_vaddr_is_ioport(void __iomem *address) 123static inline int isa_vaddr_is_ioport(void __iomem *address)
113{ 124{
114 /* No specific ISA handling on ppc32 at this stage, it 125 /* No specific ISA handling on ppc32 at this stage, it
@@ -116,6 +127,7 @@ static inline int isa_vaddr_is_ioport(void __iomem *address)
116 */ 127 */
117 return 0; 128 return 0;
118} 129}
130#endif /* CONFIG_PCI */
119 131
120/* These are used for config access before all the PCI probing 132/* These are used for config access before all the PCI probing
121 has been done. */ 133 has been done. */
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 2e72af078b05..d0890d36ef61 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -64,21 +64,6 @@ extern void kdump_move_device_tree(void);
64/* CPU OF node matching */ 64/* CPU OF node matching */
65struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); 65struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
66 66
67/**
68 * of_irq_map_pci - Resolve the interrupt for a PCI device
69 * @pdev: the device whose interrupt is to be resolved
70 * @out_irq: structure of_irq filled by this function
71 *
72 * This function resolves the PCI interrupt for a given PCI device. If a
73 * device-node exists for a given pci_dev, it will use normal OF tree
74 * walking. If not, it will implement standard swizzling and walk up the
75 * PCI tree until an device-node is found, at which point it will finish
76 * resolving using the OF tree walking.
77 */
78struct pci_dev;
79struct of_irq;
80extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
81
82#endif /* __ASSEMBLY__ */ 67#endif /* __ASSEMBLY__ */
83#endif /* __KERNEL__ */ 68#endif /* __KERNEL__ */
84 69
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index 9ae24f4b882b..47187cc2cf00 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -2,88 +2,11 @@
2 2
3#include <linux/kernel.h> 3#include <linux/kernel.h>
4#include <linux/string.h> 4#include <linux/string.h>
5#include <linux/pci_regs.h>
6#include <linux/module.h> 5#include <linux/module.h>
7#include <linux/ioport.h> 6#include <linux/ioport.h>
8#include <linux/etherdevice.h> 7#include <linux/etherdevice.h>
9#include <linux/of_address.h> 8#include <linux/of_address.h>
10#include <asm/prom.h> 9#include <asm/prom.h>
11#include <asm/pci-bridge.h>
12
13#ifdef CONFIG_PCI
14int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
15{
16 struct device_node *dn, *ppnode;
17 struct pci_dev *ppdev;
18 u32 lspec;
19 u32 laddr[3];
20 u8 pin;
21 int rc;
22
23 /* Check if we have a device node, if yes, fallback to standard OF
24 * parsing
25 */
26 dn = pci_device_to_OF_node(pdev);
27 if (dn)
28 return of_irq_map_one(dn, 0, out_irq);
29
30 /* Ok, we don't, time to have fun. Let's start by building up an
31 * interrupt spec. we assume #interrupt-cells is 1, which is standard
32 * for PCI. If you do different, then don't use that routine.
33 */
34 rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
35 if (rc != 0)
36 return rc;
37 /* No pin, exit */
38 if (pin == 0)
39 return -ENODEV;
40
41 /* Now we walk up the PCI tree */
42 lspec = pin;
43 for (;;) {
44 /* Get the pci_dev of our parent */
45 ppdev = pdev->bus->self;
46
47 /* Ouch, it's a host bridge... */
48 if (ppdev == NULL) {
49 struct pci_controller *host;
50 host = pci_bus_to_host(pdev->bus);
51 ppnode = host ? host->dn : NULL;
52 /* No node for host bridge ? give up */
53 if (ppnode == NULL)
54 return -EINVAL;
55 } else
56 /* We found a P2P bridge, check if it has a node */
57 ppnode = pci_device_to_OF_node(ppdev);
58
59 /* Ok, we have found a parent with a device-node, hand over to
60 * the OF parsing code.
61 * We build a unit address from the linux device to be used for
62 * resolution. Note that we use the linux bus number which may
63 * not match your firmware bus numbering.
64 * Fortunately, in most cases, interrupt-map-mask doesn't
65 * include the bus number as part of the matching.
66 * You should still be careful about that though if you intend
67 * to rely on this function (you ship a firmware that doesn't
68 * create device nodes for all PCI devices).
69 */
70 if (ppnode)
71 break;
72
73 /* We can only get here if we hit a P2P bridge with no node,
74 * let's do standard swizzling and try again
75 */
76 lspec = pci_swizzle_interrupt_pin(pdev, lspec);
77 pdev = ppdev;
78 }
79
80 laddr[0] = (pdev->bus->number << 16)
81 | (pdev->devfn << 8);
82 laddr[1] = laddr[2] = 0;
83 return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq);
84}
85EXPORT_SYMBOL_GPL(of_irq_map_pci);
86#endif /* CONFIG_PCI */
87 10
88void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, 11void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
89 unsigned long *busno, unsigned long *phys, unsigned long *size) 12 unsigned long *busno, unsigned long *phys, unsigned long *size)
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index e363615d6798..1e01a1253631 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -29,6 +29,7 @@
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/of.h> 30#include <linux/of.h>
31#include <linux/of_address.h> 31#include <linux/of_address.h>
32#include <linux/of_pci.h>
32 33
33#include <asm/processor.h> 34#include <asm/processor.h>
34#include <asm/io.h> 35#include <asm/io.h>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f5ecc0566bc2..d88983516e26 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
4 select HAVE_GENERIC_DMA_COHERENT 4 select HAVE_GENERIC_DMA_COHERENT
5 select HAVE_IDE 5 select HAVE_IDE
6 select HAVE_OPROFILE 6 select HAVE_OPROFILE
7 select HAVE_IRQ_WORK
7 select HAVE_PERF_EVENTS 8 select HAVE_PERF_EVENTS
8 select PERF_USE_VMALLOC 9 select PERF_USE_VMALLOC
9 select HAVE_ARCH_KGDB 10 select HAVE_ARCH_KGDB
@@ -208,6 +209,7 @@ config MACH_JZ4740
208 select ARCH_REQUIRE_GPIOLIB 209 select ARCH_REQUIRE_GPIOLIB
209 select SYS_HAS_EARLY_PRINTK 210 select SYS_HAS_EARLY_PRINTK
210 select HAVE_PWM 211 select HAVE_PWM
212 select HAVE_CLK
211 213
212config LASAT 214config LASAT
213 bool "LASAT Networks platforms" 215 bool "LASAT Networks platforms"
@@ -333,6 +335,8 @@ config PNX8550_STB810
333config PMC_MSP 335config PMC_MSP
334 bool "PMC-Sierra MSP chipsets" 336 bool "PMC-Sierra MSP chipsets"
335 depends on EXPERIMENTAL 337 depends on EXPERIMENTAL
338 select CEVT_R4K
339 select CSRC_R4K
336 select DMA_NONCOHERENT 340 select DMA_NONCOHERENT
337 select SWAP_IO_SPACE 341 select SWAP_IO_SPACE
338 select NO_EXCEPT_FILL 342 select NO_EXCEPT_FILL
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index 6398fa95905c..40b84b991191 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -54,8 +54,8 @@ int mtx1_pci_idsel(unsigned int devsel, int assert);
54 54
55static void mtx1_reset(char *c) 55static void mtx1_reset(char *c)
56{ 56{
57 /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ 57 /* Jump to the reset vector */
58 au_writel(0x00000000, 0xAE00001C); 58 __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
59} 59}
60 60
61static void mtx1_power_off(void) 61static void mtx1_power_off(void)
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index e30e42add697..956f946218c5 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -28,6 +28,8 @@
28#include <linux/mtd/physmap.h> 28#include <linux/mtd/physmap.h>
29#include <mtd/mtd-abi.h> 29#include <mtd/mtd-abi.h>
30 30
31#include <asm/mach-au1x00/au1xxx_eth.h>
32
31static struct gpio_keys_button mtx1_gpio_button[] = { 33static struct gpio_keys_button mtx1_gpio_button[] = {
32 { 34 {
33 .gpio = 207, 35 .gpio = 207,
@@ -140,10 +142,17 @@ static struct __initdata platform_device * mtx1_devs[] = {
140 &mtx1_mtd, 142 &mtx1_mtd,
141}; 143};
142 144
145static struct au1000_eth_platform_data mtx1_au1000_eth0_pdata = {
146 .phy_search_highest_addr = 1,
147 .phy1_search_mac0 = 1,
148};
149
143static int __init mtx1_register_devices(void) 150static int __init mtx1_register_devices(void)
144{ 151{
145 int rc; 152 int rc;
146 153
154 au1xxx_override_eth_cfg(0, &mtx1_au1000_eth0_pdata);
155
147 rc = gpio_request(mtx1_gpio_button[0].gpio, 156 rc = gpio_request(mtx1_gpio_button[0].gpio,
148 mtx1_gpio_button[0].desc); 157 mtx1_gpio_button[0].desc);
149 if (rc < 0) { 158 if (rc < 0) {
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index b43c918925d3..80c521e5290d 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -36,8 +36,8 @@
36 36
37static void xxs1500_reset(char *c) 37static void xxs1500_reset(char *c)
38{ 38{
39 /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ 39 /* Jump to the reset vector */
40 au_writel(0x00000000, 0xAE00001C); 40 __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
41} 41}
42 42
43static void xxs1500_power_off(void) 43static void xxs1500_power_off(void)
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index b9cce90346cf..6ebf1734b411 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -75,7 +75,7 @@
75} 75}
76 76
77static inline int 77static inline int
78futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 78futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
79{ 79{
80 int op = (encoded_op >> 28) & 7; 80 int op = (encoded_op >> 28) & 7;
81 int cmp = (encoded_op >> 24) & 15; 81 int cmp = (encoded_op >> 24) & 15;
@@ -85,7 +85,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
85 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 85 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
86 oparg = 1 << oparg; 86 oparg = 1 << oparg;
87 87
88 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 88 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
89 return -EFAULT; 89 return -EFAULT;
90 90
91 pagefault_disable(); 91 pagefault_disable();
@@ -132,11 +132,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
132} 132}
133 133
134static inline int 134static inline int
135futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 135futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
136 u32 oldval, u32 newval)
136{ 137{
137 int retval; 138 int ret = 0;
139 u32 val;
138 140
139 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 141 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
140 return -EFAULT; 142 return -EFAULT;
141 143
142 if (cpu_has_llsc && R10000_LLSC_WAR) { 144 if (cpu_has_llsc && R10000_LLSC_WAR) {
@@ -145,25 +147,25 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
145 " .set push \n" 147 " .set push \n"
146 " .set noat \n" 148 " .set noat \n"
147 " .set mips3 \n" 149 " .set mips3 \n"
148 "1: ll %0, %2 \n" 150 "1: ll %1, %3 \n"
149 " bne %0, %z3, 3f \n" 151 " bne %1, %z4, 3f \n"
150 " .set mips0 \n" 152 " .set mips0 \n"
151 " move $1, %z4 \n" 153 " move $1, %z5 \n"
152 " .set mips3 \n" 154 " .set mips3 \n"
153 "2: sc $1, %1 \n" 155 "2: sc $1, %2 \n"
154 " beqzl $1, 1b \n" 156 " beqzl $1, 1b \n"
155 __WEAK_LLSC_MB 157 __WEAK_LLSC_MB
156 "3: \n" 158 "3: \n"
157 " .set pop \n" 159 " .set pop \n"
158 " .section .fixup,\"ax\" \n" 160 " .section .fixup,\"ax\" \n"
159 "4: li %0, %5 \n" 161 "4: li %0, %6 \n"
160 " j 3b \n" 162 " j 3b \n"
161 " .previous \n" 163 " .previous \n"
162 " .section __ex_table,\"a\" \n" 164 " .section __ex_table,\"a\" \n"
163 " "__UA_ADDR "\t1b, 4b \n" 165 " "__UA_ADDR "\t1b, 4b \n"
164 " "__UA_ADDR "\t2b, 4b \n" 166 " "__UA_ADDR "\t2b, 4b \n"
165 " .previous \n" 167 " .previous \n"
166 : "=&r" (retval), "=R" (*uaddr) 168 : "+r" (ret), "=&r" (val), "=R" (*uaddr)
167 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) 169 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
168 : "memory"); 170 : "memory");
169 } else if (cpu_has_llsc) { 171 } else if (cpu_has_llsc) {
@@ -172,31 +174,32 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
172 " .set push \n" 174 " .set push \n"
173 " .set noat \n" 175 " .set noat \n"
174 " .set mips3 \n" 176 " .set mips3 \n"
175 "1: ll %0, %2 \n" 177 "1: ll %1, %3 \n"
176 " bne %0, %z3, 3f \n" 178 " bne %1, %z4, 3f \n"
177 " .set mips0 \n" 179 " .set mips0 \n"
178 " move $1, %z4 \n" 180 " move $1, %z5 \n"
179 " .set mips3 \n" 181 " .set mips3 \n"
180 "2: sc $1, %1 \n" 182 "2: sc $1, %2 \n"
181 " beqz $1, 1b \n" 183 " beqz $1, 1b \n"
182 __WEAK_LLSC_MB 184 __WEAK_LLSC_MB
183 "3: \n" 185 "3: \n"
184 " .set pop \n" 186 " .set pop \n"
185 " .section .fixup,\"ax\" \n" 187 " .section .fixup,\"ax\" \n"
186 "4: li %0, %5 \n" 188 "4: li %0, %6 \n"
187 " j 3b \n" 189 " j 3b \n"
188 " .previous \n" 190 " .previous \n"
189 " .section __ex_table,\"a\" \n" 191 " .section __ex_table,\"a\" \n"
190 " "__UA_ADDR "\t1b, 4b \n" 192 " "__UA_ADDR "\t1b, 4b \n"
191 " "__UA_ADDR "\t2b, 4b \n" 193 " "__UA_ADDR "\t2b, 4b \n"
192 " .previous \n" 194 " .previous \n"
193 : "=&r" (retval), "=R" (*uaddr) 195 : "+r" (ret), "=&r" (val), "=R" (*uaddr)
194 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) 196 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
195 : "memory"); 197 : "memory");
196 } else 198 } else
197 return -ENOSYS; 199 return -ENOSYS;
198 200
199 return retval; 201 *uval = val;
202 return ret;
200} 203}
201 204
202#endif 205#endif
diff --git a/arch/mips/include/asm/perf_event.h b/arch/mips/include/asm/perf_event.h
index e00007cf8162..d0c77496c728 100644
--- a/arch/mips/include/asm/perf_event.h
+++ b/arch/mips/include/asm/perf_event.h
@@ -11,15 +11,5 @@
11 11
12#ifndef __MIPS_PERF_EVENT_H__ 12#ifndef __MIPS_PERF_EVENT_H__
13#define __MIPS_PERF_EVENT_H__ 13#define __MIPS_PERF_EVENT_H__
14 14/* Leave it empty here. The file is required by linux/perf_event.h */
15/*
16 * MIPS performance counters do not raise NMI upon overflow, a regular
17 * interrupt will be signaled. Hence we can do the pending perf event
18 * work at the tail of the irq handler.
19 */
20static inline void
21set_perf_event_pending(void)
22{
23}
24
25#endif /* __MIPS_PERF_EVENT_H__ */ 15#endif /* __MIPS_PERF_EVENT_H__ */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 5a84a1f11231..94ca2b018af7 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -17,29 +17,13 @@
17#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
18#include <asm/uasm.h> 18#include <asm/uasm.h>
19 19
20/* 20#include <asm-generic/sections.h>
21 * If the Instruction Pointer is in module space (0xc0000000), return true;
22 * otherwise, it is in kernel space (0x80000000), return false.
23 *
24 * FIXME: This will not work when the kernel space and module space are the
25 * same. If they are the same, we need to modify scripts/recordmcount.pl,
26 * ftrace_make_nop/call() and the other related parts to ensure the
27 * enabling/disabling of the calling site to _mcount is right for both kernel
28 * and module.
29 */
30
31static inline int in_module(unsigned long ip)
32{
33 return ip & 0x40000000;
34}
35 21
36#ifdef CONFIG_DYNAMIC_FTRACE 22#ifdef CONFIG_DYNAMIC_FTRACE
37 23
38#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ 24#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
39#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ 25#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */
40 26
41#define INSN_B_1F_4 0x10000004 /* b 1f; offset = 4 */
42#define INSN_B_1F_5 0x10000005 /* b 1f; offset = 5 */
43#define INSN_NOP 0x00000000 /* nop */ 27#define INSN_NOP 0x00000000 /* nop */
44#define INSN_JAL(addr) \ 28#define INSN_JAL(addr) \
45 ((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK))) 29 ((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK)))
@@ -69,6 +53,20 @@ static inline void ftrace_dyn_arch_init_insns(void)
69#endif 53#endif
70} 54}
71 55
56/*
57 * Check if the address is in kernel space
58 *
59 * Clone core_kernel_text() from kernel/extable.c, but doesn't call
60 * init_kernel_text() for Ftrace doesn't trace functions in init sections.
61 */
62static inline int in_kernel_space(unsigned long ip)
63{
64 if (ip >= (unsigned long)_stext &&
65 ip <= (unsigned long)_etext)
66 return 1;
67 return 0;
68}
69
72static int ftrace_modify_code(unsigned long ip, unsigned int new_code) 70static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
73{ 71{
74 int faulted; 72 int faulted;
@@ -84,6 +82,42 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
84 return 0; 82 return 0;
85} 83}
86 84
85/*
86 * The details about the calling site of mcount on MIPS
87 *
88 * 1. For kernel:
89 *
90 * move at, ra
91 * jal _mcount --> nop
92 *
93 * 2. For modules:
94 *
95 * 2.1 For KBUILD_MCOUNT_RA_ADDRESS and CONFIG_32BIT
96 *
97 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
98 * addiu v1, v1, low_16bit_of_mcount
99 * move at, ra
100 * move $12, ra_address
101 * jalr v1
102 * sub sp, sp, 8
103 * 1: offset = 5 instructions
104 * 2.2 For the Other situations
105 *
106 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
107 * addiu v1, v1, low_16bit_of_mcount
108 * move at, ra
109 * jalr v1
110 * nop | move $12, ra_address | sub sp, sp, 8
111 * 1: offset = 4 instructions
112 */
113
114#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
115#define MCOUNT_OFFSET_INSNS 5
116#else
117#define MCOUNT_OFFSET_INSNS 4
118#endif
119#define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS)
120
87int ftrace_make_nop(struct module *mod, 121int ftrace_make_nop(struct module *mod,
88 struct dyn_ftrace *rec, unsigned long addr) 122 struct dyn_ftrace *rec, unsigned long addr)
89{ 123{
@@ -91,39 +125,11 @@ int ftrace_make_nop(struct module *mod,
91 unsigned long ip = rec->ip; 125 unsigned long ip = rec->ip;
92 126
93 /* 127 /*
94 * We have compiled module with -mlong-calls, but compiled the kernel 128 * If ip is in kernel space, no long call, otherwise, long call is
95 * without it, we need to cope with them respectively. 129 * needed.
96 */ 130 */
97 if (in_module(ip)) { 131 new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
98#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) 132
99 /*
100 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
101 * addiu v1, v1, low_16bit_of_mcount
102 * move at, ra
103 * move $12, ra_address
104 * jalr v1
105 * sub sp, sp, 8
106 * 1: offset = 5 instructions
107 */
108 new = INSN_B_1F_5;
109#else
110 /*
111 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
112 * addiu v1, v1, low_16bit_of_mcount
113 * move at, ra
114 * jalr v1
115 * nop | move $12, ra_address | sub sp, sp, 8
116 * 1: offset = 4 instructions
117 */
118 new = INSN_B_1F_4;
119#endif
120 } else {
121 /*
122 * move at, ra
123 * jal _mcount --> nop
124 */
125 new = INSN_NOP;
126 }
127 return ftrace_modify_code(ip, new); 133 return ftrace_modify_code(ip, new);
128} 134}
129 135
@@ -132,8 +138,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
132 unsigned int new; 138 unsigned int new;
133 unsigned long ip = rec->ip; 139 unsigned long ip = rec->ip;
134 140
135 /* ip, module: 0xc0000000, kernel: 0x80000000 */ 141 new = in_kernel_space(ip) ? insn_jal_ftrace_caller :
136 new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller; 142 insn_lui_v1_hi16_mcount;
137 143
138 return ftrace_modify_code(ip, new); 144 return ftrace_modify_code(ip, new);
139} 145}
@@ -190,29 +196,25 @@ int ftrace_disable_ftrace_graph_caller(void)
190#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */ 196#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */
191#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */ 197#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */
192 198
193unsigned long ftrace_get_parent_addr(unsigned long self_addr, 199unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
194 unsigned long parent, 200 old_parent_ra, unsigned long parent_ra_addr, unsigned long fp)
195 unsigned long parent_addr,
196 unsigned long fp)
197{ 201{
198 unsigned long sp, ip, ra; 202 unsigned long sp, ip, tmp;
199 unsigned int code; 203 unsigned int code;
200 int faulted; 204 int faulted;
201 205
202 /* 206 /*
203 * For module, move the ip from calling site of mcount to the 207 * For module, move the ip from the return address after the
204 * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for 208 * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
205 * kernel, move to the instruction "move ra, at"(offset is 12) 209 * kernel, move after the instruction "move ra, at"(offset is 16)
206 */ 210 */
207 ip = self_addr - (in_module(self_addr) ? 20 : 12); 211 ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
208 212
209 /* 213 /*
210 * search the text until finding the non-store instruction or "s{d,w} 214 * search the text until finding the non-store instruction or "s{d,w}
211 * ra, offset(sp)" instruction 215 * ra, offset(sp)" instruction
212 */ 216 */
213 do { 217 do {
214 ip -= 4;
215
216 /* get the code at "ip": code = *(unsigned int *)ip; */ 218 /* get the code at "ip": code = *(unsigned int *)ip; */
217 safe_load_code(code, ip, faulted); 219 safe_load_code(code, ip, faulted);
218 220
@@ -224,18 +226,20 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
224 * store the ra on the stack 226 * store the ra on the stack
225 */ 227 */
226 if ((code & S_R_SP) != S_R_SP) 228 if ((code & S_R_SP) != S_R_SP)
227 return parent_addr; 229 return parent_ra_addr;
228 230
229 } while (((code & S_RA_SP) != S_RA_SP)); 231 /* Move to the next instruction */
232 ip -= 4;
233 } while ((code & S_RA_SP) != S_RA_SP);
230 234
231 sp = fp + (code & OFFSET_MASK); 235 sp = fp + (code & OFFSET_MASK);
232 236
233 /* ra = *(unsigned long *)sp; */ 237 /* tmp = *(unsigned long *)sp; */
234 safe_load_stack(ra, sp, faulted); 238 safe_load_stack(tmp, sp, faulted);
235 if (unlikely(faulted)) 239 if (unlikely(faulted))
236 return 0; 240 return 0;
237 241
238 if (ra == parent) 242 if (tmp == old_parent_ra)
239 return sp; 243 return sp;
240 return 0; 244 return 0;
241} 245}
@@ -246,21 +250,21 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
246 * Hook the return address and push it in the stack of return addrs 250 * Hook the return address and push it in the stack of return addrs
247 * in current thread info. 251 * in current thread info.
248 */ 252 */
249void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, 253void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
250 unsigned long fp) 254 unsigned long fp)
251{ 255{
252 unsigned long old; 256 unsigned long old_parent_ra;
253 struct ftrace_graph_ent trace; 257 struct ftrace_graph_ent trace;
254 unsigned long return_hooker = (unsigned long) 258 unsigned long return_hooker = (unsigned long)
255 &return_to_handler; 259 &return_to_handler;
256 int faulted; 260 int faulted, insns;
257 261
258 if (unlikely(atomic_read(&current->tracing_graph_pause))) 262 if (unlikely(atomic_read(&current->tracing_graph_pause)))
259 return; 263 return;
260 264
261 /* 265 /*
262 * "parent" is the stack address saved the return address of the caller 266 * "parent_ra_addr" is the stack address saved the return address of
263 * of _mcount. 267 * the caller of _mcount.
264 * 268 *
265 * if the gcc < 4.5, a leaf function does not save the return address 269 * if the gcc < 4.5, a leaf function does not save the return address
266 * in the stack address, so, we "emulate" one in _mcount's stack space, 270 * in the stack address, so, we "emulate" one in _mcount's stack space,
@@ -275,37 +279,44 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
275 * do it in ftrace_graph_caller of mcount.S. 279 * do it in ftrace_graph_caller of mcount.S.
276 */ 280 */
277 281
278 /* old = *parent; */ 282 /* old_parent_ra = *parent_ra_addr; */
279 safe_load_stack(old, parent, faulted); 283 safe_load_stack(old_parent_ra, parent_ra_addr, faulted);
280 if (unlikely(faulted)) 284 if (unlikely(faulted))
281 goto out; 285 goto out;
282#ifndef KBUILD_MCOUNT_RA_ADDRESS 286#ifndef KBUILD_MCOUNT_RA_ADDRESS
283 parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old, 287 parent_ra_addr = (unsigned long *)ftrace_get_parent_ra_addr(self_ra,
284 (unsigned long)parent, fp); 288 old_parent_ra, (unsigned long)parent_ra_addr, fp);
285 /* 289 /*
286 * If fails when getting the stack address of the non-leaf function's 290 * If fails when getting the stack address of the non-leaf function's
287 * ra, stop function graph tracer and return 291 * ra, stop function graph tracer and return
288 */ 292 */
289 if (parent == 0) 293 if (parent_ra_addr == 0)
290 goto out; 294 goto out;
291#endif 295#endif
292 /* *parent = return_hooker; */ 296 /* *parent_ra_addr = return_hooker; */
293 safe_store_stack(return_hooker, parent, faulted); 297 safe_store_stack(return_hooker, parent_ra_addr, faulted);
294 if (unlikely(faulted)) 298 if (unlikely(faulted))
295 goto out; 299 goto out;
296 300
297 if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) == 301 if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
298 -EBUSY) { 302 == -EBUSY) {
299 *parent = old; 303 *parent_ra_addr = old_parent_ra;
300 return; 304 return;
301 } 305 }
302 306
303 trace.func = self_addr; 307 /*
308 * Get the recorded ip of the current mcount calling site in the
309 * __mcount_loc section, which will be used to filter the function
310 * entries configured through the tracing/set_graph_function interface.
311 */
312
313 insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
314 trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
304 315
305 /* Only trace if the calling function expects to */ 316 /* Only trace if the calling function expects to */
306 if (!ftrace_graph_entry(&trace)) { 317 if (!ftrace_graph_entry(&trace)) {
307 current->curr_ret_stack--; 318 current->curr_ret_stack--;
308 *parent = old; 319 *parent_ra_addr = old_parent_ra;
309 } 320 }
310 return; 321 return;
311out: 322out:
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index 2b7f3f703b83..a8244854d3dc 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -161,41 +161,6 @@ mipspmu_event_set_period(struct perf_event *event,
161 return ret; 161 return ret;
162} 162}
163 163
164static int mipspmu_enable(struct perf_event *event)
165{
166 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
167 struct hw_perf_event *hwc = &event->hw;
168 int idx;
169 int err = 0;
170
171 /* To look for a free counter for this event. */
172 idx = mipspmu->alloc_counter(cpuc, hwc);
173 if (idx < 0) {
174 err = idx;
175 goto out;
176 }
177
178 /*
179 * If there is an event in the counter we are going to use then
180 * make sure it is disabled.
181 */
182 event->hw.idx = idx;
183 mipspmu->disable_event(idx);
184 cpuc->events[idx] = event;
185
186 /* Set the period for the event. */
187 mipspmu_event_set_period(event, hwc, idx);
188
189 /* Enable the event. */
190 mipspmu->enable_event(hwc, idx);
191
192 /* Propagate our changes to the userspace mapping. */
193 perf_event_update_userpage(event);
194
195out:
196 return err;
197}
198
199static void mipspmu_event_update(struct perf_event *event, 164static void mipspmu_event_update(struct perf_event *event,
200 struct hw_perf_event *hwc, 165 struct hw_perf_event *hwc,
201 int idx) 166 int idx)
@@ -204,7 +169,7 @@ static void mipspmu_event_update(struct perf_event *event,
204 unsigned long flags; 169 unsigned long flags;
205 int shift = 64 - TOTAL_BITS; 170 int shift = 64 - TOTAL_BITS;
206 s64 prev_raw_count, new_raw_count; 171 s64 prev_raw_count, new_raw_count;
207 s64 delta; 172 u64 delta;
208 173
209again: 174again:
210 prev_raw_count = local64_read(&hwc->prev_count); 175 prev_raw_count = local64_read(&hwc->prev_count);
@@ -231,32 +196,90 @@ again:
231 return; 196 return;
232} 197}
233 198
234static void mipspmu_disable(struct perf_event *event) 199static void mipspmu_start(struct perf_event *event, int flags)
200{
201 struct hw_perf_event *hwc = &event->hw;
202
203 if (!mipspmu)
204 return;
205
206 if (flags & PERF_EF_RELOAD)
207 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
208
209 hwc->state = 0;
210
211 /* Set the period for the event. */
212 mipspmu_event_set_period(event, hwc, hwc->idx);
213
214 /* Enable the event. */
215 mipspmu->enable_event(hwc, hwc->idx);
216}
217
218static void mipspmu_stop(struct perf_event *event, int flags)
219{
220 struct hw_perf_event *hwc = &event->hw;
221
222 if (!mipspmu)
223 return;
224
225 if (!(hwc->state & PERF_HES_STOPPED)) {
226 /* We are working on a local event. */
227 mipspmu->disable_event(hwc->idx);
228 barrier();
229 mipspmu_event_update(event, hwc, hwc->idx);
230 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
231 }
232}
233
234static int mipspmu_add(struct perf_event *event, int flags)
235{ 235{
236 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 236 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
237 struct hw_perf_event *hwc = &event->hw; 237 struct hw_perf_event *hwc = &event->hw;
238 int idx = hwc->idx; 238 int idx;
239 int err = 0;
239 240
241 perf_pmu_disable(event->pmu);
240 242
241 WARN_ON(idx < 0 || idx >= mipspmu->num_counters); 243 /* To look for a free counter for this event. */
244 idx = mipspmu->alloc_counter(cpuc, hwc);
245 if (idx < 0) {
246 err = idx;
247 goto out;
248 }
242 249
243 /* We are working on a local event. */ 250 /*
251 * If there is an event in the counter we are going to use then
252 * make sure it is disabled.
253 */
254 event->hw.idx = idx;
244 mipspmu->disable_event(idx); 255 mipspmu->disable_event(idx);
256 cpuc->events[idx] = event;
245 257
246 barrier(); 258 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
247 259 if (flags & PERF_EF_START)
248 mipspmu_event_update(event, hwc, idx); 260 mipspmu_start(event, PERF_EF_RELOAD);
249 cpuc->events[idx] = NULL;
250 clear_bit(idx, cpuc->used_mask);
251 261
262 /* Propagate our changes to the userspace mapping. */
252 perf_event_update_userpage(event); 263 perf_event_update_userpage(event);
264
265out:
266 perf_pmu_enable(event->pmu);
267 return err;
253} 268}
254 269
255static void mipspmu_unthrottle(struct perf_event *event) 270static void mipspmu_del(struct perf_event *event, int flags)
256{ 271{
272 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
257 struct hw_perf_event *hwc = &event->hw; 273 struct hw_perf_event *hwc = &event->hw;
274 int idx = hwc->idx;
258 275
259 mipspmu->enable_event(hwc, hwc->idx); 276 WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
277
278 mipspmu_stop(event, PERF_EF_UPDATE);
279 cpuc->events[idx] = NULL;
280 clear_bit(idx, cpuc->used_mask);
281
282 perf_event_update_userpage(event);
260} 283}
261 284
262static void mipspmu_read(struct perf_event *event) 285static void mipspmu_read(struct perf_event *event)
@@ -270,12 +293,17 @@ static void mipspmu_read(struct perf_event *event)
270 mipspmu_event_update(event, hwc, hwc->idx); 293 mipspmu_event_update(event, hwc, hwc->idx);
271} 294}
272 295
273static struct pmu pmu = { 296static void mipspmu_enable(struct pmu *pmu)
274 .enable = mipspmu_enable, 297{
275 .disable = mipspmu_disable, 298 if (mipspmu)
276 .unthrottle = mipspmu_unthrottle, 299 mipspmu->start();
277 .read = mipspmu_read, 300}
278}; 301
302static void mipspmu_disable(struct pmu *pmu)
303{
304 if (mipspmu)
305 mipspmu->stop();
306}
279 307
280static atomic_t active_events = ATOMIC_INIT(0); 308static atomic_t active_events = ATOMIC_INIT(0);
281static DEFINE_MUTEX(pmu_reserve_mutex); 309static DEFINE_MUTEX(pmu_reserve_mutex);
@@ -318,6 +346,82 @@ static void mipspmu_free_irq(void)
318 perf_irq = save_perf_irq; 346 perf_irq = save_perf_irq;
319} 347}
320 348
349/*
350 * mipsxx/rm9000/loongson2 have different performance counters, they have
351 * specific low-level init routines.
352 */
353static void reset_counters(void *arg);
354static int __hw_perf_event_init(struct perf_event *event);
355
356static void hw_perf_event_destroy(struct perf_event *event)
357{
358 if (atomic_dec_and_mutex_lock(&active_events,
359 &pmu_reserve_mutex)) {
360 /*
361 * We must not call the destroy function with interrupts
362 * disabled.
363 */
364 on_each_cpu(reset_counters,
365 (void *)(long)mipspmu->num_counters, 1);
366 mipspmu_free_irq();
367 mutex_unlock(&pmu_reserve_mutex);
368 }
369}
370
371static int mipspmu_event_init(struct perf_event *event)
372{
373 int err = 0;
374
375 switch (event->attr.type) {
376 case PERF_TYPE_RAW:
377 case PERF_TYPE_HARDWARE:
378 case PERF_TYPE_HW_CACHE:
379 break;
380
381 default:
382 return -ENOENT;
383 }
384
385 if (!mipspmu || event->cpu >= nr_cpumask_bits ||
386 (event->cpu >= 0 && !cpu_online(event->cpu)))
387 return -ENODEV;
388
389 if (!atomic_inc_not_zero(&active_events)) {
390 if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
391 atomic_dec(&active_events);
392 return -ENOSPC;
393 }
394
395 mutex_lock(&pmu_reserve_mutex);
396 if (atomic_read(&active_events) == 0)
397 err = mipspmu_get_irq();
398
399 if (!err)
400 atomic_inc(&active_events);
401 mutex_unlock(&pmu_reserve_mutex);
402 }
403
404 if (err)
405 return err;
406
407 err = __hw_perf_event_init(event);
408 if (err)
409 hw_perf_event_destroy(event);
410
411 return err;
412}
413
414static struct pmu pmu = {
415 .pmu_enable = mipspmu_enable,
416 .pmu_disable = mipspmu_disable,
417 .event_init = mipspmu_event_init,
418 .add = mipspmu_add,
419 .del = mipspmu_del,
420 .start = mipspmu_start,
421 .stop = mipspmu_stop,
422 .read = mipspmu_read,
423};
424
321static inline unsigned int 425static inline unsigned int
322mipspmu_perf_event_encode(const struct mips_perf_event *pev) 426mipspmu_perf_event_encode(const struct mips_perf_event *pev)
323{ 427{
@@ -382,8 +486,9 @@ static int validate_event(struct cpu_hw_events *cpuc,
382{ 486{
383 struct hw_perf_event fake_hwc = event->hw; 487 struct hw_perf_event fake_hwc = event->hw;
384 488
385 if (event->pmu && event->pmu != &pmu) 489 /* Allow mixed event group. So return 1 to pass validation. */
386 return 0; 490 if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
491 return 1;
387 492
388 return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0; 493 return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0;
389} 494}
@@ -409,73 +514,6 @@ static int validate_group(struct perf_event *event)
409 return 0; 514 return 0;
410} 515}
411 516
412/*
413 * mipsxx/rm9000/loongson2 have different performance counters, they have
414 * specific low-level init routines.
415 */
416static void reset_counters(void *arg);
417static int __hw_perf_event_init(struct perf_event *event);
418
419static void hw_perf_event_destroy(struct perf_event *event)
420{
421 if (atomic_dec_and_mutex_lock(&active_events,
422 &pmu_reserve_mutex)) {
423 /*
424 * We must not call the destroy function with interrupts
425 * disabled.
426 */
427 on_each_cpu(reset_counters,
428 (void *)(long)mipspmu->num_counters, 1);
429 mipspmu_free_irq();
430 mutex_unlock(&pmu_reserve_mutex);
431 }
432}
433
434const struct pmu *hw_perf_event_init(struct perf_event *event)
435{
436 int err = 0;
437
438 if (!mipspmu || event->cpu >= nr_cpumask_bits ||
439 (event->cpu >= 0 && !cpu_online(event->cpu)))
440 return ERR_PTR(-ENODEV);
441
442 if (!atomic_inc_not_zero(&active_events)) {
443 if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
444 atomic_dec(&active_events);
445 return ERR_PTR(-ENOSPC);
446 }
447
448 mutex_lock(&pmu_reserve_mutex);
449 if (atomic_read(&active_events) == 0)
450 err = mipspmu_get_irq();
451
452 if (!err)
453 atomic_inc(&active_events);
454 mutex_unlock(&pmu_reserve_mutex);
455 }
456
457 if (err)
458 return ERR_PTR(err);
459
460 err = __hw_perf_event_init(event);
461 if (err)
462 hw_perf_event_destroy(event);
463
464 return err ? ERR_PTR(err) : &pmu;
465}
466
467void hw_perf_enable(void)
468{
469 if (mipspmu)
470 mipspmu->start();
471}
472
473void hw_perf_disable(void)
474{
475 if (mipspmu)
476 mipspmu->stop();
477}
478
479/* This is needed by specific irq handlers in perf_event_*.c */ 517/* This is needed by specific irq handlers in perf_event_*.c */
480static void 518static void
481handle_associated_event(struct cpu_hw_events *cpuc, 519handle_associated_event(struct cpu_hw_events *cpuc,
@@ -496,21 +534,13 @@ handle_associated_event(struct cpu_hw_events *cpuc,
496#include "perf_event_mipsxx.c" 534#include "perf_event_mipsxx.c"
497 535
498/* Callchain handling code. */ 536/* Callchain handling code. */
499static inline void
500callchain_store(struct perf_callchain_entry *entry,
501 u64 ip)
502{
503 if (entry->nr < PERF_MAX_STACK_DEPTH)
504 entry->ip[entry->nr++] = ip;
505}
506 537
507/* 538/*
508 * Leave userspace callchain empty for now. When we find a way to trace 539 * Leave userspace callchain empty for now. When we find a way to trace
509 * the user stack callchains, we add here. 540 * the user stack callchains, we add here.
510 */ 541 */
511static void 542void perf_callchain_user(struct perf_callchain_entry *entry,
512perf_callchain_user(struct pt_regs *regs, 543 struct pt_regs *regs)
513 struct perf_callchain_entry *entry)
514{ 544{
515} 545}
516 546
@@ -523,23 +553,21 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
523 while (!kstack_end(sp)) { 553 while (!kstack_end(sp)) {
524 addr = *sp++; 554 addr = *sp++;
525 if (__kernel_text_address(addr)) { 555 if (__kernel_text_address(addr)) {
526 callchain_store(entry, addr); 556 perf_callchain_store(entry, addr);
527 if (entry->nr >= PERF_MAX_STACK_DEPTH) 557 if (entry->nr >= PERF_MAX_STACK_DEPTH)
528 break; 558 break;
529 } 559 }
530 } 560 }
531} 561}
532 562
533static void 563void perf_callchain_kernel(struct perf_callchain_entry *entry,
534perf_callchain_kernel(struct pt_regs *regs, 564 struct pt_regs *regs)
535 struct perf_callchain_entry *entry)
536{ 565{
537 unsigned long sp = regs->regs[29]; 566 unsigned long sp = regs->regs[29];
538#ifdef CONFIG_KALLSYMS 567#ifdef CONFIG_KALLSYMS
539 unsigned long ra = regs->regs[31]; 568 unsigned long ra = regs->regs[31];
540 unsigned long pc = regs->cp0_epc; 569 unsigned long pc = regs->cp0_epc;
541 570
542 callchain_store(entry, PERF_CONTEXT_KERNEL);
543 if (raw_show_trace || !__kernel_text_address(pc)) { 571 if (raw_show_trace || !__kernel_text_address(pc)) {
544 unsigned long stack_page = 572 unsigned long stack_page =
545 (unsigned long)task_stack_page(current); 573 (unsigned long)task_stack_page(current);
@@ -549,53 +577,12 @@ perf_callchain_kernel(struct pt_regs *regs,
549 return; 577 return;
550 } 578 }
551 do { 579 do {
552 callchain_store(entry, pc); 580 perf_callchain_store(entry, pc);
553 if (entry->nr >= PERF_MAX_STACK_DEPTH) 581 if (entry->nr >= PERF_MAX_STACK_DEPTH)
554 break; 582 break;
555 pc = unwind_stack(current, &sp, pc, &ra); 583 pc = unwind_stack(current, &sp, pc, &ra);
556 } while (pc); 584 } while (pc);
557#else 585#else
558 callchain_store(entry, PERF_CONTEXT_KERNEL);
559 save_raw_perf_callchain(entry, sp); 586 save_raw_perf_callchain(entry, sp);
560#endif 587#endif
561} 588}
562
563static void
564perf_do_callchain(struct pt_regs *regs,
565 struct perf_callchain_entry *entry)
566{
567 int is_user;
568
569 if (!regs)
570 return;
571
572 is_user = user_mode(regs);
573
574 if (!current || !current->pid)
575 return;
576
577 if (is_user && current->state != TASK_RUNNING)
578 return;
579
580 if (!is_user) {
581 perf_callchain_kernel(regs, entry);
582 if (current->mm)
583 regs = task_pt_regs(current);
584 else
585 regs = NULL;
586 }
587 if (regs)
588 perf_callchain_user(regs, entry);
589}
590
591static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
592
593struct perf_callchain_entry *
594perf_callchain(struct pt_regs *regs)
595{
596 struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
597
598 entry->nr = 0;
599 perf_do_callchain(regs, entry);
600 return entry;
601}
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 183e0d226669..d9a7db78ed62 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -696,7 +696,7 @@ static int mipsxx_pmu_handle_shared_irq(void)
696 * interrupt, not NMI. 696 * interrupt, not NMI.
697 */ 697 */
698 if (handled == IRQ_HANDLED) 698 if (handled == IRQ_HANDLED)
699 perf_event_do_pending(); 699 irq_work_run();
700 700
701#ifdef CONFIG_MIPS_MT_SMP 701#ifdef CONFIG_MIPS_MT_SMP
702 read_unlock(&pmuint_rwlock); 702 read_unlock(&pmuint_rwlock);
@@ -1045,6 +1045,8 @@ init_hw_perf_events(void)
1045 "CPU, irq %d%s\n", mipspmu->name, counters, irq, 1045 "CPU, irq %d%s\n", mipspmu->name, counters, irq,
1046 irq < 0 ? " (share with timer interrupt)" : ""); 1046 irq < 0 ? " (share with timer interrupt)" : "");
1047 1047
1048 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1049
1048 return 0; 1050 return 0;
1049} 1051}
1050early_initcall(init_hw_perf_events); 1052early_initcall(init_hw_perf_events);
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 5922342bca39..dbbe0ce48d89 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -84,7 +84,7 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
84 84
85static int protected_restore_fp_context(struct sigcontext __user *sc) 85static int protected_restore_fp_context(struct sigcontext __user *sc)
86{ 86{
87 int err, tmp; 87 int err, tmp __maybe_unused;
88 while (1) { 88 while (1) {
89 lock_fpu_owner(); 89 lock_fpu_owner();
90 own_fpu_inatomic(0); 90 own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index a0ed0e052b2e..aae986613795 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -115,7 +115,7 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
115 115
116static int protected_restore_fp_context32(struct sigcontext32 __user *sc) 116static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
117{ 117{
118 int err, tmp; 118 int err, tmp __maybe_unused;
119 while (1) { 119 while (1) {
120 lock_fpu_owner(); 120 lock_fpu_owner();
121 own_fpu_inatomic(0); 121 own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 383aeb95cb49..32a256101082 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -193,6 +193,22 @@ void __devinit smp_prepare_boot_cpu(void)
193 */ 193 */
194static struct task_struct *cpu_idle_thread[NR_CPUS]; 194static struct task_struct *cpu_idle_thread[NR_CPUS];
195 195
196struct create_idle {
197 struct work_struct work;
198 struct task_struct *idle;
199 struct completion done;
200 int cpu;
201};
202
203static void __cpuinit do_fork_idle(struct work_struct *work)
204{
205 struct create_idle *c_idle =
206 container_of(work, struct create_idle, work);
207
208 c_idle->idle = fork_idle(c_idle->cpu);
209 complete(&c_idle->done);
210}
211
196int __cpuinit __cpu_up(unsigned int cpu) 212int __cpuinit __cpu_up(unsigned int cpu)
197{ 213{
198 struct task_struct *idle; 214 struct task_struct *idle;
@@ -203,8 +219,19 @@ int __cpuinit __cpu_up(unsigned int cpu)
203 * Linux can schedule processes on this slave. 219 * Linux can schedule processes on this slave.
204 */ 220 */
205 if (!cpu_idle_thread[cpu]) { 221 if (!cpu_idle_thread[cpu]) {
206 idle = fork_idle(cpu); 222 /*
207 cpu_idle_thread[cpu] = idle; 223 * Schedule work item to avoid forking user task
224 * Ported from arch/x86/kernel/smpboot.c
225 */
226 struct create_idle c_idle = {
227 .cpu = cpu,
228 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
229 };
230
231 INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
232 schedule_work(&c_idle.work);
233 wait_for_completion(&c_idle.done);
234 idle = cpu_idle_thread[cpu] = c_idle.idle;
208 235
209 if (IS_ERR(idle)) 236 if (IS_ERR(idle))
210 panic(KERN_ERR "Fork failed for CPU %d", cpu); 237 panic(KERN_ERR "Fork failed for CPU %d", cpu);
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 1dc6edff45e0..58beabf50b3c 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -383,12 +383,11 @@ save_static_function(sys_sysmips);
383static int __used noinline 383static int __used noinline
384_sys_sysmips(nabi_no_regargs struct pt_regs regs) 384_sys_sysmips(nabi_no_regargs struct pt_regs regs)
385{ 385{
386 long cmd, arg1, arg2, arg3; 386 long cmd, arg1, arg2;
387 387
388 cmd = regs.regs[4]; 388 cmd = regs.regs[4];
389 arg1 = regs.regs[5]; 389 arg1 = regs.regs[5];
390 arg2 = regs.regs[6]; 390 arg2 = regs.regs[6];
391 arg3 = regs.regs[7];
392 391
393 switch (cmd) { 392 switch (cmd) {
394 case MIPS_ATOMIC_SET: 393 case MIPS_ATOMIC_SET:
@@ -405,7 +404,7 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs)
405 if (arg1 & 2) 404 if (arg1 & 2)
406 set_thread_flag(TIF_LOGADE); 405 set_thread_flag(TIF_LOGADE);
407 else 406 else
408 clear_thread_flag(TIF_FIXADE); 407 clear_thread_flag(TIF_LOGADE);
409 408
410 return 0; 409 return 0;
411 410
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 570607b376b5..832afbb87588 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -115,7 +115,7 @@ SECTIONS
115 EXIT_DATA 115 EXIT_DATA
116 } 116 }
117 117
118 PERCPU(PAGE_SIZE) 118 PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE)
119 . = ALIGN(PAGE_SIZE); 119 . = ALIGN(PAGE_SIZE);
120 __init_end = .; 120 __init_end = .;
121 /* freed after init ends here */ 121 /* freed after init ends here */
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfef8fde..ab52b7cf3b6b 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
148 spinlock_t tc_list_lock; 148 spinlock_t tc_list_lock;
149 struct list_head tc_list; /* Thread contexts */ 149 struct list_head tc_list; /* Thread contexts */
150} vpecontrol = { 150} vpecontrol = {
151 .vpe_list_lock = SPIN_LOCK_UNLOCKED, 151 .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
152 .vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list), 152 .vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
153 .tc_list_lock = SPIN_LOCK_UNLOCKED, 153 .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
154 .tc_list = LIST_HEAD_INIT(vpecontrol.tc_list) 154 .tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
155}; 155};
156 156
diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig
index 6e1b77fec7ea..aca93eed8779 100644
--- a/arch/mips/loongson/Kconfig
+++ b/arch/mips/loongson/Kconfig
@@ -1,6 +1,7 @@
1if MACH_LOONGSON
2
1choice 3choice
2 prompt "Machine Type" 4 prompt "Machine Type"
3 depends on MACH_LOONGSON
4 5
5config LEMOTE_FULOONG2E 6config LEMOTE_FULOONG2E
6 bool "Lemote Fuloong(2e) mini-PC" 7 bool "Lemote Fuloong(2e) mini-PC"
@@ -87,3 +88,5 @@ config LOONGSON_UART_BASE
87config LOONGSON_MC146818 88config LOONGSON_MC146818
88 bool 89 bool
89 default n 90 default n
91
92endif # MACH_LOONGSON
diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c
index 1a06defc4f7f..353e1d2e41a5 100644
--- a/arch/mips/loongson/common/cmdline.c
+++ b/arch/mips/loongson/common/cmdline.c
@@ -44,10 +44,5 @@ void __init prom_init_cmdline(void)
44 strcat(arcs_cmdline, " "); 44 strcat(arcs_cmdline, " ");
45 } 45 }
46 46
47 if ((strstr(arcs_cmdline, "console=")) == NULL)
48 strcat(arcs_cmdline, " console=ttyS0,115200");
49 if ((strstr(arcs_cmdline, "root=")) == NULL)
50 strcat(arcs_cmdline, " root=/dev/hda1");
51
52 prom_init_machtype(); 47 prom_init_machtype();
53} 48}
diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c
index 81fbe6b73f91..2efd5d9dee27 100644
--- a/arch/mips/loongson/common/machtype.c
+++ b/arch/mips/loongson/common/machtype.c
@@ -41,7 +41,7 @@ void __weak __init mach_prom_init_machtype(void)
41 41
42void __init prom_init_machtype(void) 42void __init prom_init_machtype(void)
43{ 43{
44 char *p, str[MACHTYPE_LEN]; 44 char *p, str[MACHTYPE_LEN + 1];
45 int machtype = MACH_LEMOTE_FL2E; 45 int machtype = MACH_LEMOTE_FL2E;
46 46
47 mips_machtype = LOONGSON_MACHTYPE; 47 mips_machtype = LOONGSON_MACHTYPE;
@@ -53,6 +53,7 @@ void __init prom_init_machtype(void)
53 } 53 }
54 p += strlen("machtype="); 54 p += strlen("machtype=");
55 strncpy(str, p, MACHTYPE_LEN); 55 strncpy(str, p, MACHTYPE_LEN);
56 str[MACHTYPE_LEN] = '\0';
56 p = strstr(str, " "); 57 p = strstr(str, " ");
57 if (p) 58 if (p)
58 *p = '\0'; 59 *p = '\0';
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 2701d9500959..2a7d43f4f161 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -70,7 +70,7 @@
70 70
71 71
72#define COMPXSP \ 72#define COMPXSP \
73 unsigned xm; int xe; int xs; int xc 73 unsigned xm; int xe; int xs __maybe_unused; int xc
74 74
75#define COMPYSP \ 75#define COMPYSP \
76 unsigned ym; int ye; int ys; int yc 76 unsigned ym; int ye; int ys; int yc
@@ -104,7 +104,7 @@
104 104
105 105
106#define COMPXDP \ 106#define COMPXDP \
107u64 xm; int xe; int xs; int xc 107u64 xm; int xe; int xs __maybe_unused; int xc
108 108
109#define COMPYDP \ 109#define COMPYDP \
110u64 ym; int ye; int ys; int yc 110u64 ym; int ye; int ys; int yc
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 2efcbd24c82f..279599e9a779 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -324,7 +324,7 @@ int page_is_ram(unsigned long pagenr)
324void __init paging_init(void) 324void __init paging_init(void)
325{ 325{
326 unsigned long max_zone_pfns[MAX_NR_ZONES]; 326 unsigned long max_zone_pfns[MAX_NR_ZONES];
327 unsigned long lastpfn; 327 unsigned long lastpfn __maybe_unused;
328 328
329 pagetable_init(); 329 pagetable_init();
330 330
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 083d3412d0bc..04f9e17db9d0 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -109,6 +109,8 @@ static bool scratchpad_available(void)
109static int scratchpad_offset(int i) 109static int scratchpad_offset(int i)
110{ 110{
111 BUG(); 111 BUG();
112 /* Really unreachable, but evidently some GCC want this. */
113 return 0;
112} 114}
113#endif 115#endif
114/* 116/*
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index b7c03d80c88c..68798f869c0f 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c
@@ -308,7 +308,7 @@ static struct resource pci_mem_resource = {
308 * RETURNS: PCIBIOS_SUCCESSFUL - success 308 * RETURNS: PCIBIOS_SUCCESSFUL - success
309 * 309 *
310 ****************************************************************************/ 310 ****************************************************************************/
311static int bpci_interrupt(int irq, void *dev_id) 311static irqreturn_t bpci_interrupt(int irq, void *dev_id)
312{ 312{
313 struct msp_pci_regs *preg = (void *)PCI_BASE_REG; 313 struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
314 unsigned int stat = preg->if_status; 314 unsigned int stat = preg->if_status;
@@ -326,7 +326,7 @@ static int bpci_interrupt(int irq, void *dev_id)
326 /* write to clear all asserted interrupts */ 326 /* write to clear all asserted interrupts */
327 preg->if_status = stat; 327 preg->if_status = stat;
328 328
329 return PCIBIOS_SUCCESSFUL; 329 return IRQ_HANDLED;
330} 330}
331 331
332/***************************************************************************** 332/*****************************************************************************
diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig
index c139988bb85d..8d798497c614 100644
--- a/arch/mips/pmc-sierra/Kconfig
+++ b/arch/mips/pmc-sierra/Kconfig
@@ -4,15 +4,11 @@ choice
4 4
5config PMC_MSP4200_EVAL 5config PMC_MSP4200_EVAL
6 bool "PMC-Sierra MSP4200 Eval Board" 6 bool "PMC-Sierra MSP4200 Eval Board"
7 select CEVT_R4K
8 select CSRC_R4K
9 select IRQ_MSP_SLP 7 select IRQ_MSP_SLP
10 select HW_HAS_PCI 8 select HW_HAS_PCI
11 9
12config PMC_MSP4200_GW 10config PMC_MSP4200_GW
13 bool "PMC-Sierra MSP4200 VoIP Gateway" 11 bool "PMC-Sierra MSP4200 VoIP Gateway"
14 select CEVT_R4K
15 select CSRC_R4K
16 select IRQ_MSP_SLP 12 select IRQ_MSP_SLP
17 select HW_HAS_PCI 13 select HW_HAS_PCI
18 14
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index cca64e15f57f..01df84ce31e2 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c
@@ -81,7 +81,7 @@ void __init plat_time_init(void)
81 mips_hpt_frequency = cpu_rate/2; 81 mips_hpt_frequency = cpu_rate/2;
82} 82}
83 83
84unsigned int __init get_c0_compare_int(void) 84unsigned int __cpuinit get_c0_compare_int(void)
85{ 85{
86 return MSP_INT_VPE0_TIMER; 86 return MSP_INT_VPE0_TIMER;
87} 87}
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 92d2f9298e38..9d773a639513 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -139,7 +139,7 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
139 * Atomically reads the value of @v. Note that the guaranteed 139 * Atomically reads the value of @v. Note that the guaranteed
140 * useful range of an atomic_t is only 24 bits. 140 * useful range of an atomic_t is only 24 bits.
141 */ 141 */
142#define atomic_read(v) ((v)->counter) 142#define atomic_read(v) (ACCESS_ONCE((v)->counter))
143 143
144/** 144/**
145 * atomic_set - set atomic variable 145 * atomic_set - set atomic variable
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index 679dee0bbd08..3d6e60dad9d9 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -160,9 +160,10 @@ struct __large_struct { unsigned long buf[100]; };
160 160
161#define __get_user_check(x, ptr, size) \ 161#define __get_user_check(x, ptr, size) \
162({ \ 162({ \
163 const __typeof__(ptr) __guc_ptr = (ptr); \
163 int _e; \ 164 int _e; \
164 if (likely(__access_ok((unsigned long) (ptr), (size)))) \ 165 if (likely(__access_ok((unsigned long) __guc_ptr, (size)))) \
165 _e = __get_user_nocheck((x), (ptr), (size)); \ 166 _e = __get_user_nocheck((x), __guc_ptr, (size)); \
166 else { \ 167 else { \
167 _e = -EFAULT; \ 168 _e = -EFAULT; \
168 (x) = (__typeof__(x))0; \ 169 (x) = (__typeof__(x))0; \
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index 75da468090b9..5b955000626d 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -104,8 +104,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
104 unsigned tsc, elapse; 104 unsigned tsc, elapse;
105 irqreturn_t ret; 105 irqreturn_t ret;
106 106
107 write_seqlock(&xtime_lock);
108
109 while (tsc = get_cycles(), 107 while (tsc = get_cycles(),
110 elapse = tsc - mn10300_last_tsc, /* time elapsed since last 108 elapse = tsc - mn10300_last_tsc, /* time elapsed since last
111 * tick */ 109 * tick */
@@ -114,11 +112,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
114 mn10300_last_tsc += MN10300_TSC_PER_HZ; 112 mn10300_last_tsc += MN10300_TSC_PER_HZ;
115 113
116 /* advance the kernel's time tracking system */ 114 /* advance the kernel's time tracking system */
117 do_timer(1); 115 xtime_update(1);
118 } 116 }
119 117
120 write_sequnlock(&xtime_lock);
121
122 ret = local_timer_interrupt(); 118 ret = local_timer_interrupt();
123#ifdef CONFIG_SMP 119#ifdef CONFIG_SMP
124 send_IPI_allbutself(LOCAL_TIMER_IPI); 120 send_IPI_allbutself(LOCAL_TIMER_IPI);
diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S
index febbeee7f2f5..968bcd2cb022 100644
--- a/arch/mn10300/kernel/vmlinux.lds.S
+++ b/arch/mn10300/kernel/vmlinux.lds.S
@@ -70,7 +70,7 @@ SECTIONS
70 .exit.text : { EXIT_TEXT; } 70 .exit.text : { EXIT_TEXT; }
71 .exit.data : { EXIT_DATA; } 71 .exit.data : { EXIT_DATA; }
72 72
73 PERCPU(PAGE_SIZE) 73 PERCPU(32, PAGE_SIZE)
74 . = ALIGN(PAGE_SIZE); 74 . = ALIGN(PAGE_SIZE);
75 __init_end = .; 75 __init_end = .;
76 /* freed after init ends here */ 76 /* freed after init ends here */
diff --git a/arch/mn10300/mm/cache-inv-icache.c b/arch/mn10300/mm/cache-inv-icache.c
index a8933a60b2d4..a6b63dde603d 100644
--- a/arch/mn10300/mm/cache-inv-icache.c
+++ b/arch/mn10300/mm/cache-inv-icache.c
@@ -69,7 +69,7 @@ static void flush_icache_page_range(unsigned long start, unsigned long end)
69 69
70 /* invalidate the icache coverage on that region */ 70 /* invalidate the icache coverage on that region */
71 mn10300_local_icache_inv_range2(addr + off, size); 71 mn10300_local_icache_inv_range2(addr + off, size);
72 smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); 72 smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
73} 73}
74 74
75/** 75/**
@@ -101,7 +101,7 @@ void flush_icache_range(unsigned long start, unsigned long end)
101 * directly */ 101 * directly */
102 start_page = (start >= 0x80000000UL) ? start : 0x80000000UL; 102 start_page = (start >= 0x80000000UL) ? start : 0x80000000UL;
103 mn10300_icache_inv_range(start_page, end); 103 mn10300_icache_inv_range(start_page, end);
104 smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); 104 smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
105 if (start_page == start) 105 if (start_page == start)
106 goto done; 106 goto done;
107 end = start_page; 107 end = start_page;
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 30394081d9b6..6ab9580b0b00 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -185,26 +185,21 @@ struct hpux_statfs {
185 int16_t f_pad; 185 int16_t f_pad;
186}; 186};
187 187
188static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf) 188static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p)
189{ 189{
190 struct kstatfs st; 190 struct hpux_statfs buf;
191 int retval; 191 memset(&buf, 0, sizeof(buf));
192 192 buf.f_type = st->f_type;
193 retval = vfs_statfs(path, &st); 193 buf.f_bsize = st->f_bsize;
194 if (retval) 194 buf.f_blocks = st->f_blocks;
195 return retval; 195 buf.f_bfree = st->f_bfree;
196 196 buf.f_bavail = st->f_bavail;
197 memset(buf, 0, sizeof(*buf)); 197 buf.f_files = st->f_files;
198 buf->f_type = st.f_type; 198 buf.f_ffree = st->f_ffree;
199 buf->f_bsize = st.f_bsize; 199 buf.f_fsid[0] = st->f_fsid.val[0];
200 buf->f_blocks = st.f_blocks; 200 buf.f_fsid[1] = st->f_fsid.val[1];
201 buf->f_bfree = st.f_bfree; 201 if (copy_to_user(p, &buf, sizeof(buf)))
202 buf->f_bavail = st.f_bavail; 202 return -EFAULT;
203 buf->f_files = st.f_files;
204 buf->f_ffree = st.f_ffree;
205 buf->f_fsid[0] = st.f_fsid.val[0];
206 buf->f_fsid[1] = st.f_fsid.val[1];
207
208 return 0; 203 return 0;
209} 204}
210 205
@@ -212,35 +207,19 @@ static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
212asmlinkage long hpux_statfs(const char __user *pathname, 207asmlinkage long hpux_statfs(const char __user *pathname,
213 struct hpux_statfs __user *buf) 208 struct hpux_statfs __user *buf)
214{ 209{
215 struct path path; 210 struct kstatfs st;
216 int error; 211 int error = user_statfs(pathname, &st);
217 212 if (!error)
218 error = user_path(pathname, &path); 213 error = do_statfs_hpux(&st, buf);
219 if (!error) {
220 struct hpux_statfs tmp;
221 error = do_statfs_hpux(&path, &tmp);
222 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
223 error = -EFAULT;
224 path_put(&path);
225 }
226 return error; 214 return error;
227} 215}
228 216
229asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf) 217asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf)
230{ 218{
231 struct file *file; 219 struct kstatfs st;
232 struct hpux_statfs tmp; 220 int error = fd_statfs(fd, &st);
233 int error; 221 if (!error)
234 222 error = do_statfs_hpux(&st, buf);
235 error = -EBADF;
236 file = fget(fd);
237 if (!file)
238 goto out;
239 error = do_statfs_hpux(&file->f_path, &tmp);
240 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
241 error = -EFAULT;
242 fput(file);
243 out:
244 return error; 223 return error;
245} 224}
246 225
diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h
index f357fc693c89..0304b92ccfea 100644
--- a/arch/parisc/include/asm/fcntl.h
+++ b/arch/parisc/include/asm/fcntl.h
@@ -19,6 +19,8 @@
19#define O_NOFOLLOW 000000200 /* don't follow links */ 19#define O_NOFOLLOW 000000200 /* don't follow links */
20#define O_INVISIBLE 004000000 /* invisible I/O, for DMAPI/XDSM */ 20#define O_INVISIBLE 004000000 /* invisible I/O, for DMAPI/XDSM */
21 21
22#define O_PATH 020000000
23
22#define F_GETLK64 8 24#define F_GETLK64 8
23#define F_SETLK64 9 25#define F_SETLK64 9
24#define F_SETLKW64 10 26#define F_SETLKW64 10
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 0c705c3a55ef..67a33cc27ef2 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -8,7 +8,7 @@
8#include <asm/errno.h> 8#include <asm/errno.h>
9 9
10static inline int 10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 11futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
12{ 12{
13 int op = (encoded_op >> 28) & 7; 13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15; 14 int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg; 19 oparg = 1 << oparg;
20 20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
22 return -EFAULT; 22 return -EFAULT;
23 23
24 pagefault_disable(); 24 pagefault_disable();
@@ -51,10 +51,10 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
51 51
52/* Non-atomic version */ 52/* Non-atomic version */
53static inline int 53static inline int
54futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 54futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
55 u32 oldval, u32 newval)
55{ 56{
56 int err = 0; 57 u32 val;
57 int uval;
58 58
59 /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is 59 /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
60 * our gateway page, and causes no end of trouble... 60 * our gateway page, and causes no end of trouble...
@@ -62,15 +62,15 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
62 if (segment_eq(KERNEL_DS, get_fs()) && !uaddr) 62 if (segment_eq(KERNEL_DS, get_fs()) && !uaddr)
63 return -EFAULT; 63 return -EFAULT;
64 64
65 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 65 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
66 return -EFAULT; 66 return -EFAULT;
67 67
68 err = get_user(uval, uaddr); 68 if (get_user(val, uaddr))
69 if (err) return -EFAULT; 69 return -EFAULT;
70 if (uval == oldval) 70 if (val == oldval && put_user(newval, uaddr))
71 err = put_user(newval, uaddr); 71 return -EFAULT;
72 if (err) return -EFAULT; 72 *uval = val;
73 return uval; 73 return 0;
74} 74}
75 75
76#endif /*__KERNEL__*/ 76#endif /*__KERNEL__*/
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 05511ccb61d2..45b7389d77aa 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -162,11 +162,8 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
162 update_process_times(user_mode(get_irq_regs())); 162 update_process_times(user_mode(get_irq_regs()));
163 } 163 }
164 164
165 if (cpu == 0) { 165 if (cpu == 0)
166 write_seqlock(&xtime_lock); 166 xtime_update(ticks_elapsed);
167 do_timer(ticks_elapsed);
168 write_sequnlock(&xtime_lock);
169 }
170 167
171 return IRQ_HANDLED; 168 return IRQ_HANDLED;
172} 169}
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index d64a6bbec2aa..8f1e4efd143e 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -145,7 +145,7 @@ SECTIONS
145 EXIT_DATA 145 EXIT_DATA
146 } 146 }
147 147
148 PERCPU(PAGE_SIZE) 148 PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
149 . = ALIGN(PAGE_SIZE); 149 . = ALIGN(PAGE_SIZE);
150 __init_end = .; 150 __init_end = .;
151 /* freed after init ends here */ 151 /* freed after init ends here */
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 7c589ef81fb0..c94e4a3fe2ef 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -30,7 +30,7 @@
30 : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ 30 : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
31 : "cr0", "memory") 31 : "cr0", "memory")
32 32
33static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 33static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
34{ 34{
35 int op = (encoded_op >> 28) & 7; 35 int op = (encoded_op >> 28) & 7;
36 int cmp = (encoded_op >> 24) & 15; 36 int cmp = (encoded_op >> 24) & 15;
@@ -40,7 +40,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
40 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 40 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
41 oparg = 1 << oparg; 41 oparg = 1 << oparg;
42 42
43 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 43 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
44 return -EFAULT; 44 return -EFAULT;
45 45
46 pagefault_disable(); 46 pagefault_disable();
@@ -82,35 +82,38 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
82} 82}
83 83
84static inline int 84static inline int
85futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 85futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
86 u32 oldval, u32 newval)
86{ 87{
87 int prev; 88 int ret = 0;
89 u32 prev;
88 90
89 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 91 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
90 return -EFAULT; 92 return -EFAULT;
91 93
92 __asm__ __volatile__ ( 94 __asm__ __volatile__ (
93 PPC_RELEASE_BARRIER 95 PPC_RELEASE_BARRIER
94"1: lwarx %0,0,%2 # futex_atomic_cmpxchg_inatomic\n\ 96"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
95 cmpw 0,%0,%3\n\ 97 cmpw 0,%1,%4\n\
96 bne- 3f\n" 98 bne- 3f\n"
97 PPC405_ERR77(0,%2) 99 PPC405_ERR77(0,%3)
98"2: stwcx. %4,0,%2\n\ 100"2: stwcx. %5,0,%3\n\
99 bne- 1b\n" 101 bne- 1b\n"
100 PPC_ACQUIRE_BARRIER 102 PPC_ACQUIRE_BARRIER
101"3: .section .fixup,\"ax\"\n\ 103"3: .section .fixup,\"ax\"\n\
1024: li %0,%5\n\ 1044: li %0,%6\n\
103 b 3b\n\ 105 b 3b\n\
104 .previous\n\ 106 .previous\n\
105 .section __ex_table,\"a\"\n\ 107 .section __ex_table,\"a\"\n\
106 .align 3\n\ 108 .align 3\n\
107 " PPC_LONG "1b,4b,2b,4b\n\ 109 " PPC_LONG "1b,4b,2b,4b\n\
108 .previous" \ 110 .previous" \
109 : "=&r" (prev), "+m" (*uaddr) 111 : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
110 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) 112 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
111 : "cc", "memory"); 113 : "cc", "memory");
112 114
113 return prev; 115 *uval = prev;
116 return ret;
114} 117}
115 118
116#endif /* __KERNEL__ */ 119#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 380d48bacd16..26b8c807f8f1 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -33,9 +33,25 @@
33// 33//
34//---------------------------------------------------------------------------- 34//----------------------------------------------------------------------------
35#include <linux/cache.h> 35#include <linux/cache.h>
36#include <linux/threads.h>
36#include <asm/types.h> 37#include <asm/types.h>
37#include <asm/mmu.h> 38#include <asm/mmu.h>
38 39
40/*
41 * We only have to have statically allocated lppaca structs on
42 * legacy iSeries, which supports at most 64 cpus.
43 */
44#ifdef CONFIG_PPC_ISERIES
45#if NR_CPUS < 64
46#define NR_LPPACAS NR_CPUS
47#else
48#define NR_LPPACAS 64
49#endif
50#else /* not iSeries */
51#define NR_LPPACAS 1
52#endif
53
54
39/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k 55/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
40 * alignment is sufficient to prevent this */ 56 * alignment is sufficient to prevent this */
41struct lppaca { 57struct lppaca {
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 991d5998d6be..fe56a23e1ff0 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -240,6 +240,12 @@ struct machdep_calls {
240 * claims to support kexec. 240 * claims to support kexec.
241 */ 241 */
242 int (*machine_kexec_prepare)(struct kimage *image); 242 int (*machine_kexec_prepare)(struct kimage *image);
243
244 /* Called to perform the _real_ kexec.
245 * Do NOT allocate memory or fail here. We are past the point of
246 * no return.
247 */
248 void (*machine_kexec)(struct kimage *image);
243#endif /* CONFIG_KEXEC */ 249#endif /* CONFIG_KEXEC */
244 250
245#ifdef CONFIG_SUSPEND 251#ifdef CONFIG_SUSPEND
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 51e9e6f90d12..edeb80fdd2c3 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -171,6 +171,16 @@ static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus)
171 return bus->sysdata; 171 return bus->sysdata;
172} 172}
173 173
174static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
175{
176 struct pci_controller *host;
177
178 if (bus->self)
179 return pci_device_to_OF_node(bus->self);
180 host = pci_bus_to_host(bus);
181 return host ? host->dn : NULL;
182}
183
174static inline int isa_vaddr_is_ioport(void __iomem *address) 184static inline int isa_vaddr_is_ioport(void __iomem *address)
175{ 185{
176 /* No specific ISA handling on ppc32 at this stage, it 186 /* No specific ISA handling on ppc32 at this stage, it
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index d72757585595..c189aa5fe1f4 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -70,21 +70,6 @@ static inline int of_node_to_nid(struct device_node *device) { return 0; }
70#endif 70#endif
71#define of_node_to_nid of_node_to_nid 71#define of_node_to_nid of_node_to_nid
72 72
73/**
74 * of_irq_map_pci - Resolve the interrupt for a PCI device
75 * @pdev: the device whose interrupt is to be resolved
76 * @out_irq: structure of_irq filled by this function
77 *
78 * This function resolves the PCI interrupt for a given PCI device. If a
79 * device-node exists for a given pci_dev, it will use normal OF tree
80 * walking. If not, it will implement standard swizzling and walk up the
81 * PCI tree until an device-node is found, at which point it will finish
82 * resolving using the OF tree walking.
83 */
84struct pci_dev;
85struct of_irq;
86extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
87
88extern void of_instantiate_rtc(void); 73extern void of_instantiate_rtc(void);
89 74
90/* These includes are put at the bottom because they may contain things 75/* These includes are put at the bottom because they may contain things
diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h
index 8447d89fbe72..bb1e2cdeb9bf 100644
--- a/arch/powerpc/include/asm/rwsem.h
+++ b/arch/powerpc/include/asm/rwsem.h
@@ -13,11 +13,6 @@
13 * by Paul Mackerras <paulus@samba.org>. 13 * by Paul Mackerras <paulus@samba.org>.
14 */ 14 */
15 15
16#include <linux/list.h>
17#include <linux/spinlock.h>
18#include <asm/atomic.h>
19#include <asm/system.h>
20
21/* 16/*
22 * the semaphore definition 17 * the semaphore definition
23 */ 18 */
@@ -33,47 +28,6 @@
33#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 28#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
34#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 29#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
35 30
36struct rw_semaphore {
37 long count;
38 spinlock_t wait_lock;
39 struct list_head wait_list;
40#ifdef CONFIG_DEBUG_LOCK_ALLOC
41 struct lockdep_map dep_map;
42#endif
43};
44
45#ifdef CONFIG_DEBUG_LOCK_ALLOC
46# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
47#else
48# define __RWSEM_DEP_MAP_INIT(lockname)
49#endif
50
51#define __RWSEM_INITIALIZER(name) \
52{ \
53 RWSEM_UNLOCKED_VALUE, \
54 __SPIN_LOCK_UNLOCKED((name).wait_lock), \
55 LIST_HEAD_INIT((name).wait_list) \
56 __RWSEM_DEP_MAP_INIT(name) \
57}
58
59#define DECLARE_RWSEM(name) \
60 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
61
62extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
63extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
64extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
65extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
66
67extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
68 struct lock_class_key *key);
69
70#define init_rwsem(sem) \
71 do { \
72 static struct lock_class_key __key; \
73 \
74 __init_rwsem((sem), #sem, &__key); \
75 } while (0)
76
77/* 31/*
78 * lock for reading 32 * lock for reading
79 */ 33 */
@@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
174 return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); 128 return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
175} 129}
176 130
177static inline int rwsem_is_locked(struct rw_semaphore *sem)
178{
179 return sem->count != 0;
180}
181
182#endif /* __KERNEL__ */ 131#endif /* __KERNEL__ */
183#endif /* _ASM_POWERPC_RWSEM_H */ 132#endif /* _ASM_POWERPC_RWSEM_H */
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 49a170af8145..a5f8672eeff3 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -87,7 +87,10 @@ void machine_kexec(struct kimage *image)
87 87
88 save_ftrace_enabled = __ftrace_enabled_save(); 88 save_ftrace_enabled = __ftrace_enabled_save();
89 89
90 default_machine_kexec(image); 90 if (ppc_md.machine_kexec)
91 ppc_md.machine_kexec(image);
92 else
93 default_machine_kexec(image);
91 94
92 __ftrace_enabled_restore(save_ftrace_enabled); 95 __ftrace_enabled_restore(save_ftrace_enabled);
93 96
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ebf9846f3c3b..f4adf89d7614 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -27,20 +27,6 @@ extern unsigned long __toc_start;
27#ifdef CONFIG_PPC_BOOK3S 27#ifdef CONFIG_PPC_BOOK3S
28 28
29/* 29/*
30 * We only have to have statically allocated lppaca structs on
31 * legacy iSeries, which supports at most 64 cpus.
32 */
33#ifdef CONFIG_PPC_ISERIES
34#if NR_CPUS < 64
35#define NR_LPPACAS NR_CPUS
36#else
37#define NR_LPPACAS 64
38#endif
39#else /* not iSeries */
40#define NR_LPPACAS 1
41#endif
42
43/*
44 * The structure which the hypervisor knows about - this structure 30 * The structure which the hypervisor knows about - this structure
45 * should not cross a page boundary. The vpa_init/register_vpa call 31 * should not cross a page boundary. The vpa_init/register_vpa call
46 * is now known to fail if the lppaca structure crosses a page 32 * is now known to fail if the lppaca structure crosses a page
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 10a44e68ef11..eb341be9a4d9 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -22,6 +22,7 @@
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/bootmem.h> 23#include <linux/bootmem.h>
24#include <linux/of_address.h> 24#include <linux/of_address.h>
25#include <linux/of_pci.h>
25#include <linux/mm.h> 26#include <linux/mm.h>
26#include <linux/list.h> 27#include <linux/list.h>
27#include <linux/syscalls.h> 28#include <linux/syscalls.h>
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7a1d5cb76932..8303a6c65ef7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -353,6 +353,7 @@ static void switch_booke_debug_regs(struct thread_struct *new_thread)
353 prime_debug_regs(new_thread); 353 prime_debug_regs(new_thread);
354} 354}
355#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ 355#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
356#ifndef CONFIG_HAVE_HW_BREAKPOINT
356static void set_debug_reg_defaults(struct thread_struct *thread) 357static void set_debug_reg_defaults(struct thread_struct *thread)
357{ 358{
358 if (thread->dabr) { 359 if (thread->dabr) {
@@ -360,6 +361,7 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
360 set_dabr(0); 361 set_dabr(0);
361 } 362 }
362} 363}
364#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
363#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ 365#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
364 366
365int set_dabr(unsigned long dabr) 367int set_dabr(unsigned long dabr)
@@ -670,11 +672,11 @@ void flush_thread(void)
670{ 672{
671 discard_lazy_cpu_state(); 673 discard_lazy_cpu_state();
672 674
673#ifdef CONFIG_HAVE_HW_BREAKPOINTS 675#ifdef CONFIG_HAVE_HW_BREAKPOINT
674 flush_ptrace_hw_breakpoint(current); 676 flush_ptrace_hw_breakpoint(current);
675#else /* CONFIG_HAVE_HW_BREAKPOINTS */ 677#else /* CONFIG_HAVE_HW_BREAKPOINT */
676 set_debug_reg_defaults(&current->thread); 678 set_debug_reg_defaults(&current->thread);
677#endif /* CONFIG_HAVE_HW_BREAKPOINTS */ 679#endif /* CONFIG_HAVE_HW_BREAKPOINT */
678} 680}
679 681
680void 682void
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index c2b7a07cc3d3..47187cc2cf00 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -2,95 +2,11 @@
2 2
3#include <linux/kernel.h> 3#include <linux/kernel.h>
4#include <linux/string.h> 4#include <linux/string.h>
5#include <linux/pci_regs.h>
6#include <linux/module.h> 5#include <linux/module.h>
7#include <linux/ioport.h> 6#include <linux/ioport.h>
8#include <linux/etherdevice.h> 7#include <linux/etherdevice.h>
9#include <linux/of_address.h> 8#include <linux/of_address.h>
10#include <asm/prom.h> 9#include <asm/prom.h>
11#include <asm/pci-bridge.h>
12
13#ifdef CONFIG_PCI
14int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
15{
16 struct device_node *dn, *ppnode;
17 struct pci_dev *ppdev;
18 u32 lspec;
19 u32 laddr[3];
20 u8 pin;
21 int rc;
22
23 /* Check if we have a device node, if yes, fallback to standard OF
24 * parsing
25 */
26 dn = pci_device_to_OF_node(pdev);
27 if (dn) {
28 rc = of_irq_map_one(dn, 0, out_irq);
29 if (!rc)
30 return rc;
31 }
32
33 /* Ok, we don't, time to have fun. Let's start by building up an
34 * interrupt spec. we assume #interrupt-cells is 1, which is standard
35 * for PCI. If you do different, then don't use that routine.
36 */
37 rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
38 if (rc != 0)
39 return rc;
40 /* No pin, exit */
41 if (pin == 0)
42 return -ENODEV;
43
44 /* Now we walk up the PCI tree */
45 lspec = pin;
46 for (;;) {
47 /* Get the pci_dev of our parent */
48 ppdev = pdev->bus->self;
49
50 /* Ouch, it's a host bridge... */
51 if (ppdev == NULL) {
52#ifdef CONFIG_PPC64
53 ppnode = pci_bus_to_OF_node(pdev->bus);
54#else
55 struct pci_controller *host;
56 host = pci_bus_to_host(pdev->bus);
57 ppnode = host ? host->dn : NULL;
58#endif
59 /* No node for host bridge ? give up */
60 if (ppnode == NULL)
61 return -EINVAL;
62 } else
63 /* We found a P2P bridge, check if it has a node */
64 ppnode = pci_device_to_OF_node(ppdev);
65
66 /* Ok, we have found a parent with a device-node, hand over to
67 * the OF parsing code.
68 * We build a unit address from the linux device to be used for
69 * resolution. Note that we use the linux bus number which may
70 * not match your firmware bus numbering.
71 * Fortunately, in most cases, interrupt-map-mask doesn't include
72 * the bus number as part of the matching.
73 * You should still be careful about that though if you intend
74 * to rely on this function (you ship a firmware that doesn't
75 * create device nodes for all PCI devices).
76 */
77 if (ppnode)
78 break;
79
80 /* We can only get here if we hit a P2P bridge with no node,
81 * let's do standard swizzling and try again
82 */
83 lspec = pci_swizzle_interrupt_pin(pdev, lspec);
84 pdev = ppdev;
85 }
86
87 laddr[0] = (pdev->bus->number << 16)
88 | (pdev->devfn << 8);
89 laddr[1] = laddr[2] = 0;
90 return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq);
91}
92EXPORT_SYMBOL_GPL(of_irq_map_pci);
93#endif /* CONFIG_PCI */
94 10
95void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, 11void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
96 unsigned long *busno, unsigned long *phys, unsigned long *size) 12 unsigned long *busno, unsigned long *phys, unsigned long *size)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 8a0deefac08d..b9150f07d266 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -160,7 +160,7 @@ SECTIONS
160 INIT_RAM_FS 160 INIT_RAM_FS
161 } 161 }
162 162
163 PERCPU(PAGE_SIZE) 163 PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
164 164
165 . = ALIGN(8); 165 . = ALIGN(8);
166 .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { 166 .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fd4812329570..0dc95c0aa3be 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1516,7 +1516,8 @@ int start_topology_update(void)
1516{ 1516{
1517 int rc = 0; 1517 int rc = 0;
1518 1518
1519 if (firmware_has_feature(FW_FEATURE_VPHN) && 1519 /* Disabled until races with load balancing are fixed */
1520 if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
1520 get_lppaca()->shared_proc) { 1521 get_lppaca()->shared_proc) {
1521 vphn_enabled = 1; 1522 vphn_enabled = 1;
1522 setup_cpu_associativity_change_counters(); 1523 setup_cpu_associativity_change_counters();
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 1ec06576f619..c14d09f614f3 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -38,13 +38,11 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
38 * neesd to be flushed. This function will either perform the flush 38 * neesd to be flushed. This function will either perform the flush
39 * immediately or will batch it up if the current CPU has an active 39 * immediately or will batch it up if the current CPU has an active
40 * batch on it. 40 * batch on it.
41 *
42 * Must be called from within some kind of spinlock/non-preempt region...
43 */ 41 */
44void hpte_need_flush(struct mm_struct *mm, unsigned long addr, 42void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
45 pte_t *ptep, unsigned long pte, int huge) 43 pte_t *ptep, unsigned long pte, int huge)
46{ 44{
47 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 45 struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
48 unsigned long vsid, vaddr; 46 unsigned long vsid, vaddr;
49 unsigned int psize; 47 unsigned int psize;
50 int ssize; 48 int ssize;
@@ -99,6 +97,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
99 */ 97 */
100 if (!batch->active) { 98 if (!batch->active) {
101 flush_hash_page(vaddr, rpte, psize, ssize, 0); 99 flush_hash_page(vaddr, rpte, psize, ssize, 0);
100 put_cpu_var(ppc64_tlb_batch);
102 return; 101 return;
103 } 102 }
104 103
@@ -127,6 +126,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
127 batch->index = ++i; 126 batch->index = ++i;
128 if (i >= PPC64_TLB_BATCH_NR) 127 if (i >= PPC64_TLB_BATCH_NR)
129 __flush_tlb_pending(batch); 128 __flush_tlb_pending(batch);
129 put_cpu_var(ppc64_tlb_batch);
130} 130}
131 131
132/* 132/*
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 187a7d32f86a..a3d2ce54ea2e 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
70 if (!IS_ERR(tmp)) { 70 if (!IS_ERR(tmp)) {
71 struct nameidata nd; 71 struct nameidata nd;
72 72
73 ret = path_lookup(tmp, LOOKUP_PARENT, &nd); 73 ret = kern_path_parent(tmp, &nd);
74 if (!ret) { 74 if (!ret) {
75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE; 75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
76 ret = spufs_create(&nd, flags, mode, neighbor); 76 ret = spufs_create(&nd, flags, mode, neighbor);
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index fdb7384c0c4f..f0491cc28900 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -242,8 +242,8 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
242 pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */ 242 pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */
243 pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE); 243 pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE);
244 244
245 for (i = 0; i < NR_CPUS; i++) { 245 for (i = 0; i < NR_LPPACAS; i++) {
246 if (lppaca_of(i).dyn_proc_status >= 2) 246 if (lppaca[i].dyn_proc_status >= 2)
247 continue; 247 continue;
248 248
249 snprintf(p, 32 - (p - buf), "@%d", i); 249 snprintf(p, 32 - (p - buf), "@%d", i);
@@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
251 251
252 dt_prop_str(dt, "device_type", device_type_cpu); 252 dt_prop_str(dt, "device_type", device_type_cpu);
253 253
254 index = lppaca_of(i).dyn_hv_phys_proc_index; 254 index = lppaca[i].dyn_hv_phys_proc_index;
255 d = &xIoHriProcessorVpd[index]; 255 d = &xIoHriProcessorVpd[index];
256 256
257 dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); 257 dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b0863410517f..2946ae10fbfd 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -680,6 +680,7 @@ void * __init iSeries_early_setup(void)
680 * on but calling this function multiple times is fine. 680 * on but calling this function multiple times is fine.
681 */ 681 */
682 identify_cpu(0, mfspr(SPRN_PVR)); 682 identify_cpu(0, mfspr(SPRN_PVR));
683 initialise_paca(&boot_paca, 0);
683 684
684 powerpc_firmware_features |= FW_FEATURE_ISERIES; 685 powerpc_firmware_features |= FW_FEATURE_ISERIES;
685 powerpc_firmware_features |= FW_FEATURE_LPAR; 686 powerpc_firmware_features |= FW_FEATURE_LPAR;
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 0851eb1e919e..2751b3a8a66f 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -133,11 +133,12 @@ unsigned long decompress_kernel(void)
133 unsigned long output_addr; 133 unsigned long output_addr;
134 unsigned char *output; 134 unsigned char *output;
135 135
136 check_ipl_parmblock((void *) 0, (unsigned long) output + SZ__bss_start); 136 output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL;
137 check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start);
137 memset(&_bss, 0, &_ebss - &_bss); 138 memset(&_bss, 0, &_ebss - &_bss);
138 free_mem_ptr = (unsigned long)&_end; 139 free_mem_ptr = (unsigned long)&_end;
139 free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; 140 free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
140 output = (unsigned char *) ((free_mem_end_ptr + 4095UL) & -4096UL); 141 output = (unsigned char *) output_addr;
141 142
142#ifdef CONFIG_BLK_DEV_INITRD 143#ifdef CONFIG_BLK_DEV_INITRD
143 /* 144 /*
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index f42dbabc0d30..48884f89ab92 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -38,6 +38,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
38 BUG_ON(ret != bsize); 38 BUG_ON(ret != bsize);
39 data += bsize - index; 39 data += bsize - index;
40 len -= bsize - index; 40 len -= bsize - index;
41 index = 0;
41 } 42 }
42 43
43 /* process as many blocks as possible */ 44 /* process as many blocks as possible */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 76daea117181..5c5ba10384c2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -36,14 +36,19 @@
36 36
37static inline int atomic_read(const atomic_t *v) 37static inline int atomic_read(const atomic_t *v)
38{ 38{
39 barrier(); 39 int c;
40 return v->counter; 40
41 asm volatile(
42 " l %0,%1\n"
43 : "=d" (c) : "Q" (v->counter));
44 return c;
41} 45}
42 46
43static inline void atomic_set(atomic_t *v, int i) 47static inline void atomic_set(atomic_t *v, int i)
44{ 48{
45 v->counter = i; 49 asm volatile(
46 barrier(); 50 " st %1,%0\n"
51 : "=Q" (v->counter) : "d" (i));
47} 52}
48 53
49static inline int atomic_add_return(int i, atomic_t *v) 54static inline int atomic_add_return(int i, atomic_t *v)
@@ -128,14 +133,19 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
128 133
129static inline long long atomic64_read(const atomic64_t *v) 134static inline long long atomic64_read(const atomic64_t *v)
130{ 135{
131 barrier(); 136 long long c;
132 return v->counter; 137
138 asm volatile(
139 " lg %0,%1\n"
140 : "=d" (c) : "Q" (v->counter));
141 return c;
133} 142}
134 143
135static inline void atomic64_set(atomic64_t *v, long long i) 144static inline void atomic64_set(atomic64_t *v, long long i)
136{ 145{
137 v->counter = i; 146 asm volatile(
138 barrier(); 147 " stg %1,%0\n"
148 : "=Q" (v->counter) : "d" (i));
139} 149}
140 150
141static inline long long atomic64_add_return(long long i, atomic64_t *v) 151static inline long long atomic64_add_return(long long i, atomic64_t *v)
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
index 24aafa68b643..2a30d5ac0667 100644
--- a/arch/s390/include/asm/cache.h
+++ b/arch/s390/include/asm/cache.h
@@ -13,6 +13,7 @@
13 13
14#define L1_CACHE_BYTES 256 14#define L1_CACHE_BYTES 256
15#define L1_CACHE_SHIFT 8 15#define L1_CACHE_SHIFT 8
16#define NET_SKB_PAD 32
16 17
17#define __read_mostly __attribute__((__section__(".data..read_mostly"))) 18#define __read_mostly __attribute__((__section__(".data..read_mostly")))
18 19
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5c5d02de49e9..81cf36b691f1 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -7,7 +7,7 @@
7#include <linux/uaccess.h> 7#include <linux/uaccess.h>
8#include <asm/errno.h> 8#include <asm/errno.h>
9 9
10static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 10static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
11{ 11{
12 int op = (encoded_op >> 28) & 7; 12 int op = (encoded_op >> 28) & 7;
13 int cmp = (encoded_op >> 24) & 15; 13 int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg; 19 oparg = 1 << oparg;
20 20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
22 return -EFAULT; 22 return -EFAULT;
23 23
24 pagefault_disable(); 24 pagefault_disable();
@@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
39 return ret; 39 return ret;
40} 40}
41 41
42static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, 42static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
43 int oldval, int newval) 43 u32 oldval, u32 newval)
44{ 44{
45 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 45 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
46 return -EFAULT; 46 return -EFAULT;
47 47
48 return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval); 48 return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval);
49} 49}
50 50
51#endif /* __KERNEL__ */ 51#endif /* __KERNEL__ */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322d..d0eb4653cebd 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
43 43
44#ifdef __KERNEL__ 44#ifdef __KERNEL__
45 45
46#include <linux/list.h>
47#include <linux/spinlock.h>
48
49struct rwsem_waiter;
50
51extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
52extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
53extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
54extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
55extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
56
57/*
58 * the semaphore definition
59 */
60struct rw_semaphore {
61 signed long count;
62 spinlock_t wait_lock;
63 struct list_head wait_list;
64#ifdef CONFIG_DEBUG_LOCK_ALLOC
65 struct lockdep_map dep_map;
66#endif
67};
68
69#ifndef __s390x__ 46#ifndef __s390x__
70#define RWSEM_UNLOCKED_VALUE 0x00000000 47#define RWSEM_UNLOCKED_VALUE 0x00000000
71#define RWSEM_ACTIVE_BIAS 0x00000001 48#define RWSEM_ACTIVE_BIAS 0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
81#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 58#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
82 59
83/* 60/*
84 * initialisation
85 */
86
87#ifdef CONFIG_DEBUG_LOCK_ALLOC
88# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
89#else
90# define __RWSEM_DEP_MAP_INIT(lockname)
91#endif
92
93#define __RWSEM_INITIALIZER(name) \
94 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
95 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
96
97#define DECLARE_RWSEM(name) \
98 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
99
100static inline void init_rwsem(struct rw_semaphore *sem)
101{
102 sem->count = RWSEM_UNLOCKED_VALUE;
103 spin_lock_init(&sem->wait_lock);
104 INIT_LIST_HEAD(&sem->wait_list);
105}
106
107extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
108 struct lock_class_key *key);
109
110#define init_rwsem(sem) \
111do { \
112 static struct lock_class_key __key; \
113 \
114 __init_rwsem((sem), #sem, &__key); \
115} while (0)
116
117
118/*
119 * lock for reading 61 * lock for reading
120 */ 62 */
121static inline void __down_read(struct rw_semaphore *sem) 63static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
377 return new; 319 return new;
378} 320}
379 321
380static inline int rwsem_is_locked(struct rw_semaphore *sem)
381{
382 return (sem->count != 0);
383}
384
385#endif /* __KERNEL__ */ 322#endif /* __KERNEL__ */
386#endif /* _S390_RWSEM_H */ 323#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index d6b1ed0ec52b..2d9ea11f919a 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -83,8 +83,8 @@ struct uaccess_ops {
83 size_t (*clear_user)(size_t, void __user *); 83 size_t (*clear_user)(size_t, void __user *);
84 size_t (*strnlen_user)(size_t, const char __user *); 84 size_t (*strnlen_user)(size_t, const char __user *);
85 size_t (*strncpy_from_user)(size_t, const char __user *, char *); 85 size_t (*strncpy_from_user)(size_t, const char __user *, char *);
86 int (*futex_atomic_op)(int op, int __user *, int oparg, int *old); 86 int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old);
87 int (*futex_atomic_cmpxchg)(int __user *, int old, int new); 87 int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new);
88}; 88};
89 89
90extern struct uaccess_ops uaccess; 90extern struct uaccess_ops uaccess;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index a68ac10213b2..1bc18cdb525b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -77,7 +77,7 @@ SECTIONS
77 . = ALIGN(PAGE_SIZE); 77 . = ALIGN(PAGE_SIZE);
78 INIT_DATA_SECTION(0x100) 78 INIT_DATA_SECTION(0x100)
79 79
80 PERCPU(PAGE_SIZE) 80 PERCPU(0x100, PAGE_SIZE)
81 . = ALIGN(PAGE_SIZE); 81 . = ALIGN(PAGE_SIZE);
82 __init_end = .; /* freed after init ends here */ 82 __init_end = .; /* freed after init ends here */
83 83
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
index 126011df14f1..1d2536cb630b 100644
--- a/arch/s390/lib/uaccess.h
+++ b/arch/s390/lib/uaccess.h
@@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *);
12extern size_t copy_to_user_std(size_t, void __user *, const void *); 12extern size_t copy_to_user_std(size_t, void __user *, const void *);
13extern size_t strnlen_user_std(size_t, const char __user *); 13extern size_t strnlen_user_std(size_t, const char __user *);
14extern size_t strncpy_from_user_std(size_t, const char __user *, char *); 14extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
15extern int futex_atomic_cmpxchg_std(int __user *, int, int); 15extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32);
16extern int futex_atomic_op_std(int, int __user *, int, int *); 16extern int futex_atomic_op_std(int, u32 __user *, int, int *);
17 17
18extern size_t copy_from_user_pt(size_t, const void __user *, void *); 18extern size_t copy_from_user_pt(size_t, const void __user *, void *);
19extern size_t copy_to_user_pt(size_t, void __user *, const void *); 19extern size_t copy_to_user_pt(size_t, void __user *, const void *);
20extern int futex_atomic_op_pt(int, int __user *, int, int *); 20extern int futex_atomic_op_pt(int, u32 __user *, int, int *);
21extern int futex_atomic_cmpxchg_pt(int __user *, int, int); 21extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32);
22 22
23#endif /* __ARCH_S390_LIB_UACCESS_H */ 23#endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 404f2de296dc..74833831417f 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,7 +302,7 @@ fault:
302 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ 302 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
303 "m" (*uaddr) : "cc" ); 303 "m" (*uaddr) : "cc" );
304 304
305static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) 305static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
306{ 306{
307 int oldval = 0, newval, ret; 307 int oldval = 0, newval, ret;
308 308
@@ -335,7 +335,7 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
335 return ret; 335 return ret;
336} 336}
337 337
338int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) 338int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
339{ 339{
340 int ret; 340 int ret;
341 341
@@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
354 return ret; 354 return ret;
355} 355}
356 356
357static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) 357static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
358 u32 oldval, u32 newval)
358{ 359{
359 int ret; 360 int ret;
360 361
361 asm volatile("0: cs %1,%4,0(%5)\n" 362 asm volatile("0: cs %1,%4,0(%5)\n"
362 "1: lr %0,%1\n" 363 "1: la %0,0\n"
363 "2:\n" 364 "2:\n"
364 EX_TABLE(0b,2b) EX_TABLE(1b,2b) 365 EX_TABLE(0b,2b) EX_TABLE(1b,2b)
365 : "=d" (ret), "+d" (oldval), "=m" (*uaddr) 366 : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
366 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) 367 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
367 : "cc", "memory" ); 368 : "cc", "memory" );
369 *uval = oldval;
368 return ret; 370 return ret;
369} 371}
370 372
371int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) 373int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
374 u32 oldval, u32 newval)
372{ 375{
373 int ret; 376 int ret;
374 377
375 if (segment_eq(get_fs(), KERNEL_DS)) 378 if (segment_eq(get_fs(), KERNEL_DS))
376 return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); 379 return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
377 spin_lock(&current->mm->page_table_lock); 380 spin_lock(&current->mm->page_table_lock);
378 uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); 381 uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
379 if (!uaddr) { 382 if (!uaddr) {
@@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
382 } 385 }
383 get_page(virt_to_page(uaddr)); 386 get_page(virt_to_page(uaddr));
384 spin_unlock(&current->mm->page_table_lock); 387 spin_unlock(&current->mm->page_table_lock);
385 ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); 388 ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
386 put_page(virt_to_page(uaddr)); 389 put_page(virt_to_page(uaddr));
387 return ret; 390 return ret;
388} 391}
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
index a6c4f7ed24a4..bb1a7eed42ce 100644
--- a/arch/s390/lib/uaccess_std.c
+++ b/arch/s390/lib/uaccess_std.c
@@ -255,7 +255,7 @@ size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
255 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ 255 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
256 "m" (*uaddr) : "cc"); 256 "m" (*uaddr) : "cc");
257 257
258int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) 258int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old)
259{ 259{
260 int oldval = 0, newval, ret; 260 int oldval = 0, newval, ret;
261 261
@@ -287,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
287 return ret; 287 return ret;
288} 288}
289 289
290int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval) 290int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr,
291 u32 oldval, u32 newval)
291{ 292{
292 int ret; 293 int ret;
293 294
294 asm volatile( 295 asm volatile(
295 " sacf 256\n" 296 " sacf 256\n"
296 "0: cs %1,%4,0(%5)\n" 297 "0: cs %1,%4,0(%5)\n"
297 "1: lr %0,%1\n" 298 "1: la %0,0\n"
298 "2: sacf 0\n" 299 "2: sacf 0\n"
299 EX_TABLE(0b,2b) EX_TABLE(1b,2b) 300 EX_TABLE(0b,2b) EX_TABLE(1b,2b)
300 : "=d" (ret), "+d" (oldval), "=m" (*uaddr) 301 : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
301 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) 302 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
302 : "cc", "memory" ); 303 : "cc", "memory" );
304 *uval = oldval;
303 return ret; 305 return ret;
304} 306}
305 307
diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h
index a9f16a7f9aea..6cb9f193a95e 100644
--- a/arch/sh/include/asm/futex-irq.h
+++ b/arch/sh/include/asm/futex-irq.h
@@ -3,7 +3,7 @@
3 3
4#include <asm/system.h> 4#include <asm/system.h>
5 5
6static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, 6static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr,
7 int *oldval) 7 int *oldval)
8{ 8{
9 unsigned long flags; 9 unsigned long flags;
@@ -20,7 +20,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr,
20 return ret; 20 return ret;
21} 21}
22 22
23static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, 23static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr,
24 int *oldval) 24 int *oldval)
25{ 25{
26 unsigned long flags; 26 unsigned long flags;
@@ -37,7 +37,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr,
37 return ret; 37 return ret;
38} 38}
39 39
40static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, 40static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr,
41 int *oldval) 41 int *oldval)
42{ 42{
43 unsigned long flags; 43 unsigned long flags;
@@ -54,7 +54,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr,
54 return ret; 54 return ret;
55} 55}
56 56
57static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, 57static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr,
58 int *oldval) 58 int *oldval)
59{ 59{
60 unsigned long flags; 60 unsigned long flags;
@@ -71,7 +71,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr,
71 return ret; 71 return ret;
72} 72}
73 73
74static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, 74static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr,
75 int *oldval) 75 int *oldval)
76{ 76{
77 unsigned long flags; 77 unsigned long flags;
@@ -88,11 +88,13 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr,
88 return ret; 88 return ret;
89} 89}
90 90
91static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr, 91static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
92 int oldval, int newval) 92 u32 __user *uaddr,
93 u32 oldval, u32 newval)
93{ 94{
94 unsigned long flags; 95 unsigned long flags;
95 int ret, prev = 0; 96 int ret;
97 u32 prev = 0;
96 98
97 local_irq_save(flags); 99 local_irq_save(flags);
98 100
@@ -102,10 +104,8 @@ static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr,
102 104
103 local_irq_restore(flags); 105 local_irq_restore(flags);
104 106
105 if (ret) 107 *uval = prev;
106 return ret; 108 return ret;
107
108 return prev;
109} 109}
110 110
111#endif /* __ASM_SH_FUTEX_IRQ_H */ 111#endif /* __ASM_SH_FUTEX_IRQ_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 68256ec5fa35..7be39a646fbd 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -10,7 +10,7 @@
10/* XXX: UP variants, fix for SH-4A and SMP.. */ 10/* XXX: UP variants, fix for SH-4A and SMP.. */
11#include <asm/futex-irq.h> 11#include <asm/futex-irq.h>
12 12
13static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 13static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
14{ 14{
15 int op = (encoded_op >> 28) & 7; 15 int op = (encoded_op >> 28) & 7;
16 int cmp = (encoded_op >> 24) & 15; 16 int cmp = (encoded_op >> 24) & 15;
@@ -21,7 +21,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
21 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 21 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
22 oparg = 1 << oparg; 22 oparg = 1 << oparg;
23 23
24 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 24 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
25 return -EFAULT; 25 return -EFAULT;
26 26
27 pagefault_disable(); 27 pagefault_disable();
@@ -65,12 +65,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
65} 65}
66 66
67static inline int 67static inline int
68futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 68futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
69 u32 oldval, u32 newval)
69{ 70{
70 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 71 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
71 return -EFAULT; 72 return -EFAULT;
72 73
73 return atomic_futex_op_cmpxchg_inatomic(uaddr, oldval, newval); 74 return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
74} 75}
75 76
76#endif /* __KERNEL__ */ 77#endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
index 06e2251a5e48..edab57265293 100644
--- a/arch/sh/include/asm/rwsem.h
+++ b/arch/sh/include/asm/rwsem.h
@@ -11,64 +11,13 @@
11#endif 11#endif
12 12
13#ifdef __KERNEL__ 13#ifdef __KERNEL__
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <asm/atomic.h>
17#include <asm/system.h>
18 14
19/*
20 * the semaphore definition
21 */
22struct rw_semaphore {
23 long count;
24#define RWSEM_UNLOCKED_VALUE 0x00000000 15#define RWSEM_UNLOCKED_VALUE 0x00000000
25#define RWSEM_ACTIVE_BIAS 0x00000001 16#define RWSEM_ACTIVE_BIAS 0x00000001
26#define RWSEM_ACTIVE_MASK 0x0000ffff 17#define RWSEM_ACTIVE_MASK 0x0000ffff
27#define RWSEM_WAITING_BIAS (-0x00010000) 18#define RWSEM_WAITING_BIAS (-0x00010000)
28#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 19#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
29#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 20#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
30 spinlock_t wait_lock;
31 struct list_head wait_list;
32#ifdef CONFIG_DEBUG_LOCK_ALLOC
33 struct lockdep_map dep_map;
34#endif
35};
36
37#ifdef CONFIG_DEBUG_LOCK_ALLOC
38# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
39#else
40# define __RWSEM_DEP_MAP_INIT(lockname)
41#endif
42
43#define __RWSEM_INITIALIZER(name) \
44 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
45 LIST_HEAD_INIT((name).wait_list) \
46 __RWSEM_DEP_MAP_INIT(name) }
47
48#define DECLARE_RWSEM(name) \
49 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
50
51extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
52extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
53extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
54extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
55
56extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
57 struct lock_class_key *key);
58
59#define init_rwsem(sem) \
60do { \
61 static struct lock_class_key __key; \
62 \
63 __init_rwsem((sem), #sem, &__key); \
64} while (0)
65
66static inline void init_rwsem(struct rw_semaphore *sem)
67{
68 sem->count = RWSEM_UNLOCKED_VALUE;
69 spin_lock_init(&sem->wait_lock);
70 INIT_LIST_HEAD(&sem->wait_list);
71}
72 21
73/* 22/*
74 * lock for reading 23 * lock for reading
@@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
179 return atomic_add_return(delta, (atomic_t *)(&sem->count)); 128 return atomic_add_return(delta, (atomic_t *)(&sem->count));
180} 129}
181 130
182static inline int rwsem_is_locked(struct rw_semaphore *sem)
183{
184 return (sem->count != 0);
185}
186
187#endif /* __KERNEL__ */ 131#endif /* __KERNEL__ */
188#endif /* _ASM_SH_RWSEM_H */ 132#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sh/include/asm/sections.h b/arch/sh/include/asm/sections.h
index a78701da775b..4a5350037c8f 100644
--- a/arch/sh/include/asm/sections.h
+++ b/arch/sh/include/asm/sections.h
@@ -3,7 +3,7 @@
3 3
4#include <asm-generic/sections.h> 4#include <asm-generic/sections.h>
5 5
6extern void __nosave_begin, __nosave_end; 6extern long __nosave_begin, __nosave_end;
7extern long __machvec_start, __machvec_end; 7extern long __machvec_start, __machvec_end;
8extern char __uncached_start, __uncached_end; 8extern char __uncached_start, __uncached_end;
9extern char _ebss[]; 9extern char _ebss[];
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index 672944f5b19c..e53b4b38bd11 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -14,7 +14,7 @@
14#include <linux/io.h> 14#include <linux/io.h>
15#include <linux/sh_timer.h> 15#include <linux/sh_timer.h>
16#include <linux/serial_sci.h> 16#include <linux/serial_sci.h>
17#include <asm/machtypes.h> 17#include <generated/machtypes.h>
18 18
19static struct resource rtc_resources[] = { 19static struct resource rtc_resources[] = {
20 [0] = { 20 [0] = {
@@ -255,12 +255,17 @@ static struct platform_device *sh7750_early_devices[] __initdata = {
255 255
256void __init plat_early_device_setup(void) 256void __init plat_early_device_setup(void)
257{ 257{
258 struct platform_device *dev[1];
259
258 if (mach_is_rts7751r2d()) { 260 if (mach_is_rts7751r2d()) {
259 scif_platform_data.scscr |= SCSCR_CKE1; 261 scif_platform_data.scscr |= SCSCR_CKE1;
260 early_platform_add_devices(&scif_device, 1); 262 dev[0] = &scif_device;
263 early_platform_add_devices(dev, 1);
261 } else { 264 } else {
262 early_platform_add_devices(&sci_device, 1); 265 dev[0] = &sci_device;
263 early_platform_add_devices(&scif_device, 1); 266 early_platform_add_devices(dev, 1);
267 dev[0] = &scif_device;
268 early_platform_add_devices(dev, 1);
264 } 269 }
265 270
266 early_platform_add_devices(sh7750_early_devices, 271 early_platform_add_devices(sh7750_early_devices,
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 7f8a709c3ada..af4d46187a79 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -66,7 +66,7 @@ SECTIONS
66 __machvec_end = .; 66 __machvec_end = .;
67 } 67 }
68 68
69 PERCPU(PAGE_SIZE) 69 PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
70 70
71 /* 71 /*
72 * .exit.text is discarded at runtime, not link time, to deal with 72 * .exit.text is discarded at runtime, not link time, to deal with
diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c
index faa8f86c0db4..0901b2f14e15 100644
--- a/arch/sh/lib/delay.c
+++ b/arch/sh/lib/delay.c
@@ -10,6 +10,16 @@
10void __delay(unsigned long loops) 10void __delay(unsigned long loops)
11{ 11{
12 __asm__ __volatile__( 12 __asm__ __volatile__(
13 /*
14 * ST40-300 appears to have an issue with this code,
15 * normally taking two cycles each loop, as with all
16 * other SH variants. If however the branch and the
17 * delay slot straddle an 8 byte boundary, this increases
18 * to 3 cycles.
19 * This align directive ensures this doesn't occur.
20 */
21 ".balign 8\n\t"
22
13 "tst %0, %0\n\t" 23 "tst %0, %0\n\t"
14 "1:\t" 24 "1:\t"
15 "bf/s 1b\n\t" 25 "bf/s 1b\n\t"
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 88d3dc3d30d5..5a580ea04429 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -108,7 +108,8 @@ void copy_user_highpage(struct page *to, struct page *from,
108 kunmap_atomic(vfrom, KM_USER0); 108 kunmap_atomic(vfrom, KM_USER0);
109 } 109 }
110 110
111 if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK)) 111 if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK) ||
112 (vma->vm_flags & VM_EXEC))
112 __flush_purge_region(vto, PAGE_SIZE); 113 __flush_purge_region(vto, PAGE_SIZE);
113 114
114 kunmap_atomic(vto, KM_USER1); 115 kunmap_atomic(vto, KM_USER1);
diff --git a/arch/sparc/include/asm/fcntl.h b/arch/sparc/include/asm/fcntl.h
index 38f37b333cc7..d0b83f66f356 100644
--- a/arch/sparc/include/asm/fcntl.h
+++ b/arch/sparc/include/asm/fcntl.h
@@ -34,6 +34,8 @@
34#define __O_SYNC 0x800000 34#define __O_SYNC 0x800000
35#define O_SYNC (__O_SYNC|O_DSYNC) 35#define O_SYNC (__O_SYNC|O_DSYNC)
36 36
37#define O_PATH 0x1000000
38
37#define F_GETOWN 5 /* for sockets. */ 39#define F_GETOWN 5 /* for sockets. */
38#define F_SETOWN 6 /* for sockets. */ 40#define F_SETOWN 6 /* for sockets. */
39#define F_GETLK 7 41#define F_GETLK 7
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
index 47f95839dc69..444e7bea23bc 100644
--- a/arch/sparc/include/asm/futex_64.h
+++ b/arch/sparc/include/asm/futex_64.h
@@ -30,7 +30,7 @@
30 : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ 30 : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
31 : "memory") 31 : "memory")
32 32
33static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 33static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
34{ 34{
35 int op = (encoded_op >> 28) & 7; 35 int op = (encoded_op >> 28) & 7;
36 int cmp = (encoded_op >> 24) & 15; 36 int cmp = (encoded_op >> 24) & 15;
@@ -38,7 +38,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
38 int cmparg = (encoded_op << 20) >> 20; 38 int cmparg = (encoded_op << 20) >> 20;
39 int oldval = 0, ret, tem; 39 int oldval = 0, ret, tem;
40 40
41 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))) 41 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
42 return -EFAULT; 42 return -EFAULT;
43 if (unlikely((((unsigned long) uaddr) & 0x3UL))) 43 if (unlikely((((unsigned long) uaddr) & 0x3UL)))
44 return -EINVAL; 44 return -EINVAL;
@@ -85,26 +85,30 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
85} 85}
86 86
87static inline int 87static inline int
88futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 88futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
89 u32 oldval, u32 newval)
89{ 90{
91 int ret = 0;
92
90 __asm__ __volatile__( 93 __asm__ __volatile__(
91 "\n1: casa [%3] %%asi, %2, %0\n" 94 "\n1: casa [%4] %%asi, %3, %1\n"
92 "2:\n" 95 "2:\n"
93 " .section .fixup,#alloc,#execinstr\n" 96 " .section .fixup,#alloc,#execinstr\n"
94 " .align 4\n" 97 " .align 4\n"
95 "3: sethi %%hi(2b), %0\n" 98 "3: sethi %%hi(2b), %0\n"
96 " jmpl %0 + %%lo(2b), %%g0\n" 99 " jmpl %0 + %%lo(2b), %%g0\n"
97 " mov %4, %0\n" 100 " mov %5, %0\n"
98 " .previous\n" 101 " .previous\n"
99 " .section __ex_table,\"a\"\n" 102 " .section __ex_table,\"a\"\n"
100 " .align 4\n" 103 " .align 4\n"
101 " .word 1b, 3b\n" 104 " .word 1b, 3b\n"
102 " .previous\n" 105 " .previous\n"
103 : "=r" (newval) 106 : "+r" (ret), "=r" (newval)
104 : "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT) 107 : "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
105 : "memory"); 108 : "memory");
106 109
107 return newval; 110 *uval = newval;
111 return ret;
108} 112}
109 113
110#endif /* !(_SPARC64_FUTEX_H) */ 114#endif /* !(_SPARC64_FUTEX_H) */
diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h
index a2f5c61f924e..843e4faf6a50 100644
--- a/arch/sparc/include/asm/pcr.h
+++ b/arch/sparc/include/asm/pcr.h
@@ -43,4 +43,6 @@ static inline u64 picl_value(unsigned int nmi_hz)
43 43
44extern u64 pcr_enable; 44extern u64 pcr_enable;
45 45
46extern int pcr_arch_init(void);
47
46#endif /* __PCR_H */ 48#endif /* __PCR_H */
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index a2b4302869bc..069bf4d663a1 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -13,53 +13,12 @@
13 13
14#ifdef __KERNEL__ 14#ifdef __KERNEL__
15 15
16#include <linux/list.h>
17#include <linux/spinlock.h>
18
19struct rwsem_waiter;
20
21struct rw_semaphore {
22 signed long count;
23#define RWSEM_UNLOCKED_VALUE 0x00000000L 16#define RWSEM_UNLOCKED_VALUE 0x00000000L
24#define RWSEM_ACTIVE_BIAS 0x00000001L 17#define RWSEM_ACTIVE_BIAS 0x00000001L
25#define RWSEM_ACTIVE_MASK 0xffffffffL 18#define RWSEM_ACTIVE_MASK 0xffffffffL
26#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) 19#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
27#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 20#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
28#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 21#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
29 spinlock_t wait_lock;
30 struct list_head wait_list;
31#ifdef CONFIG_DEBUG_LOCK_ALLOC
32 struct lockdep_map dep_map;
33#endif
34};
35
36#ifdef CONFIG_DEBUG_LOCK_ALLOC
37# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
38#else
39# define __RWSEM_DEP_MAP_INIT(lockname)
40#endif
41
42#define __RWSEM_INITIALIZER(name) \
43{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
44 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
45
46#define DECLARE_RWSEM(name) \
47 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
48
49extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
50extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
51extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
52extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
53
54extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
55 struct lock_class_key *key);
56
57#define init_rwsem(sem) \
58do { \
59 static struct lock_class_key __key; \
60 \
61 __init_rwsem((sem), #sem, &__key); \
62} while (0)
63 22
64/* 23/*
65 * lock for reading 24 * lock for reading
@@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
160 return atomic64_add_return(delta, (atomic64_t *)(&sem->count)); 119 return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
161} 120}
162 121
163static inline int rwsem_is_locked(struct rw_semaphore *sem)
164{
165 return (sem->count != 0);
166}
167
168#endif /* __KERNEL__ */ 122#endif /* __KERNEL__ */
169 123
170#endif /* _SPARC64_RWSEM_H */ 124#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 47977a77f6c6..72509d0e34be 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -255,10 +255,9 @@ static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
255static int iommu_alloc_ctx(struct iommu *iommu) 255static int iommu_alloc_ctx(struct iommu *iommu)
256{ 256{
257 int lowest = iommu->ctx_lowest_free; 257 int lowest = iommu->ctx_lowest_free;
258 int sz = IOMMU_NUM_CTXS - lowest; 258 int n = find_next_zero_bit(iommu->ctx_bitmap, IOMMU_NUM_CTXS, lowest);
259 int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest);
260 259
261 if (unlikely(n == sz)) { 260 if (unlikely(n == IOMMU_NUM_CTXS)) {
262 n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1); 261 n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
263 if (unlikely(n == lowest)) { 262 if (unlikely(n == lowest)) {
264 printk(KERN_WARNING "IOMMU: Ran out of contexts.\n"); 263 printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index aeaa09a3c655..2cdc131b50ac 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -700,10 +700,8 @@ static void pcic_clear_clock_irq(void)
700 700
701static irqreturn_t pcic_timer_handler (int irq, void *h) 701static irqreturn_t pcic_timer_handler (int irq, void *h)
702{ 702{
703 write_seqlock(&xtime_lock); /* Dummy, to show that we remember */
704 pcic_clear_clock_irq(); 703 pcic_clear_clock_irq();
705 do_timer(1); 704 xtime_update(1);
706 write_sequnlock(&xtime_lock);
707#ifndef CONFIG_SMP 705#ifndef CONFIG_SMP
708 update_process_times(user_mode(get_irq_regs())); 706 update_process_times(user_mode(get_irq_regs()));
709#endif 707#endif
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index ae96cf52a955..7c2ced612b8f 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -167,5 +167,3 @@ out_unregister:
167 unregister_perf_hsvc(); 167 unregister_perf_hsvc();
168 return err; 168 return err;
169} 169}
170
171early_initcall(pcr_arch_init);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b6a2b8f47040..555a76d1f4a1 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -49,6 +49,7 @@
49#include <asm/mdesc.h> 49#include <asm/mdesc.h>
50#include <asm/ldc.h> 50#include <asm/ldc.h>
51#include <asm/hypervisor.h> 51#include <asm/hypervisor.h>
52#include <asm/pcr.h>
52 53
53#include "cpumap.h" 54#include "cpumap.h"
54 55
@@ -1358,6 +1359,7 @@ void __cpu_die(unsigned int cpu)
1358 1359
1359void __init smp_cpus_done(unsigned int max_cpus) 1360void __init smp_cpus_done(unsigned int max_cpus)
1360{ 1361{
1362 pcr_arch_init();
1361} 1363}
1362 1364
1363void smp_send_reschedule(int cpu) 1365void smp_send_reschedule(int cpu)
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 9c743b1886ff..4211bfc9bcad 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -85,7 +85,7 @@ int update_persistent_clock(struct timespec now)
85 85
86/* 86/*
87 * timer_interrupt() needs to keep up the real-time clock, 87 * timer_interrupt() needs to keep up the real-time clock,
88 * as well as call the "do_timer()" routine every clocktick 88 * as well as call the "xtime_update()" routine every clocktick
89 */ 89 */
90 90
91#define TICK_SIZE (tick_nsec / 1000) 91#define TICK_SIZE (tick_nsec / 1000)
@@ -96,14 +96,9 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id)
96 profile_tick(CPU_PROFILING); 96 profile_tick(CPU_PROFILING);
97#endif 97#endif
98 98
99 /* Protect counter clear so that do_gettimeoffset works */
100 write_seqlock(&xtime_lock);
101
102 clear_clock_irq(); 99 clear_clock_irq();
103 100
104 do_timer(1); 101 xtime_update(1);
105
106 write_sequnlock(&xtime_lock);
107 102
108#ifndef CONFIG_SMP 103#ifndef CONFIG_SMP
109 update_process_times(user_mode(get_irq_regs())); 104 update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/sparc/kernel/una_asm_32.S b/arch/sparc/kernel/una_asm_32.S
index 8cc03458eb7e..8f096e84a937 100644
--- a/arch/sparc/kernel/una_asm_32.S
+++ b/arch/sparc/kernel/una_asm_32.S
@@ -24,9 +24,9 @@ retl_efault:
24 .globl __do_int_store 24 .globl __do_int_store
25__do_int_store: 25__do_int_store:
26 ld [%o2], %g1 26 ld [%o2], %g1
27 cmp %1, 2 27 cmp %o1, 2
28 be 2f 28 be 2f
29 cmp %1, 4 29 cmp %o1, 4
30 be 1f 30 be 1f
31 srl %g1, 24, %g2 31 srl %g1, 24, %g2
32 srl %g1, 16, %g7 32 srl %g1, 16, %g7
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 0c1e6783657f..92b557afe535 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -108,7 +108,7 @@ SECTIONS
108 __sun4v_2insn_patch_end = .; 108 __sun4v_2insn_patch_end = .;
109 } 109 }
110 110
111 PERCPU(PAGE_SIZE) 111 PERCPU(SMP_CACHE_BYTES, PAGE_SIZE)
112 112
113 . = ALIGN(PAGE_SIZE); 113 . = ALIGN(PAGE_SIZE);
114 __init_end = .; 114 __init_end = .;
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index cbddeb38ffda..d3c7a12ad879 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -16,7 +16,7 @@
16#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)]) 16#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
17 17
18spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = { 18spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
19 [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED 19 [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
20}; 20};
21 21
22#else /* SMP */ 22#else /* SMP */
diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c
index 764b3eb7b604..48d00e72ce15 100644
--- a/arch/sparc/lib/bitext.c
+++ b/arch/sparc/lib/bitext.c
@@ -10,7 +10,7 @@
10 */ 10 */
11 11
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/bitops.h> 13#include <linux/bitmap.h>
14 14
15#include <asm/bitext.h> 15#include <asm/bitext.h>
16 16
@@ -80,8 +80,7 @@ int bit_map_string_get(struct bit_map *t, int len, int align)
80 while (test_bit(offset + i, t->map) == 0) { 80 while (test_bit(offset + i, t->map) == 0) {
81 i++; 81 i++;
82 if (i == len) { 82 if (i == len) {
83 for (i = 0; i < len; i++) 83 bitmap_set(t->map, offset, len);
84 __set_bit(offset + i, t->map);
85 if (offset == t->first_free) 84 if (offset == t->first_free)
86 t->first_free = find_next_zero_bit 85 t->first_free = find_next_zero_bit
87 (t->map, t->size, 86 (t->map, t->size,
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index fe0d10dcae57..d03ec124a598 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -29,16 +29,16 @@
29#include <linux/uaccess.h> 29#include <linux/uaccess.h>
30#include <linux/errno.h> 30#include <linux/errno.h>
31 31
32extern struct __get_user futex_set(int __user *v, int i); 32extern struct __get_user futex_set(u32 __user *v, int i);
33extern struct __get_user futex_add(int __user *v, int n); 33extern struct __get_user futex_add(u32 __user *v, int n);
34extern struct __get_user futex_or(int __user *v, int n); 34extern struct __get_user futex_or(u32 __user *v, int n);
35extern struct __get_user futex_andn(int __user *v, int n); 35extern struct __get_user futex_andn(u32 __user *v, int n);
36extern struct __get_user futex_cmpxchg(int __user *v, int o, int n); 36extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
37 37
38#ifndef __tilegx__ 38#ifndef __tilegx__
39extern struct __get_user futex_xor(int __user *v, int n); 39extern struct __get_user futex_xor(u32 __user *v, int n);
40#else 40#else
41static inline struct __get_user futex_xor(int __user *uaddr, int n) 41static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
42{ 42{
43 struct __get_user asm_ret = __get_user_4(uaddr); 43 struct __get_user asm_ret = __get_user_4(uaddr);
44 if (!asm_ret.err) { 44 if (!asm_ret.err) {
@@ -53,7 +53,7 @@ static inline struct __get_user futex_xor(int __user *uaddr, int n)
53} 53}
54#endif 54#endif
55 55
56static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 56static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
57{ 57{
58 int op = (encoded_op >> 28) & 7; 58 int op = (encoded_op >> 28) & 7;
59 int cmp = (encoded_op >> 24) & 15; 59 int cmp = (encoded_op >> 24) & 15;
@@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
65 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 65 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
66 oparg = 1 << oparg; 66 oparg = 1 << oparg;
67 67
68 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 68 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
69 return -EFAULT; 69 return -EFAULT;
70 70
71 pagefault_disable(); 71 pagefault_disable();
@@ -119,16 +119,17 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
119 return ret; 119 return ret;
120} 120}
121 121
122static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, 122static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
123 int newval) 123 u32 oldval, u32 newval)
124{ 124{
125 struct __get_user asm_ret; 125 struct __get_user asm_ret;
126 126
127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
128 return -EFAULT; 128 return -EFAULT;
129 129
130 asm_ret = futex_cmpxchg(uaddr, oldval, newval); 130 asm_ret = futex_cmpxchg(uaddr, oldval, newval);
131 return asm_ret.err ? asm_ret.err : asm_ret.val; 131 *uval = asm_ret.val;
132 return asm_ret.err;
132} 133}
133 134
134#ifndef __tilegx__ 135#ifndef __tilegx__
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index 25fdc0c1839a..c6ce378e0678 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -63,7 +63,7 @@ SECTIONS
63 *(.init.page) 63 *(.init.page)
64 } :data =0 64 } :data =0
65 INIT_DATA_SECTION(16) 65 INIT_DATA_SECTION(16)
66 PERCPU(PAGE_SIZE) 66 PERCPU(L2_CACHE_BYTES, PAGE_SIZE)
67 . = ALIGN(PAGE_SIZE); 67 . = ALIGN(PAGE_SIZE);
68 VMLINUX_SYMBOL(_einitdata) = .; 68 VMLINUX_SYMBOL(_einitdata) = .;
69 69
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index e351e14b4339..1e78940218c0 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
7 bool 7 bool
8 default y 8 default y
9 select HAVE_GENERIC_HARDIRQS 9 select HAVE_GENERIC_HARDIRQS
10 select GENERIC_HARDIRQS_NO_DEPRECATED
10 11
11config MMU 12config MMU
12 bool 13 bool
diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86
index 5ee328099c63..02fb017fed47 100644
--- a/arch/um/Kconfig.x86
+++ b/arch/um/Kconfig.x86
@@ -10,6 +10,8 @@ endmenu
10 10
11config UML_X86 11config UML_X86
12 def_bool y 12 def_bool y
13 select GENERIC_FIND_FIRST_BIT
14 select GENERIC_FIND_NEXT_BIT
13 15
14config 64BIT 16config 64BIT
15 bool 17 bool
@@ -19,6 +21,9 @@ config X86_32
19 def_bool !64BIT 21 def_bool !64BIT
20 select HAVE_AOUT 22 select HAVE_AOUT
21 23
24config X86_64
25 def_bool 64BIT
26
22config RWSEM_XCHGADD_ALGORITHM 27config RWSEM_XCHGADD_ALGORITHM
23 def_bool X86_XADD 28 def_bool X86_XADD
24 29
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 975613b23dcf..c70e047eed72 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req)
124#if 0 124#if 0
125void mconsole_proc(struct mc_request *req) 125void mconsole_proc(struct mc_request *req)
126{ 126{
127 struct nameidata nd;
128 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 127 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
129 struct file *file; 128 struct file *file;
130 int n, err; 129 int n;
131 char *ptr = req->request.data, *buf; 130 char *ptr = req->request.data, *buf;
132 mm_segment_t old_fs = get_fs(); 131 mm_segment_t old_fs = get_fs();
133 132
134 ptr += strlen("proc"); 133 ptr += strlen("proc");
135 ptr = skip_spaces(ptr); 134 ptr = skip_spaces(ptr);
136 135
137 err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd); 136 file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
138 if (err) {
139 mconsole_reply(req, "Failed to look up file", 1, 0);
140 goto out;
141 }
142
143 err = may_open(&nd.path, MAY_READ, O_RDONLY);
144 if (result) {
145 mconsole_reply(req, "Failed to open file", 1, 0);
146 path_put(&nd.path);
147 goto out;
148 }
149
150 file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
151 current_cred());
152 err = PTR_ERR(file);
153 if (IS_ERR(file)) { 137 if (IS_ERR(file)) {
154 mconsole_reply(req, "Failed to open file", 1, 0); 138 mconsole_reply(req, "Failed to open file", 1, 0);
155 path_put(&nd.path);
156 goto out; 139 goto out;
157 } 140 }
158 141
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ba4a98ba39c0..620f5b70957d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -185,7 +185,7 @@ struct ubd {
185 .no_cow = 0, \ 185 .no_cow = 0, \
186 .shared = 0, \ 186 .shared = 0, \
187 .cow = DEFAULT_COW, \ 187 .cow = DEFAULT_COW, \
188 .lock = SPIN_LOCK_UNLOCKED, \ 188 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
189 .request = NULL, \ 189 .request = NULL, \
190 .start_sg = 0, \ 190 .start_sg = 0, \
191 .end_sg = 0, \ 191 .end_sg = 0, \
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index ac55b9efa1ce..34bede8aad4a 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -42,7 +42,7 @@
42 INIT_SETUP(0) 42 INIT_SETUP(0)
43 } 43 }
44 44
45 PERCPU(32) 45 PERCPU(32, 32)
46 46
47 .initcall.init : { 47 .initcall.init : {
48 INIT_CALLS 48 INIT_CALLS
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 3f0ac9e0c966..64cfea80cfe2 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -35,8 +35,10 @@ int show_interrupts(struct seq_file *p, void *v)
35 } 35 }
36 36
37 if (i < NR_IRQS) { 37 if (i < NR_IRQS) {
38 raw_spin_lock_irqsave(&irq_desc[i].lock, flags); 38 struct irq_desc *desc = irq_to_desc(i);
39 action = irq_desc[i].action; 39
40 raw_spin_lock_irqsave(&desc->lock, flags);
41 action = desc->action;
40 if (!action) 42 if (!action)
41 goto skip; 43 goto skip;
42 seq_printf(p, "%3d: ",i); 44 seq_printf(p, "%3d: ",i);
@@ -46,7 +48,7 @@ int show_interrupts(struct seq_file *p, void *v)
46 for_each_online_cpu(j) 48 for_each_online_cpu(j)
47 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 49 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
48#endif 50#endif
49 seq_printf(p, " %14s", irq_desc[i].chip->name); 51 seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
50 seq_printf(p, " %s", action->name); 52 seq_printf(p, " %s", action->name);
51 53
52 for (action=action->next; action; action = action->next) 54 for (action=action->next; action; action = action->next)
@@ -54,7 +56,7 @@ int show_interrupts(struct seq_file *p, void *v)
54 56
55 seq_putc(p, '\n'); 57 seq_putc(p, '\n');
56skip: 58skip:
57 raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); 59 raw_spin_unlock_irqrestore(&desc->lock, flags);
58 } else if (i == NR_IRQS) 60 } else if (i == NR_IRQS)
59 seq_putc(p, '\n'); 61 seq_putc(p, '\n');
60 62
@@ -360,10 +362,10 @@ EXPORT_SYMBOL(um_request_irq);
360EXPORT_SYMBOL(reactivate_fd); 362EXPORT_SYMBOL(reactivate_fd);
361 363
362/* 364/*
363 * irq_chip must define (startup || enable) && 365 * irq_chip must define at least enable/disable and ack when
364 * (shutdown || disable) && end 366 * the edge handler is used.
365 */ 367 */
366static void dummy(unsigned int irq) 368static void dummy(struct irq_data *d)
367{ 369{
368} 370}
369 371
@@ -371,20 +373,17 @@ static void dummy(unsigned int irq)
371static struct irq_chip normal_irq_type = { 373static struct irq_chip normal_irq_type = {
372 .name = "SIGIO", 374 .name = "SIGIO",
373 .release = free_irq_by_irq_and_dev, 375 .release = free_irq_by_irq_and_dev,
374 .disable = dummy, 376 .irq_disable = dummy,
375 .enable = dummy, 377 .irq_enable = dummy,
376 .ack = dummy, 378 .irq_ack = dummy,
377 .end = dummy
378}; 379};
379 380
380static struct irq_chip SIGVTALRM_irq_type = { 381static struct irq_chip SIGVTALRM_irq_type = {
381 .name = "SIGVTALRM", 382 .name = "SIGVTALRM",
382 .release = free_irq_by_irq_and_dev, 383 .release = free_irq_by_irq_and_dev,
383 .shutdown = dummy, /* never called */ 384 .irq_disable = dummy,
384 .disable = dummy, 385 .irq_enable = dummy,
385 .enable = dummy, 386 .irq_ack = dummy,
386 .ack = dummy,
387 .end = dummy
388}; 387};
389 388
390void __init init_IRQ(void) 389void __init init_IRQ(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1359bc9f4fd3..e1f65c46bc93 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -64,8 +64,12 @@ config X86
64 select HAVE_TEXT_POKE_SMP 64 select HAVE_TEXT_POKE_SMP
65 select HAVE_GENERIC_HARDIRQS 65 select HAVE_GENERIC_HARDIRQS
66 select HAVE_SPARSE_IRQ 66 select HAVE_SPARSE_IRQ
67 select GENERIC_FIND_FIRST_BIT
68 select GENERIC_FIND_NEXT_BIT
67 select GENERIC_IRQ_PROBE 69 select GENERIC_IRQ_PROBE
68 select GENERIC_PENDING_IRQ if SMP 70 select GENERIC_PENDING_IRQ if SMP
71 select GENERIC_IRQ_SHOW
72 select IRQ_FORCED_THREADING
69 select USE_GENERIC_SMP_HELPERS if SMP 73 select USE_GENERIC_SMP_HELPERS if SMP
70 74
71config INSTRUCTION_DECODER 75config INSTRUCTION_DECODER
@@ -378,6 +382,8 @@ config X86_INTEL_CE
378 depends on X86_32 382 depends on X86_32
379 depends on X86_EXTENDED_PLATFORM 383 depends on X86_EXTENDED_PLATFORM
380 select X86_REBOOTFIXUPS 384 select X86_REBOOTFIXUPS
385 select OF
386 select OF_EARLY_FLATTREE
381 ---help--- 387 ---help---
382 Select for the Intel CE media processor (CE4100) SOC. 388 Select for the Intel CE media processor (CE4100) SOC.
383 This option compiles in support for the CE4100 SOC for settop 389 This option compiles in support for the CE4100 SOC for settop
@@ -807,7 +813,7 @@ config X86_LOCAL_APIC
807 813
808config X86_IO_APIC 814config X86_IO_APIC
809 def_bool y 815 def_bool y
810 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 816 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
811 817
812config X86_VISWS_APIC 818config X86_VISWS_APIC
813 def_bool y 819 def_bool y
@@ -1701,7 +1707,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
1701 depends on NUMA 1707 depends on NUMA
1702 1708
1703config USE_PERCPU_NUMA_NODE_ID 1709config USE_PERCPU_NUMA_NODE_ID
1704 def_bool X86_64 1710 def_bool y
1705 depends on NUMA 1711 depends on NUMA
1706 1712
1707menu "Power management and ACPI options" 1713menu "Power management and ACPI options"
@@ -2062,9 +2068,10 @@ config SCx200HR_TIMER
2062 2068
2063config OLPC 2069config OLPC
2064 bool "One Laptop Per Child support" 2070 bool "One Laptop Per Child support"
2071 depends on !X86_PAE
2065 select GPIOLIB 2072 select GPIOLIB
2066 select OLPC_OPENFIRMWARE 2073 select OF
2067 depends on !X86_64 && !X86_PAE 2074 select OF_PROMTREE if PROC_DEVICETREE
2068 ---help--- 2075 ---help---
2069 Add support for detecting the unique features of the OLPC 2076 Add support for detecting the unique features of the OLPC
2070 XO hardware. 2077 XO hardware.
@@ -2075,21 +2082,6 @@ config OLPC_XO1
2075 ---help--- 2082 ---help---
2076 Add support for non-essential features of the OLPC XO-1 laptop. 2083 Add support for non-essential features of the OLPC XO-1 laptop.
2077 2084
2078config OLPC_OPENFIRMWARE
2079 bool "Support for OLPC's Open Firmware"
2080 depends on !X86_64 && !X86_PAE
2081 default n
2082 select OF
2083 help
2084 This option adds support for the implementation of Open Firmware
2085 that is used on the OLPC XO-1 Children's Machine.
2086 If unsure, say N here.
2087
2088config OLPC_OPENFIRMWARE_DT
2089 bool
2090 default y if OLPC_OPENFIRMWARE && PROC_DEVICETREE
2091 select OF_PROMTREE
2092
2093endif # X86_32 2085endif # X86_32
2094 2086
2095config AMD_NB 2087config AMD_NB
@@ -2134,6 +2126,11 @@ config SYSVIPC_COMPAT
2134 def_bool y 2126 def_bool y
2135 depends on COMPAT && SYSVIPC 2127 depends on COMPAT && SYSVIPC
2136 2128
2129config KEYS_COMPAT
2130 bool
2131 depends on COMPAT && KEYS
2132 default y
2133
2137endmenu 2134endmenu
2138 2135
2139 2136
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 283c5a6a03a6..ed47e6e1747f 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -294,11 +294,6 @@ config X86_GENERIC
294 294
295endif 295endif
296 296
297config X86_CPU
298 def_bool y
299 select GENERIC_FIND_FIRST_BIT
300 select GENERIC_FIND_NEXT_BIT
301
302# 297#
303# Define implied options from the CPU selection here 298# Define implied options from the CPU selection here
304config X86_INTERNODE_CACHE_SHIFT 299config X86_INTERNODE_CACHE_SHIFT
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 646aa78ba5fd..46a823882437 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -62,7 +62,12 @@ int main(int argc, char *argv[])
62 if (fseek(f, -4L, SEEK_END)) { 62 if (fseek(f, -4L, SEEK_END)) {
63 perror(argv[1]); 63 perror(argv[1]);
64 } 64 }
65 fread(&olen, sizeof olen, 1, f); 65
66 if (fread(&olen, sizeof(olen), 1, f) != 1) {
67 perror(argv[1]);
68 return 1;
69 }
70
66 ilen = ftell(f); 71 ilen = ftell(f);
67 olen = getle32(&olen); 72 olen = getle32(&olen);
68 fclose(f); 73 fclose(f);
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e1e60c7d5813..e0e6340c8dad 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -873,22 +873,18 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
873 crypto_ablkcipher_clear_flags(ctr_tfm, ~0); 873 crypto_ablkcipher_clear_flags(ctr_tfm, ~0);
874 874
875 ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); 875 ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
876 if (ret) { 876 if (ret)
877 crypto_free_ablkcipher(ctr_tfm); 877 goto out_free_ablkcipher;
878 return ret;
879 }
880 878
879 ret = -ENOMEM;
881 req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); 880 req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL);
882 if (!req) { 881 if (!req)
883 crypto_free_ablkcipher(ctr_tfm); 882 goto out_free_ablkcipher;
884 return -EINVAL;
885 }
886 883
887 req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); 884 req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
888 if (!req_data) { 885 if (!req_data)
889 crypto_free_ablkcipher(ctr_tfm); 886 goto out_free_request;
890 return -ENOMEM; 887
891 }
892 memset(req_data->iv, 0, sizeof(req_data->iv)); 888 memset(req_data->iv, 0, sizeof(req_data->iv));
893 889
894 /* Clear the data in the hash sub key container to zero.*/ 890 /* Clear the data in the hash sub key container to zero.*/
@@ -913,8 +909,10 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
913 if (!ret) 909 if (!ret)
914 ret = req_data->result.err; 910 ret = req_data->result.err;
915 } 911 }
916 ablkcipher_request_free(req);
917 kfree(req_data); 912 kfree(req_data);
913out_free_request:
914 ablkcipher_request_free(req);
915out_free_ablkcipher:
918 crypto_free_ablkcipher(ctr_tfm); 916 crypto_free_ablkcipher(ctr_tfm);
919 return ret; 917 return ret;
920} 918}
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 518bb99c3394..430312ba6e3f 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -25,6 +25,8 @@
25#define sysretl_audit ia32_ret_from_sys_call 25#define sysretl_audit ia32_ret_from_sys_call
26#endif 26#endif
27 27
28 .section .entry.text, "ax"
29
28#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) 30#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
29 31
30 .macro IA32_ARG_FIXUP noebp=0 32 .macro IA32_ARG_FIXUP noebp=0
@@ -126,26 +128,20 @@ ENTRY(ia32_sysenter_target)
126 */ 128 */
127 ENABLE_INTERRUPTS(CLBR_NONE) 129 ENABLE_INTERRUPTS(CLBR_NONE)
128 movl %ebp,%ebp /* zero extension */ 130 movl %ebp,%ebp /* zero extension */
129 pushq $__USER32_DS 131 pushq_cfi $__USER32_DS
130 CFI_ADJUST_CFA_OFFSET 8
131 /*CFI_REL_OFFSET ss,0*/ 132 /*CFI_REL_OFFSET ss,0*/
132 pushq %rbp 133 pushq_cfi %rbp
133 CFI_ADJUST_CFA_OFFSET 8
134 CFI_REL_OFFSET rsp,0 134 CFI_REL_OFFSET rsp,0
135 pushfq 135 pushfq_cfi
136 CFI_ADJUST_CFA_OFFSET 8
137 /*CFI_REL_OFFSET rflags,0*/ 136 /*CFI_REL_OFFSET rflags,0*/
138 movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d 137 movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
139 CFI_REGISTER rip,r10 138 CFI_REGISTER rip,r10
140 pushq $__USER32_CS 139 pushq_cfi $__USER32_CS
141 CFI_ADJUST_CFA_OFFSET 8
142 /*CFI_REL_OFFSET cs,0*/ 140 /*CFI_REL_OFFSET cs,0*/
143 movl %eax, %eax 141 movl %eax, %eax
144 pushq %r10 142 pushq_cfi %r10
145 CFI_ADJUST_CFA_OFFSET 8
146 CFI_REL_OFFSET rip,0 143 CFI_REL_OFFSET rip,0
147 pushq %rax 144 pushq_cfi %rax
148 CFI_ADJUST_CFA_OFFSET 8
149 cld 145 cld
150 SAVE_ARGS 0,0,1 146 SAVE_ARGS 0,0,1
151 /* no need to do an access_ok check here because rbp has been 147 /* no need to do an access_ok check here because rbp has been
@@ -182,11 +178,9 @@ sysexit_from_sys_call:
182 xorq %r9,%r9 178 xorq %r9,%r9
183 xorq %r10,%r10 179 xorq %r10,%r10
184 xorq %r11,%r11 180 xorq %r11,%r11
185 popfq 181 popfq_cfi
186 CFI_ADJUST_CFA_OFFSET -8
187 /*CFI_RESTORE rflags*/ 182 /*CFI_RESTORE rflags*/
188 popq %rcx /* User %esp */ 183 popq_cfi %rcx /* User %esp */
189 CFI_ADJUST_CFA_OFFSET -8
190 CFI_REGISTER rsp,rcx 184 CFI_REGISTER rsp,rcx
191 TRACE_IRQS_ON 185 TRACE_IRQS_ON
192 ENABLE_INTERRUPTS_SYSEXIT32 186 ENABLE_INTERRUPTS_SYSEXIT32
@@ -421,8 +415,7 @@ ENTRY(ia32_syscall)
421 */ 415 */
422 ENABLE_INTERRUPTS(CLBR_NONE) 416 ENABLE_INTERRUPTS(CLBR_NONE)
423 movl %eax,%eax 417 movl %eax,%eax
424 pushq %rax 418 pushq_cfi %rax
425 CFI_ADJUST_CFA_OFFSET 8
426 cld 419 cld
427 /* note the registers are not zero extended to the sf. 420 /* note the registers are not zero extended to the sf.
428 this could be a problem. */ 421 this could be a problem. */
@@ -851,4 +844,7 @@ ia32_sys_call_table:
851 .quad sys_fanotify_init 844 .quad sys_fanotify_init
852 .quad sys32_fanotify_mark 845 .quad sys32_fanotify_mark
853 .quad sys_prlimit64 /* 340 */ 846 .quad sys_prlimit64 /* 340 */
847 .quad sys_name_to_handle_at
848 .quad compat_sys_open_by_handle_at
849 .quad compat_sys_clock_adjtime
854ia32_syscall_end: 850ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 4784df504d28..448d73a371ba 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -89,6 +89,7 @@ extern int acpi_disabled;
89extern int acpi_pci_disabled; 89extern int acpi_pci_disabled;
90extern int acpi_skip_timer_override; 90extern int acpi_skip_timer_override;
91extern int acpi_use_timer_override; 91extern int acpi_use_timer_override;
92extern int acpi_fix_pin2_polarity;
92 93
93extern u8 acpi_sci_flags; 94extern u8 acpi_sci_flags;
94extern int acpi_sci_override_gsi; 95extern int acpi_sci_override_gsi;
@@ -187,15 +188,7 @@ struct bootnode;
187 188
188#ifdef CONFIG_ACPI_NUMA 189#ifdef CONFIG_ACPI_NUMA
189extern int acpi_numa; 190extern int acpi_numa;
190extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start, 191extern int x86_acpi_numa_init(void);
191 unsigned long end);
192extern int acpi_scan_nodes(unsigned long start, unsigned long end);
193#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
194
195#ifdef CONFIG_NUMA_EMU
196extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
197 int num_nodes);
198#endif
199#endif /* CONFIG_ACPI_NUMA */ 192#endif /* CONFIG_ACPI_NUMA */
200 193
201#define acpi_unlazy_tlb(x) leave_mm(x) 194#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 64dc82ee19f0..e264ae5a1443 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -9,23 +9,20 @@ struct amd_nb_bus_dev_range {
9 u8 dev_limit; 9 u8 dev_limit;
10}; 10};
11 11
12extern struct pci_device_id amd_nb_misc_ids[]; 12extern const struct pci_device_id amd_nb_misc_ids[];
13extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; 13extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
14struct bootnode; 14struct bootnode;
15 15
16extern int early_is_amd_nb(u32 value); 16extern int early_is_amd_nb(u32 value);
17extern int amd_cache_northbridges(void); 17extern int amd_cache_northbridges(void);
18extern void amd_flush_garts(void); 18extern void amd_flush_garts(void);
19extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); 19extern int amd_numa_init(void);
20extern int amd_scan_nodes(void); 20extern int amd_get_subcaches(int);
21 21extern int amd_set_subcaches(int, int);
22#ifdef CONFIG_NUMA_EMU
23extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
24extern void amd_get_nodes(struct bootnode *nodes);
25#endif
26 22
27struct amd_northbridge { 23struct amd_northbridge {
28 struct pci_dev *misc; 24 struct pci_dev *misc;
25 struct pci_dev *link;
29}; 26};
30 27
31struct amd_northbridge_info { 28struct amd_northbridge_info {
@@ -37,6 +34,7 @@ extern struct amd_northbridge_info amd_northbridges;
37 34
38#define AMD_NB_GART 0x1 35#define AMD_NB_GART 0x1
39#define AMD_NB_L3_INDEX_DISABLE 0x2 36#define AMD_NB_L3_INDEX_DISABLE 0x2
37#define AMD_NB_L3_PARTITIONING 0x4
40 38
41#ifdef CONFIG_AMD_NB 39#ifdef CONFIG_AMD_NB
42 40
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3c896946f4cc..a279d98ea95e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -220,7 +220,6 @@ extern void enable_IR_x2apic(void);
220 220
221extern int get_physical_broadcast(void); 221extern int get_physical_broadcast(void);
222 222
223extern void apic_disable(void);
224extern int lapic_get_maxlvt(void); 223extern int lapic_get_maxlvt(void);
225extern void clear_local_APIC(void); 224extern void clear_local_APIC(void);
226extern void connect_bsp_APIC(void); 225extern void connect_bsp_APIC(void);
@@ -228,7 +227,6 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
228extern void disable_local_APIC(void); 227extern void disable_local_APIC(void);
229extern void lapic_shutdown(void); 228extern void lapic_shutdown(void);
230extern int verify_local_APIC(void); 229extern int verify_local_APIC(void);
231extern void cache_APIC_registers(void);
232extern void sync_Arb_IDs(void); 230extern void sync_Arb_IDs(void);
233extern void init_bsp_APIC(void); 231extern void init_bsp_APIC(void);
234extern void setup_local_APIC(void); 232extern void setup_local_APIC(void);
@@ -239,8 +237,7 @@ void register_lapic_address(unsigned long address);
239extern void setup_boot_APIC_clock(void); 237extern void setup_boot_APIC_clock(void);
240extern void setup_secondary_APIC_clock(void); 238extern void setup_secondary_APIC_clock(void);
241extern int APIC_init_uniprocessor(void); 239extern int APIC_init_uniprocessor(void);
242extern void enable_NMI_through_LVT0(void); 240extern int apic_force_enable(unsigned long addr);
243extern int apic_force_enable(void);
244 241
245/* 242/*
246 * On 32bit this is mach-xxx local 243 * On 32bit this is mach-xxx local
@@ -261,7 +258,6 @@ static inline void lapic_shutdown(void) { }
261#define local_apic_timer_c2_ok 1 258#define local_apic_timer_c2_ok 1
262static inline void init_apic_mappings(void) { } 259static inline void init_apic_mappings(void) { }
263static inline void disable_local_APIC(void) { } 260static inline void disable_local_APIC(void) { }
264static inline void apic_disable(void) { }
265# define setup_boot_APIC_clock x86_init_noop 261# define setup_boot_APIC_clock x86_init_noop
266# define setup_secondary_APIC_clock x86_init_noop 262# define setup_secondary_APIC_clock x86_init_noop
267#endif /* !CONFIG_X86_LOCAL_APIC */ 263#endif /* !CONFIG_X86_LOCAL_APIC */
@@ -307,8 +303,6 @@ struct apic {
307 303
308 void (*setup_apic_routing)(void); 304 void (*setup_apic_routing)(void);
309 int (*multi_timer_check)(int apic, int irq); 305 int (*multi_timer_check)(int apic, int irq);
310 int (*apicid_to_node)(int logical_apicid);
311 int (*cpu_to_logical_apicid)(int cpu);
312 int (*cpu_present_to_apicid)(int mps_cpu); 306 int (*cpu_present_to_apicid)(int mps_cpu);
313 void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); 307 void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
314 void (*setup_portio_remap)(void); 308 void (*setup_portio_remap)(void);
@@ -356,6 +350,23 @@ struct apic {
356 void (*icr_write)(u32 low, u32 high); 350 void (*icr_write)(u32 low, u32 high);
357 void (*wait_icr_idle)(void); 351 void (*wait_icr_idle)(void);
358 u32 (*safe_wait_icr_idle)(void); 352 u32 (*safe_wait_icr_idle)(void);
353
354#ifdef CONFIG_X86_32
355 /*
356 * Called very early during boot from get_smp_config(). It should
357 * return the logical apicid. x86_[bios]_cpu_to_apicid is
358 * initialized before this function is called.
359 *
360 * If logical apicid can't be determined that early, the function
361 * may return BAD_APICID. Logical apicid will be configured after
362 * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity
363 * won't be applied properly during early boot in this case.
364 */
365 int (*x86_32_early_logical_apicid)(int cpu);
366
367 /* determine CPU -> NUMA node mapping */
368 int (*x86_32_numa_cpu_node)(int cpu);
369#endif
359}; 370};
360 371
361/* 372/*
@@ -503,6 +514,11 @@ extern struct apic apic_noop;
503 514
504extern struct apic apic_default; 515extern struct apic apic_default;
505 516
517static inline int noop_x86_32_early_logical_apicid(int cpu)
518{
519 return BAD_APICID;
520}
521
506/* 522/*
507 * Set up the logical destination ID. 523 * Set up the logical destination ID.
508 * 524 *
@@ -522,7 +538,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
522 return cpuid_apic >> index_msb; 538 return cpuid_apic >> index_msb;
523} 539}
524 540
525extern int default_apicid_to_node(int logical_apicid); 541extern int default_x86_32_numa_cpu_node(int cpu);
526 542
527#endif 543#endif
528 544
@@ -558,12 +574,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma
558 *retmap = *phys_map; 574 *retmap = *phys_map;
559} 575}
560 576
561/* Mapping from cpu number to logical apicid */
562static inline int default_cpu_to_logical_apicid(int cpu)
563{
564 return 1 << cpu;
565}
566
567static inline int __default_cpu_present_to_apicid(int mps_cpu) 577static inline int __default_cpu_present_to_apicid(int mps_cpu)
568{ 578{
569 if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) 579 if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
@@ -596,8 +606,4 @@ extern int default_check_phys_apicid_present(int phys_apicid);
596 606
597#endif /* CONFIG_X86_LOCAL_APIC */ 607#endif /* CONFIG_X86_LOCAL_APIC */
598 608
599#ifdef CONFIG_X86_32
600extern u8 cpu_2_logical_apicid[NR_CPUS];
601#endif
602
603#endif /* _ASM_X86_APIC_H */ 609#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 47a30ff8e517..d87988bacf3e 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -426,4 +426,16 @@ struct local_apic {
426#else 426#else
427 #define BAD_APICID 0xFFFFu 427 #define BAD_APICID 0xFFFFu
428#endif 428#endif
429
430enum ioapic_irq_destination_types {
431 dest_Fixed = 0,
432 dest_LowestPrio = 1,
433 dest_SMI = 2,
434 dest__reserved_1 = 3,
435 dest_NMI = 4,
436 dest_INIT = 5,
437 dest__reserved_2 = 6,
438 dest_ExtINT = 7
439};
440
429#endif /* _ASM_X86_APICDEF_H */ 441#endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index c8bfe63a06de..e020d88ec02d 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -12,6 +12,7 @@
12/* setup data types */ 12/* setup data types */
13#define SETUP_NONE 0 13#define SETUP_NONE 0
14#define SETUP_E820_EXT 1 14#define SETUP_E820_EXT 1
15#define SETUP_DTB 2
15 16
16/* extensible setup data list node */ 17/* extensible setup data list node */
17struct setup_data { 18struct setup_data {
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
new file mode 100644
index 000000000000..e656ad8c0a2e
--- /dev/null
+++ b/arch/x86/include/asm/ce4100.h
@@ -0,0 +1,6 @@
1#ifndef _ASM_CE4100_H_
2#define _ASM_CE4100_H_
3
4int ce4100_pci_init(void);
5
6#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 220e2ea08e80..91f3e087cf21 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -160,6 +160,7 @@
160#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ 160#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
161#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ 161#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
162#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ 162#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
163#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
163 164
164/* 165/*
165 * Auxiliary flags: Linux defined - For features scattered in various 166 * Auxiliary flags: Linux defined - For features scattered in various
@@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32];
279#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) 280#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
280#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 281#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
281#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) 282#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
283#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
282 284
283#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 285#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
284# define cpu_has_invlpg 1 286# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index e99d55d74df5..908b96957d88 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -96,7 +96,7 @@ extern void e820_setup_gap(void);
96extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, 96extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
97 unsigned long start_addr, unsigned long long end_addr); 97 unsigned long start_addr, unsigned long long end_addr);
98struct setup_data; 98struct setup_data;
99extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data); 99extern void parse_e820_ext(struct setup_data *data);
100 100
101#if defined(CONFIG_X86_64) || \ 101#if defined(CONFIG_X86_64) || \
102 (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) 102 (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 57650ab4a5f5..1cd6d26a0a8d 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -16,10 +16,13 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) 17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
18 18
19.irpc idx, "01234567" 19.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
20 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
21.if NUM_INVALIDATE_TLB_VECTORS > \idx
20BUILD_INTERRUPT3(invalidate_interrupt\idx, 22BUILD_INTERRUPT3(invalidate_interrupt\idx,
21 (INVALIDATE_TLB_VECTOR_START)+\idx, 23 (INVALIDATE_TLB_VECTOR_START)+\idx,
22 smp_invalidate_interrupt) 24 smp_invalidate_interrupt)
25.endif
23.endr 26.endr
24#endif 27#endif
25 28
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 06850a7194e1..2c6fc9e62812 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -7,14 +7,12 @@
7 frame pointer later */ 7 frame pointer later */
8#ifdef CONFIG_FRAME_POINTER 8#ifdef CONFIG_FRAME_POINTER
9 .macro FRAME 9 .macro FRAME
10 pushl %ebp 10 pushl_cfi %ebp
11 CFI_ADJUST_CFA_OFFSET 4
12 CFI_REL_OFFSET ebp,0 11 CFI_REL_OFFSET ebp,0
13 movl %esp,%ebp 12 movl %esp,%ebp
14 .endm 13 .endm
15 .macro ENDFRAME 14 .macro ENDFRAME
16 popl %ebp 15 popl_cfi %ebp
17 CFI_ADJUST_CFA_OFFSET -4
18 CFI_RESTORE ebp 16 CFI_RESTORE ebp
19 .endm 17 .endm
20#else 18#else
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 1f11ce44e956..d09bb03653f0 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -37,7 +37,7 @@
37 "+m" (*uaddr), "=&r" (tem) \ 37 "+m" (*uaddr), "=&r" (tem) \
38 : "r" (oparg), "i" (-EFAULT), "1" (0)) 38 : "r" (oparg), "i" (-EFAULT), "1" (0))
39 39
40static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 40static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
41{ 41{
42 int op = (encoded_op >> 28) & 7; 42 int op = (encoded_op >> 28) & 7;
43 int cmp = (encoded_op >> 24) & 15; 43 int cmp = (encoded_op >> 24) & 15;
@@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
48 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 48 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
49 oparg = 1 << oparg; 49 oparg = 1 << oparg;
50 50
51 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 51 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
52 return -EFAULT; 52 return -EFAULT;
53 53
54#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) 54#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
@@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
109 return ret; 109 return ret;
110} 110}
111 111
112static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, 112static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
113 int newval) 113 u32 oldval, u32 newval)
114{ 114{
115 int ret = 0;
115 116
116#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) 117#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
117 /* Real i386 machines have no cmpxchg instruction */ 118 /* Real i386 machines have no cmpxchg instruction */
@@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
119 return -ENOSYS; 120 return -ENOSYS;
120#endif 121#endif
121 122
122 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 123 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
123 return -EFAULT; 124 return -EFAULT;
124 125
125 asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" 126 asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
126 "2:\t.section .fixup, \"ax\"\n" 127 "2:\t.section .fixup, \"ax\"\n"
127 "3:\tmov %2, %0\n" 128 "3:\tmov %3, %0\n"
128 "\tjmp 2b\n" 129 "\tjmp 2b\n"
129 "\t.previous\n" 130 "\t.previous\n"
130 _ASM_EXTABLE(1b, 3b) 131 _ASM_EXTABLE(1b, 3b)
131 : "=a" (oldval), "+m" (*uaddr) 132 : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
132 : "i" (-EFAULT), "r" (newval), "0" (oldval) 133 : "i" (-EFAULT), "r" (newval), "1" (oldval)
133 : "memory" 134 : "memory"
134 ); 135 );
135 136
136 return oldval; 137 *uval = oldval;
138 return ret;
137} 139}
138 140
139#endif 141#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 0274ec5a7e62..bb9efe8706e2 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -45,6 +45,30 @@ extern void invalidate_interrupt4(void);
45extern void invalidate_interrupt5(void); 45extern void invalidate_interrupt5(void);
46extern void invalidate_interrupt6(void); 46extern void invalidate_interrupt6(void);
47extern void invalidate_interrupt7(void); 47extern void invalidate_interrupt7(void);
48extern void invalidate_interrupt8(void);
49extern void invalidate_interrupt9(void);
50extern void invalidate_interrupt10(void);
51extern void invalidate_interrupt11(void);
52extern void invalidate_interrupt12(void);
53extern void invalidate_interrupt13(void);
54extern void invalidate_interrupt14(void);
55extern void invalidate_interrupt15(void);
56extern void invalidate_interrupt16(void);
57extern void invalidate_interrupt17(void);
58extern void invalidate_interrupt18(void);
59extern void invalidate_interrupt19(void);
60extern void invalidate_interrupt20(void);
61extern void invalidate_interrupt21(void);
62extern void invalidate_interrupt22(void);
63extern void invalidate_interrupt23(void);
64extern void invalidate_interrupt24(void);
65extern void invalidate_interrupt25(void);
66extern void invalidate_interrupt26(void);
67extern void invalidate_interrupt27(void);
68extern void invalidate_interrupt28(void);
69extern void invalidate_interrupt29(void);
70extern void invalidate_interrupt30(void);
71extern void invalidate_interrupt31(void);
48 72
49extern void irq_move_cleanup_interrupt(void); 73extern void irq_move_cleanup_interrupt(void);
50extern void reboot_interrupt(void); 74extern void reboot_interrupt(void);
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 36fb1a6a5109..8dbe353e41e1 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -11,8 +11,8 @@ kernel_physical_mapping_init(unsigned long start,
11 unsigned long page_size_mask); 11 unsigned long page_size_mask);
12 12
13 13
14extern unsigned long __initdata e820_table_start; 14extern unsigned long __initdata pgt_buf_start;
15extern unsigned long __meminitdata e820_table_end; 15extern unsigned long __meminitdata pgt_buf_end;
16extern unsigned long __meminitdata e820_table_top; 16extern unsigned long __meminitdata pgt_buf_top;
17 17
18#endif /* _ASM_X86_INIT_32_H */ 18#endif /* _ASM_X86_INIT_32_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index f327d386d6cc..c4bd267dfc50 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -63,17 +63,6 @@ union IO_APIC_reg_03 {
63 } __attribute__ ((packed)) bits; 63 } __attribute__ ((packed)) bits;
64}; 64};
65 65
66enum ioapic_irq_destination_types {
67 dest_Fixed = 0,
68 dest_LowestPrio = 1,
69 dest_SMI = 2,
70 dest__reserved_1 = 3,
71 dest_NMI = 4,
72 dest_INIT = 5,
73 dest__reserved_2 = 6,
74 dest_ExtINT = 7
75};
76
77struct IO_APIC_route_entry { 66struct IO_APIC_route_entry {
78 __u32 vector : 8, 67 __u32 vector : 8,
79 delivery_mode : 3, /* 000: FIXED 68 delivery_mode : 3, /* 000: FIXED
@@ -106,6 +95,10 @@ struct IR_IO_APIC_route_entry {
106 index : 15; 95 index : 15;
107} __attribute__ ((packed)); 96} __attribute__ ((packed));
108 97
98#define IOAPIC_AUTO -1
99#define IOAPIC_EDGE 0
100#define IOAPIC_LEVEL 1
101
109#ifdef CONFIG_X86_IO_APIC 102#ifdef CONFIG_X86_IO_APIC
110 103
111/* 104/*
@@ -150,11 +143,6 @@ extern int timer_through_8259;
150#define io_apic_assign_pci_irqs \ 143#define io_apic_assign_pci_irqs \
151 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) 144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
152 145
153extern u8 io_apic_unique_id(u8 id);
154extern int io_apic_get_unique_id(int ioapic, int apic_id);
155extern int io_apic_get_version(int ioapic);
156extern int io_apic_get_redir_entries(int ioapic);
157
158struct io_apic_irq_attr; 146struct io_apic_irq_attr;
159extern int io_apic_set_pci_routing(struct device *dev, int irq, 147extern int io_apic_set_pci_routing(struct device *dev, int irq,
160 struct io_apic_irq_attr *irq_attr); 148 struct io_apic_irq_attr *irq_attr);
@@ -162,6 +150,8 @@ void setup_IO_APIC_irq_extra(u32 gsi);
162extern void ioapic_and_gsi_init(void); 150extern void ioapic_and_gsi_init(void);
163extern void ioapic_insert_resources(void); 151extern void ioapic_insert_resources(void);
164 152
153int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
154
165extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); 155extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
166extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); 156extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
167extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); 157extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
@@ -186,6 +176,8 @@ extern void __init pre_init_apic_IRQ0(void);
186 176
187extern void mp_save_irq(struct mpc_intsrc *m); 177extern void mp_save_irq(struct mpc_intsrc *m);
188 178
179extern void disable_ioapic_support(void);
180
189#else /* !CONFIG_X86_IO_APIC */ 181#else /* !CONFIG_X86_IO_APIC */
190 182
191#define io_apic_assign_pci_irqs 0 183#define io_apic_assign_pci_irqs 0
@@ -199,6 +191,26 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; }
199struct io_apic_irq_attr; 191struct io_apic_irq_attr;
200static inline int io_apic_set_pci_routing(struct device *dev, int irq, 192static inline int io_apic_set_pci_routing(struct device *dev, int irq,
201 struct io_apic_irq_attr *irq_attr) { return 0; } 193 struct io_apic_irq_attr *irq_attr) { return 0; }
194
195static inline struct IO_APIC_route_entry **alloc_ioapic_entries(void)
196{
197 return NULL;
198}
199
200static inline void free_ioapic_entries(struct IO_APIC_route_entry **ent) { }
201static inline int save_IO_APIC_setup(struct IO_APIC_route_entry **ent)
202{
203 return -ENOMEM;
204}
205
206static inline void mask_IO_APIC_setup(struct IO_APIC_route_entry **ent) { }
207static inline int restore_IO_APIC_setup(struct IO_APIC_route_entry **ent)
208{
209 return -ENOMEM;
210}
211
212static inline void mp_save_irq(struct mpc_intsrc *m) { };
213static inline void disable_ioapic_support(void) { }
202#endif 214#endif
203 215
204#endif /* _ASM_X86_IO_APIC_H */ 216#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b7228268a63..615fa9061b57 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
123 int vector); 123 int vector);
124extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, 124extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
125 int vector); 125 int vector);
126extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
127 int vector);
128extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
129 int vector);
130 126
131/* Avoid include hell */ 127/* Avoid include hell */
132#define NMI_VECTOR 0x02 128#define NMI_VECTOR 0x02
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector)
150} 146}
151 147
152#ifdef CONFIG_X86_32 148#ifdef CONFIG_X86_32
149extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
150 int vector);
151extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
152 int vector);
153extern void default_send_IPI_mask_logical(const struct cpumask *mask, 153extern void default_send_IPI_mask_logical(const struct cpumask *mask,
154 int vector); 154 int vector);
155extern void default_send_IPI_allbutself(int vector); 155extern void default_send_IPI_allbutself(int vector);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index c704b38c57a2..ba870bb6dd8e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -10,9 +10,6 @@
10#include <asm/apicdef.h> 10#include <asm/apicdef.h>
11#include <asm/irq_vectors.h> 11#include <asm/irq_vectors.h>
12 12
13/* Even though we don't support this, supply it to appease OF */
14static inline void irq_dispose_mapping(unsigned int virq) { }
15
16static inline int irq_canonicalize(int irq) 13static inline int irq_canonicalize(int irq)
17{ 14{
18 return ((irq == 2) ? 9 : irq); 15 return ((irq == 2) ? 9 : irq);
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
new file mode 100644
index 000000000000..423bbbddf36d
--- /dev/null
+++ b/arch/x86/include/asm/irq_controller.h
@@ -0,0 +1,12 @@
1#ifndef __IRQ_CONTROLLER__
2#define __IRQ_CONTROLLER__
3
4struct irq_domain {
5 int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize,
6 u32 *out_hwirq, u32 *out_type);
7 void *priv;
8 struct device_node *controller;
9 struct list_head l;
10};
11
12#endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6af0894dafb4..6e976ee3b3ef 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -1,6 +1,7 @@
1#ifndef _ASM_X86_IRQ_VECTORS_H 1#ifndef _ASM_X86_IRQ_VECTORS_H
2#define _ASM_X86_IRQ_VECTORS_H 2#define _ASM_X86_IRQ_VECTORS_H
3 3
4#include <linux/threads.h>
4/* 5/*
5 * Linux IRQ vector layout. 6 * Linux IRQ vector layout.
6 * 7 *
@@ -16,8 +17,8 @@
16 * Vectors 0 ... 31 : system traps and exceptions - hardcoded events 17 * Vectors 0 ... 31 : system traps and exceptions - hardcoded events
17 * Vectors 32 ... 127 : device interrupts 18 * Vectors 32 ... 127 : device interrupts
18 * Vector 128 : legacy int80 syscall interface 19 * Vector 128 : legacy int80 syscall interface
19 * Vectors 129 ... 237 : device interrupts 20 * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts
20 * Vectors 238 ... 255 : special interrupts 21 * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
21 * 22 *
22 * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. 23 * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
23 * 24 *
@@ -96,37 +97,43 @@
96#define THRESHOLD_APIC_VECTOR 0xf9 97#define THRESHOLD_APIC_VECTOR 0xf9
97#define REBOOT_VECTOR 0xf8 98#define REBOOT_VECTOR 0xf8
98 99
99/* f0-f7 used for spreading out TLB flushes: */
100#define INVALIDATE_TLB_VECTOR_END 0xf7
101#define INVALIDATE_TLB_VECTOR_START 0xf0
102#define NUM_INVALIDATE_TLB_VECTORS 8
103
104/*
105 * Local APIC timer IRQ vector is on a different priority level,
106 * to work around the 'lost local interrupt if more than 2 IRQ
107 * sources per level' errata.
108 */
109#define LOCAL_TIMER_VECTOR 0xef
110
111/* 100/*
112 * Generic system vector for platform specific use 101 * Generic system vector for platform specific use
113 */ 102 */
114#define X86_PLATFORM_IPI_VECTOR 0xed 103#define X86_PLATFORM_IPI_VECTOR 0xf7
115 104
116/* 105/*
117 * IRQ work vector: 106 * IRQ work vector:
118 */ 107 */
119#define IRQ_WORK_VECTOR 0xec 108#define IRQ_WORK_VECTOR 0xf6
120 109
121#define UV_BAU_MESSAGE 0xea 110#define UV_BAU_MESSAGE 0xf5
122 111
123/* 112/*
124 * Self IPI vector for machine checks 113 * Self IPI vector for machine checks
125 */ 114 */
126#define MCE_SELF_VECTOR 0xeb 115#define MCE_SELF_VECTOR 0xf4
127 116
128/* Xen vector callback to receive events in a HVM domain */ 117/* Xen vector callback to receive events in a HVM domain */
129#define XEN_HVM_EVTCHN_CALLBACK 0xe9 118#define XEN_HVM_EVTCHN_CALLBACK 0xf3
119
120/*
121 * Local APIC timer IRQ vector is on a different priority level,
122 * to work around the 'lost local interrupt if more than 2 IRQ
123 * sources per level' errata.
124 */
125#define LOCAL_TIMER_VECTOR 0xef
126
127/* up to 32 vectors used for spreading out TLB flushes: */
128#if NR_CPUS <= 32
129# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS)
130#else
131# define NUM_INVALIDATE_TLB_VECTORS (32)
132#endif
133
134#define INVALIDATE_TLB_VECTOR_END (0xee)
135#define INVALIDATE_TLB_VECTOR_START \
136 (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
130 137
131#define NR_VECTORS 256 138#define NR_VECTORS 256
132 139
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index ca242d35e873..518bbbb9ee59 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -13,7 +13,6 @@ enum die_val {
13 DIE_PANIC, 13 DIE_PANIC,
14 DIE_NMI, 14 DIE_NMI,
15 DIE_DIE, 15 DIE_DIE,
16 DIE_NMIWATCHDOG,
17 DIE_KERNELDEBUG, 16 DIE_KERNELDEBUG,
18 DIE_TRAP, 17 DIE_TRAP,
19 DIE_GPF, 18 DIE_GPF,
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 0c90dd9f0505..9c7d95f6174b 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,7 +25,6 @@ extern int pic_mode;
25#define MAX_IRQ_SOURCES 256 25#define MAX_IRQ_SOURCES 256
26 26
27extern unsigned int def_to_bigsmp; 27extern unsigned int def_to_bigsmp;
28extern u8 apicid_2_node[];
29 28
30#ifdef CONFIG_X86_NUMAQ 29#ifdef CONFIG_X86_NUMAQ
31extern int mp_bus_id_to_node[MAX_MP_BUSSES]; 30extern int mp_bus_id_to_node[MAX_MP_BUSSES];
@@ -33,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES];
33extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; 32extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
34#endif 33#endif
35 34
36#define MAX_APICID 256
37
38#else /* CONFIG_X86_64: */ 35#else /* CONFIG_X86_64: */
39 36
40#define MAX_MP_BUSSES 256 37#define MAX_MP_BUSSES 256
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0d998e..823d48223400 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,6 +36,11 @@
36#define MSR_IA32_PERFCTR1 0x000000c2 36#define MSR_IA32_PERFCTR1 0x000000c2
37#define MSR_FSB_FREQ 0x000000cd 37#define MSR_FSB_FREQ 0x000000cd
38 38
39#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
40#define NHM_C3_AUTO_DEMOTE (1UL << 25)
41#define NHM_C1_AUTO_DEMOTE (1UL << 26)
42#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
43
39#define MSR_MTRRcap 0x000000fe 44#define MSR_MTRRcap 0x000000fe
40#define MSR_IA32_BBL_CR_CTL 0x00000119 45#define MSR_IA32_BBL_CR_CTL 0x00000119
41 46
@@ -47,6 +52,9 @@
47#define MSR_IA32_MCG_STATUS 0x0000017a 52#define MSR_IA32_MCG_STATUS 0x0000017a
48#define MSR_IA32_MCG_CTL 0x0000017b 53#define MSR_IA32_MCG_CTL 0x0000017b
49 54
55#define MSR_OFFCORE_RSP_0 0x000001a6
56#define MSR_OFFCORE_RSP_1 0x000001a7
57
50#define MSR_IA32_PEBS_ENABLE 0x000003f1 58#define MSR_IA32_PEBS_ENABLE 0x000003f1
51#define MSR_IA32_DS_AREA 0x00000600 59#define MSR_IA32_DS_AREA 0x00000600
52#define MSR_IA32_PERF_CAPABILITIES 0x00000345 60#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c76f5b92b840..07f46016d3ff 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -7,7 +7,6 @@
7 7
8#ifdef CONFIG_X86_LOCAL_APIC 8#ifdef CONFIG_X86_LOCAL_APIC
9 9
10extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
11extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 10extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
12extern int reserve_perfctr_nmi(unsigned int); 11extern int reserve_perfctr_nmi(unsigned int);
13extern void release_perfctr_nmi(unsigned int); 12extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400d3138..3d4dab43c994 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,57 @@
1#ifndef _ASM_X86_NUMA_H
2#define _ASM_X86_NUMA_H
3
4#include <asm/topology.h>
5#include <asm/apicdef.h>
6
7#ifdef CONFIG_NUMA
8
9#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
10
11/*
12 * __apicid_to_node[] stores the raw mapping between physical apicid and
13 * node and is used to initialize cpu_to_node mapping.
14 *
15 * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
16 * should be accessed by the accessors - set_apicid_to_node() and
17 * numa_cpu_node().
18 */
19extern s16 __apicid_to_node[MAX_LOCAL_APIC];
20
21static inline void set_apicid_to_node(int apicid, s16 node)
22{
23 __apicid_to_node[apicid] = node;
24}
25#else /* CONFIG_NUMA */
26static inline void set_apicid_to_node(int apicid, s16 node)
27{
28}
29#endif /* CONFIG_NUMA */
30
1#ifdef CONFIG_X86_32 31#ifdef CONFIG_X86_32
2# include "numa_32.h" 32# include "numa_32.h"
3#else 33#else
4# include "numa_64.h" 34# include "numa_64.h"
5#endif 35#endif
36
37#ifdef CONFIG_NUMA
38extern void __cpuinit numa_set_node(int cpu, int node);
39extern void __cpuinit numa_clear_node(int cpu);
40extern void __init numa_init_array(void);
41extern void __init init_cpu_to_node(void);
42extern void __cpuinit numa_add_cpu(int cpu);
43extern void __cpuinit numa_remove_cpu(int cpu);
44#else /* CONFIG_NUMA */
45static inline void numa_set_node(int cpu, int node) { }
46static inline void numa_clear_node(int cpu) { }
47static inline void numa_init_array(void) { }
48static inline void init_cpu_to_node(void) { }
49static inline void numa_add_cpu(int cpu) { }
50static inline void numa_remove_cpu(int cpu) { }
51#endif /* CONFIG_NUMA */
52
53#ifdef CONFIG_DEBUG_PER_CPU_MAPS
54struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
55#endif
56
57#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index b0ef2b449a9d..c6beed1ef103 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,7 +4,12 @@
4extern int numa_off; 4extern int numa_off;
5 5
6extern int pxm_to_nid(int pxm); 6extern int pxm_to_nid(int pxm);
7extern void numa_remove_cpu(int cpu); 7
8#ifdef CONFIG_NUMA
9extern int __cpuinit numa_cpu_node(int cpu);
10#else /* CONFIG_NUMA */
11static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
12#endif /* CONFIG_NUMA */
8 13
9#ifdef CONFIG_HIGHMEM 14#ifdef CONFIG_HIGHMEM
10extern void set_highmem_pages_init(void); 15extern void set_highmem_pages_init(void);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 0493be39607c..344eb1790b46 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,23 +2,16 @@
2#define _ASM_X86_NUMA_64_H 2#define _ASM_X86_NUMA_64_H
3 3
4#include <linux/nodemask.h> 4#include <linux/nodemask.h>
5#include <asm/apicdef.h>
6 5
7struct bootnode { 6struct bootnode {
8 u64 start; 7 u64 start;
9 u64 end; 8 u64 end;
10}; 9};
11 10
12extern int compute_hash_shift(struct bootnode *nodes, int numblks,
13 int *nodeids);
14
15#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) 11#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
16 12
17extern void numa_init_array(void);
18extern int numa_off; 13extern int numa_off;
19 14
20extern s16 apicid_to_node[MAX_LOCAL_APIC];
21
22extern unsigned long numa_free_all_bootmem(void); 15extern unsigned long numa_free_all_bootmem(void);
23extern void setup_node_bootmem(int nodeid, unsigned long start, 16extern void setup_node_bootmem(int nodeid, unsigned long start,
24 unsigned long end); 17 unsigned long end);
@@ -31,11 +24,11 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
31 */ 24 */
32#define NODE_MIN_SIZE (4*1024*1024) 25#define NODE_MIN_SIZE (4*1024*1024)
33 26
34extern void __init init_cpu_to_node(void); 27extern nodemask_t numa_nodes_parsed __initdata;
35extern void __cpuinit numa_set_node(int cpu, int node); 28
36extern void __cpuinit numa_clear_node(int cpu); 29extern int __cpuinit numa_cpu_node(int cpu);
37extern void __cpuinit numa_add_cpu(int cpu); 30extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
38extern void __cpuinit numa_remove_cpu(int cpu); 31extern void __init numa_set_distance(int from, int to, int distance);
39 32
40#ifdef CONFIG_NUMA_EMU 33#ifdef CONFIG_NUMA_EMU
41#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) 34#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
@@ -43,11 +36,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
43void numa_emu_cmdline(char *); 36void numa_emu_cmdline(char *);
44#endif /* CONFIG_NUMA_EMU */ 37#endif /* CONFIG_NUMA_EMU */
45#else 38#else
46static inline void init_cpu_to_node(void) { } 39static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
47static inline void numa_set_node(int cpu, int node) { }
48static inline void numa_clear_node(int cpu) { }
49static inline void numa_add_cpu(int cpu, int node) { }
50static inline void numa_remove_cpu(int cpu) { }
51#endif 40#endif
52 41
53#endif /* _ASM_X86_NUMA_64_H */ 42#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index 641988efe063..c5d3a5abbb9f 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -6,7 +6,7 @@
6 6
7#define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */ 7#define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */
8 8
9#ifdef CONFIG_OLPC_OPENFIRMWARE 9#ifdef CONFIG_OLPC
10 10
11extern bool olpc_ofw_is_installed(void); 11extern bool olpc_ofw_is_installed(void);
12 12
@@ -26,19 +26,15 @@ extern void setup_olpc_ofw_pgd(void);
26/* check if OFW was detected during boot */ 26/* check if OFW was detected during boot */
27extern bool olpc_ofw_present(void); 27extern bool olpc_ofw_present(void);
28 28
29#else /* !CONFIG_OLPC_OPENFIRMWARE */ 29#else /* !CONFIG_OLPC */
30
31static inline bool olpc_ofw_is_installed(void) { return false; }
32static inline void olpc_ofw_detect(void) { } 30static inline void olpc_ofw_detect(void) { }
33static inline void setup_olpc_ofw_pgd(void) { } 31static inline void setup_olpc_ofw_pgd(void) { }
34static inline bool olpc_ofw_present(void) { return false; } 32#endif /* !CONFIG_OLPC */
35
36#endif /* !CONFIG_OLPC_OPENFIRMWARE */
37 33
38#ifdef CONFIG_OLPC_OPENFIRMWARE_DT 34#ifdef CONFIG_OF_PROMTREE
39extern void olpc_dt_build_devicetree(void); 35extern void olpc_dt_build_devicetree(void);
40#else 36#else
41static inline void olpc_dt_build_devicetree(void) { } 37static inline void olpc_dt_build_devicetree(void) { }
42#endif /* CONFIG_OLPC_OPENFIRMWARE_DT */ 38#endif
43 39
44#endif /* _ASM_X86_OLPC_OFW_H */ 40#endif /* _ASM_X86_OLPC_OFW_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 1df66211fd1b..bce688d54c12 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_PAGE_DEFS_H 2#define _ASM_X86_PAGE_DEFS_H
3 3
4#include <linux/const.h> 4#include <linux/const.h>
5#include <linux/types.h>
5 6
6/* PAGE_SHIFT determines the page size */ 7/* PAGE_SHIFT determines the page size */
7#define PAGE_SHIFT 12 8#define PAGE_SHIFT 12
@@ -45,11 +46,15 @@ extern int devmem_is_allowed(unsigned long pagenr);
45extern unsigned long max_low_pfn_mapped; 46extern unsigned long max_low_pfn_mapped;
46extern unsigned long max_pfn_mapped; 47extern unsigned long max_pfn_mapped;
47 48
49static inline phys_addr_t get_max_mapped(void)
50{
51 return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
52}
53
48extern unsigned long init_memory_mapping(unsigned long start, 54extern unsigned long init_memory_mapping(unsigned long start,
49 unsigned long end); 55 unsigned long end);
50 56
51extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn, 57extern void initmem_init(void);
52 int acpi, int k8);
53extern void free_initmem(void); 58extern void free_initmem(void);
54 59
55#endif /* !__ASSEMBLY__ */ 60#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7e172955ee57..a09e1f052d84 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,6 +451,26 @@ do { \
451#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 451#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
452#endif /* !CONFIG_M386 */ 452#endif /* !CONFIG_M386 */
453 453
454#ifdef CONFIG_X86_CMPXCHG64
455#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \
456({ \
457 char __ret; \
458 typeof(o1) __o1 = o1; \
459 typeof(o1) __n1 = n1; \
460 typeof(o2) __o2 = o2; \
461 typeof(o2) __n2 = n2; \
462 typeof(o2) __dummy = n2; \
463 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
464 : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \
465 : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \
466 __ret; \
467})
468
469#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
470#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
471#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
472#endif /* CONFIG_X86_CMPXCHG64 */
473
454/* 474/*
455 * Per cpu atomic 64 bit operations are only available under 64 bit. 475 * Per cpu atomic 64 bit operations are only available under 64 bit.
456 * 32 bit must fall back to generic operations. 476 * 32 bit must fall back to generic operations.
@@ -480,6 +500,34 @@ do { \
480#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) 500#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
481#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 501#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
482#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 502#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
503
504/*
505 * Pretty complex macro to generate cmpxchg16 instruction. The instruction
506 * is not supported on early AMD64 processors so we must be able to emulate
507 * it in software. The address used in the cmpxchg16 instruction must be
508 * aligned to a 16 byte boundary.
509 */
510#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
511({ \
512 char __ret; \
513 typeof(o1) __o1 = o1; \
514 typeof(o1) __n1 = n1; \
515 typeof(o2) __o2 = o2; \
516 typeof(o2) __n2 = n2; \
517 typeof(o2) __dummy; \
518 alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \
519 "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \
520 X86_FEATURE_CX16, \
521 ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \
522 "S" (&pcp1), "b"(__n1), "c"(__n2), \
523 "a"(__o1), "d"(__o2)); \
524 __ret; \
525})
526
527#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
528#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
529#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
530
483#endif 531#endif
484 532
485/* This is not atomic against other CPUs -- CPU preemption needs to be off */ 533/* This is not atomic against other CPUs -- CPU preemption needs to be off */
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index e2f6a99f14ab..cc29086e30cd 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -22,6 +22,7 @@
22 22
23#define ARCH_P4_CNTRVAL_BITS (40) 23#define ARCH_P4_CNTRVAL_BITS (40)
24#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) 24#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
25#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
25 26
26#define P4_ESCR_EVENT_MASK 0x7e000000U 27#define P4_ESCR_EVENT_MASK 0x7e000000U
27#define P4_ESCR_EVENT_SHIFT 25 28#define P4_ESCR_EVENT_SHIFT 25
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636cefa186..4c25ab48257b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,10 +94,6 @@ struct cpuinfo_x86 {
94 int x86_cache_alignment; /* In bytes */ 94 int x86_cache_alignment; /* In bytes */
95 int x86_power; 95 int x86_power;
96 unsigned long loops_per_jiffy; 96 unsigned long loops_per_jiffy;
97#ifdef CONFIG_SMP
98 /* cpus sharing the last level cache: */
99 cpumask_var_t llc_shared_map;
100#endif
101 /* cpuid returned max cores value: */ 97 /* cpuid returned max cores value: */
102 u16 x86_max_cores; 98 u16 x86_max_cores;
103 u16 apicid; 99 u16 apicid;
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index b4ec95f07518..971e0b46446e 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -1 +1,69 @@
1/* dummy prom.h; here to make linux/of.h's #includes happy */ 1/*
2 * Definitions for Device tree / OpenFirmware handling on X86
3 *
4 * based on arch/powerpc/include/asm/prom.h which is
5 * Copyright (C) 1996-2005 Paul Mackerras.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#ifndef _ASM_X86_PROM_H
14#define _ASM_X86_PROM_H
15#ifndef __ASSEMBLY__
16
17#include <linux/of.h>
18#include <linux/types.h>
19#include <linux/pci.h>
20
21#include <asm/irq.h>
22#include <asm/atomic.h>
23#include <asm/setup.h>
24#include <asm/irq_controller.h>
25
26#ifdef CONFIG_OF
27extern int of_ioapic;
28extern u64 initial_dtb;
29extern void add_dtb(u64 data);
30extern void x86_add_irq_domains(void);
31void __cpuinit x86_of_pci_init(void);
32void x86_dtb_init(void);
33
34static inline struct device_node *pci_device_to_OF_node(struct pci_dev *pdev)
35{
36 return pdev ? pdev->dev.of_node : NULL;
37}
38
39static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
40{
41 return pci_device_to_OF_node(bus->self);
42}
43
44#else
45static inline void add_dtb(u64 data) { }
46static inline void x86_add_irq_domains(void) { }
47static inline void x86_of_pci_init(void) { }
48static inline void x86_dtb_init(void) { }
49#define of_ioapic 0
50#endif
51
52extern char cmd_line[COMMAND_LINE_SIZE];
53
54#define pci_address_to_pio pci_address_to_pio
55unsigned long pci_address_to_pio(phys_addr_t addr);
56
57/**
58 * irq_dispose_mapping - Unmap an interrupt
59 * @virq: linux virq number of the interrupt to unmap
60 *
61 * FIXME: We really should implement proper virq handling like power,
62 * but that's going to be major surgery.
63 */
64static inline void irq_dispose_mapping(unsigned int virq) { }
65
66#define HAVE_ARCH_DEVTREE_FIXUPS
67
68#endif /* __ASSEMBLY__ */
69#endif
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b60..df4cd32b4cc6 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
37#endif 37#endif
38 38
39#ifdef __KERNEL__ 39#ifdef __KERNEL__
40
41#include <linux/list.h>
42#include <linux/spinlock.h>
43#include <linux/lockdep.h>
44#include <asm/asm.h> 40#include <asm/asm.h>
45 41
46struct rwsem_waiter;
47
48extern asmregparm struct rw_semaphore *
49 rwsem_down_read_failed(struct rw_semaphore *sem);
50extern asmregparm struct rw_semaphore *
51 rwsem_down_write_failed(struct rw_semaphore *sem);
52extern asmregparm struct rw_semaphore *
53 rwsem_wake(struct rw_semaphore *);
54extern asmregparm struct rw_semaphore *
55 rwsem_downgrade_wake(struct rw_semaphore *sem);
56
57/* 42/*
58 * the semaphore definition
59 *
60 * The bias values and the counter type limits the number of 43 * The bias values and the counter type limits the number of
61 * potential readers/writers to 32767 for 32 bits and 2147483647 44 * potential readers/writers to 32767 for 32 bits and 2147483647
62 * for 64 bits. 45 * for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
74#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 57#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
75#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 58#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
76 59
77typedef signed long rwsem_count_t;
78
79struct rw_semaphore {
80 rwsem_count_t count;
81 spinlock_t wait_lock;
82 struct list_head wait_list;
83#ifdef CONFIG_DEBUG_LOCK_ALLOC
84 struct lockdep_map dep_map;
85#endif
86};
87
88#ifdef CONFIG_DEBUG_LOCK_ALLOC
89# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
90#else
91# define __RWSEM_DEP_MAP_INIT(lockname)
92#endif
93
94
95#define __RWSEM_INITIALIZER(name) \
96{ \
97 RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
98 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
99}
100
101#define DECLARE_RWSEM(name) \
102 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
103
104extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
105 struct lock_class_key *key);
106
107#define init_rwsem(sem) \
108do { \
109 static struct lock_class_key __key; \
110 \
111 __init_rwsem((sem), #sem, &__key); \
112} while (0)
113
114/* 60/*
115 * lock for reading 61 * lock for reading
116 */ 62 */
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
133 */ 79 */
134static inline int __down_read_trylock(struct rw_semaphore *sem) 80static inline int __down_read_trylock(struct rw_semaphore *sem)
135{ 81{
136 rwsem_count_t result, tmp; 82 long result, tmp;
137 asm volatile("# beginning __down_read_trylock\n\t" 83 asm volatile("# beginning __down_read_trylock\n\t"
138 " mov %0,%1\n\t" 84 " mov %0,%1\n\t"
139 "1:\n\t" 85 "1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
155 */ 101 */
156static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) 102static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
157{ 103{
158 rwsem_count_t tmp; 104 long tmp;
159 asm volatile("# beginning down_write\n\t" 105 asm volatile("# beginning down_write\n\t"
160 LOCK_PREFIX " xadd %1,(%2)\n\t" 106 LOCK_PREFIX " xadd %1,(%2)\n\t"
161 /* adds 0xffff0001, returns the old value */ 107 /* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
180 */ 126 */
181static inline int __down_write_trylock(struct rw_semaphore *sem) 127static inline int __down_write_trylock(struct rw_semaphore *sem)
182{ 128{
183 rwsem_count_t ret = cmpxchg(&sem->count, 129 long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
184 RWSEM_UNLOCKED_VALUE, 130 RWSEM_ACTIVE_WRITE_BIAS);
185 RWSEM_ACTIVE_WRITE_BIAS);
186 if (ret == RWSEM_UNLOCKED_VALUE) 131 if (ret == RWSEM_UNLOCKED_VALUE)
187 return 1; 132 return 1;
188 return 0; 133 return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
193 */ 138 */
194static inline void __up_read(struct rw_semaphore *sem) 139static inline void __up_read(struct rw_semaphore *sem)
195{ 140{
196 rwsem_count_t tmp; 141 long tmp;
197 asm volatile("# beginning __up_read\n\t" 142 asm volatile("# beginning __up_read\n\t"
198 LOCK_PREFIX " xadd %1,(%2)\n\t" 143 LOCK_PREFIX " xadd %1,(%2)\n\t"
199 /* subtracts 1, returns the old value */ 144 /* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
211 */ 156 */
212static inline void __up_write(struct rw_semaphore *sem) 157static inline void __up_write(struct rw_semaphore *sem)
213{ 158{
214 rwsem_count_t tmp; 159 long tmp;
215 asm volatile("# beginning __up_write\n\t" 160 asm volatile("# beginning __up_write\n\t"
216 LOCK_PREFIX " xadd %1,(%2)\n\t" 161 LOCK_PREFIX " xadd %1,(%2)\n\t"
217 /* subtracts 0xffff0001, returns the old value */ 162 /* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
247/* 192/*
248 * implement atomic add functionality 193 * implement atomic add functionality
249 */ 194 */
250static inline void rwsem_atomic_add(rwsem_count_t delta, 195static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
251 struct rw_semaphore *sem)
252{ 196{
253 asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" 197 asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
254 : "+m" (sem->count) 198 : "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
258/* 202/*
259 * implement exchange and add functionality 203 * implement exchange and add functionality
260 */ 204 */
261static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, 205static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
262 struct rw_semaphore *sem)
263{ 206{
264 rwsem_count_t tmp = delta; 207 long tmp = delta;
265 208
266 asm volatile(LOCK_PREFIX "xadd %0,%1" 209 asm volatile(LOCK_PREFIX "xadd %0,%1"
267 : "+r" (tmp), "+m" (sem->count) 210 : "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
270 return tmp + delta; 213 return tmp + delta;
271} 214}
272 215
273static inline int rwsem_is_locked(struct rw_semaphore *sem)
274{
275 return (sem->count != 0);
276}
277
278#endif /* __KERNEL__ */ 216#endif /* __KERNEL__ */
279#endif /* _ASM_X86_RWSEM_H */ 217#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1f4695136776..73b11bc0ae6f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -17,12 +17,24 @@
17#endif 17#endif
18#include <asm/thread_info.h> 18#include <asm/thread_info.h>
19#include <asm/cpumask.h> 19#include <asm/cpumask.h>
20#include <asm/cpufeature.h>
20 21
21extern int smp_num_siblings; 22extern int smp_num_siblings;
22extern unsigned int num_processors; 23extern unsigned int num_processors;
23 24
25static inline bool cpu_has_ht_siblings(void)
26{
27 bool has_siblings = false;
28#ifdef CONFIG_SMP
29 has_siblings = cpu_has_ht && smp_num_siblings > 1;
30#endif
31 return has_siblings;
32}
33
24DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); 34DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
25DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); 35DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
36/* cpus sharing the last level cache: */
37DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
26DECLARE_PER_CPU(u16, cpu_llc_id); 38DECLARE_PER_CPU(u16, cpu_llc_id);
27DECLARE_PER_CPU(int, cpu_number); 39DECLARE_PER_CPU(int, cpu_number);
28 40
@@ -36,8 +48,16 @@ static inline struct cpumask *cpu_core_mask(int cpu)
36 return per_cpu(cpu_core_map, cpu); 48 return per_cpu(cpu_core_map, cpu);
37} 49}
38 50
51static inline struct cpumask *cpu_llc_shared_mask(int cpu)
52{
53 return per_cpu(cpu_llc_shared_map, cpu);
54}
55
39DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); 56DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
40DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); 57DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
58#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
59DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
60#endif
41 61
42/* Static state in head.S used to set up a CPU */ 62/* Static state in head.S used to set up a CPU */
43extern unsigned long stack_start; /* Initial stack pointer address */ 63extern unsigned long stack_start; /* Initial stack pointer address */
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 6c22bf353f26..725b77831993 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
34 */ 34 */
35 CMOS_WRITE(0, 0xf); 35 CMOS_WRITE(0, 0xf);
36 36
37 *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0; 37 *((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0;
38} 38}
39 39
40static inline void __init smpboot_setup_io_apic(void) 40static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 33ecc3ea8782..12569e691ce3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -98,8 +98,6 @@ do { \
98 */ 98 */
99#define HAVE_DISABLE_HLT 99#define HAVE_DISABLE_HLT
100#else 100#else
101#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
102#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
103 101
104/* frame pointer must be last for get_wchan */ 102/* frame pointer must be last for get_wchan */
105#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" 103#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21899cc31e52..910a7084f7f2 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -47,21 +47,6 @@
47 47
48#include <asm/mpspec.h> 48#include <asm/mpspec.h>
49 49
50#ifdef CONFIG_X86_32
51
52/* Mappings between logical cpu number and node number */
53extern int cpu_to_node_map[];
54
55/* Returns the number of the node containing CPU 'cpu' */
56static inline int __cpu_to_node(int cpu)
57{
58 return cpu_to_node_map[cpu];
59}
60#define early_cpu_to_node __cpu_to_node
61#define cpu_to_node __cpu_to_node
62
63#else /* CONFIG_X86_64 */
64
65/* Mappings between logical cpu number and node number */ 50/* Mappings between logical cpu number and node number */
66DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); 51DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
67 52
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu)
84 69
85#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ 70#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
86 71
87#endif /* CONFIG_X86_64 */
88
89/* Mappings between node number and cpus on that node. */ 72/* Mappings between node number and cpus on that node. */
90extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; 73extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
91 74
@@ -155,7 +138,7 @@ extern unsigned long node_remap_size[];
155 .balance_interval = 1, \ 138 .balance_interval = 1, \
156} 139}
157 140
158#ifdef CONFIG_X86_64_ACPI_NUMA 141#ifdef CONFIG_X86_64
159extern int __node_distance(int, int); 142extern int __node_distance(int, int);
160#define node_distance(a, b) __node_distance(a, b) 143#define node_distance(a, b) __node_distance(a, b)
161#endif 144#endif
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0e..ffaf183c619a 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,13 @@
346#define __NR_fanotify_init 338 346#define __NR_fanotify_init 338
347#define __NR_fanotify_mark 339 347#define __NR_fanotify_mark 339
348#define __NR_prlimit64 340 348#define __NR_prlimit64 340
349#define __NR_name_to_handle_at 341
350#define __NR_open_by_handle_at 342
351#define __NR_clock_adjtime 343
349 352
350#ifdef __KERNEL__ 353#ifdef __KERNEL__
351 354
352#define NR_syscalls 341 355#define NR_syscalls 344
353 356
354#define __ARCH_WANT_IPC_PARSE_VERSION 357#define __ARCH_WANT_IPC_PARSE_VERSION
355#define __ARCH_WANT_OLD_READDIR 358#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715b..5466bea670e7 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,12 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
669__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 669__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
670#define __NR_prlimit64 302 670#define __NR_prlimit64 302
671__SYSCALL(__NR_prlimit64, sys_prlimit64) 671__SYSCALL(__NR_prlimit64, sys_prlimit64)
672#define __NR_name_to_handle_at 303
673__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
674#define __NR_open_by_handle_at 304
675__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
676#define __NR_clock_adjtime 305
677__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
672 678
673#ifndef __NO_STUBS 679#ifndef __NO_STUBS
674#define __ARCH_WANT_OLD_READDIR 680#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index ce1d54c8a433..3e094af443c3 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -176,7 +176,7 @@ struct bau_msg_payload {
176struct bau_msg_header { 176struct bau_msg_header {
177 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 177 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
178 /* bits 5:0 */ 178 /* bits 5:0 */
179 unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ 179 unsigned int base_dest_nodeid:15; /* nasid of the */
180 /* bits 20:6 */ /* first bit in uvhub map */ 180 /* bits 20:6 */ /* first bit in uvhub map */
181 unsigned int command:8; /* message type */ 181 unsigned int command:8; /* message type */
182 /* bits 28:21 */ 182 /* bits 28:21 */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 64642ad019fb..643ebf2e2ad8 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -83,11 +83,13 @@ struct x86_init_paging {
83 * boot cpu 83 * boot cpu
84 * @tsc_pre_init: platform function called before TSC init 84 * @tsc_pre_init: platform function called before TSC init
85 * @timer_init: initialize the platform timer (default PIT/HPET) 85 * @timer_init: initialize the platform timer (default PIT/HPET)
86 * @wallclock_init: init the wallclock device
86 */ 87 */
87struct x86_init_timers { 88struct x86_init_timers {
88 void (*setup_percpu_clockev)(void); 89 void (*setup_percpu_clockev)(void);
89 void (*tsc_pre_init)(void); 90 void (*tsc_pre_init)(void);
90 void (*timer_init)(void); 91 void (*timer_init)(void);
92 void (*wallclock_init)(void);
91}; 93};
92 94
93/** 95/**
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae4025b..8508bfe52296 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
287static inline int 287static inline int
288HYPERVISOR_sched_op(int cmd, void *arg) 288HYPERVISOR_sched_op(int cmd, void *arg)
289{ 289{
290 return _hypercall2(int, sched_op_new, cmd, arg); 290 return _hypercall2(int, sched_op, cmd, arg);
291} 291}
292 292
293static inline long 293static inline long
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
422#endif 422#endif
423 423
424static inline int 424static inline int
425HYPERVISOR_suspend(unsigned long srec) 425HYPERVISOR_suspend(unsigned long start_info_mfn)
426{ 426{
427 return _hypercall3(int, sched_op, SCHEDOP_shutdown, 427 struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
428 SHUTDOWN_suspend, srec); 428
429 /*
430 * For a PV guest the tools require that the start_info mfn be
431 * present in rdx/edx when the hypercall is made. Per the
432 * hypercall calling convention this is the third hypercall
433 * argument, which is start_info_mfn here.
434 */
435 return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
429} 436}
430 437
431static inline int 438static inline int
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf238a33..c61934fbf22a 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {
29 29
30/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ 30/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
31#define INVALID_P2M_ENTRY (~0UL) 31#define INVALID_P2M_ENTRY (~0UL)
32#define FOREIGN_FRAME_BIT (1UL<<31) 32#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
33#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
33#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) 34#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
35#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
34 36
35/* Maximum amount of memory we can handle in a domain in pages */ 37/* Maximum amount of memory we can handle in a domain in pages */
36#define MAX_DOMAIN_PAGES \ 38#define MAX_DOMAIN_PAGES \
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order;
41 43
42extern unsigned long get_phys_to_machine(unsigned long pfn); 44extern unsigned long get_phys_to_machine(unsigned long pfn);
43extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); 45extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
46extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
47extern unsigned long set_phys_range_identity(unsigned long pfn_s,
48 unsigned long pfn_e);
44 49
45extern int m2p_add_override(unsigned long mfn, struct page *page); 50extern int m2p_add_override(unsigned long mfn, struct page *page);
46extern int m2p_remove_override(struct page *page); 51extern int m2p_remove_override(struct page *page);
47extern struct page *m2p_find_override(unsigned long mfn); 52extern struct page *m2p_find_override(unsigned long mfn);
48extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); 53extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
49 54
55#ifdef CONFIG_XEN_DEBUG_FS
56extern int p2m_dump_show(struct seq_file *m, void *v);
57#endif
50static inline unsigned long pfn_to_mfn(unsigned long pfn) 58static inline unsigned long pfn_to_mfn(unsigned long pfn)
51{ 59{
52 unsigned long mfn; 60 unsigned long mfn;
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
57 mfn = get_phys_to_machine(pfn); 65 mfn = get_phys_to_machine(pfn);
58 66
59 if (mfn != INVALID_P2M_ENTRY) 67 if (mfn != INVALID_P2M_ENTRY)
60 mfn &= ~FOREIGN_FRAME_BIT; 68 mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
61 69
62 return mfn; 70 return mfn;
63} 71}
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
73static inline unsigned long mfn_to_pfn(unsigned long mfn) 81static inline unsigned long mfn_to_pfn(unsigned long mfn)
74{ 82{
75 unsigned long pfn; 83 unsigned long pfn;
84 int ret = 0;
76 85
77 if (xen_feature(XENFEAT_auto_translated_physmap)) 86 if (xen_feature(XENFEAT_auto_translated_physmap))
78 return mfn; 87 return mfn;
79 88
89 if (unlikely((mfn >> machine_to_phys_order) != 0)) {
90 pfn = ~0;
91 goto try_override;
92 }
80 pfn = 0; 93 pfn = 0;
81 /* 94 /*
82 * The array access can fail (e.g., device space beyond end of RAM). 95 * The array access can fail (e.g., device space beyond end of RAM).
83 * In such cases it doesn't matter what we return (we return garbage), 96 * In such cases it doesn't matter what we return (we return garbage),
84 * but we must handle the fault without crashing! 97 * but we must handle the fault without crashing!
85 */ 98 */
86 __get_user(pfn, &machine_to_phys_mapping[mfn]); 99 ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
87 100try_override:
88 /* 101 /* ret might be < 0 if there are no entries in the m2p for mfn */
89 * If this appears to be a foreign mfn (because the pfn 102 if (ret < 0)
90 * doesn't map back to the mfn), then check the local override 103 pfn = ~0;
91 * table to see if there's a better pfn to use. 104 else if (get_phys_to_machine(pfn) != mfn)
105 /*
106 * If this appears to be a foreign mfn (because the pfn
107 * doesn't map back to the mfn), then check the local override
108 * table to see if there's a better pfn to use.
109 *
110 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
111 */
112 pfn = m2p_find_override_pfn(mfn, ~0);
113
114 /*
115 * pfn is ~0 if there are no entries in the m2p for mfn or if the
116 * entry doesn't map back to the mfn and m2p_override doesn't have a
117 * valid entry for it.
92 */ 118 */
93 if (get_phys_to_machine(pfn) != mfn) 119 if (pfn == ~0 &&
94 pfn = m2p_find_override_pfn(mfn, pfn); 120 get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
121 pfn = mfn;
95 122
96 return pfn; 123 return pfn;
97} 124}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3eaf8d3..aa8620989162 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void)
27 * its own functions. 27 * its own functions.
28 */ 28 */
29struct xen_pci_frontend_ops { 29struct xen_pci_frontend_ops {
30 int (*enable_msi)(struct pci_dev *dev, int **vectors); 30 int (*enable_msi)(struct pci_dev *dev, int vectors[]);
31 void (*disable_msi)(struct pci_dev *dev); 31 void (*disable_msi)(struct pci_dev *dev);
32 int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); 32 int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
33 void (*disable_msix)(struct pci_dev *dev); 33 void (*disable_msix)(struct pci_dev *dev);
34}; 34};
35 35
36extern struct xen_pci_frontend_ops *xen_pci_frontend; 36extern struct xen_pci_frontend_ops *xen_pci_frontend;
37 37
38static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, 38static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
39 int **vectors) 39 int vectors[])
40{ 40{
41 if (xen_pci_frontend && xen_pci_frontend->enable_msi) 41 if (xen_pci_frontend && xen_pci_frontend->enable_msi)
42 return xen_pci_frontend->enable_msi(dev, vectors); 42 return xen_pci_frontend->enable_msi(dev, vectors);
@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
48 xen_pci_frontend->disable_msi(dev); 48 xen_pci_frontend->disable_msi(dev);
49} 49}
50static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, 50static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
51 int **vectors, int nvec) 51 int vectors[], int nvec)
52{ 52{
53 if (xen_pci_frontend && xen_pci_frontend->enable_msix) 53 if (xen_pci_frontend && xen_pci_frontend->enable_msix)
54 return xen_pci_frontend->enable_msix(dev, vectors, nvec); 54 return xen_pci_frontend->enable_msix(dev, vectors, nvec);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 778c5b93676d..743642f1a36c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -67,9 +67,9 @@ obj-$(CONFIG_PCI) += early-quirks.o
67apm-y := apm_32.o 67apm-y := apm_32.o
68obj-$(CONFIG_APM) += apm.o 68obj-$(CONFIG_APM) += apm.o
69obj-$(CONFIG_SMP) += smp.o 69obj-$(CONFIG_SMP) += smp.o
70obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o 70obj-$(CONFIG_SMP) += smpboot.o
71obj-$(CONFIG_SMP) += tsc_sync.o
71obj-$(CONFIG_SMP) += setup_percpu.o 72obj-$(CONFIG_SMP) += setup_percpu.o
72obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
73obj-$(CONFIG_X86_MPPARSE) += mpparse.o 73obj-$(CONFIG_X86_MPPARSE) += mpparse.o
74obj-y += apic/ 74obj-y += apic/
75obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 75obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
@@ -109,6 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
109obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o 109obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
110 110
111obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 111obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
112obj-$(CONFIG_OF) += devicetree.o
112 113
113### 114###
114# 64 bit specific files 115# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a71137983a..9a966c579af5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,6 +72,7 @@ u8 acpi_sci_flags __initdata;
72int acpi_sci_override_gsi __initdata; 72int acpi_sci_override_gsi __initdata;
73int acpi_skip_timer_override __initdata; 73int acpi_skip_timer_override __initdata;
74int acpi_use_timer_override __initdata; 74int acpi_use_timer_override __initdata;
75int acpi_fix_pin2_polarity __initdata;
75 76
76#ifdef CONFIG_X86_LOCAL_APIC 77#ifdef CONFIG_X86_LOCAL_APIC
77static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 78static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -415,10 +416,15 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
415 return 0; 416 return 0;
416 } 417 }
417 418
418 if (acpi_skip_timer_override && 419 if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
419 intsrc->source_irq == 0 && intsrc->global_irq == 2) { 420 if (acpi_skip_timer_override) {
420 printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); 421 printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
421 return 0; 422 return 0;
423 }
424 if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
425 intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
426 printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
427 }
422 } 428 }
423 429
424 mp_override_legacy_irq(intsrc->source_irq, 430 mp_override_legacy_irq(intsrc->source_irq,
@@ -589,14 +595,8 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
589 nid = acpi_get_node(handle); 595 nid = acpi_get_node(handle);
590 if (nid == -1 || !node_online(nid)) 596 if (nid == -1 || !node_online(nid))
591 return; 597 return;
592#ifdef CONFIG_X86_64 598 set_apicid_to_node(physid, nid);
593 apicid_to_node[physid] = nid;
594 numa_set_node(cpu, nid); 599 numa_set_node(cpu, nid);
595#else /* CONFIG_X86_32 */
596 apicid_2_node[physid] = nid;
597 cpu_to_node_map[cpu] = nid;
598#endif
599
600#endif 600#endif
601} 601}
602 602
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 0a99f7198bc3..ed3c2e5b714a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,7 +12,7 @@
12 12
13static u32 *flush_words; 13static u32 *flush_words;
14 14
15struct pci_device_id amd_nb_misc_ids[] = { 15const struct pci_device_id amd_nb_misc_ids[] = {
16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, 18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
@@ -20,6 +20,11 @@ struct pci_device_id amd_nb_misc_ids[] = {
20}; 20};
21EXPORT_SYMBOL(amd_nb_misc_ids); 21EXPORT_SYMBOL(amd_nb_misc_ids);
22 22
23static struct pci_device_id amd_nb_link_ids[] = {
24 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
25 {}
26};
27
23const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { 28const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
24 { 0x00, 0x18, 0x20 }, 29 { 0x00, 0x18, 0x20 },
25 { 0xff, 0x00, 0x20 }, 30 { 0xff, 0x00, 0x20 },
@@ -31,7 +36,7 @@ struct amd_northbridge_info amd_northbridges;
31EXPORT_SYMBOL(amd_northbridges); 36EXPORT_SYMBOL(amd_northbridges);
32 37
33static struct pci_dev *next_northbridge(struct pci_dev *dev, 38static struct pci_dev *next_northbridge(struct pci_dev *dev,
34 struct pci_device_id *ids) 39 const struct pci_device_id *ids)
35{ 40{
36 do { 41 do {
37 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); 42 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
@@ -45,7 +50,7 @@ int amd_cache_northbridges(void)
45{ 50{
46 int i = 0; 51 int i = 0;
47 struct amd_northbridge *nb; 52 struct amd_northbridge *nb;
48 struct pci_dev *misc; 53 struct pci_dev *misc, *link;
49 54
50 if (amd_nb_num()) 55 if (amd_nb_num())
51 return 0; 56 return 0;
@@ -64,10 +69,12 @@ int amd_cache_northbridges(void)
64 amd_northbridges.nb = nb; 69 amd_northbridges.nb = nb;
65 amd_northbridges.num = i; 70 amd_northbridges.num = i;
66 71
67 misc = NULL; 72 link = misc = NULL;
68 for (i = 0; i != amd_nb_num(); i++) { 73 for (i = 0; i != amd_nb_num(); i++) {
69 node_to_amd_nb(i)->misc = misc = 74 node_to_amd_nb(i)->misc = misc =
70 next_northbridge(misc, amd_nb_misc_ids); 75 next_northbridge(misc, amd_nb_misc_ids);
76 node_to_amd_nb(i)->link = link =
77 next_northbridge(link, amd_nb_link_ids);
71 } 78 }
72 79
73 /* some CPU families (e.g. family 0x11) do not support GART */ 80 /* some CPU families (e.g. family 0x11) do not support GART */
@@ -85,6 +92,13 @@ int amd_cache_northbridges(void)
85 boot_cpu_data.x86_mask >= 0x1)) 92 boot_cpu_data.x86_mask >= 0x1))
86 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; 93 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
87 94
95 if (boot_cpu_data.x86 == 0x15)
96 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
97
98 /* L3 cache partitioning is supported on family 0x15 */
99 if (boot_cpu_data.x86 == 0x15)
100 amd_northbridges.flags |= AMD_NB_L3_PARTITIONING;
101
88 return 0; 102 return 0;
89} 103}
90EXPORT_SYMBOL_GPL(amd_cache_northbridges); 104EXPORT_SYMBOL_GPL(amd_cache_northbridges);
@@ -93,8 +107,9 @@ EXPORT_SYMBOL_GPL(amd_cache_northbridges);
93 they're useless anyways */ 107 they're useless anyways */
94int __init early_is_amd_nb(u32 device) 108int __init early_is_amd_nb(u32 device)
95{ 109{
96 struct pci_device_id *id; 110 const struct pci_device_id *id;
97 u32 vendor = device & 0xffff; 111 u32 vendor = device & 0xffff;
112
98 device >>= 16; 113 device >>= 16;
99 for (id = amd_nb_misc_ids; id->vendor; id++) 114 for (id = amd_nb_misc_ids; id->vendor; id++)
100 if (vendor == id->vendor && device == id->device) 115 if (vendor == id->vendor && device == id->device)
@@ -102,6 +117,65 @@ int __init early_is_amd_nb(u32 device)
102 return 0; 117 return 0;
103} 118}
104 119
120int amd_get_subcaches(int cpu)
121{
122 struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
123 unsigned int mask;
124 int cuid = 0;
125
126 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
127 return 0;
128
129 pci_read_config_dword(link, 0x1d4, &mask);
130
131#ifdef CONFIG_SMP
132 cuid = cpu_data(cpu).compute_unit_id;
133#endif
134 return (mask >> (4 * cuid)) & 0xf;
135}
136
137int amd_set_subcaches(int cpu, int mask)
138{
139 static unsigned int reset, ban;
140 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
141 unsigned int reg;
142 int cuid = 0;
143
144 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
145 return -EINVAL;
146
147 /* if necessary, collect reset state of L3 partitioning and BAN mode */
148 if (reset == 0) {
149 pci_read_config_dword(nb->link, 0x1d4, &reset);
150 pci_read_config_dword(nb->misc, 0x1b8, &ban);
151 ban &= 0x180000;
152 }
153
154 /* deactivate BAN mode if any subcaches are to be disabled */
155 if (mask != 0xf) {
156 pci_read_config_dword(nb->misc, 0x1b8, &reg);
157 pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
158 }
159
160#ifdef CONFIG_SMP
161 cuid = cpu_data(cpu).compute_unit_id;
162#endif
163 mask <<= 4 * cuid;
164 mask |= (0xf ^ (1 << cuid)) << 26;
165
166 pci_write_config_dword(nb->link, 0x1d4, mask);
167
168 /* reset BAN mode if L3 partitioning returned to reset state */
169 pci_read_config_dword(nb->link, 0x1d4, &reg);
170 if (reg == reset) {
171 pci_read_config_dword(nb->misc, 0x1b8, &reg);
172 reg &= ~0x180000;
173 pci_write_config_dword(nb->misc, 0x1b8, reg | ban);
174 }
175
176 return 0;
177}
178
105int amd_cache_gart(void) 179int amd_cache_gart(void)
106{ 180{
107 int i; 181 int i;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 51ef31a89be9..1293c709ee85 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -284,7 +284,7 @@ static int __init apbt_clockevent_register(void)
284 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); 284 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
285 285
286 if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { 286 if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
287 apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; 287 adev->evt.rating = APBT_CLOCKEVENT_RATING - 100;
288 global_clock_event = &adev->evt; 288 global_clock_event = &adev->evt;
289 printk(KERN_DEBUG "%s clockevent registered as global\n", 289 printk(KERN_DEBUG "%s clockevent registered as global\n",
290 global_clock_event->name); 290 global_clock_event->name);
@@ -508,64 +508,12 @@ static int apbt_next_event(unsigned long delta,
508 return 0; 508 return 0;
509} 509}
510 510
511/*
512 * APB timer clock is not in sync with pclk on Langwell, which translates to
513 * unreliable read value caused by sampling error. the error does not add up
514 * overtime and only happens when sampling a 0 as a 1 by mistake. so the time
515 * would go backwards. the following code is trying to prevent time traveling
516 * backwards. little bit paranoid.
517 */
518static cycle_t apbt_read_clocksource(struct clocksource *cs) 511static cycle_t apbt_read_clocksource(struct clocksource *cs)
519{ 512{
520 unsigned long t0, t1, t2; 513 unsigned long current_count;
521 static unsigned long last_read; 514
522 515 current_count = apbt_readl(phy_cs_timer_id, APBTMR_N_CURRENT_VALUE);
523bad_count: 516 return (cycle_t)~current_count;
524 t1 = apbt_readl(phy_cs_timer_id,
525 APBTMR_N_CURRENT_VALUE);
526 t2 = apbt_readl(phy_cs_timer_id,
527 APBTMR_N_CURRENT_VALUE);
528 if (unlikely(t1 < t2)) {
529 pr_debug("APBT: read current count error %lx:%lx:%lx\n",
530 t1, t2, t2 - t1);
531 goto bad_count;
532 }
533 /*
534 * check against cached last read, makes sure time does not go back.
535 * it could be a normal rollover but we will do tripple check anyway
536 */
537 if (unlikely(t2 > last_read)) {
538 /* check if we have a normal rollover */
539 unsigned long raw_intr_status =
540 apbt_readl_reg(APBTMRS_RAW_INT_STATUS);
541 /*
542 * cs timer interrupt is masked but raw intr bit is set if
543 * rollover occurs. then we read EOI reg to clear it.
544 */
545 if (raw_intr_status & (1 << phy_cs_timer_id)) {
546 apbt_readl(phy_cs_timer_id, APBTMR_N_EOI);
547 goto out;
548 }
549 pr_debug("APB CS going back %lx:%lx:%lx ",
550 t2, last_read, t2 - last_read);
551bad_count_x3:
552 pr_debug("triple check enforced\n");
553 t0 = apbt_readl(phy_cs_timer_id,
554 APBTMR_N_CURRENT_VALUE);
555 udelay(1);
556 t1 = apbt_readl(phy_cs_timer_id,
557 APBTMR_N_CURRENT_VALUE);
558 udelay(1);
559 t2 = apbt_readl(phy_cs_timer_id,
560 APBTMR_N_CURRENT_VALUE);
561 if ((t2 > t1) || (t1 > t0)) {
562 printk(KERN_ERR "Error: APB CS tripple check failed\n");
563 goto bad_count_x3;
564 }
565 }
566out:
567 last_read = t2;
568 return (cycle_t)~t2;
569} 517}
570 518
571static int apbt_clocksource_register(void) 519static int apbt_clocksource_register(void)
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5955a7800a96..7b1e8e10b89c 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -13,7 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/types.h> 14#include <linux/types.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/bootmem.h> 16#include <linux/memblock.h>
17#include <linux/mmzone.h> 17#include <linux/mmzone.h>
18#include <linux/pci_ids.h> 18#include <linux/pci_ids.h>
19#include <linux/pci.h> 19#include <linux/pci.h>
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
57static u32 __init allocate_aperture(void) 57static u32 __init allocate_aperture(void)
58{ 58{
59 u32 aper_size; 59 u32 aper_size;
60 void *p; 60 unsigned long addr;
61 61
62 /* aper_size should <= 1G */ 62 /* aper_size should <= 1G */
63 if (fallback_aper_order > 5) 63 if (fallback_aper_order > 5)
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void)
83 * so don't use 512M below as gart iommu, leave the space for kernel 83 * so don't use 512M below as gart iommu, leave the space for kernel
84 * code for safe 84 * code for safe
85 */ 85 */
86 p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); 86 addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
87 if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
88 printk(KERN_ERR
89 "Cannot allocate aperture memory hole (%lx,%uK)\n",
90 addr, aper_size>>10);
91 return 0;
92 }
93 memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
87 /* 94 /*
88 * Kmemleak should not scan this block as it may not be mapped via the 95 * Kmemleak should not scan this block as it may not be mapped via the
89 * kernel direct mapping. 96 * kernel direct mapping.
90 */ 97 */
91 kmemleak_ignore(p); 98 kmemleak_ignore(phys_to_virt(addr));
92 if (!p || __pa(p)+aper_size > 0xffffffff) {
93 printk(KERN_ERR
94 "Cannot allocate aperture memory hole (%p,%uK)\n",
95 p, aper_size>>10);
96 if (p)
97 free_bootmem(__pa(p), aper_size);
98 return 0;
99 }
100 printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", 99 printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
101 aper_size >> 10, __pa(p)); 100 aper_size >> 10, addr);
102 insert_aperture_resource((u32)__pa(p), aper_size); 101 insert_aperture_resource((u32)addr, aper_size);
103 register_nosave_region((u32)__pa(p) >> PAGE_SHIFT, 102 register_nosave_region(addr >> PAGE_SHIFT,
104 (u32)__pa(p+aper_size) >> PAGE_SHIFT); 103 (addr+aper_size) >> PAGE_SHIFT);
105 104
106 return (u32)__pa(p); 105 return (u32)addr;
107} 106}
108 107
109 108
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978a..966673f44141 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -43,6 +43,7 @@
43#include <asm/i8259.h> 43#include <asm/i8259.h>
44#include <asm/proto.h> 44#include <asm/proto.h>
45#include <asm/apic.h> 45#include <asm/apic.h>
46#include <asm/io_apic.h>
46#include <asm/desc.h> 47#include <asm/desc.h>
47#include <asm/hpet.h> 48#include <asm/hpet.h>
48#include <asm/idle.h> 49#include <asm/idle.h>
@@ -78,12 +79,21 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
78EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 79EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
79 80
80#ifdef CONFIG_X86_32 81#ifdef CONFIG_X86_32
82
83/*
84 * On x86_32, the mapping between cpu and logical apicid may vary
85 * depending on apic in use. The following early percpu variable is
86 * used for the mapping. This is where the behaviors of x86_64 and 32
87 * actually diverge. Let's keep it ugly for now.
88 */
89DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
90
81/* 91/*
82 * Knob to control our willingness to enable the local APIC. 92 * Knob to control our willingness to enable the local APIC.
83 * 93 *
84 * +1=force-enable 94 * +1=force-enable
85 */ 95 */
86static int force_enable_local_apic; 96static int force_enable_local_apic __initdata;
87/* 97/*
88 * APIC command line parameters 98 * APIC command line parameters
89 */ 99 */
@@ -153,7 +163,7 @@ early_param("nox2apic", setup_nox2apic);
153unsigned long mp_lapic_addr; 163unsigned long mp_lapic_addr;
154int disable_apic; 164int disable_apic;
155/* Disable local APIC timer from the kernel commandline or via dmi quirk */ 165/* Disable local APIC timer from the kernel commandline or via dmi quirk */
156static int disable_apic_timer __cpuinitdata; 166static int disable_apic_timer __initdata;
157/* Local APIC timer works in C2 */ 167/* Local APIC timer works in C2 */
158int local_apic_timer_c2_ok; 168int local_apic_timer_c2_ok;
159EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); 169EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -177,29 +187,8 @@ static struct resource lapic_resource = {
177 187
178static unsigned int calibration_result; 188static unsigned int calibration_result;
179 189
180static int lapic_next_event(unsigned long delta,
181 struct clock_event_device *evt);
182static void lapic_timer_setup(enum clock_event_mode mode,
183 struct clock_event_device *evt);
184static void lapic_timer_broadcast(const struct cpumask *mask);
185static void apic_pm_activate(void); 190static void apic_pm_activate(void);
186 191
187/*
188 * The local apic timer can be used for any function which is CPU local.
189 */
190static struct clock_event_device lapic_clockevent = {
191 .name = "lapic",
192 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
193 | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
194 .shift = 32,
195 .set_mode = lapic_timer_setup,
196 .set_next_event = lapic_next_event,
197 .broadcast = lapic_timer_broadcast,
198 .rating = 100,
199 .irq = -1,
200};
201static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
202
203static unsigned long apic_phys; 192static unsigned long apic_phys;
204 193
205/* 194/*
@@ -238,7 +227,7 @@ static int modern_apic(void)
238 * right after this call apic become NOOP driven 227 * right after this call apic become NOOP driven
239 * so apic->write/read doesn't do anything 228 * so apic->write/read doesn't do anything
240 */ 229 */
241void apic_disable(void) 230static void __init apic_disable(void)
242{ 231{
243 pr_info("APIC: switched to apic NOOP\n"); 232 pr_info("APIC: switched to apic NOOP\n");
244 apic = &apic_noop; 233 apic = &apic_noop;
@@ -282,23 +271,6 @@ u64 native_apic_icr_read(void)
282 return icr1 | ((u64)icr2 << 32); 271 return icr1 | ((u64)icr2 << 32);
283} 272}
284 273
285/**
286 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
287 */
288void __cpuinit enable_NMI_through_LVT0(void)
289{
290 unsigned int v;
291
292 /* unmask and set to NMI */
293 v = APIC_DM_NMI;
294
295 /* Level triggered for 82489DX (32bit mode) */
296 if (!lapic_is_integrated())
297 v |= APIC_LVT_LEVEL_TRIGGER;
298
299 apic_write(APIC_LVT0, v);
300}
301
302#ifdef CONFIG_X86_32 274#ifdef CONFIG_X86_32
303/** 275/**
304 * get_physical_broadcast - Get number of physical broadcast IDs 276 * get_physical_broadcast - Get number of physical broadcast IDs
@@ -508,6 +480,23 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
508#endif 480#endif
509} 481}
510 482
483
484/*
485 * The local apic timer can be used for any function which is CPU local.
486 */
487static struct clock_event_device lapic_clockevent = {
488 .name = "lapic",
489 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
490 | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
491 .shift = 32,
492 .set_mode = lapic_timer_setup,
493 .set_next_event = lapic_next_event,
494 .broadcast = lapic_timer_broadcast,
495 .rating = 100,
496 .irq = -1,
497};
498static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
499
511/* 500/*
512 * Setup the local APIC timer for this CPU. Copy the initialized values 501 * Setup the local APIC timer for this CPU. Copy the initialized values
513 * of the boot CPU and register the clock event in the framework. 502 * of the boot CPU and register the clock event in the framework.
@@ -1209,7 +1198,7 @@ void __cpuinit setup_local_APIC(void)
1209 rdtscll(tsc); 1198 rdtscll(tsc);
1210 1199
1211 if (disable_apic) { 1200 if (disable_apic) {
1212 arch_disable_smp_support(); 1201 disable_ioapic_support();
1213 return; 1202 return;
1214 } 1203 }
1215 1204
@@ -1237,6 +1226,19 @@ void __cpuinit setup_local_APIC(void)
1237 */ 1226 */
1238 apic->init_apic_ldr(); 1227 apic->init_apic_ldr();
1239 1228
1229#ifdef CONFIG_X86_32
1230 /*
1231 * APIC LDR is initialized. If logical_apicid mapping was
1232 * initialized during get_smp_config(), make sure it matches the
1233 * actual value.
1234 */
1235 i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
1236 WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
1237 /* always use the value from LDR */
1238 early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
1239 logical_smp_processor_id();
1240#endif
1241
1240 /* 1242 /*
1241 * Set Task Priority to 'accept all'. We never change this 1243 * Set Task Priority to 'accept all'. We never change this
1242 * later on. 1244 * later on.
@@ -1448,7 +1450,7 @@ int __init enable_IR(void)
1448void __init enable_IR_x2apic(void) 1450void __init enable_IR_x2apic(void)
1449{ 1451{
1450 unsigned long flags; 1452 unsigned long flags;
1451 struct IO_APIC_route_entry **ioapic_entries = NULL; 1453 struct IO_APIC_route_entry **ioapic_entries;
1452 int ret, x2apic_enabled = 0; 1454 int ret, x2apic_enabled = 0;
1453 int dmar_table_init_ret; 1455 int dmar_table_init_ret;
1454 1456
@@ -1537,7 +1539,7 @@ static int __init detect_init_APIC(void)
1537} 1539}
1538#else 1540#else
1539 1541
1540static int apic_verify(void) 1542static int __init apic_verify(void)
1541{ 1543{
1542 u32 features, h, l; 1544 u32 features, h, l;
1543 1545
@@ -1562,7 +1564,7 @@ static int apic_verify(void)
1562 return 0; 1564 return 0;
1563} 1565}
1564 1566
1565int apic_force_enable(void) 1567int __init apic_force_enable(unsigned long addr)
1566{ 1568{
1567 u32 h, l; 1569 u32 h, l;
1568 1570
@@ -1578,7 +1580,7 @@ int apic_force_enable(void)
1578 if (!(l & MSR_IA32_APICBASE_ENABLE)) { 1580 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1579 pr_info("Local APIC disabled by BIOS -- reenabling.\n"); 1581 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1580 l &= ~MSR_IA32_APICBASE_BASE; 1582 l &= ~MSR_IA32_APICBASE_BASE;
1581 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; 1583 l |= MSR_IA32_APICBASE_ENABLE | addr;
1582 wrmsr(MSR_IA32_APICBASE, l, h); 1584 wrmsr(MSR_IA32_APICBASE, l, h);
1583 enabled_via_apicbase = 1; 1585 enabled_via_apicbase = 1;
1584 } 1586 }
@@ -1619,7 +1621,7 @@ static int __init detect_init_APIC(void)
1619 "you can enable it with \"lapic\"\n"); 1621 "you can enable it with \"lapic\"\n");
1620 return -1; 1622 return -1;
1621 } 1623 }
1622 if (apic_force_enable()) 1624 if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
1623 return -1; 1625 return -1;
1624 } else { 1626 } else {
1625 if (apic_verify()) 1627 if (apic_verify())
@@ -1930,17 +1932,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1930{ 1932{
1931 int cpu; 1933 int cpu;
1932 1934
1933 /*
1934 * Validate version
1935 */
1936 if (version == 0x0) {
1937 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
1938 "fixing up to 0x10. (tell your hw vendor)\n",
1939 version);
1940 version = 0x10;
1941 }
1942 apic_version[apicid] = version;
1943
1944 if (num_processors >= nr_cpu_ids) { 1935 if (num_processors >= nr_cpu_ids) {
1945 int max = nr_cpu_ids; 1936 int max = nr_cpu_ids;
1946 int thiscpu = max + disabled_cpus; 1937 int thiscpu = max + disabled_cpus;
@@ -1954,22 +1945,34 @@ void __cpuinit generic_processor_info(int apicid, int version)
1954 } 1945 }
1955 1946
1956 num_processors++; 1947 num_processors++;
1957 cpu = cpumask_next_zero(-1, cpu_present_mask);
1958
1959 if (version != apic_version[boot_cpu_physical_apicid])
1960 WARN_ONCE(1,
1961 "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
1962 apic_version[boot_cpu_physical_apicid], cpu, version);
1963
1964 physid_set(apicid, phys_cpu_present_map);
1965 if (apicid == boot_cpu_physical_apicid) { 1948 if (apicid == boot_cpu_physical_apicid) {
1966 /* 1949 /*
1967 * x86_bios_cpu_apicid is required to have processors listed 1950 * x86_bios_cpu_apicid is required to have processors listed
1968 * in same order as logical cpu numbers. Hence the first 1951 * in same order as logical cpu numbers. Hence the first
1969 * entry is BSP, and so on. 1952 * entry is BSP, and so on.
1953 * boot_cpu_init() already hold bit 0 in cpu_present_mask
1954 * for BSP.
1970 */ 1955 */
1971 cpu = 0; 1956 cpu = 0;
1957 } else
1958 cpu = cpumask_next_zero(-1, cpu_present_mask);
1959
1960 /*
1961 * Validate version
1962 */
1963 if (version == 0x0) {
1964 pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
1965 cpu, apicid);
1966 version = 0x10;
1972 } 1967 }
1968 apic_version[apicid] = version;
1969
1970 if (version != apic_version[boot_cpu_physical_apicid]) {
1971 pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
1972 apic_version[boot_cpu_physical_apicid], cpu, version);
1973 }
1974
1975 physid_set(apicid, phys_cpu_present_map);
1973 if (apicid > max_physical_apicid) 1976 if (apicid > max_physical_apicid)
1974 max_physical_apicid = apicid; 1977 max_physical_apicid = apicid;
1975 1978
@@ -1977,7 +1980,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
1977 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1980 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1978 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1981 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1979#endif 1982#endif
1980 1983#ifdef CONFIG_X86_32
1984 early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
1985 apic->x86_32_early_logical_apicid(cpu);
1986#endif
1981 set_cpu_possible(cpu, true); 1987 set_cpu_possible(cpu, true);
1982 set_cpu_present(cpu, true); 1988 set_cpu_present(cpu, true);
1983} 1989}
@@ -1998,10 +2004,14 @@ void default_init_apic_ldr(void)
1998} 2004}
1999 2005
2000#ifdef CONFIG_X86_32 2006#ifdef CONFIG_X86_32
2001int default_apicid_to_node(int logical_apicid) 2007int default_x86_32_numa_cpu_node(int cpu)
2002{ 2008{
2003#ifdef CONFIG_SMP 2009#ifdef CONFIG_NUMA
2004 return apicid_2_node[hard_smp_processor_id()]; 2010 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
2011
2012 if (apicid != BAD_APICID)
2013 return __apicid_to_node[apicid];
2014 return NUMA_NO_NODE;
2005#else 2015#else
2006 return 0; 2016 return 0;
2007#endif 2017#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17ce0c2..5652d31fe108 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,8 +185,6 @@ struct apic apic_flat = {
185 .ioapic_phys_id_map = NULL, 185 .ioapic_phys_id_map = NULL,
186 .setup_apic_routing = NULL, 186 .setup_apic_routing = NULL,
187 .multi_timer_check = NULL, 187 .multi_timer_check = NULL,
188 .apicid_to_node = NULL,
189 .cpu_to_logical_apicid = NULL,
190 .cpu_present_to_apicid = default_cpu_present_to_apicid, 188 .cpu_present_to_apicid = default_cpu_present_to_apicid,
191 .apicid_to_cpu_present = NULL, 189 .apicid_to_cpu_present = NULL,
192 .setup_portio_remap = NULL, 190 .setup_portio_remap = NULL,
@@ -337,8 +335,6 @@ struct apic apic_physflat = {
337 .ioapic_phys_id_map = NULL, 335 .ioapic_phys_id_map = NULL,
338 .setup_apic_routing = NULL, 336 .setup_apic_routing = NULL,
339 .multi_timer_check = NULL, 337 .multi_timer_check = NULL,
340 .apicid_to_node = NULL,
341 .cpu_to_logical_apicid = NULL,
342 .cpu_present_to_apicid = default_cpu_present_to_apicid, 338 .cpu_present_to_apicid = default_cpu_present_to_apicid,
343 .apicid_to_cpu_present = NULL, 339 .apicid_to_cpu_present = NULL,
344 .setup_portio_remap = NULL, 340 .setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ffe25f5..f1baa2dc087a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
54 return 0; 54 return 0;
55} 55}
56 56
57static int noop_cpu_to_logical_apicid(int cpu)
58{
59 return 0;
60}
61
62static int noop_phys_pkg_id(int cpuid_apic, int index_msb) 57static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
63{ 58{
64 return 0; 59 return 0;
@@ -113,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
113 cpumask_set_cpu(cpu, retmask); 108 cpumask_set_cpu(cpu, retmask);
114} 109}
115 110
116int noop_apicid_to_node(int logical_apicid)
117{
118 /* we're always on node 0 */
119 return 0;
120}
121
122static u32 noop_apic_read(u32 reg) 111static u32 noop_apic_read(u32 reg)
123{ 112{
124 WARN_ON_ONCE((cpu_has_apic && !disable_apic)); 113 WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -130,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v)
130 WARN_ON_ONCE(cpu_has_apic && !disable_apic); 119 WARN_ON_ONCE(cpu_has_apic && !disable_apic);
131} 120}
132 121
122#ifdef CONFIG_X86_32
123static int noop_x86_32_numa_cpu_node(int cpu)
124{
125 /* we're always on node 0 */
126 return 0;
127}
128#endif
129
133struct apic apic_noop = { 130struct apic apic_noop = {
134 .name = "noop", 131 .name = "noop",
135 .probe = noop_probe, 132 .probe = noop_probe,
@@ -153,9 +150,7 @@ struct apic apic_noop = {
153 .ioapic_phys_id_map = default_ioapic_phys_id_map, 150 .ioapic_phys_id_map = default_ioapic_phys_id_map,
154 .setup_apic_routing = NULL, 151 .setup_apic_routing = NULL,
155 .multi_timer_check = NULL, 152 .multi_timer_check = NULL,
156 .apicid_to_node = noop_apicid_to_node,
157 153
158 .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
159 .cpu_present_to_apicid = default_cpu_present_to_apicid, 154 .cpu_present_to_apicid = default_cpu_present_to_apicid,
160 .apicid_to_cpu_present = physid_set_mask_of_physid, 155 .apicid_to_cpu_present = physid_set_mask_of_physid,
161 156
@@ -197,4 +192,9 @@ struct apic apic_noop = {
197 .icr_write = noop_apic_icr_write, 192 .icr_write = noop_apic_icr_write,
198 .wait_icr_idle = noop_apic_wait_icr_idle, 193 .wait_icr_idle = noop_apic_wait_icr_idle,
199 .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, 194 .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
195
196#ifdef CONFIG_X86_32
197 .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
198 .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
199#endif
200}; 200};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5091b9..541a2e431659 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
45 return 1; 45 return 1;
46} 46}
47 47
48static int bigsmp_early_logical_apicid(int cpu)
49{
50 /* on bigsmp, logical apicid is the same as physical */
51 return early_per_cpu(x86_cpu_to_apicid, cpu);
52}
53
48static inline unsigned long calculate_ldr(int cpu) 54static inline unsigned long calculate_ldr(int cpu)
49{ 55{
50 unsigned long val, id; 56 unsigned long val, id;
@@ -80,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
80 nr_ioapics); 86 nr_ioapics);
81} 87}
82 88
83static int bigsmp_apicid_to_node(int logical_apicid)
84{
85 return apicid_2_node[hard_smp_processor_id()];
86}
87
88static int bigsmp_cpu_present_to_apicid(int mps_cpu) 89static int bigsmp_cpu_present_to_apicid(int mps_cpu)
89{ 90{
90 if (mps_cpu < nr_cpu_ids) 91 if (mps_cpu < nr_cpu_ids)
@@ -93,14 +94,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
93 return BAD_APICID; 94 return BAD_APICID;
94} 95}
95 96
96/* Mapping from cpu number to logical apicid */
97static inline int bigsmp_cpu_to_logical_apicid(int cpu)
98{
99 if (cpu >= nr_cpu_ids)
100 return BAD_APICID;
101 return cpu_physical_id(cpu);
102}
103
104static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) 97static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
105{ 98{
106 /* For clustered we don't have a good way to do this yet - hack */ 99 /* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +108,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
115/* As we are using single CPU as destination, pick only one CPU here */ 108/* As we are using single CPU as destination, pick only one CPU here */
116static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) 109static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
117{ 110{
118 return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask)); 111 int cpu = cpumask_first(cpumask);
112
113 if (cpu < nr_cpu_ids)
114 return cpu_physical_id(cpu);
115 return BAD_APICID;
119} 116}
120 117
121static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 118static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +126,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
129 */ 126 */
130 for_each_cpu_and(cpu, cpumask, andmask) { 127 for_each_cpu_and(cpu, cpumask, andmask) {
131 if (cpumask_test_cpu(cpu, cpu_online_mask)) 128 if (cpumask_test_cpu(cpu, cpu_online_mask))
132 break; 129 return cpu_physical_id(cpu);
133 } 130 }
134 return bigsmp_cpu_to_logical_apicid(cpu); 131 return BAD_APICID;
135} 132}
136 133
137static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) 134static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -219,8 +216,6 @@ struct apic apic_bigsmp = {
219 .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, 216 .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
220 .setup_apic_routing = bigsmp_setup_apic_routing, 217 .setup_apic_routing = bigsmp_setup_apic_routing,
221 .multi_timer_check = NULL, 218 .multi_timer_check = NULL,
222 .apicid_to_node = bigsmp_apicid_to_node,
223 .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
224 .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, 219 .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
225 .apicid_to_cpu_present = physid_set_mask_of_physid, 220 .apicid_to_cpu_present = physid_set_mask_of_physid,
226 .setup_portio_remap = NULL, 221 .setup_portio_remap = NULL,
@@ -256,4 +251,7 @@ struct apic apic_bigsmp = {
256 .icr_write = native_apic_icr_write, 251 .icr_write = native_apic_icr_write,
257 .wait_icr_idle = native_apic_wait_icr_idle, 252 .wait_icr_idle = native_apic_wait_icr_idle,
258 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 253 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
254
255 .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
256 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
259}; 257};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582d8022..3e9de4854c5b 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
460 return physid_isset(bit, phys_cpu_present_map); 460 return physid_isset(bit, phys_cpu_present_map);
461} 461}
462 462
463static int es7000_early_logical_apicid(int cpu)
464{
465 /* on es7000, logical apicid is the same as physical */
466 return early_per_cpu(x86_bios_cpu_apicid, cpu);
467}
468
463static unsigned long calculate_ldr(int cpu) 469static unsigned long calculate_ldr(int cpu)
464{ 470{
465 unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); 471 unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -504,12 +510,11 @@ static void es7000_setup_apic_routing(void)
504 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); 510 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
505} 511}
506 512
507static int es7000_apicid_to_node(int logical_apicid) 513static int es7000_numa_cpu_node(int cpu)
508{ 514{
509 return 0; 515 return 0;
510} 516}
511 517
512
513static int es7000_cpu_present_to_apicid(int mps_cpu) 518static int es7000_cpu_present_to_apicid(int mps_cpu)
514{ 519{
515 if (!mps_cpu) 520 if (!mps_cpu)
@@ -528,18 +533,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
528 ++cpu_id; 533 ++cpu_id;
529} 534}
530 535
531/* Mapping from cpu number to logical apicid */
532static int es7000_cpu_to_logical_apicid(int cpu)
533{
534#ifdef CONFIG_SMP
535 if (cpu >= nr_cpu_ids)
536 return BAD_APICID;
537 return cpu_2_logical_apicid[cpu];
538#else
539 return logical_smp_processor_id();
540#endif
541}
542
543static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) 536static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
544{ 537{
545 /* For clustered we don't have a good way to do this yet - hack */ 538 /* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +554,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
561 * The cpus in the mask must all be on the apic cluster. 554 * The cpus in the mask must all be on the apic cluster.
562 */ 555 */
563 for_each_cpu(cpu, cpumask) { 556 for_each_cpu(cpu, cpumask) {
564 int new_apicid = es7000_cpu_to_logical_apicid(cpu); 557 int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
565 558
566 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { 559 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
567 WARN(1, "Not a valid mask!"); 560 WARN(1, "Not a valid mask!");
@@ -578,7 +571,7 @@ static unsigned int
578es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, 571es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
579 const struct cpumask *andmask) 572 const struct cpumask *andmask)
580{ 573{
581 int apicid = es7000_cpu_to_logical_apicid(0); 574 int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
582 cpumask_var_t cpumask; 575 cpumask_var_t cpumask;
583 576
584 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) 577 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -655,8 +648,6 @@ struct apic __refdata apic_es7000_cluster = {
655 .ioapic_phys_id_map = es7000_ioapic_phys_id_map, 648 .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
656 .setup_apic_routing = es7000_setup_apic_routing, 649 .setup_apic_routing = es7000_setup_apic_routing,
657 .multi_timer_check = NULL, 650 .multi_timer_check = NULL,
658 .apicid_to_node = es7000_apicid_to_node,
659 .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
660 .cpu_present_to_apicid = es7000_cpu_present_to_apicid, 651 .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
661 .apicid_to_cpu_present = es7000_apicid_to_cpu_present, 652 .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
662 .setup_portio_remap = NULL, 653 .setup_portio_remap = NULL,
@@ -695,6 +686,9 @@ struct apic __refdata apic_es7000_cluster = {
695 .icr_write = native_apic_icr_write, 686 .icr_write = native_apic_icr_write,
696 .wait_icr_idle = native_apic_wait_icr_idle, 687 .wait_icr_idle = native_apic_wait_icr_idle,
697 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 688 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
689
690 .x86_32_early_logical_apicid = es7000_early_logical_apicid,
691 .x86_32_numa_cpu_node = es7000_numa_cpu_node,
698}; 692};
699 693
700struct apic __refdata apic_es7000 = { 694struct apic __refdata apic_es7000 = {
@@ -720,8 +714,6 @@ struct apic __refdata apic_es7000 = {
720 .ioapic_phys_id_map = es7000_ioapic_phys_id_map, 714 .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
721 .setup_apic_routing = es7000_setup_apic_routing, 715 .setup_apic_routing = es7000_setup_apic_routing,
722 .multi_timer_check = NULL, 716 .multi_timer_check = NULL,
723 .apicid_to_node = es7000_apicid_to_node,
724 .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
725 .cpu_present_to_apicid = es7000_cpu_present_to_apicid, 717 .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
726 .apicid_to_cpu_present = es7000_apicid_to_cpu_present, 718 .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
727 .setup_portio_remap = NULL, 719 .setup_portio_remap = NULL,
@@ -758,4 +750,7 @@ struct apic __refdata apic_es7000 = {
758 .icr_write = native_apic_icr_write, 750 .icr_write = native_apic_icr_write,
759 .wait_icr_idle = native_apic_wait_icr_idle, 751 .wait_icr_idle = native_apic_wait_icr_idle,
760 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 752 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
753
754 .x86_32_early_logical_apicid = es7000_early_logical_apicid,
755 .x86_32_numa_cpu_node = es7000_numa_cpu_node,
761}; 756};
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 79fd43ca6f96..c4e557a1ebb6 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -83,7 +83,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
83 arch_spin_lock(&lock); 83 arch_spin_lock(&lock);
84 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 84 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
85 show_regs(regs); 85 show_regs(regs);
86 dump_stack();
87 arch_spin_unlock(&lock); 86 arch_spin_unlock(&lock);
88 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 87 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
89 return NOTIFY_STOP; 88 return NOTIFY_STOP;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ca9e2a3545a9..4b5ebd26f565 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -108,7 +108,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
108 108
109int skip_ioapic_setup; 109int skip_ioapic_setup;
110 110
111void arch_disable_smp_support(void) 111/**
112 * disable_ioapic_support() - disables ioapic support at runtime
113 */
114void disable_ioapic_support(void)
112{ 115{
113#ifdef CONFIG_PCI 116#ifdef CONFIG_PCI
114 noioapicquirk = 1; 117 noioapicquirk = 1;
@@ -120,11 +123,14 @@ void arch_disable_smp_support(void)
120static int __init parse_noapic(char *str) 123static int __init parse_noapic(char *str)
121{ 124{
122 /* disable IO-APIC */ 125 /* disable IO-APIC */
123 arch_disable_smp_support(); 126 disable_ioapic_support();
124 return 0; 127 return 0;
125} 128}
126early_param("noapic", parse_noapic); 129early_param("noapic", parse_noapic);
127 130
131static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
132 struct io_apic_irq_attr *attr);
133
128/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ 134/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
129void mp_save_irq(struct mpc_intsrc *m) 135void mp_save_irq(struct mpc_intsrc *m)
130{ 136{
@@ -181,7 +187,7 @@ int __init arch_early_irq_init(void)
181 irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs); 187 irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
182 188
183 for (i = 0; i < count; i++) { 189 for (i = 0; i < count; i++) {
184 set_irq_chip_data(i, &cfg[i]); 190 irq_set_chip_data(i, &cfg[i]);
185 zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); 191 zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
186 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); 192 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
187 /* 193 /*
@@ -200,7 +206,7 @@ int __init arch_early_irq_init(void)
200#ifdef CONFIG_SPARSE_IRQ 206#ifdef CONFIG_SPARSE_IRQ
201static struct irq_cfg *irq_cfg(unsigned int irq) 207static struct irq_cfg *irq_cfg(unsigned int irq)
202{ 208{
203 return get_irq_chip_data(irq); 209 return irq_get_chip_data(irq);
204} 210}
205 211
206static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) 212static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
@@ -226,7 +232,7 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
226{ 232{
227 if (!cfg) 233 if (!cfg)
228 return; 234 return;
229 set_irq_chip_data(at, NULL); 235 irq_set_chip_data(at, NULL);
230 free_cpumask_var(cfg->domain); 236 free_cpumask_var(cfg->domain);
231 free_cpumask_var(cfg->old_domain); 237 free_cpumask_var(cfg->old_domain);
232 kfree(cfg); 238 kfree(cfg);
@@ -256,14 +262,14 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
256 if (res < 0) { 262 if (res < 0) {
257 if (res != -EEXIST) 263 if (res != -EEXIST)
258 return NULL; 264 return NULL;
259 cfg = get_irq_chip_data(at); 265 cfg = irq_get_chip_data(at);
260 if (cfg) 266 if (cfg)
261 return cfg; 267 return cfg;
262 } 268 }
263 269
264 cfg = alloc_irq_cfg(at, node); 270 cfg = alloc_irq_cfg(at, node);
265 if (cfg) 271 if (cfg)
266 set_irq_chip_data(at, cfg); 272 irq_set_chip_data(at, cfg);
267 else 273 else
268 irq_free_desc(at); 274 irq_free_desc(at);
269 return cfg; 275 return cfg;
@@ -818,7 +824,7 @@ static int EISA_ELCR(unsigned int irq)
818#define default_MCA_trigger(idx) (1) 824#define default_MCA_trigger(idx) (1)
819#define default_MCA_polarity(idx) default_ISA_polarity(idx) 825#define default_MCA_polarity(idx) default_ISA_polarity(idx)
820 826
821static int MPBIOS_polarity(int idx) 827static int irq_polarity(int idx)
822{ 828{
823 int bus = mp_irqs[idx].srcbus; 829 int bus = mp_irqs[idx].srcbus;
824 int polarity; 830 int polarity;
@@ -860,7 +866,7 @@ static int MPBIOS_polarity(int idx)
860 return polarity; 866 return polarity;
861} 867}
862 868
863static int MPBIOS_trigger(int idx) 869static int irq_trigger(int idx)
864{ 870{
865 int bus = mp_irqs[idx].srcbus; 871 int bus = mp_irqs[idx].srcbus;
866 int trigger; 872 int trigger;
@@ -932,16 +938,6 @@ static int MPBIOS_trigger(int idx)
932 return trigger; 938 return trigger;
933} 939}
934 940
935static inline int irq_polarity(int idx)
936{
937 return MPBIOS_polarity(idx);
938}
939
940static inline int irq_trigger(int idx)
941{
942 return MPBIOS_trigger(idx);
943}
944
945static int pin_2_irq(int idx, int apic, int pin) 941static int pin_2_irq(int idx, int apic, int pin)
946{ 942{
947 int irq; 943 int irq;
@@ -1189,7 +1185,7 @@ void __setup_vector_irq(int cpu)
1189 raw_spin_lock(&vector_lock); 1185 raw_spin_lock(&vector_lock);
1190 /* Mark the inuse vectors */ 1186 /* Mark the inuse vectors */
1191 for_each_active_irq(irq) { 1187 for_each_active_irq(irq) {
1192 cfg = get_irq_chip_data(irq); 1188 cfg = irq_get_chip_data(irq);
1193 if (!cfg) 1189 if (!cfg)
1194 continue; 1190 continue;
1195 /* 1191 /*
@@ -1220,10 +1216,6 @@ void __setup_vector_irq(int cpu)
1220static struct irq_chip ioapic_chip; 1216static struct irq_chip ioapic_chip;
1221static struct irq_chip ir_ioapic_chip; 1217static struct irq_chip ir_ioapic_chip;
1222 1218
1223#define IOAPIC_AUTO -1
1224#define IOAPIC_EDGE 0
1225#define IOAPIC_LEVEL 1
1226
1227#ifdef CONFIG_X86_32 1219#ifdef CONFIG_X86_32
1228static inline int IO_APIC_irq_trigger(int irq) 1220static inline int IO_APIC_irq_trigger(int irq)
1229{ 1221{
@@ -1248,35 +1240,31 @@ static inline int IO_APIC_irq_trigger(int irq)
1248} 1240}
1249#endif 1241#endif
1250 1242
1251static void ioapic_register_intr(unsigned int irq, unsigned long trigger) 1243static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1244 unsigned long trigger)
1252{ 1245{
1246 struct irq_chip *chip = &ioapic_chip;
1247 irq_flow_handler_t hdl;
1248 bool fasteoi;
1253 1249
1254 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1250 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1255 trigger == IOAPIC_LEVEL) 1251 trigger == IOAPIC_LEVEL) {
1256 irq_set_status_flags(irq, IRQ_LEVEL); 1252 irq_set_status_flags(irq, IRQ_LEVEL);
1257 else 1253 fasteoi = true;
1254 } else {
1258 irq_clear_status_flags(irq, IRQ_LEVEL); 1255 irq_clear_status_flags(irq, IRQ_LEVEL);
1256 fasteoi = false;
1257 }
1259 1258
1260 if (irq_remapped(get_irq_chip_data(irq))) { 1259 if (irq_remapped(cfg)) {
1261 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 1260 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1262 if (trigger) 1261 chip = &ir_ioapic_chip;
1263 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1262 fasteoi = trigger != 0;
1264 handle_fasteoi_irq,
1265 "fasteoi");
1266 else
1267 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1268 handle_edge_irq, "edge");
1269 return;
1270 } 1263 }
1271 1264
1272 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1265 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
1273 trigger == IOAPIC_LEVEL) 1266 irq_set_chip_and_handler_name(irq, chip, hdl,
1274 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1267 fasteoi ? "fasteoi" : "edge");
1275 handle_fasteoi_irq,
1276 "fasteoi");
1277 else
1278 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1279 handle_edge_irq, "edge");
1280} 1268}
1281 1269
1282static int setup_ioapic_entry(int apic_id, int irq, 1270static int setup_ioapic_entry(int apic_id, int irq,
@@ -1374,7 +1362,7 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
1374 return; 1362 return;
1375 } 1363 }
1376 1364
1377 ioapic_register_intr(irq, trigger); 1365 ioapic_register_intr(irq, cfg, trigger);
1378 if (irq < legacy_pic->nr_legacy_irqs) 1366 if (irq < legacy_pic->nr_legacy_irqs)
1379 legacy_pic->mask(irq); 1367 legacy_pic->mask(irq);
1380 1368
@@ -1385,33 +1373,26 @@ static struct {
1385 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); 1373 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
1386} mp_ioapic_routing[MAX_IO_APICS]; 1374} mp_ioapic_routing[MAX_IO_APICS];
1387 1375
1388static void __init setup_IO_APIC_irqs(void) 1376static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
1389{ 1377{
1390 int apic_id, pin, idx, irq, notcon = 0; 1378 if (idx != -1)
1391 int node = cpu_to_node(0); 1379 return false;
1392 struct irq_cfg *cfg;
1393 1380
1394 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1381 apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
1382 mp_ioapics[apic_id].apicid, pin);
1383 return true;
1384}
1385
1386static void __init __io_apic_setup_irqs(unsigned int apic_id)
1387{
1388 int idx, node = cpu_to_node(0);
1389 struct io_apic_irq_attr attr;
1390 unsigned int pin, irq;
1395 1391
1396 for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
1397 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { 1392 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1398 idx = find_irq_entry(apic_id, pin, mp_INT); 1393 idx = find_irq_entry(apic_id, pin, mp_INT);
1399 if (idx == -1) { 1394 if (io_apic_pin_not_connected(idx, apic_id, pin))
1400 if (!notcon) {
1401 notcon = 1;
1402 apic_printk(APIC_VERBOSE,
1403 KERN_DEBUG " %d-%d",
1404 mp_ioapics[apic_id].apicid, pin);
1405 } else
1406 apic_printk(APIC_VERBOSE, " %d-%d",
1407 mp_ioapics[apic_id].apicid, pin);
1408 continue; 1395 continue;
1409 }
1410 if (notcon) {
1411 apic_printk(APIC_VERBOSE,
1412 " (apicid-pin) not connected\n");
1413 notcon = 0;
1414 }
1415 1396
1416 irq = pin_2_irq(idx, apic_id, pin); 1397 irq = pin_2_irq(idx, apic_id, pin);
1417 1398
@@ -1423,25 +1404,24 @@ static void __init setup_IO_APIC_irqs(void)
1423 * installed and if it returns 1: 1404 * installed and if it returns 1:
1424 */ 1405 */
1425 if (apic->multi_timer_check && 1406 if (apic->multi_timer_check &&
1426 apic->multi_timer_check(apic_id, irq)) 1407 apic->multi_timer_check(apic_id, irq))
1427 continue; 1408 continue;
1428 1409
1429 cfg = alloc_irq_and_cfg_at(irq, node); 1410 set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
1430 if (!cfg) 1411 irq_polarity(idx));
1431 continue;
1432 1412
1433 add_pin_to_irq_node(cfg, node, apic_id, pin); 1413 io_apic_setup_irq_pin(irq, node, &attr);
1434 /*
1435 * don't mark it in pin_programmed, so later acpi could
1436 * set it correctly when irq < 16
1437 */
1438 setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
1439 irq_polarity(idx));
1440 } 1414 }
1415}
1441 1416
1442 if (notcon) 1417static void __init setup_IO_APIC_irqs(void)
1443 apic_printk(APIC_VERBOSE, 1418{
1444 " (apicid-pin) not connected\n"); 1419 unsigned int apic_id;
1420
1421 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1422
1423 for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
1424 __io_apic_setup_irqs(apic_id);
1445} 1425}
1446 1426
1447/* 1427/*
@@ -1452,7 +1432,7 @@ static void __init setup_IO_APIC_irqs(void)
1452void setup_IO_APIC_irq_extra(u32 gsi) 1432void setup_IO_APIC_irq_extra(u32 gsi)
1453{ 1433{
1454 int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); 1434 int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
1455 struct irq_cfg *cfg; 1435 struct io_apic_irq_attr attr;
1456 1436
1457 /* 1437 /*
1458 * Convert 'gsi' to 'ioapic.pin'. 1438 * Convert 'gsi' to 'ioapic.pin'.
@@ -1472,21 +1452,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
1472 if (apic_id == 0 || irq < NR_IRQS_LEGACY) 1452 if (apic_id == 0 || irq < NR_IRQS_LEGACY)
1473 return; 1453 return;
1474 1454
1475 cfg = alloc_irq_and_cfg_at(irq, node); 1455 set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
1476 if (!cfg) 1456 irq_polarity(idx));
1477 return;
1478
1479 add_pin_to_irq_node(cfg, node, apic_id, pin);
1480
1481 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
1482 pr_debug("Pin %d-%d already programmed\n",
1483 mp_ioapics[apic_id].apicid, pin);
1484 return;
1485 }
1486 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
1487 1457
1488 setup_ioapic_irq(apic_id, pin, irq, cfg, 1458 io_apic_setup_irq_pin_once(irq, node, &attr);
1489 irq_trigger(idx), irq_polarity(idx));
1490} 1459}
1491 1460
1492/* 1461/*
@@ -1518,7 +1487,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1518 * The timer IRQ doesn't have to know that behind the 1487 * The timer IRQ doesn't have to know that behind the
1519 * scene we may have a 8259A-master in AEOI mode ... 1488 * scene we may have a 8259A-master in AEOI mode ...
1520 */ 1489 */
1521 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 1490 irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
1491 "edge");
1522 1492
1523 /* 1493 /*
1524 * Add it to the IO-APIC irq-routing table: 1494 * Add it to the IO-APIC irq-routing table:
@@ -1625,7 +1595,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1625 for_each_active_irq(irq) { 1595 for_each_active_irq(irq) {
1626 struct irq_pin_list *entry; 1596 struct irq_pin_list *entry;
1627 1597
1628 cfg = get_irq_chip_data(irq); 1598 cfg = irq_get_chip_data(irq);
1629 if (!cfg) 1599 if (!cfg)
1630 continue; 1600 continue;
1631 entry = cfg->irq_2_pin; 1601 entry = cfg->irq_2_pin;
@@ -2391,7 +2361,7 @@ static void irq_complete_move(struct irq_cfg *cfg)
2391 2361
2392void irq_force_complete_move(int irq) 2362void irq_force_complete_move(int irq)
2393{ 2363{
2394 struct irq_cfg *cfg = get_irq_chip_data(irq); 2364 struct irq_cfg *cfg = irq_get_chip_data(irq);
2395 2365
2396 if (!cfg) 2366 if (!cfg)
2397 return; 2367 return;
@@ -2405,7 +2375,7 @@ static inline void irq_complete_move(struct irq_cfg *cfg) { }
2405static void ack_apic_edge(struct irq_data *data) 2375static void ack_apic_edge(struct irq_data *data)
2406{ 2376{
2407 irq_complete_move(data->chip_data); 2377 irq_complete_move(data->chip_data);
2408 move_native_irq(data->irq); 2378 irq_move_irq(data);
2409 ack_APIC_irq(); 2379 ack_APIC_irq();
2410} 2380}
2411 2381
@@ -2462,7 +2432,7 @@ static void ack_apic_level(struct irq_data *data)
2462 irq_complete_move(cfg); 2432 irq_complete_move(cfg);
2463#ifdef CONFIG_GENERIC_PENDING_IRQ 2433#ifdef CONFIG_GENERIC_PENDING_IRQ
2464 /* If we are moving the irq we need to mask it */ 2434 /* If we are moving the irq we need to mask it */
2465 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { 2435 if (unlikely(irqd_is_setaffinity_pending(data))) {
2466 do_unmask_irq = 1; 2436 do_unmask_irq = 1;
2467 mask_ioapic(cfg); 2437 mask_ioapic(cfg);
2468 } 2438 }
@@ -2551,7 +2521,7 @@ static void ack_apic_level(struct irq_data *data)
2551 * and you can go talk to the chipset vendor about it. 2521 * and you can go talk to the chipset vendor about it.
2552 */ 2522 */
2553 if (!io_apic_level_ack_pending(cfg)) 2523 if (!io_apic_level_ack_pending(cfg))
2554 move_masked_irq(irq); 2524 irq_move_masked_irq(data);
2555 unmask_ioapic(cfg); 2525 unmask_ioapic(cfg);
2556 } 2526 }
2557} 2527}
@@ -2614,7 +2584,7 @@ static inline void init_IO_APIC_traps(void)
2614 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2584 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2615 */ 2585 */
2616 for_each_active_irq(irq) { 2586 for_each_active_irq(irq) {
2617 cfg = get_irq_chip_data(irq); 2587 cfg = irq_get_chip_data(irq);
2618 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { 2588 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2619 /* 2589 /*
2620 * Hmm.. We don't have an entry for this, 2590 * Hmm.. We don't have an entry for this,
@@ -2625,7 +2595,7 @@ static inline void init_IO_APIC_traps(void)
2625 legacy_pic->make_irq(irq); 2595 legacy_pic->make_irq(irq);
2626 else 2596 else
2627 /* Strange. Oh, well.. */ 2597 /* Strange. Oh, well.. */
2628 set_irq_chip(irq, &no_irq_chip); 2598 irq_set_chip(irq, &no_irq_chip);
2629 } 2599 }
2630 } 2600 }
2631} 2601}
@@ -2665,7 +2635,7 @@ static struct irq_chip lapic_chip __read_mostly = {
2665static void lapic_register_intr(int irq) 2635static void lapic_register_intr(int irq)
2666{ 2636{
2667 irq_clear_status_flags(irq, IRQ_LEVEL); 2637 irq_clear_status_flags(irq, IRQ_LEVEL);
2668 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2638 irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2669 "edge"); 2639 "edge");
2670} 2640}
2671 2641
@@ -2749,7 +2719,7 @@ int timer_through_8259 __initdata;
2749 */ 2719 */
2750static inline void __init check_timer(void) 2720static inline void __init check_timer(void)
2751{ 2721{
2752 struct irq_cfg *cfg = get_irq_chip_data(0); 2722 struct irq_cfg *cfg = irq_get_chip_data(0);
2753 int node = cpu_to_node(0); 2723 int node = cpu_to_node(0);
2754 int apic1, pin1, apic2, pin2; 2724 int apic1, pin1, apic2, pin2;
2755 unsigned long flags; 2725 unsigned long flags;
@@ -3060,7 +3030,7 @@ unsigned int create_irq_nr(unsigned int from, int node)
3060 raw_spin_unlock_irqrestore(&vector_lock, flags); 3030 raw_spin_unlock_irqrestore(&vector_lock, flags);
3061 3031
3062 if (ret) { 3032 if (ret) {
3063 set_irq_chip_data(irq, cfg); 3033 irq_set_chip_data(irq, cfg);
3064 irq_clear_status_flags(irq, IRQ_NOREQUEST); 3034 irq_clear_status_flags(irq, IRQ_NOREQUEST);
3065 } else { 3035 } else {
3066 free_irq_at(irq, cfg); 3036 free_irq_at(irq, cfg);
@@ -3085,7 +3055,7 @@ int create_irq(void)
3085 3055
3086void destroy_irq(unsigned int irq) 3056void destroy_irq(unsigned int irq)
3087{ 3057{
3088 struct irq_cfg *cfg = get_irq_chip_data(irq); 3058 struct irq_cfg *cfg = irq_get_chip_data(irq);
3089 unsigned long flags; 3059 unsigned long flags;
3090 3060
3091 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 3061 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
@@ -3119,7 +3089,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3119 3089
3120 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3090 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3121 3091
3122 if (irq_remapped(get_irq_chip_data(irq))) { 3092 if (irq_remapped(cfg)) {
3123 struct irte irte; 3093 struct irte irte;
3124 int ir_index; 3094 int ir_index;
3125 u16 sub_handle; 3095 u16 sub_handle;
@@ -3291,6 +3261,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3291 3261
3292static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3262static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3293{ 3263{
3264 struct irq_chip *chip = &msi_chip;
3294 struct msi_msg msg; 3265 struct msi_msg msg;
3295 int ret; 3266 int ret;
3296 3267
@@ -3298,14 +3269,15 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3298 if (ret < 0) 3269 if (ret < 0)
3299 return ret; 3270 return ret;
3300 3271
3301 set_irq_msi(irq, msidesc); 3272 irq_set_msi_desc(irq, msidesc);
3302 write_msi_msg(irq, &msg); 3273 write_msi_msg(irq, &msg);
3303 3274
3304 if (irq_remapped(get_irq_chip_data(irq))) { 3275 if (irq_remapped(irq_get_chip_data(irq))) {
3305 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3276 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3306 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); 3277 chip = &msi_ir_chip;
3307 } else 3278 }
3308 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3279
3280 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3309 3281
3310 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); 3282 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3311 3283
@@ -3423,8 +3395,8 @@ int arch_setup_dmar_msi(unsigned int irq)
3423 if (ret < 0) 3395 if (ret < 0)
3424 return ret; 3396 return ret;
3425 dmar_msi_write(irq, &msg); 3397 dmar_msi_write(irq, &msg);
3426 set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, 3398 irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
3427 "edge"); 3399 "edge");
3428 return 0; 3400 return 0;
3429} 3401}
3430#endif 3402#endif
@@ -3482,6 +3454,7 @@ static struct irq_chip hpet_msi_type = {
3482 3454
3483int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3455int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3484{ 3456{
3457 struct irq_chip *chip = &hpet_msi_type;
3485 struct msi_msg msg; 3458 struct msi_msg msg;
3486 int ret; 3459 int ret;
3487 3460
@@ -3501,15 +3474,12 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3501 if (ret < 0) 3474 if (ret < 0)
3502 return ret; 3475 return ret;
3503 3476
3504 hpet_msi_write(get_irq_data(irq), &msg); 3477 hpet_msi_write(irq_get_handler_data(irq), &msg);
3505 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3478 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3506 if (irq_remapped(get_irq_chip_data(irq))) 3479 if (irq_remapped(irq_get_chip_data(irq)))
3507 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, 3480 chip = &ir_hpet_msi_type;
3508 handle_edge_irq, "edge");
3509 else
3510 set_irq_chip_and_handler_name(irq, &hpet_msi_type,
3511 handle_edge_irq, "edge");
3512 3481
3482 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3513 return 0; 3483 return 0;
3514} 3484}
3515#endif 3485#endif
@@ -3596,7 +3566,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3596 3566
3597 write_ht_irq_msg(irq, &msg); 3567 write_ht_irq_msg(irq, &msg);
3598 3568
3599 set_irq_chip_and_handler_name(irq, &ht_irq_chip, 3569 irq_set_chip_and_handler_name(irq, &ht_irq_chip,
3600 handle_edge_irq, "edge"); 3570 handle_edge_irq, "edge");
3601 3571
3602 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); 3572 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
@@ -3605,7 +3575,40 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3605} 3575}
3606#endif /* CONFIG_HT_IRQ */ 3576#endif /* CONFIG_HT_IRQ */
3607 3577
3608int __init io_apic_get_redir_entries (int ioapic) 3578int
3579io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
3580{
3581 struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
3582 int ret;
3583
3584 if (!cfg)
3585 return -EINVAL;
3586 ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
3587 if (!ret)
3588 setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
3589 attr->trigger, attr->polarity);
3590 return ret;
3591}
3592
3593static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
3594 struct io_apic_irq_attr *attr)
3595{
3596 unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
3597 int ret;
3598
3599 /* Avoid redundant programming */
3600 if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) {
3601 pr_debug("Pin %d-%d already programmed\n",
3602 mp_ioapics[id].apicid, pin);
3603 return 0;
3604 }
3605 ret = io_apic_setup_irq_pin(irq, node, attr);
3606 if (!ret)
3607 set_bit(pin, mp_ioapic_routing[id].pin_programmed);
3608 return ret;
3609}
3610
3611static int __init io_apic_get_redir_entries(int ioapic)
3609{ 3612{
3610 union IO_APIC_reg_01 reg_01; 3613 union IO_APIC_reg_01 reg_01;
3611 unsigned long flags; 3614 unsigned long flags;
@@ -3659,96 +3662,24 @@ int __init arch_probe_nr_irqs(void)
3659} 3662}
3660#endif 3663#endif
3661 3664
3662static int __io_apic_set_pci_routing(struct device *dev, int irq, 3665int io_apic_set_pci_routing(struct device *dev, int irq,
3663 struct io_apic_irq_attr *irq_attr) 3666 struct io_apic_irq_attr *irq_attr)
3664{ 3667{
3665 struct irq_cfg *cfg;
3666 int node; 3668 int node;
3667 int ioapic, pin;
3668 int trigger, polarity;
3669 3669
3670 ioapic = irq_attr->ioapic;
3671 if (!IO_APIC_IRQ(irq)) { 3670 if (!IO_APIC_IRQ(irq)) {
3672 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3671 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3673 ioapic); 3672 irq_attr->ioapic);
3674 return -EINVAL; 3673 return -EINVAL;
3675 } 3674 }
3676 3675
3677 if (dev) 3676 node = dev ? dev_to_node(dev) : cpu_to_node(0);
3678 node = dev_to_node(dev);
3679 else
3680 node = cpu_to_node(0);
3681
3682 cfg = alloc_irq_and_cfg_at(irq, node);
3683 if (!cfg)
3684 return 0;
3685
3686 pin = irq_attr->ioapic_pin;
3687 trigger = irq_attr->trigger;
3688 polarity = irq_attr->polarity;
3689 3677
3690 /* 3678 return io_apic_setup_irq_pin_once(irq, node, irq_attr);
3691 * IRQs < 16 are already in the irq_2_pin[] map
3692 */
3693 if (irq >= legacy_pic->nr_legacy_irqs) {
3694 if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
3695 printk(KERN_INFO "can not add pin %d for irq %d\n",
3696 pin, irq);
3697 return 0;
3698 }
3699 }
3700
3701 setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
3702
3703 return 0;
3704} 3679}
3705 3680
3706int io_apic_set_pci_routing(struct device *dev, int irq,
3707 struct io_apic_irq_attr *irq_attr)
3708{
3709 int ioapic, pin;
3710 /*
3711 * Avoid pin reprogramming. PRTs typically include entries
3712 * with redundant pin->gsi mappings (but unique PCI devices);
3713 * we only program the IOAPIC on the first.
3714 */
3715 ioapic = irq_attr->ioapic;
3716 pin = irq_attr->ioapic_pin;
3717 if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
3718 pr_debug("Pin %d-%d already programmed\n",
3719 mp_ioapics[ioapic].apicid, pin);
3720 return 0;
3721 }
3722 set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
3723
3724 return __io_apic_set_pci_routing(dev, irq, irq_attr);
3725}
3726
3727u8 __init io_apic_unique_id(u8 id)
3728{
3729#ifdef CONFIG_X86_32 3681#ifdef CONFIG_X86_32
3730 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 3682static int __init io_apic_get_unique_id(int ioapic, int apic_id)
3731 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3732 return io_apic_get_unique_id(nr_ioapics, id);
3733 else
3734 return id;
3735#else
3736 int i;
3737 DECLARE_BITMAP(used, 256);
3738
3739 bitmap_zero(used, 256);
3740 for (i = 0; i < nr_ioapics; i++) {
3741 struct mpc_ioapic *ia = &mp_ioapics[i];
3742 __set_bit(ia->apicid, used);
3743 }
3744 if (!test_bit(id, used))
3745 return id;
3746 return find_first_zero_bit(used, 256);
3747#endif
3748}
3749
3750#ifdef CONFIG_X86_32
3751int __init io_apic_get_unique_id(int ioapic, int apic_id)
3752{ 3683{
3753 union IO_APIC_reg_00 reg_00; 3684 union IO_APIC_reg_00 reg_00;
3754 static physid_mask_t apic_id_map = PHYSID_MASK_NONE; 3685 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3821,9 +3752,33 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3821 3752
3822 return apic_id; 3753 return apic_id;
3823} 3754}
3755
3756static u8 __init io_apic_unique_id(u8 id)
3757{
3758 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3759 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3760 return io_apic_get_unique_id(nr_ioapics, id);
3761 else
3762 return id;
3763}
3764#else
3765static u8 __init io_apic_unique_id(u8 id)
3766{
3767 int i;
3768 DECLARE_BITMAP(used, 256);
3769
3770 bitmap_zero(used, 256);
3771 for (i = 0; i < nr_ioapics; i++) {
3772 struct mpc_ioapic *ia = &mp_ioapics[i];
3773 __set_bit(ia->apicid, used);
3774 }
3775 if (!test_bit(id, used))
3776 return id;
3777 return find_first_zero_bit(used, 256);
3778}
3824#endif 3779#endif
3825 3780
3826int __init io_apic_get_version(int ioapic) 3781static int __init io_apic_get_version(int ioapic)
3827{ 3782{
3828 union IO_APIC_reg_01 reg_01; 3783 union IO_APIC_reg_01 reg_01;
3829 unsigned long flags; 3784 unsigned long flags;
@@ -3868,8 +3823,8 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
3868void __init setup_ioapic_dest(void) 3823void __init setup_ioapic_dest(void)
3869{ 3824{
3870 int pin, ioapic, irq, irq_entry; 3825 int pin, ioapic, irq, irq_entry;
3871 struct irq_desc *desc;
3872 const struct cpumask *mask; 3826 const struct cpumask *mask;
3827 struct irq_data *idata;
3873 3828
3874 if (skip_ioapic_setup == 1) 3829 if (skip_ioapic_setup == 1)
3875 return; 3830 return;
@@ -3884,21 +3839,20 @@ void __init setup_ioapic_dest(void)
3884 if ((ioapic > 0) && (irq > 16)) 3839 if ((ioapic > 0) && (irq > 16))
3885 continue; 3840 continue;
3886 3841
3887 desc = irq_to_desc(irq); 3842 idata = irq_get_irq_data(irq);
3888 3843
3889 /* 3844 /*
3890 * Honour affinities which have been set in early boot 3845 * Honour affinities which have been set in early boot
3891 */ 3846 */
3892 if (desc->status & 3847 if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
3893 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 3848 mask = idata->affinity;
3894 mask = desc->irq_data.affinity;
3895 else 3849 else
3896 mask = apic->target_cpus(); 3850 mask = apic->target_cpus();
3897 3851
3898 if (intr_remapping_enabled) 3852 if (intr_remapping_enabled)
3899 ir_ioapic_set_affinity(&desc->irq_data, mask, false); 3853 ir_ioapic_set_affinity(idata, mask, false);
3900 else 3854 else
3901 ioapic_set_affinity(&desc->irq_data, mask, false); 3855 ioapic_set_affinity(idata, mask, false);
3902 } 3856 }
3903 3857
3904} 3858}
@@ -4026,7 +3980,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
4026 return gsi - mp_gsi_routing[ioapic].gsi_base; 3980 return gsi - mp_gsi_routing[ioapic].gsi_base;
4027} 3981}
4028 3982
4029static int bad_ioapic(unsigned long address) 3983static __init int bad_ioapic(unsigned long address)
4030{ 3984{
4031 if (nr_ioapics >= MAX_IO_APICS) { 3985 if (nr_ioapics >= MAX_IO_APICS) {
4032 printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " 3986 printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
@@ -4086,20 +4040,16 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4086/* Enable IOAPIC early just for system timer */ 4040/* Enable IOAPIC early just for system timer */
4087void __init pre_init_apic_IRQ0(void) 4041void __init pre_init_apic_IRQ0(void)
4088{ 4042{
4089 struct irq_cfg *cfg; 4043 struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
4090 4044
4091 printk(KERN_INFO "Early APIC setup for system timer0\n"); 4045 printk(KERN_INFO "Early APIC setup for system timer0\n");
4092#ifndef CONFIG_SMP 4046#ifndef CONFIG_SMP
4093 physid_set_mask_of_physid(boot_cpu_physical_apicid, 4047 physid_set_mask_of_physid(boot_cpu_physical_apicid,
4094 &phys_cpu_present_map); 4048 &phys_cpu_present_map);
4095#endif 4049#endif
4096 /* Make sure the irq descriptor is set up */
4097 cfg = alloc_irq_and_cfg_at(0, 0);
4098
4099 setup_local_APIC(); 4050 setup_local_APIC();
4100 4051
4101 add_pin_to_irq_node(cfg, 0, 0, 0); 4052 io_apic_setup_irq_pin(0, 0, &attr);
4102 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 4053 irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
4103 4054 "edge");
4104 setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
4105} 4055}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e090a6f..cce91bf26676 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
56 local_irq_restore(flags); 56 local_irq_restore(flags);
57} 57}
58 58
59#ifdef CONFIG_X86_32
60
59void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, 61void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
60 int vector) 62 int vector)
61{ 63{
@@ -71,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
71 local_irq_save(flags); 73 local_irq_save(flags);
72 for_each_cpu(query_cpu, mask) 74 for_each_cpu(query_cpu, mask)
73 __default_send_IPI_dest_field( 75 __default_send_IPI_dest_field(
74 apic->cpu_to_logical_apicid(query_cpu), vector, 76 early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
75 apic->dest_logical); 77 vector, apic->dest_logical);
76 local_irq_restore(flags); 78 local_irq_restore(flags);
77} 79}
78 80
@@ -90,14 +92,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
90 if (query_cpu == this_cpu) 92 if (query_cpu == this_cpu)
91 continue; 93 continue;
92 __default_send_IPI_dest_field( 94 __default_send_IPI_dest_field(
93 apic->cpu_to_logical_apicid(query_cpu), vector, 95 early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
94 apic->dest_logical); 96 vector, apic->dest_logical);
95 } 97 }
96 local_irq_restore(flags); 98 local_irq_restore(flags);
97} 99}
98 100
99#ifdef CONFIG_X86_32
100
101/* 101/*
102 * This is only used on smaller machines. 102 * This is only used on smaller machines.
103 */ 103 */
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 960f26ab5c9f..6273eee5134b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
373 return physids_promote(0xFUL, retmap); 373 return physids_promote(0xFUL, retmap);
374} 374}
375 375
376static inline int numaq_cpu_to_logical_apicid(int cpu)
377{
378 if (cpu >= nr_cpu_ids)
379 return BAD_APICID;
380 return cpu_2_logical_apicid[cpu];
381}
382
383/* 376/*
384 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent 377 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
385 * cpu to APIC ID relation to properly interact with the intelligent 378 * cpu to APIC ID relation to properly interact with the intelligent
@@ -398,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
398 return logical_apicid >> 4; 391 return logical_apicid >> 4;
399} 392}
400 393
394static int numaq_numa_cpu_node(int cpu)
395{
396 int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
397
398 if (logical_apicid != BAD_APICID)
399 return numaq_apicid_to_node(logical_apicid);
400 return NUMA_NO_NODE;
401}
402
401static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) 403static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
402{ 404{
403 int node = numaq_apicid_to_node(logical_apicid); 405 int node = numaq_apicid_to_node(logical_apicid);
@@ -508,8 +510,6 @@ struct apic __refdata apic_numaq = {
508 .ioapic_phys_id_map = numaq_ioapic_phys_id_map, 510 .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
509 .setup_apic_routing = numaq_setup_apic_routing, 511 .setup_apic_routing = numaq_setup_apic_routing,
510 .multi_timer_check = numaq_multi_timer_check, 512 .multi_timer_check = numaq_multi_timer_check,
511 .apicid_to_node = numaq_apicid_to_node,
512 .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
513 .cpu_present_to_apicid = numaq_cpu_present_to_apicid, 513 .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
514 .apicid_to_cpu_present = numaq_apicid_to_cpu_present, 514 .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
515 .setup_portio_remap = numaq_setup_portio_remap, 515 .setup_portio_remap = numaq_setup_portio_remap,
@@ -547,4 +547,7 @@ struct apic __refdata apic_numaq = {
547 .icr_write = native_apic_icr_write, 547 .icr_write = native_apic_icr_write,
548 .wait_icr_idle = native_apic_wait_icr_idle, 548 .wait_icr_idle = native_apic_wait_icr_idle,
549 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 549 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
550
551 .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
552 .x86_32_numa_cpu_node = numaq_numa_cpu_node,
550}; 553};
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe016084..fc84c7b61108 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void)
77 apic->setup_apic_routing(); 77 apic->setup_apic_routing();
78} 78}
79 79
80static int default_x86_32_early_logical_apicid(int cpu)
81{
82 return 1 << cpu;
83}
84
80static void setup_apic_flat_routing(void) 85static void setup_apic_flat_routing(void)
81{ 86{
82#ifdef CONFIG_X86_IO_APIC 87#ifdef CONFIG_X86_IO_APIC
@@ -130,8 +135,6 @@ struct apic apic_default = {
130 .ioapic_phys_id_map = default_ioapic_phys_id_map, 135 .ioapic_phys_id_map = default_ioapic_phys_id_map,
131 .setup_apic_routing = setup_apic_flat_routing, 136 .setup_apic_routing = setup_apic_flat_routing,
132 .multi_timer_check = NULL, 137 .multi_timer_check = NULL,
133 .apicid_to_node = default_apicid_to_node,
134 .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
135 .cpu_present_to_apicid = default_cpu_present_to_apicid, 138 .cpu_present_to_apicid = default_cpu_present_to_apicid,
136 .apicid_to_cpu_present = physid_set_mask_of_physid, 139 .apicid_to_cpu_present = physid_set_mask_of_physid,
137 .setup_portio_remap = NULL, 140 .setup_portio_remap = NULL,
@@ -167,6 +170,9 @@ struct apic apic_default = {
167 .icr_write = native_apic_icr_write, 170 .icr_write = native_apic_icr_write,
168 .wait_icr_idle = native_apic_wait_icr_idle, 171 .wait_icr_idle = native_apic_wait_icr_idle,
169 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 172 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
173
174 .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
175 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
170}; 176};
171 177
172extern struct apic apic_numaq; 178extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b419263d90d..e4b8059b414a 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
194 return 1; 194 return 1;
195} 195}
196 196
197static void summit_init_apic_ldr(void) 197static int summit_early_logical_apicid(int cpu)
198{ 198{
199 unsigned long val, id;
200 int count = 0; 199 int count = 0;
201 u8 my_id = (u8)hard_smp_processor_id(); 200 u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
202 u8 my_cluster = APIC_CLUSTER(my_id); 201 u8 my_cluster = APIC_CLUSTER(my_id);
203#ifdef CONFIG_SMP 202#ifdef CONFIG_SMP
204 u8 lid; 203 u8 lid;
@@ -206,7 +205,7 @@ static void summit_init_apic_ldr(void)
206 205
207 /* Create logical APIC IDs by counting CPUs already in cluster. */ 206 /* Create logical APIC IDs by counting CPUs already in cluster. */
208 for (count = 0, i = nr_cpu_ids; --i >= 0; ) { 207 for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
209 lid = cpu_2_logical_apicid[i]; 208 lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
210 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) 209 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
211 ++count; 210 ++count;
212 } 211 }
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
214 /* We only have a 4 wide bitmap in cluster mode. If a deranged 213 /* We only have a 4 wide bitmap in cluster mode. If a deranged
215 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ 214 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
216 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); 215 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
217 id = my_cluster | (1UL << count); 216 return my_cluster | (1UL << count);
217}
218
219static void summit_init_apic_ldr(void)
220{
221 int cpu = smp_processor_id();
222 unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
223 unsigned long val;
224
218 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); 225 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
219 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; 226 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
220 val |= SET_APIC_LOGICAL_ID(id); 227 val |= SET_APIC_LOGICAL_ID(id);
@@ -232,27 +239,6 @@ static void summit_setup_apic_routing(void)
232 nr_ioapics); 239 nr_ioapics);
233} 240}
234 241
235static int summit_apicid_to_node(int logical_apicid)
236{
237#ifdef CONFIG_SMP
238 return apicid_2_node[hard_smp_processor_id()];
239#else
240 return 0;
241#endif
242}
243
244/* Mapping from cpu number to logical apicid */
245static inline int summit_cpu_to_logical_apicid(int cpu)
246{
247#ifdef CONFIG_SMP
248 if (cpu >= nr_cpu_ids)
249 return BAD_APICID;
250 return cpu_2_logical_apicid[cpu];
251#else
252 return logical_smp_processor_id();
253#endif
254}
255
256static int summit_cpu_present_to_apicid(int mps_cpu) 242static int summit_cpu_present_to_apicid(int mps_cpu)
257{ 243{
258 if (mps_cpu < nr_cpu_ids) 244 if (mps_cpu < nr_cpu_ids)
@@ -286,7 +272,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
286 * The cpus in the mask must all be on the apic cluster. 272 * The cpus in the mask must all be on the apic cluster.
287 */ 273 */
288 for_each_cpu(cpu, cpumask) { 274 for_each_cpu(cpu, cpumask) {
289 int new_apicid = summit_cpu_to_logical_apicid(cpu); 275 int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
290 276
291 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { 277 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
292 printk("%s: Not a valid mask!\n", __func__); 278 printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +287,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
301static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, 287static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
302 const struct cpumask *andmask) 288 const struct cpumask *andmask)
303{ 289{
304 int apicid = summit_cpu_to_logical_apicid(0); 290 int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
305 cpumask_var_t cpumask; 291 cpumask_var_t cpumask;
306 292
307 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) 293 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -528,8 +514,6 @@ struct apic apic_summit = {
528 .ioapic_phys_id_map = summit_ioapic_phys_id_map, 514 .ioapic_phys_id_map = summit_ioapic_phys_id_map,
529 .setup_apic_routing = summit_setup_apic_routing, 515 .setup_apic_routing = summit_setup_apic_routing,
530 .multi_timer_check = NULL, 516 .multi_timer_check = NULL,
531 .apicid_to_node = summit_apicid_to_node,
532 .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
533 .cpu_present_to_apicid = summit_cpu_present_to_apicid, 517 .cpu_present_to_apicid = summit_cpu_present_to_apicid,
534 .apicid_to_cpu_present = summit_apicid_to_cpu_present, 518 .apicid_to_cpu_present = summit_apicid_to_cpu_present,
535 .setup_portio_remap = NULL, 519 .setup_portio_remap = NULL,
@@ -565,4 +549,7 @@ struct apic apic_summit = {
565 .icr_write = native_apic_icr_write, 549 .icr_write = native_apic_icr_write,
566 .wait_icr_idle = native_apic_wait_icr_idle, 550 .wait_icr_idle = native_apic_wait_icr_idle,
567 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 551 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
552
553 .x86_32_early_logical_apicid = summit_early_logical_apicid,
554 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
568}; 555};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59f4910..90949bbd566d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -206,8 +206,6 @@ struct apic apic_x2apic_cluster = {
206 .ioapic_phys_id_map = NULL, 206 .ioapic_phys_id_map = NULL,
207 .setup_apic_routing = NULL, 207 .setup_apic_routing = NULL,
208 .multi_timer_check = NULL, 208 .multi_timer_check = NULL,
209 .apicid_to_node = NULL,
210 .cpu_to_logical_apicid = NULL,
211 .cpu_present_to_apicid = default_cpu_present_to_apicid, 209 .cpu_present_to_apicid = default_cpu_present_to_apicid,
212 .apicid_to_cpu_present = NULL, 210 .apicid_to_cpu_present = NULL,
213 .setup_portio_remap = NULL, 211 .setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38c5ced..c7e6d6645bf4 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -195,8 +195,6 @@ struct apic apic_x2apic_phys = {
195 .ioapic_phys_id_map = NULL, 195 .ioapic_phys_id_map = NULL,
196 .setup_apic_routing = NULL, 196 .setup_apic_routing = NULL,
197 .multi_timer_check = NULL, 197 .multi_timer_check = NULL,
198 .apicid_to_node = NULL,
199 .cpu_to_logical_apicid = NULL,
200 .cpu_present_to_apicid = default_cpu_present_to_apicid, 198 .cpu_present_to_apicid = default_cpu_present_to_apicid,
201 .apicid_to_cpu_present = NULL, 199 .apicid_to_cpu_present = NULL,
202 .setup_portio_remap = NULL, 200 .setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index bd16b58b8850..3c289281394c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -338,8 +338,6 @@ struct apic __refdata apic_x2apic_uv_x = {
338 .ioapic_phys_id_map = NULL, 338 .ioapic_phys_id_map = NULL,
339 .setup_apic_routing = NULL, 339 .setup_apic_routing = NULL,
340 .multi_timer_check = NULL, 340 .multi_timer_check = NULL,
341 .apicid_to_node = NULL,
342 .cpu_to_logical_apicid = NULL,
343 .cpu_present_to_apicid = default_cpu_present_to_apicid, 341 .cpu_present_to_apicid = default_cpu_present_to_apicid,
344 .apicid_to_cpu_present = NULL, 342 .apicid_to_cpu_present = NULL,
345 .setup_portio_remap = NULL, 343 .setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index b929108eb58f..9079926a5b18 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
227#include <linux/suspend.h> 227#include <linux/suspend.h>
228#include <linux/kthread.h> 228#include <linux/kthread.h>
229#include <linux/jiffies.h> 229#include <linux/jiffies.h>
230#include <linux/acpi.h>
230 231
231#include <asm/system.h> 232#include <asm/system.h>
232#include <asm/uaccess.h> 233#include <asm/uaccess.h>
@@ -2321,12 +2322,11 @@ static int __init apm_init(void)
2321 apm_info.disabled = 1; 2322 apm_info.disabled = 1;
2322 return -ENODEV; 2323 return -ENODEV;
2323 } 2324 }
2324 if (pm_flags & PM_ACPI) { 2325 if (!acpi_disabled) {
2325 printk(KERN_NOTICE "apm: overridden by ACPI.\n"); 2326 printk(KERN_NOTICE "apm: overridden by ACPI.\n");
2326 apm_info.disabled = 1; 2327 apm_info.disabled = 1;
2327 return -ENODEV; 2328 return -ENODEV;
2328 } 2329 }
2329 pm_flags |= PM_APM;
2330 2330
2331 /* 2331 /*
2332 * Set up the long jump entry point to the APM BIOS, which is called 2332 * Set up the long jump entry point to the APM BIOS, which is called
@@ -2418,7 +2418,6 @@ static void __exit apm_exit(void)
2418 kthread_stop(kapmd_task); 2418 kthread_stop(kapmd_task);
2419 kapmd_task = NULL; 2419 kapmd_task = NULL;
2420 } 2420 }
2421 pm_flags &= ~PM_APM;
2422} 2421}
2423 2422
2424module_init(apm_init); 2423module_init(apm_init);
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cfa82c899f47..4f13fafc5264 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -1,5 +1,70 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed to extract
4 * and format the required data.
5 */
6#define COMPILE_OFFSETS
7
8#include <linux/crypto.h>
9#include <linux/sched.h>
10#include <linux/stddef.h>
11#include <linux/hardirq.h>
12#include <linux/suspend.h>
13#include <linux/kbuild.h>
14#include <asm/processor.h>
15#include <asm/thread_info.h>
16#include <asm/sigframe.h>
17#include <asm/bootparam.h>
18#include <asm/suspend.h>
19
20#ifdef CONFIG_XEN
21#include <xen/interface/xen.h>
22#endif
23
1#ifdef CONFIG_X86_32 24#ifdef CONFIG_X86_32
2# include "asm-offsets_32.c" 25# include "asm-offsets_32.c"
3#else 26#else
4# include "asm-offsets_64.c" 27# include "asm-offsets_64.c"
5#endif 28#endif
29
30void common(void) {
31 BLANK();
32 OFFSET(TI_flags, thread_info, flags);
33 OFFSET(TI_status, thread_info, status);
34 OFFSET(TI_addr_limit, thread_info, addr_limit);
35 OFFSET(TI_preempt_count, thread_info, preempt_count);
36
37 BLANK();
38 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
39
40 BLANK();
41 OFFSET(pbe_address, pbe, address);
42 OFFSET(pbe_orig_address, pbe, orig_address);
43 OFFSET(pbe_next, pbe, next);
44
45#ifdef CONFIG_PARAVIRT
46 BLANK();
47 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
48 OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
49 OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
50 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
51 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
52 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
53 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
54 OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
55 OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
56#endif
57
58#ifdef CONFIG_XEN
59 BLANK();
60 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
61 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
62#endif
63
64 BLANK();
65 OFFSET(BP_scratch, boot_params, scratch);
66 OFFSET(BP_loadflags, boot_params, hdr.loadflags);
67 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
68 OFFSET(BP_version, boot_params, hdr.version);
69 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
70}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37a..c29d631af6fc 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,26 +1,4 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed
4 * to extract and format the required data.
5 */
6
7#include <linux/crypto.h>
8#include <linux/sched.h>
9#include <linux/signal.h>
10#include <linux/personality.h>
11#include <linux/suspend.h>
12#include <linux/kbuild.h>
13#include <asm/ucontext.h> 1#include <asm/ucontext.h>
14#include <asm/sigframe.h>
15#include <asm/pgtable.h>
16#include <asm/fixmap.h>
17#include <asm/processor.h>
18#include <asm/thread_info.h>
19#include <asm/bootparam.h>
20#include <asm/elf.h>
21#include <asm/suspend.h>
22
23#include <xen/interface/xen.h>
24 2
25#include <linux/lguest.h> 3#include <linux/lguest.h>
26#include "../../../drivers/lguest/lg.h" 4#include "../../../drivers/lguest/lg.h"
@@ -51,21 +29,10 @@ void foo(void)
51 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); 29 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
52 BLANK(); 30 BLANK();
53 31
54 OFFSET(TI_task, thread_info, task);
55 OFFSET(TI_exec_domain, thread_info, exec_domain);
56 OFFSET(TI_flags, thread_info, flags);
57 OFFSET(TI_status, thread_info, status);
58 OFFSET(TI_preempt_count, thread_info, preempt_count);
59 OFFSET(TI_addr_limit, thread_info, addr_limit);
60 OFFSET(TI_restart_block, thread_info, restart_block);
61 OFFSET(TI_sysenter_return, thread_info, sysenter_return); 32 OFFSET(TI_sysenter_return, thread_info, sysenter_return);
62 OFFSET(TI_cpu, thread_info, cpu); 33 OFFSET(TI_cpu, thread_info, cpu);
63 BLANK(); 34 BLANK();
64 35
65 OFFSET(GDS_size, desc_ptr, size);
66 OFFSET(GDS_address, desc_ptr, address);
67 BLANK();
68
69 OFFSET(PT_EBX, pt_regs, bx); 36 OFFSET(PT_EBX, pt_regs, bx);
70 OFFSET(PT_ECX, pt_regs, cx); 37 OFFSET(PT_ECX, pt_regs, cx);
71 OFFSET(PT_EDX, pt_regs, dx); 38 OFFSET(PT_EDX, pt_regs, dx);
@@ -85,42 +52,13 @@ void foo(void)
85 OFFSET(PT_OLDSS, pt_regs, ss); 52 OFFSET(PT_OLDSS, pt_regs, ss);
86 BLANK(); 53 BLANK();
87 54
88 OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
89 OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); 55 OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
90 BLANK(); 56 BLANK();
91 57
92 OFFSET(pbe_address, pbe, address);
93 OFFSET(pbe_orig_address, pbe, orig_address);
94 OFFSET(pbe_next, pbe, next);
95
96 /* Offset from the sysenter stack to tss.sp0 */ 58 /* Offset from the sysenter stack to tss.sp0 */
97 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - 59 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
98 sizeof(struct tss_struct)); 60 sizeof(struct tss_struct));
99 61
100 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
101 DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
102 DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
103
104 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
105
106#ifdef CONFIG_PARAVIRT
107 BLANK();
108 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
109 OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
110 OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
111 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
112 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
113 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
114 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
115 OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
116#endif
117
118#ifdef CONFIG_XEN
119 BLANK();
120 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
121 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
122#endif
123
124#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) 62#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
125 BLANK(); 63 BLANK();
126 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); 64 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
@@ -139,11 +77,4 @@ void foo(void)
139 OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); 77 OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
140 OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); 78 OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
141#endif 79#endif
142
143 BLANK();
144 OFFSET(BP_scratch, boot_params, scratch);
145 OFFSET(BP_loadflags, boot_params, hdr.loadflags);
146 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
147 OFFSET(BP_version, boot_params, hdr.version);
148 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
149} 80}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd965..e72a1194af22 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,27 +1,4 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed to extract
4 * and format the required data.
5 */
6#define COMPILE_OFFSETS
7
8#include <linux/crypto.h>
9#include <linux/sched.h>
10#include <linux/stddef.h>
11#include <linux/errno.h>
12#include <linux/hardirq.h>
13#include <linux/suspend.h>
14#include <linux/kbuild.h>
15#include <asm/processor.h>
16#include <asm/segment.h>
17#include <asm/thread_info.h>
18#include <asm/ia32.h> 1#include <asm/ia32.h>
19#include <asm/bootparam.h>
20#include <asm/suspend.h>
21
22#include <xen/interface/xen.h>
23
24#include <asm/sigframe.h>
25 2
26#define __NO_STUBS 1 3#define __NO_STUBS 1
27#undef __SYSCALL 4#undef __SYSCALL
@@ -33,41 +10,19 @@ static char syscalls[] = {
33 10
34int main(void) 11int main(void)
35{ 12{
36#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
37 ENTRY(state);
38 ENTRY(flags);
39 ENTRY(pid);
40 BLANK();
41#undef ENTRY
42#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
43 ENTRY(flags);
44 ENTRY(addr_limit);
45 ENTRY(preempt_count);
46 ENTRY(status);
47#ifdef CONFIG_IA32_EMULATION
48 ENTRY(sysenter_return);
49#endif
50 BLANK();
51#undef ENTRY
52#ifdef CONFIG_PARAVIRT 13#ifdef CONFIG_PARAVIRT
53 BLANK();
54 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
55 OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
56 OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
57 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
58 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
59 OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); 14 OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
60 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
61 OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); 15 OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
62 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); 16 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
63 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
64 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 17 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
65 OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); 18 BLANK();
66#endif 19#endif
67 20
68
69#ifdef CONFIG_IA32_EMULATION 21#ifdef CONFIG_IA32_EMULATION
70#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) 22 OFFSET(TI_sysenter_return, thread_info, sysenter_return);
23 BLANK();
24
25#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
71 ENTRY(ax); 26 ENTRY(ax);
72 ENTRY(bx); 27 ENTRY(bx);
73 ENTRY(cx); 28 ENTRY(cx);
@@ -79,15 +34,12 @@ int main(void)
79 ENTRY(ip); 34 ENTRY(ip);
80 BLANK(); 35 BLANK();
81#undef ENTRY 36#undef ENTRY
82 DEFINE(IA32_RT_SIGFRAME_sigcontext, 37
83 offsetof (struct rt_sigframe_ia32, uc.uc_mcontext)); 38 OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
84 BLANK(); 39 BLANK();
85#endif 40#endif
86 DEFINE(pbe_address, offsetof(struct pbe, address)); 41
87 DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); 42#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
88 DEFINE(pbe_next, offsetof(struct pbe, next));
89 BLANK();
90#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
91 ENTRY(bx); 43 ENTRY(bx);
92 ENTRY(bx); 44 ENTRY(bx);
93 ENTRY(cx); 45 ENTRY(cx);
@@ -107,7 +59,8 @@ int main(void)
107 ENTRY(flags); 59 ENTRY(flags);
108 BLANK(); 60 BLANK();
109#undef ENTRY 61#undef ENTRY
110#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) 62
63#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry)
111 ENTRY(cr0); 64 ENTRY(cr0);
112 ENTRY(cr2); 65 ENTRY(cr2);
113 ENTRY(cr3); 66 ENTRY(cr3);
@@ -115,26 +68,11 @@ int main(void)
115 ENTRY(cr8); 68 ENTRY(cr8);
116 BLANK(); 69 BLANK();
117#undef ENTRY 70#undef ENTRY
118 DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
119 BLANK();
120 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
121 BLANK();
122 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
123 71
72 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
124 BLANK(); 73 BLANK();
125 OFFSET(BP_scratch, boot_params, scratch);
126 OFFSET(BP_loadflags, boot_params, hdr.loadflags);
127 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
128 OFFSET(BP_version, boot_params, hdr.version);
129 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
130 74
131 BLANK(); 75 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
132 DEFINE(PAGE_SIZE_asm, PAGE_SIZE); 76
133#ifdef CONFIG_XEN
134 BLANK();
135 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
136 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
137#undef ENTRY
138#endif
139 return 0; 77 return 0;
140} 78}
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 13a389179514..452932d34730 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void)
106 addr += size; 106 addr += size;
107 } 107 }
108 108
109 printk(KERN_INFO "Scanning %d areas for low memory corruption\n", 109 if (num_scan_areas)
110 num_scan_areas); 110 printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas);
111} 111}
112 112
113 113
@@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy)
143{ 143{
144 check_for_bios_corruption(); 144 check_for_bios_corruption();
145 schedule_delayed_work(&bios_check_work, 145 schedule_delayed_work(&bios_check_work,
146 round_jiffies_relative(corruption_check_period*HZ)); 146 round_jiffies_relative(corruption_check_period*HZ));
147} 147}
148 148
149static int start_periodic_check_for_corruption(void) 149static int start_periodic_check_for_corruption(void)
150{ 150{
151 if (!memory_corruption_check || corruption_check_period == 0) 151 if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0)
152 return 0; 152 return 0;
153 153
154 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", 154 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb83c5a..f771ab6b49e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
233} 233}
234#endif 234#endif
235 235
236#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 236#ifdef CONFIG_NUMA
237/*
238 * To workaround broken NUMA config. Read the comment in
239 * srat_detect_node().
240 */
237static int __cpuinit nearby_node(int apicid) 241static int __cpuinit nearby_node(int apicid)
238{ 242{
239 int i, node; 243 int i, node;
240 244
241 for (i = apicid - 1; i >= 0; i--) { 245 for (i = apicid - 1; i >= 0; i--) {
242 node = apicid_to_node[i]; 246 node = __apicid_to_node[i];
243 if (node != NUMA_NO_NODE && node_online(node)) 247 if (node != NUMA_NO_NODE && node_online(node))
244 return node; 248 return node;
245 } 249 }
246 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { 250 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
247 node = apicid_to_node[i]; 251 node = __apicid_to_node[i];
248 if (node != NUMA_NO_NODE && node_online(node)) 252 if (node != NUMA_NO_NODE && node_online(node))
249 return node; 253 return node;
250 } 254 }
@@ -261,7 +265,7 @@ static int __cpuinit nearby_node(int apicid)
261#ifdef CONFIG_X86_HT 265#ifdef CONFIG_X86_HT
262static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) 266static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
263{ 267{
264 u32 nodes; 268 u32 nodes, cores_per_cu = 1;
265 u8 node_id; 269 u8 node_id;
266 int cpu = smp_processor_id(); 270 int cpu = smp_processor_id();
267 271
@@ -276,6 +280,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
276 /* get compute unit information */ 280 /* get compute unit information */
277 smp_num_siblings = ((ebx >> 8) & 3) + 1; 281 smp_num_siblings = ((ebx >> 8) & 3) + 1;
278 c->compute_unit_id = ebx & 0xff; 282 c->compute_unit_id = ebx & 0xff;
283 cores_per_cu += ((ebx >> 8) & 3);
279 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { 284 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
280 u64 value; 285 u64 value;
281 286
@@ -288,15 +293,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
288 /* fixup multi-node processor information */ 293 /* fixup multi-node processor information */
289 if (nodes > 1) { 294 if (nodes > 1) {
290 u32 cores_per_node; 295 u32 cores_per_node;
296 u32 cus_per_node;
291 297
292 set_cpu_cap(c, X86_FEATURE_AMD_DCM); 298 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
293 cores_per_node = c->x86_max_cores / nodes; 299 cores_per_node = c->x86_max_cores / nodes;
300 cus_per_node = cores_per_node / cores_per_cu;
294 301
295 /* store NodeID, use llc_shared_map to store sibling info */ 302 /* store NodeID, use llc_shared_map to store sibling info */
296 per_cpu(cpu_llc_id, cpu) = node_id; 303 per_cpu(cpu_llc_id, cpu) = node_id;
297 304
298 /* core id to be in range from 0 to (cores_per_node - 1) */ 305 /* core id has to be in the [0 .. cores_per_node - 1] range */
299 c->cpu_core_id = c->cpu_core_id % cores_per_node; 306 c->cpu_core_id %= cores_per_node;
307 c->compute_unit_id %= cus_per_node;
300 } 308 }
301} 309}
302#endif 310#endif
@@ -334,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
334 342
335static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 343static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
336{ 344{
337#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 345#ifdef CONFIG_NUMA
338 int cpu = smp_processor_id(); 346 int cpu = smp_processor_id();
339 int node; 347 int node;
340 unsigned apicid = c->apicid; 348 unsigned apicid = c->apicid;
341 349
342 node = per_cpu(cpu_llc_id, cpu); 350 node = numa_cpu_node(cpu);
351 if (node == NUMA_NO_NODE)
352 node = per_cpu(cpu_llc_id, cpu);
343 353
344 if (apicid_to_node[apicid] != NUMA_NO_NODE)
345 node = apicid_to_node[apicid];
346 if (!node_online(node)) { 354 if (!node_online(node)) {
347 /* Two possibilities here: 355 /*
348 - The CPU is missing memory and no node was created. 356 * Two possibilities here:
349 In that case try picking one from a nearby CPU 357 *
350 - The APIC IDs differ from the HyperTransport node IDs 358 * - The CPU is missing memory and no node was created. In
351 which the K8 northbridge parsing fills in. 359 * that case try picking one from a nearby CPU.
352 Assume they are all increased by a constant offset, 360 *
353 but in the same order as the HT nodeids. 361 * - The APIC IDs differ from the HyperTransport node IDs
354 If that doesn't result in a usable node fall back to the 362 * which the K8 northbridge parsing fills in. Assume
355 path for the previous case. */ 363 * they are all increased by a constant offset, but in
356 364 * the same order as the HT nodeids. If that doesn't
365 * result in a usable node fall back to the path for the
366 * previous case.
367 *
368 * This workaround operates directly on the mapping between
369 * APIC ID and NUMA node, assuming certain relationship
370 * between APIC ID, HT node ID and NUMA topology. As going
371 * through CPU mapping may alter the outcome, directly
372 * access __apicid_to_node[].
373 */
357 int ht_nodeid = c->initial_apicid; 374 int ht_nodeid = c->initial_apicid;
358 375
359 if (ht_nodeid >= 0 && 376 if (ht_nodeid >= 0 &&
360 apicid_to_node[ht_nodeid] != NUMA_NO_NODE) 377 __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
361 node = apicid_to_node[ht_nodeid]; 378 node = __apicid_to_node[ht_nodeid];
362 /* Pick a nearby node */ 379 /* Pick a nearby node */
363 if (!node_online(node)) 380 if (!node_online(node))
364 node = nearby_node(apicid); 381 node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396bd..e2ced0074a45 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,7 +675,7 @@ void __init early_cpu_init(void)
675 const struct cpu_dev *const *cdev; 675 const struct cpu_dev *const *cdev;
676 int count = 0; 676 int count = 0;
677 677
678#ifdef PROCESSOR_SELECT 678#ifdef CONFIG_PROCESSOR_SELECT
679 printk(KERN_INFO "KERNEL supported cpus:\n"); 679 printk(KERN_INFO "KERNEL supported cpus:\n");
680#endif 680#endif
681 681
@@ -687,7 +687,7 @@ void __init early_cpu_init(void)
687 cpu_devs[count] = cpudev; 687 cpu_devs[count] = cpudev;
688 count++; 688 count++;
689 689
690#ifdef PROCESSOR_SELECT 690#ifdef CONFIG_PROCESSOR_SELECT
691 { 691 {
692 unsigned int j; 692 unsigned int j;
693 693
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
869 869
870 select_idle_routine(c); 870 select_idle_routine(c);
871 871
872#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 872#ifdef CONFIG_NUMA
873 numa_add_cpu(smp_processor_id()); 873 numa_add_cpu(smp_processor_id());
874#endif 874#endif
875} 875}
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index bd1cac747f67..52c93648e492 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -158,9 +158,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
158{ 158{
159 if (c->x86 == 0x06) { 159 if (c->x86 == 0x06) {
160 if (cpu_has(c, X86_FEATURE_EST)) 160 if (cpu_has(c, X86_FEATURE_EST))
161 printk(KERN_WARNING PFX "Warning: EST-capable CPU " 161 printk_once(KERN_WARNING PFX "Warning: EST-capable "
162 "detected. The acpi-cpufreq module offers " 162 "CPU detected. The acpi-cpufreq module offers "
163 "voltage scaling in addition of frequency " 163 "voltage scaling in addition to frequency "
164 "scaling. You should use that instead of " 164 "scaling. You should use that instead of "
165 "p4-clockmod, if possible.\n"); 165 "p4-clockmod, if possible.\n");
166 switch (c->x86_model) { 166 switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679f2799..4a5a42b842ad 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
195cmd_incomplete: 195cmd_incomplete:
196 iowrite16(0, &pcch_hdr->status); 196 iowrite16(0, &pcch_hdr->status);
197 spin_unlock(&pcc_lock); 197 spin_unlock(&pcc_lock);
198 return -EINVAL; 198 return 0;
199} 199}
200 200
201static int pcc_cpufreq_target(struct cpufreq_policy *policy, 201static int pcc_cpufreq_target(struct cpufreq_policy *policy,
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 35c7e65e59be..c567dec854f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1537,6 +1537,7 @@ static struct notifier_block cpb_nb = {
1537static int __cpuinit powernowk8_init(void) 1537static int __cpuinit powernowk8_init(void)
1538{ 1538{
1539 unsigned int i, supported_cpus = 0, cpu; 1539 unsigned int i, supported_cpus = 0, cpu;
1540 int rv;
1540 1541
1541 for_each_online_cpu(i) { 1542 for_each_online_cpu(i) {
1542 int rc; 1543 int rc;
@@ -1555,14 +1556,14 @@ static int __cpuinit powernowk8_init(void)
1555 1556
1556 cpb_capable = true; 1557 cpb_capable = true;
1557 1558
1558 register_cpu_notifier(&cpb_nb);
1559
1560 msrs = msrs_alloc(); 1559 msrs = msrs_alloc();
1561 if (!msrs) { 1560 if (!msrs) {
1562 printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); 1561 printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
1563 return -ENOMEM; 1562 return -ENOMEM;
1564 } 1563 }
1565 1564
1565 register_cpu_notifier(&cpb_nb);
1566
1566 rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); 1567 rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1567 1568
1568 for_each_cpu(cpu, cpu_online_mask) { 1569 for_each_cpu(cpu, cpu_online_mask) {
@@ -1574,7 +1575,13 @@ static int __cpuinit powernowk8_init(void)
1574 (cpb_enabled ? "on" : "off")); 1575 (cpb_enabled ? "on" : "off"));
1575 } 1576 }
1576 1577
1577 return cpufreq_register_driver(&cpufreq_amd64_driver); 1578 rv = cpufreq_register_driver(&cpufreq_amd64_driver);
1579 if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
1580 unregister_cpu_notifier(&cpb_nb);
1581 msrs_free(msrs);
1582 msrs = NULL;
1583 }
1584 return rv;
1578} 1585}
1579 1586
1580/* driver entry point for term */ 1587/* driver entry point for term */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c53d6bf..df86bc8c859d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,14 +276,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
276 276
277static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 277static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
278{ 278{
279#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 279#ifdef CONFIG_NUMA
280 unsigned node; 280 unsigned node;
281 int cpu = smp_processor_id(); 281 int cpu = smp_processor_id();
282 int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
283 282
284 /* Don't do the funky fallback heuristics the AMD version employs 283 /* Don't do the funky fallback heuristics the AMD version employs
285 for now. */ 284 for now. */
286 node = apicid_to_node[apicid]; 285 node = numa_cpu_node(cpu);
287 if (node == NUMA_NO_NODE || !node_online(node)) { 286 if (node == NUMA_NO_NODE || !node_online(node)) {
288 /* reuse the value from init_cpu_to_node() */ 287 /* reuse the value from init_cpu_to_node() */
289 node = cpu_to_node(cpu); 288 node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ec2c19a7b8ef..1ce1af2899df 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -304,8 +304,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
304 304
305struct _cache_attr { 305struct _cache_attr {
306 struct attribute attr; 306 struct attribute attr;
307 ssize_t (*show)(struct _cpuid4_info *, char *); 307 ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
308 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); 308 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
309 unsigned int);
309}; 310};
310 311
311#ifdef CONFIG_AMD_NB 312#ifdef CONFIG_AMD_NB
@@ -400,7 +401,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
400 401
401#define SHOW_CACHE_DISABLE(slot) \ 402#define SHOW_CACHE_DISABLE(slot) \
402static ssize_t \ 403static ssize_t \
403show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ 404show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
405 unsigned int cpu) \
404{ \ 406{ \
405 return show_cache_disable(this_leaf, buf, slot); \ 407 return show_cache_disable(this_leaf, buf, slot); \
406} 408}
@@ -512,7 +514,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
512#define STORE_CACHE_DISABLE(slot) \ 514#define STORE_CACHE_DISABLE(slot) \
513static ssize_t \ 515static ssize_t \
514store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 516store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
515 const char *buf, size_t count) \ 517 const char *buf, size_t count, \
518 unsigned int cpu) \
516{ \ 519{ \
517 return store_cache_disable(this_leaf, buf, count, slot); \ 520 return store_cache_disable(this_leaf, buf, count, slot); \
518} 521}
@@ -524,6 +527,39 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
524static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 527static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
525 show_cache_disable_1, store_cache_disable_1); 528 show_cache_disable_1, store_cache_disable_1);
526 529
530static ssize_t
531show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
532{
533 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
534 return -EINVAL;
535
536 return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
537}
538
539static ssize_t
540store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
541 unsigned int cpu)
542{
543 unsigned long val;
544
545 if (!capable(CAP_SYS_ADMIN))
546 return -EPERM;
547
548 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
549 return -EINVAL;
550
551 if (strict_strtoul(buf, 16, &val) < 0)
552 return -EINVAL;
553
554 if (amd_set_subcaches(cpu, val))
555 return -EINVAL;
556
557 return count;
558}
559
560static struct _cache_attr subcaches =
561 __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
562
527#else /* CONFIG_AMD_NB */ 563#else /* CONFIG_AMD_NB */
528#define amd_init_l3_cache(x, y) 564#define amd_init_l3_cache(x, y)
529#endif /* CONFIG_AMD_NB */ 565#endif /* CONFIG_AMD_NB */
@@ -532,9 +568,9 @@ static int
532__cpuinit cpuid4_cache_lookup_regs(int index, 568__cpuinit cpuid4_cache_lookup_regs(int index,
533 struct _cpuid4_info_regs *this_leaf) 569 struct _cpuid4_info_regs *this_leaf)
534{ 570{
535 union _cpuid4_leaf_eax eax; 571 union _cpuid4_leaf_eax eax;
536 union _cpuid4_leaf_ebx ebx; 572 union _cpuid4_leaf_ebx ebx;
537 union _cpuid4_leaf_ecx ecx; 573 union _cpuid4_leaf_ecx ecx;
538 unsigned edx; 574 unsigned edx;
539 575
540 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 576 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
@@ -732,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
732 struct cpuinfo_x86 *c = &cpu_data(cpu); 768 struct cpuinfo_x86 *c = &cpu_data(cpu);
733 769
734 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { 770 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
735 for_each_cpu(i, c->llc_shared_map) { 771 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
736 if (!per_cpu(ici_cpuid4_info, i)) 772 if (!per_cpu(ici_cpuid4_info, i))
737 continue; 773 continue;
738 this_leaf = CPUID4_INFO_IDX(i, index); 774 this_leaf = CPUID4_INFO_IDX(i, index);
739 for_each_cpu(sibling, c->llc_shared_map) { 775 for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
740 if (!cpu_online(sibling)) 776 if (!cpu_online(sibling))
741 continue; 777 continue;
742 set_bit(sibling, this_leaf->shared_cpu_map); 778 set_bit(sibling, this_leaf->shared_cpu_map);
@@ -870,8 +906,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
870#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) 906#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
871 907
872#define show_one_plus(file_name, object, val) \ 908#define show_one_plus(file_name, object, val) \
873static ssize_t show_##file_name \ 909static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
874 (struct _cpuid4_info *this_leaf, char *buf) \ 910 unsigned int cpu) \
875{ \ 911{ \
876 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ 912 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
877} 913}
@@ -882,7 +918,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
882show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); 918show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
883show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); 919show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
884 920
885static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) 921static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
922 unsigned int cpu)
886{ 923{
887 return sprintf(buf, "%luK\n", this_leaf->size / 1024); 924 return sprintf(buf, "%luK\n", this_leaf->size / 1024);
888} 925}
@@ -906,17 +943,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
906 return n; 943 return n;
907} 944}
908 945
909static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf) 946static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
947 unsigned int cpu)
910{ 948{
911 return show_shared_cpu_map_func(leaf, 0, buf); 949 return show_shared_cpu_map_func(leaf, 0, buf);
912} 950}
913 951
914static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) 952static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
953 unsigned int cpu)
915{ 954{
916 return show_shared_cpu_map_func(leaf, 1, buf); 955 return show_shared_cpu_map_func(leaf, 1, buf);
917} 956}
918 957
919static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) 958static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
959 unsigned int cpu)
920{ 960{
921 switch (this_leaf->eax.split.type) { 961 switch (this_leaf->eax.split.type) {
922 case CACHE_TYPE_DATA: 962 case CACHE_TYPE_DATA:
@@ -974,6 +1014,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
974 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) 1014 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
975 n += 2; 1015 n += 2;
976 1016
1017 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1018 n += 1;
1019
977 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); 1020 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
978 if (attrs == NULL) 1021 if (attrs == NULL)
979 return attrs = default_attrs; 1022 return attrs = default_attrs;
@@ -986,6 +1029,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
986 attrs[n++] = &cache_disable_1.attr; 1029 attrs[n++] = &cache_disable_1.attr;
987 } 1030 }
988 1031
1032 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1033 attrs[n++] = &subcaches.attr;
1034
989 return attrs; 1035 return attrs;
990} 1036}
991#endif 1037#endif
@@ -998,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
998 1044
999 ret = fattr->show ? 1045 ret = fattr->show ?
1000 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 1046 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1001 buf) : 1047 buf, this_leaf->cpu) :
1002 0; 1048 0;
1003 return ret; 1049 return ret;
1004} 1050}
@@ -1012,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
1012 1058
1013 ret = fattr->store ? 1059 ret = fattr->store ?
1014 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 1060 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1015 buf, count) : 1061 buf, count, this_leaf->cpu) :
1016 0; 1062 0;
1017 return ret; 1063 return ret;
1018} 1064}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52aca..167f97b5596e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
527 int i, err = 0; 527 int i, err = 0;
528 struct threshold_bank *b = NULL; 528 struct threshold_bank *b = NULL;
529 char name[32]; 529 char name[32];
530#ifdef CONFIG_SMP
531 struct cpuinfo_x86 *c = &cpu_data(cpu);
532#endif
533 530
534 sprintf(name, "threshold_bank%i", bank); 531 sprintf(name, "threshold_bank%i", bank);
535 532
536#ifdef CONFIG_SMP 533#ifdef CONFIG_SMP
537 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 534 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
538 i = cpumask_first(c->llc_shared_map); 535 i = cpumask_first(cpu_llc_shared_mask(cpu));
539 536
540 /* first core not up yet */ 537 /* first core not up yet */
541 if (cpu_data(i).cpu_core_id) 538 if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
555 if (err) 552 if (err)
556 goto out; 553 goto out;
557 554
558 cpumask_copy(b->cpus, c->llc_shared_map); 555 cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
559 per_cpu(threshold_banks, cpu)[bank] = b; 556 per_cpu(threshold_banks, cpu)[bank] = b;
560 557
561 goto out; 558 goto out;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693..26604188aa49 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,7 @@
30#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h> 32#include <asm/compat.h>
33#include <asm/smp.h>
33 34
34#if 0 35#if 0
35#undef wrmsrl 36#undef wrmsrl
@@ -93,6 +94,8 @@ struct amd_nb {
93 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 94 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
94}; 95};
95 96
97struct intel_percore;
98
96#define MAX_LBR_ENTRIES 16 99#define MAX_LBR_ENTRIES 16
97 100
98struct cpu_hw_events { 101struct cpu_hw_events {
@@ -128,6 +131,13 @@ struct cpu_hw_events {
128 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 131 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
129 132
130 /* 133 /*
134 * Intel percore register state.
135 * Coordinate shared resources between HT threads.
136 */
137 int percore_used; /* Used by this CPU? */
138 struct intel_percore *per_core;
139
140 /*
131 * AMD specific bits 141 * AMD specific bits
132 */ 142 */
133 struct amd_nb *amd_nb; 143 struct amd_nb *amd_nb;
@@ -166,8 +176,10 @@ struct cpu_hw_events {
166/* 176/*
167 * Constraint on the Event code + UMask 177 * Constraint on the Event code + UMask
168 */ 178 */
169#define PEBS_EVENT_CONSTRAINT(c, n) \ 179#define INTEL_UEVENT_CONSTRAINT(c, n) \
170 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) 180 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
181#define PEBS_EVENT_CONSTRAINT(c, n) \
182 INTEL_UEVENT_CONSTRAINT(c, n)
171 183
172#define EVENT_CONSTRAINT_END \ 184#define EVENT_CONSTRAINT_END \
173 EVENT_CONSTRAINT(0, 0, 0) 185 EVENT_CONSTRAINT(0, 0, 0)
@@ -175,6 +187,28 @@ struct cpu_hw_events {
175#define for_each_event_constraint(e, c) \ 187#define for_each_event_constraint(e, c) \
176 for ((e) = (c); (e)->weight; (e)++) 188 for ((e) = (c); (e)->weight; (e)++)
177 189
190/*
191 * Extra registers for specific events.
192 * Some events need large masks and require external MSRs.
193 * Define a mapping to these extra registers.
194 */
195struct extra_reg {
196 unsigned int event;
197 unsigned int msr;
198 u64 config_mask;
199 u64 valid_mask;
200};
201
202#define EVENT_EXTRA_REG(e, ms, m, vm) { \
203 .event = (e), \
204 .msr = (ms), \
205 .config_mask = (m), \
206 .valid_mask = (vm), \
207 }
208#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
209 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
210#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
211
178union perf_capabilities { 212union perf_capabilities {
179 struct { 213 struct {
180 u64 lbr_format : 6; 214 u64 lbr_format : 6;
@@ -219,6 +253,7 @@ struct x86_pmu {
219 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 253 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
220 struct perf_event *event); 254 struct perf_event *event);
221 struct event_constraint *event_constraints; 255 struct event_constraint *event_constraints;
256 struct event_constraint *percore_constraints;
222 void (*quirks)(void); 257 void (*quirks)(void);
223 int perfctr_second_write; 258 int perfctr_second_write;
224 259
@@ -247,6 +282,11 @@ struct x86_pmu {
247 */ 282 */
248 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ 283 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
249 int lbr_nr; /* hardware stack size */ 284 int lbr_nr; /* hardware stack size */
285
286 /*
287 * Extra registers for events
288 */
289 struct extra_reg *extra_regs;
250}; 290};
251 291
252static struct x86_pmu x86_pmu __read_mostly; 292static struct x86_pmu x86_pmu __read_mostly;
@@ -271,6 +311,10 @@ static u64 __read_mostly hw_cache_event_ids
271 [PERF_COUNT_HW_CACHE_MAX] 311 [PERF_COUNT_HW_CACHE_MAX]
272 [PERF_COUNT_HW_CACHE_OP_MAX] 312 [PERF_COUNT_HW_CACHE_OP_MAX]
273 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 313 [PERF_COUNT_HW_CACHE_RESULT_MAX];
314static u64 __read_mostly hw_cache_extra_regs
315 [PERF_COUNT_HW_CACHE_MAX]
316 [PERF_COUNT_HW_CACHE_OP_MAX]
317 [PERF_COUNT_HW_CACHE_RESULT_MAX];
274 318
275/* 319/*
276 * Propagate event elapsed time into the generic event. 320 * Propagate event elapsed time into the generic event.
@@ -298,7 +342,7 @@ x86_perf_event_update(struct perf_event *event)
298 */ 342 */
299again: 343again:
300 prev_raw_count = local64_read(&hwc->prev_count); 344 prev_raw_count = local64_read(&hwc->prev_count);
301 rdmsrl(hwc->event_base + idx, new_raw_count); 345 rdmsrl(hwc->event_base, new_raw_count);
302 346
303 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 347 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
304 new_raw_count) != prev_raw_count) 348 new_raw_count) != prev_raw_count)
@@ -321,6 +365,49 @@ again:
321 return new_raw_count; 365 return new_raw_count;
322} 366}
323 367
368/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
369static inline int x86_pmu_addr_offset(int index)
370{
371 if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
372 return index << 1;
373 return index;
374}
375
376static inline unsigned int x86_pmu_config_addr(int index)
377{
378 return x86_pmu.eventsel + x86_pmu_addr_offset(index);
379}
380
381static inline unsigned int x86_pmu_event_addr(int index)
382{
383 return x86_pmu.perfctr + x86_pmu_addr_offset(index);
384}
385
386/*
387 * Find and validate any extra registers to set up.
388 */
389static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
390{
391 struct extra_reg *er;
392
393 event->hw.extra_reg = 0;
394 event->hw.extra_config = 0;
395
396 if (!x86_pmu.extra_regs)
397 return 0;
398
399 for (er = x86_pmu.extra_regs; er->msr; er++) {
400 if (er->event != (config & er->config_mask))
401 continue;
402 if (event->attr.config1 & ~er->valid_mask)
403 return -EINVAL;
404 event->hw.extra_reg = er->msr;
405 event->hw.extra_config = event->attr.config1;
406 break;
407 }
408 return 0;
409}
410
324static atomic_t active_events; 411static atomic_t active_events;
325static DEFINE_MUTEX(pmc_reserve_mutex); 412static DEFINE_MUTEX(pmc_reserve_mutex);
326 413
@@ -331,12 +418,12 @@ static bool reserve_pmc_hardware(void)
331 int i; 418 int i;
332 419
333 for (i = 0; i < x86_pmu.num_counters; i++) { 420 for (i = 0; i < x86_pmu.num_counters; i++) {
334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 421 if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
335 goto perfctr_fail; 422 goto perfctr_fail;
336 } 423 }
337 424
338 for (i = 0; i < x86_pmu.num_counters; i++) { 425 for (i = 0; i < x86_pmu.num_counters; i++) {
339 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 426 if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
340 goto eventsel_fail; 427 goto eventsel_fail;
341 } 428 }
342 429
@@ -344,13 +431,13 @@ static bool reserve_pmc_hardware(void)
344 431
345eventsel_fail: 432eventsel_fail:
346 for (i--; i >= 0; i--) 433 for (i--; i >= 0; i--)
347 release_evntsel_nmi(x86_pmu.eventsel + i); 434 release_evntsel_nmi(x86_pmu_config_addr(i));
348 435
349 i = x86_pmu.num_counters; 436 i = x86_pmu.num_counters;
350 437
351perfctr_fail: 438perfctr_fail:
352 for (i--; i >= 0; i--) 439 for (i--; i >= 0; i--)
353 release_perfctr_nmi(x86_pmu.perfctr + i); 440 release_perfctr_nmi(x86_pmu_event_addr(i));
354 441
355 return false; 442 return false;
356} 443}
@@ -360,8 +447,8 @@ static void release_pmc_hardware(void)
360 int i; 447 int i;
361 448
362 for (i = 0; i < x86_pmu.num_counters; i++) { 449 for (i = 0; i < x86_pmu.num_counters; i++) {
363 release_perfctr_nmi(x86_pmu.perfctr + i); 450 release_perfctr_nmi(x86_pmu_event_addr(i));
364 release_evntsel_nmi(x86_pmu.eventsel + i); 451 release_evntsel_nmi(x86_pmu_config_addr(i));
365 } 452 }
366} 453}
367 454
@@ -382,7 +469,7 @@ static bool check_hw_exists(void)
382 * complain and bail. 469 * complain and bail.
383 */ 470 */
384 for (i = 0; i < x86_pmu.num_counters; i++) { 471 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i; 472 reg = x86_pmu_config_addr(i);
386 ret = rdmsrl_safe(reg, &val); 473 ret = rdmsrl_safe(reg, &val);
387 if (ret) 474 if (ret)
388 goto msr_fail; 475 goto msr_fail;
@@ -407,8 +494,8 @@ static bool check_hw_exists(void)
407 * that don't trap on the MSR access and always return 0s. 494 * that don't trap on the MSR access and always return 0s.
408 */ 495 */
409 val = 0xabcdUL; 496 val = 0xabcdUL;
410 ret = checking_wrmsrl(x86_pmu.perfctr, val); 497 ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 498 ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
412 if (ret || val != val_new) 499 if (ret || val != val_new)
413 goto msr_fail; 500 goto msr_fail;
414 501
@@ -442,8 +529,9 @@ static inline int x86_pmu_initialized(void)
442} 529}
443 530
444static inline int 531static inline int
445set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) 532set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
446{ 533{
534 struct perf_event_attr *attr = &event->attr;
447 unsigned int cache_type, cache_op, cache_result; 535 unsigned int cache_type, cache_op, cache_result;
448 u64 config, val; 536 u64 config, val;
449 537
@@ -470,8 +558,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
470 return -EINVAL; 558 return -EINVAL;
471 559
472 hwc->config |= val; 560 hwc->config |= val;
473 561 attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
474 return 0; 562 return x86_pmu_extra_regs(val, event);
475} 563}
476 564
477static int x86_setup_perfctr(struct perf_event *event) 565static int x86_setup_perfctr(struct perf_event *event)
@@ -496,10 +584,10 @@ static int x86_setup_perfctr(struct perf_event *event)
496 } 584 }
497 585
498 if (attr->type == PERF_TYPE_RAW) 586 if (attr->type == PERF_TYPE_RAW)
499 return 0; 587 return x86_pmu_extra_regs(event->attr.config, event);
500 588
501 if (attr->type == PERF_TYPE_HW_CACHE) 589 if (attr->type == PERF_TYPE_HW_CACHE)
502 return set_ext_hw_attr(hwc, attr); 590 return set_ext_hw_attr(hwc, event);
503 591
504 if (attr->config >= x86_pmu.max_events) 592 if (attr->config >= x86_pmu.max_events)
505 return -EINVAL; 593 return -EINVAL;
@@ -617,11 +705,11 @@ static void x86_pmu_disable_all(void)
617 705
618 if (!test_bit(idx, cpuc->active_mask)) 706 if (!test_bit(idx, cpuc->active_mask))
619 continue; 707 continue;
620 rdmsrl(x86_pmu.eventsel + idx, val); 708 rdmsrl(x86_pmu_config_addr(idx), val);
621 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) 709 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
622 continue; 710 continue;
623 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 711 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
624 wrmsrl(x86_pmu.eventsel + idx, val); 712 wrmsrl(x86_pmu_config_addr(idx), val);
625 } 713 }
626} 714}
627 715
@@ -642,21 +730,26 @@ static void x86_pmu_disable(struct pmu *pmu)
642 x86_pmu.disable_all(); 730 x86_pmu.disable_all();
643} 731}
644 732
733static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
734 u64 enable_mask)
735{
736 if (hwc->extra_reg)
737 wrmsrl(hwc->extra_reg, hwc->extra_config);
738 wrmsrl(hwc->config_base, hwc->config | enable_mask);
739}
740
645static void x86_pmu_enable_all(int added) 741static void x86_pmu_enable_all(int added)
646{ 742{
647 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 743 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
648 int idx; 744 int idx;
649 745
650 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 746 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
651 struct perf_event *event = cpuc->events[idx]; 747 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
652 u64 val;
653 748
654 if (!test_bit(idx, cpuc->active_mask)) 749 if (!test_bit(idx, cpuc->active_mask))
655 continue; 750 continue;
656 751
657 val = event->hw.config; 752 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
658 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
659 wrmsrl(x86_pmu.eventsel + idx, val);
660 } 753 }
661} 754}
662 755
@@ -821,15 +914,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
821 hwc->event_base = 0; 914 hwc->event_base = 0;
822 } else if (hwc->idx >= X86_PMC_IDX_FIXED) { 915 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
823 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; 916 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
824 /* 917 hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
825 * We set it so that event_base + idx in wrmsr/rdmsr maps to
826 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
827 */
828 hwc->event_base =
829 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
830 } else { 918 } else {
831 hwc->config_base = x86_pmu.eventsel; 919 hwc->config_base = x86_pmu_config_addr(hwc->idx);
832 hwc->event_base = x86_pmu.perfctr; 920 hwc->event_base = x86_pmu_event_addr(hwc->idx);
833 } 921 }
834} 922}
835 923
@@ -915,17 +1003,11 @@ static void x86_pmu_enable(struct pmu *pmu)
915 x86_pmu.enable_all(added); 1003 x86_pmu.enable_all(added);
916} 1004}
917 1005
918static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
919 u64 enable_mask)
920{
921 wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
922}
923
924static inline void x86_pmu_disable_event(struct perf_event *event) 1006static inline void x86_pmu_disable_event(struct perf_event *event)
925{ 1007{
926 struct hw_perf_event *hwc = &event->hw; 1008 struct hw_perf_event *hwc = &event->hw;
927 1009
928 wrmsrl(hwc->config_base + hwc->idx, hwc->config); 1010 wrmsrl(hwc->config_base, hwc->config);
929} 1011}
930 1012
931static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 1013static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -978,7 +1060,7 @@ x86_perf_event_set_period(struct perf_event *event)
978 */ 1060 */
979 local64_set(&hwc->prev_count, (u64)-left); 1061 local64_set(&hwc->prev_count, (u64)-left);
980 1062
981 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); 1063 wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
982 1064
983 /* 1065 /*
984 * Due to erratum on certan cpu we need 1066 * Due to erratum on certan cpu we need
@@ -986,7 +1068,7 @@ x86_perf_event_set_period(struct perf_event *event)
986 * is updated properly 1068 * is updated properly
987 */ 1069 */
988 if (x86_pmu.perfctr_second_write) { 1070 if (x86_pmu.perfctr_second_write) {
989 wrmsrl(hwc->event_base + idx, 1071 wrmsrl(hwc->event_base,
990 (u64)(-left) & x86_pmu.cntval_mask); 1072 (u64)(-left) & x86_pmu.cntval_mask);
991 } 1073 }
992 1074
@@ -1113,8 +1195,8 @@ void perf_event_print_debug(void)
1113 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); 1195 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1114 1196
1115 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1197 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1116 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1198 rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
1117 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1199 rdmsrl(x86_pmu_event_addr(idx), pmc_count);
1118 1200
1119 prev_left = per_cpu(pmc_prev_left[idx], cpu); 1201 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1120 1202
@@ -1389,7 +1471,7 @@ static void __init pmu_check_apic(void)
1389 pr_info("no hardware sampling interrupt available.\n"); 1471 pr_info("no hardware sampling interrupt available.\n");
1390} 1472}
1391 1473
1392int __init init_hw_perf_events(void) 1474static int __init init_hw_perf_events(void)
1393{ 1475{
1394 struct event_constraint *c; 1476 struct event_constraint *c;
1395 int err; 1477 int err;
@@ -1608,7 +1690,7 @@ out:
1608 return ret; 1690 return ret;
1609} 1691}
1610 1692
1611int x86_pmu_event_init(struct perf_event *event) 1693static int x86_pmu_event_init(struct perf_event *event)
1612{ 1694{
1613 struct pmu *tmp; 1695 struct pmu *tmp;
1614 int err; 1696 int err;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202a6039..461f62bbd774 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
127/* 127/*
128 * AMD64 events are detected based on their event codes. 128 * AMD64 events are detected based on their event codes.
129 */ 129 */
130static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
131{
132 return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
133}
134
130static inline int amd_is_nb_event(struct hw_perf_event *hwc) 135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
131{ 136{
132 return (hwc->config & 0xe0) == 0xe0; 137 return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = {
385 .cpu_dead = amd_pmu_cpu_dead, 390 .cpu_dead = amd_pmu_cpu_dead,
386}; 391};
387 392
393/* AMD Family 15h */
394
395#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
396
397#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
398#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
399#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
400#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
401#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
402#define AMD_EVENT_EX_LS 0x000000C0ULL
403#define AMD_EVENT_DE 0x000000D0ULL
404#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
405
406/*
407 * AMD family 15h event code/PMC mappings:
408 *
409 * type = event_code & 0x0F0:
410 *
411 * 0x000 FP PERF_CTL[5:3]
412 * 0x010 FP PERF_CTL[5:3]
413 * 0x020 LS PERF_CTL[5:0]
414 * 0x030 LS PERF_CTL[5:0]
415 * 0x040 DC PERF_CTL[5:0]
416 * 0x050 DC PERF_CTL[5:0]
417 * 0x060 CU PERF_CTL[2:0]
418 * 0x070 CU PERF_CTL[2:0]
419 * 0x080 IC/DE PERF_CTL[2:0]
420 * 0x090 IC/DE PERF_CTL[2:0]
421 * 0x0A0 ---
422 * 0x0B0 ---
423 * 0x0C0 EX/LS PERF_CTL[5:0]
424 * 0x0D0 DE PERF_CTL[2:0]
425 * 0x0E0 NB NB_PERF_CTL[3:0]
426 * 0x0F0 NB NB_PERF_CTL[3:0]
427 *
428 * Exceptions:
429 *
430 * 0x003 FP PERF_CTL[3]
431 * 0x00B FP PERF_CTL[3]
432 * 0x00D FP PERF_CTL[3]
433 * 0x023 DE PERF_CTL[2:0]
434 * 0x02D LS PERF_CTL[3]
435 * 0x02E LS PERF_CTL[3,0]
436 * 0x043 CU PERF_CTL[2:0]
437 * 0x045 CU PERF_CTL[2:0]
438 * 0x046 CU PERF_CTL[2:0]
439 * 0x054 CU PERF_CTL[2:0]
440 * 0x055 CU PERF_CTL[2:0]
441 * 0x08F IC PERF_CTL[0]
442 * 0x187 DE PERF_CTL[0]
443 * 0x188 DE PERF_CTL[0]
444 * 0x0DB EX PERF_CTL[5:0]
445 * 0x0DC LS PERF_CTL[5:0]
446 * 0x0DD LS PERF_CTL[5:0]
447 * 0x0DE LS PERF_CTL[5:0]
448 * 0x0DF LS PERF_CTL[5:0]
449 * 0x1D6 EX PERF_CTL[5:0]
450 * 0x1D8 EX PERF_CTL[5:0]
451 */
452
453static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
454static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
455static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
456static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
457static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
458static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
459
460static struct event_constraint *
461amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
462{
463 unsigned int event_code = amd_get_event_code(&event->hw);
464
465 switch (event_code & AMD_EVENT_TYPE_MASK) {
466 case AMD_EVENT_FP:
467 switch (event_code) {
468 case 0x003:
469 case 0x00B:
470 case 0x00D:
471 return &amd_f15_PMC3;
472 default:
473 return &amd_f15_PMC53;
474 }
475 case AMD_EVENT_LS:
476 case AMD_EVENT_DC:
477 case AMD_EVENT_EX_LS:
478 switch (event_code) {
479 case 0x023:
480 case 0x043:
481 case 0x045:
482 case 0x046:
483 case 0x054:
484 case 0x055:
485 return &amd_f15_PMC20;
486 case 0x02D:
487 return &amd_f15_PMC3;
488 case 0x02E:
489 return &amd_f15_PMC30;
490 default:
491 return &amd_f15_PMC50;
492 }
493 case AMD_EVENT_CU:
494 case AMD_EVENT_IC_DE:
495 case AMD_EVENT_DE:
496 switch (event_code) {
497 case 0x08F:
498 case 0x187:
499 case 0x188:
500 return &amd_f15_PMC0;
501 case 0x0DB ... 0x0DF:
502 case 0x1D6:
503 case 0x1D8:
504 return &amd_f15_PMC50;
505 default:
506 return &amd_f15_PMC20;
507 }
508 case AMD_EVENT_NB:
509 /* not yet implemented */
510 return &emptyconstraint;
511 default:
512 return &emptyconstraint;
513 }
514}
515
516static __initconst const struct x86_pmu amd_pmu_f15h = {
517 .name = "AMD Family 15h",
518 .handle_irq = x86_pmu_handle_irq,
519 .disable_all = x86_pmu_disable_all,
520 .enable_all = x86_pmu_enable_all,
521 .enable = x86_pmu_enable_event,
522 .disable = x86_pmu_disable_event,
523 .hw_config = amd_pmu_hw_config,
524 .schedule_events = x86_schedule_events,
525 .eventsel = MSR_F15H_PERF_CTL,
526 .perfctr = MSR_F15H_PERF_CTR,
527 .event_map = amd_pmu_event_map,
528 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
529 .num_counters = 6,
530 .cntval_bits = 48,
531 .cntval_mask = (1ULL << 48) - 1,
532 .apic = 1,
533 /* use highest bit to detect overflow */
534 .max_period = (1ULL << 47) - 1,
535 .get_event_constraints = amd_get_event_constraints_f15h,
536 /* nortbridge counters not yet implemented: */
537#if 0
538 .put_event_constraints = amd_put_event_constraints,
539
540 .cpu_prepare = amd_pmu_cpu_prepare,
541 .cpu_starting = amd_pmu_cpu_starting,
542 .cpu_dead = amd_pmu_cpu_dead,
543#endif
544};
545
388static __init int amd_pmu_init(void) 546static __init int amd_pmu_init(void)
389{ 547{
390 /* Performance-monitoring supported from K7 and later: */ 548 /* Performance-monitoring supported from K7 and later: */
391 if (boot_cpu_data.x86 < 6) 549 if (boot_cpu_data.x86 < 6)
392 return -ENODEV; 550 return -ENODEV;
393 551
394 x86_pmu = amd_pmu; 552 /*
553 * If core performance counter extensions exists, it must be
554 * family 15h, otherwise fail. See x86_pmu_addr_offset().
555 */
556 switch (boot_cpu_data.x86) {
557 case 0x15:
558 if (!cpu_has_perfctr_core)
559 return -ENODEV;
560 x86_pmu = amd_pmu_f15h;
561 break;
562 default:
563 if (cpu_has_perfctr_core)
564 return -ENODEV;
565 x86_pmu = amd_pmu;
566 break;
567 }
395 568
396 /* Events are common for all AMDs */ 569 /* Events are common for all AMDs */
397 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 570 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c1d79c..8fc2b2cee1da 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#ifdef CONFIG_CPU_SUP_INTEL
2 2
3#define MAX_EXTRA_REGS 2
4
5/*
6 * Per register state.
7 */
8struct er_account {
9 int ref; /* reference count */
10 unsigned int extra_reg; /* extra MSR number */
11 u64 extra_config; /* extra MSR config */
12};
13
14/*
15 * Per core state
16 * This used to coordinate shared registers for HT threads.
17 */
18struct intel_percore {
19 raw_spinlock_t lock; /* protect structure */
20 struct er_account regs[MAX_EXTRA_REGS];
21 int refcnt; /* number of threads */
22 unsigned core_id;
23};
24
3/* 25/*
4 * Intel PerfMon, used on Core and later. 26 * Intel PerfMon, used on Core and later.
5 */ 27 */
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
64 EVENT_CONSTRAINT_END 86 EVENT_CONSTRAINT_END
65}; 87};
66 88
89static struct extra_reg intel_nehalem_extra_regs[] =
90{
91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
92 EVENT_EXTRA_END
93};
94
95static struct event_constraint intel_nehalem_percore_constraints[] =
96{
97 INTEL_EVENT_CONSTRAINT(0xb7, 0),
98 EVENT_CONSTRAINT_END
99};
100
67static struct event_constraint intel_westmere_event_constraints[] = 101static struct event_constraint intel_westmere_event_constraints[] =
68{ 102{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] =
76 EVENT_CONSTRAINT_END 110 EVENT_CONSTRAINT_END
77}; 111};
78 112
113static struct event_constraint intel_snb_event_constraints[] =
114{
115 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
117 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
118 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
119 INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
120 INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
121 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
122 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
123 EVENT_CONSTRAINT_END
124};
125
126static struct extra_reg intel_westmere_extra_regs[] =
127{
128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
130 EVENT_EXTRA_END
131};
132
133static struct event_constraint intel_westmere_percore_constraints[] =
134{
135 INTEL_EVENT_CONSTRAINT(0xb7, 0),
136 INTEL_EVENT_CONSTRAINT(0xbb, 0),
137 EVENT_CONSTRAINT_END
138};
139
79static struct event_constraint intel_gen_event_constraints[] = 140static struct event_constraint intel_gen_event_constraints[] =
80{ 141{
81 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 142 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event)
89 return intel_perfmon_event_map[hw_event]; 150 return intel_perfmon_event_map[hw_event];
90} 151}
91 152
153static __initconst const u64 snb_hw_cache_event_ids
154 [PERF_COUNT_HW_CACHE_MAX]
155 [PERF_COUNT_HW_CACHE_OP_MAX]
156 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
157{
158 [ C(L1D) ] = {
159 [ C(OP_READ) ] = {
160 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
161 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
162 },
163 [ C(OP_WRITE) ] = {
164 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
165 [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
166 },
167 [ C(OP_PREFETCH) ] = {
168 [ C(RESULT_ACCESS) ] = 0x0,
169 [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
170 },
171 },
172 [ C(L1I ) ] = {
173 [ C(OP_READ) ] = {
174 [ C(RESULT_ACCESS) ] = 0x0,
175 [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
176 },
177 [ C(OP_WRITE) ] = {
178 [ C(RESULT_ACCESS) ] = -1,
179 [ C(RESULT_MISS) ] = -1,
180 },
181 [ C(OP_PREFETCH) ] = {
182 [ C(RESULT_ACCESS) ] = 0x0,
183 [ C(RESULT_MISS) ] = 0x0,
184 },
185 },
186 [ C(LL ) ] = {
187 /*
188 * TBD: Need Off-core Response Performance Monitoring support
189 */
190 [ C(OP_READ) ] = {
191 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
192 [ C(RESULT_ACCESS) ] = 0x01b7,
193 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
194 [ C(RESULT_MISS) ] = 0x01bb,
195 },
196 [ C(OP_WRITE) ] = {
197 /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
198 [ C(RESULT_ACCESS) ] = 0x01b7,
199 /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
200 [ C(RESULT_MISS) ] = 0x01bb,
201 },
202 [ C(OP_PREFETCH) ] = {
203 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
204 [ C(RESULT_ACCESS) ] = 0x01b7,
205 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
206 [ C(RESULT_MISS) ] = 0x01bb,
207 },
208 },
209 [ C(DTLB) ] = {
210 [ C(OP_READ) ] = {
211 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
212 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
213 },
214 [ C(OP_WRITE) ] = {
215 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
216 [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
217 },
218 [ C(OP_PREFETCH) ] = {
219 [ C(RESULT_ACCESS) ] = 0x0,
220 [ C(RESULT_MISS) ] = 0x0,
221 },
222 },
223 [ C(ITLB) ] = {
224 [ C(OP_READ) ] = {
225 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
226 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
227 },
228 [ C(OP_WRITE) ] = {
229 [ C(RESULT_ACCESS) ] = -1,
230 [ C(RESULT_MISS) ] = -1,
231 },
232 [ C(OP_PREFETCH) ] = {
233 [ C(RESULT_ACCESS) ] = -1,
234 [ C(RESULT_MISS) ] = -1,
235 },
236 },
237 [ C(BPU ) ] = {
238 [ C(OP_READ) ] = {
239 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
240 [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
241 },
242 [ C(OP_WRITE) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 [ C(OP_PREFETCH) ] = {
247 [ C(RESULT_ACCESS) ] = -1,
248 [ C(RESULT_MISS) ] = -1,
249 },
250 },
251};
252
92static __initconst const u64 westmere_hw_cache_event_ids 253static __initconst const u64 westmere_hw_cache_event_ids
93 [PERF_COUNT_HW_CACHE_MAX] 254 [PERF_COUNT_HW_CACHE_MAX]
94 [PERF_COUNT_HW_CACHE_OP_MAX] 255 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
124 }, 285 },
125 [ C(LL ) ] = { 286 [ C(LL ) ] = {
126 [ C(OP_READ) ] = { 287 [ C(OP_READ) ] = {
127 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ 288 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
128 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ 289 [ C(RESULT_ACCESS) ] = 0x01b7,
290 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
291 [ C(RESULT_MISS) ] = 0x01bb,
129 }, 292 },
293 /*
294 * Use RFO, not WRITEBACK, because a write miss would typically occur
295 * on RFO.
296 */
130 [ C(OP_WRITE) ] = { 297 [ C(OP_WRITE) ] = {
131 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ 298 /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
132 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ 299 [ C(RESULT_ACCESS) ] = 0x01bb,
300 /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
301 [ C(RESULT_MISS) ] = 0x01b7,
133 }, 302 },
134 [ C(OP_PREFETCH) ] = { 303 [ C(OP_PREFETCH) ] = {
135 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ 304 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
136 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ 305 [ C(RESULT_ACCESS) ] = 0x01b7,
306 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
307 [ C(RESULT_MISS) ] = 0x01bb,
137 }, 308 },
138 }, 309 },
139 [ C(DTLB) ] = { 310 [ C(DTLB) ] = {
@@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
180 }, 351 },
181}; 352};
182 353
354/*
355 * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
356 */
357
358#define DMND_DATA_RD (1 << 0)
359#define DMND_RFO (1 << 1)
360#define DMND_WB (1 << 3)
361#define PF_DATA_RD (1 << 4)
362#define PF_DATA_RFO (1 << 5)
363#define RESP_UNCORE_HIT (1 << 8)
364#define RESP_MISS (0xf600) /* non uncore hit */
365
366static __initconst const u64 nehalem_hw_cache_extra_regs
367 [PERF_COUNT_HW_CACHE_MAX]
368 [PERF_COUNT_HW_CACHE_OP_MAX]
369 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
370{
371 [ C(LL ) ] = {
372 [ C(OP_READ) ] = {
373 [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
374 [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
375 },
376 [ C(OP_WRITE) ] = {
377 [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
378 [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
379 },
380 [ C(OP_PREFETCH) ] = {
381 [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
382 [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
383 },
384 }
385};
386
183static __initconst const u64 nehalem_hw_cache_event_ids 387static __initconst const u64 nehalem_hw_cache_event_ids
184 [PERF_COUNT_HW_CACHE_MAX] 388 [PERF_COUNT_HW_CACHE_MAX]
185 [PERF_COUNT_HW_CACHE_OP_MAX] 389 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
215 }, 419 },
216 [ C(LL ) ] = { 420 [ C(LL ) ] = {
217 [ C(OP_READ) ] = { 421 [ C(OP_READ) ] = {
218 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ 422 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
219 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ 423 [ C(RESULT_ACCESS) ] = 0x01b7,
424 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
425 [ C(RESULT_MISS) ] = 0x01b7,
220 }, 426 },
427 /*
428 * Use RFO, not WRITEBACK, because a write miss would typically occur
429 * on RFO.
430 */
221 [ C(OP_WRITE) ] = { 431 [ C(OP_WRITE) ] = {
222 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ 432 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
223 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ 433 [ C(RESULT_ACCESS) ] = 0x01b7,
434 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
435 [ C(RESULT_MISS) ] = 0x01b7,
224 }, 436 },
225 [ C(OP_PREFETCH) ] = { 437 [ C(OP_PREFETCH) ] = {
226 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ 438 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
227 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ 439 [ C(RESULT_ACCESS) ] = 0x01b7,
440 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
441 [ C(RESULT_MISS) ] = 0x01b7,
228 }, 442 },
229 }, 443 },
230 [ C(DTLB) ] = { 444 [ C(DTLB) ] = {
@@ -691,8 +905,8 @@ static void intel_pmu_reset(void)
691 printk("clearing PMU state on CPU#%d\n", smp_processor_id()); 905 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
692 906
693 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 907 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
694 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); 908 checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
695 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); 909 checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
696 } 910 }
697 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) 911 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
698 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 912 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event)
794} 1008}
795 1009
796static struct event_constraint * 1010static struct event_constraint *
1011intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1012{
1013 struct hw_perf_event *hwc = &event->hw;
1014 unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
1015 struct event_constraint *c;
1016 struct intel_percore *pc;
1017 struct er_account *era;
1018 int i;
1019 int free_slot;
1020 int found;
1021
1022 if (!x86_pmu.percore_constraints || hwc->extra_alloc)
1023 return NULL;
1024
1025 for (c = x86_pmu.percore_constraints; c->cmask; c++) {
1026 if (e != c->code)
1027 continue;
1028
1029 /*
1030 * Allocate resource per core.
1031 */
1032 pc = cpuc->per_core;
1033 if (!pc)
1034 break;
1035 c = &emptyconstraint;
1036 raw_spin_lock(&pc->lock);
1037 free_slot = -1;
1038 found = 0;
1039 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1040 era = &pc->regs[i];
1041 if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
1042 /* Allow sharing same config */
1043 if (hwc->extra_config == era->extra_config) {
1044 era->ref++;
1045 cpuc->percore_used = 1;
1046 hwc->extra_alloc = 1;
1047 c = NULL;
1048 }
1049 /* else conflict */
1050 found = 1;
1051 break;
1052 } else if (era->ref == 0 && free_slot == -1)
1053 free_slot = i;
1054 }
1055 if (!found && free_slot != -1) {
1056 era = &pc->regs[free_slot];
1057 era->ref = 1;
1058 era->extra_reg = hwc->extra_reg;
1059 era->extra_config = hwc->extra_config;
1060 cpuc->percore_used = 1;
1061 hwc->extra_alloc = 1;
1062 c = NULL;
1063 }
1064 raw_spin_unlock(&pc->lock);
1065 return c;
1066 }
1067
1068 return NULL;
1069}
1070
1071static struct event_constraint *
797intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 1072intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
798{ 1073{
799 struct event_constraint *c; 1074 struct event_constraint *c;
@@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
806 if (c) 1081 if (c)
807 return c; 1082 return c;
808 1083
1084 c = intel_percore_constraints(cpuc, event);
1085 if (c)
1086 return c;
1087
809 return x86_get_event_constraints(cpuc, event); 1088 return x86_get_event_constraints(cpuc, event);
810} 1089}
811 1090
1091static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1092 struct perf_event *event)
1093{
1094 struct extra_reg *er;
1095 struct intel_percore *pc;
1096 struct er_account *era;
1097 struct hw_perf_event *hwc = &event->hw;
1098 int i, allref;
1099
1100 if (!cpuc->percore_used)
1101 return;
1102
1103 for (er = x86_pmu.extra_regs; er->msr; er++) {
1104 if (er->event != (hwc->config & er->config_mask))
1105 continue;
1106
1107 pc = cpuc->per_core;
1108 raw_spin_lock(&pc->lock);
1109 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1110 era = &pc->regs[i];
1111 if (era->ref > 0 &&
1112 era->extra_config == hwc->extra_config &&
1113 era->extra_reg == er->msr) {
1114 era->ref--;
1115 hwc->extra_alloc = 0;
1116 break;
1117 }
1118 }
1119 allref = 0;
1120 for (i = 0; i < MAX_EXTRA_REGS; i++)
1121 allref += pc->regs[i].ref;
1122 if (allref == 0)
1123 cpuc->percore_used = 0;
1124 raw_spin_unlock(&pc->lock);
1125 break;
1126 }
1127}
1128
812static int intel_pmu_hw_config(struct perf_event *event) 1129static int intel_pmu_hw_config(struct perf_event *event)
813{ 1130{
814 int ret = x86_pmu_hw_config(event); 1131 int ret = x86_pmu_hw_config(event);
@@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = {
880 */ 1197 */
881 .max_period = (1ULL << 31) - 1, 1198 .max_period = (1ULL << 31) - 1,
882 .get_event_constraints = intel_get_event_constraints, 1199 .get_event_constraints = intel_get_event_constraints,
1200 .put_event_constraints = intel_put_event_constraints,
883 .event_constraints = intel_core_event_constraints, 1201 .event_constraints = intel_core_event_constraints,
884}; 1202};
885 1203
1204static int intel_pmu_cpu_prepare(int cpu)
1205{
1206 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1207
1208 if (!cpu_has_ht_siblings())
1209 return NOTIFY_OK;
1210
1211 cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
1212 GFP_KERNEL, cpu_to_node(cpu));
1213 if (!cpuc->per_core)
1214 return NOTIFY_BAD;
1215
1216 raw_spin_lock_init(&cpuc->per_core->lock);
1217 cpuc->per_core->core_id = -1;
1218 return NOTIFY_OK;
1219}
1220
886static void intel_pmu_cpu_starting(int cpu) 1221static void intel_pmu_cpu_starting(int cpu)
887{ 1222{
1223 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1224 int core_id = topology_core_id(cpu);
1225 int i;
1226
888 init_debug_store_on_cpu(cpu); 1227 init_debug_store_on_cpu(cpu);
889 /* 1228 /*
890 * Deal with CPUs that don't clear their LBRs on power-up. 1229 * Deal with CPUs that don't clear their LBRs on power-up.
891 */ 1230 */
892 intel_pmu_lbr_reset(); 1231 intel_pmu_lbr_reset();
1232
1233 if (!cpu_has_ht_siblings())
1234 return;
1235
1236 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1237 struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
1238
1239 if (pc && pc->core_id == core_id) {
1240 kfree(cpuc->per_core);
1241 cpuc->per_core = pc;
1242 break;
1243 }
1244 }
1245
1246 cpuc->per_core->core_id = core_id;
1247 cpuc->per_core->refcnt++;
893} 1248}
894 1249
895static void intel_pmu_cpu_dying(int cpu) 1250static void intel_pmu_cpu_dying(int cpu)
896{ 1251{
1252 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1253 struct intel_percore *pc = cpuc->per_core;
1254
1255 if (pc) {
1256 if (pc->core_id == -1 || --pc->refcnt == 0)
1257 kfree(pc);
1258 cpuc->per_core = NULL;
1259 }
1260
897 fini_debug_store_on_cpu(cpu); 1261 fini_debug_store_on_cpu(cpu);
898} 1262}
899 1263
@@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = {
918 */ 1282 */
919 .max_period = (1ULL << 31) - 1, 1283 .max_period = (1ULL << 31) - 1,
920 .get_event_constraints = intel_get_event_constraints, 1284 .get_event_constraints = intel_get_event_constraints,
1285 .put_event_constraints = intel_put_event_constraints,
921 1286
1287 .cpu_prepare = intel_pmu_cpu_prepare,
922 .cpu_starting = intel_pmu_cpu_starting, 1288 .cpu_starting = intel_pmu_cpu_starting,
923 .cpu_dying = intel_pmu_cpu_dying, 1289 .cpu_dying = intel_pmu_cpu_dying,
924}; 1290};
@@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void)
1024 intel_pmu_lbr_init_core(); 1390 intel_pmu_lbr_init_core();
1025 1391
1026 x86_pmu.event_constraints = intel_core2_event_constraints; 1392 x86_pmu.event_constraints = intel_core2_event_constraints;
1393 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
1027 pr_cont("Core2 events, "); 1394 pr_cont("Core2 events, ");
1028 break; 1395 break;
1029 1396
@@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void)
1032 case 46: /* 45 nm nehalem-ex, "Beckton" */ 1399 case 46: /* 45 nm nehalem-ex, "Beckton" */
1033 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 1400 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1034 sizeof(hw_cache_event_ids)); 1401 sizeof(hw_cache_event_ids));
1402 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1403 sizeof(hw_cache_extra_regs));
1035 1404
1036 intel_pmu_lbr_init_nhm(); 1405 intel_pmu_lbr_init_nhm();
1037 1406
1038 x86_pmu.event_constraints = intel_nehalem_event_constraints; 1407 x86_pmu.event_constraints = intel_nehalem_event_constraints;
1408 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
1409 x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
1039 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1410 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1411 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1040 pr_cont("Nehalem events, "); 1412 pr_cont("Nehalem events, ");
1041 break; 1413 break;
1042 1414
@@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void)
1047 intel_pmu_lbr_init_atom(); 1419 intel_pmu_lbr_init_atom();
1048 1420
1049 x86_pmu.event_constraints = intel_gen_event_constraints; 1421 x86_pmu.event_constraints = intel_gen_event_constraints;
1422 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
1050 pr_cont("Atom events, "); 1423 pr_cont("Atom events, ");
1051 break; 1424 break;
1052 1425
@@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void)
1054 case 44: /* 32 nm nehalem, "Gulftown" */ 1427 case 44: /* 32 nm nehalem, "Gulftown" */
1055 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 1428 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
1056 sizeof(hw_cache_event_ids)); 1429 sizeof(hw_cache_event_ids));
1430 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1431 sizeof(hw_cache_extra_regs));
1057 1432
1058 intel_pmu_lbr_init_nhm(); 1433 intel_pmu_lbr_init_nhm();
1059 1434
1060 x86_pmu.event_constraints = intel_westmere_event_constraints; 1435 x86_pmu.event_constraints = intel_westmere_event_constraints;
1436 x86_pmu.percore_constraints = intel_westmere_percore_constraints;
1061 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1437 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1438 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
1439 x86_pmu.extra_regs = intel_westmere_extra_regs;
1062 pr_cont("Westmere events, "); 1440 pr_cont("Westmere events, ");
1063 break; 1441 break;
1064 1442
1443 case 42: /* SandyBridge */
1444 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1445 sizeof(hw_cache_event_ids));
1446
1447 intel_pmu_lbr_init_nhm();
1448
1449 x86_pmu.event_constraints = intel_snb_event_constraints;
1450 x86_pmu.pebs_constraints = intel_snb_pebs_events;
1451 pr_cont("SandyBridge events, ");
1452 break;
1453
1065 default: 1454 default:
1066 /* 1455 /*
1067 * default constraints for v2 and up 1456 * default constraints for v2 and up
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index b7dcd9f2b8a0..b95c66ae4a2a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -361,30 +361,88 @@ static int intel_pmu_drain_bts_buffer(void)
361/* 361/*
362 * PEBS 362 * PEBS
363 */ 363 */
364 364static struct event_constraint intel_core2_pebs_event_constraints[] = {
365static struct event_constraint intel_core_pebs_events[] = { 365 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
366 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
367 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ 366 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
368 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ 367 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
369 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ 368 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
370 PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ 369 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
371 PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ 370 EVENT_CONSTRAINT_END
372 PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ 371};
373 PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ 372
374 PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ 373static struct event_constraint intel_atom_pebs_event_constraints[] = {
374 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
375 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
376 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
375 EVENT_CONSTRAINT_END 377 EVENT_CONSTRAINT_END
376}; 378};
377 379
378static struct event_constraint intel_nehalem_pebs_events[] = { 380static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
379 PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ 381 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
380 PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ 382 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
381 PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ 383 PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
382 PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ 384 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
383 PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ 385 INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
384 PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ 386 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
385 PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ 387 PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
386 PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ 388 INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
387 PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ 389 PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
390 INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
391 INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
392 EVENT_CONSTRAINT_END
393};
394
395static struct event_constraint intel_westmere_pebs_event_constraints[] = {
396 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
397 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
398 PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
399 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
400 INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
401
402 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
403 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
404 INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
405 PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
406 INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
407 INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
408 EVENT_CONSTRAINT_END
409};
410
411static struct event_constraint intel_snb_pebs_events[] = {
412 PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
413 PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
414 PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
415 PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
416 PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
417 PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
418 PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
419 PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
420 PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
421 PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
422 PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
423 PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
424 PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
425 PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
426 PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
427 PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
428 PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
429 PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
430 PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
431 PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
432 PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
433 PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
434 PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
435 PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
436 PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
437 PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
438 PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
439 PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */
440 PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
441 PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
442 PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
443 PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */
444 PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */
445 PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
388 EVENT_CONSTRAINT_END 446 EVENT_CONSTRAINT_END
389}; 447};
390 448
@@ -695,20 +753,17 @@ static void intel_ds_init(void)
695 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); 753 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
696 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); 754 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
697 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; 755 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
698 x86_pmu.pebs_constraints = intel_core_pebs_events;
699 break; 756 break;
700 757
701 case 1: 758 case 1:
702 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); 759 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
703 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); 760 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
704 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; 761 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
705 x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
706 break; 762 break;
707 763
708 default: 764 default:
709 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); 765 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
710 x86_pmu.pebs = 0; 766 x86_pmu.pebs = 0;
711 break;
712 } 767 }
713 } 768 }
714} 769}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f7a0993c1e7c..3769ac822f96 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -764,15 +764,20 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
764 u64 v; 764 u64 v;
765 765
766 /* an official way for overflow indication */ 766 /* an official way for overflow indication */
767 rdmsrl(hwc->config_base + hwc->idx, v); 767 rdmsrl(hwc->config_base, v);
768 if (v & P4_CCCR_OVF) { 768 if (v & P4_CCCR_OVF) {
769 wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); 769 wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
770 return 1; 770 return 1;
771 } 771 }
772 772
773 /* it might be unflagged overflow */ 773 /*
774 rdmsrl(hwc->event_base + hwc->idx, v); 774 * In some circumstances the overflow might issue an NMI but did
775 if (!(v & ARCH_P4_CNTRVAL_MASK)) 775 * not set P4_CCCR_OVF bit. Because a counter holds a negative value
776 * we simply check for high bit being set, if it's cleared it means
777 * the counter has reached zero value and continued counting before
778 * real NMI signal was received:
779 */
780 if (!(v & ARCH_P4_UNFLAGGED_BIT))
776 return 1; 781 return 1;
777 782
778 return 0; 783 return 0;
@@ -810,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
810 * state we need to clear P4_CCCR_OVF, otherwise interrupt get 815 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
811 * asserted again and again 816 * asserted again and again
812 */ 817 */
813 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 818 (void)checking_wrmsrl(hwc->config_base,
814 (u64)(p4_config_unpack_cccr(hwc->config)) & 819 (u64)(p4_config_unpack_cccr(hwc->config)) &
815 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); 820 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
816} 821}
@@ -880,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
880 p4_pmu_enable_pebs(hwc->config); 885 p4_pmu_enable_pebs(hwc->config);
881 886
882 (void)checking_wrmsrl(escr_addr, escr_conf); 887 (void)checking_wrmsrl(escr_addr, escr_conf);
883 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 888 (void)checking_wrmsrl(hwc->config_base,
884 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); 889 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
885} 890}
886 891
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07be2cda..20c097e33860 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
68 if (cpuc->enabled) 68 if (cpuc->enabled)
69 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 69 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
70 70
71 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 71 (void)checking_wrmsrl(hwc->config_base, val);
72} 72}
73 73
74static void p6_pmu_enable_event(struct perf_event *event) 74static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
81 if (cpuc->enabled) 81 if (cpuc->enabled)
82 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 82 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
83 83
84 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 84 (void)checking_wrmsrl(hwc->config_base, val);
85} 85}
86 86
87static __initconst const struct x86_pmu p6_pmu = { 87static __initconst const struct x86_pmu p6_pmu = {
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a236615501..966512b2cacf 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
46 /* returns the bit offset of the performance counter register */ 46 /* returns the bit offset of the performance counter register */
47 switch (boot_cpu_data.x86_vendor) { 47 switch (boot_cpu_data.x86_vendor) {
48 case X86_VENDOR_AMD: 48 case X86_VENDOR_AMD:
49 if (msr >= MSR_F15H_PERF_CTR)
50 return (msr - MSR_F15H_PERF_CTR) >> 1;
49 return msr - MSR_K7_PERFCTR0; 51 return msr - MSR_K7_PERFCTR0;
50 case X86_VENDOR_INTEL: 52 case X86_VENDOR_INTEL:
51 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 53 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
70 /* returns the bit offset of the event selection register */ 72 /* returns the bit offset of the event selection register */
71 switch (boot_cpu_data.x86_vendor) { 73 switch (boot_cpu_data.x86_vendor) {
72 case X86_VENDOR_AMD: 74 case X86_VENDOR_AMD:
75 if (msr >= MSR_F15H_PERF_CTL)
76 return (msr - MSR_F15H_PERF_CTL) >> 1;
73 return msr - MSR_K7_EVNTSEL0; 77 return msr - MSR_K7_EVNTSEL0;
74 case X86_VENDOR_INTEL: 78 case X86_VENDOR_INTEL:
75 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 79 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
new file mode 100644
index 000000000000..7a8cebc9ff29
--- /dev/null
+++ b/arch/x86/kernel/devicetree.c
@@ -0,0 +1,441 @@
1/*
2 * Architecture specific OF callbacks.
3 */
4#include <linux/bootmem.h>
5#include <linux/io.h>
6#include <linux/interrupt.h>
7#include <linux/list.h>
8#include <linux/of.h>
9#include <linux/of_fdt.h>
10#include <linux/of_address.h>
11#include <linux/of_platform.h>
12#include <linux/of_irq.h>
13#include <linux/slab.h>
14#include <linux/pci.h>
15#include <linux/of_pci.h>
16
17#include <asm/hpet.h>
18#include <asm/irq_controller.h>
19#include <asm/apic.h>
20#include <asm/pci_x86.h>
21
22__initdata u64 initial_dtb;
23char __initdata cmd_line[COMMAND_LINE_SIZE];
24static LIST_HEAD(irq_domains);
25static DEFINE_RAW_SPINLOCK(big_irq_lock);
26
27int __initdata of_ioapic;
28
29#ifdef CONFIG_X86_IO_APIC
30static void add_interrupt_host(struct irq_domain *ih)
31{
32 unsigned long flags;
33
34 raw_spin_lock_irqsave(&big_irq_lock, flags);
35 list_add(&ih->l, &irq_domains);
36 raw_spin_unlock_irqrestore(&big_irq_lock, flags);
37}
38#endif
39
40static struct irq_domain *get_ih_from_node(struct device_node *controller)
41{
42 struct irq_domain *ih, *found = NULL;
43 unsigned long flags;
44
45 raw_spin_lock_irqsave(&big_irq_lock, flags);
46 list_for_each_entry(ih, &irq_domains, l) {
47 if (ih->controller == controller) {
48 found = ih;
49 break;
50 }
51 }
52 raw_spin_unlock_irqrestore(&big_irq_lock, flags);
53 return found;
54}
55
56unsigned int irq_create_of_mapping(struct device_node *controller,
57 const u32 *intspec, unsigned int intsize)
58{
59 struct irq_domain *ih;
60 u32 virq, type;
61 int ret;
62
63 ih = get_ih_from_node(controller);
64 if (!ih)
65 return 0;
66 ret = ih->xlate(ih, intspec, intsize, &virq, &type);
67 if (ret)
68 return ret;
69 if (type == IRQ_TYPE_NONE)
70 return virq;
71 /* set the mask if it is different from current */
72 if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
73 set_irq_type(virq, type);
74 return virq;
75}
76EXPORT_SYMBOL_GPL(irq_create_of_mapping);
77
78unsigned long pci_address_to_pio(phys_addr_t address)
79{
80 /*
81 * The ioport address can be directly used by inX / outX
82 */
83 BUG_ON(address >= (1 << 16));
84 return (unsigned long)address;
85}
86EXPORT_SYMBOL_GPL(pci_address_to_pio);
87
88void __init early_init_dt_scan_chosen_arch(unsigned long node)
89{
90 BUG();
91}
92
93void __init early_init_dt_add_memory_arch(u64 base, u64 size)
94{
95 BUG();
96}
97
98void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
99{
100 return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
101}
102
103void __init add_dtb(u64 data)
104{
105 initial_dtb = data + offsetof(struct setup_data, data);
106}
107
108/*
109 * CE4100 ids. Will be moved to machine_device_initcall() once we have it.
110 */
111static struct of_device_id __initdata ce4100_ids[] = {
112 { .compatible = "intel,ce4100-cp", },
113 { .compatible = "isa", },
114 { .compatible = "pci", },
115 {},
116};
117
118static int __init add_bus_probe(void)
119{
120 if (!of_have_populated_dt())
121 return 0;
122
123 return of_platform_bus_probe(NULL, ce4100_ids, NULL);
124}
125module_init(add_bus_probe);
126
127#ifdef CONFIG_PCI
128static int x86_of_pci_irq_enable(struct pci_dev *dev)
129{
130 struct of_irq oirq;
131 u32 virq;
132 int ret;
133 u8 pin;
134
135 ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
136 if (ret)
137 return ret;
138 if (!pin)
139 return 0;
140
141 ret = of_irq_map_pci(dev, &oirq);
142 if (ret)
143 return ret;
144
145 virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
146 oirq.size);
147 if (virq == 0)
148 return -EINVAL;
149 dev->irq = virq;
150 return 0;
151}
152
153static void x86_of_pci_irq_disable(struct pci_dev *dev)
154{
155}
156
157void __cpuinit x86_of_pci_init(void)
158{
159 struct device_node *np;
160
161 pcibios_enable_irq = x86_of_pci_irq_enable;
162 pcibios_disable_irq = x86_of_pci_irq_disable;
163
164 for_each_node_by_type(np, "pci") {
165 const void *prop;
166 struct pci_bus *bus;
167 unsigned int bus_min;
168 struct device_node *child;
169
170 prop = of_get_property(np, "bus-range", NULL);
171 if (!prop)
172 continue;
173 bus_min = be32_to_cpup(prop);
174
175 bus = pci_find_bus(0, bus_min);
176 if (!bus) {
177 printk(KERN_ERR "Can't find a node for bus %s.\n",
178 np->full_name);
179 continue;
180 }
181
182 if (bus->self)
183 bus->self->dev.of_node = np;
184 else
185 bus->dev.of_node = np;
186
187 for_each_child_of_node(np, child) {
188 struct pci_dev *dev;
189 u32 devfn;
190
191 prop = of_get_property(child, "reg", NULL);
192 if (!prop)
193 continue;
194
195 devfn = (be32_to_cpup(prop) >> 8) & 0xff;
196 dev = pci_get_slot(bus, devfn);
197 if (!dev)
198 continue;
199 dev->dev.of_node = child;
200 pci_dev_put(dev);
201 }
202 }
203}
204#endif
205
206static void __init dtb_setup_hpet(void)
207{
208#ifdef CONFIG_HPET_TIMER
209 struct device_node *dn;
210 struct resource r;
211 int ret;
212
213 dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-hpet");
214 if (!dn)
215 return;
216 ret = of_address_to_resource(dn, 0, &r);
217 if (ret) {
218 WARN_ON(1);
219 return;
220 }
221 hpet_address = r.start;
222#endif
223}
224
225static void __init dtb_lapic_setup(void)
226{
227#ifdef CONFIG_X86_LOCAL_APIC
228 struct device_node *dn;
229 struct resource r;
230 int ret;
231
232 dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-lapic");
233 if (!dn)
234 return;
235
236 ret = of_address_to_resource(dn, 0, &r);
237 if (WARN_ON(ret))
238 return;
239
240 /* Did the boot loader setup the local APIC ? */
241 if (!cpu_has_apic) {
242 if (apic_force_enable(r.start))
243 return;
244 }
245 smp_found_config = 1;
246 pic_mode = 1;
247 register_lapic_address(r.start);
248 generic_processor_info(boot_cpu_physical_apicid,
249 GET_APIC_VERSION(apic_read(APIC_LVR)));
250#endif
251}
252
253#ifdef CONFIG_X86_IO_APIC
254static unsigned int ioapic_id;
255
256static void __init dtb_add_ioapic(struct device_node *dn)
257{
258 struct resource r;
259 int ret;
260
261 ret = of_address_to_resource(dn, 0, &r);
262 if (ret) {
263 printk(KERN_ERR "Can't obtain address from node %s.\n",
264 dn->full_name);
265 return;
266 }
267 mp_register_ioapic(++ioapic_id, r.start, gsi_top);
268}
269
270static void __init dtb_ioapic_setup(void)
271{
272 struct device_node *dn;
273
274 for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
275 dtb_add_ioapic(dn);
276
277 if (nr_ioapics) {
278 of_ioapic = 1;
279 return;
280 }
281 printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
282}
283#else
284static void __init dtb_ioapic_setup(void) {}
285#endif
286
287static void __init dtb_apic_setup(void)
288{
289 dtb_lapic_setup();
290 dtb_ioapic_setup();
291}
292
293#ifdef CONFIG_OF_FLATTREE
294static void __init x86_flattree_get_config(void)
295{
296 u32 size, map_len;
297 void *new_dtb;
298
299 if (!initial_dtb)
300 return;
301
302 map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK),
303 (u64)sizeof(struct boot_param_header));
304
305 initial_boot_params = early_memremap(initial_dtb, map_len);
306 size = be32_to_cpu(initial_boot_params->totalsize);
307 if (map_len < size) {
308 early_iounmap(initial_boot_params, map_len);
309 initial_boot_params = early_memremap(initial_dtb, size);
310 map_len = size;
311 }
312
313 new_dtb = alloc_bootmem(size);
314 memcpy(new_dtb, initial_boot_params, size);
315 early_iounmap(initial_boot_params, map_len);
316
317 initial_boot_params = new_dtb;
318
319 /* root level address cells */
320 of_scan_flat_dt(early_init_dt_scan_root, NULL);
321
322 unflatten_device_tree();
323}
324#else
325static inline void x86_flattree_get_config(void) { }
326#endif
327
328void __init x86_dtb_init(void)
329{
330 x86_flattree_get_config();
331
332 if (!of_have_populated_dt())
333 return;
334
335 dtb_setup_hpet();
336 dtb_apic_setup();
337}
338
339#ifdef CONFIG_X86_IO_APIC
340
341struct of_ioapic_type {
342 u32 out_type;
343 u32 trigger;
344 u32 polarity;
345};
346
347static struct of_ioapic_type of_ioapic_type[] =
348{
349 {
350 .out_type = IRQ_TYPE_EDGE_RISING,
351 .trigger = IOAPIC_EDGE,
352 .polarity = 1,
353 },
354 {
355 .out_type = IRQ_TYPE_LEVEL_LOW,
356 .trigger = IOAPIC_LEVEL,
357 .polarity = 0,
358 },
359 {
360 .out_type = IRQ_TYPE_LEVEL_HIGH,
361 .trigger = IOAPIC_LEVEL,
362 .polarity = 1,
363 },
364 {
365 .out_type = IRQ_TYPE_EDGE_FALLING,
366 .trigger = IOAPIC_EDGE,
367 .polarity = 0,
368 },
369};
370
371static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
372 u32 *out_hwirq, u32 *out_type)
373{
374 struct io_apic_irq_attr attr;
375 struct of_ioapic_type *it;
376 u32 line, idx, type;
377
378 if (intsize < 2)
379 return -EINVAL;
380
381 line = *intspec;
382 idx = (u32) id->priv;
383 *out_hwirq = line + mp_gsi_routing[idx].gsi_base;
384
385 intspec++;
386 type = *intspec;
387
388 if (type >= ARRAY_SIZE(of_ioapic_type))
389 return -EINVAL;
390
391 it = of_ioapic_type + type;
392 *out_type = it->out_type;
393
394 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
395
396 return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
397}
398
399static void __init ioapic_add_ofnode(struct device_node *np)
400{
401 struct resource r;
402 int i, ret;
403
404 ret = of_address_to_resource(np, 0, &r);
405 if (ret) {
406 printk(KERN_ERR "Failed to obtain address for %s\n",
407 np->full_name);
408 return;
409 }
410
411 for (i = 0; i < nr_ioapics; i++) {
412 if (r.start == mp_ioapics[i].apicaddr) {
413 struct irq_domain *id;
414
415 id = kzalloc(sizeof(*id), GFP_KERNEL);
416 BUG_ON(!id);
417 id->controller = np;
418 id->xlate = ioapic_xlate;
419 id->priv = (void *)i;
420 add_interrupt_host(id);
421 return;
422 }
423 }
424 printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name);
425}
426
427void __init x86_add_irq_domains(void)
428{
429 struct device_node *dp;
430
431 if (!of_have_populated_dt())
432 return;
433
434 for_each_node_with_property(dp, "interrupt-controller") {
435 if (of_device_is_compatible(dp, "intel,ce4100-ioapic"))
436 ioapic_add_ofnode(dp);
437 }
438}
439#else
440void __init x86_add_irq_domains(void) { }
441#endif
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1b..220a1c11cfde 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err)
320 oops_end(flags, regs, sig); 320 oops_end(flags, regs, sig);
321} 321}
322 322
323void notrace __kprobes
324die_nmi(char *str, struct pt_regs *regs, int do_panic)
325{
326 unsigned long flags;
327
328 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
329 return;
330
331 /*
332 * We are in trouble anyway, lets at least try
333 * to get a message out.
334 */
335 flags = oops_begin();
336 printk(KERN_EMERG "%s", str);
337 printk(" on CPU%d, ip %08lx, registers:\n",
338 smp_processor_id(), regs->ip);
339 show_registers(regs);
340 oops_end(flags, regs, 0);
341 if (do_panic || panic_on_oops)
342 panic("Non maskable interrupt");
343 nmi_exit();
344 local_irq_enable();
345 do_exit(SIGBUS);
346}
347
348static int __init oops_setup(char *s) 323static int __init oops_setup(char *s)
349{ 324{
350 if (!s) 325 if (!s)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 294f26da0c0c..cdf5bfd9d4d5 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -667,21 +667,15 @@ __init void e820_setup_gap(void)
667 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of 667 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
668 * linked list of struct setup_data, which is parsed here. 668 * linked list of struct setup_data, which is parsed here.
669 */ 669 */
670void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data) 670void __init parse_e820_ext(struct setup_data *sdata)
671{ 671{
672 u32 map_len;
673 int entries; 672 int entries;
674 struct e820entry *extmap; 673 struct e820entry *extmap;
675 674
676 entries = sdata->len / sizeof(struct e820entry); 675 entries = sdata->len / sizeof(struct e820entry);
677 map_len = sdata->len + sizeof(struct setup_data);
678 if (map_len > PAGE_SIZE)
679 sdata = early_ioremap(pa_data, map_len);
680 extmap = (struct e820entry *)(sdata->data); 676 extmap = (struct e820entry *)(sdata->data);
681 __append_e820_map(extmap, entries); 677 __append_e820_map(extmap, entries);
682 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 678 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
683 if (map_len > PAGE_SIZE)
684 early_iounmap(sdata, map_len);
685 printk(KERN_INFO "extended physical RAM map:\n"); 679 printk(KERN_INFO "extended physical RAM map:\n");
686 e820_print_map("extended"); 680 e820_print_map("extended");
687} 681}
@@ -847,15 +841,21 @@ static int __init parse_memopt(char *p)
847 if (!p) 841 if (!p)
848 return -EINVAL; 842 return -EINVAL;
849 843
850#ifdef CONFIG_X86_32
851 if (!strcmp(p, "nopentium")) { 844 if (!strcmp(p, "nopentium")) {
845#ifdef CONFIG_X86_32
852 setup_clear_cpu_cap(X86_FEATURE_PSE); 846 setup_clear_cpu_cap(X86_FEATURE_PSE);
853 return 0; 847 return 0;
854 } 848#else
849 printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
850 return -EINVAL;
855#endif 851#endif
852 }
856 853
857 userdef = 1; 854 userdef = 1;
858 mem_size = memparse(p, &p); 855 mem_size = memparse(p, &p);
856 /* don't remove all of memory when handling "mem={invalid}" param */
857 if (mem_size == 0)
858 return -EINVAL;
859 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); 859 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
860 860
861 return 0; 861 return 0;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd953dee..9efbdcc56425 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -143,15 +143,10 @@ static void __init ati_bugs(int num, int slot, int func)
143 143
144static u32 __init ati_sbx00_rev(int num, int slot, int func) 144static u32 __init ati_sbx00_rev(int num, int slot, int func)
145{ 145{
146 u32 old, d; 146 u32 d;
147 147
148 d = read_pci_config(num, slot, func, 0x70);
149 old = d;
150 d &= ~(1<<8);
151 write_pci_config(num, slot, func, 0x70, d);
152 d = read_pci_config(num, slot, func, 0x8); 148 d = read_pci_config(num, slot, func, 0x8);
153 d &= 0xff; 149 d &= 0xff;
154 write_pci_config(num, slot, func, 0x70, old);
155 150
156 return d; 151 return d;
157} 152}
@@ -160,13 +155,16 @@ static void __init ati_bugs_contd(int num, int slot, int func)
160{ 155{
161 u32 d, rev; 156 u32 d, rev;
162 157
163 if (acpi_use_timer_override)
164 return;
165
166 rev = ati_sbx00_rev(num, slot, func); 158 rev = ati_sbx00_rev(num, slot, func);
159 if (rev >= 0x40)
160 acpi_fix_pin2_polarity = 1;
161
167 if (rev > 0x13) 162 if (rev > 0x13)
168 return; 163 return;
169 164
165 if (acpi_use_timer_override)
166 return;
167
170 /* check for IRQ0 interrupt swap */ 168 /* check for IRQ0 interrupt swap */
171 d = read_pci_config(num, slot, func, 0x64); 169 d = read_pci_config(num, slot, func, 0x64);
172 if (!(d & (1<<14))) 170 if (!(d & (1<<14)))
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ca3b0e343e5..5c1a91974918 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -65,6 +65,8 @@
65#define sysexit_audit syscall_exit_work 65#define sysexit_audit syscall_exit_work
66#endif 66#endif
67 67
68 .section .entry.text, "ax"
69
68/* 70/*
69 * We use macros for low-level operations which need to be overridden 71 * We use macros for low-level operations which need to be overridden
70 * for paravirtualization. The following will never clobber any registers: 72 * for paravirtualization. The following will never clobber any registers:
@@ -395,7 +397,7 @@ sysenter_past_esp:
395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 397 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
396 * pushed above; +8 corresponds to copy_thread's esp0 setting. 398 * pushed above; +8 corresponds to copy_thread's esp0 setting.
397 */ 399 */
398 pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) 400 pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
399 CFI_REL_OFFSET eip, 0 401 CFI_REL_OFFSET eip, 0
400 402
401 pushl_cfi %eax 403 pushl_cfi %eax
@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
788 */ 790 */
789.section .init.rodata,"a" 791.section .init.rodata,"a"
790ENTRY(interrupt) 792ENTRY(interrupt)
791.text 793.section .entry.text, "ax"
792 .p2align 5 794 .p2align 5
793 .p2align CONFIG_X86_L1_CACHE_SHIFT 795 .p2align CONFIG_X86_L1_CACHE_SHIFT
794ENTRY(irq_entries_start) 796ENTRY(irq_entries_start)
@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
807 .endif 809 .endif
808 .previous 810 .previous
809 .long 1b 811 .long 1b
810 .text 812 .section .entry.text, "ax"
811vector=vector+1 813vector=vector+1
812 .endif 814 .endif
813 .endr 815 .endr
@@ -1409,8 +1411,7 @@ END(general_protection)
1409#ifdef CONFIG_KVM_GUEST 1411#ifdef CONFIG_KVM_GUEST
1410ENTRY(async_page_fault) 1412ENTRY(async_page_fault)
1411 RING0_EC_FRAME 1413 RING0_EC_FRAME
1412 pushl $do_async_page_fault 1414 pushl_cfi $do_async_page_fault
1413 CFI_ADJUST_CFA_OFFSET 4
1414 jmp error_code 1415 jmp error_code
1415 CFI_ENDPROC 1416 CFI_ENDPROC
1416END(async_page_fault) 1417END(async_page_fault)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c9..b72b4a6466a9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -61,6 +61,8 @@
61#define __AUDIT_ARCH_LE 0x40000000 61#define __AUDIT_ARCH_LE 0x40000000
62 62
63 .code64 63 .code64
64 .section .entry.text, "ax"
65
64#ifdef CONFIG_FUNCTION_TRACER 66#ifdef CONFIG_FUNCTION_TRACER
65#ifdef CONFIG_DYNAMIC_FTRACE 67#ifdef CONFIG_DYNAMIC_FTRACE
66ENTRY(mcount) 68ENTRY(mcount)
@@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
744 */ 746 */
745 .section .init.rodata,"a" 747 .section .init.rodata,"a"
746ENTRY(interrupt) 748ENTRY(interrupt)
747 .text 749 .section .entry.text
748 .p2align 5 750 .p2align 5
749 .p2align CONFIG_X86_L1_CACHE_SHIFT 751 .p2align CONFIG_X86_L1_CACHE_SHIFT
750ENTRY(irq_entries_start) 752ENTRY(irq_entries_start)
@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
763 .endif 765 .endif
764 .previous 766 .previous
765 .quad 1b 767 .quad 1b
766 .text 768 .section .entry.text
767vector=vector+1 769vector=vector+1
768 .endif 770 .endif
769 .endr 771 .endr
@@ -975,9 +977,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
975 x86_platform_ipi smp_x86_platform_ipi 977 x86_platform_ipi smp_x86_platform_ipi
976 978
977#ifdef CONFIG_SMP 979#ifdef CONFIG_SMP
978.irpc idx, "01234567" 980.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
981 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
982.if NUM_INVALIDATE_TLB_VECTORS > \idx
979apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ 983apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
980 invalidate_interrupt\idx smp_invalidate_interrupt 984 invalidate_interrupt\idx smp_invalidate_interrupt
985.endif
981.endr 986.endr
982#endif 987#endif
983 988
@@ -1248,7 +1253,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1248 decl PER_CPU_VAR(irq_count) 1253 decl PER_CPU_VAR(irq_count)
1249 jmp error_exit 1254 jmp error_exit
1250 CFI_ENDPROC 1255 CFI_ENDPROC
1251END(do_hypervisor_callback) 1256END(xen_do_hypervisor_callback)
1252 1257
1253/* 1258/*
1254 * Hypervisor uses this for application faults while it executes. 1259 * Hypervisor uses this for application faults while it executes.
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 382eb2936d4d..a93742a57468 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
437 return; 437 return;
438 } 438 }
439 439
440 if (ftrace_push_return_trace(old, self_addr, &trace.depth,
441 frame_pointer) == -EBUSY) {
442 *parent = old;
443 return;
444 }
445
446 trace.func = self_addr; 440 trace.func = self_addr;
441 trace.depth = current->curr_ret_stack + 1;
447 442
448 /* Only trace if the calling function expects to */ 443 /* Only trace if the calling function expects to */
449 if (!ftrace_graph_entry(&trace)) { 444 if (!ftrace_graph_entry(&trace)) {
450 current->curr_ret_stack--;
451 *parent = old; 445 *parent = old;
446 return;
447 }
448
449 if (ftrace_push_return_trace(old, self_addr, &trace.depth,
450 frame_pointer) == -EBUSY) {
451 *parent = old;
452 return;
452 } 453 }
453} 454}
454#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 455#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 767d6c43de37..ce0be7cd085e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -73,7 +73,7 @@ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
73 */ 73 */
74KERNEL_PAGES = LOWMEM_PAGES 74KERNEL_PAGES = LOWMEM_PAGES
75 75
76INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm 76INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
77RESERVE_BRK(pagetables, INIT_MAP_SIZE) 77RESERVE_BRK(pagetables, INIT_MAP_SIZE)
78 78
79/* 79/*
@@ -137,7 +137,7 @@ ENTRY(startup_32)
137 movsl 137 movsl
1381: 1381:
139 139
140#ifdef CONFIG_OLPC_OPENFIRMWARE 140#ifdef CONFIG_OLPC
141 /* save OFW's pgdir table for later use when calling into OFW */ 141 /* save OFW's pgdir table for later use when calling into OFW */
142 movl %cr3, %eax 142 movl %cr3, %eax
143 movl %eax, pa(olpc_ofw_pgd) 143 movl %eax, pa(olpc_ofw_pgd)
@@ -623,7 +623,7 @@ ENTRY(initial_code)
623 * BSS section 623 * BSS section
624 */ 624 */
625__PAGE_ALIGNED_BSS 625__PAGE_ALIGNED_BSS
626 .align PAGE_SIZE_asm 626 .align PAGE_SIZE
627#ifdef CONFIG_X86_PAE 627#ifdef CONFIG_X86_PAE
628initial_pg_pmd: 628initial_pg_pmd:
629 .fill 1024*KPMDS,4,0 629 .fill 1024*KPMDS,4,0
@@ -644,7 +644,7 @@ ENTRY(swapper_pg_dir)
644#ifdef CONFIG_X86_PAE 644#ifdef CONFIG_X86_PAE
645__PAGE_ALIGNED_DATA 645__PAGE_ALIGNED_DATA
646 /* Page-aligned for the benefit of paravirt? */ 646 /* Page-aligned for the benefit of paravirt? */
647 .align PAGE_SIZE_asm 647 .align PAGE_SIZE
648ENTRY(initial_page_table) 648ENTRY(initial_page_table)
649 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ 649 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
650# if KPMDS == 3 650# if KPMDS == 3
@@ -662,7 +662,7 @@ ENTRY(initial_page_table)
662# else 662# else
663# error "Kernel PMDs should be 1, 2 or 3" 663# error "Kernel PMDs should be 1, 2 or 3"
664# endif 664# endif
665 .align PAGE_SIZE_asm /* needs to be page-sized too */ 665 .align PAGE_SIZE /* needs to be page-sized too */
666#endif 666#endif
667 667
668.data 668.data
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 4ff5968f12d2..bfe8f729e086 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -503,7 +503,7 @@ static int hpet_assign_irq(struct hpet_dev *dev)
503 if (!irq) 503 if (!irq)
504 return -EINVAL; 504 return -EINVAL;
505 505
506 set_irq_data(irq, dev); 506 irq_set_handler_data(irq, dev);
507 507
508 if (hpet_setup_msi_irq(irq)) 508 if (hpet_setup_msi_irq(irq))
509 return -EINVAL; 509 return -EINVAL;
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 20757cb2efa3..d9ca749c123b 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -112,7 +112,7 @@ static void make_8259A_irq(unsigned int irq)
112{ 112{
113 disable_irq_nosync(irq); 113 disable_irq_nosync(irq);
114 io_apic_irqs &= ~(1<<irq); 114 io_apic_irqs &= ~(1<<irq);
115 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, 115 irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
116 i8259A_chip.name); 116 i8259A_chip.name);
117 enable_irq(irq); 117 enable_irq(irq);
118} 118}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8eec0ec59af2..8c968974253d 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,22 +14,9 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/thread_info.h> 15#include <linux/thread_info.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/bitmap.h>
17#include <asm/syscalls.h> 18#include <asm/syscalls.h>
18 19
19/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
20static void set_bitmap(unsigned long *bitmap, unsigned int base,
21 unsigned int extent, int new_value)
22{
23 unsigned int i;
24
25 for (i = base; i < base + extent; i++) {
26 if (new_value)
27 __set_bit(i, bitmap);
28 else
29 __clear_bit(i, bitmap);
30 }
31}
32
33/* 20/*
34 * this changes the io permissions bitmap in the current task. 21 * this changes the io permissions bitmap in the current task.
35 */ 22 */
@@ -69,7 +56,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
69 */ 56 */
70 tss = &per_cpu(init_tss, get_cpu()); 57 tss = &per_cpu(init_tss, get_cpu());
71 58
72 set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); 59 if (turn_on)
60 bitmap_clear(t->io_bitmap_ptr, from, num);
61 else
62 bitmap_set(t->io_bitmap_ptr, from, num);
73 63
74 /* 64 /*
75 * Search for a (possibly new) maximum. This is simple and stupid, 65 * Search for a (possibly new) maximum. This is simple and stupid,
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 387b6a0c9e81..948a31eae75f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -44,9 +44,9 @@ void ack_bad_irq(unsigned int irq)
44 44
45#define irq_stats(x) (&per_cpu(irq_stat, x)) 45#define irq_stats(x) (&per_cpu(irq_stat, x))
46/* 46/*
47 * /proc/interrupts printing: 47 * /proc/interrupts printing for arch specific interrupts
48 */ 48 */
49static int show_other_interrupts(struct seq_file *p, int prec) 49int arch_show_interrupts(struct seq_file *p, int prec)
50{ 50{
51 int j; 51 int j;
52 52
@@ -122,59 +122,6 @@ static int show_other_interrupts(struct seq_file *p, int prec)
122 return 0; 122 return 0;
123} 123}
124 124
125int show_interrupts(struct seq_file *p, void *v)
126{
127 unsigned long flags, any_count = 0;
128 int i = *(loff_t *) v, j, prec;
129 struct irqaction *action;
130 struct irq_desc *desc;
131
132 if (i > nr_irqs)
133 return 0;
134
135 for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
136 j *= 10;
137
138 if (i == nr_irqs)
139 return show_other_interrupts(p, prec);
140
141 /* print header */
142 if (i == 0) {
143 seq_printf(p, "%*s", prec + 8, "");
144 for_each_online_cpu(j)
145 seq_printf(p, "CPU%-8d", j);
146 seq_putc(p, '\n');
147 }
148
149 desc = irq_to_desc(i);
150 if (!desc)
151 return 0;
152
153 raw_spin_lock_irqsave(&desc->lock, flags);
154 for_each_online_cpu(j)
155 any_count |= kstat_irqs_cpu(i, j);
156 action = desc->action;
157 if (!action && !any_count)
158 goto out;
159
160 seq_printf(p, "%*d: ", prec, i);
161 for_each_online_cpu(j)
162 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
163 seq_printf(p, " %8s", desc->irq_data.chip->name);
164 seq_printf(p, "-%-8s", desc->name);
165
166 if (action) {
167 seq_printf(p, " %s", action->name);
168 while ((action = action->next) != NULL)
169 seq_printf(p, ", %s", action->name);
170 }
171
172 seq_putc(p, '\n');
173out:
174 raw_spin_unlock_irqrestore(&desc->lock, flags);
175 return 0;
176}
177
178/* 125/*
179 * /proc/stat helpers 126 * /proc/stat helpers
180 */ 127 */
@@ -276,15 +223,6 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
276 223
277EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); 224EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
278 225
279#ifdef CONFIG_OF
280unsigned int irq_create_of_mapping(struct device_node *controller,
281 const u32 *intspec, unsigned int intsize)
282{
283 return intspec[0];
284}
285EXPORT_SYMBOL_GPL(irq_create_of_mapping);
286#endif
287
288#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
289/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 227/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
290void fixup_irqs(void) 228void fixup_irqs(void)
@@ -293,6 +231,7 @@ void fixup_irqs(void)
293 static int warned; 231 static int warned;
294 struct irq_desc *desc; 232 struct irq_desc *desc;
295 struct irq_data *data; 233 struct irq_data *data;
234 struct irq_chip *chip;
296 235
297 for_each_irq_desc(irq, desc) { 236 for_each_irq_desc(irq, desc) {
298 int break_affinity = 0; 237 int break_affinity = 0;
@@ -307,10 +246,10 @@ void fixup_irqs(void)
307 /* interrupt's are disabled at this point */ 246 /* interrupt's are disabled at this point */
308 raw_spin_lock(&desc->lock); 247 raw_spin_lock(&desc->lock);
309 248
310 data = &desc->irq_data; 249 data = irq_desc_get_irq_data(desc);
311 affinity = data->affinity; 250 affinity = data->affinity;
312 if (!irq_has_action(irq) || 251 if (!irq_has_action(irq) ||
313 cpumask_equal(affinity, cpu_online_mask)) { 252 cpumask_subset(affinity, cpu_online_mask)) {
314 raw_spin_unlock(&desc->lock); 253 raw_spin_unlock(&desc->lock);
315 continue; 254 continue;
316 } 255 }
@@ -327,16 +266,17 @@ void fixup_irqs(void)
327 affinity = cpu_all_mask; 266 affinity = cpu_all_mask;
328 } 267 }
329 268
330 if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask) 269 chip = irq_data_get_irq_chip(data);
331 data->chip->irq_mask(data); 270 if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
271 chip->irq_mask(data);
332 272
333 if (data->chip->irq_set_affinity) 273 if (chip->irq_set_affinity)
334 data->chip->irq_set_affinity(data, affinity, true); 274 chip->irq_set_affinity(data, affinity, true);
335 else if (!(warned++)) 275 else if (!(warned++))
336 set_affinity = 0; 276 set_affinity = 0;
337 277
338 if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask) 278 if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
339 data->chip->irq_unmask(data); 279 chip->irq_unmask(data);
340 280
341 raw_spin_unlock(&desc->lock); 281 raw_spin_unlock(&desc->lock);
342 282
@@ -368,10 +308,11 @@ void fixup_irqs(void)
368 irq = __this_cpu_read(vector_irq[vector]); 308 irq = __this_cpu_read(vector_irq[vector]);
369 309
370 desc = irq_to_desc(irq); 310 desc = irq_to_desc(irq);
371 data = &desc->irq_data; 311 data = irq_desc_get_irq_data(desc);
312 chip = irq_data_get_irq_chip(data);
372 raw_spin_lock(&desc->lock); 313 raw_spin_lock(&desc->lock);
373 if (data->chip->irq_retrigger) 314 if (chip->irq_retrigger)
374 data->chip->irq_retrigger(data); 315 chip->irq_retrigger(data);
375 raw_spin_unlock(&desc->lock); 316 raw_spin_unlock(&desc->lock);
376 } 317 }
377 } 318 }
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c752e973958d..f470e4ef993e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -25,6 +25,7 @@
25#include <asm/setup.h> 25#include <asm/setup.h>
26#include <asm/i8259.h> 26#include <asm/i8259.h>
27#include <asm/traps.h> 27#include <asm/traps.h>
28#include <asm/prom.h>
28 29
29/* 30/*
30 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 31 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -71,6 +72,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
71static struct irqaction fpu_irq = { 72static struct irqaction fpu_irq = {
72 .handler = math_error_irq, 73 .handler = math_error_irq,
73 .name = "fpu", 74 .name = "fpu",
75 .flags = IRQF_NO_THREAD,
74}; 76};
75#endif 77#endif
76 78
@@ -80,6 +82,7 @@ static struct irqaction fpu_irq = {
80static struct irqaction irq2 = { 82static struct irqaction irq2 = {
81 .handler = no_action, 83 .handler = no_action,
82 .name = "cascade", 84 .name = "cascade",
85 .flags = IRQF_NO_THREAD,
83}; 86};
84 87
85DEFINE_PER_CPU(vector_irq_t, vector_irq) = { 88DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
@@ -110,7 +113,7 @@ void __init init_ISA_irqs(void)
110 legacy_pic->init(0); 113 legacy_pic->init(0);
111 114
112 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) 115 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
113 set_irq_chip_and_handler_name(i, chip, handle_level_irq, name); 116 irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
114} 117}
115 118
116void __init init_IRQ(void) 119void __init init_IRQ(void)
@@ -118,6 +121,12 @@ void __init init_IRQ(void)
118 int i; 121 int i;
119 122
120 /* 123 /*
124 * We probably need a better place for this, but it works for
125 * now ...
126 */
127 x86_add_irq_domains();
128
129 /*
121 * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. 130 * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
122 * If these IRQ's are handled by legacy interrupt-controllers like PIC, 131 * If these IRQ's are handled by legacy interrupt-controllers like PIC,
123 * then this configuration will likely be static after the boot. If 132 * then this configuration will likely be static after the boot. If
@@ -164,14 +173,77 @@ static void __init smp_intr_init(void)
164 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 173 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
165 174
166 /* IPIs for invalidation */ 175 /* IPIs for invalidation */
167 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); 176#define ALLOC_INVTLB_VEC(NR) \
168 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); 177 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
169 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); 178 invalidate_interrupt##NR)
170 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); 179
171 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); 180 switch (NUM_INVALIDATE_TLB_VECTORS) {
172 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); 181 default:
173 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); 182 ALLOC_INVTLB_VEC(31);
174 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); 183 case 31:
184 ALLOC_INVTLB_VEC(30);
185 case 30:
186 ALLOC_INVTLB_VEC(29);
187 case 29:
188 ALLOC_INVTLB_VEC(28);
189 case 28:
190 ALLOC_INVTLB_VEC(27);
191 case 27:
192 ALLOC_INVTLB_VEC(26);
193 case 26:
194 ALLOC_INVTLB_VEC(25);
195 case 25:
196 ALLOC_INVTLB_VEC(24);
197 case 24:
198 ALLOC_INVTLB_VEC(23);
199 case 23:
200 ALLOC_INVTLB_VEC(22);
201 case 22:
202 ALLOC_INVTLB_VEC(21);
203 case 21:
204 ALLOC_INVTLB_VEC(20);
205 case 20:
206 ALLOC_INVTLB_VEC(19);
207 case 19:
208 ALLOC_INVTLB_VEC(18);
209 case 18:
210 ALLOC_INVTLB_VEC(17);
211 case 17:
212 ALLOC_INVTLB_VEC(16);
213 case 16:
214 ALLOC_INVTLB_VEC(15);
215 case 15:
216 ALLOC_INVTLB_VEC(14);
217 case 14:
218 ALLOC_INVTLB_VEC(13);
219 case 13:
220 ALLOC_INVTLB_VEC(12);
221 case 12:
222 ALLOC_INVTLB_VEC(11);
223 case 11:
224 ALLOC_INVTLB_VEC(10);
225 case 10:
226 ALLOC_INVTLB_VEC(9);
227 case 9:
228 ALLOC_INVTLB_VEC(8);
229 case 8:
230 ALLOC_INVTLB_VEC(7);
231 case 7:
232 ALLOC_INVTLB_VEC(6);
233 case 6:
234 ALLOC_INVTLB_VEC(5);
235 case 5:
236 ALLOC_INVTLB_VEC(4);
237 case 4:
238 ALLOC_INVTLB_VEC(3);
239 case 3:
240 ALLOC_INVTLB_VEC(2);
241 case 2:
242 ALLOC_INVTLB_VEC(1);
243 case 1:
244 ALLOC_INVTLB_VEC(0);
245 break;
246 }
175 247
176 /* IPI for generic function call */ 248 /* IPI for generic function call */
177 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 249 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -243,7 +315,7 @@ void __init native_init_IRQ(void)
243 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); 315 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
244 } 316 }
245 317
246 if (!acpi_ioapic) 318 if (!acpi_ioapic && !of_ioapic)
247 setup_irq(2, &irq2); 319 setup_irq(2, &irq2);
248 320
249#ifdef CONFIG_X86_32 321#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index a4130005028a..7c64c420a9f6 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
533 } 533 }
534 return NOTIFY_DONE; 534 return NOTIFY_DONE;
535 535
536 case DIE_NMIWATCHDOG:
537 if (atomic_read(&kgdb_active) != -1) {
538 /* KGDB CPU roundup: */
539 kgdb_nmicallback(raw_smp_processor_id(), regs);
540 return NOTIFY_STOP;
541 }
542 /* Enter debugger: */
543 break;
544
545 case DIE_DEBUG: 536 case DIE_DEBUG:
546 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { 537 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
547 if (user_mode(regs)) 538 if (user_mode(regs))
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index d91c477b3f62..c969fd9d1566 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr)
1276 if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) 1276 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1277 return 0;
1278 1278
1279 /*
1280 * Do not optimize in the entry code due to the unstable
1281 * stack handling.
1282 */
1283 if ((paddr >= (unsigned long )__entry_text_start) &&
1284 (paddr < (unsigned long )__entry_text_end))
1285 return 0;
1286
1279 /* Check there is enough space for a relative jump. */ 1287 /* Check there is enough space for a relative jump. */
1280 if (size - offset < RELATIVEJUMP_SIZE) 1288 if (size - offset < RELATIVEJUMP_SIZE)
1281 return 0; 1289 return 0;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 0fe6d1a66c38..c5610384ab16 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -66,7 +66,6 @@ struct microcode_amd {
66 unsigned int mpb[0]; 66 unsigned int mpb[0];
67}; 67};
68 68
69#define UCODE_MAX_SIZE 2048
70#define UCODE_CONTAINER_SECTION_HDR 8 69#define UCODE_CONTAINER_SECTION_HDR 8
71#define UCODE_CONTAINER_HEADER_SIZE 12 70#define UCODE_CONTAINER_HEADER_SIZE 12
72 71
@@ -77,20 +76,20 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
77 struct cpuinfo_x86 *c = &cpu_data(cpu); 76 struct cpuinfo_x86 *c = &cpu_data(cpu);
78 u32 dummy; 77 u32 dummy;
79 78
80 memset(csig, 0, sizeof(*csig));
81 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { 79 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
82 pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " 80 pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
83 "supported\n", cpu, c->x86);
84 return -1; 81 return -1;
85 } 82 }
83
86 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); 84 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
87 pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); 85 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
86
88 return 0; 87 return 0;
89} 88}
90 89
91static int get_matching_microcode(int cpu, void *mc, int rev) 90static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
91 int rev)
92{ 92{
93 struct microcode_header_amd *mc_header = mc;
94 unsigned int current_cpu_id; 93 unsigned int current_cpu_id;
95 u16 equiv_cpu_id = 0; 94 u16 equiv_cpu_id = 0;
96 unsigned int i = 0; 95 unsigned int i = 0;
@@ -109,17 +108,17 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
109 if (!equiv_cpu_id) 108 if (!equiv_cpu_id)
110 return 0; 109 return 0;
111 110
112 if (mc_header->processor_rev_id != equiv_cpu_id) 111 if (mc_hdr->processor_rev_id != equiv_cpu_id)
113 return 0; 112 return 0;
114 113
115 /* ucode might be chipset specific -- currently we don't support this */ 114 /* ucode might be chipset specific -- currently we don't support this */
116 if (mc_header->nb_dev_id || mc_header->sb_dev_id) { 115 if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
117 pr_err("CPU%d: loading of chipset specific code not yet supported\n", 116 pr_err("CPU%d: chipset specific code not yet supported\n",
118 cpu); 117 cpu);
119 return 0; 118 return 0;
120 } 119 }
121 120
122 if (mc_header->patch_id <= rev) 121 if (mc_hdr->patch_id <= rev)
123 return 0; 122 return 0;
124 123
125 return 1; 124 return 1;
@@ -144,71 +143,93 @@ static int apply_microcode_amd(int cpu)
144 143
145 /* check current patch id and patch's id for match */ 144 /* check current patch id and patch's id for match */
146 if (rev != mc_amd->hdr.patch_id) { 145 if (rev != mc_amd->hdr.patch_id) {
147 pr_err("CPU%d: update failed (for patch_level=0x%x)\n", 146 pr_err("CPU%d: update failed for patch_level=0x%08x\n",
148 cpu, mc_amd->hdr.patch_id); 147 cpu, mc_amd->hdr.patch_id);
149 return -1; 148 return -1;
150 } 149 }
151 150
152 pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); 151 pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
153 uci->cpu_sig.rev = rev; 152 uci->cpu_sig.rev = rev;
154 153
155 return 0; 154 return 0;
156} 155}
157 156
158static void * 157static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
159get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
160{ 158{
161 unsigned int total_size; 159 struct cpuinfo_x86 *c = &cpu_data(cpu);
162 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 160 unsigned int max_size, actual_size;
163 void *mc; 161
162#define F1XH_MPB_MAX_SIZE 2048
163#define F14H_MPB_MAX_SIZE 1824
164#define F15H_MPB_MAX_SIZE 4096
165
166 switch (c->x86) {
167 case 0x14:
168 max_size = F14H_MPB_MAX_SIZE;
169 break;
170 case 0x15:
171 max_size = F15H_MPB_MAX_SIZE;
172 break;
173 default:
174 max_size = F1XH_MPB_MAX_SIZE;
175 break;
176 }
164 177
165 get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); 178 actual_size = buf[4] + (buf[5] << 8);
166 179
167 if (section_hdr[0] != UCODE_UCODE_TYPE) { 180 if (actual_size > size || actual_size > max_size) {
168 pr_err("error: invalid type field in container file section header\n"); 181 pr_err("section size mismatch\n");
169 return NULL; 182 return 0;
170 } 183 }
171 184
172 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); 185 return actual_size;
186}
173 187
174 if (total_size > size || total_size > UCODE_MAX_SIZE) { 188static struct microcode_header_amd *
175 pr_err("error: size mismatch\n"); 189get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
176 return NULL; 190{
191 struct microcode_header_amd *mc = NULL;
192 unsigned int actual_size = 0;
193
194 if (buf[0] != UCODE_UCODE_TYPE) {
195 pr_err("invalid type field in container file section header\n");
196 goto out;
177 } 197 }
178 198
179 mc = vzalloc(UCODE_MAX_SIZE); 199 actual_size = verify_ucode_size(cpu, buf, size);
200 if (!actual_size)
201 goto out;
202
203 mc = vzalloc(actual_size);
180 if (!mc) 204 if (!mc)
181 return NULL; 205 goto out;
182 206
183 get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); 207 get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size);
184 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; 208 *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR;
185 209
210out:
186 return mc; 211 return mc;
187} 212}
188 213
189static int install_equiv_cpu_table(const u8 *buf) 214static int install_equiv_cpu_table(const u8 *buf)
190{ 215{
191 u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; 216 unsigned int *ibuf = (unsigned int *)buf;
192 unsigned int *buf_pos = (unsigned int *)container_hdr; 217 unsigned int type = ibuf[1];
193 unsigned long size; 218 unsigned int size = ibuf[2];
194 219
195 get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); 220 if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
196 221 pr_err("empty section/"
197 size = buf_pos[2]; 222 "invalid type field in container file section header\n");
198 223 return -EINVAL;
199 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
200 pr_err("error: invalid type field in container file section header\n");
201 return 0;
202 } 224 }
203 225
204 equiv_cpu_table = vmalloc(size); 226 equiv_cpu_table = vmalloc(size);
205 if (!equiv_cpu_table) { 227 if (!equiv_cpu_table) {
206 pr_err("failed to allocate equivalent CPU table\n"); 228 pr_err("failed to allocate equivalent CPU table\n");
207 return 0; 229 return -ENOMEM;
208 } 230 }
209 231
210 buf += UCODE_CONTAINER_HEADER_SIZE; 232 get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size);
211 get_ucode_data(equiv_cpu_table, buf, size);
212 233
213 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ 234 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
214} 235}
@@ -223,16 +244,16 @@ static enum ucode_state
223generic_load_microcode(int cpu, const u8 *data, size_t size) 244generic_load_microcode(int cpu, const u8 *data, size_t size)
224{ 245{
225 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 246 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
247 struct microcode_header_amd *mc_hdr = NULL;
248 unsigned int mc_size, leftover;
249 int offset;
226 const u8 *ucode_ptr = data; 250 const u8 *ucode_ptr = data;
227 void *new_mc = NULL; 251 void *new_mc = NULL;
228 void *mc; 252 unsigned int new_rev = uci->cpu_sig.rev;
229 int new_rev = uci->cpu_sig.rev;
230 unsigned int leftover;
231 unsigned long offset;
232 enum ucode_state state = UCODE_OK; 253 enum ucode_state state = UCODE_OK;
233 254
234 offset = install_equiv_cpu_table(ucode_ptr); 255 offset = install_equiv_cpu_table(ucode_ptr);
235 if (!offset) { 256 if (offset < 0) {
236 pr_err("failed to create equivalent cpu table\n"); 257 pr_err("failed to create equivalent cpu table\n");
237 return UCODE_ERROR; 258 return UCODE_ERROR;
238 } 259 }
@@ -241,64 +262,65 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
241 leftover = size - offset; 262 leftover = size - offset;
242 263
243 while (leftover) { 264 while (leftover) {
244 unsigned int uninitialized_var(mc_size); 265 mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
245 struct microcode_header_amd *mc_header; 266 if (!mc_hdr)
246
247 mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
248 if (!mc)
249 break; 267 break;
250 268
251 mc_header = (struct microcode_header_amd *)mc; 269 if (get_matching_microcode(cpu, mc_hdr, new_rev)) {
252 if (get_matching_microcode(cpu, mc, new_rev)) {
253 vfree(new_mc); 270 vfree(new_mc);
254 new_rev = mc_header->patch_id; 271 new_rev = mc_hdr->patch_id;
255 new_mc = mc; 272 new_mc = mc_hdr;
256 } else 273 } else
257 vfree(mc); 274 vfree(mc_hdr);
258 275
259 ucode_ptr += mc_size; 276 ucode_ptr += mc_size;
260 leftover -= mc_size; 277 leftover -= mc_size;
261 } 278 }
262 279
263 if (new_mc) { 280 if (!new_mc) {
264 if (!leftover) {
265 vfree(uci->mc);
266 uci->mc = new_mc;
267 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
268 cpu, new_rev, uci->cpu_sig.rev);
269 } else {
270 vfree(new_mc);
271 state = UCODE_ERROR;
272 }
273 } else
274 state = UCODE_NFOUND; 281 state = UCODE_NFOUND;
282 goto free_table;
283 }
275 284
285 if (!leftover) {
286 vfree(uci->mc);
287 uci->mc = new_mc;
288 pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
289 cpu, uci->cpu_sig.rev, new_rev);
290 } else {
291 vfree(new_mc);
292 state = UCODE_ERROR;
293 }
294
295free_table:
276 free_equiv_cpu_table(); 296 free_equiv_cpu_table();
277 297
278 return state; 298 return state;
279} 299}
280 300
281static enum ucode_state request_microcode_fw(int cpu, struct device *device) 301static enum ucode_state request_microcode_amd(int cpu, struct device *device)
282{ 302{
283 const char *fw_name = "amd-ucode/microcode_amd.bin"; 303 const char *fw_name = "amd-ucode/microcode_amd.bin";
284 const struct firmware *firmware; 304 const struct firmware *fw;
285 enum ucode_state ret; 305 enum ucode_state ret = UCODE_NFOUND;
286 306
287 if (request_firmware(&firmware, fw_name, device)) { 307 if (request_firmware(&fw, fw_name, device)) {
288 printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); 308 pr_err("failed to load file %s\n", fw_name);
289 return UCODE_NFOUND; 309 goto out;
290 } 310 }
291 311
292 if (*(u32 *)firmware->data != UCODE_MAGIC) { 312 ret = UCODE_ERROR;
293 pr_err("invalid UCODE_MAGIC (0x%08x)\n", 313 if (*(u32 *)fw->data != UCODE_MAGIC) {
294 *(u32 *)firmware->data); 314 pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
295 return UCODE_ERROR; 315 goto fw_release;
296 } 316 }
297 317
298 ret = generic_load_microcode(cpu, firmware->data, firmware->size); 318 ret = generic_load_microcode(cpu, fw->data, fw->size);
299 319
300 release_firmware(firmware); 320fw_release:
321 release_firmware(fw);
301 322
323out:
302 return ret; 324 return ret;
303} 325}
304 326
@@ -319,7 +341,7 @@ static void microcode_fini_cpu_amd(int cpu)
319 341
320static struct microcode_ops microcode_amd_ops = { 342static struct microcode_ops microcode_amd_ops = {
321 .request_microcode_user = request_microcode_user, 343 .request_microcode_user = request_microcode_user,
322 .request_microcode_fw = request_microcode_fw, 344 .request_microcode_fw = request_microcode_amd,
323 .collect_cpu_info = collect_cpu_info_amd, 345 .collect_cpu_info = collect_cpu_info_amd,
324 .apply_microcode = apply_microcode_amd, 346 .apply_microcode = apply_microcode_amd,
325 .microcode_fini_cpu = microcode_fini_cpu_amd, 347 .microcode_fini_cpu = microcode_fini_cpu_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1cca374a2bac..87af68e0e1e1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -417,8 +417,10 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
417 if (err) 417 if (err)
418 return err; 418 return err;
419 419
420 if (microcode_init_cpu(cpu) == UCODE_ERROR) 420 if (microcode_init_cpu(cpu) == UCODE_ERROR) {
421 err = -EINVAL; 421 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
422 return -EINVAL;
423 }
422 424
423 return err; 425 return err;
424} 426}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff4554198981..99fa3adf0141 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,12 +110,9 @@ void show_regs_common(void)
110 init_utsname()->release, 110 init_utsname()->release,
111 (int)strcspn(init_utsname()->version, " "), 111 (int)strcspn(init_utsname()->version, " "),
112 init_utsname()->version); 112 init_utsname()->version);
113 printk(KERN_CONT " "); 113 printk(KERN_CONT " %s %s", vendor, product);
114 printk(KERN_CONT "%s %s", vendor, product); 114 if (board)
115 if (board) { 115 printk(KERN_CONT "/%s", board);
116 printk(KERN_CONT "/");
117 printk(KERN_CONT "%s", board);
118 }
119 printk(KERN_CONT "\n"); 116 printk(KERN_CONT "\n");
120} 117}
121 118
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 10c6619c0543..d3ce37edb54d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -285,6 +285,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
285 DMI_MATCH(DMI_BOARD_NAME, "P4S800"), 285 DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
286 }, 286 },
287 }, 287 },
288 { /* Handle problems with rebooting on VersaLogic Menlow boards */
289 .callback = set_bios_reboot,
290 .ident = "VersaLogic Menlow based board",
291 .matches = {
292 DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
293 DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
294 },
295 },
288 { } 296 { }
289}; 297};
290 298
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 6f39cab052d5..3f2ad2640d85 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -6,6 +6,7 @@
6#include <linux/acpi.h> 6#include <linux/acpi.h>
7#include <linux/bcd.h> 7#include <linux/bcd.h>
8#include <linux/pnp.h> 8#include <linux/pnp.h>
9#include <linux/of.h>
9 10
10#include <asm/vsyscall.h> 11#include <asm/vsyscall.h>
11#include <asm/x86_init.h> 12#include <asm/x86_init.h>
@@ -236,6 +237,8 @@ static __init int add_rtc_cmos(void)
236 } 237 }
237 } 238 }
238#endif 239#endif
240 if (of_have_populated_dt())
241 return 0;
239 242
240 platform_device_register(&rtc_device); 243 platform_device_register(&rtc_device);
241 dev_info(&rtc_device.dev, 244 dev_info(&rtc_device.dev,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a089fc19ffae..9d43b28e0728 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -113,6 +113,7 @@
113#endif 113#endif
114#include <asm/mce.h> 114#include <asm/mce.h>
115#include <asm/alternative.h> 115#include <asm/alternative.h>
116#include <asm/prom.h>
116 117
117/* 118/*
118 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 119 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -293,10 +294,32 @@ static void __init init_gbpages(void)
293 else 294 else
294 direct_gbpages = 0; 295 direct_gbpages = 0;
295} 296}
297
298static void __init cleanup_highmap_brk_end(void)
299{
300 pud_t *pud;
301 pmd_t *pmd;
302
303 mmu_cr4_features = read_cr4();
304
305 /*
306 * _brk_end cannot change anymore, but it and _end may be
307 * located on different 2M pages. cleanup_highmap(), however,
308 * can only consider _end when it runs, so destroy any
309 * mappings beyond _brk_end here.
310 */
311 pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
312 pmd = pmd_offset(pud, _brk_end - 1);
313 while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
314 pmd_clear(pmd);
315}
296#else 316#else
297static inline void init_gbpages(void) 317static inline void init_gbpages(void)
298{ 318{
299} 319}
320static inline void cleanup_highmap_brk_end(void)
321{
322}
300#endif 323#endif
301 324
302static void __init reserve_brk(void) 325static void __init reserve_brk(void)
@@ -307,6 +330,8 @@ static void __init reserve_brk(void)
307 /* Mark brk area as locked down and no longer taking any 330 /* Mark brk area as locked down and no longer taking any
308 new allocations */ 331 new allocations */
309 _brk_start = 0; 332 _brk_start = 0;
333
334 cleanup_highmap_brk_end();
310} 335}
311 336
312#ifdef CONFIG_BLK_DEV_INITRD 337#ifdef CONFIG_BLK_DEV_INITRD
@@ -429,16 +454,30 @@ static void __init parse_setup_data(void)
429 return; 454 return;
430 pa_data = boot_params.hdr.setup_data; 455 pa_data = boot_params.hdr.setup_data;
431 while (pa_data) { 456 while (pa_data) {
432 data = early_memremap(pa_data, PAGE_SIZE); 457 u32 data_len, map_len;
458
459 map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
460 (u64)sizeof(struct setup_data));
461 data = early_memremap(pa_data, map_len);
462 data_len = data->len + sizeof(struct setup_data);
463 if (data_len > map_len) {
464 early_iounmap(data, map_len);
465 data = early_memremap(pa_data, data_len);
466 map_len = data_len;
467 }
468
433 switch (data->type) { 469 switch (data->type) {
434 case SETUP_E820_EXT: 470 case SETUP_E820_EXT:
435 parse_e820_ext(data, pa_data); 471 parse_e820_ext(data);
472 break;
473 case SETUP_DTB:
474 add_dtb(pa_data);
436 break; 475 break;
437 default: 476 default:
438 break; 477 break;
439 } 478 }
440 pa_data = data->next; 479 pa_data = data->next;
441 early_iounmap(data, PAGE_SIZE); 480 early_iounmap(data, map_len);
442 } 481 }
443} 482}
444 483
@@ -680,15 +719,6 @@ static int __init parse_reservelow(char *p)
680 719
681early_param("reservelow", parse_reservelow); 720early_param("reservelow", parse_reservelow);
682 721
683static u64 __init get_max_mapped(void)
684{
685 u64 end = max_pfn_mapped;
686
687 end <<= PAGE_SHIFT;
688
689 return end;
690}
691
692/* 722/*
693 * Determine if we were loaded by an EFI loader. If so, then we have also been 723 * Determine if we were loaded by an EFI loader. If so, then we have also been
694 * passed the efi memmap, systab, etc., so we should use these data structures 724 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -704,8 +734,6 @@ static u64 __init get_max_mapped(void)
704 734
705void __init setup_arch(char **cmdline_p) 735void __init setup_arch(char **cmdline_p)
706{ 736{
707 int acpi = 0;
708 int amd = 0;
709 unsigned long flags; 737 unsigned long flags;
710 738
711#ifdef CONFIG_X86_32 739#ifdef CONFIG_X86_32
@@ -977,19 +1005,7 @@ void __init setup_arch(char **cmdline_p)
977 1005
978 early_acpi_boot_init(); 1006 early_acpi_boot_init();
979 1007
980#ifdef CONFIG_ACPI_NUMA 1008 initmem_init();
981 /*
982 * Parse SRAT to discover nodes.
983 */
984 acpi = acpi_numa_init();
985#endif
986
987#ifdef CONFIG_AMD_NUMA
988 if (!acpi)
989 amd = !amd_numa_init(0, max_pfn);
990#endif
991
992 initmem_init(0, max_pfn, acpi, amd);
993 memblock_find_dma_reserve(); 1009 memblock_find_dma_reserve();
994 dma32_reserve_bootmem(); 1010 dma32_reserve_bootmem();
995 1011
@@ -1022,8 +1038,8 @@ void __init setup_arch(char **cmdline_p)
1022 * Read APIC and some other early information from ACPI tables. 1038 * Read APIC and some other early information from ACPI tables.
1023 */ 1039 */
1024 acpi_boot_init(); 1040 acpi_boot_init();
1025
1026 sfi_init(); 1041 sfi_init();
1042 x86_dtb_init();
1027 1043
1028 /* 1044 /*
1029 * get boot-time SMP configuration: 1045 * get boot-time SMP configuration:
@@ -1033,9 +1049,7 @@ void __init setup_arch(char **cmdline_p)
1033 1049
1034 prefill_possible_map(); 1050 prefill_possible_map();
1035 1051
1036#ifdef CONFIG_X86_64
1037 init_cpu_to_node(); 1052 init_cpu_to_node();
1038#endif
1039 1053
1040 init_apic_mappings(); 1054 init_apic_mappings();
1041 ioapic_and_gsi_init(); 1055 ioapic_and_gsi_init();
@@ -1059,6 +1073,8 @@ void __init setup_arch(char **cmdline_p)
1059#endif 1073#endif
1060 x86_init.oem.banner(); 1074 x86_init.oem.banner();
1061 1075
1076 x86_init.timers.wallclock_init();
1077
1062 mcheck_init(); 1078 mcheck_init();
1063 1079
1064 local_irq_save(flags); 1080 local_irq_save(flags);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 002b79685f73..71f4727da373 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -225,10 +225,15 @@ void __init setup_per_cpu_areas(void)
225 per_cpu(x86_bios_cpu_apicid, cpu) = 225 per_cpu(x86_bios_cpu_apicid, cpu) =
226 early_per_cpu_map(x86_bios_cpu_apicid, cpu); 226 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
227#endif 227#endif
228#ifdef CONFIG_X86_32
229 per_cpu(x86_cpu_to_logical_apicid, cpu) =
230 early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
231#endif
228#ifdef CONFIG_X86_64 232#ifdef CONFIG_X86_64
229 per_cpu(irq_stack_ptr, cpu) = 233 per_cpu(irq_stack_ptr, cpu) =
230 per_cpu(irq_stack_union.irq_stack, cpu) + 234 per_cpu(irq_stack_union.irq_stack, cpu) +
231 IRQ_STACK_SIZE - 64; 235 IRQ_STACK_SIZE - 64;
236#endif
232#ifdef CONFIG_NUMA 237#ifdef CONFIG_NUMA
233 per_cpu(x86_cpu_to_node_map, cpu) = 238 per_cpu(x86_cpu_to_node_map, cpu) =
234 early_per_cpu_map(x86_cpu_to_node_map, cpu); 239 early_per_cpu_map(x86_cpu_to_node_map, cpu);
@@ -242,7 +247,6 @@ void __init setup_per_cpu_areas(void)
242 */ 247 */
243 set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); 248 set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
244#endif 249#endif
245#endif
246 /* 250 /*
247 * Up to this point, the boot CPU has been using .init.data 251 * Up to this point, the boot CPU has been using .init.data
248 * area. Reload any changed state for the boot CPU. 252 * area. Reload any changed state for the boot CPU.
@@ -256,7 +260,10 @@ void __init setup_per_cpu_areas(void)
256 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 260 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
257 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; 261 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
258#endif 262#endif
259#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) 263#ifdef CONFIG_X86_32
264 early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
265#endif
266#ifdef CONFIG_NUMA
260 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; 267 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
261#endif 268#endif
262 269
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 545273369efa..c2871d3c71b6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,6 +64,7 @@
64#include <asm/mtrr.h> 64#include <asm/mtrr.h>
65#include <asm/mwait.h> 65#include <asm/mwait.h>
66#include <asm/apic.h> 66#include <asm/apic.h>
67#include <asm/io_apic.h>
67#include <asm/setup.h> 68#include <asm/setup.h>
68#include <asm/uv/uv.h> 69#include <asm/uv/uv.h>
69#include <linux/mc146818rtc.h> 70#include <linux/mc146818rtc.h>
@@ -71,10 +72,6 @@
71#include <asm/smpboot_hooks.h> 72#include <asm/smpboot_hooks.h>
72#include <asm/i8259.h> 73#include <asm/i8259.h>
73 74
74#ifdef CONFIG_X86_32
75u8 apicid_2_node[MAX_APICID];
76#endif
77
78/* State of each CPU */ 75/* State of each CPU */
79DEFINE_PER_CPU(int, cpu_state) = { 0 }; 76DEFINE_PER_CPU(int, cpu_state) = { 0 };
80 77
@@ -130,68 +127,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
130DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); 127DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
131EXPORT_PER_CPU_SYMBOL(cpu_core_map); 128EXPORT_PER_CPU_SYMBOL(cpu_core_map);
132 129
130DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
131
133/* Per CPU bogomips and other parameters */ 132/* Per CPU bogomips and other parameters */
134DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); 133DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
135EXPORT_PER_CPU_SYMBOL(cpu_info); 134EXPORT_PER_CPU_SYMBOL(cpu_info);
136 135
137atomic_t init_deasserted; 136atomic_t init_deasserted;
138 137
139#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
140/* which node each logical CPU is on */
141int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
142EXPORT_SYMBOL(cpu_to_node_map);
143
144/* set up a mapping between cpu and node. */
145static void map_cpu_to_node(int cpu, int node)
146{
147 printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
148 cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
149 cpu_to_node_map[cpu] = node;
150}
151
152/* undo a mapping between cpu and node. */
153static void unmap_cpu_to_node(int cpu)
154{
155 int node;
156
157 printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
158 for (node = 0; node < MAX_NUMNODES; node++)
159 cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
160 cpu_to_node_map[cpu] = 0;
161}
162#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
163#define map_cpu_to_node(cpu, node) ({})
164#define unmap_cpu_to_node(cpu) ({})
165#endif
166
167#ifdef CONFIG_X86_32
168static int boot_cpu_logical_apicid;
169
170u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
171 { [0 ... NR_CPUS-1] = BAD_APICID };
172
173static void map_cpu_to_logical_apicid(void)
174{
175 int cpu = smp_processor_id();
176 int apicid = logical_smp_processor_id();
177 int node = apic->apicid_to_node(apicid);
178
179 if (!node_online(node))
180 node = first_online_node;
181
182 cpu_2_logical_apicid[cpu] = apicid;
183 map_cpu_to_node(cpu, node);
184}
185
186void numa_remove_cpu(int cpu)
187{
188 cpu_2_logical_apicid[cpu] = BAD_APICID;
189 unmap_cpu_to_node(cpu);
190}
191#else
192#define map_cpu_to_logical_apicid() do {} while (0)
193#endif
194
195/* 138/*
196 * Report back to the Boot Processor. 139 * Report back to the Boot Processor.
197 * Running on AP. 140 * Running on AP.
@@ -259,7 +202,6 @@ static void __cpuinit smp_callin(void)
259 apic->smp_callin_clear_local_apic(); 202 apic->smp_callin_clear_local_apic();
260 setup_local_APIC(); 203 setup_local_APIC();
261 end_local_APIC_setup(); 204 end_local_APIC_setup();
262 map_cpu_to_logical_apicid();
263 205
264 /* 206 /*
265 * Need to setup vector mappings before we enable interrupts. 207 * Need to setup vector mappings before we enable interrupts.
@@ -355,23 +297,6 @@ notrace static void __cpuinit start_secondary(void *unused)
355 cpu_idle(); 297 cpu_idle();
356} 298}
357 299
358#ifdef CONFIG_CPUMASK_OFFSTACK
359/* In this case, llc_shared_map is a pointer to a cpumask. */
360static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
361 const struct cpuinfo_x86 *src)
362{
363 struct cpumask *llc = dst->llc_shared_map;
364 *dst = *src;
365 dst->llc_shared_map = llc;
366}
367#else
368static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
369 const struct cpuinfo_x86 *src)
370{
371 *dst = *src;
372}
373#endif /* CONFIG_CPUMASK_OFFSTACK */
374
375/* 300/*
376 * The bootstrap kernel entry code has set these up. Save them for 301 * The bootstrap kernel entry code has set these up. Save them for
377 * a given CPU 302 * a given CPU
@@ -381,7 +306,7 @@ void __cpuinit smp_store_cpu_info(int id)
381{ 306{
382 struct cpuinfo_x86 *c = &cpu_data(id); 307 struct cpuinfo_x86 *c = &cpu_data(id);
383 308
384 copy_cpuinfo_x86(c, &boot_cpu_data); 309 *c = boot_cpu_data;
385 c->cpu_index = id; 310 c->cpu_index = id;
386 if (id != 0) 311 if (id != 0)
387 identify_secondary_cpu(c); 312 identify_secondary_cpu(c);
@@ -389,15 +314,12 @@ void __cpuinit smp_store_cpu_info(int id)
389 314
390static void __cpuinit link_thread_siblings(int cpu1, int cpu2) 315static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
391{ 316{
392 struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
393 struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
394
395 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); 317 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
396 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); 318 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
397 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); 319 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
398 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); 320 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
399 cpumask_set_cpu(cpu1, c2->llc_shared_map); 321 cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
400 cpumask_set_cpu(cpu2, c1->llc_shared_map); 322 cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
401} 323}
402 324
403 325
@@ -414,6 +336,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
414 336
415 if (cpu_has(c, X86_FEATURE_TOPOEXT)) { 337 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
416 if (c->phys_proc_id == o->phys_proc_id && 338 if (c->phys_proc_id == o->phys_proc_id &&
339 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
417 c->compute_unit_id == o->compute_unit_id) 340 c->compute_unit_id == o->compute_unit_id)
418 link_thread_siblings(cpu, i); 341 link_thread_siblings(cpu, i);
419 } else if (c->phys_proc_id == o->phys_proc_id && 342 } else if (c->phys_proc_id == o->phys_proc_id &&
@@ -425,7 +348,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
425 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); 348 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
426 } 349 }
427 350
428 cpumask_set_cpu(cpu, c->llc_shared_map); 351 cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
429 352
430 if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { 353 if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
431 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); 354 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -436,8 +359,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
436 for_each_cpu(i, cpu_sibling_setup_mask) { 359 for_each_cpu(i, cpu_sibling_setup_mask) {
437 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && 360 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
438 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { 361 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
439 cpumask_set_cpu(i, c->llc_shared_map); 362 cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
440 cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); 363 cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
441 } 364 }
442 if (c->phys_proc_id == cpu_data(i).phys_proc_id) { 365 if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
443 cpumask_set_cpu(i, cpu_core_mask(cpu)); 366 cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -476,7 +399,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
476 !(cpu_has(c, X86_FEATURE_AMD_DCM))) 399 !(cpu_has(c, X86_FEATURE_AMD_DCM)))
477 return cpu_core_mask(cpu); 400 return cpu_core_mask(cpu);
478 else 401 else
479 return c->llc_shared_map; 402 return cpu_llc_shared_mask(cpu);
480} 403}
481 404
482static void impress_friends(void) 405static void impress_friends(void)
@@ -947,6 +870,14 @@ int __cpuinit native_cpu_up(unsigned int cpu)
947 return 0; 870 return 0;
948} 871}
949 872
873/**
874 * arch_disable_smp_support() - disables SMP support for x86 at runtime
875 */
876void arch_disable_smp_support(void)
877{
878 disable_ioapic_support();
879}
880
950/* 881/*
951 * Fall back to non SMP mode after errors. 882 * Fall back to non SMP mode after errors.
952 * 883 *
@@ -962,7 +893,6 @@ static __init void disable_smp(void)
962 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 893 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
963 else 894 else
964 physid_set_mask_of_physid(0, &phys_cpu_present_map); 895 physid_set_mask_of_physid(0, &phys_cpu_present_map);
965 map_cpu_to_logical_apicid();
966 cpumask_set_cpu(0, cpu_sibling_mask(0)); 896 cpumask_set_cpu(0, cpu_sibling_mask(0));
967 cpumask_set_cpu(0, cpu_core_mask(0)); 897 cpumask_set_cpu(0, cpu_core_mask(0));
968} 898}
@@ -1047,7 +977,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1047 "(tell your hw vendor)\n"); 977 "(tell your hw vendor)\n");
1048 } 978 }
1049 smpboot_clear_io_apic(); 979 smpboot_clear_io_apic();
1050 arch_disable_smp_support(); 980 disable_ioapic_support();
1051 return -1; 981 return -1;
1052 } 982 }
1053 983
@@ -1091,21 +1021,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1091 1021
1092 preempt_disable(); 1022 preempt_disable();
1093 smp_cpu_index_default(); 1023 smp_cpu_index_default();
1094 memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info)); 1024
1095 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1096 mb();
1097 /* 1025 /*
1098 * Setup boot CPU information 1026 * Setup boot CPU information
1099 */ 1027 */
1100 smp_store_cpu_info(0); /* Final full version of the data */ 1028 smp_store_cpu_info(0); /* Final full version of the data */
1101#ifdef CONFIG_X86_32 1029 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1102 boot_cpu_logical_apicid = logical_smp_processor_id(); 1030 mb();
1103#endif 1031
1104 current_thread_info()->cpu = 0; /* needed? */ 1032 current_thread_info()->cpu = 0; /* needed? */
1105 for_each_possible_cpu(i) { 1033 for_each_possible_cpu(i) {
1106 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); 1034 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
1107 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); 1035 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
1108 zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); 1036 zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
1109 } 1037 }
1110 set_cpu_sibling_map(0); 1038 set_cpu_sibling_map(0);
1111 1039
@@ -1141,8 +1069,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1141 1069
1142 bsp_end_local_APIC_setup(); 1070 bsp_end_local_APIC_setup();
1143 1071
1144 map_cpu_to_logical_apicid();
1145
1146 if (apic->setup_portio_remap) 1072 if (apic->setup_portio_remap)
1147 apic->setup_portio_remap(); 1073 apic->setup_portio_remap();
1148 1074
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8f..5f181742e8f9 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,6 @@ ENTRY(sys_call_table)
340 .long sys_fanotify_init 340 .long sys_fanotify_init
341 .long sys_fanotify_mark 341 .long sys_fanotify_mark
342 .long sys_prlimit64 /* 340 */ 342 .long sys_prlimit64 /* 340 */
343 .long sys_name_to_handle_at
344 .long sys_open_by_handle_at
345 .long sys_clock_adjtime
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index cb2c5069b016..624a2016198e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -105,6 +105,7 @@ SECTIONS
105 SCHED_TEXT 105 SCHED_TEXT
106 LOCK_TEXT 106 LOCK_TEXT
107 KPROBES_TEXT 107 KPROBES_TEXT
108 ENTRY_TEXT
108 IRQENTRY_TEXT 109 IRQENTRY_TEXT
109 *(.fixup) 110 *(.fixup)
110 *(.gnu.warning) 111 *(.gnu.warning)
@@ -230,7 +231,7 @@ SECTIONS
230 * output PHDR, so the next output section - .init.text - should 231 * output PHDR, so the next output section - .init.text - should
231 * start another segment - init. 232 * start another segment - init.
232 */ 233 */
233 PERCPU_VADDR(0, :percpu) 234 PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
234#endif 235#endif
235 236
236 INIT_TEXT_SECTION(PAGE_SIZE) 237 INIT_TEXT_SECTION(PAGE_SIZE)
@@ -318,7 +319,7 @@ SECTIONS
318 } 319 }
319 320
320#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) 321#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
321 PERCPU(THREAD_SIZE) 322 PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE)
322#endif 323#endif
323 324
324 . = ALIGN(PAGE_SIZE); 325 . = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1b950d151e58..9796c2f3d074 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
52EXPORT_SYMBOL(memset); 52EXPORT_SYMBOL(memset);
53EXPORT_SYMBOL(memcpy); 53EXPORT_SYMBOL(memcpy);
54EXPORT_SYMBOL(__memcpy); 54EXPORT_SYMBOL(__memcpy);
55EXPORT_SYMBOL(memmove);
55 56
56EXPORT_SYMBOL(empty_zero_page); 57EXPORT_SYMBOL(empty_zero_page);
57#ifndef CONFIG_PARAVIRT 58#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ceb2911aa439..c11514e9128b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -70,6 +70,7 @@ struct x86_init_ops x86_init __initdata = {
70 .setup_percpu_clockev = setup_boot_APIC_clock, 70 .setup_percpu_clockev = setup_boot_APIC_clock,
71 .tsc_pre_init = x86_init_noop, 71 .tsc_pre_init = x86_init_noop,
72 .timer_init = hpet_time_init, 72 .timer_init = hpet_time_init,
73 .wallclock_init = x86_init_noop,
73 }, 74 },
74 75
75 .iommu = { 76 .iommu = {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 54ce246a383e..63fec1531e89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2777,6 +2777,8 @@ static int dr_interception(struct vcpu_svm *svm)
2777 kvm_register_write(&svm->vcpu, reg, val); 2777 kvm_register_write(&svm->vcpu, reg, val);
2778 } 2778 }
2779 2779
2780 skip_emulated_instruction(&svm->vcpu);
2781
2780 return 1; 2782 return 1;
2781} 2783}
2782 2784
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 1357d7cf4ec8..db932760ea82 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -62,21 +62,21 @@ TRACE_EVENT(kvm_hv_hypercall,
62 TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), 62 TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
63 63
64 TP_STRUCT__entry( 64 TP_STRUCT__entry(
65 __field( __u16, code )
66 __field( bool, fast )
67 __field( __u16, rep_cnt ) 65 __field( __u16, rep_cnt )
68 __field( __u16, rep_idx ) 66 __field( __u16, rep_idx )
69 __field( __u64, ingpa ) 67 __field( __u64, ingpa )
70 __field( __u64, outgpa ) 68 __field( __u64, outgpa )
69 __field( __u16, code )
70 __field( bool, fast )
71 ), 71 ),
72 72
73 TP_fast_assign( 73 TP_fast_assign(
74 __entry->code = code;
75 __entry->fast = fast;
76 __entry->rep_cnt = rep_cnt; 74 __entry->rep_cnt = rep_cnt;
77 __entry->rep_idx = rep_idx; 75 __entry->rep_idx = rep_idx;
78 __entry->ingpa = ingpa; 76 __entry->ingpa = ingpa;
79 __entry->outgpa = outgpa; 77 __entry->outgpa = outgpa;
78 __entry->code = code;
79 __entry->fast = fast;
80 ), 80 ),
81 81
82 TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", 82 TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0c..b9ec1c74943c 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -847,7 +847,7 @@ static void __init lguest_init_IRQ(void)
847void lguest_setup_irq(unsigned int irq) 847void lguest_setup_irq(unsigned int irq)
848{ 848{
849 irq_alloc_desc_at(irq, 0); 849 irq_alloc_desc_at(irq, 0);
850 set_irq_chip_and_handler_name(irq, &lguest_irq_controller, 850 irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
851 handle_level_irq, "level"); 851 handle_level_irq, "level");
852} 852}
853 853
@@ -995,7 +995,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
995static void lguest_time_init(void) 995static void lguest_time_init(void)
996{ 996{
997 /* Set up the timer interrupt (0) to go to our simple timer routine */ 997 /* Set up the timer interrupt (0) to go to our simple timer routine */
998 set_irq_handler(0, lguest_time_irq); 998 irq_set_handler(0, lguest_time_irq);
999 999
1000 clocksource_register(&lguest_clock); 1000 clocksource_register(&lguest_clock);
1001 1001
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index e10cf070ede0..f2479f19ddde 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -42,4 +42,5 @@ else
42 lib-y += memmove_64.o memset_64.o 42 lib-y += memmove_64.o memset_64.o
43 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o 43 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
44 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o 44 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
45 lib-y += cmpxchg16b_emu.o
45endif 46endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 2cda60a06e65..e8e7e0d06f42 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -15,14 +15,12 @@
15 15
16/* if you want SMP support, implement these with real spinlocks */ 16/* if you want SMP support, implement these with real spinlocks */
17.macro LOCK reg 17.macro LOCK reg
18 pushfl 18 pushfl_cfi
19 CFI_ADJUST_CFA_OFFSET 4
20 cli 19 cli
21.endm 20.endm
22 21
23.macro UNLOCK reg 22.macro UNLOCK reg
24 popfl 23 popfl_cfi
25 CFI_ADJUST_CFA_OFFSET -4
26.endm 24.endm
27 25
28#define BEGIN(op) \ 26#define BEGIN(op) \
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 71e080de3352..391a083674b4 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -14,14 +14,12 @@
14#include <asm/dwarf2.h> 14#include <asm/dwarf2.h>
15 15
16.macro SAVE reg 16.macro SAVE reg
17 pushl %\reg 17 pushl_cfi %\reg
18 CFI_ADJUST_CFA_OFFSET 4
19 CFI_REL_OFFSET \reg, 0 18 CFI_REL_OFFSET \reg, 0
20.endm 19.endm
21 20
22.macro RESTORE reg 21.macro RESTORE reg
23 popl %\reg 22 popl_cfi %\reg
24 CFI_ADJUST_CFA_OFFSET -4
25 CFI_RESTORE \reg 23 CFI_RESTORE \reg
26.endm 24.endm
27 25
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index adbccd0bbb78..78d16a554db0 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
50 */ 50 */
51ENTRY(csum_partial) 51ENTRY(csum_partial)
52 CFI_STARTPROC 52 CFI_STARTPROC
53 pushl %esi 53 pushl_cfi %esi
54 CFI_ADJUST_CFA_OFFSET 4
55 CFI_REL_OFFSET esi, 0 54 CFI_REL_OFFSET esi, 0
56 pushl %ebx 55 pushl_cfi %ebx
57 CFI_ADJUST_CFA_OFFSET 4
58 CFI_REL_OFFSET ebx, 0 56 CFI_REL_OFFSET ebx, 0
59 movl 20(%esp),%eax # Function arg: unsigned int sum 57 movl 20(%esp),%eax # Function arg: unsigned int sum
60 movl 16(%esp),%ecx # Function arg: int len 58 movl 16(%esp),%ecx # Function arg: int len
@@ -132,11 +130,9 @@ ENTRY(csum_partial)
132 jz 8f 130 jz 8f
133 roll $8, %eax 131 roll $8, %eax
1348: 1328:
135 popl %ebx 133 popl_cfi %ebx
136 CFI_ADJUST_CFA_OFFSET -4
137 CFI_RESTORE ebx 134 CFI_RESTORE ebx
138 popl %esi 135 popl_cfi %esi
139 CFI_ADJUST_CFA_OFFSET -4
140 CFI_RESTORE esi 136 CFI_RESTORE esi
141 ret 137 ret
142 CFI_ENDPROC 138 CFI_ENDPROC
@@ -148,11 +144,9 @@ ENDPROC(csum_partial)
148 144
149ENTRY(csum_partial) 145ENTRY(csum_partial)
150 CFI_STARTPROC 146 CFI_STARTPROC
151 pushl %esi 147 pushl_cfi %esi
152 CFI_ADJUST_CFA_OFFSET 4
153 CFI_REL_OFFSET esi, 0 148 CFI_REL_OFFSET esi, 0
154 pushl %ebx 149 pushl_cfi %ebx
155 CFI_ADJUST_CFA_OFFSET 4
156 CFI_REL_OFFSET ebx, 0 150 CFI_REL_OFFSET ebx, 0
157 movl 20(%esp),%eax # Function arg: unsigned int sum 151 movl 20(%esp),%eax # Function arg: unsigned int sum
158 movl 16(%esp),%ecx # Function arg: int len 152 movl 16(%esp),%ecx # Function arg: int len
@@ -260,11 +254,9 @@ ENTRY(csum_partial)
260 jz 90f 254 jz 90f
261 roll $8, %eax 255 roll $8, %eax
26290: 25690:
263 popl %ebx 257 popl_cfi %ebx
264 CFI_ADJUST_CFA_OFFSET -4
265 CFI_RESTORE ebx 258 CFI_RESTORE ebx
266 popl %esi 259 popl_cfi %esi
267 CFI_ADJUST_CFA_OFFSET -4
268 CFI_RESTORE esi 260 CFI_RESTORE esi
269 ret 261 ret
270 CFI_ENDPROC 262 CFI_ENDPROC
@@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic)
309 CFI_STARTPROC 301 CFI_STARTPROC
310 subl $4,%esp 302 subl $4,%esp
311 CFI_ADJUST_CFA_OFFSET 4 303 CFI_ADJUST_CFA_OFFSET 4
312 pushl %edi 304 pushl_cfi %edi
313 CFI_ADJUST_CFA_OFFSET 4
314 CFI_REL_OFFSET edi, 0 305 CFI_REL_OFFSET edi, 0
315 pushl %esi 306 pushl_cfi %esi
316 CFI_ADJUST_CFA_OFFSET 4
317 CFI_REL_OFFSET esi, 0 307 CFI_REL_OFFSET esi, 0
318 pushl %ebx 308 pushl_cfi %ebx
319 CFI_ADJUST_CFA_OFFSET 4
320 CFI_REL_OFFSET ebx, 0 309 CFI_REL_OFFSET ebx, 0
321 movl ARGBASE+16(%esp),%eax # sum 310 movl ARGBASE+16(%esp),%eax # sum
322 movl ARGBASE+12(%esp),%ecx # len 311 movl ARGBASE+12(%esp),%ecx # len
@@ -426,17 +415,13 @@ DST( movb %cl, (%edi) )
426 415
427.previous 416.previous
428 417
429 popl %ebx 418 popl_cfi %ebx
430 CFI_ADJUST_CFA_OFFSET -4
431 CFI_RESTORE ebx 419 CFI_RESTORE ebx
432 popl %esi 420 popl_cfi %esi
433 CFI_ADJUST_CFA_OFFSET -4
434 CFI_RESTORE esi 421 CFI_RESTORE esi
435 popl %edi 422 popl_cfi %edi
436 CFI_ADJUST_CFA_OFFSET -4
437 CFI_RESTORE edi 423 CFI_RESTORE edi
438 popl %ecx # equivalent to addl $4,%esp 424 popl_cfi %ecx # equivalent to addl $4,%esp
439 CFI_ADJUST_CFA_OFFSET -4
440 ret 425 ret
441 CFI_ENDPROC 426 CFI_ENDPROC
442ENDPROC(csum_partial_copy_generic) 427ENDPROC(csum_partial_copy_generic)
@@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic)
459 444
460ENTRY(csum_partial_copy_generic) 445ENTRY(csum_partial_copy_generic)
461 CFI_STARTPROC 446 CFI_STARTPROC
462 pushl %ebx 447 pushl_cfi %ebx
463 CFI_ADJUST_CFA_OFFSET 4
464 CFI_REL_OFFSET ebx, 0 448 CFI_REL_OFFSET ebx, 0
465 pushl %edi 449 pushl_cfi %edi
466 CFI_ADJUST_CFA_OFFSET 4
467 CFI_REL_OFFSET edi, 0 450 CFI_REL_OFFSET edi, 0
468 pushl %esi 451 pushl_cfi %esi
469 CFI_ADJUST_CFA_OFFSET 4
470 CFI_REL_OFFSET esi, 0 452 CFI_REL_OFFSET esi, 0
471 movl ARGBASE+4(%esp),%esi #src 453 movl ARGBASE+4(%esp),%esi #src
472 movl ARGBASE+8(%esp),%edi #dst 454 movl ARGBASE+8(%esp),%edi #dst
@@ -527,14 +509,11 @@ DST( movb %dl, (%edi) )
527 jmp 7b 509 jmp 7b
528.previous 510.previous
529 511
530 popl %esi 512 popl_cfi %esi
531 CFI_ADJUST_CFA_OFFSET -4
532 CFI_RESTORE esi 513 CFI_RESTORE esi
533 popl %edi 514 popl_cfi %edi
534 CFI_ADJUST_CFA_OFFSET -4
535 CFI_RESTORE edi 515 CFI_RESTORE edi
536 popl %ebx 516 popl_cfi %ebx
537 CFI_ADJUST_CFA_OFFSET -4
538 CFI_RESTORE ebx 517 CFI_RESTORE ebx
539 ret 518 ret
540 CFI_ENDPROC 519 CFI_ENDPROC
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
new file mode 100644
index 000000000000..3e8b08a6de2b
--- /dev/null
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -0,0 +1,59 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; version 2
5 * of the License.
6 *
7 */
8#include <linux/linkage.h>
9#include <asm/alternative-asm.h>
10#include <asm/frame.h>
11#include <asm/dwarf2.h>
12
13.text
14
15/*
16 * Inputs:
17 * %rsi : memory location to compare
18 * %rax : low 64 bits of old value
19 * %rdx : high 64 bits of old value
20 * %rbx : low 64 bits of new value
21 * %rcx : high 64 bits of new value
22 * %al : Operation successful
23 */
24ENTRY(this_cpu_cmpxchg16b_emu)
25CFI_STARTPROC
26
27#
28# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
29# via the ZF. Caller will access %al to get result.
30#
31# Note that this is only useful for a cpuops operation. Meaning that we
32# do *not* have a fully atomic operation but just an operation that is
33# *atomic* on a single cpu (as provided by the this_cpu_xx class of
34# macros).
35#
36this_cpu_cmpxchg16b_emu:
37 pushf
38 cli
39
40 cmpq %gs:(%rsi), %rax
41 jne not_same
42 cmpq %gs:8(%rsi), %rdx
43 jne not_same
44
45 movq %rbx, %gs:(%rsi)
46 movq %rcx, %gs:8(%rsi)
47
48 popf
49 mov $1, %al
50 ret
51
52 not_same:
53 popf
54 xor %al,%al
55 ret
56
57CFI_ENDPROC
58
59ENDPROC(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 000000000000..0ecb8433e5a8
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,197 @@
1/*
2 * Normally compiler builtins are used, but sometimes the compiler calls out
3 * of line code. Based on asm-i386/string.h.
4 *
5 * This assembly file is re-written from memmove_64.c file.
6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
7 */
8#define _STRING_C
9#include <linux/linkage.h>
10#include <asm/dwarf2.h>
11
12#undef memmove
13
14/*
15 * Implement memmove(). This can handle overlap between src and dst.
16 *
17 * Input:
18 * rdi: dest
19 * rsi: src
20 * rdx: count
21 *
22 * Output:
23 * rax: dest
24 */
25ENTRY(memmove)
26 CFI_STARTPROC
27 /* Handle more 32bytes in loop */
28 mov %rdi, %rax
29 cmp $0x20, %rdx
30 jb 1f
31
32 /* Decide forward/backward copy mode */
33 cmp %rdi, %rsi
34 jb 2f
35
36 /*
37 * movsq instruction have many startup latency
38 * so we handle small size by general register.
39 */
40 cmp $680, %rdx
41 jb 3f
42 /*
43 * movsq instruction is only good for aligned case.
44 */
45
46 cmpb %dil, %sil
47 je 4f
483:
49 sub $0x20, %rdx
50 /*
51 * We gobble 32byts forward in each loop.
52 */
535:
54 sub $0x20, %rdx
55 movq 0*8(%rsi), %r11
56 movq 1*8(%rsi), %r10
57 movq 2*8(%rsi), %r9
58 movq 3*8(%rsi), %r8
59 leaq 4*8(%rsi), %rsi
60
61 movq %r11, 0*8(%rdi)
62 movq %r10, 1*8(%rdi)
63 movq %r9, 2*8(%rdi)
64 movq %r8, 3*8(%rdi)
65 leaq 4*8(%rdi), %rdi
66 jae 5b
67 addq $0x20, %rdx
68 jmp 1f
69 /*
70 * Handle data forward by movsq.
71 */
72 .p2align 4
734:
74 movq %rdx, %rcx
75 movq -8(%rsi, %rdx), %r11
76 lea -8(%rdi, %rdx), %r10
77 shrq $3, %rcx
78 rep movsq
79 movq %r11, (%r10)
80 jmp 13f
81 /*
82 * Handle data backward by movsq.
83 */
84 .p2align 4
857:
86 movq %rdx, %rcx
87 movq (%rsi), %r11
88 movq %rdi, %r10
89 leaq -8(%rsi, %rdx), %rsi
90 leaq -8(%rdi, %rdx), %rdi
91 shrq $3, %rcx
92 std
93 rep movsq
94 cld
95 movq %r11, (%r10)
96 jmp 13f
97
98 /*
99 * Start to prepare for backward copy.
100 */
101 .p2align 4
1022:
103 cmp $680, %rdx
104 jb 6f
105 cmp %dil, %sil
106 je 7b
1076:
108 /*
109 * Calculate copy position to tail.
110 */
111 addq %rdx, %rsi
112 addq %rdx, %rdi
113 subq $0x20, %rdx
114 /*
115 * We gobble 32byts backward in each loop.
116 */
1178:
118 subq $0x20, %rdx
119 movq -1*8(%rsi), %r11
120 movq -2*8(%rsi), %r10
121 movq -3*8(%rsi), %r9
122 movq -4*8(%rsi), %r8
123 leaq -4*8(%rsi), %rsi
124
125 movq %r11, -1*8(%rdi)
126 movq %r10, -2*8(%rdi)
127 movq %r9, -3*8(%rdi)
128 movq %r8, -4*8(%rdi)
129 leaq -4*8(%rdi), %rdi
130 jae 8b
131 /*
132 * Calculate copy position to head.
133 */
134 addq $0x20, %rdx
135 subq %rdx, %rsi
136 subq %rdx, %rdi
1371:
138 cmpq $16, %rdx
139 jb 9f
140 /*
141 * Move data from 16 bytes to 31 bytes.
142 */
143 movq 0*8(%rsi), %r11
144 movq 1*8(%rsi), %r10
145 movq -2*8(%rsi, %rdx), %r9
146 movq -1*8(%rsi, %rdx), %r8
147 movq %r11, 0*8(%rdi)
148 movq %r10, 1*8(%rdi)
149 movq %r9, -2*8(%rdi, %rdx)
150 movq %r8, -1*8(%rdi, %rdx)
151 jmp 13f
152 .p2align 4
1539:
154 cmpq $8, %rdx
155 jb 10f
156 /*
157 * Move data from 8 bytes to 15 bytes.
158 */
159 movq 0*8(%rsi), %r11
160 movq -1*8(%rsi, %rdx), %r10
161 movq %r11, 0*8(%rdi)
162 movq %r10, -1*8(%rdi, %rdx)
163 jmp 13f
16410:
165 cmpq $4, %rdx
166 jb 11f
167 /*
168 * Move data from 4 bytes to 7 bytes.
169 */
170 movl (%rsi), %r11d
171 movl -4(%rsi, %rdx), %r10d
172 movl %r11d, (%rdi)
173 movl %r10d, -4(%rdi, %rdx)
174 jmp 13f
17511:
176 cmp $2, %rdx
177 jb 12f
178 /*
179 * Move data from 2 bytes to 3 bytes.
180 */
181 movw (%rsi), %r11w
182 movw -2(%rsi, %rdx), %r10w
183 movw %r11w, (%rdi)
184 movw %r10w, -2(%rdi, %rdx)
185 jmp 13f
18612:
187 cmp $1, %rdx
188 jb 13f
189 /*
190 * Move data for 1 byte.
191 */
192 movb (%rsi), %r11b
193 movb %r11b, (%rdi)
19413:
195 retq
196 CFI_ENDPROC
197ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 6d0f0ec41b34..000000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,192 +0,0 @@
1/* Normally compiler builtins are used, but sometimes the compiler calls out
2 of line code. Based on asm-i386/string.h.
3 */
4#define _STRING_C
5#include <linux/string.h>
6#include <linux/module.h>
7
8#undef memmove
9void *memmove(void *dest, const void *src, size_t count)
10{
11 unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
12 char *ret;
13
14 __asm__ __volatile__(
15 /* Handle more 32bytes in loop */
16 "mov %2, %3\n\t"
17 "cmp $0x20, %0\n\t"
18 "jb 1f\n\t"
19
20 /* Decide forward/backward copy mode */
21 "cmp %2, %1\n\t"
22 "jb 2f\n\t"
23
24 /*
25 * movsq instruction have many startup latency
26 * so we handle small size by general register.
27 */
28 "cmp $680, %0\n\t"
29 "jb 3f\n\t"
30 /*
31 * movsq instruction is only good for aligned case.
32 */
33 "cmpb %%dil, %%sil\n\t"
34 "je 4f\n\t"
35 "3:\n\t"
36 "sub $0x20, %0\n\t"
37 /*
38 * We gobble 32byts forward in each loop.
39 */
40 "5:\n\t"
41 "sub $0x20, %0\n\t"
42 "movq 0*8(%1), %4\n\t"
43 "movq 1*8(%1), %5\n\t"
44 "movq 2*8(%1), %6\n\t"
45 "movq 3*8(%1), %7\n\t"
46 "leaq 4*8(%1), %1\n\t"
47
48 "movq %4, 0*8(%2)\n\t"
49 "movq %5, 1*8(%2)\n\t"
50 "movq %6, 2*8(%2)\n\t"
51 "movq %7, 3*8(%2)\n\t"
52 "leaq 4*8(%2), %2\n\t"
53 "jae 5b\n\t"
54 "addq $0x20, %0\n\t"
55 "jmp 1f\n\t"
56 /*
57 * Handle data forward by movsq.
58 */
59 ".p2align 4\n\t"
60 "4:\n\t"
61 "movq %0, %8\n\t"
62 "movq -8(%1, %0), %4\n\t"
63 "lea -8(%2, %0), %5\n\t"
64 "shrq $3, %8\n\t"
65 "rep movsq\n\t"
66 "movq %4, (%5)\n\t"
67 "jmp 13f\n\t"
68 /*
69 * Handle data backward by movsq.
70 */
71 ".p2align 4\n\t"
72 "7:\n\t"
73 "movq %0, %8\n\t"
74 "movq (%1), %4\n\t"
75 "movq %2, %5\n\t"
76 "leaq -8(%1, %0), %1\n\t"
77 "leaq -8(%2, %0), %2\n\t"
78 "shrq $3, %8\n\t"
79 "std\n\t"
80 "rep movsq\n\t"
81 "cld\n\t"
82 "movq %4, (%5)\n\t"
83 "jmp 13f\n\t"
84
85 /*
86 * Start to prepare for backward copy.
87 */
88 ".p2align 4\n\t"
89 "2:\n\t"
90 "cmp $680, %0\n\t"
91 "jb 6f \n\t"
92 "cmp %%dil, %%sil\n\t"
93 "je 7b \n\t"
94 "6:\n\t"
95 /*
96 * Calculate copy position to tail.
97 */
98 "addq %0, %1\n\t"
99 "addq %0, %2\n\t"
100 "subq $0x20, %0\n\t"
101 /*
102 * We gobble 32byts backward in each loop.
103 */
104 "8:\n\t"
105 "subq $0x20, %0\n\t"
106 "movq -1*8(%1), %4\n\t"
107 "movq -2*8(%1), %5\n\t"
108 "movq -3*8(%1), %6\n\t"
109 "movq -4*8(%1), %7\n\t"
110 "leaq -4*8(%1), %1\n\t"
111
112 "movq %4, -1*8(%2)\n\t"
113 "movq %5, -2*8(%2)\n\t"
114 "movq %6, -3*8(%2)\n\t"
115 "movq %7, -4*8(%2)\n\t"
116 "leaq -4*8(%2), %2\n\t"
117 "jae 8b\n\t"
118 /*
119 * Calculate copy position to head.
120 */
121 "addq $0x20, %0\n\t"
122 "subq %0, %1\n\t"
123 "subq %0, %2\n\t"
124 "1:\n\t"
125 "cmpq $16, %0\n\t"
126 "jb 9f\n\t"
127 /*
128 * Move data from 16 bytes to 31 bytes.
129 */
130 "movq 0*8(%1), %4\n\t"
131 "movq 1*8(%1), %5\n\t"
132 "movq -2*8(%1, %0), %6\n\t"
133 "movq -1*8(%1, %0), %7\n\t"
134 "movq %4, 0*8(%2)\n\t"
135 "movq %5, 1*8(%2)\n\t"
136 "movq %6, -2*8(%2, %0)\n\t"
137 "movq %7, -1*8(%2, %0)\n\t"
138 "jmp 13f\n\t"
139 ".p2align 4\n\t"
140 "9:\n\t"
141 "cmpq $8, %0\n\t"
142 "jb 10f\n\t"
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 "movq 0*8(%1), %4\n\t"
147 "movq -1*8(%1, %0), %5\n\t"
148 "movq %4, 0*8(%2)\n\t"
149 "movq %5, -1*8(%2, %0)\n\t"
150 "jmp 13f\n\t"
151 "10:\n\t"
152 "cmpq $4, %0\n\t"
153 "jb 11f\n\t"
154 /*
155 * Move data from 4 bytes to 7 bytes.
156 */
157 "movl (%1), %4d\n\t"
158 "movl -4(%1, %0), %5d\n\t"
159 "movl %4d, (%2)\n\t"
160 "movl %5d, -4(%2, %0)\n\t"
161 "jmp 13f\n\t"
162 "11:\n\t"
163 "cmp $2, %0\n\t"
164 "jb 12f\n\t"
165 /*
166 * Move data from 2 bytes to 3 bytes.
167 */
168 "movw (%1), %4w\n\t"
169 "movw -2(%1, %0), %5w\n\t"
170 "movw %4w, (%2)\n\t"
171 "movw %5w, -2(%2, %0)\n\t"
172 "jmp 13f\n\t"
173 "12:\n\t"
174 "cmp $1, %0\n\t"
175 "jb 13f\n\t"
176 /*
177 * Move data for 1 byte.
178 */
179 "movb (%1), %4b\n\t"
180 "movb %4b, (%2)\n\t"
181 "13:\n\t"
182 : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
183 "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
184 :"0" (count),
185 "1" (src),
186 "2" (dest)
187 :"memory");
188
189 return ret;
190
191}
192EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 41fcf00e49df..67743977398b 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -23,43 +23,50 @@
23#include <asm/dwarf2.h> 23#include <asm/dwarf2.h>
24 24
25#define save_common_regs \ 25#define save_common_regs \
26 pushq %rdi; \ 26 pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
27 pushq %rsi; \ 27 pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
28 pushq %rcx; \ 28 pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
29 pushq %r8; \ 29 pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
30 pushq %r9; \ 30 pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
31 pushq %r10; \ 31 pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
32 pushq %r11 32 pushq_cfi %r11; CFI_REL_OFFSET r11, 0
33 33
34#define restore_common_regs \ 34#define restore_common_regs \
35 popq %r11; \ 35 popq_cfi %r11; CFI_RESTORE r11; \
36 popq %r10; \ 36 popq_cfi %r10; CFI_RESTORE r10; \
37 popq %r9; \ 37 popq_cfi %r9; CFI_RESTORE r9; \
38 popq %r8; \ 38 popq_cfi %r8; CFI_RESTORE r8; \
39 popq %rcx; \ 39 popq_cfi %rcx; CFI_RESTORE rcx; \
40 popq %rsi; \ 40 popq_cfi %rsi; CFI_RESTORE rsi; \
41 popq %rdi 41 popq_cfi %rdi; CFI_RESTORE rdi
42 42
43/* Fix up special calling conventions */ 43/* Fix up special calling conventions */
44ENTRY(call_rwsem_down_read_failed) 44ENTRY(call_rwsem_down_read_failed)
45 CFI_STARTPROC
45 save_common_regs 46 save_common_regs
46 pushq %rdx 47 pushq_cfi %rdx
48 CFI_REL_OFFSET rdx, 0
47 movq %rax,%rdi 49 movq %rax,%rdi
48 call rwsem_down_read_failed 50 call rwsem_down_read_failed
49 popq %rdx 51 popq_cfi %rdx
52 CFI_RESTORE rdx
50 restore_common_regs 53 restore_common_regs
51 ret 54 ret
52 ENDPROC(call_rwsem_down_read_failed) 55 CFI_ENDPROC
56ENDPROC(call_rwsem_down_read_failed)
53 57
54ENTRY(call_rwsem_down_write_failed) 58ENTRY(call_rwsem_down_write_failed)
59 CFI_STARTPROC
55 save_common_regs 60 save_common_regs
56 movq %rax,%rdi 61 movq %rax,%rdi
57 call rwsem_down_write_failed 62 call rwsem_down_write_failed
58 restore_common_regs 63 restore_common_regs
59 ret 64 ret
60 ENDPROC(call_rwsem_down_write_failed) 65 CFI_ENDPROC
66ENDPROC(call_rwsem_down_write_failed)
61 67
62ENTRY(call_rwsem_wake) 68ENTRY(call_rwsem_wake)
69 CFI_STARTPROC
63 decl %edx /* do nothing if still outstanding active readers */ 70 decl %edx /* do nothing if still outstanding active readers */
64 jnz 1f 71 jnz 1f
65 save_common_regs 72 save_common_regs
@@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake)
67 call rwsem_wake 74 call rwsem_wake
68 restore_common_regs 75 restore_common_regs
691: ret 761: ret
70 ENDPROC(call_rwsem_wake) 77 CFI_ENDPROC
78ENDPROC(call_rwsem_wake)
71 79
72/* Fix up special calling conventions */ 80/* Fix up special calling conventions */
73ENTRY(call_rwsem_downgrade_wake) 81ENTRY(call_rwsem_downgrade_wake)
82 CFI_STARTPROC
74 save_common_regs 83 save_common_regs
75 pushq %rdx 84 pushq_cfi %rdx
85 CFI_REL_OFFSET rdx, 0
76 movq %rax,%rdi 86 movq %rax,%rdi
77 call rwsem_downgrade_wake 87 call rwsem_downgrade_wake
78 popq %rdx 88 popq_cfi %rdx
89 CFI_RESTORE rdx
79 restore_common_regs 90 restore_common_regs
80 ret 91 ret
81 ENDPROC(call_rwsem_downgrade_wake) 92 CFI_ENDPROC
93ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 648fe4741782..06691daa4108 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -36,7 +36,7 @@
36 */ 36 */
37#ifdef CONFIG_SMP 37#ifdef CONFIG_SMP
38ENTRY(__write_lock_failed) 38ENTRY(__write_lock_failed)
39 CFI_STARTPROC simple 39 CFI_STARTPROC
40 FRAME 40 FRAME
412: LOCK_PREFIX 412: LOCK_PREFIX
42 addl $ RW_LOCK_BIAS,(%eax) 42 addl $ RW_LOCK_BIAS,(%eax)
@@ -74,29 +74,23 @@ ENTRY(__read_lock_failed)
74/* Fix up special calling conventions */ 74/* Fix up special calling conventions */
75ENTRY(call_rwsem_down_read_failed) 75ENTRY(call_rwsem_down_read_failed)
76 CFI_STARTPROC 76 CFI_STARTPROC
77 push %ecx 77 pushl_cfi %ecx
78 CFI_ADJUST_CFA_OFFSET 4
79 CFI_REL_OFFSET ecx,0 78 CFI_REL_OFFSET ecx,0
80 push %edx 79 pushl_cfi %edx
81 CFI_ADJUST_CFA_OFFSET 4
82 CFI_REL_OFFSET edx,0 80 CFI_REL_OFFSET edx,0
83 call rwsem_down_read_failed 81 call rwsem_down_read_failed
84 pop %edx 82 popl_cfi %edx
85 CFI_ADJUST_CFA_OFFSET -4 83 popl_cfi %ecx
86 pop %ecx
87 CFI_ADJUST_CFA_OFFSET -4
88 ret 84 ret
89 CFI_ENDPROC 85 CFI_ENDPROC
90 ENDPROC(call_rwsem_down_read_failed) 86 ENDPROC(call_rwsem_down_read_failed)
91 87
92ENTRY(call_rwsem_down_write_failed) 88ENTRY(call_rwsem_down_write_failed)
93 CFI_STARTPROC 89 CFI_STARTPROC
94 push %ecx 90 pushl_cfi %ecx
95 CFI_ADJUST_CFA_OFFSET 4
96 CFI_REL_OFFSET ecx,0 91 CFI_REL_OFFSET ecx,0
97 calll rwsem_down_write_failed 92 calll rwsem_down_write_failed
98 pop %ecx 93 popl_cfi %ecx
99 CFI_ADJUST_CFA_OFFSET -4
100 ret 94 ret
101 CFI_ENDPROC 95 CFI_ENDPROC
102 ENDPROC(call_rwsem_down_write_failed) 96 ENDPROC(call_rwsem_down_write_failed)
@@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake)
105 CFI_STARTPROC 99 CFI_STARTPROC
106 decw %dx /* do nothing if still outstanding active readers */ 100 decw %dx /* do nothing if still outstanding active readers */
107 jnz 1f 101 jnz 1f
108 push %ecx 102 pushl_cfi %ecx
109 CFI_ADJUST_CFA_OFFSET 4
110 CFI_REL_OFFSET ecx,0 103 CFI_REL_OFFSET ecx,0
111 call rwsem_wake 104 call rwsem_wake
112 pop %ecx 105 popl_cfi %ecx
113 CFI_ADJUST_CFA_OFFSET -4
1141: ret 1061: ret
115 CFI_ENDPROC 107 CFI_ENDPROC
116 ENDPROC(call_rwsem_wake) 108 ENDPROC(call_rwsem_wake)
@@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake)
118/* Fix up special calling conventions */ 110/* Fix up special calling conventions */
119ENTRY(call_rwsem_downgrade_wake) 111ENTRY(call_rwsem_downgrade_wake)
120 CFI_STARTPROC 112 CFI_STARTPROC
121 push %ecx 113 pushl_cfi %ecx
122 CFI_ADJUST_CFA_OFFSET 4
123 CFI_REL_OFFSET ecx,0 114 CFI_REL_OFFSET ecx,0
124 push %edx 115 pushl_cfi %edx
125 CFI_ADJUST_CFA_OFFSET 4
126 CFI_REL_OFFSET edx,0 116 CFI_REL_OFFSET edx,0
127 call rwsem_downgrade_wake 117 call rwsem_downgrade_wake
128 pop %edx 118 popl_cfi %edx
129 CFI_ADJUST_CFA_OFFSET -4 119 popl_cfi %ecx
130 pop %ecx
131 CFI_ADJUST_CFA_OFFSET -4
132 ret 120 ret
133 CFI_ENDPROC 121 CFI_ENDPROC
134 ENDPROC(call_rwsem_downgrade_wake) 122 ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 650b11e00ecc..2930ae05d773 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -7,24 +7,6 @@
7 7
8 #include <linux/linkage.h> 8 #include <linux/linkage.h>
9 9
10#define ARCH_TRACE_IRQS_ON \
11 pushl %eax; \
12 pushl %ecx; \
13 pushl %edx; \
14 call trace_hardirqs_on; \
15 popl %edx; \
16 popl %ecx; \
17 popl %eax;
18
19#define ARCH_TRACE_IRQS_OFF \
20 pushl %eax; \
21 pushl %ecx; \
22 pushl %edx; \
23 call trace_hardirqs_off; \
24 popl %edx; \
25 popl %ecx; \
26 popl %eax;
27
28#ifdef CONFIG_TRACE_IRQFLAGS 10#ifdef CONFIG_TRACE_IRQFLAGS
29 /* put return address in eax (arg1) */ 11 /* put return address in eax (arg1) */
30 .macro thunk_ra name,func 12 .macro thunk_ra name,func
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index bf9a7d5a5428..782b082c9ff7 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -22,26 +22,6 @@
22 CFI_ENDPROC 22 CFI_ENDPROC
23 .endm 23 .endm
24 24
25 /* rdi: arg1 ... normal C conventions. rax is passed from C. */
26 .macro thunk_retrax name,func
27 .globl \name
28\name:
29 CFI_STARTPROC
30 SAVE_ARGS
31 call \func
32 jmp restore_norax
33 CFI_ENDPROC
34 .endm
35
36
37 .section .sched.text, "ax"
38#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
39 thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
40 thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
41 thunk rwsem_wake_thunk,rwsem_wake
42 thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
43#endif
44
45#ifdef CONFIG_TRACE_IRQFLAGS 25#ifdef CONFIG_TRACE_IRQFLAGS
46 /* put return address in rdi (arg1) */ 26 /* put return address in rdi (arg1) */
47 .macro thunk_ra name,func 27 .macro thunk_ra name,func
@@ -72,10 +52,3 @@ restore:
72 RESTORE_ARGS 52 RESTORE_ARGS
73 ret 53 ret
74 CFI_ENDPROC 54 CFI_ENDPROC
75
76 CFI_STARTPROC
77 SAVE_ARGS
78restore_norax:
79 RESTORE_ARGS 1
80 ret
81 CFI_ENDPROC
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 09df2f9a3d69..3e608edf9958 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o 25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
26obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o 26obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
28 29
29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 30obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
30 31
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c435ed..0919c26820d4 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -26,9 +26,7 @@
26#include <asm/apic.h> 26#include <asm/apic.h>
27#include <asm/amd_nb.h> 27#include <asm/amd_nb.h>
28 28
29static struct bootnode __initdata nodes[8];
30static unsigned char __initdata nodeids[8]; 29static unsigned char __initdata nodeids[8];
31static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
32 30
33static __init int find_northbridge(void) 31static __init int find_northbridge(void)
34{ 32{
@@ -51,7 +49,7 @@ static __init int find_northbridge(void)
51 return num; 49 return num;
52 } 50 }
53 51
54 return -1; 52 return -ENOENT;
55} 53}
56 54
57static __init void early_get_boot_cpu_id(void) 55static __init void early_get_boot_cpu_id(void)
@@ -69,17 +67,18 @@ static __init void early_get_boot_cpu_id(void)
69#endif 67#endif
70} 68}
71 69
72int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) 70int __init amd_numa_init(void)
73{ 71{
74 unsigned long start = PFN_PHYS(start_pfn); 72 unsigned long start = PFN_PHYS(0);
75 unsigned long end = PFN_PHYS(end_pfn); 73 unsigned long end = PFN_PHYS(max_pfn);
76 unsigned numnodes; 74 unsigned numnodes;
77 unsigned long prevbase; 75 unsigned long prevbase;
78 int i, nb, found = 0; 76 int i, j, nb;
79 u32 nodeid, reg; 77 u32 nodeid, reg;
78 unsigned int bits, cores, apicid_base;
80 79
81 if (!early_pci_allowed()) 80 if (!early_pci_allowed())
82 return -1; 81 return -EINVAL;
83 82
84 nb = find_northbridge(); 83 nb = find_northbridge();
85 if (nb < 0) 84 if (nb < 0)
@@ -90,7 +89,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
90 reg = read_pci_config(0, nb, 0, 0x60); 89 reg = read_pci_config(0, nb, 0, 0x60);
91 numnodes = ((reg >> 4) & 0xF) + 1; 90 numnodes = ((reg >> 4) & 0xF) + 1;
92 if (numnodes <= 1) 91 if (numnodes <= 1)
93 return -1; 92 return -ENOENT;
94 93
95 pr_info("Number of physical nodes %d\n", numnodes); 94 pr_info("Number of physical nodes %d\n", numnodes);
96 95
@@ -121,9 +120,9 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
121 if ((base >> 8) & 3 || (limit >> 8) & 3) { 120 if ((base >> 8) & 3 || (limit >> 8) & 3) {
122 pr_err("Node %d using interleaving mode %lx/%lx\n", 121 pr_err("Node %d using interleaving mode %lx/%lx\n",
123 nodeid, (base >> 8) & 3, (limit >> 8) & 3); 122 nodeid, (base >> 8) & 3, (limit >> 8) & 3);
124 return -1; 123 return -EINVAL;
125 } 124 }
126 if (node_isset(nodeid, nodes_parsed)) { 125 if (node_isset(nodeid, numa_nodes_parsed)) {
127 pr_info("Node %d already present, skipping\n", 126 pr_info("Node %d already present, skipping\n",
128 nodeid); 127 nodeid);
129 continue; 128 continue;
@@ -160,117 +159,28 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
160 if (prevbase > base) { 159 if (prevbase > base) {
161 pr_err("Node map not sorted %lx,%lx\n", 160 pr_err("Node map not sorted %lx,%lx\n",
162 prevbase, base); 161 prevbase, base);
163 return -1; 162 return -EINVAL;
164 } 163 }
165 164
166 pr_info("Node %d MemBase %016lx Limit %016lx\n", 165 pr_info("Node %d MemBase %016lx Limit %016lx\n",
167 nodeid, base, limit); 166 nodeid, base, limit);
168 167
169 found++;
170
171 nodes[nodeid].start = base;
172 nodes[nodeid].end = limit;
173
174 prevbase = base; 168 prevbase = base;
175 169 numa_add_memblk(nodeid, base, limit);
176 node_set(nodeid, nodes_parsed); 170 node_set(nodeid, numa_nodes_parsed);
177 }
178
179 if (!found)
180 return -1;
181 return 0;
182}
183
184#ifdef CONFIG_NUMA_EMU
185static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
186 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
187};
188
189void __init amd_get_nodes(struct bootnode *physnodes)
190{
191 int i;
192
193 for_each_node_mask(i, nodes_parsed) {
194 physnodes[i].start = nodes[i].start;
195 physnodes[i].end = nodes[i].end;
196 } 171 }
197}
198
199static int __init find_node_by_addr(unsigned long addr)
200{
201 int ret = NUMA_NO_NODE;
202 int i;
203
204 for (i = 0; i < 8; i++)
205 if (addr >= nodes[i].start && addr < nodes[i].end) {
206 ret = i;
207 break;
208 }
209 return ret;
210}
211 172
212/* 173 if (!nodes_weight(numa_nodes_parsed))
213 * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be 174 return -ENOENT;
214 * setup to represent the physical topology but reflect the emulated
215 * environment. For each emulated node, the real node which it appears on is
216 * found and a fake pxm to nid mapping is created which mirrors the actual
217 * locality. node_distance() then represents the correct distances between
218 * emulated nodes by using the fake acpi mappings to pxms.
219 */
220void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
221{
222 unsigned int bits;
223 unsigned int cores;
224 unsigned int apicid_base = 0;
225 int i;
226 175
176 /*
177 * We seem to have valid NUMA configuration. Map apicids to nodes
178 * using the coreid bits from early_identify_cpu.
179 */
227 bits = boot_cpu_data.x86_coreid_bits; 180 bits = boot_cpu_data.x86_coreid_bits;
228 cores = 1 << bits; 181 cores = 1 << bits;
229 early_get_boot_cpu_id();
230 if (boot_cpu_physical_apicid > 0)
231 apicid_base = boot_cpu_physical_apicid;
232
233 for (i = 0; i < nr_nodes; i++) {
234 int index;
235 int nid;
236 int j;
237
238 nid = find_node_by_addr(nodes[i].start);
239 if (nid == NUMA_NO_NODE)
240 continue;
241
242 index = nodeids[nid] << bits;
243 if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
244 for (j = apicid_base; j < cores + apicid_base; j++)
245 fake_apicid_to_node[index + j] = i;
246#ifdef CONFIG_ACPI_NUMA
247 __acpi_map_pxm_to_node(nid, i);
248#endif
249 }
250 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
251}
252#endif /* CONFIG_NUMA_EMU */
253
254int __init amd_scan_nodes(void)
255{
256 unsigned int bits;
257 unsigned int cores;
258 unsigned int apicid_base;
259 int i;
260
261 BUG_ON(nodes_empty(nodes_parsed));
262 node_possible_map = nodes_parsed;
263 memnode_shift = compute_hash_shift(nodes, 8, NULL);
264 if (memnode_shift < 0) {
265 pr_err("No NUMA node hash function found. Contact maintainer\n");
266 return -1;
267 }
268 pr_info("Using node hash shift of %d\n", memnode_shift);
269
270 /* use the coreid bits from early_identify_cpu */
271 bits = boot_cpu_data.x86_coreid_bits;
272 cores = (1<<bits);
273 apicid_base = 0; 182 apicid_base = 0;
183
274 /* get the APIC ID of the BSP early for systems with apicid lifting */ 184 /* get the APIC ID of the BSP early for systems with apicid lifting */
275 early_get_boot_cpu_id(); 185 early_get_boot_cpu_id();
276 if (boot_cpu_physical_apicid > 0) { 186 if (boot_cpu_physical_apicid > 0) {
@@ -278,17 +188,9 @@ int __init amd_scan_nodes(void)
278 apicid_base = boot_cpu_physical_apicid; 188 apicid_base = boot_cpu_physical_apicid;
279 } 189 }
280 190
281 for_each_node_mask(i, node_possible_map) { 191 for_each_node_mask(i, numa_nodes_parsed)
282 int j;
283
284 memblock_x86_register_active_regions(i,
285 nodes[i].start >> PAGE_SHIFT,
286 nodes[i].end >> PAGE_SHIFT);
287 for (j = apicid_base; j < cores + apicid_base; j++) 192 for (j = apicid_base; j < cores + apicid_base; j++)
288 apicid_to_node[(i << bits) + j] = i; 193 set_apicid_to_node((i << bits) + j, i);
289 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
290 }
291 194
292 numa_init_array();
293 return 0; 195 return 0;
294} 196}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a4..20e3f8702d1e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
229 for (address = VMALLOC_START & PMD_MASK; 229 for (address = VMALLOC_START & PMD_MASK;
230 address >= TASK_SIZE && address < FIXADDR_TOP; 230 address >= TASK_SIZE && address < FIXADDR_TOP;
231 address += PMD_SIZE) { 231 address += PMD_SIZE) {
232
233 unsigned long flags;
234 struct page *page; 232 struct page *page;
235 233
236 spin_lock_irqsave(&pgd_lock, flags); 234 spin_lock(&pgd_lock);
237 list_for_each_entry(page, &pgd_list, lru) { 235 list_for_each_entry(page, &pgd_list, lru) {
238 spinlock_t *pgt_lock; 236 spinlock_t *pgt_lock;
239 pmd_t *ret; 237 pmd_t *ret;
240 238
239 /* the pgt_lock only for Xen */
241 pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 240 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
242 241
243 spin_lock(pgt_lock); 242 spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
247 if (!ret) 246 if (!ret)
248 break; 247 break;
249 } 248 }
250 spin_unlock_irqrestore(&pgd_lock, flags); 249 spin_unlock(&pgd_lock);
251 } 250 }
252} 251}
253 252
@@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
828 unsigned long address, unsigned int fault) 827 unsigned long address, unsigned int fault)
829{ 828{
830 if (fault & VM_FAULT_OOM) { 829 if (fault & VM_FAULT_OOM) {
830 /* Kernel mode? Handle exceptions or die: */
831 if (!(error_code & PF_USER)) {
832 up_read(&current->mm->mmap_sem);
833 no_context(regs, error_code, address);
834 return;
835 }
836
831 out_of_memory(regs, error_code, address); 837 out_of_memory(regs, error_code, address);
832 } else { 838 } else {
833 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 839 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 947f42abe820..286d289b039b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,9 +18,9 @@
18 18
19DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 19DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
20 20
21unsigned long __initdata e820_table_start; 21unsigned long __initdata pgt_buf_start;
22unsigned long __meminitdata e820_table_end; 22unsigned long __meminitdata pgt_buf_end;
23unsigned long __meminitdata e820_table_top; 23unsigned long __meminitdata pgt_buf_top;
24 24
25int after_bootmem; 25int after_bootmem;
26 26
@@ -33,7 +33,7 @@ int direct_gbpages
33static void __init find_early_table_space(unsigned long end, int use_pse, 33static void __init find_early_table_space(unsigned long end, int use_pse,
34 int use_gbpages) 34 int use_gbpages)
35{ 35{
36 unsigned long puds, pmds, ptes, tables, start; 36 unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
37 phys_addr_t base; 37 phys_addr_t base;
38 38
39 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 39 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -65,29 +65,20 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
65#ifdef CONFIG_X86_32 65#ifdef CONFIG_X86_32
66 /* for fixmap */ 66 /* for fixmap */
67 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); 67 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
68#endif
69 68
70 /* 69 good_end = max_pfn_mapped << PAGE_SHIFT;
71 * RED-PEN putting page tables only on node 0 could
72 * cause a hotspot and fill up ZONE_DMA. The page tables
73 * need roughly 0.5KB per GB.
74 */
75#ifdef CONFIG_X86_32
76 start = 0x7000;
77#else
78 start = 0x8000;
79#endif 70#endif
80 base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT, 71
81 tables, PAGE_SIZE); 72 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
82 if (base == MEMBLOCK_ERROR) 73 if (base == MEMBLOCK_ERROR)
83 panic("Cannot find space for the kernel page tables"); 74 panic("Cannot find space for the kernel page tables");
84 75
85 e820_table_start = base >> PAGE_SHIFT; 76 pgt_buf_start = base >> PAGE_SHIFT;
86 e820_table_end = e820_table_start; 77 pgt_buf_end = pgt_buf_start;
87 e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); 78 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
88 79
89 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", 80 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
90 end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); 81 end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
91} 82}
92 83
93struct map_range { 84struct map_range {
@@ -279,30 +270,11 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
279 load_cr3(swapper_pg_dir); 270 load_cr3(swapper_pg_dir);
280#endif 271#endif
281 272
282#ifdef CONFIG_X86_64
283 if (!after_bootmem && !start) {
284 pud_t *pud;
285 pmd_t *pmd;
286
287 mmu_cr4_features = read_cr4();
288
289 /*
290 * _brk_end cannot change anymore, but it and _end may be
291 * located on different 2M pages. cleanup_highmap(), however,
292 * can only consider _end when it runs, so destroy any
293 * mappings beyond _brk_end here.
294 */
295 pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
296 pmd = pmd_offset(pud, _brk_end - 1);
297 while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
298 pmd_clear(pmd);
299 }
300#endif
301 __flush_tlb_all(); 273 __flush_tlb_all();
302 274
303 if (!after_bootmem && e820_table_end > e820_table_start) 275 if (!after_bootmem && pgt_buf_end > pgt_buf_start)
304 memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT, 276 memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
305 e820_table_end << PAGE_SHIFT, "PGTABLE"); 277 pgt_buf_end << PAGE_SHIFT, "PGTABLE");
306 278
307 if (!after_bootmem) 279 if (!after_bootmem)
308 early_memtest(start, end); 280 early_memtest(start, end);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c821074b7f0b..73ad7ebd6e9c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -62,10 +62,10 @@ bool __read_mostly __vmalloc_start_set = false;
62 62
63static __init void *alloc_low_page(void) 63static __init void *alloc_low_page(void)
64{ 64{
65 unsigned long pfn = e820_table_end++; 65 unsigned long pfn = pgt_buf_end++;
66 void *adr; 66 void *adr;
67 67
68 if (pfn >= e820_table_top) 68 if (pfn >= pgt_buf_top)
69 panic("alloc_low_page: ran out of memory"); 69 panic("alloc_low_page: ran out of memory");
70 70
71 adr = __va(pfn * PAGE_SIZE); 71 adr = __va(pfn * PAGE_SIZE);
@@ -163,8 +163,8 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
163 if (pmd_idx_kmap_begin != pmd_idx_kmap_end 163 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
164 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin 164 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
165 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end 165 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
166 && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start 166 && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
167 || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { 167 || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
168 pte_t *newpte; 168 pte_t *newpte;
169 int i; 169 int i;
170 170
@@ -644,8 +644,7 @@ void __init find_low_pfn_range(void)
644} 644}
645 645
646#ifndef CONFIG_NEED_MULTIPLE_NODES 646#ifndef CONFIG_NEED_MULTIPLE_NODES
647void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 647void __init initmem_init(void)
648 int acpi, int k8)
649{ 648{
650#ifdef CONFIG_HIGHMEM 649#ifdef CONFIG_HIGHMEM
651 highstart_pfn = highend_pfn = max_pfn; 650 highstart_pfn = highend_pfn = max_pfn;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af80..a08a62cb136e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
105 105
106 for (address = start; address <= end; address += PGDIR_SIZE) { 106 for (address = start; address <= end; address += PGDIR_SIZE) {
107 const pgd_t *pgd_ref = pgd_offset_k(address); 107 const pgd_t *pgd_ref = pgd_offset_k(address);
108 unsigned long flags;
109 struct page *page; 108 struct page *page;
110 109
111 if (pgd_none(*pgd_ref)) 110 if (pgd_none(*pgd_ref))
112 continue; 111 continue;
113 112
114 spin_lock_irqsave(&pgd_lock, flags); 113 spin_lock(&pgd_lock);
115 list_for_each_entry(page, &pgd_list, lru) { 114 list_for_each_entry(page, &pgd_list, lru) {
116 pgd_t *pgd; 115 pgd_t *pgd;
117 spinlock_t *pgt_lock; 116 spinlock_t *pgt_lock;
118 117
119 pgd = (pgd_t *)page_address(page) + pgd_index(address); 118 pgd = (pgd_t *)page_address(page) + pgd_index(address);
119 /* the pgt_lock only for Xen */
120 pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 120 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
121 spin_lock(pgt_lock); 121 spin_lock(pgt_lock);
122 122
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
128 128
129 spin_unlock(pgt_lock); 129 spin_unlock(pgt_lock);
130 } 130 }
131 spin_unlock_irqrestore(&pgd_lock, flags); 131 spin_unlock(&pgd_lock);
132 } 132 }
133} 133}
134 134
@@ -314,7 +314,7 @@ void __init cleanup_highmap(void)
314 314
315static __ref void *alloc_low_page(unsigned long *phys) 315static __ref void *alloc_low_page(unsigned long *phys)
316{ 316{
317 unsigned long pfn = e820_table_end++; 317 unsigned long pfn = pgt_buf_end++;
318 void *adr; 318 void *adr;
319 319
320 if (after_bootmem) { 320 if (after_bootmem) {
@@ -324,7 +324,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
324 return adr; 324 return adr;
325 } 325 }
326 326
327 if (pfn >= e820_table_top) 327 if (pfn >= pgt_buf_top)
328 panic("alloc_low_page: ran out of memory"); 328 panic("alloc_low_page: ran out of memory");
329 329
330 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); 330 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -333,12 +333,28 @@ static __ref void *alloc_low_page(unsigned long *phys)
333 return adr; 333 return adr;
334} 334}
335 335
336static __ref void *map_low_page(void *virt)
337{
338 void *adr;
339 unsigned long phys, left;
340
341 if (after_bootmem)
342 return virt;
343
344 phys = __pa(virt);
345 left = phys & (PAGE_SIZE - 1);
346 adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
347 adr = (void *)(((unsigned long)adr) | left);
348
349 return adr;
350}
351
336static __ref void unmap_low_page(void *adr) 352static __ref void unmap_low_page(void *adr)
337{ 353{
338 if (after_bootmem) 354 if (after_bootmem)
339 return; 355 return;
340 356
341 early_iounmap(adr, PAGE_SIZE); 357 early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
342} 358}
343 359
344static unsigned long __meminit 360static unsigned long __meminit
@@ -386,15 +402,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
386} 402}
387 403
388static unsigned long __meminit 404static unsigned long __meminit
389phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
390 pgprot_t prot)
391{
392 pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
393
394 return phys_pte_init(pte, address, end, prot);
395}
396
397static unsigned long __meminit
398phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, 405phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
399 unsigned long page_size_mask, pgprot_t prot) 406 unsigned long page_size_mask, pgprot_t prot)
400{ 407{
@@ -420,8 +427,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
420 if (pmd_val(*pmd)) { 427 if (pmd_val(*pmd)) {
421 if (!pmd_large(*pmd)) { 428 if (!pmd_large(*pmd)) {
422 spin_lock(&init_mm.page_table_lock); 429 spin_lock(&init_mm.page_table_lock);
423 last_map_addr = phys_pte_update(pmd, address, 430 pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
431 last_map_addr = phys_pte_init(pte, address,
424 end, prot); 432 end, prot);
433 unmap_low_page(pte);
425 spin_unlock(&init_mm.page_table_lock); 434 spin_unlock(&init_mm.page_table_lock);
426 continue; 435 continue;
427 } 436 }
@@ -468,18 +477,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
468} 477}
469 478
470static unsigned long __meminit 479static unsigned long __meminit
471phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
472 unsigned long page_size_mask, pgprot_t prot)
473{
474 pmd_t *pmd = pmd_offset(pud, 0);
475 unsigned long last_map_addr;
476
477 last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
478 __flush_tlb_all();
479 return last_map_addr;
480}
481
482static unsigned long __meminit
483phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, 480phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
484 unsigned long page_size_mask) 481 unsigned long page_size_mask)
485{ 482{
@@ -504,8 +501,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
504 501
505 if (pud_val(*pud)) { 502 if (pud_val(*pud)) {
506 if (!pud_large(*pud)) { 503 if (!pud_large(*pud)) {
507 last_map_addr = phys_pmd_update(pud, addr, end, 504 pmd = map_low_page(pmd_offset(pud, 0));
505 last_map_addr = phys_pmd_init(pmd, addr, end,
508 page_size_mask, prot); 506 page_size_mask, prot);
507 unmap_low_page(pmd);
508 __flush_tlb_all();
509 continue; 509 continue;
510 } 510 }
511 /* 511 /*
@@ -553,17 +553,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
553 return last_map_addr; 553 return last_map_addr;
554} 554}
555 555
556static unsigned long __meminit
557phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
558 unsigned long page_size_mask)
559{
560 pud_t *pud;
561
562 pud = (pud_t *)pgd_page_vaddr(*pgd);
563
564 return phys_pud_init(pud, addr, end, page_size_mask);
565}
566
567unsigned long __meminit 556unsigned long __meminit
568kernel_physical_mapping_init(unsigned long start, 557kernel_physical_mapping_init(unsigned long start,
569 unsigned long end, 558 unsigned long end,
@@ -587,8 +576,10 @@ kernel_physical_mapping_init(unsigned long start,
587 next = end; 576 next = end;
588 577
589 if (pgd_val(*pgd)) { 578 if (pgd_val(*pgd)) {
590 last_map_addr = phys_pud_update(pgd, __pa(start), 579 pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
580 last_map_addr = phys_pud_init(pud, __pa(start),
591 __pa(end), page_size_mask); 581 __pa(end), page_size_mask);
582 unmap_low_page(pud);
592 continue; 583 continue;
593 } 584 }
594 585
@@ -612,10 +603,9 @@ kernel_physical_mapping_init(unsigned long start,
612} 603}
613 604
614#ifndef CONFIG_NUMA 605#ifndef CONFIG_NUMA
615void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 606void __init initmem_init(void)
616 int acpi, int k8)
617{ 607{
618 memblock_x86_register_active_regions(0, start_pfn, end_pfn); 608 memblock_x86_register_active_regions(0, 0, max_pfn);
619} 609}
620#endif 610#endif
621 611
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d7887a38..9559d360fde7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt)
26early_param("numa", numa_setup); 26early_param("numa", numa_setup);
27 27
28/* 28/*
29 * Which logical CPUs are on which nodes 29 * apicid, cpu, node mappings
30 */ 30 */
31s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
32 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
33};
34
31cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; 35cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
32EXPORT_SYMBOL(node_to_cpumask_map); 36EXPORT_SYMBOL(node_to_cpumask_map);
33 37
34/* 38/*
39 * Map cpu index to node index
40 */
41DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
42EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
43
44void __cpuinit numa_set_node(int cpu, int node)
45{
46 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
47
48 /* early setting, no percpu area yet */
49 if (cpu_to_node_map) {
50 cpu_to_node_map[cpu] = node;
51 return;
52 }
53
54#ifdef CONFIG_DEBUG_PER_CPU_MAPS
55 if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
56 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
57 dump_stack();
58 return;
59 }
60#endif
61 per_cpu(x86_cpu_to_node_map, cpu) = node;
62
63 if (node != NUMA_NO_NODE)
64 set_cpu_numa_node(cpu, node);
65}
66
67void __cpuinit numa_clear_node(int cpu)
68{
69 numa_set_node(cpu, NUMA_NO_NODE);
70}
71
72/*
35 * Allocate node_to_cpumask_map based on number of available nodes 73 * Allocate node_to_cpumask_map based on number of available nodes
36 * Requires node_possible_map to be valid. 74 * Requires node_possible_map to be valid.
37 * 75 *
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void)
57 pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); 95 pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
58} 96}
59 97
60#ifdef CONFIG_DEBUG_PER_CPU_MAPS 98/*
99 * There are unfortunately some poorly designed mainboards around that
100 * only connect memory to a single CPU. This breaks the 1:1 cpu->node
101 * mapping. To avoid this fill in the mapping for all possible CPUs,
102 * as the number of CPUs is not known yet. We round robin the existing
103 * nodes.
104 */
105void __init numa_init_array(void)
106{
107 int rr, i;
108
109 rr = first_node(node_online_map);
110 for (i = 0; i < nr_cpu_ids; i++) {
111 if (early_cpu_to_node(i) != NUMA_NO_NODE)
112 continue;
113 numa_set_node(i, rr);
114 rr = next_node(rr, node_online_map);
115 if (rr == MAX_NUMNODES)
116 rr = first_node(node_online_map);
117 }
118}
119
120static __init int find_near_online_node(int node)
121{
122 int n, val;
123 int min_val = INT_MAX;
124 int best_node = -1;
125
126 for_each_online_node(n) {
127 val = node_distance(node, n);
128
129 if (val < min_val) {
130 min_val = val;
131 best_node = n;
132 }
133 }
134
135 return best_node;
136}
137
138/*
139 * Setup early cpu_to_node.
140 *
141 * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
142 * and apicid_to_node[] tables have valid entries for a CPU.
143 * This means we skip cpu_to_node[] initialisation for NUMA
144 * emulation and faking node case (when running a kernel compiled
145 * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
146 * is already initialized in a round robin manner at numa_init_array,
147 * prior to this call, and this initialization is good enough
148 * for the fake NUMA cases.
149 *
150 * Called before the per_cpu areas are setup.
151 */
152void __init init_cpu_to_node(void)
153{
154 int cpu;
155 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
156
157 BUG_ON(cpu_to_apicid == NULL);
158
159 for_each_possible_cpu(cpu) {
160 int node = numa_cpu_node(cpu);
161
162 if (node == NUMA_NO_NODE)
163 continue;
164 if (!node_online(node))
165 node = find_near_online_node(node);
166 numa_set_node(cpu, node);
167 }
168}
169
170#ifndef CONFIG_DEBUG_PER_CPU_MAPS
171
172# ifndef CONFIG_NUMA_EMU
173void __cpuinit numa_add_cpu(int cpu)
174{
175 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
176}
177
178void __cpuinit numa_remove_cpu(int cpu)
179{
180 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
181}
182# endif /* !CONFIG_NUMA_EMU */
183
184#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
185
186int __cpu_to_node(int cpu)
187{
188 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
189 printk(KERN_WARNING
190 "cpu_to_node(%d): usage too early!\n", cpu);
191 dump_stack();
192 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
193 }
194 return per_cpu(x86_cpu_to_node_map, cpu);
195}
196EXPORT_SYMBOL(__cpu_to_node);
197
198/*
199 * Same function as cpu_to_node() but used if called before the
200 * per_cpu areas are setup.
201 */
202int early_cpu_to_node(int cpu)
203{
204 if (early_per_cpu_ptr(x86_cpu_to_node_map))
205 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
206
207 if (!cpu_possible(cpu)) {
208 printk(KERN_WARNING
209 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
210 dump_stack();
211 return NUMA_NO_NODE;
212 }
213 return per_cpu(x86_cpu_to_node_map, cpu);
214}
215
216struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
217{
218 int node = early_cpu_to_node(cpu);
219 struct cpumask *mask;
220 char buf[64];
221
222 if (node == NUMA_NO_NODE) {
223 /* early_cpu_to_node() already emits a warning and trace */
224 return NULL;
225 }
226 mask = node_to_cpumask_map[node];
227 if (!mask) {
228 pr_err("node_to_cpumask_map[%i] NULL\n", node);
229 dump_stack();
230 return NULL;
231 }
232
233 cpulist_scnprintf(buf, sizeof(buf), mask);
234 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
235 enable ? "numa_add_cpu" : "numa_remove_cpu",
236 cpu, node, buf);
237 return mask;
238}
239
240# ifndef CONFIG_NUMA_EMU
241static void __cpuinit numa_set_cpumask(int cpu, int enable)
242{
243 struct cpumask *mask;
244
245 mask = debug_cpumask_set_cpu(cpu, enable);
246 if (!mask)
247 return;
248
249 if (enable)
250 cpumask_set_cpu(cpu, mask);
251 else
252 cpumask_clear_cpu(cpu, mask);
253}
254
255void __cpuinit numa_add_cpu(int cpu)
256{
257 numa_set_cpumask(cpu, 1);
258}
259
260void __cpuinit numa_remove_cpu(int cpu)
261{
262 numa_set_cpumask(cpu, 0);
263}
264# endif /* !CONFIG_NUMA_EMU */
265
61/* 266/*
62 * Returns a pointer to the bitmask of CPUs on Node 'node'. 267 * Returns a pointer to the bitmask of CPUs on Node 'node'.
63 */ 268 */
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node)
80 return node_to_cpumask_map[node]; 285 return node_to_cpumask_map[node];
81} 286}
82EXPORT_SYMBOL(cpumask_of_node); 287EXPORT_SYMBOL(cpumask_of_node);
83#endif 288
289#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c9f277..bde3906420df 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
110 110
111static unsigned long kva_start_pfn; 111static unsigned long kva_start_pfn;
112static unsigned long kva_pages; 112static unsigned long kva_pages;
113
114int __cpuinit numa_cpu_node(int cpu)
115{
116 return apic->x86_32_numa_cpu_node(cpu);
117}
118
113/* 119/*
114 * FLAT - support for basic PC memory model with discontig enabled, essentially 120 * FLAT - support for basic PC memory model with discontig enabled, essentially
115 * a single node with all available processors in it with a flat 121 * a single node with all available processors in it with a flat
@@ -346,8 +352,7 @@ static void init_remap_allocator(int nid)
346 (ulong) node_remap_end_vaddr[nid]); 352 (ulong) node_remap_end_vaddr[nid]);
347} 353}
348 354
349void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 355void __init initmem_init(void)
350 int acpi, int k8)
351{ 356{
352 int nid; 357 int nid;
353 long kva_target_pfn; 358 long kva_target_pfn;
@@ -361,6 +366,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
361 */ 366 */
362 367
363 get_memcfg_numa(); 368 get_memcfg_numa();
369 numa_init_array();
364 370
365 kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); 371 kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
366 372
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eebc..9ec0f209a6a4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -13,31 +13,30 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/nodemask.h> 14#include <linux/nodemask.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/acpi.h>
16 17
17#include <asm/e820.h> 18#include <asm/e820.h>
18#include <asm/proto.h> 19#include <asm/proto.h>
19#include <asm/dma.h> 20#include <asm/dma.h>
20#include <asm/numa.h>
21#include <asm/acpi.h> 21#include <asm/acpi.h>
22#include <asm/amd_nb.h> 22#include <asm/amd_nb.h>
23 23
24#include "numa_internal.h"
25
24struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 26struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
25EXPORT_SYMBOL(node_data); 27EXPORT_SYMBOL(node_data);
26 28
27struct memnode memnode; 29nodemask_t numa_nodes_parsed __initdata;
28 30
29s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { 31struct memnode memnode;
30 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
31};
32 32
33static unsigned long __initdata nodemap_addr; 33static unsigned long __initdata nodemap_addr;
34static unsigned long __initdata nodemap_size; 34static unsigned long __initdata nodemap_size;
35 35
36/* 36static struct numa_meminfo numa_meminfo __initdata;
37 * Map cpu index to node index 37
38 */ 38static int numa_distance_cnt;
39DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 39static u8 *numa_distance;
40EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
41 40
42/* 41/*
43 * Given a shift value, try to populate memnodemap[] 42 * Given a shift value, try to populate memnodemap[]
@@ -46,16 +45,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
46 * 0 if memnodmap[] too small (of shift too small) 45 * 0 if memnodmap[] too small (of shift too small)
47 * -1 if node overlap or lost ram (shift too big) 46 * -1 if node overlap or lost ram (shift too big)
48 */ 47 */
49static int __init populate_memnodemap(const struct bootnode *nodes, 48static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
50 int numnodes, int shift, int *nodeids)
51{ 49{
52 unsigned long addr, end; 50 unsigned long addr, end;
53 int i, res = -1; 51 int i, res = -1;
54 52
55 memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); 53 memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
56 for (i = 0; i < numnodes; i++) { 54 for (i = 0; i < mi->nr_blks; i++) {
57 addr = nodes[i].start; 55 addr = mi->blk[i].start;
58 end = nodes[i].end; 56 end = mi->blk[i].end;
59 if (addr >= end) 57 if (addr >= end)
60 continue; 58 continue;
61 if ((end >> shift) >= memnodemapsize) 59 if ((end >> shift) >= memnodemapsize)
@@ -63,12 +61,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
63 do { 61 do {
64 if (memnodemap[addr >> shift] != NUMA_NO_NODE) 62 if (memnodemap[addr >> shift] != NUMA_NO_NODE)
65 return -1; 63 return -1;
66 64 memnodemap[addr >> shift] = mi->blk[i].nid;
67 if (!nodeids)
68 memnodemap[addr >> shift] = i;
69 else
70 memnodemap[addr >> shift] = nodeids[i];
71
72 addr += (1UL << shift); 65 addr += (1UL << shift);
73 } while (addr < end); 66 } while (addr < end);
74 res = 1; 67 res = 1;
@@ -86,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void)
86 79
87 addr = 0x8000; 80 addr = 0x8000;
88 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); 81 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
89 nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT, 82 nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
90 nodemap_size, L1_CACHE_BYTES); 83 nodemap_size, L1_CACHE_BYTES);
91 if (nodemap_addr == MEMBLOCK_ERROR) { 84 if (nodemap_addr == MEMBLOCK_ERROR) {
92 printk(KERN_ERR 85 printk(KERN_ERR
@@ -106,16 +99,15 @@ static int __init allocate_cachealigned_memnodemap(void)
106 * The LSB of all start and end addresses in the node map is the value of the 99 * The LSB of all start and end addresses in the node map is the value of the
107 * maximum possible shift. 100 * maximum possible shift.
108 */ 101 */
109static int __init extract_lsb_from_nodes(const struct bootnode *nodes, 102static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
110 int numnodes)
111{ 103{
112 int i, nodes_used = 0; 104 int i, nodes_used = 0;
113 unsigned long start, end; 105 unsigned long start, end;
114 unsigned long bitfield = 0, memtop = 0; 106 unsigned long bitfield = 0, memtop = 0;
115 107
116 for (i = 0; i < numnodes; i++) { 108 for (i = 0; i < mi->nr_blks; i++) {
117 start = nodes[i].start; 109 start = mi->blk[i].start;
118 end = nodes[i].end; 110 end = mi->blk[i].end;
119 if (start >= end) 111 if (start >= end)
120 continue; 112 continue;
121 bitfield |= start; 113 bitfield |= start;
@@ -131,18 +123,17 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
131 return i; 123 return i;
132} 124}
133 125
134int __init compute_hash_shift(struct bootnode *nodes, int numnodes, 126static int __init compute_hash_shift(const struct numa_meminfo *mi)
135 int *nodeids)
136{ 127{
137 int shift; 128 int shift;
138 129
139 shift = extract_lsb_from_nodes(nodes, numnodes); 130 shift = extract_lsb_from_nodes(mi);
140 if (allocate_cachealigned_memnodemap()) 131 if (allocate_cachealigned_memnodemap())
141 return -1; 132 return -1;
142 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", 133 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
143 shift); 134 shift);
144 135
145 if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { 136 if (populate_memnodemap(mi, shift) != 1) {
146 printk(KERN_INFO "Your memory is not aligned you need to " 137 printk(KERN_INFO "Your memory is not aligned you need to "
147 "rebuild your kernel with a bigger NODEMAPSIZE " 138 "rebuild your kernel with a bigger NODEMAPSIZE "
148 "shift=%d\n", shift); 139 "shift=%d\n", shift);
@@ -188,6 +179,63 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
188 return NULL; 179 return NULL;
189} 180}
190 181
182static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
183 struct numa_meminfo *mi)
184{
185 /* ignore zero length blks */
186 if (start == end)
187 return 0;
188
189 /* whine about and ignore invalid blks */
190 if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
191 pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
192 nid, start, end);
193 return 0;
194 }
195
196 if (mi->nr_blks >= NR_NODE_MEMBLKS) {
197 pr_err("NUMA: too many memblk ranges\n");
198 return -EINVAL;
199 }
200
201 mi->blk[mi->nr_blks].start = start;
202 mi->blk[mi->nr_blks].end = end;
203 mi->blk[mi->nr_blks].nid = nid;
204 mi->nr_blks++;
205 return 0;
206}
207
208/**
209 * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
210 * @idx: Index of memblk to remove
211 * @mi: numa_meminfo to remove memblk from
212 *
213 * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
214 * decrementing @mi->nr_blks.
215 */
216void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
217{
218 mi->nr_blks--;
219 memmove(&mi->blk[idx], &mi->blk[idx + 1],
220 (mi->nr_blks - idx) * sizeof(mi->blk[0]));
221}
222
223/**
224 * numa_add_memblk - Add one numa_memblk to numa_meminfo
225 * @nid: NUMA node ID of the new memblk
226 * @start: Start address of the new memblk
227 * @end: End address of the new memblk
228 *
229 * Add a new memblk to the default numa_meminfo.
230 *
231 * RETURNS:
232 * 0 on success, -errno on failure.
233 */
234int __init numa_add_memblk(int nid, u64 start, u64 end)
235{
236 return numa_add_memblk_to(nid, start, end, &numa_meminfo);
237}
238
191/* Initialize bootmem allocator for a node */ 239/* Initialize bootmem allocator for a node */
192void __init 240void __init
193setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) 241setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
@@ -234,696 +282,386 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
234 node_set_online(nodeid); 282 node_set_online(nodeid);
235} 283}
236 284
237/* 285/**
238 * There are unfortunately some poorly designed mainboards around that 286 * numa_cleanup_meminfo - Cleanup a numa_meminfo
239 * only connect memory to a single CPU. This breaks the 1:1 cpu->node 287 * @mi: numa_meminfo to clean up
240 * mapping. To avoid this fill in the mapping for all possible CPUs, 288 *
241 * as the number of CPUs is not known yet. We round robin the existing 289 * Sanitize @mi by merging and removing unncessary memblks. Also check for
242 * nodes. 290 * conflicts and clear unused memblks.
291 *
292 * RETURNS:
293 * 0 on success, -errno on failure.
243 */ 294 */
244void __init numa_init_array(void) 295int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
245{ 296{
246 int rr, i; 297 const u64 low = 0;
298 const u64 high = (u64)max_pfn << PAGE_SHIFT;
299 int i, j, k;
247 300
248 rr = first_node(node_online_map); 301 for (i = 0; i < mi->nr_blks; i++) {
249 for (i = 0; i < nr_cpu_ids; i++) { 302 struct numa_memblk *bi = &mi->blk[i];
250 if (early_cpu_to_node(i) != NUMA_NO_NODE)
251 continue;
252 numa_set_node(i, rr);
253 rr = next_node(rr, node_online_map);
254 if (rr == MAX_NUMNODES)
255 rr = first_node(node_online_map);
256 }
257}
258
259#ifdef CONFIG_NUMA_EMU
260/* Numa emulation */
261static struct bootnode nodes[MAX_NUMNODES] __initdata;
262static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
263static char *cmdline __initdata;
264 303
265void __init numa_emu_cmdline(char *str) 304 /* make sure all blocks are inside the limits */
266{ 305 bi->start = max(bi->start, low);
267 cmdline = str; 306 bi->end = min(bi->end, high);
268}
269 307
270static int __init setup_physnodes(unsigned long start, unsigned long end, 308 /* and there's no empty block */
271 int acpi, int amd) 309 if (bi->start == bi->end) {
272{ 310 numa_remove_memblk_from(i--, mi);
273 int ret = 0;
274 int i;
275
276 memset(physnodes, 0, sizeof(physnodes));
277#ifdef CONFIG_ACPI_NUMA
278 if (acpi)
279 acpi_get_nodes(physnodes, start, end);
280#endif
281#ifdef CONFIG_AMD_NUMA
282 if (amd)
283 amd_get_nodes(physnodes);
284#endif
285 /*
286 * Basic sanity checking on the physical node map: there may be errors
287 * if the SRAT or AMD code incorrectly reported the topology or the mem=
288 * kernel parameter is used.
289 */
290 for (i = 0; i < MAX_NUMNODES; i++) {
291 if (physnodes[i].start == physnodes[i].end)
292 continue;
293 if (physnodes[i].start > end) {
294 physnodes[i].end = physnodes[i].start;
295 continue;
296 }
297 if (physnodes[i].end < start) {
298 physnodes[i].start = physnodes[i].end;
299 continue; 311 continue;
300 } 312 }
301 if (physnodes[i].start < start)
302 physnodes[i].start = start;
303 if (physnodes[i].end > end)
304 physnodes[i].end = end;
305 ret++;
306 }
307
308 /*
309 * If no physical topology was detected, a single node is faked to cover
310 * the entire address space.
311 */
312 if (!ret) {
313 physnodes[ret].start = start;
314 physnodes[ret].end = end;
315 ret = 1;
316 }
317 return ret;
318}
319
320static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
321{
322 int i;
323
324 BUG_ON(acpi && amd);
325#ifdef CONFIG_ACPI_NUMA
326 if (acpi)
327 acpi_fake_nodes(nodes, nr_nodes);
328#endif
329#ifdef CONFIG_AMD_NUMA
330 if (amd)
331 amd_fake_nodes(nodes, nr_nodes);
332#endif
333 if (!acpi && !amd)
334 for (i = 0; i < nr_cpu_ids; i++)
335 numa_set_node(i, 0);
336}
337
338/*
339 * Setups up nid to range from addr to addr + size. If the end
340 * boundary is greater than max_addr, then max_addr is used instead.
341 * The return value is 0 if there is additional memory left for
342 * allocation past addr and -1 otherwise. addr is adjusted to be at
343 * the end of the node.
344 */
345static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
346{
347 int ret = 0;
348 nodes[nid].start = *addr;
349 *addr += size;
350 if (*addr >= max_addr) {
351 *addr = max_addr;
352 ret = -1;
353 }
354 nodes[nid].end = *addr;
355 node_set(nid, node_possible_map);
356 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
357 nodes[nid].start, nodes[nid].end,
358 (nodes[nid].end - nodes[nid].start) >> 20);
359 return ret;
360}
361
362/*
363 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
364 * to max_addr. The return value is the number of nodes allocated.
365 */
366static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
367{
368 nodemask_t physnode_mask = NODE_MASK_NONE;
369 u64 size;
370 int big;
371 int ret = 0;
372 int i;
373
374 if (nr_nodes <= 0)
375 return -1;
376 if (nr_nodes > MAX_NUMNODES) {
377 pr_info("numa=fake=%d too large, reducing to %d\n",
378 nr_nodes, MAX_NUMNODES);
379 nr_nodes = MAX_NUMNODES;
380 }
381
382 size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
383 /*
384 * Calculate the number of big nodes that can be allocated as a result
385 * of consolidating the remainder.
386 */
387 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
388 FAKE_NODE_MIN_SIZE;
389
390 size &= FAKE_NODE_MIN_HASH_MASK;
391 if (!size) {
392 pr_err("Not enough memory for each node. "
393 "NUMA emulation disabled.\n");
394 return -1;
395 }
396 313
397 for (i = 0; i < MAX_NUMNODES; i++) 314 for (j = i + 1; j < mi->nr_blks; j++) {
398 if (physnodes[i].start != physnodes[i].end) 315 struct numa_memblk *bj = &mi->blk[j];
399 node_set(i, physnode_mask); 316 unsigned long start, end;
400
401 /*
402 * Continue to fill physical nodes with fake nodes until there is no
403 * memory left on any of them.
404 */
405 while (nodes_weight(physnode_mask)) {
406 for_each_node_mask(i, physnode_mask) {
407 u64 end = physnodes[i].start + size;
408 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
409
410 if (ret < big)
411 end += FAKE_NODE_MIN_SIZE;
412 317
413 /* 318 /*
414 * Continue to add memory to this fake node if its 319 * See whether there are overlapping blocks. Whine
415 * non-reserved memory is less than the per-node size. 320 * about but allow overlaps of the same nid. They
321 * will be merged below.
416 */ 322 */
417 while (end - physnodes[i].start - 323 if (bi->end > bj->start && bi->start < bj->end) {
418 memblock_x86_hole_size(physnodes[i].start, end) < size) { 324 if (bi->nid != bj->nid) {
419 end += FAKE_NODE_MIN_SIZE; 325 pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
420 if (end > physnodes[i].end) { 326 bi->nid, bi->start, bi->end,
421 end = physnodes[i].end; 327 bj->nid, bj->start, bj->end);
422 break; 328 return -EINVAL;
423 } 329 }
330 pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
331 bi->nid, bi->start, bi->end,
332 bj->start, bj->end);
424 } 333 }
425 334
426 /* 335 /*
427 * If there won't be at least FAKE_NODE_MIN_SIZE of 336 * Join together blocks on the same node, holes
428 * non-reserved memory in ZONE_DMA32 for the next node, 337 * between which don't overlap with memory on other
429 * this one must extend to the boundary. 338 * nodes.
430 */ 339 */
431 if (end < dma32_end && dma32_end - end - 340 if (bi->nid != bj->nid)
432 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 341 continue;
433 end = dma32_end; 342 start = max(min(bi->start, bj->start), low);
434 343 end = min(max(bi->end, bj->end), high);
435 /* 344 for (k = 0; k < mi->nr_blks; k++) {
436 * If there won't be enough non-reserved memory for the 345 struct numa_memblk *bk = &mi->blk[k];
437 * next node, this one must extend to the end of the 346
438 * physical node. 347 if (bi->nid == bk->nid)
439 */ 348 continue;
440 if (physnodes[i].end - end - 349 if (start < bk->end && end > bk->start)
441 memblock_x86_hole_size(end, physnodes[i].end) < size) 350 break;
442 end = physnodes[i].end; 351 }
443 352 if (k < mi->nr_blks)
444 /* 353 continue;
445 * Avoid allocating more nodes than requested, which can 354 printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
446 * happen as a result of rounding down each node's size 355 bi->nid, bi->start, bi->end, bj->start, bj->end,
447 * to FAKE_NODE_MIN_SIZE. 356 start, end);
448 */ 357 bi->start = start;
449 if (nodes_weight(physnode_mask) + ret >= nr_nodes) 358 bi->end = end;
450 end = physnodes[i].end; 359 numa_remove_memblk_from(j--, mi);
451
452 if (setup_node_range(ret++, &physnodes[i].start,
453 end - physnodes[i].start,
454 physnodes[i].end) < 0)
455 node_clear(i, physnode_mask);
456 } 360 }
457 } 361 }
458 return ret;
459}
460 362
461/* 363 for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
462 * Returns the end address of a node so that there is at least `size' amount of 364 mi->blk[i].start = mi->blk[i].end = 0;
463 * non-reserved memory or `max_addr' is reached. 365 mi->blk[i].nid = NUMA_NO_NODE;
464 */
465static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
466{
467 u64 end = start + size;
468
469 while (end - start - memblock_x86_hole_size(start, end) < size) {
470 end += FAKE_NODE_MIN_SIZE;
471 if (end > max_addr) {
472 end = max_addr;
473 break;
474 }
475 } 366 }
476 return end; 367
368 return 0;
477} 369}
478 370
479/* 371/*
480 * Sets up fake nodes of `size' interleaved over physical nodes ranging from 372 * Set nodes, which have memory in @mi, in *@nodemask.
481 * `addr' to `max_addr'. The return value is the number of nodes allocated.
482 */ 373 */
483static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) 374static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
375 const struct numa_meminfo *mi)
484{ 376{
485 nodemask_t physnode_mask = NODE_MASK_NONE;
486 u64 min_size;
487 int ret = 0;
488 int i; 377 int i;
489 378
490 if (!size) 379 for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
491 return -1; 380 if (mi->blk[i].start != mi->blk[i].end &&
492 /* 381 mi->blk[i].nid != NUMA_NO_NODE)
493 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is 382 node_set(mi->blk[i].nid, *nodemask);
494 * increased accordingly if the requested size is too small. This 383}
495 * creates a uniform distribution of node sizes across the entire
496 * machine (but not necessarily over physical nodes).
497 */
498 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
499 MAX_NUMNODES;
500 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
501 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
502 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
503 FAKE_NODE_MIN_HASH_MASK;
504 if (size < min_size) {
505 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
506 size >> 20, min_size >> 20);
507 size = min_size;
508 }
509 size &= FAKE_NODE_MIN_HASH_MASK;
510
511 for (i = 0; i < MAX_NUMNODES; i++)
512 if (physnodes[i].start != physnodes[i].end)
513 node_set(i, physnode_mask);
514 /*
515 * Fill physical nodes with fake nodes of size until there is no memory
516 * left on any of them.
517 */
518 while (nodes_weight(physnode_mask)) {
519 for_each_node_mask(i, physnode_mask) {
520 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
521 u64 end;
522
523 end = find_end_of_node(physnodes[i].start,
524 physnodes[i].end, size);
525 /*
526 * If there won't be at least FAKE_NODE_MIN_SIZE of
527 * non-reserved memory in ZONE_DMA32 for the next node,
528 * this one must extend to the boundary.
529 */
530 if (end < dma32_end && dma32_end - end -
531 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
532 end = dma32_end;
533 384
534 /* 385/**
535 * If there won't be enough non-reserved memory for the 386 * numa_reset_distance - Reset NUMA distance table
536 * next node, this one must extend to the end of the 387 *
537 * physical node. 388 * The current table is freed. The next numa_set_distance() call will
538 */ 389 * create a new one.
539 if (physnodes[i].end - end - 390 */
540 memblock_x86_hole_size(end, physnodes[i].end) < size) 391void __init numa_reset_distance(void)
541 end = physnodes[i].end; 392{
393 size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
542 394
543 /* 395 /* numa_distance could be 1LU marking allocation failure, test cnt */
544 * Setup the fake node that will be allocated as bootmem 396 if (numa_distance_cnt)
545 * later. If setup_node_range() returns non-zero, there 397 memblock_x86_free_range(__pa(numa_distance),
546 * is no more memory available on this physical node. 398 __pa(numa_distance) + size);
547 */ 399 numa_distance_cnt = 0;
548 if (setup_node_range(ret++, &physnodes[i].start, 400 numa_distance = NULL; /* enable table creation */
549 end - physnodes[i].start,
550 physnodes[i].end) < 0)
551 node_clear(i, physnode_mask);
552 }
553 }
554 return ret;
555} 401}
556 402
557/* 403static int __init numa_alloc_distance(void)
558 * Sets up the system RAM area from start_pfn to last_pfn according to the
559 * numa=fake command-line option.
560 */
561static int __init numa_emulation(unsigned long start_pfn,
562 unsigned long last_pfn, int acpi, int amd)
563{ 404{
564 u64 addr = start_pfn << PAGE_SHIFT; 405 nodemask_t nodes_parsed;
565 u64 max_addr = last_pfn << PAGE_SHIFT; 406 size_t size;
566 int num_nodes; 407 int i, j, cnt = 0;
567 int i; 408 u64 phys;
568 409
569 /* 410 /* size the new table and allocate it */
570 * If the numa=fake command-line contains a 'M' or 'G', it represents 411 nodes_parsed = numa_nodes_parsed;
571 * the fixed node size. Otherwise, if it is just a single number N, 412 numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
572 * split the system RAM into N fake nodes.
573 */
574 if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
575 u64 size;
576 413
577 size = memparse(cmdline, &cmdline); 414 for_each_node_mask(i, nodes_parsed)
578 num_nodes = split_nodes_size_interleave(addr, max_addr, size); 415 cnt = i;
579 } else { 416 cnt++;
580 unsigned long n; 417 size = cnt * cnt * sizeof(numa_distance[0]);
581 418
582 n = simple_strtoul(cmdline, NULL, 0); 419 phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
583 num_nodes = split_nodes_interleave(addr, max_addr, n); 420 size, PAGE_SIZE);
421 if (phys == MEMBLOCK_ERROR) {
422 pr_warning("NUMA: Warning: can't allocate distance table!\n");
423 /* don't retry until explicitly reset */
424 numa_distance = (void *)1LU;
425 return -ENOMEM;
584 } 426 }
427 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
585 428
586 if (num_nodes < 0) 429 numa_distance = __va(phys);
587 return num_nodes; 430 numa_distance_cnt = cnt;
588 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); 431
589 if (memnode_shift < 0) { 432 /* fill with the default distances */
590 memnode_shift = 0; 433 for (i = 0; i < cnt; i++)
591 printk(KERN_ERR "No NUMA hash function found. NUMA emulation " 434 for (j = 0; j < cnt; j++)
592 "disabled.\n"); 435 numa_distance[i * cnt + j] = i == j ?
593 return -1; 436 LOCAL_DISTANCE : REMOTE_DISTANCE;
594 } 437 printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
595 438
596 /*
597 * We need to vacate all active ranges that may have been registered for
598 * the e820 memory map.
599 */
600 remove_all_active_ranges();
601 for_each_node_mask(i, node_possible_map) {
602 memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
603 nodes[i].end >> PAGE_SHIFT);
604 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
605 }
606 setup_physnodes(addr, max_addr, acpi, amd);
607 fake_physnodes(acpi, amd, num_nodes);
608 numa_init_array();
609 return 0; 439 return 0;
610} 440}
611#endif /* CONFIG_NUMA_EMU */
612 441
613void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, 442/**
614 int acpi, int amd) 443 * numa_set_distance - Set NUMA distance from one NUMA to another
444 * @from: the 'from' node to set distance
445 * @to: the 'to' node to set distance
446 * @distance: NUMA distance
447 *
448 * Set the distance from node @from to @to to @distance. If distance table
449 * doesn't exist, one which is large enough to accomodate all the currently
450 * known nodes will be created.
451 *
452 * If such table cannot be allocated, a warning is printed and further
453 * calls are ignored until the distance table is reset with
454 * numa_reset_distance().
455 *
456 * If @from or @to is higher than the highest known node at the time of
457 * table creation or @distance doesn't make sense, the call is ignored.
458 * This is to allow simplification of specific NUMA config implementations.
459 */
460void __init numa_set_distance(int from, int to, int distance)
615{ 461{
616 int i; 462 if (!numa_distance && numa_alloc_distance() < 0)
617
618 nodes_clear(node_possible_map);
619 nodes_clear(node_online_map);
620
621#ifdef CONFIG_NUMA_EMU
622 setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
623 acpi, amd);
624 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
625 return; 463 return;
626 setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
627 acpi, amd);
628 nodes_clear(node_possible_map);
629 nodes_clear(node_online_map);
630#endif
631 464
632#ifdef CONFIG_ACPI_NUMA 465 if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
633 if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, 466 printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
634 last_pfn << PAGE_SHIFT)) 467 from, to, distance);
635 return; 468 return;
636 nodes_clear(node_possible_map); 469 }
637 nodes_clear(node_online_map);
638#endif
639 470
640#ifdef CONFIG_AMD_NUMA 471 if ((u8)distance != distance ||
641 if (!numa_off && amd && !amd_scan_nodes()) 472 (from == to && distance != LOCAL_DISTANCE)) {
473 pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
474 from, to, distance);
642 return; 475 return;
643 nodes_clear(node_possible_map); 476 }
644 nodes_clear(node_online_map);
645#endif
646 printk(KERN_INFO "%s\n",
647 numa_off ? "NUMA turned off" : "No NUMA configuration found");
648 477
649 printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 478 numa_distance[from * numa_distance_cnt + to] = distance;
650 start_pfn << PAGE_SHIFT,
651 last_pfn << PAGE_SHIFT);
652 /* setup dummy node covering all memory */
653 memnode_shift = 63;
654 memnodemap = memnode.embedded_map;
655 memnodemap[0] = 0;
656 node_set_online(0);
657 node_set(0, node_possible_map);
658 for (i = 0; i < nr_cpu_ids; i++)
659 numa_set_node(i, 0);
660 memblock_x86_register_active_regions(0, start_pfn, last_pfn);
661 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
662} 479}
663 480
664unsigned long __init numa_free_all_bootmem(void) 481int __node_distance(int from, int to)
665{ 482{
666 unsigned long pages = 0; 483 if (from >= numa_distance_cnt || to >= numa_distance_cnt)
667 int i; 484 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
485 return numa_distance[from * numa_distance_cnt + to];
486}
487EXPORT_SYMBOL(__node_distance);
668 488
669 for_each_online_node(i) 489/*
670 pages += free_all_bootmem_node(NODE_DATA(i)); 490 * Sanity check to catch more bad NUMA configurations (they are amazingly
491 * common). Make sure the nodes cover all memory.
492 */
493static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
494{
495 unsigned long numaram, e820ram;
496 int i;
671 497
672 pages += free_all_memory_core_early(MAX_NUMNODES); 498 numaram = 0;
499 for (i = 0; i < mi->nr_blks; i++) {
500 unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
501 unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
502 numaram += e - s;
503 numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
504 if ((long)numaram < 0)
505 numaram = 0;
506 }
673 507
674 return pages; 508 e820ram = max_pfn - (memblock_x86_hole_size(0,
509 max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
510 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
511 if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
512 printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
513 (numaram << PAGE_SHIFT) >> 20,
514 (e820ram << PAGE_SHIFT) >> 20);
515 return false;
516 }
517 return true;
675} 518}
676 519
677#ifdef CONFIG_NUMA 520static int __init numa_register_memblks(struct numa_meminfo *mi)
678
679static __init int find_near_online_node(int node)
680{ 521{
681 int n, val; 522 int i, nid;
682 int min_val = INT_MAX;
683 int best_node = -1;
684 523
685 for_each_online_node(n) { 524 /* Account for nodes with cpus and no memory */
686 val = node_distance(node, n); 525 node_possible_map = numa_nodes_parsed;
526 numa_nodemask_from_meminfo(&node_possible_map, mi);
527 if (WARN_ON(nodes_empty(node_possible_map)))
528 return -EINVAL;
529
530 memnode_shift = compute_hash_shift(mi);
531 if (memnode_shift < 0) {
532 printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
533 return -EINVAL;
534 }
687 535
688 if (val < min_val) { 536 for (i = 0; i < mi->nr_blks; i++)
689 min_val = val; 537 memblock_x86_register_active_regions(mi->blk[i].nid,
690 best_node = n; 538 mi->blk[i].start >> PAGE_SHIFT,
539 mi->blk[i].end >> PAGE_SHIFT);
540
541 /* for out of order entries */
542 sort_node_map();
543 if (!numa_meminfo_cover_memory(mi))
544 return -EINVAL;
545
546 /* Finally register nodes. */
547 for_each_node_mask(nid, node_possible_map) {
548 u64 start = (u64)max_pfn << PAGE_SHIFT;
549 u64 end = 0;
550
551 for (i = 0; i < mi->nr_blks; i++) {
552 if (nid != mi->blk[i].nid)
553 continue;
554 start = min(mi->blk[i].start, start);
555 end = max(mi->blk[i].end, end);
691 } 556 }
557
558 if (start < end)
559 setup_node_bootmem(nid, start, end);
692 } 560 }
693 561
694 return best_node; 562 return 0;
695} 563}
696 564
697/* 565/**
698 * Setup early cpu_to_node. 566 * dummy_numma_init - Fallback dummy NUMA init
699 * 567 *
700 * Populate cpu_to_node[] only if x86_cpu_to_apicid[], 568 * Used if there's no underlying NUMA architecture, NUMA initialization
701 * and apicid_to_node[] tables have valid entries for a CPU. 569 * fails, or NUMA is disabled on the command line.
702 * This means we skip cpu_to_node[] initialisation for NUMA
703 * emulation and faking node case (when running a kernel compiled
704 * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
705 * is already initialized in a round robin manner at numa_init_array,
706 * prior to this call, and this initialization is good enough
707 * for the fake NUMA cases.
708 * 570 *
709 * Called before the per_cpu areas are setup. 571 * Must online at least one node and add memory blocks that cover all
572 * allowed memory. This function must not fail.
710 */ 573 */
711void __init init_cpu_to_node(void) 574static int __init dummy_numa_init(void)
712{ 575{
713 int cpu; 576 printk(KERN_INFO "%s\n",
714 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 577 numa_off ? "NUMA turned off" : "No NUMA configuration found");
715 578 printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
716 BUG_ON(cpu_to_apicid == NULL); 579 0LU, max_pfn << PAGE_SHIFT);
717 580
718 for_each_possible_cpu(cpu) { 581 node_set(0, numa_nodes_parsed);
719 int node; 582 numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
720 u16 apicid = cpu_to_apicid[cpu];
721 583
722 if (apicid == BAD_APICID) 584 return 0;
723 continue;
724 node = apicid_to_node[apicid];
725 if (node == NUMA_NO_NODE)
726 continue;
727 if (!node_online(node))
728 node = find_near_online_node(node);
729 numa_set_node(cpu, node);
730 }
731} 585}
732#endif
733 586
734 587static int __init numa_init(int (*init_func)(void))
735void __cpuinit numa_set_node(int cpu, int node)
736{ 588{
737 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 589 int i;
738 590 int ret;
739 /* early setting, no percpu area yet */
740 if (cpu_to_node_map) {
741 cpu_to_node_map[cpu] = node;
742 return;
743 }
744
745#ifdef CONFIG_DEBUG_PER_CPU_MAPS
746 if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
747 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
748 dump_stack();
749 return;
750 }
751#endif
752 per_cpu(x86_cpu_to_node_map, cpu) = node;
753 591
754 if (node != NUMA_NO_NODE) 592 for (i = 0; i < MAX_LOCAL_APIC; i++)
755 set_cpu_numa_node(cpu, node); 593 set_apicid_to_node(i, NUMA_NO_NODE);
756}
757 594
758void __cpuinit numa_clear_node(int cpu) 595 nodes_clear(numa_nodes_parsed);
759{ 596 nodes_clear(node_possible_map);
760 numa_set_node(cpu, NUMA_NO_NODE); 597 nodes_clear(node_online_map);
761} 598 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
599 remove_all_active_ranges();
600 numa_reset_distance();
762 601
763#ifndef CONFIG_DEBUG_PER_CPU_MAPS 602 ret = init_func();
603 if (ret < 0)
604 return ret;
605 ret = numa_cleanup_meminfo(&numa_meminfo);
606 if (ret < 0)
607 return ret;
764 608
765#ifndef CONFIG_NUMA_EMU 609 numa_emulation(&numa_meminfo, numa_distance_cnt);
766void __cpuinit numa_add_cpu(int cpu)
767{
768 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
769}
770 610
771void __cpuinit numa_remove_cpu(int cpu) 611 ret = numa_register_memblks(&numa_meminfo);
772{ 612 if (ret < 0)
773 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 613 return ret;
774}
775#else
776void __cpuinit numa_add_cpu(int cpu)
777{
778 unsigned long addr;
779 u16 apicid;
780 int physnid;
781 int nid = NUMA_NO_NODE;
782 614
783 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); 615 for (i = 0; i < nr_cpu_ids; i++) {
784 if (apicid != BAD_APICID) 616 int nid = early_cpu_to_node(i);
785 nid = apicid_to_node[apicid];
786 if (nid == NUMA_NO_NODE)
787 nid = early_cpu_to_node(cpu);
788 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
789
790 /*
791 * Use the starting address of the emulated node to find which physical
792 * node it is allocated on.
793 */
794 addr = node_start_pfn(nid) << PAGE_SHIFT;
795 for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
796 if (addr >= physnodes[physnid].start &&
797 addr < physnodes[physnid].end)
798 break;
799 617
800 /* 618 if (nid == NUMA_NO_NODE)
801 * Map the cpu to each emulated node that is allocated on the physical 619 continue;
802 * node of the cpu's apic id. 620 if (!node_online(nid))
803 */ 621 numa_clear_node(i);
804 for_each_online_node(nid) {
805 addr = node_start_pfn(nid) << PAGE_SHIFT;
806 if (addr >= physnodes[physnid].start &&
807 addr < physnodes[physnid].end)
808 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
809 } 622 }
623 numa_init_array();
624 return 0;
810} 625}
811 626
812void __cpuinit numa_remove_cpu(int cpu) 627void __init initmem_init(void)
813{ 628{
814 int i; 629 int ret;
815 630
816 for_each_online_node(i) 631 if (!numa_off) {
817 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); 632#ifdef CONFIG_ACPI_NUMA
818} 633 ret = numa_init(x86_acpi_numa_init);
819#endif /* !CONFIG_NUMA_EMU */ 634 if (!ret)
820 635 return;
821#else /* CONFIG_DEBUG_PER_CPU_MAPS */ 636#endif
822static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) 637#ifdef CONFIG_AMD_NUMA
823{ 638 ret = numa_init(amd_numa_init);
824 int node = early_cpu_to_node(cpu); 639 if (!ret)
825 struct cpumask *mask; 640 return;
826 char buf[64]; 641#endif
827
828 mask = node_to_cpumask_map[node];
829 if (!mask) {
830 pr_err("node_to_cpumask_map[%i] NULL\n", node);
831 dump_stack();
832 return NULL;
833 } 642 }
834 643
835 cpulist_scnprintf(buf, sizeof(buf), mask); 644 numa_init(dummy_numa_init);
836 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
837 enable ? "numa_add_cpu" : "numa_remove_cpu",
838 cpu, node, buf);
839 return mask;
840} 645}
841 646
842/* 647unsigned long __init numa_free_all_bootmem(void)
843 * --------- debug versions of the numa functions ---------
844 */
845#ifndef CONFIG_NUMA_EMU
846static void __cpuinit numa_set_cpumask(int cpu, int enable)
847{
848 struct cpumask *mask;
849
850 mask = debug_cpumask_set_cpu(cpu, enable);
851 if (!mask)
852 return;
853
854 if (enable)
855 cpumask_set_cpu(cpu, mask);
856 else
857 cpumask_clear_cpu(cpu, mask);
858}
859#else
860static void __cpuinit numa_set_cpumask(int cpu, int enable)
861{ 648{
862 int node = early_cpu_to_node(cpu); 649 unsigned long pages = 0;
863 struct cpumask *mask;
864 int i; 650 int i;
865 651
866 for_each_online_node(i) { 652 for_each_online_node(i)
867 unsigned long addr; 653 pages += free_all_bootmem_node(NODE_DATA(i));
868
869 addr = node_start_pfn(i) << PAGE_SHIFT;
870 if (addr < physnodes[node].start ||
871 addr >= physnodes[node].end)
872 continue;
873 mask = debug_cpumask_set_cpu(cpu, enable);
874 if (!mask)
875 return;
876
877 if (enable)
878 cpumask_set_cpu(cpu, mask);
879 else
880 cpumask_clear_cpu(cpu, mask);
881 }
882}
883#endif /* CONFIG_NUMA_EMU */
884 654
885void __cpuinit numa_add_cpu(int cpu) 655 pages += free_all_memory_core_early(MAX_NUMNODES);
886{
887 numa_set_cpumask(cpu, 1);
888}
889 656
890void __cpuinit numa_remove_cpu(int cpu) 657 return pages;
891{
892 numa_set_cpumask(cpu, 0);
893} 658}
894 659
895int __cpu_to_node(int cpu) 660int __cpuinit numa_cpu_node(int cpu)
896{ 661{
897 if (early_per_cpu_ptr(x86_cpu_to_node_map)) { 662 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
898 printk(KERN_WARNING
899 "cpu_to_node(%d): usage too early!\n", cpu);
900 dump_stack();
901 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
902 }
903 return per_cpu(x86_cpu_to_node_map, cpu);
904}
905EXPORT_SYMBOL(__cpu_to_node);
906 663
907/* 664 if (apicid != BAD_APICID)
908 * Same function as cpu_to_node() but used if called before the 665 return __apicid_to_node[apicid];
909 * per_cpu areas are setup. 666 return NUMA_NO_NODE;
910 */
911int early_cpu_to_node(int cpu)
912{
913 if (early_per_cpu_ptr(x86_cpu_to_node_map))
914 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
915
916 if (!cpu_possible(cpu)) {
917 printk(KERN_WARNING
918 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
919 dump_stack();
920 return NUMA_NO_NODE;
921 }
922 return per_cpu(x86_cpu_to_node_map, cpu);
923} 667}
924
925/*
926 * --------- end of debug versions of the numa functions ---------
927 */
928
929#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
new file mode 100644
index 000000000000..ad091e4cff17
--- /dev/null
+++ b/arch/x86/mm/numa_emulation.c
@@ -0,0 +1,494 @@
1/*
2 * NUMA emulation
3 */
4#include <linux/kernel.h>
5#include <linux/errno.h>
6#include <linux/topology.h>
7#include <linux/memblock.h>
8#include <asm/dma.h>
9
10#include "numa_internal.h"
11
12static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
13static char *emu_cmdline __initdata;
14
15void __init numa_emu_cmdline(char *str)
16{
17 emu_cmdline = str;
18}
19
20static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
21{
22 int i;
23
24 for (i = 0; i < mi->nr_blks; i++)
25 if (mi->blk[i].nid == nid)
26 return i;
27 return -ENOENT;
28}
29
30/*
31 * Sets up nid to range from @start to @end. The return value is -errno if
32 * something went wrong, 0 otherwise.
33 */
34static int __init emu_setup_memblk(struct numa_meminfo *ei,
35 struct numa_meminfo *pi,
36 int nid, int phys_blk, u64 size)
37{
38 struct numa_memblk *eb = &ei->blk[ei->nr_blks];
39 struct numa_memblk *pb = &pi->blk[phys_blk];
40
41 if (ei->nr_blks >= NR_NODE_MEMBLKS) {
42 pr_err("NUMA: Too many emulated memblks, failing emulation\n");
43 return -EINVAL;
44 }
45
46 ei->nr_blks++;
47 eb->start = pb->start;
48 eb->end = pb->start + size;
49 eb->nid = nid;
50
51 if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
52 emu_nid_to_phys[nid] = pb->nid;
53
54 pb->start += size;
55 if (pb->start >= pb->end) {
56 WARN_ON_ONCE(pb->start > pb->end);
57 numa_remove_memblk_from(phys_blk, pi);
58 }
59
60 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
61 eb->start, eb->end, (eb->end - eb->start) >> 20);
62 return 0;
63}
64
65/*
66 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
67 * to max_addr. The return value is the number of nodes allocated.
68 */
69static int __init split_nodes_interleave(struct numa_meminfo *ei,
70 struct numa_meminfo *pi,
71 u64 addr, u64 max_addr, int nr_nodes)
72{
73 nodemask_t physnode_mask = NODE_MASK_NONE;
74 u64 size;
75 int big;
76 int nid = 0;
77 int i, ret;
78
79 if (nr_nodes <= 0)
80 return -1;
81 if (nr_nodes > MAX_NUMNODES) {
82 pr_info("numa=fake=%d too large, reducing to %d\n",
83 nr_nodes, MAX_NUMNODES);
84 nr_nodes = MAX_NUMNODES;
85 }
86
87 size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
88 /*
89 * Calculate the number of big nodes that can be allocated as a result
90 * of consolidating the remainder.
91 */
92 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
93 FAKE_NODE_MIN_SIZE;
94
95 size &= FAKE_NODE_MIN_HASH_MASK;
96 if (!size) {
97 pr_err("Not enough memory for each node. "
98 "NUMA emulation disabled.\n");
99 return -1;
100 }
101
102 for (i = 0; i < pi->nr_blks; i++)
103 node_set(pi->blk[i].nid, physnode_mask);
104
105 /*
106 * Continue to fill physical nodes with fake nodes until there is no
107 * memory left on any of them.
108 */
109 while (nodes_weight(physnode_mask)) {
110 for_each_node_mask(i, physnode_mask) {
111 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
112 u64 start, limit, end;
113 int phys_blk;
114
115 phys_blk = emu_find_memblk_by_nid(i, pi);
116 if (phys_blk < 0) {
117 node_clear(i, physnode_mask);
118 continue;
119 }
120 start = pi->blk[phys_blk].start;
121 limit = pi->blk[phys_blk].end;
122 end = start + size;
123
124 if (nid < big)
125 end += FAKE_NODE_MIN_SIZE;
126
127 /*
128 * Continue to add memory to this fake node if its
129 * non-reserved memory is less than the per-node size.
130 */
131 while (end - start -
132 memblock_x86_hole_size(start, end) < size) {
133 end += FAKE_NODE_MIN_SIZE;
134 if (end > limit) {
135 end = limit;
136 break;
137 }
138 }
139
140 /*
141 * If there won't be at least FAKE_NODE_MIN_SIZE of
142 * non-reserved memory in ZONE_DMA32 for the next node,
143 * this one must extend to the boundary.
144 */
145 if (end < dma32_end && dma32_end - end -
146 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
147 end = dma32_end;
148
149 /*
150 * If there won't be enough non-reserved memory for the
151 * next node, this one must extend to the end of the
152 * physical node.
153 */
154 if (limit - end -
155 memblock_x86_hole_size(end, limit) < size)
156 end = limit;
157
158 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
159 phys_blk,
160 min(end, limit) - start);
161 if (ret < 0)
162 return ret;
163 }
164 }
165 return 0;
166}
167
168/*
169 * Returns the end address of a node so that there is at least `size' amount of
170 * non-reserved memory or `max_addr' is reached.
171 */
172static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
173{
174 u64 end = start + size;
175
176 while (end - start - memblock_x86_hole_size(start, end) < size) {
177 end += FAKE_NODE_MIN_SIZE;
178 if (end > max_addr) {
179 end = max_addr;
180 break;
181 }
182 }
183 return end;
184}
185
186/*
187 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
188 * `addr' to `max_addr'. The return value is the number of nodes allocated.
189 */
190static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
191 struct numa_meminfo *pi,
192 u64 addr, u64 max_addr, u64 size)
193{
194 nodemask_t physnode_mask = NODE_MASK_NONE;
195 u64 min_size;
196 int nid = 0;
197 int i, ret;
198
199 if (!size)
200 return -1;
201 /*
202 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
203 * increased accordingly if the requested size is too small. This
204 * creates a uniform distribution of node sizes across the entire
205 * machine (but not necessarily over physical nodes).
206 */
207 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
208 MAX_NUMNODES;
209 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
210 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
211 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
212 FAKE_NODE_MIN_HASH_MASK;
213 if (size < min_size) {
214 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
215 size >> 20, min_size >> 20);
216 size = min_size;
217 }
218 size &= FAKE_NODE_MIN_HASH_MASK;
219
220 for (i = 0; i < pi->nr_blks; i++)
221 node_set(pi->blk[i].nid, physnode_mask);
222
223 /*
224 * Fill physical nodes with fake nodes of size until there is no memory
225 * left on any of them.
226 */
227 while (nodes_weight(physnode_mask)) {
228 for_each_node_mask(i, physnode_mask) {
229 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
230 u64 start, limit, end;
231 int phys_blk;
232
233 phys_blk = emu_find_memblk_by_nid(i, pi);
234 if (phys_blk < 0) {
235 node_clear(i, physnode_mask);
236 continue;
237 }
238 start = pi->blk[phys_blk].start;
239 limit = pi->blk[phys_blk].end;
240
241 end = find_end_of_node(start, limit, size);
242 /*
243 * If there won't be at least FAKE_NODE_MIN_SIZE of
244 * non-reserved memory in ZONE_DMA32 for the next node,
245 * this one must extend to the boundary.
246 */
247 if (end < dma32_end && dma32_end - end -
248 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
249 end = dma32_end;
250
251 /*
252 * If there won't be enough non-reserved memory for the
253 * next node, this one must extend to the end of the
254 * physical node.
255 */
256 if (limit - end -
257 memblock_x86_hole_size(end, limit) < size)
258 end = limit;
259
260 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
261 phys_blk,
262 min(end, limit) - start);
263 if (ret < 0)
264 return ret;
265 }
266 }
267 return 0;
268}
269
270/**
271 * numa_emulation - Emulate NUMA nodes
272 * @numa_meminfo: NUMA configuration to massage
273 * @numa_dist_cnt: The size of the physical NUMA distance table
274 *
275 * Emulate NUMA nodes according to the numa=fake kernel parameter.
276 * @numa_meminfo contains the physical memory configuration and is modified
277 * to reflect the emulated configuration on success. @numa_dist_cnt is
278 * used to determine the size of the physical distance table.
279 *
280 * On success, the following modifications are made.
281 *
282 * - @numa_meminfo is updated to reflect the emulated nodes.
283 *
284 * - __apicid_to_node[] is updated such that APIC IDs are mapped to the
285 * emulated nodes.
286 *
287 * - NUMA distance table is rebuilt to represent distances between emulated
288 * nodes. The distances are determined considering how emulated nodes
289 * are mapped to physical nodes and match the actual distances.
290 *
291 * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical
292 * nodes. This is used by numa_add_cpu() and numa_remove_cpu().
293 *
294 * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with
295 * identity mapping and no other modification is made.
296 */
297void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
298{
299 static struct numa_meminfo ei __initdata;
300 static struct numa_meminfo pi __initdata;
301 const u64 max_addr = max_pfn << PAGE_SHIFT;
302 u8 *phys_dist = NULL;
303 size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
304 int max_emu_nid, dfl_phys_nid;
305 int i, j, ret;
306
307 if (!emu_cmdline)
308 goto no_emu;
309
310 memset(&ei, 0, sizeof(ei));
311 pi = *numa_meminfo;
312
313 for (i = 0; i < MAX_NUMNODES; i++)
314 emu_nid_to_phys[i] = NUMA_NO_NODE;
315
316 /*
317 * If the numa=fake command-line contains a 'M' or 'G', it represents
318 * the fixed node size. Otherwise, if it is just a single number N,
319 * split the system RAM into N fake nodes.
320 */
321 if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
322 u64 size;
323
324 size = memparse(emu_cmdline, &emu_cmdline);
325 ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
326 } else {
327 unsigned long n;
328
329 n = simple_strtoul(emu_cmdline, NULL, 0);
330 ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
331 }
332
333 if (ret < 0)
334 goto no_emu;
335
336 if (numa_cleanup_meminfo(&ei) < 0) {
337 pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
338 goto no_emu;
339 }
340
341 /* copy the physical distance table */
342 if (numa_dist_cnt) {
343 u64 phys;
344
345 phys = memblock_find_in_range(0,
346 (u64)max_pfn_mapped << PAGE_SHIFT,
347 phys_size, PAGE_SIZE);
348 if (phys == MEMBLOCK_ERROR) {
349 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
350 goto no_emu;
351 }
352 memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST");
353 phys_dist = __va(phys);
354
355 for (i = 0; i < numa_dist_cnt; i++)
356 for (j = 0; j < numa_dist_cnt; j++)
357 phys_dist[i * numa_dist_cnt + j] =
358 node_distance(i, j);
359 }
360
361 /*
362 * Determine the max emulated nid and the default phys nid to use
363 * for unmapped nodes.
364 */
365 max_emu_nid = 0;
366 dfl_phys_nid = NUMA_NO_NODE;
367 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
368 if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
369 max_emu_nid = i;
370 if (dfl_phys_nid == NUMA_NO_NODE)
371 dfl_phys_nid = emu_nid_to_phys[i];
372 }
373 }
374 if (dfl_phys_nid == NUMA_NO_NODE) {
375 pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n");
376 goto no_emu;
377 }
378
379 /* commit */
380 *numa_meminfo = ei;
381
382 /*
383 * Transform __apicid_to_node table to use emulated nids by
384 * reverse-mapping phys_nid. The maps should always exist but fall
385 * back to zero just in case.
386 */
387 for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
388 if (__apicid_to_node[i] == NUMA_NO_NODE)
389 continue;
390 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
391 if (__apicid_to_node[i] == emu_nid_to_phys[j])
392 break;
393 __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
394 }
395
396 /* make sure all emulated nodes are mapped to a physical node */
397 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
398 if (emu_nid_to_phys[i] == NUMA_NO_NODE)
399 emu_nid_to_phys[i] = dfl_phys_nid;
400
401 /* transform distance table */
402 numa_reset_distance();
403 for (i = 0; i < max_emu_nid + 1; i++) {
404 for (j = 0; j < max_emu_nid + 1; j++) {
405 int physi = emu_nid_to_phys[i];
406 int physj = emu_nid_to_phys[j];
407 int dist;
408
409 if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
410 dist = physi == physj ?
411 LOCAL_DISTANCE : REMOTE_DISTANCE;
412 else
413 dist = phys_dist[physi * numa_dist_cnt + physj];
414
415 numa_set_distance(i, j, dist);
416 }
417 }
418
419 /* free the copied physical distance table */
420 if (phys_dist)
421 memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size);
422 return;
423
424no_emu:
425 /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */
426 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
427 emu_nid_to_phys[i] = i;
428}
429
430#ifndef CONFIG_DEBUG_PER_CPU_MAPS
431void __cpuinit numa_add_cpu(int cpu)
432{
433 int physnid, nid;
434
435 nid = early_cpu_to_node(cpu);
436 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
437
438 physnid = emu_nid_to_phys[nid];
439
440 /*
441 * Map the cpu to each emulated node that is allocated on the physical
442 * node of the cpu's apic id.
443 */
444 for_each_online_node(nid)
445 if (emu_nid_to_phys[nid] == physnid)
446 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
447}
448
449void __cpuinit numa_remove_cpu(int cpu)
450{
451 int i;
452
453 for_each_online_node(i)
454 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
455}
456#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
457static void __cpuinit numa_set_cpumask(int cpu, int enable)
458{
459 struct cpumask *mask;
460 int nid, physnid, i;
461
462 nid = early_cpu_to_node(cpu);
463 if (nid == NUMA_NO_NODE) {
464 /* early_cpu_to_node() already emits a warning and trace */
465 return;
466 }
467
468 physnid = emu_nid_to_phys[nid];
469
470 for_each_online_node(i) {
471 if (emu_nid_to_phys[nid] != physnid)
472 continue;
473
474 mask = debug_cpumask_set_cpu(cpu, enable);
475 if (!mask)
476 return;
477
478 if (enable)
479 cpumask_set_cpu(cpu, mask);
480 else
481 cpumask_clear_cpu(cpu, mask);
482 }
483}
484
485void __cpuinit numa_add_cpu(int cpu)
486{
487 numa_set_cpumask(cpu, 1);
488}
489
490void __cpuinit numa_remove_cpu(int cpu)
491{
492 numa_set_cpumask(cpu, 0);
493}
494#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
new file mode 100644
index 000000000000..ef2d97377d7c
--- /dev/null
+++ b/arch/x86/mm/numa_internal.h
@@ -0,0 +1,31 @@
1#ifndef __X86_MM_NUMA_INTERNAL_H
2#define __X86_MM_NUMA_INTERNAL_H
3
4#include <linux/types.h>
5#include <asm/numa.h>
6
7struct numa_memblk {
8 u64 start;
9 u64 end;
10 int nid;
11};
12
13struct numa_meminfo {
14 int nr_blks;
15 struct numa_memblk blk[NR_NODE_MEMBLKS];
16};
17
18void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
19int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
20void __init numa_reset_distance(void);
21
22#ifdef CONFIG_NUMA_EMU
23void __init numa_emulation(struct numa_meminfo *numa_meminfo,
24 int numa_dist_cnt);
25#else
26static inline void numa_emulation(struct numa_meminfo *numa_meminfo,
27 int numa_dist_cnt)
28{ }
29#endif
30
31#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c81f3c..90825f2eb0f4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
57 57
58void update_page_count(int level, unsigned long pages) 58void update_page_count(int level, unsigned long pages)
59{ 59{
60 unsigned long flags;
61
62 /* Protect against CPA */ 60 /* Protect against CPA */
63 spin_lock_irqsave(&pgd_lock, flags); 61 spin_lock(&pgd_lock);
64 direct_pages_count[level] += pages; 62 direct_pages_count[level] += pages;
65 spin_unlock_irqrestore(&pgd_lock, flags); 63 spin_unlock(&pgd_lock);
66} 64}
67 65
68static void split_page_count(int level) 66static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
394try_preserve_large_page(pte_t *kpte, unsigned long address, 392try_preserve_large_page(pte_t *kpte, unsigned long address,
395 struct cpa_data *cpa) 393 struct cpa_data *cpa)
396{ 394{
397 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; 395 unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
398 pte_t new_pte, old_pte, *tmp; 396 pte_t new_pte, old_pte, *tmp;
399 pgprot_t old_prot, new_prot, req_prot; 397 pgprot_t old_prot, new_prot, req_prot;
400 int i, do_split = 1; 398 int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
403 if (cpa->force_split) 401 if (cpa->force_split)
404 return 1; 402 return 1;
405 403
406 spin_lock_irqsave(&pgd_lock, flags); 404 spin_lock(&pgd_lock);
407 /* 405 /*
408 * Check for races, another CPU might have split this page 406 * Check for races, another CPU might have split this page
409 * up already: 407 * up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
498 } 496 }
499 497
500out_unlock: 498out_unlock:
501 spin_unlock_irqrestore(&pgd_lock, flags); 499 spin_unlock(&pgd_lock);
502 500
503 return do_split; 501 return do_split;
504} 502}
505 503
506static int split_large_page(pte_t *kpte, unsigned long address) 504static int split_large_page(pte_t *kpte, unsigned long address)
507{ 505{
508 unsigned long flags, pfn, pfninc = 1; 506 unsigned long pfn, pfninc = 1;
509 unsigned int i, level; 507 unsigned int i, level;
510 pte_t *pbase, *tmp; 508 pte_t *pbase, *tmp;
511 pgprot_t ref_prot; 509 pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
519 if (!base) 517 if (!base)
520 return -ENOMEM; 518 return -ENOMEM;
521 519
522 spin_lock_irqsave(&pgd_lock, flags); 520 spin_lock(&pgd_lock);
523 /* 521 /*
524 * Check for races, another CPU might have split this page 522 * Check for races, another CPU might have split this page
525 * up for us already: 523 * up for us already:
@@ -591,7 +589,7 @@ out_unlock:
591 */ 589 */
592 if (base) 590 if (base)
593 __free_page(base); 591 __free_page(base);
594 spin_unlock_irqrestore(&pgd_lock, flags); 592 spin_unlock(&pgd_lock);
595 593
596 return 0; 594 return 0;
597} 595}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d3c96d..0113d19c8aa6 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
121 121
122static void pgd_dtor(pgd_t *pgd) 122static void pgd_dtor(pgd_t *pgd)
123{ 123{
124 unsigned long flags; /* can be called from interrupt context */
125
126 if (SHARED_KERNEL_PMD) 124 if (SHARED_KERNEL_PMD)
127 return; 125 return;
128 126
129 spin_lock_irqsave(&pgd_lock, flags); 127 spin_lock(&pgd_lock);
130 pgd_list_del(pgd); 128 pgd_list_del(pgd);
131 spin_unlock_irqrestore(&pgd_lock, flags); 129 spin_unlock(&pgd_lock);
132} 130}
133 131
134/* 132/*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
260{ 258{
261 pgd_t *pgd; 259 pgd_t *pgd;
262 pmd_t *pmds[PREALLOCATED_PMDS]; 260 pmd_t *pmds[PREALLOCATED_PMDS];
263 unsigned long flags;
264 261
265 pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); 262 pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
266 263
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
280 * respect to anything walking the pgd_list, so that they 277 * respect to anything walking the pgd_list, so that they
281 * never see a partially populated pgd. 278 * never see a partially populated pgd.
282 */ 279 */
283 spin_lock_irqsave(&pgd_lock, flags); 280 spin_lock(&pgd_lock);
284 281
285 pgd_ctor(mm, pgd); 282 pgd_ctor(mm, pgd);
286 pgd_prepopulate_pmd(mm, pgd, pmds); 283 pgd_prepopulate_pmd(mm, pgd, pmds);
287 284
288 spin_unlock_irqrestore(&pgd_lock, flags); 285 spin_unlock(&pgd_lock);
289 286
290 return pgd; 287 return pgd;
291 288
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index ae96e7b8051d..48651c6f657d 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -57,7 +57,7 @@ struct node_memory_chunk_s {
57static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; 57static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
58 58
59static int __initdata num_memory_chunks; /* total number of memory chunks */ 59static int __initdata num_memory_chunks; /* total number of memory chunks */
60static u8 __initdata apicid_to_pxm[MAX_APICID]; 60static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
61 61
62int acpi_numa __initdata; 62int acpi_numa __initdata;
63 63
@@ -254,8 +254,8 @@ int __init get_memcfg_from_srat(void)
254 printk(KERN_DEBUG "Number of memory chunks in system = %d\n", 254 printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
255 num_memory_chunks); 255 num_memory_chunks);
256 256
257 for (i = 0; i < MAX_APICID; i++) 257 for (i = 0; i < MAX_LOCAL_APIC; i++)
258 apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); 258 set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
259 259
260 for (j = 0; j < num_memory_chunks; j++){ 260 for (j = 0; j < num_memory_chunks; j++){
261 struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; 261 struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285d1daa..8e9d3394f6d4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -26,88 +26,34 @@
26 26
27int acpi_numa __initdata; 27int acpi_numa __initdata;
28 28
29static struct acpi_table_slit *acpi_slit;
30
31static nodemask_t nodes_parsed __initdata;
32static nodemask_t cpu_nodes_parsed __initdata;
33static struct bootnode nodes[MAX_NUMNODES] __initdata;
34static struct bootnode nodes_add[MAX_NUMNODES]; 29static struct bootnode nodes_add[MAX_NUMNODES];
35 30
36static int num_node_memblks __initdata;
37static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
38static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
39
40static __init int setup_node(int pxm) 31static __init int setup_node(int pxm)
41{ 32{
42 return acpi_map_pxm_to_node(pxm); 33 return acpi_map_pxm_to_node(pxm);
43} 34}
44 35
45static __init int conflicting_memblks(unsigned long start, unsigned long end)
46{
47 int i;
48 for (i = 0; i < num_node_memblks; i++) {
49 struct bootnode *nd = &node_memblk_range[i];
50 if (nd->start == nd->end)
51 continue;
52 if (nd->end > start && nd->start < end)
53 return memblk_nodeid[i];
54 if (nd->end == end && nd->start == start)
55 return memblk_nodeid[i];
56 }
57 return -1;
58}
59
60static __init void cutoff_node(int i, unsigned long start, unsigned long end)
61{
62 struct bootnode *nd = &nodes[i];
63
64 if (nd->start < start) {
65 nd->start = start;
66 if (nd->end < nd->start)
67 nd->start = nd->end;
68 }
69 if (nd->end > end) {
70 nd->end = end;
71 if (nd->start > nd->end)
72 nd->start = nd->end;
73 }
74}
75
76static __init void bad_srat(void) 36static __init void bad_srat(void)
77{ 37{
78 int i;
79 printk(KERN_ERR "SRAT: SRAT not used.\n"); 38 printk(KERN_ERR "SRAT: SRAT not used.\n");
80 acpi_numa = -1; 39 acpi_numa = -1;
81 for (i = 0; i < MAX_LOCAL_APIC; i++) 40 memset(nodes_add, 0, sizeof(nodes_add));
82 apicid_to_node[i] = NUMA_NO_NODE;
83 for (i = 0; i < MAX_NUMNODES; i++) {
84 nodes[i].start = nodes[i].end = 0;
85 nodes_add[i].start = nodes_add[i].end = 0;
86 }
87 remove_all_active_ranges();
88} 41}
89 42
90static __init inline int srat_disabled(void) 43static __init inline int srat_disabled(void)
91{ 44{
92 return numa_off || acpi_numa < 0; 45 return acpi_numa < 0;
93} 46}
94 47
95/* Callback for SLIT parsing */ 48/* Callback for SLIT parsing */
96void __init acpi_numa_slit_init(struct acpi_table_slit *slit) 49void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
97{ 50{
98 unsigned length; 51 int i, j;
99 unsigned long phys;
100
101 length = slit->header.length;
102 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
103 PAGE_SIZE);
104
105 if (phys == MEMBLOCK_ERROR)
106 panic(" Can not save slit!\n");
107 52
108 acpi_slit = __va(phys); 53 for (i = 0; i < slit->locality_count; i++)
109 memcpy(acpi_slit, slit, length); 54 for (j = 0; j < slit->locality_count; j++)
110 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT"); 55 numa_set_distance(pxm_to_node(i), pxm_to_node(j),
56 slit->entry[slit->locality_count * i + j]);
111} 57}
112 58
113/* Callback for Proximity Domain -> x2APIC mapping */ 59/* Callback for Proximity Domain -> x2APIC mapping */
@@ -138,8 +84,8 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
138 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); 84 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
139 return; 85 return;
140 } 86 }
141 apicid_to_node[apic_id] = node; 87 set_apicid_to_node(apic_id, node);
142 node_set(node, cpu_nodes_parsed); 88 node_set(node, numa_nodes_parsed);
143 acpi_numa = 1; 89 acpi_numa = 1;
144 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", 90 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
145 pxm, apic_id, node); 91 pxm, apic_id, node);
@@ -178,8 +124,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
178 return; 124 return;
179 } 125 }
180 126
181 apicid_to_node[apic_id] = node; 127 set_apicid_to_node(apic_id, node);
182 node_set(node, cpu_nodes_parsed); 128 node_set(node, numa_nodes_parsed);
183 acpi_numa = 1; 129 acpi_numa = 1;
184 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", 130 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
185 pxm, apic_id, node); 131 pxm, apic_id, node);
@@ -241,7 +187,7 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
241 } 187 }
242 188
243 if (changed) { 189 if (changed) {
244 node_set(node, cpu_nodes_parsed); 190 node_set(node, numa_nodes_parsed);
245 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", 191 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
246 nd->start, nd->end); 192 nd->start, nd->end);
247 } 193 }
@@ -251,10 +197,8 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
251void __init 197void __init
252acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) 198acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
253{ 199{
254 struct bootnode *nd, oldnode;
255 unsigned long start, end; 200 unsigned long start, end;
256 int node, pxm; 201 int node, pxm;
257 int i;
258 202
259 if (srat_disabled()) 203 if (srat_disabled())
260 return; 204 return;
@@ -276,300 +220,31 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
276 bad_srat(); 220 bad_srat();
277 return; 221 return;
278 } 222 }
279 i = conflicting_memblks(start, end); 223
280 if (i == node) { 224 if (numa_add_memblk(node, start, end) < 0) {
281 printk(KERN_WARNING
282 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
283 pxm, start, end, nodes[i].start, nodes[i].end);
284 } else if (i >= 0) {
285 printk(KERN_ERR
286 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
287 pxm, start, end, node_to_pxm(i),
288 nodes[i].start, nodes[i].end);
289 bad_srat(); 225 bad_srat();
290 return; 226 return;
291 } 227 }
292 nd = &nodes[node];
293 oldnode = *nd;
294 if (!node_test_and_set(node, nodes_parsed)) {
295 nd->start = start;
296 nd->end = end;
297 } else {
298 if (start < nd->start)
299 nd->start = start;
300 if (nd->end < end)
301 nd->end = end;
302 }
303 228
304 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, 229 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
305 start, end); 230 start, end);
306 231
307 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { 232 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
308 update_nodes_add(node, start, end); 233 update_nodes_add(node, start, end);
309 /* restore nodes[node] */
310 *nd = oldnode;
311 if ((nd->start | nd->end) == 0)
312 node_clear(node, nodes_parsed);
313 }
314
315 node_memblk_range[num_node_memblks].start = start;
316 node_memblk_range[num_node_memblks].end = end;
317 memblk_nodeid[num_node_memblks] = node;
318 num_node_memblks++;
319}
320
321/* Sanity check to catch more bad SRATs (they are amazingly common).
322 Make sure the PXMs cover all memory. */
323static int __init nodes_cover_memory(const struct bootnode *nodes)
324{
325 int i;
326 unsigned long pxmram, e820ram;
327
328 pxmram = 0;
329 for_each_node_mask(i, nodes_parsed) {
330 unsigned long s = nodes[i].start >> PAGE_SHIFT;
331 unsigned long e = nodes[i].end >> PAGE_SHIFT;
332 pxmram += e - s;
333 pxmram -= __absent_pages_in_range(i, s, e);
334 if ((long)pxmram < 0)
335 pxmram = 0;
336 }
337
338 e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
339 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
340 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
341 printk(KERN_ERR
342 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
343 (pxmram << PAGE_SHIFT) >> 20,
344 (e820ram << PAGE_SHIFT) >> 20);
345 return 0;
346 }
347 return 1;
348} 234}
349 235
350void __init acpi_numa_arch_fixup(void) {} 236void __init acpi_numa_arch_fixup(void) {}
351 237
352#ifdef CONFIG_NUMA_EMU 238int __init x86_acpi_numa_init(void)
353void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
354 unsigned long end)
355{
356 int i;
357
358 for_each_node_mask(i, nodes_parsed) {
359 cutoff_node(i, start, end);
360 physnodes[i].start = nodes[i].start;
361 physnodes[i].end = nodes[i].end;
362 }
363}
364#endif /* CONFIG_NUMA_EMU */
365
366/* Use the information discovered above to actually set up the nodes. */
367int __init acpi_scan_nodes(unsigned long start, unsigned long end)
368{ 239{
369 int i; 240 int ret;
370
371 if (acpi_numa <= 0)
372 return -1;
373
374 /* First clean up the node list */
375 for (i = 0; i < MAX_NUMNODES; i++)
376 cutoff_node(i, start, end);
377
378 /*
379 * Join together blocks on the same node, holes between
380 * which don't overlap with memory on other nodes.
381 */
382 for (i = 0; i < num_node_memblks; ++i) {
383 int j, k;
384
385 for (j = i + 1; j < num_node_memblks; ++j) {
386 unsigned long start, end;
387
388 if (memblk_nodeid[i] != memblk_nodeid[j])
389 continue;
390 start = min(node_memblk_range[i].end,
391 node_memblk_range[j].end);
392 end = max(node_memblk_range[i].start,
393 node_memblk_range[j].start);
394 for (k = 0; k < num_node_memblks; ++k) {
395 if (memblk_nodeid[i] == memblk_nodeid[k])
396 continue;
397 if (start < node_memblk_range[k].end &&
398 end > node_memblk_range[k].start)
399 break;
400 }
401 if (k < num_node_memblks)
402 continue;
403 start = min(node_memblk_range[i].start,
404 node_memblk_range[j].start);
405 end = max(node_memblk_range[i].end,
406 node_memblk_range[j].end);
407 printk(KERN_INFO "SRAT: Node %d "
408 "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
409 memblk_nodeid[i],
410 node_memblk_range[i].start,
411 node_memblk_range[i].end,
412 node_memblk_range[j].start,
413 node_memblk_range[j].end,
414 start, end);
415 node_memblk_range[i].start = start;
416 node_memblk_range[i].end = end;
417 k = --num_node_memblks - j;
418 memmove(memblk_nodeid + j, memblk_nodeid + j+1,
419 k * sizeof(*memblk_nodeid));
420 memmove(node_memblk_range + j, node_memblk_range + j+1,
421 k * sizeof(*node_memblk_range));
422 --j;
423 }
424 }
425
426 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
427 memblk_nodeid);
428 if (memnode_shift < 0) {
429 printk(KERN_ERR
430 "SRAT: No NUMA node hash function found. Contact maintainer\n");
431 bad_srat();
432 return -1;
433 }
434
435 for (i = 0; i < num_node_memblks; i++)
436 memblock_x86_register_active_regions(memblk_nodeid[i],
437 node_memblk_range[i].start >> PAGE_SHIFT,
438 node_memblk_range[i].end >> PAGE_SHIFT);
439
440 /* for out of order entries in SRAT */
441 sort_node_map();
442 if (!nodes_cover_memory(nodes)) {
443 bad_srat();
444 return -1;
445 }
446 241
447 /* Account for nodes with cpus and no memory */ 242 ret = acpi_numa_init();
448 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); 243 if (ret < 0)
449 244 return ret;
450 /* Finally register nodes */ 245 return srat_disabled() ? -EINVAL : 0;
451 for_each_node_mask(i, node_possible_map)
452 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
453 /* Try again in case setup_node_bootmem missed one due
454 to missing bootmem */
455 for_each_node_mask(i, node_possible_map)
456 if (!node_online(i))
457 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
458
459 for (i = 0; i < nr_cpu_ids; i++) {
460 int node = early_cpu_to_node(i);
461
462 if (node == NUMA_NO_NODE)
463 continue;
464 if (!node_online(node))
465 numa_clear_node(i);
466 }
467 numa_init_array();
468 return 0;
469}
470
471#ifdef CONFIG_NUMA_EMU
472static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
473 [0 ... MAX_NUMNODES-1] = PXM_INVAL
474};
475static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
476 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
477};
478static int __init find_node_by_addr(unsigned long addr)
479{
480 int ret = NUMA_NO_NODE;
481 int i;
482
483 for_each_node_mask(i, nodes_parsed) {
484 /*
485 * Find the real node that this emulated node appears on. For
486 * the sake of simplicity, we only use a real node's starting
487 * address to determine which emulated node it appears on.
488 */
489 if (addr >= nodes[i].start && addr < nodes[i].end) {
490 ret = i;
491 break;
492 }
493 }
494 return ret;
495} 246}
496 247
497/*
498 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
499 * mappings that respect the real ACPI topology but reflect our emulated
500 * environment. For each emulated node, we find which real node it appears on
501 * and create PXM to NID mappings for those fake nodes which mirror that
502 * locality. SLIT will now represent the correct distances between emulated
503 * nodes as a result of the real topology.
504 */
505void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
506{
507 int i, j;
508
509 for (i = 0; i < num_nodes; i++) {
510 int nid, pxm;
511
512 nid = find_node_by_addr(fake_nodes[i].start);
513 if (nid == NUMA_NO_NODE)
514 continue;
515 pxm = node_to_pxm(nid);
516 if (pxm == PXM_INVAL)
517 continue;
518 fake_node_to_pxm_map[i] = pxm;
519 /*
520 * For each apicid_to_node mapping that exists for this real
521 * node, it must now point to the fake node ID.
522 */
523 for (j = 0; j < MAX_LOCAL_APIC; j++)
524 if (apicid_to_node[j] == nid &&
525 fake_apicid_to_node[j] == NUMA_NO_NODE)
526 fake_apicid_to_node[j] = i;
527 }
528
529 /*
530 * If there are apicid-to-node mappings for physical nodes that do not
531 * have a corresponding emulated node, it should default to a guaranteed
532 * value.
533 */
534 for (i = 0; i < MAX_LOCAL_APIC; i++)
535 if (apicid_to_node[i] != NUMA_NO_NODE &&
536 fake_apicid_to_node[i] == NUMA_NO_NODE)
537 fake_apicid_to_node[i] = 0;
538
539 for (i = 0; i < num_nodes; i++)
540 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
541 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
542
543 nodes_clear(nodes_parsed);
544 for (i = 0; i < num_nodes; i++)
545 if (fake_nodes[i].start != fake_nodes[i].end)
546 node_set(i, nodes_parsed);
547}
548
549static int null_slit_node_compare(int a, int b)
550{
551 return node_to_pxm(a) == node_to_pxm(b);
552}
553#else
554static int null_slit_node_compare(int a, int b)
555{
556 return a == b;
557}
558#endif /* CONFIG_NUMA_EMU */
559
560int __node_distance(int a, int b)
561{
562 int index;
563
564 if (!acpi_slit)
565 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
566 REMOTE_DISTANCE;
567 index = acpi_slit->locality_count * node_to_pxm(a);
568 return acpi_slit->entry[index + node_to_pxm(b)];
569}
570
571EXPORT_SYMBOL(__node_distance);
572
573#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) 248#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
574int memory_add_physaddr_to_nid(u64 start) 249int memory_add_physaddr_to_nid(u64 start)
575{ 250{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724d5d8f..d6c0418c3e47 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -179,12 +179,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
179 sender = this_cpu_read(tlb_vector_offset); 179 sender = this_cpu_read(tlb_vector_offset);
180 f = &flush_state[sender]; 180 f = &flush_state[sender];
181 181
182 /* 182 if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
183 * Could avoid this lock when 183 raw_spin_lock(&f->tlbstate_lock);
184 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
185 * probably not worth checking this for a cache-hot lock.
186 */
187 raw_spin_lock(&f->tlbstate_lock);
188 184
189 f->flush_mm = mm; 185 f->flush_mm = mm;
190 f->flush_va = va; 186 f->flush_va = va;
@@ -202,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
202 198
203 f->flush_mm = NULL; 199 f->flush_mm = NULL;
204 f->flush_va = 0; 200 f->flush_va = 0;
205 raw_spin_unlock(&f->tlbstate_lock); 201 if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
202 raw_spin_unlock(&f->tlbstate_lock);
206} 203}
207 204
208void native_flush_tlb_others(const struct cpumask *cpumask, 205void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -211,11 +208,10 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
211 if (is_uv_system()) { 208 if (is_uv_system()) {
212 unsigned int cpu; 209 unsigned int cpu;
213 210
214 cpu = get_cpu(); 211 cpu = smp_processor_id();
215 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); 212 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
216 if (cpumask) 213 if (cpumask)
217 flush_tlb_others_ipi(cpumask, mm, va); 214 flush_tlb_others_ipi(cpumask, mm, va);
218 put_cpu();
219 return; 215 return;
220 } 216 }
221 flush_tlb_others_ipi(cpumask, mm, va); 217 flush_tlb_others_ipi(cpumask, mm, va);
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index e27dffbbb1a7..026e4931d162 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -350,7 +350,7 @@ static int __init early_fill_mp_bus_info(void)
350 350
351#define ENABLE_CF8_EXT_CFG (1ULL << 46) 351#define ENABLE_CF8_EXT_CFG (1ULL << 46)
352 352
353static void enable_pci_io_ecs(void *unused) 353static void __cpuinit enable_pci_io_ecs(void *unused)
354{ 354{
355 u64 reg; 355 u64 reg;
356 rdmsrl(MSR_AMD64_NB_CFG, reg); 356 rdmsrl(MSR_AMD64_NB_CFG, reg);
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 85b68ef5e809..67858be4b52b 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -34,6 +34,7 @@
34#include <linux/pci.h> 34#include <linux/pci.h>
35#include <linux/init.h> 35#include <linux/init.h>
36 36
37#include <asm/ce4100.h>
37#include <asm/pci_x86.h> 38#include <asm/pci_x86.h>
38 39
39struct sim_reg { 40struct sim_reg {
@@ -254,7 +255,7 @@ int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
254static int ce4100_conf_read(unsigned int seg, unsigned int bus, 255static int ce4100_conf_read(unsigned int seg, unsigned int bus,
255 unsigned int devfn, int reg, int len, u32 *value) 256 unsigned int devfn, int reg, int len, u32 *value)
256{ 257{
257 int i, retval = 1; 258 int i;
258 259
259 if (bus == 1) { 260 if (bus == 1) {
260 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { 261 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
@@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = {
306 .write = ce4100_conf_write, 307 .write = ce4100_conf_write,
307}; 308};
308 309
309static int __init ce4100_pci_init(void) 310int __init ce4100_pci_init(void)
310{ 311{
311 init_sim_regs(); 312 init_sim_regs();
312 raw_pci_ops = &ce4100_pci_conf; 313 raw_pci_ops = &ce4100_pci_conf;
313 return 0; 314 /* Indicate caller that it should invoke pci_legacy_init() */
315 return 1;
314} 316}
315subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 25cd4a07d09f..8c4085a95ef1 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -20,7 +20,8 @@
20#include <asm/xen/pci.h> 20#include <asm/xen/pci.h>
21 21
22#ifdef CONFIG_ACPI 22#ifdef CONFIG_ACPI
23static int xen_hvm_register_pirq(u32 gsi, int triggering) 23static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
24 int trigger, int polarity)
24{ 25{
25 int rc, irq; 26 int rc, irq;
26 struct physdev_map_pirq map_irq; 27 struct physdev_map_pirq map_irq;
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
41 return -1; 42 return -1;
42 } 43 }
43 44
44 if (triggering == ACPI_EDGE_SENSITIVE) { 45 if (trigger == ACPI_EDGE_SENSITIVE) {
45 shareable = 0; 46 shareable = 0;
46 name = "ioapic-edge"; 47 name = "ioapic-edge";
47 } else { 48 } else {
@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
55 56
56 return irq; 57 return irq;
57} 58}
58
59static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
60 int trigger, int polarity)
61{
62 return xen_hvm_register_pirq(gsi, trigger);
63}
64#endif 59#endif
65 60
66#if defined(CONFIG_PCI_MSI) 61#if defined(CONFIG_PCI_MSI)
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
91 86
92static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 87static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
93{ 88{
94 int irq, pirq, ret = 0; 89 int irq, pirq;
95 struct msi_desc *msidesc; 90 struct msi_desc *msidesc;
96 struct msi_msg msg; 91 struct msi_msg msg;
97 92
@@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
99 __read_msi_msg(msidesc, &msg); 94 __read_msi_msg(msidesc, &msg);
100 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | 95 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
101 ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); 96 ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
102 if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { 97 if (msg.data != XEN_PIRQ_MSI_DATA ||
103 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? 98 xen_irq_from_pirq(pirq) < 0) {
104 "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); 99 pirq = xen_allocate_pirq_msi(dev, msidesc);
105 if (irq < 0) 100 if (pirq < 0)
106 goto error; 101 goto error;
107 ret = set_irq_msi(irq, msidesc); 102 xen_msi_compose_msg(dev, pirq, &msg);
108 if (ret < 0) 103 __write_msi_msg(msidesc, &msg);
109 goto error_while; 104 dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
110 printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" 105 } else {
111 " pirq=%d\n", irq, pirq); 106 dev_dbg(&dev->dev,
112 return 0; 107 "xen: msi already bound to pirq=%d\n", pirq);
113 } 108 }
114 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? 109 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
115 "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); 110 (type == PCI_CAP_ID_MSIX) ?
116 if (irq < 0 || pirq < 0) 111 "msi-x" : "msi");
112 if (irq < 0)
117 goto error; 113 goto error;
118 printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); 114 dev_dbg(&dev->dev,
119 xen_msi_compose_msg(dev, pirq, &msg); 115 "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
120 ret = set_irq_msi(irq, msidesc);
121 if (ret < 0)
122 goto error_while;
123 write_msi_msg(irq, &msg);
124 } 116 }
125 return 0; 117 return 0;
126 118
127error_while:
128 unbind_from_irqhandler(irq, NULL);
129error: 119error:
130 if (ret == -ENODEV) 120 dev_err(&dev->dev,
131 dev_err(&dev->dev, "Xen PCI frontend has not registered" \ 121 "Xen PCI frontend has not registered MSI/MSI-X support!\n");
132 " MSI/MSI-X support!\n"); 122 return -ENODEV;
133
134 return ret;
135} 123}
136 124
137/* 125/*
@@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
150 return -ENOMEM; 138 return -ENOMEM;
151 139
152 if (type == PCI_CAP_ID_MSIX) 140 if (type == PCI_CAP_ID_MSIX)
153 ret = xen_pci_frontend_enable_msix(dev, &v, nvec); 141 ret = xen_pci_frontend_enable_msix(dev, v, nvec);
154 else 142 else
155 ret = xen_pci_frontend_enable_msi(dev, &v); 143 ret = xen_pci_frontend_enable_msi(dev, v);
156 if (ret) 144 if (ret)
157 goto error; 145 goto error;
158 i = 0; 146 i = 0;
159 list_for_each_entry(msidesc, &dev->msi_list, list) { 147 list_for_each_entry(msidesc, &dev->msi_list, list) {
160 irq = xen_allocate_pirq(v[i], 0, /* not sharable */ 148 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
161 (type == PCI_CAP_ID_MSIX) ? 149 (type == PCI_CAP_ID_MSIX) ?
162 "pcifront-msi-x" : "pcifront-msi"); 150 "pcifront-msi-x" :
163 if (irq < 0) { 151 "pcifront-msi");
164 ret = -1; 152 if (irq < 0)
165 goto free; 153 goto free;
166 }
167
168 ret = set_irq_msi(irq, msidesc);
169 if (ret)
170 goto error_while;
171 i++; 154 i++;
172 } 155 }
173 kfree(v); 156 kfree(v);
174 return 0; 157 return 0;
175 158
176error_while:
177 unbind_from_irqhandler(irq, NULL);
178error: 159error:
179 if (ret == -ENODEV) 160 dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
180 dev_err(&dev->dev, "Xen PCI frontend has not registered" \
181 " MSI/MSI-X support!\n");
182free: 161free:
183 kfree(v); 162 kfree(v);
184 return ret; 163 return ret;
@@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
193 xen_pci_frontend_disable_msix(dev); 172 xen_pci_frontend_disable_msix(dev);
194 else 173 else
195 xen_pci_frontend_disable_msi(dev); 174 xen_pci_frontend_disable_msi(dev);
175
176 /* Free the IRQ's and the msidesc using the generic code. */
177 default_teardown_msi_irqs(dev);
196} 178}
197 179
198static void xen_teardown_msi_irq(unsigned int irq) 180static void xen_teardown_msi_irq(unsigned int irq)
@@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq)
200 xen_destroy_irq(irq); 182 xen_destroy_irq(irq);
201} 183}
202 184
185#ifdef CONFIG_XEN_DOM0
203static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 186static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
204{ 187{
205 int irq, ret; 188 int ret = 0;
206 struct msi_desc *msidesc; 189 struct msi_desc *msidesc;
207 190
208 list_for_each_entry(msidesc, &dev->msi_list, list) { 191 list_for_each_entry(msidesc, &dev->msi_list, list) {
209 irq = xen_create_msi_irq(dev, msidesc, type); 192 struct physdev_map_pirq map_irq;
210 if (irq < 0)
211 return -1;
212 193
213 ret = set_irq_msi(irq, msidesc); 194 memset(&map_irq, 0, sizeof(map_irq));
214 if (ret) 195 map_irq.domid = DOMID_SELF;
215 goto error; 196 map_irq.type = MAP_PIRQ_TYPE_MSI;
216 } 197 map_irq.index = -1;
217 return 0; 198 map_irq.pirq = -1;
199 map_irq.bus = dev->bus->number;
200 map_irq.devfn = dev->devfn;
218 201
219error: 202 if (type == PCI_CAP_ID_MSIX) {
220 xen_destroy_irq(irq); 203 int pos;
204 u32 table_offset, bir;
205
206 pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
207
208 pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
209 &table_offset);
210 bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
211
212 map_irq.table_base = pci_resource_start(dev, bir);
213 map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
214 }
215
216 ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
217 if (ret) {
218 dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
219 goto out;
220 }
221
222 ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
223 map_irq.pirq, map_irq.index,
224 (type == PCI_CAP_ID_MSIX) ?
225 "msi-x" : "msi");
226 if (ret < 0)
227 goto out;
228 }
229 ret = 0;
230out:
221 return ret; 231 return ret;
222} 232}
223#endif 233#endif
234#endif
224 235
225static int xen_pcifront_enable_irq(struct pci_dev *dev) 236static int xen_pcifront_enable_irq(struct pci_dev *dev)
226{ 237{
227 int rc; 238 int rc;
228 int share = 1; 239 int share = 1;
240 u8 gsi;
229 241
230 dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); 242 rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
231 243 if (rc < 0) {
232 if (dev->irq < 0) 244 dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
233 return -EINVAL; 245 rc);
246 return rc;
247 }
234 248
235 if (dev->irq < NR_IRQS_LEGACY) 249 if (gsi < NR_IRQS_LEGACY)
236 share = 0; 250 share = 0;
237 251
238 rc = xen_allocate_pirq(dev->irq, share, "pcifront"); 252 rc = xen_allocate_pirq(gsi, share, "pcifront");
239 if (rc < 0) { 253 if (rc < 0) {
240 dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", 254 dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
241 dev->irq, rc); 255 gsi, rc);
242 return rc; 256 return rc;
243 } 257 }
258
259 dev->irq = rc;
260 dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
244 return 0; 261 return 0;
245} 262}
246 263
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index d2c0d51a7178..28071bb31db7 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,21 +15,20 @@
15#include <linux/serial_reg.h> 15#include <linux/serial_reg.h>
16#include <linux/serial_8250.h> 16#include <linux/serial_8250.h>
17 17
18#include <asm/ce4100.h>
19#include <asm/prom.h>
18#include <asm/setup.h> 20#include <asm/setup.h>
21#include <asm/i8259.h>
19#include <asm/io.h> 22#include <asm/io.h>
23#include <asm/io_apic.h>
20 24
21static int ce4100_i8042_detect(void) 25static int ce4100_i8042_detect(void)
22{ 26{
23 return 0; 27 return 0;
24} 28}
25 29
26static void __init sdv_find_smp_config(void)
27{
28}
29
30#ifdef CONFIG_SERIAL_8250 30#ifdef CONFIG_SERIAL_8250
31 31
32
33static unsigned int mem_serial_in(struct uart_port *p, int offset) 32static unsigned int mem_serial_in(struct uart_port *p, int offset)
34{ 33{
35 offset = offset << p->regshift; 34 offset = offset << p->regshift;
@@ -118,6 +117,15 @@ static void __init sdv_arch_setup(void)
118 sdv_serial_fixup(); 117 sdv_serial_fixup();
119} 118}
120 119
120#ifdef CONFIG_X86_IO_APIC
121static void __cpuinit sdv_pci_init(void)
122{
123 x86_of_pci_init();
124 /* We can't set this earlier, because we need to calibrate the timer */
125 legacy_pic = &null_legacy_pic;
126}
127#endif
128
121/* 129/*
122 * CE4100 specific x86_init function overrides and early setup 130 * CE4100 specific x86_init function overrides and early setup
123 * calls. 131 * calls.
@@ -128,5 +136,11 @@ void __init x86_ce4100_early_setup(void)
128 x86_platform.i8042_detect = ce4100_i8042_detect; 136 x86_platform.i8042_detect = ce4100_i8042_detect;
129 x86_init.resources.probe_roms = x86_init_noop; 137 x86_init.resources.probe_roms = x86_init_noop;
130 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 138 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
131 x86_init.mpparse.find_smp_config = sdv_find_smp_config; 139 x86_init.mpparse.find_smp_config = x86_init_noop;
140 x86_init.pci.init = ce4100_pci_init;
141
142#ifdef CONFIG_X86_IO_APIC
143 x86_init.pci.init_irq = sdv_pci_init;
144 x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
145#endif
132} 146}
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
new file mode 100644
index 000000000000..dc701ea58546
--- /dev/null
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -0,0 +1,428 @@
1/*
2 * CE4100 on Falcon Falls
3 *
4 * (c) Copyright 2010 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; version 2 of the License.
9 */
10/dts-v1/;
11/ {
12 model = "intel,falconfalls";
13 compatible = "intel,falconfalls";
14 #address-cells = <1>;
15 #size-cells = <1>;
16
17 cpus {
18 #address-cells = <1>;
19 #size-cells = <0>;
20
21 cpu@0 {
22 device_type = "cpu";
23 compatible = "intel,ce4100";
24 reg = <0>;
25 lapic = <&lapic0>;
26 };
27 };
28
29 soc@0 {
30 #address-cells = <1>;
31 #size-cells = <1>;
32 compatible = "intel,ce4100-cp";
33 ranges;
34
35 ioapic1: interrupt-controller@fec00000 {
36 #interrupt-cells = <2>;
37 compatible = "intel,ce4100-ioapic";
38 interrupt-controller;
39 reg = <0xfec00000 0x1000>;
40 };
41
42 timer@fed00000 {
43 compatible = "intel,ce4100-hpet";
44 reg = <0xfed00000 0x200>;
45 };
46
47 lapic0: interrupt-controller@fee00000 {
48 compatible = "intel,ce4100-lapic";
49 reg = <0xfee00000 0x1000>;
50 };
51
52 pci@3fc {
53 #address-cells = <3>;
54 #size-cells = <2>;
55 compatible = "intel,ce4100-pci", "pci";
56 device_type = "pci";
57 bus-range = <0 0>;
58 ranges = <0x2000000 0 0xbffff000 0xbffff000 0 0x1000
59 0x2000000 0 0xdffe0000 0xdffe0000 0 0x1000
60 0x0000000 0 0x0 0x0 0 0x100>;
61
62 /* Secondary IO-APIC */
63 ioapic2: interrupt-controller@0,1 {
64 #interrupt-cells = <2>;
65 compatible = "intel,ce4100-ioapic";
66 interrupt-controller;
67 reg = <0x100 0x0 0x0 0x0 0x0>;
68 assigned-addresses = <0x02000000 0x0 0xbffff000 0x0 0x1000>;
69 };
70
71 pci@1,0 {
72 #address-cells = <3>;
73 #size-cells = <2>;
74 compatible = "intel,ce4100-pci", "pci";
75 device_type = "pci";
76 bus-range = <1 1>;
77 ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>;
78
79 interrupt-parent = <&ioapic2>;
80
81 display@2,0 {
82 compatible = "pci8086,2e5b.2",
83 "pci8086,2e5b",
84 "pciclass038000",
85 "pciclass0380";
86
87 reg = <0x11000 0x0 0x0 0x0 0x0>;
88 interrupts = <0 1>;
89 };
90
91 multimedia@3,0 {
92 compatible = "pci8086,2e5c.2",
93 "pci8086,2e5c",
94 "pciclass048000",
95 "pciclass0480";
96
97 reg = <0x11800 0x0 0x0 0x0 0x0>;
98 interrupts = <2 1>;
99 };
100
101 multimedia@4,0 {
102 compatible = "pci8086,2e5d.2",
103 "pci8086,2e5d",
104 "pciclass048000",
105 "pciclass0480";
106
107 reg = <0x12000 0x0 0x0 0x0 0x0>;
108 interrupts = <4 1>;
109 };
110
111 multimedia@4,1 {
112 compatible = "pci8086,2e5e.2",
113 "pci8086,2e5e",
114 "pciclass048000",
115 "pciclass0480";
116
117 reg = <0x12100 0x0 0x0 0x0 0x0>;
118 interrupts = <5 1>;
119 };
120
121 sound@6,0 {
122 compatible = "pci8086,2e5f.2",
123 "pci8086,2e5f",
124 "pciclass040100",
125 "pciclass0401";
126
127 reg = <0x13000 0x0 0x0 0x0 0x0>;
128 interrupts = <6 1>;
129 };
130
131 sound@6,1 {
132 compatible = "pci8086,2e5f.2",
133 "pci8086,2e5f",
134 "pciclass040100",
135 "pciclass0401";
136
137 reg = <0x13100 0x0 0x0 0x0 0x0>;
138 interrupts = <7 1>;
139 };
140
141 sound@6,2 {
142 compatible = "pci8086,2e60.2",
143 "pci8086,2e60",
144 "pciclass040100",
145 "pciclass0401";
146
147 reg = <0x13200 0x0 0x0 0x0 0x0>;
148 interrupts = <8 1>;
149 };
150
151 display@8,0 {
152 compatible = "pci8086,2e61.2",
153 "pci8086,2e61",
154 "pciclass038000",
155 "pciclass0380";
156
157 reg = <0x14000 0x0 0x0 0x0 0x0>;
158 interrupts = <9 1>;
159 };
160
161 display@8,1 {
162 compatible = "pci8086,2e62.2",
163 "pci8086,2e62",
164 "pciclass038000",
165 "pciclass0380";
166
167 reg = <0x14100 0x0 0x0 0x0 0x0>;
168 interrupts = <10 1>;
169 };
170
171 multimedia@8,2 {
172 compatible = "pci8086,2e63.2",
173 "pci8086,2e63",
174 "pciclass048000",
175 "pciclass0480";
176
177 reg = <0x14200 0x0 0x0 0x0 0x0>;
178 interrupts = <11 1>;
179 };
180
181 entertainment-encryption@9,0 {
182 compatible = "pci8086,2e64.2",
183 "pci8086,2e64",
184 "pciclass101000",
185 "pciclass1010";
186
187 reg = <0x14800 0x0 0x0 0x0 0x0>;
188 interrupts = <12 1>;
189 };
190
191 localbus@a,0 {
192 compatible = "pci8086,2e65.2",
193 "pci8086,2e65",
194 "pciclassff0000",
195 "pciclassff00";
196
197 reg = <0x15000 0x0 0x0 0x0 0x0>;
198 };
199
200 serial@b,0 {
201 compatible = "pci8086,2e66.2",
202 "pci8086,2e66",
203 "pciclass070003",
204 "pciclass0700";
205
206 reg = <0x15800 0x0 0x0 0x0 0x0>;
207 interrupts = <14 1>;
208 };
209
210 gpio@b,1 {
211 compatible = "pci8086,2e67.2",
212 "pci8086,2e67",
213 "pciclassff0000",
214 "pciclassff00";
215
216 #gpio-cells = <2>;
217 reg = <0x15900 0x0 0x0 0x0 0x0>;
218 interrupts = <15 1>;
219 gpio-controller;
220 };
221
222 i2c-controller@b,2 {
223 #address-cells = <2>;
224 #size-cells = <1>;
225 compatible = "pci8086,2e68.2",
226 "pci8086,2e68",
227 "pciclass,ff0000",
228 "pciclass,ff00";
229
230 reg = <0x15a00 0x0 0x0 0x0 0x0>;
231 interrupts = <16 1>;
232 ranges = <0 0 0x02000000 0 0xdffe0500 0x100
233 1 0 0x02000000 0 0xdffe0600 0x100
234 2 0 0x02000000 0 0xdffe0700 0x100>;
235
236 i2c@0 {
237 #address-cells = <1>;
238 #size-cells = <0>;
239 compatible = "intel,ce4100-i2c-controller";
240 reg = <0 0 0x100>;
241 };
242
243 i2c@1 {
244 #address-cells = <1>;
245 #size-cells = <0>;
246 compatible = "intel,ce4100-i2c-controller";
247 reg = <1 0 0x100>;
248
249 gpio@26 {
250 #gpio-cells = <2>;
251 compatible = "ti,pcf8575";
252 reg = <0x26>;
253 gpio-controller;
254 };
255 };
256
257 i2c@2 {
258 #address-cells = <1>;
259 #size-cells = <0>;
260 compatible = "intel,ce4100-i2c-controller";
261 reg = <2 0 0x100>;
262
263 gpio@26 {
264 #gpio-cells = <2>;
265 compatible = "ti,pcf8575";
266 reg = <0x26>;
267 gpio-controller;
268 };
269 };
270 };
271
272 smard-card@b,3 {
273 compatible = "pci8086,2e69.2",
274 "pci8086,2e69",
275 "pciclass070500",
276 "pciclass0705";
277
278 reg = <0x15b00 0x0 0x0 0x0 0x0>;
279 interrupts = <15 1>;
280 };
281
282 spi-controller@b,4 {
283 #address-cells = <1>;
284 #size-cells = <0>;
285 compatible =
286 "pci8086,2e6a.2",
287 "pci8086,2e6a",
288 "pciclass,ff0000",
289 "pciclass,ff00";
290
291 reg = <0x15c00 0x0 0x0 0x0 0x0>;
292 interrupts = <15 1>;
293
294 dac@0 {
295 compatible = "ti,pcm1755";
296 reg = <0>;
297 spi-max-frequency = <115200>;
298 };
299
300 dac@1 {
301 compatible = "ti,pcm1609a";
302 reg = <1>;
303 spi-max-frequency = <115200>;
304 };
305
306 eeprom@2 {
307 compatible = "atmel,at93c46";
308 reg = <2>;
309 spi-max-frequency = <115200>;
310 };
311 };
312
313 multimedia@b,7 {
314 compatible = "pci8086,2e6d.2",
315 "pci8086,2e6d",
316 "pciclassff0000",
317 "pciclassff00";
318
319 reg = <0x15f00 0x0 0x0 0x0 0x0>;
320 };
321
322 ethernet@c,0 {
323 compatible = "pci8086,2e6e.2",
324 "pci8086,2e6e",
325 "pciclass020000",
326 "pciclass0200";
327
328 reg = <0x16000 0x0 0x0 0x0 0x0>;
329 interrupts = <21 1>;
330 };
331
332 clock@c,1 {
333 compatible = "pci8086,2e6f.2",
334 "pci8086,2e6f",
335 "pciclassff0000",
336 "pciclassff00";
337
338 reg = <0x16100 0x0 0x0 0x0 0x0>;
339 interrupts = <3 1>;
340 };
341
342 usb@d,0 {
343 compatible = "pci8086,2e70.2",
344 "pci8086,2e70",
345 "pciclass0c0320",
346 "pciclass0c03";
347
348 reg = <0x16800 0x0 0x0 0x0 0x0>;
349 interrupts = <22 3>;
350 };
351
352 usb@d,1 {
353 compatible = "pci8086,2e70.2",
354 "pci8086,2e70",
355 "pciclass0c0320",
356 "pciclass0c03";
357
358 reg = <0x16900 0x0 0x0 0x0 0x0>;
359 interrupts = <22 3>;
360 };
361
362 sata@e,0 {
363 compatible = "pci8086,2e71.0",
364 "pci8086,2e71",
365 "pciclass010601",
366 "pciclass0106";
367
368 reg = <0x17000 0x0 0x0 0x0 0x0>;
369 interrupts = <23 3>;
370 };
371
372 flash@f,0 {
373 compatible = "pci8086,701.1",
374 "pci8086,701",
375 "pciclass050100",
376 "pciclass0501";
377
378 reg = <0x17800 0x0 0x0 0x0 0x0>;
379 interrupts = <13 1>;
380 };
381
382 entertainment-encryption@10,0 {
383 compatible = "pci8086,702.1",
384 "pci8086,702",
385 "pciclass101000",
386 "pciclass1010";
387
388 reg = <0x18000 0x0 0x0 0x0 0x0>;
389 };
390
391 co-processor@11,0 {
392 compatible = "pci8086,703.1",
393 "pci8086,703",
394 "pciclass0b4000",
395 "pciclass0b40";
396
397 reg = <0x18800 0x0 0x0 0x0 0x0>;
398 interrupts = <1 1>;
399 };
400
401 multimedia@12,0 {
402 compatible = "pci8086,704.0",
403 "pci8086,704",
404 "pciclass048000",
405 "pciclass0480";
406
407 reg = <0x19000 0x0 0x0 0x0 0x0>;
408 };
409 };
410
411 isa@1f,0 {
412 #address-cells = <2>;
413 #size-cells = <1>;
414 compatible = "isa";
415 ranges = <1 0 0 0 0 0x100>;
416
417 rtc@70 {
418 compatible = "intel,ce4100-rtc", "motorola,mc146818";
419 interrupts = <8 3>;
420 interrupt-parent = <&ioapic1>;
421 ctrl-reg = <2>;
422 freq-reg = <0x26>;
423 reg = <1 0x70 2>;
424 };
425 };
426 };
427 };
428};
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index ea6529e93c6f..5c0207bf959b 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -31,6 +31,7 @@
31#include <asm/apic.h> 31#include <asm/apic.h>
32#include <asm/io_apic.h> 32#include <asm/io_apic.h>
33#include <asm/mrst.h> 33#include <asm/mrst.h>
34#include <asm/mrst-vrtc.h>
34#include <asm/io.h> 35#include <asm/io.h>
35#include <asm/i8259.h> 36#include <asm/i8259.h>
36#include <asm/intel_scu_ipc.h> 37#include <asm/intel_scu_ipc.h>
@@ -268,6 +269,7 @@ void __init x86_mrst_early_setup(void)
268 269
269 x86_platform.calibrate_tsc = mrst_calibrate_tsc; 270 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
270 x86_platform.i8042_detect = mrst_i8042_detect; 271 x86_platform.i8042_detect = mrst_i8042_detect;
272 x86_init.timers.wallclock_init = mrst_rtc_init;
271 x86_init.pci.init = pci_mrst_init; 273 x86_init.pci.init = pci_mrst_init;
272 x86_init.pci.fixup_irqs = x86_init_noop; 274 x86_init.pci.fixup_irqs = x86_init_noop;
273 275
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 32cd7edd71a0..04cf645feb92 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -100,22 +100,14 @@ int vrtc_set_mmss(unsigned long nowtime)
100 100
101void __init mrst_rtc_init(void) 101void __init mrst_rtc_init(void)
102{ 102{
103 unsigned long rtc_paddr; 103 unsigned long vrtc_paddr = sfi_mrtc_array[0].phys_addr;
104 void __iomem *virt_base;
105 104
106 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); 105 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
107 if (!sfi_mrtc_num) 106 if (!sfi_mrtc_num || !vrtc_paddr)
108 return; 107 return;
109 108
110 rtc_paddr = sfi_mrtc_array[0].phys_addr; 109 vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC,
111 110 vrtc_paddr);
112 /* vRTC's register address may not be page aligned */
113 set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
114
115 virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
116 virt_base += rtc_paddr & ~PAGE_MASK;
117 vrtc_virt_base = virt_base;
118
119 x86_platform.get_wallclock = vrtc_get_time; 111 x86_platform.get_wallclock = vrtc_get_time;
120 x86_platform.set_wallclock = vrtc_set_mmss; 112 x86_platform.set_wallclock = vrtc_set_mmss;
121} 113}
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index e797428b163b..c2a8cab65e5d 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_OLPC) += olpc.o 1obj-$(CONFIG_OLPC) += olpc.o
2obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o 2obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
3obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o 3obj-$(CONFIG_OLPC) += olpc_ofw.o
4obj-$(CONFIG_OLPC_OPENFIRMWARE_DT) += olpc_dt.o 4obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index dab874647530..044bda5b3174 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -140,8 +140,7 @@ void * __init prom_early_alloc(unsigned long size)
140 * wasted bootmem) and hand off chunks of it to callers. 140 * wasted bootmem) and hand off chunks of it to callers.
141 */ 141 */
142 res = alloc_bootmem(chunk_size); 142 res = alloc_bootmem(chunk_size);
143 if (!res) 143 BUG_ON(!res);
144 return NULL;
145 prom_early_allocated += chunk_size; 144 prom_early_allocated += chunk_size;
146 memset(res, 0, chunk_size); 145 memset(res, 0, chunk_size);
147 free_mem = chunk_size; 146 free_mem = chunk_size;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index df58e9cad96a..a7b38d35c29a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode)
1364 memset(bd2, 0, sizeof(struct bau_desc)); 1364 memset(bd2, 0, sizeof(struct bau_desc));
1365 bd2->header.sw_ack_flag = 1; 1365 bd2->header.sw_ack_flag = 1;
1366 /* 1366 /*
1367 * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub 1367 * base_dest_nodeid is the nasid of the first uvhub
1368 * in the partition. The bit map will indicate uvhub numbers, 1368 * in the partition. The bit map will indicate uvhub numbers,
1369 * which are 0-N in a partition. Pnodes are unique system-wide. 1369 * which are 0-N in a partition. Pnodes are unique system-wide.
1370 */ 1370 */
1371 bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; 1371 bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
1372 bd2->header.dest_subnodeid = 0x10; /* the LB */ 1372 bd2->header.dest_subnodeid = 0x10; /* the LB */
1373 bd2->header.command = UV_NET_ENDPOINT_INTD; 1373 bd2->header.command = UV_NET_ENDPOINT_INTD;
1374 bd2->header.int_both = 1; 1374 bd2->header.int_both = 1;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 7b24460917d5..374a05d8ad22 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
131 unsigned long mmr_offset, int limit) 131 unsigned long mmr_offset, int limit)
132{ 132{
133 const struct cpumask *eligible_cpu = cpumask_of(cpu); 133 const struct cpumask *eligible_cpu = cpumask_of(cpu);
134 struct irq_cfg *cfg = get_irq_chip_data(irq); 134 struct irq_cfg *cfg = irq_get_chip_data(irq);
135 unsigned long mmr_value; 135 unsigned long mmr_value;
136 struct uv_IO_APIC_route_entry *entry; 136 struct uv_IO_APIC_route_entry *entry;
137 int mmr_pnode, err; 137 int mmr_pnode, err;
@@ -148,7 +148,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
148 else 148 else
149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
150 150
151 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, 151 irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
152 irq_name); 152 irq_name);
153 153
154 mmr_value = 0; 154 mmr_value = 0;
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index 632037671746..fe4cf8294878 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -569,11 +569,13 @@ out_unlock:
569static struct irqaction master_action = { 569static struct irqaction master_action = {
570 .handler = piix4_master_intr, 570 .handler = piix4_master_intr,
571 .name = "PIIX4-8259", 571 .name = "PIIX4-8259",
572 .flags = IRQF_NO_THREAD,
572}; 573};
573 574
574static struct irqaction cascade_action = { 575static struct irqaction cascade_action = {
575 .handler = no_action, 576 .handler = no_action,
576 .name = "cascade", 577 .name = "cascade",
578 .flags = IRQF_NO_THREAD,
577}; 579};
578 580
579static inline void set_piix4_virtual_irq_type(void) 581static inline void set_piix4_virtual_irq_type(void)
@@ -606,7 +608,7 @@ static void __init visws_pre_intr_init(void)
606 chip = &cobalt_irq_type; 608 chip = &cobalt_irq_type;
607 609
608 if (chip) 610 if (chip)
609 set_irq_chip(i, chip); 611 irq_set_chip(i, chip);
610 } 612 }
611 613
612 setup_irq(CO_IRQ_8259, &master_action); 614 setup_irq(CO_IRQ_8259, &master_action);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892e4bc3..1c7121ba18ff 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -38,7 +38,7 @@ config XEN_MAX_DOMAIN_MEMORY
38 38
39config XEN_SAVE_RESTORE 39config XEN_SAVE_RESTORE
40 bool 40 bool
41 depends on XEN && PM 41 depends on XEN
42 default y 42 default y
43 43
44config XEN_DEBUG_FS 44config XEN_DEBUG_FS
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
48 help 48 help
49 Enable statistics output and various tuning options in debugfs. 49 Enable statistics output and various tuning options in debugfs.
50 Enabling this option may incur a significant performance overhead. 50 Enabling this option may incur a significant performance overhead.
51
52config XEN_DEBUG
53 bool "Enable Xen debug checks"
54 depends on XEN
55 default n
56 help
57 Enable various WARN_ON checks in the Xen MMU code.
58 Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45fb..49dbd78ec3cb 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)
1284 1284
1285 xen_setup_features(); 1285 xen_setup_features();
1286 1286
1287 pv_info = xen_info; 1287 pv_info.name = "Xen HVM";
1288 pv_info.kernel_rpl = 0;
1289 1288
1290 xen_domain_type = XEN_HVM_DOMAIN; 1289 xen_domain_type = XEN_HVM_DOMAIN;
1291 1290
1292 return 0; 1291 return 0;
1293} 1292}
1294 1293
1295void xen_hvm_init_shared_info(void) 1294void __ref xen_hvm_init_shared_info(void)
1296{ 1295{
1297 int cpu; 1296 int cpu;
1298 struct xen_add_to_physmap xatp; 1297 struct xen_add_to_physmap xatp;
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
1331 switch (action) { 1330 switch (action) {
1332 case CPU_UP_PREPARE: 1331 case CPU_UP_PREPARE:
1333 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1332 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1333 if (xen_have_vector_callback)
1334 xen_init_lock_cpu(cpu);
1334 break; 1335 break;
1335 default: 1336 default:
1336 break; 1337 break;
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)
1355 1356
1356 if (xen_feature(XENFEAT_hvm_callback_vector)) 1357 if (xen_feature(XENFEAT_hvm_callback_vector))
1357 xen_have_vector_callback = 1; 1358 xen_have_vector_callback = 1;
1359 xen_hvm_smp_init();
1358 register_cpu_notifier(&xen_hvm_cpu_notifier); 1360 register_cpu_notifier(&xen_hvm_cpu_notifier);
1359 xen_unplug_emulated_devices(); 1361 xen_unplug_emulated_devices();
1360 have_vcpu_info_placement = 0; 1362 have_vcpu_info_placement = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad574..3f6f3347aa17 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/gfp.h> 47#include <linux/gfp.h>
48#include <linux/memblock.h> 48#include <linux/memblock.h>
49#include <linux/seq_file.h>
49 50
50#include <asm/pgtable.h> 51#include <asm/pgtable.h>
51#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
416 if (val & _PAGE_PRESENT) { 417 if (val & _PAGE_PRESENT) {
417 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; 418 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
418 pteval_t flags = val & PTE_FLAGS_MASK; 419 pteval_t flags = val & PTE_FLAGS_MASK;
419 unsigned long mfn = pfn_to_mfn(pfn); 420 unsigned long mfn;
420 421
422 if (!xen_feature(XENFEAT_auto_translated_physmap))
423 mfn = get_phys_to_machine(pfn);
424 else
425 mfn = pfn;
421 /* 426 /*
422 * If there's no mfn for the pfn, then just create an 427 * If there's no mfn for the pfn, then just create an
423 * empty non-present pte. Unfortunately this loses 428 * empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
427 if (unlikely(mfn == INVALID_P2M_ENTRY)) { 432 if (unlikely(mfn == INVALID_P2M_ENTRY)) {
428 mfn = 0; 433 mfn = 0;
429 flags = 0; 434 flags = 0;
435 } else {
436 /*
437 * Paramount to do this test _after_ the
438 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
439 * IDENTITY_FRAME_BIT resolves to true.
440 */
441 mfn &= ~FOREIGN_FRAME_BIT;
442 if (mfn & IDENTITY_FRAME_BIT) {
443 mfn &= ~IDENTITY_FRAME_BIT;
444 flags |= _PAGE_IOMAP;
445 }
430 } 446 }
431
432 val = ((pteval_t)mfn << PAGE_SHIFT) | flags; 447 val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
433 } 448 }
434 449
@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
532} 547}
533PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); 548PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
534 549
550#ifdef CONFIG_XEN_DEBUG
551pte_t xen_make_pte_debug(pteval_t pte)
552{
553 phys_addr_t addr = (pte & PTE_PFN_MASK);
554 phys_addr_t other_addr;
555 bool io_page = false;
556 pte_t _pte;
557
558 if (pte & _PAGE_IOMAP)
559 io_page = true;
560
561 _pte = xen_make_pte(pte);
562
563 if (!addr)
564 return _pte;
565
566 if (io_page &&
567 (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
568 other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
569 WARN(addr != other_addr,
570 "0x%lx is using VM_IO, but it is 0x%lx!\n",
571 (unsigned long)addr, (unsigned long)other_addr);
572 } else {
573 pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
574 other_addr = (_pte.pte & PTE_PFN_MASK);
575 WARN((addr == other_addr) && (!io_page) && (!iomap_set),
576 "0x%lx is missing VM_IO (and wasn't fixed)!\n",
577 (unsigned long)addr);
578 }
579
580 return _pte;
581}
582PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
583#endif
584
535pgd_t xen_make_pgd(pgdval_t pgd) 585pgd_t xen_make_pgd(pgdval_t pgd)
536{ 586{
537 pgd = pte_pfn_to_mfn(pgd); 587 pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
986 */ 1036 */
987void xen_mm_pin_all(void) 1037void xen_mm_pin_all(void)
988{ 1038{
989 unsigned long flags;
990 struct page *page; 1039 struct page *page;
991 1040
992 spin_lock_irqsave(&pgd_lock, flags); 1041 spin_lock(&pgd_lock);
993 1042
994 list_for_each_entry(page, &pgd_list, lru) { 1043 list_for_each_entry(page, &pgd_list, lru) {
995 if (!PagePinned(page)) { 1044 if (!PagePinned(page)) {
@@ -998,7 +1047,7 @@ void xen_mm_pin_all(void)
998 } 1047 }
999 } 1048 }
1000 1049
1001 spin_unlock_irqrestore(&pgd_lock, flags); 1050 spin_unlock(&pgd_lock);
1002} 1051}
1003 1052
1004/* 1053/*
@@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
1099 */ 1148 */
1100void xen_mm_unpin_all(void) 1149void xen_mm_unpin_all(void)
1101{ 1150{
1102 unsigned long flags;
1103 struct page *page; 1151 struct page *page;
1104 1152
1105 spin_lock_irqsave(&pgd_lock, flags); 1153 spin_lock(&pgd_lock);
1106 1154
1107 list_for_each_entry(page, &pgd_list, lru) { 1155 list_for_each_entry(page, &pgd_list, lru) {
1108 if (PageSavePinned(page)) { 1156 if (PageSavePinned(page)) {
@@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void)
1112 } 1160 }
1113 } 1161 }
1114 1162
1115 spin_unlock_irqrestore(&pgd_lock, flags); 1163 spin_unlock(&pgd_lock);
1116} 1164}
1117 1165
1118void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) 1166void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1443,7 +1491,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1443 * early_ioremap fixmap slot, make sure it is RO. 1491 * early_ioremap fixmap slot, make sure it is RO.
1444 */ 1492 */
1445 if (!is_early_ioremap_ptep(ptep) && 1493 if (!is_early_ioremap_ptep(ptep) &&
1446 pfn >= e820_table_start && pfn < e820_table_end) 1494 pfn >= pgt_buf_start && pfn < pgt_buf_end)
1447 pte = pte_wrprotect(pte); 1495 pte = pte_wrprotect(pte);
1448 1496
1449 return pte; 1497 return pte;
@@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void)
1942 1990
1943static __init void xen_post_allocator_init(void) 1991static __init void xen_post_allocator_init(void)
1944{ 1992{
1993#ifdef CONFIG_XEN_DEBUG
1994 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
1995#endif
1945 pv_mmu_ops.set_pte = xen_set_pte; 1996 pv_mmu_ops.set_pte = xen_set_pte;
1946 pv_mmu_ops.set_pmd = xen_set_pmd; 1997 pv_mmu_ops.set_pmd = xen_set_pmd;
1947 pv_mmu_ops.set_pud = xen_set_pud; 1998 pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2074 in_frames[i] = virt_to_mfn(vaddr); 2125 in_frames[i] = virt_to_mfn(vaddr);
2075 2126
2076 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); 2127 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2077 set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); 2128 __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2078 2129
2079 if (out_frames) 2130 if (out_frames)
2080 out_frames[i] = virt_to_pfn(vaddr); 2131 out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
2353 2404
2354#ifdef CONFIG_XEN_DEBUG_FS 2405#ifdef CONFIG_XEN_DEBUG_FS
2355 2406
2407static int p2m_dump_open(struct inode *inode, struct file *filp)
2408{
2409 return single_open(filp, p2m_dump_show, NULL);
2410}
2411
2412static const struct file_operations p2m_dump_fops = {
2413 .open = p2m_dump_open,
2414 .read = seq_read,
2415 .llseek = seq_lseek,
2416 .release = single_release,
2417};
2418
2356static struct dentry *d_mmu_debug; 2419static struct dentry *d_mmu_debug;
2357 2420
2358static int __init xen_mmu_debugfs(void) 2421static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void)
2408 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, 2471 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
2409 &mmu_stats.prot_commit_batched); 2472 &mmu_stats.prot_commit_batched);
2410 2473
2474 debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
2411 return 0; 2475 return 0;
2412} 2476}
2413fs_initcall(xen_mmu_debugfs); 2477fs_initcall(xen_mmu_debugfs);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7ce7ff9..215a3ce61068 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -23,6 +23,129 @@
23 * P2M_PER_PAGE depends on the architecture, as a mfn is always 23 * P2M_PER_PAGE depends on the architecture, as a mfn is always
24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to 24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
25 * 512 and 1024 entries respectively. 25 * 512 and 1024 entries respectively.
26 *
27 * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
28 *
29 * However not all entries are filled with MFNs. Specifically for all other
30 * leaf entries, or for the top root, or middle one, for which there is a void
31 * entry, we assume it is "missing". So (for example)
32 * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
33 *
34 * We also have the possibility of setting 1-1 mappings on certain regions, so
35 * that:
36 * pfn_to_mfn(0xc0000)=0xc0000
37 *
38 * The benefit of this is, that we can assume for non-RAM regions (think
39 * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
40 * get the PFN value to match the MFN.
41 *
42 * For this to work efficiently we have one new page p2m_identity and
43 * allocate (via reserved_brk) any other pages we need to cover the sides
44 * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
45 * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
46 * no other fancy value).
47 *
48 * On lookup we spot that the entry points to p2m_identity and return the
49 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
50 * If the entry points to an allocated page, we just proceed as before and
51 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
52 * appropriate functions (pfn_to_mfn).
53 *
54 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
55 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
56 * non-identity pfn. To protect ourselves against we elect to set (and get) the
57 * IDENTITY_FRAME_BIT on all identity mapped PFNs.
58 *
59 * This simplistic diagram is used to explain the more subtle piece of code.
60 * There is also a digram of the P2M at the end that can help.
61 * Imagine your E820 looking as so:
62 *
63 * 1GB 2GB
64 * /-------------------+---------\/----\ /----------\ /---+-----\
65 * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
66 * \-------------------+---------/\----/ \----------/ \---+-----/
67 * ^- 1029MB ^- 2001MB
68 *
69 * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
70 * 2048MB = 524288 (0x80000)]
71 *
72 * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
73 * is actually not present (would have to kick the balloon driver to put it in).
74 *
75 * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
76 * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
77 * of the PFN and the end PFN (263424 and 512256 respectively). The first step
78 * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
79 * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
80 * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
81 * to end pfn. We reserve_brk top leaf pages if they are missing (means they
82 * point to p2m_mid_missing).
83 *
84 * With the E820 example above, 263424 is not 1GB aligned so we allocate a
85 * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
86 * Each entry in the allocate page is "missing" (points to p2m_missing).
87 *
88 * Next stage is to determine if we need to do a more granular boundary check
89 * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
90 * We check if the start pfn and end pfn violate that boundary check, and if
91 * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
92 * granularity of setting which PFNs are missing and which ones are identity.
93 * In our example 263424 and 512256 both fail the check so we reserve_brk two
94 * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
95 * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
96 *
97 * At this point we would at minimum reserve_brk one page, but could be up to
98 * three. Each call to set_phys_range_identity has at maximum a three page
99 * cost. If we were to query the P2M at this stage, all those entries from
100 * start PFN through end PFN (so 1029MB -> 2001MB) would return
101 * INVALID_P2M_ENTRY ("missing").
102 *
103 * The next step is to walk from the start pfn to the end pfn setting
104 * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
105 * If we find that the middle leaf is pointing to p2m_missing we can swap it
106 * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
107 * point we do not need to worry about boundary aligment (so no need to
108 * reserve_brk a middle page, figure out which PFNs are "missing" and which
109 * ones are identity), as that has been done earlier. If we find that the
110 * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
111 * that page (which covers 512 PFNs) and set the appropriate PFN with
112 * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
113 * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
114 * IDENTITY_FRAME_BIT set.
115 *
116 * All other regions that are void (or not filled) either point to p2m_missing
117 * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
118 * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
119 * contain the INVALID_P2M_ENTRY value and are considered "missing."
120 *
121 * This is what the p2m ends up looking (for the E820 above) with this
122 * fabulous drawing:
123 *
124 * p2m /--------------\
125 * /-----\ | &mfn_list[0],| /-----------------\
126 * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
127 * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
128 * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
129 * |-----| \ | [p2m_identity]+\\ | .... |
130 * | 2 |--\ \-------------------->| ... | \\ \----------------/
131 * |-----| \ \---------------/ \\
132 * | 3 |\ \ \\ p2m_identity
133 * |-----| \ \-------------------->/---------------\ /-----------------\
134 * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
135 * \-----/ / | [p2m_identity]+-->| ..., ~0 |
136 * / /---------------\ | .... | \-----------------/
137 * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
138 * / | IDENTITY[@256]|<----/ \---------------/
139 * / | ~0, ~0, .... |
140 * | \---------------/
141 * |
142 * p2m_missing p2m_missing
143 * /------------------\ /------------\
144 * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
145 * | [p2m_mid_missing]+---->| ..., ~0 |
146 * \------------------/ \------------/
147 *
148 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
26 */ 149 */
27 150
28#include <linux/init.h> 151#include <linux/init.h>
@@ -30,6 +153,7 @@
30#include <linux/list.h> 153#include <linux/list.h>
31#include <linux/hash.h> 154#include <linux/hash.h>
32#include <linux/sched.h> 155#include <linux/sched.h>
156#include <linux/seq_file.h>
33 157
34#include <asm/cache.h> 158#include <asm/cache.h>
35#include <asm/setup.h> 159#include <asm/setup.h>
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
59static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); 183static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
60static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); 184static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
61 185
186static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
187
62RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 188RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
63RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 189RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
64 190
191/* We might hit two boundary violations at the start and end, at max each
192 * boundary violation will require three middle nodes. */
193RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
194
65static inline unsigned p2m_top_index(unsigned long pfn) 195static inline unsigned p2m_top_index(unsigned long pfn)
66{ 196{
67 BUG_ON(pfn >= MAX_P2M_PFN); 197 BUG_ON(pfn >= MAX_P2M_PFN);
@@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m)
136 * - After resume we're called from within stop_machine, but the mfn 266 * - After resume we're called from within stop_machine, but the mfn
137 * tree should alreay be completely allocated. 267 * tree should alreay be completely allocated.
138 */ 268 */
139void xen_build_mfn_list_list(void) 269void __ref xen_build_mfn_list_list(void)
140{ 270{
141 unsigned long pfn; 271 unsigned long pfn;
142 272
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
221 p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); 351 p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
222 p2m_top_init(p2m_top); 352 p2m_top_init(p2m_top);
223 353
354 p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
355 p2m_init(p2m_identity);
356
224 /* 357 /*
225 * The domain builder gives us a pre-constructed p2m array in 358 * The domain builder gives us a pre-constructed p2m array in
226 * mfn_list for all the pages initially given to us, so we just 359 * mfn_list for all the pages initially given to us, so we just
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
266 mididx = p2m_mid_index(pfn); 399 mididx = p2m_mid_index(pfn);
267 idx = p2m_index(pfn); 400 idx = p2m_index(pfn);
268 401
402 /*
403 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
404 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
405 * would be wrong.
406 */
407 if (p2m_top[topidx][mididx] == p2m_identity)
408 return IDENTITY_FRAME(pfn);
409
269 return p2m_top[topidx][mididx][idx]; 410 return p2m_top[topidx][mididx][idx];
270} 411}
271EXPORT_SYMBOL_GPL(get_phys_to_machine); 412EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn)
335 p2m_top_mfn_p[topidx] = mid_mfn; 476 p2m_top_mfn_p[topidx] = mid_mfn;
336 } 477 }
337 478
338 if (p2m_top[topidx][mididx] == p2m_missing) { 479 if (p2m_top[topidx][mididx] == p2m_identity ||
480 p2m_top[topidx][mididx] == p2m_missing) {
339 /* p2m leaf page is missing */ 481 /* p2m leaf page is missing */
340 unsigned long *p2m; 482 unsigned long *p2m;
483 unsigned long *p2m_orig = p2m_top[topidx][mididx];
341 484
342 p2m = alloc_p2m_page(); 485 p2m = alloc_p2m_page();
343 if (!p2m) 486 if (!p2m)
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn)
345 488
346 p2m_init(p2m); 489 p2m_init(p2m);
347 490
348 if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) 491 if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
349 free_p2m_page(p2m); 492 free_p2m_page(p2m);
350 else 493 else
351 mid_mfn[mididx] = virt_to_mfn(p2m); 494 mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn)
354 return true; 497 return true;
355} 498}
356 499
500bool __early_alloc_p2m(unsigned long pfn)
501{
502 unsigned topidx, mididx, idx;
503
504 topidx = p2m_top_index(pfn);
505 mididx = p2m_mid_index(pfn);
506 idx = p2m_index(pfn);
507
508 /* Pfff.. No boundary cross-over, lets get out. */
509 if (!idx)
510 return false;
511
512 WARN(p2m_top[topidx][mididx] == p2m_identity,
513 "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
514 topidx, mididx);
515
516 /*
517 * Could be done by xen_build_dynamic_phys_to_machine..
518 */
519 if (p2m_top[topidx][mididx] != p2m_missing)
520 return false;
521
522 /* Boundary cross-over for the edges: */
523 if (idx) {
524 unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
525
526 p2m_init(p2m);
527
528 p2m_top[topidx][mididx] = p2m;
529
530 }
531 return idx != 0;
532}
533unsigned long set_phys_range_identity(unsigned long pfn_s,
534 unsigned long pfn_e)
535{
536 unsigned long pfn;
537
538 if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
539 return 0;
540
541 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
542 return pfn_e - pfn_s;
543
544 if (pfn_s > pfn_e)
545 return 0;
546
547 for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
548 pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
549 pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
550 {
551 unsigned topidx = p2m_top_index(pfn);
552 if (p2m_top[topidx] == p2m_mid_missing) {
553 unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
554
555 p2m_mid_init(mid);
556
557 p2m_top[topidx] = mid;
558 }
559 }
560
561 __early_alloc_p2m(pfn_s);
562 __early_alloc_p2m(pfn_e);
563
564 for (pfn = pfn_s; pfn < pfn_e; pfn++)
565 if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
566 break;
567
568 if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
569 "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
570 (pfn_e - pfn_s) - (pfn - pfn_s)))
571 printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
572
573 return pfn - pfn_s;
574}
575
357/* Try to install p2m mapping; fail if intermediate bits missing */ 576/* Try to install p2m mapping; fail if intermediate bits missing */
358bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) 577bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
359{ 578{
360 unsigned topidx, mididx, idx; 579 unsigned topidx, mididx, idx;
361 580
581 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
582 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
583 return true;
584 }
362 if (unlikely(pfn >= MAX_P2M_PFN)) { 585 if (unlikely(pfn >= MAX_P2M_PFN)) {
363 BUG_ON(mfn != INVALID_P2M_ENTRY); 586 BUG_ON(mfn != INVALID_P2M_ENTRY);
364 return true; 587 return true;
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
368 mididx = p2m_mid_index(pfn); 591 mididx = p2m_mid_index(pfn);
369 idx = p2m_index(pfn); 592 idx = p2m_index(pfn);
370 593
594 /* For sparse holes were the p2m leaf has real PFN along with
595 * PCI holes, stick in the PFN as the MFN value.
596 */
597 if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
598 if (p2m_top[topidx][mididx] == p2m_identity)
599 return true;
600
601 /* Swap over from MISSING to IDENTITY if needed. */
602 if (p2m_top[topidx][mididx] == p2m_missing) {
603 WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
604 p2m_identity) != p2m_missing);
605 return true;
606 }
607 }
608
371 if (p2m_top[topidx][mididx] == p2m_missing) 609 if (p2m_top[topidx][mididx] == p2m_missing)
372 return mfn == INVALID_P2M_ENTRY; 610 return mfn == INVALID_P2M_ENTRY;
373 611
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
378 616
379bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) 617bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
380{ 618{
381 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
382 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
383 return true;
384 }
385
386 if (unlikely(!__set_phys_to_machine(pfn, mfn))) { 619 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
387 if (!alloc_p2m(pfn)) 620 if (!alloc_p2m(pfn))
388 return false; 621 return false;
@@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
421{ 654{
422 unsigned long flags; 655 unsigned long flags;
423 unsigned long pfn; 656 unsigned long pfn;
424 unsigned long address; 657 unsigned long uninitialized_var(address);
425 unsigned level; 658 unsigned level;
426 pte_t *ptep = NULL; 659 pte_t *ptep = NULL;
427 660
@@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page)
455 unsigned long flags; 688 unsigned long flags;
456 unsigned long mfn; 689 unsigned long mfn;
457 unsigned long pfn; 690 unsigned long pfn;
458 unsigned long address; 691 unsigned long uninitialized_var(address);
459 unsigned level; 692 unsigned level;
460 pte_t *ptep = NULL; 693 pte_t *ptep = NULL;
461 694
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
520 return ret; 753 return ret;
521} 754}
522EXPORT_SYMBOL_GPL(m2p_find_override_pfn); 755EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
756
757#ifdef CONFIG_XEN_DEBUG_FS
758
759int p2m_dump_show(struct seq_file *m, void *v)
760{
761 static const char * const level_name[] = { "top", "middle",
762 "entry", "abnormal" };
763 static const char * const type_name[] = { "identity", "missing",
764 "pfn", "abnormal"};
765#define TYPE_IDENTITY 0
766#define TYPE_MISSING 1
767#define TYPE_PFN 2
768#define TYPE_UNKNOWN 3
769 unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
770 unsigned int uninitialized_var(prev_level);
771 unsigned int uninitialized_var(prev_type);
772
773 if (!p2m_top)
774 return 0;
775
776 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
777 unsigned topidx = p2m_top_index(pfn);
778 unsigned mididx = p2m_mid_index(pfn);
779 unsigned idx = p2m_index(pfn);
780 unsigned lvl, type;
781
782 lvl = 4;
783 type = TYPE_UNKNOWN;
784 if (p2m_top[topidx] == p2m_mid_missing) {
785 lvl = 0; type = TYPE_MISSING;
786 } else if (p2m_top[topidx] == NULL) {
787 lvl = 0; type = TYPE_UNKNOWN;
788 } else if (p2m_top[topidx][mididx] == NULL) {
789 lvl = 1; type = TYPE_UNKNOWN;
790 } else if (p2m_top[topidx][mididx] == p2m_identity) {
791 lvl = 1; type = TYPE_IDENTITY;
792 } else if (p2m_top[topidx][mididx] == p2m_missing) {
793 lvl = 1; type = TYPE_MISSING;
794 } else if (p2m_top[topidx][mididx][idx] == 0) {
795 lvl = 2; type = TYPE_UNKNOWN;
796 } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
797 lvl = 2; type = TYPE_IDENTITY;
798 } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
799 lvl = 2; type = TYPE_MISSING;
800 } else if (p2m_top[topidx][mididx][idx] == pfn) {
801 lvl = 2; type = TYPE_PFN;
802 } else if (p2m_top[topidx][mididx][idx] != pfn) {
803 lvl = 2; type = TYPE_PFN;
804 }
805 if (pfn == 0) {
806 prev_level = lvl;
807 prev_type = type;
808 }
809 if (pfn == MAX_DOMAIN_PAGES-1) {
810 lvl = 3;
811 type = TYPE_UNKNOWN;
812 }
813 if (prev_type != type) {
814 seq_printf(m, " [0x%lx->0x%lx] %s\n",
815 prev_pfn_type, pfn, type_name[prev_type]);
816 prev_pfn_type = pfn;
817 prev_type = type;
818 }
819 if (prev_level != lvl) {
820 seq_printf(m, " [0x%lx->0x%lx] level %s\n",
821 prev_pfn_level, pfn, level_name[prev_level]);
822 prev_pfn_level = pfn;
823 prev_level = lvl;
824 }
825 }
826 return 0;
827#undef TYPE_IDENTITY
828#undef TYPE_MISSING
829#undef TYPE_PFN
830#undef TYPE_UNKNOWN
831}
832#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a8a66a50d446..fa0269a99377 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
52 52
53static __init void xen_add_extra_mem(unsigned long pages) 53static __init void xen_add_extra_mem(unsigned long pages)
54{ 54{
55 unsigned long pfn;
56
55 u64 size = (u64)pages * PAGE_SIZE; 57 u64 size = (u64)pages * PAGE_SIZE;
56 u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; 58 u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
57 59
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
66 xen_extra_mem_size += size; 68 xen_extra_mem_size += size;
67 69
68 xen_max_p2m_pfn = PFN_DOWN(extra_start + size); 70 xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
71
72 for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
73 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
69} 74}
70 75
71static unsigned long __init xen_release_chunk(phys_addr_t start_addr, 76static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
104 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", 109 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
105 start, end, ret); 110 start, end, ret);
106 if (ret == 1) { 111 if (ret == 1) {
107 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 112 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
108 len++; 113 len++;
109 } 114 }
110 } 115 }
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
138 return released; 143 return released;
139} 144}
140 145
146static unsigned long __init xen_set_identity(const struct e820entry *list,
147 ssize_t map_size)
148{
149 phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
150 phys_addr_t start_pci = last;
151 const struct e820entry *entry;
152 unsigned long identity = 0;
153 int i;
154
155 for (i = 0, entry = list; i < map_size; i++, entry++) {
156 phys_addr_t start = entry->addr;
157 phys_addr_t end = start + entry->size;
158
159 if (start < last)
160 start = last;
161
162 if (end <= start)
163 continue;
164
165 /* Skip over the 1MB region. */
166 if (last > end)
167 continue;
168
169 if (entry->type == E820_RAM) {
170 if (start > start_pci)
171 identity += set_phys_range_identity(
172 PFN_UP(start_pci), PFN_DOWN(start));
173
174 /* Without saving 'last' we would gooble RAM too
175 * at the end of the loop. */
176 last = end;
177 start_pci = end;
178 continue;
179 }
180 start_pci = min(start, start_pci);
181 last = end;
182 }
183 if (last > start_pci)
184 identity += set_phys_range_identity(
185 PFN_UP(start_pci), PFN_DOWN(last));
186 return identity;
187}
141/** 188/**
142 * machine_specific_memory_setup - Hook for machine specific memory setup. 189 * machine_specific_memory_setup - Hook for machine specific memory setup.
143 **/ 190 **/
144char * __init xen_memory_setup(void) 191char * __init xen_memory_setup(void)
145{ 192{
146 static struct e820entry map[E820MAX] __initdata; 193 static struct e820entry map[E820MAX] __initdata;
194 static struct e820entry map_raw[E820MAX] __initdata;
147 195
148 unsigned long max_pfn = xen_start_info->nr_pages; 196 unsigned long max_pfn = xen_start_info->nr_pages;
149 unsigned long long mem_end; 197 unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
151 struct xen_memory_map memmap; 199 struct xen_memory_map memmap;
152 unsigned long extra_pages = 0; 200 unsigned long extra_pages = 0;
153 unsigned long extra_limit; 201 unsigned long extra_limit;
202 unsigned long identity_pages = 0;
154 int i; 203 int i;
155 int op; 204 int op;
156 205
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
176 } 225 }
177 BUG_ON(rc); 226 BUG_ON(rc);
178 227
228 memcpy(map_raw, map, sizeof(map));
179 e820.nr_map = 0; 229 e820.nr_map = 0;
180 xen_extra_mem_start = mem_end; 230 xen_extra_mem_start = mem_end;
181 for (i = 0; i < memmap.nr_entries; i++) { 231 for (i = 0; i < memmap.nr_entries; i++) {
@@ -194,6 +244,15 @@ char * __init xen_memory_setup(void)
194 end -= delta; 244 end -= delta;
195 245
196 extra_pages += PFN_DOWN(delta); 246 extra_pages += PFN_DOWN(delta);
247 /*
248 * Set RAM below 4GB that is not for us to be unusable.
249 * This prevents "System RAM" address space from being
250 * used as potential resource for I/O address (happens
251 * when 'allocate_resource' is called).
252 */
253 if (delta &&
254 (xen_initial_domain() && end < 0x100000000ULL))
255 e820_add_region(end, delta, E820_UNUSABLE);
197 } 256 }
198 257
199 if (map[i].size > 0 && end > xen_extra_mem_start) 258 if (map[i].size > 0 && end > xen_extra_mem_start)
@@ -251,6 +310,13 @@ char * __init xen_memory_setup(void)
251 310
252 xen_add_extra_mem(extra_pages); 311 xen_add_extra_mem(extra_pages);
253 312
313 /*
314 * Set P2M for all non-RAM pages and E820 gaps to be identity
315 * type PFNs. We supply it with the non-sanitized version
316 * of the E820.
317 */
318 identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
319 printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
254 return "Xen"; 320 return "Xen";
255} 321}
256 322
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72a4c7959045..30612441ed99 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
509 xen_fill_possible_map(); 509 xen_fill_possible_map();
510 xen_init_spinlocks(); 510 xen_init_spinlocks();
511} 511}
512
513static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
514{
515 native_smp_prepare_cpus(max_cpus);
516 WARN_ON(xen_smp_intr_init(0));
517
518 if (!xen_have_vector_callback)
519 return;
520 xen_init_lock_cpu(0);
521 xen_init_spinlocks();
522}
523
524static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
525{
526 int rc;
527 rc = native_cpu_up(cpu);
528 WARN_ON (xen_smp_intr_init(cpu));
529 return rc;
530}
531
532static void xen_hvm_cpu_die(unsigned int cpu)
533{
534 unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
535 unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
536 unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
537 unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
538 native_cpu_die(cpu);
539}
540
541void __init xen_hvm_smp_init(void)
542{
543 smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
544 smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
545 smp_ops.cpu_up = xen_hvm_cpu_up;
546 smp_ops.cpu_die = xen_hvm_cpu_die;
547 smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
548 smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
549}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 9bbd63a129b5..45329c8c226e 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,7 +12,7 @@
12#include "xen-ops.h" 12#include "xen-ops.h"
13#include "mmu.h" 13#include "mmu.h"
14 14
15void xen_pre_suspend(void) 15void xen_arch_pre_suspend(void)
16{ 16{
17 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); 17 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
18 xen_start_info->console.domU.mfn = 18 xen_start_info->console.domU.mfn =
@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
26 BUG(); 26 BUG();
27} 27}
28 28
29void xen_hvm_post_suspend(int suspend_cancelled) 29void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM
31 int cpu; 32 int cpu;
32 xen_hvm_init_shared_info(); 33 xen_hvm_init_shared_info();
33 xen_callback_vector(); 34 xen_callback_vector();
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
37 xen_setup_runstate_info(cpu); 38 xen_setup_runstate_info(cpu);
38 } 39 }
39 } 40 }
41#endif
40} 42}
41 43
42void xen_post_suspend(int suspend_cancelled) 44void xen_arch_post_suspend(int suspend_cancelled)
43{ 45{
44 xen_build_mfn_list_list(); 46 xen_build_mfn_list_list();
45 47
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e3d6a5..2e2d370a47b1 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
397 name = "<timer kasprintf failed>"; 397 name = "<timer kasprintf failed>";
398 398
399 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, 399 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
400 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, 400 IRQF_DISABLED|IRQF_PERCPU|
401 IRQF_NOBALANCING|IRQF_TIMER|
402 IRQF_FORCE_RESUME,
401 name, NULL); 403 name, NULL);
402 404
403 evt = &per_cpu(xen_clock_events, cpu); 405 evt = &per_cpu(xen_clock_events, cpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1a5ff24e29c0..aaa7291c9259 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,9 +28,9 @@ ENTRY(startup_xen)
28 __FINIT 28 __FINIT
29 29
30.pushsection .text 30.pushsection .text
31 .align PAGE_SIZE_asm 31 .align PAGE_SIZE
32ENTRY(hypercall_page) 32ENTRY(hypercall_page)
33 .skip PAGE_SIZE_asm 33 .skip PAGE_SIZE
34.popsection 34.popsection
35 35
36 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") 36 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9d41bf985757..3112f55638c4 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);
64 64
65#ifdef CONFIG_SMP 65#ifdef CONFIG_SMP
66void xen_smp_init(void); 66void xen_smp_init(void);
67void __init xen_hvm_smp_init(void);
67 68
68extern cpumask_var_t xen_cpu_initialized_map; 69extern cpumask_var_t xen_cpu_initialized_map;
69#else 70#else
70static inline void xen_smp_init(void) {} 71static inline void xen_smp_init(void) {}
72static inline void xen_hvm_smp_init(void) {}
71#endif 73#endif
72 74
73#ifdef CONFIG_PARAVIRT_SPINLOCKS 75#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
index e39edf5c86f2..249619e7e7f2 100644
--- a/arch/xtensa/include/asm/rwsem.h
+++ b/arch/xtensa/include/asm/rwsem.h
@@ -17,44 +17,12 @@
17#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." 17#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
18#endif 18#endif
19 19
20#include <linux/list.h>
21#include <linux/spinlock.h>
22#include <asm/atomic.h>
23#include <asm/system.h>
24
25/*
26 * the semaphore definition
27 */
28struct rw_semaphore {
29 signed long count;
30#define RWSEM_UNLOCKED_VALUE 0x00000000 20#define RWSEM_UNLOCKED_VALUE 0x00000000
31#define RWSEM_ACTIVE_BIAS 0x00000001 21#define RWSEM_ACTIVE_BIAS 0x00000001
32#define RWSEM_ACTIVE_MASK 0x0000ffff 22#define RWSEM_ACTIVE_MASK 0x0000ffff
33#define RWSEM_WAITING_BIAS (-0x00010000) 23#define RWSEM_WAITING_BIAS (-0x00010000)
34#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 24#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
35#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 25#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
36 spinlock_t wait_lock;
37 struct list_head wait_list;
38};
39
40#define __RWSEM_INITIALIZER(name) \
41 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
42 LIST_HEAD_INIT((name).wait_list) }
43
44#define DECLARE_RWSEM(name) \
45 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
46
47extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
48extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
49extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
50extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
51
52static inline void init_rwsem(struct rw_semaphore *sem)
53{
54 sem->count = RWSEM_UNLOCKED_VALUE;
55 spin_lock_init(&sem->wait_lock);
56 INIT_LIST_HEAD(&sem->wait_list);
57}
58 26
59/* 27/*
60 * lock for reading 28 * lock for reading
@@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
160 return atomic_add_return(delta, (atomic_t *)(&sem->count)); 128 return atomic_add_return(delta, (atomic_t *)(&sem->count));
161} 129}
162 130
163static inline int rwsem_is_locked(struct rw_semaphore *sem)
164{
165 return (sem->count != 0);
166}
167
168#endif /* _XTENSA_RWSEM_H */ 131#endif /* _XTENSA_RWSEM_H */
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 19df764f6399..f3e5eb43f71c 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -96,16 +96,12 @@ again:
96 update_process_times(user_mode(get_irq_regs())); 96 update_process_times(user_mode(get_irq_regs()));
97#endif 97#endif
98 98
99 write_seqlock(&xtime_lock); 99 xtime_update(1); /* Linux handler in kernel/time/timekeeping */
100
101 do_timer(1); /* Linux handler in kernel/timer.c */
102 100
103 /* Note that writing CCOMPARE clears the interrupt. */ 101 /* Note that writing CCOMPARE clears the interrupt. */
104 102
105 next += CCOUNT_PER_JIFFY; 103 next += CCOUNT_PER_JIFFY;
106 set_linux_timer(next); 104 set_linux_timer(next);
107
108 write_sequnlock(&xtime_lock);
109 } 105 }
110 106
111 /* Allow platform to do something useful (Wdog). */ 107 /* Allow platform to do something useful (Wdog). */
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 9b526154c9ba..a2820065927e 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -155,7 +155,7 @@ SECTIONS
155 INIT_RAM_FS 155 INIT_RAM_FS
156 } 156 }
157 157
158 PERCPU(PAGE_SIZE) 158 PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE)
159 159
160 /* We need this dummy segment here */ 160 /* We need this dummy segment here */
161 161
diff --git a/block/blk-core.c b/block/blk-core.c
index 2f4002f79a24..518dd423a5fe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -352,7 +352,7 @@ void blk_start_queue(struct request_queue *q)
352 WARN_ON(!irqs_disabled()); 352 WARN_ON(!irqs_disabled());
353 353
354 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 354 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
355 __blk_run_queue(q); 355 __blk_run_queue(q, false);
356} 356}
357EXPORT_SYMBOL(blk_start_queue); 357EXPORT_SYMBOL(blk_start_queue);
358 358
@@ -403,13 +403,14 @@ EXPORT_SYMBOL(blk_sync_queue);
403/** 403/**
404 * __blk_run_queue - run a single device queue 404 * __blk_run_queue - run a single device queue
405 * @q: The queue to run 405 * @q: The queue to run
406 * @force_kblockd: Don't run @q->request_fn directly. Use kblockd.
406 * 407 *
407 * Description: 408 * Description:
408 * See @blk_run_queue. This variant must be called with the queue lock 409 * See @blk_run_queue. This variant must be called with the queue lock
409 * held and interrupts disabled. 410 * held and interrupts disabled.
410 * 411 *
411 */ 412 */
412void __blk_run_queue(struct request_queue *q) 413void __blk_run_queue(struct request_queue *q, bool force_kblockd)
413{ 414{
414 blk_remove_plug(q); 415 blk_remove_plug(q);
415 416
@@ -423,7 +424,7 @@ void __blk_run_queue(struct request_queue *q)
423 * Only recurse once to avoid overrunning the stack, let the unplug 424 * Only recurse once to avoid overrunning the stack, let the unplug
424 * handling reinvoke the handler shortly if we already got there. 425 * handling reinvoke the handler shortly if we already got there.
425 */ 426 */
426 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 427 if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
427 q->request_fn(q); 428 q->request_fn(q);
428 queue_flag_clear(QUEUE_FLAG_REENTER, q); 429 queue_flag_clear(QUEUE_FLAG_REENTER, q);
429 } else { 430 } else {
@@ -446,7 +447,7 @@ void blk_run_queue(struct request_queue *q)
446 unsigned long flags; 447 unsigned long flags;
447 448
448 spin_lock_irqsave(q->queue_lock, flags); 449 spin_lock_irqsave(q->queue_lock, flags);
449 __blk_run_queue(q); 450 __blk_run_queue(q, false);
450 spin_unlock_irqrestore(q->queue_lock, flags); 451 spin_unlock_irqrestore(q->queue_lock, flags);
451} 452}
452EXPORT_SYMBOL(blk_run_queue); 453EXPORT_SYMBOL(blk_run_queue);
@@ -1053,7 +1054,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
1053 1054
1054 drive_stat_acct(rq, 1); 1055 drive_stat_acct(rq, 1);
1055 __elv_add_request(q, rq, where, 0); 1056 __elv_add_request(q, rq, where, 0);
1056 __blk_run_queue(q); 1057 __blk_run_queue(q, false);
1057 spin_unlock_irqrestore(q->queue_lock, flags); 1058 spin_unlock_irqrestore(q->queue_lock, flags);
1058} 1059}
1059EXPORT_SYMBOL(blk_insert_request); 1060EXPORT_SYMBOL(blk_insert_request);
@@ -2610,13 +2611,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2610} 2611}
2611EXPORT_SYMBOL(kblockd_schedule_work); 2612EXPORT_SYMBOL(kblockd_schedule_work);
2612 2613
2613int kblockd_schedule_delayed_work(struct request_queue *q,
2614 struct delayed_work *dwork, unsigned long delay)
2615{
2616 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2617}
2618EXPORT_SYMBOL(kblockd_schedule_delayed_work);
2619
2620int __init blk_dev_init(void) 2614int __init blk_dev_init(void)
2621{ 2615{
2622 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2616 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 54b123d6563e..b27d0208611b 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -66,10 +66,12 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q,
66 66
67 /* 67 /*
68 * Moving a request silently to empty queue_head may stall the 68 * Moving a request silently to empty queue_head may stall the
69 * queue. Kick the queue in those cases. 69 * queue. Kick the queue in those cases. This function is called
70 * from request completion path and calling directly into
71 * request_fn may confuse the driver. Always use kblockd.
70 */ 72 */
71 if (was_empty && next_rq) 73 if (was_empty && next_rq)
72 __blk_run_queue(q); 74 __blk_run_queue(q, true);
73} 75}
74 76
75static void pre_flush_end_io(struct request *rq, int error) 77static void pre_flush_end_io(struct request *rq, int error)
@@ -130,7 +132,7 @@ static struct request *queue_next_fseq(struct request_queue *q)
130 BUG(); 132 BUG();
131 } 133 }
132 134
133 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 135 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
134 return rq; 136 return rq;
135} 137}
136 138
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 1a320d2406b0..bd3e8df4d5e2 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -109,7 +109,6 @@ struct bio_batch
109 atomic_t done; 109 atomic_t done;
110 unsigned long flags; 110 unsigned long flags;
111 struct completion *wait; 111 struct completion *wait;
112 bio_end_io_t *end_io;
113}; 112};
114 113
115static void bio_batch_end_io(struct bio *bio, int err) 114static void bio_batch_end_io(struct bio *bio, int err)
@@ -122,17 +121,14 @@ static void bio_batch_end_io(struct bio *bio, int err)
122 else 121 else
123 clear_bit(BIO_UPTODATE, &bb->flags); 122 clear_bit(BIO_UPTODATE, &bb->flags);
124 } 123 }
125 if (bb) { 124 if (bb)
126 if (bb->end_io) 125 if (atomic_dec_and_test(&bb->done))
127 bb->end_io(bio, err); 126 complete(bb->wait);
128 atomic_inc(&bb->done);
129 complete(bb->wait);
130 }
131 bio_put(bio); 127 bio_put(bio);
132} 128}
133 129
134/** 130/**
135 * blkdev_issue_zeroout generate number of zero filed write bios 131 * blkdev_issue_zeroout - generate number of zero filed write bios
136 * @bdev: blockdev to issue 132 * @bdev: blockdev to issue
137 * @sector: start sector 133 * @sector: start sector
138 * @nr_sects: number of sectors to write 134 * @nr_sects: number of sectors to write
@@ -150,13 +146,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
150 int ret; 146 int ret;
151 struct bio *bio; 147 struct bio *bio;
152 struct bio_batch bb; 148 struct bio_batch bb;
153 unsigned int sz, issued = 0; 149 unsigned int sz;
154 DECLARE_COMPLETION_ONSTACK(wait); 150 DECLARE_COMPLETION_ONSTACK(wait);
155 151
156 atomic_set(&bb.done, 0); 152 atomic_set(&bb.done, 1);
157 bb.flags = 1 << BIO_UPTODATE; 153 bb.flags = 1 << BIO_UPTODATE;
158 bb.wait = &wait; 154 bb.wait = &wait;
159 bb.end_io = NULL;
160 155
161submit: 156submit:
162 ret = 0; 157 ret = 0;
@@ -185,12 +180,12 @@ submit:
185 break; 180 break;
186 } 181 }
187 ret = 0; 182 ret = 0;
188 issued++; 183 atomic_inc(&bb.done);
189 submit_bio(WRITE, bio); 184 submit_bio(WRITE, bio);
190 } 185 }
191 186
192 /* Wait for bios in-flight */ 187 /* Wait for bios in-flight */
193 while (issued != atomic_read(&bb.done)) 188 if (!atomic_dec_and_test(&bb.done))
194 wait_for_completion(&wait); 189 wait_for_completion(&wait);
195 190
196 if (!test_bit(BIO_UPTODATE, &bb.flags)) 191 if (!test_bit(BIO_UPTODATE, &bb.flags))
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a89043a3caa4..e36cc10a346c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -20,6 +20,11 @@ static int throtl_quantum = 32;
20/* Throttling is performed over 100ms slice and after that slice is renewed */ 20/* Throttling is performed over 100ms slice and after that slice is renewed */
21static unsigned long throtl_slice = HZ/10; /* 100 ms */ 21static unsigned long throtl_slice = HZ/10; /* 100 ms */
22 22
23/* A workqueue to queue throttle related work */
24static struct workqueue_struct *kthrotld_workqueue;
25static void throtl_schedule_delayed_work(struct throtl_data *td,
26 unsigned long delay);
27
23struct throtl_rb_root { 28struct throtl_rb_root {
24 struct rb_root rb; 29 struct rb_root rb;
25 struct rb_node *left; 30 struct rb_node *left;
@@ -345,10 +350,9 @@ static void throtl_schedule_next_dispatch(struct throtl_data *td)
345 update_min_dispatch_time(st); 350 update_min_dispatch_time(st);
346 351
347 if (time_before_eq(st->min_disptime, jiffies)) 352 if (time_before_eq(st->min_disptime, jiffies))
348 throtl_schedule_delayed_work(td->queue, 0); 353 throtl_schedule_delayed_work(td, 0);
349 else 354 else
350 throtl_schedule_delayed_work(td->queue, 355 throtl_schedule_delayed_work(td, (st->min_disptime - jiffies));
351 (st->min_disptime - jiffies));
352} 356}
353 357
354static inline void 358static inline void
@@ -815,10 +819,10 @@ void blk_throtl_work(struct work_struct *work)
815} 819}
816 820
817/* Call with queue lock held */ 821/* Call with queue lock held */
818void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) 822static void
823throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
819{ 824{
820 825
821 struct throtl_data *td = q->td;
822 struct delayed_work *dwork = &td->throtl_work; 826 struct delayed_work *dwork = &td->throtl_work;
823 827
824 if (total_nr_queued(td) > 0) { 828 if (total_nr_queued(td) > 0) {
@@ -827,12 +831,11 @@ void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay)
827 * Cancel that and schedule a new one. 831 * Cancel that and schedule a new one.
828 */ 832 */
829 __cancel_delayed_work(dwork); 833 __cancel_delayed_work(dwork);
830 kblockd_schedule_delayed_work(q, dwork, delay); 834 queue_delayed_work(kthrotld_workqueue, dwork, delay);
831 throtl_log(td, "schedule work. delay=%lu jiffies=%lu", 835 throtl_log(td, "schedule work. delay=%lu jiffies=%lu",
832 delay, jiffies); 836 delay, jiffies);
833 } 837 }
834} 838}
835EXPORT_SYMBOL(throtl_schedule_delayed_work);
836 839
837static void 840static void
838throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) 841throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
@@ -920,7 +923,7 @@ static void throtl_update_blkio_group_read_bps(void *key,
920 smp_mb__after_atomic_inc(); 923 smp_mb__after_atomic_inc();
921 924
922 /* Schedule a work now to process the limit change */ 925 /* Schedule a work now to process the limit change */
923 throtl_schedule_delayed_work(td->queue, 0); 926 throtl_schedule_delayed_work(td, 0);
924} 927}
925 928
926static void throtl_update_blkio_group_write_bps(void *key, 929static void throtl_update_blkio_group_write_bps(void *key,
@@ -934,7 +937,7 @@ static void throtl_update_blkio_group_write_bps(void *key,
934 smp_mb__before_atomic_inc(); 937 smp_mb__before_atomic_inc();
935 atomic_inc(&td->limits_changed); 938 atomic_inc(&td->limits_changed);
936 smp_mb__after_atomic_inc(); 939 smp_mb__after_atomic_inc();
937 throtl_schedule_delayed_work(td->queue, 0); 940 throtl_schedule_delayed_work(td, 0);
938} 941}
939 942
940static void throtl_update_blkio_group_read_iops(void *key, 943static void throtl_update_blkio_group_read_iops(void *key,
@@ -948,7 +951,7 @@ static void throtl_update_blkio_group_read_iops(void *key,
948 smp_mb__before_atomic_inc(); 951 smp_mb__before_atomic_inc();
949 atomic_inc(&td->limits_changed); 952 atomic_inc(&td->limits_changed);
950 smp_mb__after_atomic_inc(); 953 smp_mb__after_atomic_inc();
951 throtl_schedule_delayed_work(td->queue, 0); 954 throtl_schedule_delayed_work(td, 0);
952} 955}
953 956
954static void throtl_update_blkio_group_write_iops(void *key, 957static void throtl_update_blkio_group_write_iops(void *key,
@@ -962,7 +965,7 @@ static void throtl_update_blkio_group_write_iops(void *key,
962 smp_mb__before_atomic_inc(); 965 smp_mb__before_atomic_inc();
963 atomic_inc(&td->limits_changed); 966 atomic_inc(&td->limits_changed);
964 smp_mb__after_atomic_inc(); 967 smp_mb__after_atomic_inc();
965 throtl_schedule_delayed_work(td->queue, 0); 968 throtl_schedule_delayed_work(td, 0);
966} 969}
967 970
968void throtl_shutdown_timer_wq(struct request_queue *q) 971void throtl_shutdown_timer_wq(struct request_queue *q)
@@ -1135,6 +1138,10 @@ void blk_throtl_exit(struct request_queue *q)
1135 1138
1136static int __init throtl_init(void) 1139static int __init throtl_init(void)
1137{ 1140{
1141 kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
1142 if (!kthrotld_workqueue)
1143 panic("Failed to create kthrotld\n");
1144
1138 blkio_policy_register(&blkio_policy_throtl); 1145 blkio_policy_register(&blkio_policy_throtl);
1139 return 0; 1146 return 0;
1140} 1147}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 7be4c7959625..ea83a4f0c27d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3355,7 +3355,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3355 cfqd->busy_queues > 1) { 3355 cfqd->busy_queues > 1) {
3356 cfq_del_timer(cfqd, cfqq); 3356 cfq_del_timer(cfqd, cfqq);
3357 cfq_clear_cfqq_wait_request(cfqq); 3357 cfq_clear_cfqq_wait_request(cfqq);
3358 __blk_run_queue(cfqd->queue); 3358 __blk_run_queue(cfqd->queue, false);
3359 } else { 3359 } else {
3360 cfq_blkiocg_update_idle_time_stats( 3360 cfq_blkiocg_update_idle_time_stats(
3361 &cfqq->cfqg->blkg); 3361 &cfqq->cfqg->blkg);
@@ -3370,7 +3370,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3370 * this new queue is RT and the current one is BE 3370 * this new queue is RT and the current one is BE
3371 */ 3371 */
3372 cfq_preempt_queue(cfqd, cfqq); 3372 cfq_preempt_queue(cfqd, cfqq);
3373 __blk_run_queue(cfqd->queue); 3373 __blk_run_queue(cfqd->queue, false);
3374 } 3374 }
3375} 3375}
3376 3376
@@ -3731,7 +3731,7 @@ static void cfq_kick_queue(struct work_struct *work)
3731 struct request_queue *q = cfqd->queue; 3731 struct request_queue *q = cfqd->queue;
3732 3732
3733 spin_lock_irq(q->queue_lock); 3733 spin_lock_irq(q->queue_lock);
3734 __blk_run_queue(cfqd->queue); 3734 __blk_run_queue(cfqd->queue, false);
3735 spin_unlock_irq(q->queue_lock); 3735 spin_unlock_irq(q->queue_lock);
3736} 3736}
3737 3737
diff --git a/block/elevator.c b/block/elevator.c
index 2569512830d3..236e93c1f46c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -602,7 +602,7 @@ void elv_quiesce_start(struct request_queue *q)
602 */ 602 */
603 elv_drain_elevator(q); 603 elv_drain_elevator(q);
604 while (q->rq.elvpriv) { 604 while (q->rq.elvpriv) {
605 __blk_run_queue(q); 605 __blk_run_queue(q, false);
606 spin_unlock_irq(q->queue_lock); 606 spin_unlock_irq(q->queue_lock);
607 msleep(10); 607 msleep(10);
608 spin_lock_irq(q->queue_lock); 608 spin_lock_irq(q->queue_lock);
@@ -651,7 +651,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
651 * with anything. There's no point in delaying queue 651 * with anything. There's no point in delaying queue
652 * processing. 652 * processing.
653 */ 653 */
654 __blk_run_queue(q); 654 __blk_run_queue(q, false);
655 break; 655 break;
656 656
657 case ELEVATOR_INSERT_SORT: 657 case ELEVATOR_INSERT_SORT:
diff --git a/block/genhd.c b/block/genhd.c
index 6a5b772aa201..cbf1112a885c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1355,7 +1355,7 @@ int invalidate_partition(struct gendisk *disk, int partno)
1355 struct block_device *bdev = bdget_disk(disk, partno); 1355 struct block_device *bdev = bdget_disk(disk, partno);
1356 if (bdev) { 1356 if (bdev) {
1357 fsync_bdev(bdev); 1357 fsync_bdev(bdev);
1358 res = __invalidate_device(bdev); 1358 res = __invalidate_device(bdev, true);
1359 bdput(bdev); 1359 bdput(bdev);
1360 } 1360 }
1361 return res; 1361 return res;
diff --git a/block/ioctl.c b/block/ioctl.c
index 9049d460fa89..1124cd297263 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -294,9 +294,11 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
294 return -EINVAL; 294 return -EINVAL;
295 if (get_user(n, (int __user *) arg)) 295 if (get_user(n, (int __user *) arg))
296 return -EFAULT; 296 return -EFAULT;
297 if (!(mode & FMODE_EXCL) && 297 if (!(mode & FMODE_EXCL)) {
298 blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) 298 bdgrab(bdev);
299 return -EBUSY; 299 if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
300 return -EBUSY;
301 }
300 ret = set_blocksize(bdev, n); 302 ret = set_blocksize(bdev, n);
301 if (!(mode & FMODE_EXCL)) 303 if (!(mode & FMODE_EXCL))
302 blkdev_put(bdev, mode | FMODE_EXCL); 304 blkdev_put(bdev, mode | FMODE_EXCL);
diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
index a854df2a5a4b..fdc67d38660b 100644
--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -141,8 +141,7 @@ err:
141 141
142 if (walk->iv != req->info) 142 if (walk->iv != req->info)
143 memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize); 143 memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize);
144 if (walk->iv_buffer) 144 kfree(walk->iv_buffer);
145 kfree(walk->iv_buffer);
146 145
147 return err; 146 return err;
148} 147}
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 9aac5e58be94..e912ea5def3d 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -146,7 +146,8 @@ static void test_cipher_speed(const char *algo, int enc, unsigned int sec,
146 unsigned int tcount, u8 *keysize) 146 unsigned int tcount, u8 *keysize)
147{ 147{
148 unsigned int ret, i, j, iv_len; 148 unsigned int ret, i, j, iv_len;
149 const char *key, iv[128]; 149 const char *key;
150 char iv[128];
150 struct crypto_blkcipher *tfm; 151 struct crypto_blkcipher *tfm;
151 struct blkcipher_desc desc; 152 struct blkcipher_desc desc;
152 const char *e; 153 const char *e;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 27ea9fe9476f..2854865f2434 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2077,6 +2077,7 @@ static const struct alg_test_desc alg_test_descs[] = {
2077 }, { 2077 }, {
2078 .alg = "ghash", 2078 .alg = "ghash",
2079 .test = alg_test_hash, 2079 .test = alg_test_hash,
2080 .fips_allowed = 1,
2080 .suite = { 2081 .suite = {
2081 .hash = { 2082 .hash = {
2082 .vecs = ghash_tv_template, 2083 .vecs = ghash_tv_template,
@@ -2453,6 +2454,7 @@ static const struct alg_test_desc alg_test_descs[] = {
2453 }, { 2454 }, {
2454 .alg = "xts(aes)", 2455 .alg = "xts(aes)",
2455 .test = alg_test_skcipher, 2456 .test = alg_test_skcipher,
2457 .fips_allowed = 1,
2456 .suite = { 2458 .suite = {
2457 .cipher = { 2459 .cipher = {
2458 .enc = { 2460 .enc = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 834af7f2adee..aa6dac05f843 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -451,8 +451,9 @@ static struct hash_testvec rmd320_tv_template[] = {
451 451
452/* 452/*
453 * SHA1 test vectors from from FIPS PUB 180-1 453 * SHA1 test vectors from from FIPS PUB 180-1
454 * Long vector from CAVS 5.0
454 */ 455 */
455#define SHA1_TEST_VECTORS 2 456#define SHA1_TEST_VECTORS 3
456 457
457static struct hash_testvec sha1_tv_template[] = { 458static struct hash_testvec sha1_tv_template[] = {
458 { 459 {
@@ -467,6 +468,33 @@ static struct hash_testvec sha1_tv_template[] = {
467 "\x4a\xa1\xf9\x51\x29\xe5\xe5\x46\x70\xf1", 468 "\x4a\xa1\xf9\x51\x29\xe5\xe5\x46\x70\xf1",
468 .np = 2, 469 .np = 2,
469 .tap = { 28, 28 } 470 .tap = { 28, 28 }
471 }, {
472 .plaintext = "\xec\x29\x56\x12\x44\xed\xe7\x06"
473 "\xb6\xeb\x30\xa1\xc3\x71\xd7\x44"
474 "\x50\xa1\x05\xc3\xf9\x73\x5f\x7f"
475 "\xa9\xfe\x38\xcf\x67\xf3\x04\xa5"
476 "\x73\x6a\x10\x6e\x92\xe1\x71\x39"
477 "\xa6\x81\x3b\x1c\x81\xa4\xf3\xd3"
478 "\xfb\x95\x46\xab\x42\x96\xfa\x9f"
479 "\x72\x28\x26\xc0\x66\x86\x9e\xda"
480 "\xcd\x73\xb2\x54\x80\x35\x18\x58"
481 "\x13\xe2\x26\x34\xa9\xda\x44\x00"
482 "\x0d\x95\xa2\x81\xff\x9f\x26\x4e"
483 "\xcc\xe0\xa9\x31\x22\x21\x62\xd0"
484 "\x21\xcc\xa2\x8d\xb5\xf3\xc2\xaa"
485 "\x24\x94\x5a\xb1\xe3\x1c\xb4\x13"
486 "\xae\x29\x81\x0f\xd7\x94\xca\xd5"
487 "\xdf\xaf\x29\xec\x43\xcb\x38\xd1"
488 "\x98\xfe\x4a\xe1\xda\x23\x59\x78"
489 "\x02\x21\x40\x5b\xd6\x71\x2a\x53"
490 "\x05\xda\x4b\x1b\x73\x7f\xce\x7c"
491 "\xd2\x1c\x0e\xb7\x72\x8d\x08\x23"
492 "\x5a\x90\x11",
493 .psize = 163,
494 .digest = "\x97\x01\x11\xc4\xe7\x7b\xcc\x88\xcc\x20"
495 "\x45\x9c\x02\xb6\x9b\x4a\xa8\xf5\x82\x17",
496 .np = 4,
497 .tap = { 63, 64, 31, 5 }
470 } 498 }
471}; 499};
472 500
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 2aa042a5da6d..3a17ca5fff6f 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -7,7 +7,6 @@ menuconfig ACPI
7 depends on !IA64_HP_SIM 7 depends on !IA64_HP_SIM
8 depends on IA64 || X86 8 depends on IA64 || X86
9 depends on PCI 9 depends on PCI
10 depends on PM
11 select PNP 10 select PNP
12 default y 11 default y
13 help 12 help
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 54784bb42cec..edc25867ad9d 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -416,10 +416,15 @@ struct acpi_gpe_handler_info {
416 u8 originally_enabled; /* True if GPE was originally enabled */ 416 u8 originally_enabled; /* True if GPE was originally enabled */
417}; 417};
418 418
419struct acpi_gpe_notify_object {
420 struct acpi_namespace_node *node;
421 struct acpi_gpe_notify_object *next;
422};
423
419union acpi_gpe_dispatch_info { 424union acpi_gpe_dispatch_info {
420 struct acpi_namespace_node *method_node; /* Method node for this GPE level */ 425 struct acpi_namespace_node *method_node; /* Method node for this GPE level */
421 struct acpi_gpe_handler_info *handler; /* Installed GPE handler */ 426 struct acpi_gpe_handler_info *handler; /* Installed GPE handler */
422 struct acpi_namespace_node *device_node; /* Parent _PRW device for implicit notify */ 427 struct acpi_gpe_notify_object device; /* List of _PRW devices for implicit notify */
423}; 428};
424 429
425/* 430/*
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 14988a86066f..f4725212eb48 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -457,6 +457,7 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
457 acpi_status status; 457 acpi_status status;
458 struct acpi_gpe_event_info *local_gpe_event_info; 458 struct acpi_gpe_event_info *local_gpe_event_info;
459 struct acpi_evaluate_info *info; 459 struct acpi_evaluate_info *info;
460 struct acpi_gpe_notify_object *notify_object;
460 461
461 ACPI_FUNCTION_TRACE(ev_asynch_execute_gpe_method); 462 ACPI_FUNCTION_TRACE(ev_asynch_execute_gpe_method);
462 463
@@ -508,10 +509,18 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
508 * from this thread -- because handlers may in turn run other 509 * from this thread -- because handlers may in turn run other
509 * control methods. 510 * control methods.
510 */ 511 */
511 status = 512 status = acpi_ev_queue_notify_request(
512 acpi_ev_queue_notify_request(local_gpe_event_info->dispatch. 513 local_gpe_event_info->dispatch.device.node,
513 device_node, 514 ACPI_NOTIFY_DEVICE_WAKE);
514 ACPI_NOTIFY_DEVICE_WAKE); 515
516 notify_object = local_gpe_event_info->dispatch.device.next;
517 while (ACPI_SUCCESS(status) && notify_object) {
518 status = acpi_ev_queue_notify_request(
519 notify_object->node,
520 ACPI_NOTIFY_DEVICE_WAKE);
521 notify_object = notify_object->next;
522 }
523
515 break; 524 break;
516 525
517 case ACPI_GPE_DISPATCH_METHOD: 526 case ACPI_GPE_DISPATCH_METHOD:
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 3b20a3401b64..52aaff3df562 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -198,7 +198,9 @@ acpi_setup_gpe_for_wake(acpi_handle wake_device,
198 acpi_status status = AE_BAD_PARAMETER; 198 acpi_status status = AE_BAD_PARAMETER;
199 struct acpi_gpe_event_info *gpe_event_info; 199 struct acpi_gpe_event_info *gpe_event_info;
200 struct acpi_namespace_node *device_node; 200 struct acpi_namespace_node *device_node;
201 struct acpi_gpe_notify_object *notify_object;
201 acpi_cpu_flags flags; 202 acpi_cpu_flags flags;
203 u8 gpe_dispatch_mask;
202 204
203 ACPI_FUNCTION_TRACE(acpi_setup_gpe_for_wake); 205 ACPI_FUNCTION_TRACE(acpi_setup_gpe_for_wake);
204 206
@@ -221,27 +223,49 @@ acpi_setup_gpe_for_wake(acpi_handle wake_device,
221 goto unlock_and_exit; 223 goto unlock_and_exit;
222 } 224 }
223 225
226 if (wake_device == ACPI_ROOT_OBJECT) {
227 goto out;
228 }
229
224 /* 230 /*
225 * If there is no method or handler for this GPE, then the 231 * If there is no method or handler for this GPE, then the
226 * wake_device will be notified whenever this GPE fires (aka 232 * wake_device will be notified whenever this GPE fires (aka
227 * "implicit notify") Note: The GPE is assumed to be 233 * "implicit notify") Note: The GPE is assumed to be
228 * level-triggered (for windows compatibility). 234 * level-triggered (for windows compatibility).
229 */ 235 */
230 if (((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) == 236 gpe_dispatch_mask = gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK;
231 ACPI_GPE_DISPATCH_NONE) && (wake_device != ACPI_ROOT_OBJECT)) { 237 if (gpe_dispatch_mask != ACPI_GPE_DISPATCH_NONE
238 && gpe_dispatch_mask != ACPI_GPE_DISPATCH_NOTIFY) {
239 goto out;
240 }
232 241
233 /* Validate wake_device is of type Device */ 242 /* Validate wake_device is of type Device */
234 243
235 device_node = ACPI_CAST_PTR(struct acpi_namespace_node, 244 device_node = ACPI_CAST_PTR(struct acpi_namespace_node, wake_device);
236 wake_device); 245 if (device_node->type != ACPI_TYPE_DEVICE) {
237 if (device_node->type != ACPI_TYPE_DEVICE) { 246 goto unlock_and_exit;
238 goto unlock_and_exit; 247 }
239 } 248
249 if (gpe_dispatch_mask == ACPI_GPE_DISPATCH_NONE) {
240 gpe_event_info->flags = (ACPI_GPE_DISPATCH_NOTIFY | 250 gpe_event_info->flags = (ACPI_GPE_DISPATCH_NOTIFY |
241 ACPI_GPE_LEVEL_TRIGGERED); 251 ACPI_GPE_LEVEL_TRIGGERED);
242 gpe_event_info->dispatch.device_node = device_node; 252 gpe_event_info->dispatch.device.node = device_node;
253 gpe_event_info->dispatch.device.next = NULL;
254 } else {
255 /* There are multiple devices to notify implicitly. */
256
257 notify_object = ACPI_ALLOCATE_ZEROED(sizeof(*notify_object));
258 if (!notify_object) {
259 status = AE_NO_MEMORY;
260 goto unlock_and_exit;
261 }
262
263 notify_object->node = device_node;
264 notify_object->next = gpe_event_info->dispatch.device.next;
265 gpe_event_info->dispatch.device.next = notify_object;
243 } 266 }
244 267
268 out:
245 gpe_event_info->flags |= ACPI_GPE_CAN_WAKE; 269 gpe_event_info->flags |= ACPI_GPE_CAN_WAKE;
246 status = AE_OK; 270 status = AE_OK;
247 271
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 7ced61f39492..9749980ca6ca 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -40,6 +40,7 @@
40#include <acpi/acpi_bus.h> 40#include <acpi/acpi_bus.h>
41#include <acpi/acpi_drivers.h> 41#include <acpi/acpi_drivers.h>
42#include <linux/dmi.h> 42#include <linux/dmi.h>
43#include <linux/suspend.h>
43 44
44#include "internal.h" 45#include "internal.h"
45 46
@@ -1006,8 +1007,7 @@ struct kobject *acpi_kobj;
1006 1007
1007static int __init acpi_init(void) 1008static int __init acpi_init(void)
1008{ 1009{
1009 int result = 0; 1010 int result;
1010
1011 1011
1012 if (acpi_disabled) { 1012 if (acpi_disabled) {
1013 printk(KERN_INFO PREFIX "Interpreter disabled.\n"); 1013 printk(KERN_INFO PREFIX "Interpreter disabled.\n");
@@ -1022,29 +1022,18 @@ static int __init acpi_init(void)
1022 1022
1023 init_acpi_device_notify(); 1023 init_acpi_device_notify();
1024 result = acpi_bus_init(); 1024 result = acpi_bus_init();
1025 1025 if (result) {
1026 if (!result) {
1027 pci_mmcfg_late_init();
1028 if (!(pm_flags & PM_APM))
1029 pm_flags |= PM_ACPI;
1030 else {
1031 printk(KERN_INFO PREFIX
1032 "APM is already active, exiting\n");
1033 disable_acpi();
1034 result = -ENODEV;
1035 }
1036 } else
1037 disable_acpi(); 1026 disable_acpi();
1038
1039 if (acpi_disabled)
1040 return result; 1027 return result;
1028 }
1041 1029
1030 pci_mmcfg_late_init();
1042 acpi_scan_init(); 1031 acpi_scan_init();
1043 acpi_ec_init(); 1032 acpi_ec_init();
1044 acpi_debugfs_init(); 1033 acpi_debugfs_init();
1045 acpi_sleep_proc_init(); 1034 acpi_sleep_proc_init();
1046 acpi_wakeup_device_init(); 1035 acpi_wakeup_device_init();
1047 return result; 1036 return 0;
1048} 1037}
1049 1038
1050subsys_initcall(acpi_init); 1039subsys_initcall(acpi_init);
diff --git a/drivers/acpi/debugfs.c b/drivers/acpi/debugfs.c
index 5df67f1d6c61..384f7abcff77 100644
--- a/drivers/acpi/debugfs.c
+++ b/drivers/acpi/debugfs.c
@@ -26,7 +26,9 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
26 size_t count, loff_t *ppos) 26 size_t count, loff_t *ppos)
27{ 27{
28 static char *buf; 28 static char *buf;
29 static int uncopied_bytes; 29 static u32 max_size;
30 static u32 uncopied_bytes;
31
30 struct acpi_table_header table; 32 struct acpi_table_header table;
31 acpi_status status; 33 acpi_status status;
32 34
@@ -37,19 +39,24 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
37 if (copy_from_user(&table, user_buf, 39 if (copy_from_user(&table, user_buf,
38 sizeof(struct acpi_table_header))) 40 sizeof(struct acpi_table_header)))
39 return -EFAULT; 41 return -EFAULT;
40 uncopied_bytes = table.length; 42 uncopied_bytes = max_size = table.length;
41 buf = kzalloc(uncopied_bytes, GFP_KERNEL); 43 buf = kzalloc(max_size, GFP_KERNEL);
42 if (!buf) 44 if (!buf)
43 return -ENOMEM; 45 return -ENOMEM;
44 } 46 }
45 47
46 if (uncopied_bytes < count) { 48 if (buf == NULL)
47 kfree(buf); 49 return -EINVAL;
50
51 if ((*ppos > max_size) ||
52 (*ppos + count > max_size) ||
53 (*ppos + count < count) ||
54 (count > uncopied_bytes))
48 return -EINVAL; 55 return -EINVAL;
49 }
50 56
51 if (copy_from_user(buf + (*ppos), user_buf, count)) { 57 if (copy_from_user(buf + (*ppos), user_buf, count)) {
52 kfree(buf); 58 kfree(buf);
59 buf = NULL;
53 return -EFAULT; 60 return -EFAULT;
54 } 61 }
55 62
@@ -59,6 +66,7 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
59 if (!uncopied_bytes) { 66 if (!uncopied_bytes) {
60 status = acpi_install_method(buf); 67 status = acpi_install_method(buf);
61 kfree(buf); 68 kfree(buf);
69 buf = NULL;
62 if (ACPI_FAILURE(status)) 70 if (ACPI_FAILURE(status))
63 return -EINVAL; 71 return -EINVAL;
64 add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); 72 add_taint(TAINT_OVERRIDDEN_ACPI_TABLE);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 5eb25eb3ea48..3b5c3189fd99 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -274,7 +274,7 @@ acpi_table_parse_srat(enum acpi_srat_type id,
274 274
275int __init acpi_numa_init(void) 275int __init acpi_numa_init(void)
276{ 276{
277 int ret = 0; 277 int cnt = 0;
278 278
279 /* 279 /*
280 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= 280 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
@@ -288,7 +288,7 @@ int __init acpi_numa_init(void)
288 acpi_parse_x2apic_affinity, 0); 288 acpi_parse_x2apic_affinity, 0);
289 acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, 289 acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
290 acpi_parse_processor_affinity, 0); 290 acpi_parse_processor_affinity, 0);
291 ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, 291 cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
292 acpi_parse_memory_affinity, 292 acpi_parse_memory_affinity,
293 NR_NODE_MEMBLKS); 293 NR_NODE_MEMBLKS);
294 } 294 }
@@ -297,7 +297,10 @@ int __init acpi_numa_init(void)
297 acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); 297 acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
298 298
299 acpi_numa_arch_fixup(); 299 acpi_numa_arch_fixup();
300 return ret; 300
301 if (cnt <= 0)
302 return cnt ?: -ENOENT;
303 return 0;
301} 304}
302 305
303int acpi_get_pxm(acpi_handle h) 306int acpi_get_pxm(acpi_handle h)
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index c90c76aa7f8b..4a6753009d79 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1589,9 +1589,9 @@ acpi_status __init acpi_os_initialize(void)
1589 1589
1590acpi_status __init acpi_os_initialize1(void) 1590acpi_status __init acpi_os_initialize1(void)
1591{ 1591{
1592 kacpid_wq = create_workqueue("kacpid"); 1592 kacpid_wq = alloc_workqueue("kacpid", 0, 1);
1593 kacpi_notify_wq = create_workqueue("kacpi_notify"); 1593 kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1);
1594 kacpi_hotplug_wq = create_workqueue("kacpi_hotplug"); 1594 kacpi_hotplug_wq = alloc_workqueue("kacpi_hotplug", 0, 1);
1595 BUG_ON(!kacpid_wq); 1595 BUG_ON(!kacpid_wq);
1596 BUG_ON(!kacpi_notify_wq); 1596 BUG_ON(!kacpi_notify_wq);
1597 BUG_ON(!kacpi_hotplug_wq); 1597 BUG_ON(!kacpi_hotplug_wq);
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index e9fef94d1039..1850dac8f45c 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -586,7 +586,7 @@ int acpi_suspend(u32 acpi_state)
586 return -EINVAL; 586 return -EINVAL;
587} 587}
588 588
589#ifdef CONFIG_PM_OPS 589#ifdef CONFIG_PM
590/** 590/**
591 * acpi_pm_device_sleep_state - return preferred power state of ACPI device 591 * acpi_pm_device_sleep_state - return preferred power state of ACPI device
592 * in the system sleep state given by %acpi_target_sleep_state 592 * in the system sleep state given by %acpi_target_sleep_state
@@ -672,7 +672,7 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
672 *d_min_p = d_min; 672 *d_min_p = d_min;
673 return d_max; 673 return d_max;
674} 674}
675#endif /* CONFIG_PM_OPS */ 675#endif /* CONFIG_PM */
676 676
677#ifdef CONFIG_PM_SLEEP 677#ifdef CONFIG_PM_SLEEP
678/** 678/**
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index c2328aed0836..75afa75a515e 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -202,6 +202,18 @@ config SATA_DWC
202 202
203 If unsure, say N. 203 If unsure, say N.
204 204
205config SATA_DWC_DEBUG
206 bool "Debugging driver version"
207 depends on SATA_DWC
208 help
209 This option enables debugging output in the driver.
210
211config SATA_DWC_VDEBUG
212 bool "Verbose debug output"
213 depends on SATA_DWC_DEBUG
214 help
215 This option enables the taskfile dumping and NCQ debugging.
216
205config SATA_MV 217config SATA_MV
206 tristate "Marvell SATA support" 218 tristate "Marvell SATA support"
207 help 219 help
@@ -299,6 +311,12 @@ config PATA_AMD
299 311
300 If unsure, say N. 312 If unsure, say N.
301 313
314config PATA_ARASAN_CF
315 tristate "ARASAN CompactFlash PATA Controller Support"
316 select DMA_ENGINE
317 help
318 Say Y here to support the ARASAN CompactFlash PATA controller
319
302config PATA_ARTOP 320config PATA_ARTOP
303 tristate "ARTOP 6210/6260 PATA support" 321 tristate "ARTOP 6210/6260 PATA support"
304 depends on PCI 322 depends on PCI
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 27291aad6ca7..8ac64e1aa051 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o
12 12
13# SFF w/ custom DMA 13# SFF w/ custom DMA
14obj-$(CONFIG_PDC_ADMA) += pdc_adma.o 14obj-$(CONFIG_PDC_ADMA) += pdc_adma.o
15obj-$(CONFIG_PATA_ARASAN_CF) += pata_arasan_cf.o
15obj-$(CONFIG_PATA_OCTEON_CF) += pata_octeon_cf.o 16obj-$(CONFIG_PATA_OCTEON_CF) += pata_octeon_cf.o
16obj-$(CONFIG_SATA_QSTOR) += sata_qstor.o 17obj-$(CONFIG_SATA_QSTOR) += sata_qstor.o
17obj-$(CONFIG_SATA_SX4) += sata_sx4.o 18obj-$(CONFIG_SATA_SX4) += sata_sx4.o
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index b8d96ce37fc9..e62f693be8ea 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -175,8 +175,7 @@ static const struct ata_port_info ahci_port_info[] = {
175 { 175 {
176 AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI | 176 AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI |
177 AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP), 177 AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP),
178 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 178 .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
179 ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA,
180 .pio_mask = ATA_PIO4, 179 .pio_mask = ATA_PIO4,
181 .udma_mask = ATA_UDMA6, 180 .udma_mask = ATA_UDMA6,
182 .port_ops = &ahci_ops, 181 .port_ops = &ahci_ops,
@@ -260,6 +259,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
260 { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */ 259 { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */
261 { PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /* PBG RAID */ 260 { PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /* PBG RAID */
262 { PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */ 261 { PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */
262 { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG RAID */
263 { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */ 263 { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */
264 264
265 /* JMicron 360/1/3/5/6, match class to avoid IDE function */ 265 /* JMicron 360/1/3/5/6, match class to avoid IDE function */
@@ -383,6 +383,10 @@ static const struct pci_device_id ahci_pci_tbl[] = {
383 .class = PCI_CLASS_STORAGE_SATA_AHCI, 383 .class = PCI_CLASS_STORAGE_SATA_AHCI,
384 .class_mask = 0xffffff, 384 .class_mask = 0xffffff,
385 .driver_data = board_ahci_yes_fbs }, /* 88se9128 */ 385 .driver_data = board_ahci_yes_fbs }, /* 88se9128 */
386 { PCI_DEVICE(0x1b4b, 0x9125),
387 .driver_data = board_ahci_yes_fbs }, /* 88se9125 */
388 { PCI_DEVICE(0x1b4b, 0x91a3),
389 .driver_data = board_ahci_yes_fbs },
386 390
387 /* Promise */ 391 /* Promise */
388 { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ 392 { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 3e606c34f57b..ccaf08122058 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -213,10 +213,8 @@ enum {
213 213
214 /* ap->flags bits */ 214 /* ap->flags bits */
215 215
216 AHCI_FLAG_COMMON = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 216 AHCI_FLAG_COMMON = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
217 ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA | 217 ATA_FLAG_ACPI_SATA | ATA_FLAG_AN,
218 ATA_FLAG_ACPI_SATA | ATA_FLAG_AN |
219 ATA_FLAG_LPM,
220 218
221 ICH_MAP = 0x90, /* ICH MAP register */ 219 ICH_MAP = 0x90, /* ICH MAP register */
222 220
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index 6981f7680a00..721d38bfa339 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -237,7 +237,7 @@ static struct pci_device_id ata_generic[] = {
237#endif 237#endif
238 /* Intel, IDE class device */ 238 /* Intel, IDE class device */
239 { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 239 { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
240 PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 240 PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL,
241 .driver_data = ATA_GEN_INTEL_IDER }, 241 .driver_data = ATA_GEN_INTEL_IDER },
242 /* Must come last. If you add entries adjust this table appropriately */ 242 /* Must come last. If you add entries adjust this table appropriately */
243 { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL), 243 { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL),
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 6cb14ca8ee85..cdec4ab3b159 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -230,7 +230,7 @@ static const struct pci_device_id piix_pci_tbl[] = {
230 { 0x8086, 0x2850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, 230 { 0x8086, 0x2850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 },
231 231
232 /* SATA ports */ 232 /* SATA ports */
233 233
234 /* 82801EB (ICH5) */ 234 /* 82801EB (ICH5) */
235 { 0x8086, 0x24d1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata }, 235 { 0x8086, 0x24d1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
236 /* 82801EB (ICH5) */ 236 /* 82801EB (ICH5) */
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 8b5ea399a4f4..a791b8ce6294 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -660,8 +660,7 @@ static int ata_acpi_filter_tf(struct ata_device *dev,
660 * @dev: target ATA device 660 * @dev: target ATA device
661 * @gtf: raw ATA taskfile register set (0x1f1 - 0x1f7) 661 * @gtf: raw ATA taskfile register set (0x1f1 - 0x1f7)
662 * 662 *
663 * Outputs ATA taskfile to standard ATA host controller using MMIO 663 * Outputs ATA taskfile to standard ATA host controller.
664 * or PIO as indicated by the ATA_FLAG_MMIO flag.
665 * Writes the control, feature, nsect, lbal, lbam, and lbah registers. 664 * Writes the control, feature, nsect, lbal, lbam, and lbah registers.
666 * Optionally (ATA_TFLAG_LBA48) writes hob_feature, hob_nsect, 665 * Optionally (ATA_TFLAG_LBA48) writes hob_feature, hob_nsect,
667 * hob_lbal, hob_lbam, and hob_lbah. 666 * hob_lbal, hob_lbam, and hob_lbah.
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index d4e52e214859..b91e19cab102 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4210,7 +4210,7 @@ static int glob_match (const char *text, const char *pattern)
4210 return 0; /* End of both strings: match */ 4210 return 0; /* End of both strings: match */
4211 return 1; /* No match */ 4211 return 1; /* No match */
4212} 4212}
4213 4213
4214static unsigned long ata_dev_blacklisted(const struct ata_device *dev) 4214static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
4215{ 4215{
4216 unsigned char model_num[ATA_ID_PROD_LEN + 1]; 4216 unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -5479,7 +5479,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
5479 ap = kzalloc(sizeof(*ap), GFP_KERNEL); 5479 ap = kzalloc(sizeof(*ap), GFP_KERNEL);
5480 if (!ap) 5480 if (!ap)
5481 return NULL; 5481 return NULL;
5482 5482
5483 ap->pflags |= ATA_PFLAG_INITIALIZING; 5483 ap->pflags |= ATA_PFLAG_INITIALIZING;
5484 ap->lock = &host->lock; 5484 ap->lock = &host->lock;
5485 ap->print_id = -1; 5485 ap->print_id = -1;
@@ -5887,21 +5887,9 @@ void ata_host_init(struct ata_host *host, struct device *dev,
5887 host->ops = ops; 5887 host->ops = ops;
5888} 5888}
5889 5889
5890 5890int ata_port_probe(struct ata_port *ap)
5891static void async_port_probe(void *data, async_cookie_t cookie)
5892{ 5891{
5893 int rc; 5892 int rc = 0;
5894 struct ata_port *ap = data;
5895
5896 /*
5897 * If we're not allowed to scan this host in parallel,
5898 * we need to wait until all previous scans have completed
5899 * before going further.
5900 * Jeff Garzik says this is only within a controller, so we
5901 * don't need to wait for port 0, only for later ports.
5902 */
5903 if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0)
5904 async_synchronize_cookie(cookie);
5905 5893
5906 /* probe */ 5894 /* probe */
5907 if (ap->ops->error_handler) { 5895 if (ap->ops->error_handler) {
@@ -5927,23 +5915,33 @@ static void async_port_probe(void *data, async_cookie_t cookie)
5927 DPRINTK("ata%u: bus probe begin\n", ap->print_id); 5915 DPRINTK("ata%u: bus probe begin\n", ap->print_id);
5928 rc = ata_bus_probe(ap); 5916 rc = ata_bus_probe(ap);
5929 DPRINTK("ata%u: bus probe end\n", ap->print_id); 5917 DPRINTK("ata%u: bus probe end\n", ap->print_id);
5930
5931 if (rc) {
5932 /* FIXME: do something useful here?
5933 * Current libata behavior will
5934 * tear down everything when
5935 * the module is removed
5936 * or the h/w is unplugged.
5937 */
5938 }
5939 } 5918 }
5919 return rc;
5920}
5921
5922
5923static void async_port_probe(void *data, async_cookie_t cookie)
5924{
5925 struct ata_port *ap = data;
5926
5927 /*
5928 * If we're not allowed to scan this host in parallel,
5929 * we need to wait until all previous scans have completed
5930 * before going further.
5931 * Jeff Garzik says this is only within a controller, so we
5932 * don't need to wait for port 0, only for later ports.
5933 */
5934 if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0)
5935 async_synchronize_cookie(cookie);
5936
5937 (void)ata_port_probe(ap);
5940 5938
5941 /* in order to keep device order, we need to synchronize at this point */ 5939 /* in order to keep device order, we need to synchronize at this point */
5942 async_synchronize_cookie(cookie); 5940 async_synchronize_cookie(cookie);
5943 5941
5944 ata_scsi_scan_host(ap, 1); 5942 ata_scsi_scan_host(ap, 1);
5945
5946} 5943}
5944
5947/** 5945/**
5948 * ata_host_register - register initialized ATA host 5946 * ata_host_register - register initialized ATA host
5949 * @host: ATA host to register 5947 * @host: ATA host to register
@@ -5983,7 +5981,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
5983 for (i = 0; i < host->n_ports; i++) 5981 for (i = 0; i < host->n_ports; i++)
5984 host->ports[i]->print_id = ata_print_id++; 5982 host->ports[i]->print_id = ata_print_id++;
5985 5983
5986 5984
5987 /* Create associated sysfs transport objects */ 5985 /* Create associated sysfs transport objects */
5988 for (i = 0; i < host->n_ports; i++) { 5986 for (i = 0; i < host->n_ports; i++) {
5989 rc = ata_tport_add(host->dev,host->ports[i]); 5987 rc = ata_tport_add(host->dev,host->ports[i]);
@@ -6471,7 +6469,7 @@ static int __init ata_init(void)
6471 ata_sff_exit(); 6469 ata_sff_exit();
6472 rc = -ENOMEM; 6470 rc = -ENOMEM;
6473 goto err_out; 6471 goto err_out;
6474 } 6472 }
6475 6473
6476 printk(KERN_DEBUG "libata version " DRV_VERSION " loaded.\n"); 6474 printk(KERN_DEBUG "libata version " DRV_VERSION " loaded.\n");
6477 return 0; 6475 return 0;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 17a637877d03..df3f3140c9c7 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -587,11 +587,43 @@ static void ata_eh_unload(struct ata_port *ap)
587void ata_scsi_error(struct Scsi_Host *host) 587void ata_scsi_error(struct Scsi_Host *host)
588{ 588{
589 struct ata_port *ap = ata_shost_to_port(host); 589 struct ata_port *ap = ata_shost_to_port(host);
590 int i;
591 unsigned long flags; 590 unsigned long flags;
591 LIST_HEAD(eh_work_q);
592 592
593 DPRINTK("ENTER\n"); 593 DPRINTK("ENTER\n");
594 594
595 spin_lock_irqsave(host->host_lock, flags);
596 list_splice_init(&host->eh_cmd_q, &eh_work_q);
597 spin_unlock_irqrestore(host->host_lock, flags);
598
599 ata_scsi_cmd_error_handler(host, ap, &eh_work_q);
600
601 /* If we timed raced normal completion and there is nothing to
602 recover nr_timedout == 0 why exactly are we doing error recovery ? */
603 ata_scsi_port_error_handler(host, ap);
604
605 /* finish or retry handled scmd's and clean up */
606 WARN_ON(host->host_failed || !list_empty(&eh_work_q));
607
608 DPRINTK("EXIT\n");
609}
610
611/**
612 * ata_scsi_cmd_error_handler - error callback for a list of commands
613 * @host: scsi host containing the port
614 * @ap: ATA port within the host
615 * @eh_work_q: list of commands to process
616 *
617 * process the given list of commands and return those finished to the
618 * ap->eh_done_q. This function is the first part of the libata error
619 * handler which processes a given list of failed commands.
620 */
621void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
622 struct list_head *eh_work_q)
623{
624 int i;
625 unsigned long flags;
626
595 /* make sure sff pio task is not running */ 627 /* make sure sff pio task is not running */
596 ata_sff_flush_pio_task(ap); 628 ata_sff_flush_pio_task(ap);
597 629
@@ -627,7 +659,7 @@ void ata_scsi_error(struct Scsi_Host *host)
627 if (ap->ops->lost_interrupt) 659 if (ap->ops->lost_interrupt)
628 ap->ops->lost_interrupt(ap); 660 ap->ops->lost_interrupt(ap);
629 661
630 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 662 list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
631 struct ata_queued_cmd *qc; 663 struct ata_queued_cmd *qc;
632 664
633 for (i = 0; i < ATA_MAX_QUEUE; i++) { 665 for (i = 0; i < ATA_MAX_QUEUE; i++) {
@@ -671,8 +703,20 @@ void ata_scsi_error(struct Scsi_Host *host)
671 } else 703 } else
672 spin_unlock_wait(ap->lock); 704 spin_unlock_wait(ap->lock);
673 705
674 /* If we timed raced normal completion and there is nothing to 706}
675 recover nr_timedout == 0 why exactly are we doing error recovery ? */ 707EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
708
709/**
710 * ata_scsi_port_error_handler - recover the port after the commands
711 * @host: SCSI host containing the port
712 * @ap: the ATA port
713 *
714 * Handle the recovery of the port @ap after all the commands
715 * have been recovered.
716 */
717void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
718{
719 unsigned long flags;
676 720
677 /* invoke error handler */ 721 /* invoke error handler */
678 if (ap->ops->error_handler) { 722 if (ap->ops->error_handler) {
@@ -761,9 +805,6 @@ void ata_scsi_error(struct Scsi_Host *host)
761 ap->ops->eng_timeout(ap); 805 ap->ops->eng_timeout(ap);
762 } 806 }
763 807
764 /* finish or retry handled scmd's and clean up */
765 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
766
767 scsi_eh_flush_done_q(&ap->eh_done_q); 808 scsi_eh_flush_done_q(&ap->eh_done_q);
768 809
769 /* clean up */ 810 /* clean up */
@@ -784,9 +825,8 @@ void ata_scsi_error(struct Scsi_Host *host)
784 wake_up_all(&ap->eh_wait_q); 825 wake_up_all(&ap->eh_wait_q);
785 826
786 spin_unlock_irqrestore(ap->lock, flags); 827 spin_unlock_irqrestore(ap->lock, flags);
787
788 DPRINTK("EXIT\n");
789} 828}
829EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);
790 830
791/** 831/**
792 * ata_port_wait_eh - Wait for the currently pending EH to complete 832 * ata_port_wait_eh - Wait for the currently pending EH to complete
@@ -1618,7 +1658,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
1618 * host links. For disabled PMP links, only N bit is 1658 * host links. For disabled PMP links, only N bit is
1619 * considered as X bit is left at 1 for link plugging. 1659 * considered as X bit is left at 1 for link plugging.
1620 */ 1660 */
1621 if (link->lpm_policy != ATA_LPM_MAX_POWER) 1661 if (link->lpm_policy > ATA_LPM_MAX_POWER)
1622 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */ 1662 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */
1623 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1663 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
1624 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1664 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 600f6353ecf8..a83419991357 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2056,6 +2056,17 @@ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
2056 ATA_ID_SERNO_LEN); 2056 ATA_ID_SERNO_LEN);
2057 num += ATA_ID_SERNO_LEN; 2057 num += ATA_ID_SERNO_LEN;
2058 2058
2059 if (ata_id_has_wwn(args->id)) {
2060 /* SAT defined lu world wide name */
2061 /* piv=0, assoc=lu, code_set=binary, designator=NAA */
2062 rbuf[num + 0] = 1;
2063 rbuf[num + 1] = 3;
2064 rbuf[num + 3] = ATA_ID_WWN_LEN;
2065 num += 4;
2066 ata_id_string(args->id, (unsigned char *) rbuf + num,
2067 ATA_ID_WWN, ATA_ID_WWN_LEN);
2068 num += ATA_ID_WWN_LEN;
2069 }
2059 rbuf[3] = num - 4; /* page len (assume less than 256 bytes) */ 2070 rbuf[3] = num - 4; /* page len (assume less than 256 bytes) */
2060 return 0; 2071 return 0;
2061} 2072}
@@ -3759,7 +3770,7 @@ struct ata_port *ata_sas_port_alloc(struct ata_host *host,
3759 return NULL; 3770 return NULL;
3760 3771
3761 ap->port_no = 0; 3772 ap->port_no = 0;
3762 ap->lock = shost->host_lock; 3773 ap->lock = &host->lock;
3763 ap->pio_mask = port_info->pio_mask; 3774 ap->pio_mask = port_info->pio_mask;
3764 ap->mwdma_mask = port_info->mwdma_mask; 3775 ap->mwdma_mask = port_info->mwdma_mask;
3765 ap->udma_mask = port_info->udma_mask; 3776 ap->udma_mask = port_info->udma_mask;
@@ -3821,7 +3832,7 @@ int ata_sas_port_init(struct ata_port *ap)
3821 3832
3822 if (!rc) { 3833 if (!rc) {
3823 ap->print_id = ata_print_id++; 3834 ap->print_id = ata_print_id++;
3824 rc = ata_bus_probe(ap); 3835 rc = ata_port_probe(ap);
3825 } 3836 }
3826 3837
3827 return rc; 3838 return rc;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index af6141bb1ba3..cf7acbc0cfcb 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1302,6 +1302,18 @@ fsm_start:
1302} 1302}
1303EXPORT_SYMBOL_GPL(ata_sff_hsm_move); 1303EXPORT_SYMBOL_GPL(ata_sff_hsm_move);
1304 1304
1305void ata_sff_queue_work(struct work_struct *work)
1306{
1307 queue_work(ata_sff_wq, work);
1308}
1309EXPORT_SYMBOL_GPL(ata_sff_queue_work);
1310
1311void ata_sff_queue_delayed_work(struct delayed_work *dwork, unsigned long delay)
1312{
1313 queue_delayed_work(ata_sff_wq, dwork, delay);
1314}
1315EXPORT_SYMBOL_GPL(ata_sff_queue_delayed_work);
1316
1305void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay) 1317void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay)
1306{ 1318{
1307 struct ata_port *ap = link->ap; 1319 struct ata_port *ap = link->ap;
@@ -1311,8 +1323,7 @@ void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay)
1311 ap->sff_pio_task_link = link; 1323 ap->sff_pio_task_link = link;
1312 1324
1313 /* may fail if ata_sff_flush_pio_task() in progress */ 1325 /* may fail if ata_sff_flush_pio_task() in progress */
1314 queue_delayed_work(ata_sff_wq, &ap->sff_pio_task, 1326 ata_sff_queue_delayed_work(&ap->sff_pio_task, msecs_to_jiffies(delay));
1315 msecs_to_jiffies(delay));
1316} 1327}
1317EXPORT_SYMBOL_GPL(ata_sff_queue_pio_task); 1328EXPORT_SYMBOL_GPL(ata_sff_queue_pio_task);
1318 1329
@@ -1336,7 +1347,7 @@ static void ata_sff_pio_task(struct work_struct *work)
1336 u8 status; 1347 u8 status;
1337 int poll_next; 1348 int poll_next;
1338 1349
1339 BUG_ON(ap->sff_pio_task_link == NULL); 1350 BUG_ON(ap->sff_pio_task_link == NULL);
1340 /* qc can be NULL if timeout occurred */ 1351 /* qc can be NULL if timeout occurred */
1341 qc = ata_qc_from_tag(ap, link->active_tag); 1352 qc = ata_qc_from_tag(ap, link->active_tag);
1342 if (!qc) { 1353 if (!qc) {
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index a9be110dbf51..773de97988a2 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -103,6 +103,7 @@ extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg);
103extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg); 103extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg);
104extern struct ata_port *ata_port_alloc(struct ata_host *host); 104extern struct ata_port *ata_port_alloc(struct ata_host *host);
105extern const char *sata_spd_string(unsigned int spd); 105extern const char *sata_spd_string(unsigned int spd);
106extern int ata_port_probe(struct ata_port *ap);
106 107
107/* libata-acpi.c */ 108/* libata-acpi.c */
108#ifdef CONFIG_ATA_ACPI 109#ifdef CONFIG_ATA_ACPI
diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index c8d47034d5e9..91949d997555 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c
@@ -245,7 +245,7 @@ static struct ata_port_operations pacpi_ops = {
245static int pacpi_init_one (struct pci_dev *pdev, const struct pci_device_id *id) 245static int pacpi_init_one (struct pci_dev *pdev, const struct pci_device_id *id)
246{ 246{
247 static const struct ata_port_info info = { 247 static const struct ata_port_info info = {
248 .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, 248 .flags = ATA_FLAG_SLAVE_POSS,
249 249
250 .pio_mask = ATA_PIO4, 250 .pio_mask = ATA_PIO4,
251 .mwdma_mask = ATA_MWDMA2, 251 .mwdma_mask = ATA_MWDMA2,
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
new file mode 100644
index 000000000000..65cee74605b4
--- /dev/null
+++ b/drivers/ata/pata_arasan_cf.c
@@ -0,0 +1,983 @@
1/*
2 * drivers/ata/pata_arasan_cf.c
3 *
4 * Arasan Compact Flash host controller source file
5 *
6 * Copyright (C) 2011 ST Microelectronics
7 * Viresh Kumar <viresh.kumar@st.com>
8 *
9 * This file is licensed under the terms of the GNU General Public
10 * License version 2. This program is licensed "as is" without any
11 * warranty of any kind, whether express or implied.
12 */
13
14/*
15 * The Arasan CompactFlash Device Controller IP core has three basic modes of
16 * operation: PC card ATA using I/O mode, PC card ATA using memory mode, PC card
17 * ATA using true IDE modes. This driver supports only True IDE mode currently.
18 *
19 * Arasan CF Controller shares global irq register with Arasan XD Controller.
20 *
21 * Tested on arch/arm/mach-spear13xx
22 */
23
24#include <linux/ata.h>
25#include <linux/clk.h>
26#include <linux/completion.h>
27#include <linux/delay.h>
28#include <linux/dmaengine.h>
29#include <linux/io.h>
30#include <linux/irq.h>
31#include <linux/kernel.h>
32#include <linux/libata.h>
33#include <linux/module.h>
34#include <linux/pata_arasan_cf_data.h>
35#include <linux/platform_device.h>
36#include <linux/pm.h>
37#include <linux/slab.h>
38#include <linux/spinlock.h>
39#include <linux/types.h>
40#include <linux/workqueue.h>
41
42#define DRIVER_NAME "arasan_cf"
43#define TIMEOUT msecs_to_jiffies(3000)
44
45/* Registers */
46/* CompactFlash Interface Status */
47#define CFI_STS 0x000
48 #define STS_CHG (1)
49 #define BIN_AUDIO_OUT (1 << 1)
50 #define CARD_DETECT1 (1 << 2)
51 #define CARD_DETECT2 (1 << 3)
52 #define INP_ACK (1 << 4)
53 #define CARD_READY (1 << 5)
54 #define IO_READY (1 << 6)
55 #define B16_IO_PORT_SEL (1 << 7)
56/* IRQ */
57#define IRQ_STS 0x004
58/* Interrupt Enable */
59#define IRQ_EN 0x008
60 #define CARD_DETECT_IRQ (1)
61 #define STATUS_CHNG_IRQ (1 << 1)
62 #define MEM_MODE_IRQ (1 << 2)
63 #define IO_MODE_IRQ (1 << 3)
64 #define TRUE_IDE_MODE_IRQ (1 << 8)
65 #define PIO_XFER_ERR_IRQ (1 << 9)
66 #define BUF_AVAIL_IRQ (1 << 10)
67 #define XFER_DONE_IRQ (1 << 11)
68 #define IGNORED_IRQS (STATUS_CHNG_IRQ | MEM_MODE_IRQ | IO_MODE_IRQ |\
69 TRUE_IDE_MODE_IRQ)
70 #define TRUE_IDE_IRQS (CARD_DETECT_IRQ | PIO_XFER_ERR_IRQ |\
71 BUF_AVAIL_IRQ | XFER_DONE_IRQ)
72/* Operation Mode */
73#define OP_MODE 0x00C
74 #define CARD_MODE_MASK (0x3)
75 #define MEM_MODE (0x0)
76 #define IO_MODE (0x1)
77 #define TRUE_IDE_MODE (0x2)
78
79 #define CARD_TYPE_MASK (1 << 2)
80 #define CF_CARD (0)
81 #define CF_PLUS_CARD (1 << 2)
82
83 #define CARD_RESET (1 << 3)
84 #define CFHOST_ENB (1 << 4)
85 #define OUTPUTS_TRISTATE (1 << 5)
86 #define ULTRA_DMA_ENB (1 << 8)
87 #define MULTI_WORD_DMA_ENB (1 << 9)
88 #define DRQ_BLOCK_SIZE_MASK (0x3 << 11)
89 #define DRQ_BLOCK_SIZE_512 (0)
90 #define DRQ_BLOCK_SIZE_1024 (1 << 11)
91 #define DRQ_BLOCK_SIZE_2048 (2 << 11)
92 #define DRQ_BLOCK_SIZE_4096 (3 << 11)
93/* CF Interface Clock Configuration */
94#define CLK_CFG 0x010
95 #define CF_IF_CLK_MASK (0XF)
96/* CF Timing Mode Configuration */
97#define TM_CFG 0x014
98 #define MEM_MODE_TIMING_MASK (0x3)
99 #define MEM_MODE_TIMING_250NS (0x0)
100 #define MEM_MODE_TIMING_120NS (0x1)
101 #define MEM_MODE_TIMING_100NS (0x2)
102 #define MEM_MODE_TIMING_80NS (0x3)
103
104 #define IO_MODE_TIMING_MASK (0x3 << 2)
105 #define IO_MODE_TIMING_250NS (0x0 << 2)
106 #define IO_MODE_TIMING_120NS (0x1 << 2)
107 #define IO_MODE_TIMING_100NS (0x2 << 2)
108 #define IO_MODE_TIMING_80NS (0x3 << 2)
109
110 #define TRUEIDE_PIO_TIMING_MASK (0x7 << 4)
111 #define TRUEIDE_PIO_TIMING_SHIFT 4
112
113 #define TRUEIDE_MWORD_DMA_TIMING_MASK (0x7 << 7)
114 #define TRUEIDE_MWORD_DMA_TIMING_SHIFT 7
115
116 #define ULTRA_DMA_TIMING_MASK (0x7 << 10)
117 #define ULTRA_DMA_TIMING_SHIFT 10
118/* CF Transfer Address */
119#define XFER_ADDR 0x014
120 #define XFER_ADDR_MASK (0x7FF)
121 #define MAX_XFER_COUNT 0x20000u
122/* Transfer Control */
123#define XFER_CTR 0x01C
124 #define XFER_COUNT_MASK (0x3FFFF)
125 #define ADDR_INC_DISABLE (1 << 24)
126 #define XFER_WIDTH_MASK (1 << 25)
127 #define XFER_WIDTH_8B (0)
128 #define XFER_WIDTH_16B (1 << 25)
129
130 #define MEM_TYPE_MASK (1 << 26)
131 #define MEM_TYPE_COMMON (0)
132 #define MEM_TYPE_ATTRIBUTE (1 << 26)
133
134 #define MEM_IO_XFER_MASK (1 << 27)
135 #define MEM_XFER (0)
136 #define IO_XFER (1 << 27)
137
138 #define DMA_XFER_MODE (1 << 28)
139
140 #define AHB_BUS_NORMAL_PIO_OPRTN (~(1 << 29))
141 #define XFER_DIR_MASK (1 << 30)
142 #define XFER_READ (0)
143 #define XFER_WRITE (1 << 30)
144
145 #define XFER_START (1 << 31)
146/* Write Data Port */
147#define WRITE_PORT 0x024
148/* Read Data Port */
149#define READ_PORT 0x028
150/* ATA Data Port */
151#define ATA_DATA_PORT 0x030
152 #define ATA_DATA_PORT_MASK (0xFFFF)
153/* ATA Error/Features */
154#define ATA_ERR_FTR 0x034
155/* ATA Sector Count */
156#define ATA_SC 0x038
157/* ATA Sector Number */
158#define ATA_SN 0x03C
159/* ATA Cylinder Low */
160#define ATA_CL 0x040
161/* ATA Cylinder High */
162#define ATA_CH 0x044
163/* ATA Select Card/Head */
164#define ATA_SH 0x048
165/* ATA Status-Command */
166#define ATA_STS_CMD 0x04C
167/* ATA Alternate Status/Device Control */
168#define ATA_ASTS_DCTR 0x050
169/* Extended Write Data Port 0x200-0x3FC */
170#define EXT_WRITE_PORT 0x200
171/* Extended Read Data Port 0x400-0x5FC */
172#define EXT_READ_PORT 0x400
173 #define FIFO_SIZE 0x200u
174/* Global Interrupt Status */
175#define GIRQ_STS 0x800
176/* Global Interrupt Status enable */
177#define GIRQ_STS_EN 0x804
178/* Global Interrupt Signal enable */
179#define GIRQ_SGN_EN 0x808
180 #define GIRQ_CF (1)
181 #define GIRQ_XD (1 << 1)
182
183/* Compact Flash Controller Dev Structure */
184struct arasan_cf_dev {
185 /* pointer to ata_host structure */
186 struct ata_host *host;
187 /* clk structure, only if HAVE_CLK is defined */
188#ifdef CONFIG_HAVE_CLK
189 struct clk *clk;
190#endif
191
192 /* physical base address of controller */
193 dma_addr_t pbase;
194 /* virtual base address of controller */
195 void __iomem *vbase;
196 /* irq number*/
197 int irq;
198
199 /* status to be updated to framework regarding DMA transfer */
200 u8 dma_status;
201 /* Card is present or Not */
202 u8 card_present;
203
204 /* dma specific */
205 /* Completion for transfer complete interrupt from controller */
206 struct completion cf_completion;
207 /* Completion for DMA transfer complete. */
208 struct completion dma_completion;
209 /* Dma channel allocated */
210 struct dma_chan *dma_chan;
211 /* Mask for DMA transfers */
212 dma_cap_mask_t mask;
213 /* dma channel private data */
214 void *dma_priv;
215 /* DMA transfer work */
216 struct work_struct work;
217 /* DMA delayed finish work */
218 struct delayed_work dwork;
219 /* qc to be transferred using DMA */
220 struct ata_queued_cmd *qc;
221};
222
223static struct scsi_host_template arasan_cf_sht = {
224 ATA_BASE_SHT(DRIVER_NAME),
225 .sg_tablesize = SG_NONE,
226 .dma_boundary = 0xFFFFFFFFUL,
227};
228
229static void cf_dumpregs(struct arasan_cf_dev *acdev)
230{
231 struct device *dev = acdev->host->dev;
232
233 dev_dbg(dev, ": =========== REGISTER DUMP ===========");
234 dev_dbg(dev, ": CFI_STS: %x", readl(acdev->vbase + CFI_STS));
235 dev_dbg(dev, ": IRQ_STS: %x", readl(acdev->vbase + IRQ_STS));
236 dev_dbg(dev, ": IRQ_EN: %x", readl(acdev->vbase + IRQ_EN));
237 dev_dbg(dev, ": OP_MODE: %x", readl(acdev->vbase + OP_MODE));
238 dev_dbg(dev, ": CLK_CFG: %x", readl(acdev->vbase + CLK_CFG));
239 dev_dbg(dev, ": TM_CFG: %x", readl(acdev->vbase + TM_CFG));
240 dev_dbg(dev, ": XFER_CTR: %x", readl(acdev->vbase + XFER_CTR));
241 dev_dbg(dev, ": GIRQ_STS: %x", readl(acdev->vbase + GIRQ_STS));
242 dev_dbg(dev, ": GIRQ_STS_EN: %x", readl(acdev->vbase + GIRQ_STS_EN));
243 dev_dbg(dev, ": GIRQ_SGN_EN: %x", readl(acdev->vbase + GIRQ_SGN_EN));
244 dev_dbg(dev, ": =====================================");
245}
246
247/* Enable/Disable global interrupts shared between CF and XD ctrlr. */
248static void cf_ginterrupt_enable(struct arasan_cf_dev *acdev, bool enable)
249{
250 /* enable should be 0 or 1 */
251 writel(enable, acdev->vbase + GIRQ_STS_EN);
252 writel(enable, acdev->vbase + GIRQ_SGN_EN);
253}
254
255/* Enable/Disable CF interrupts */
256static inline void
257cf_interrupt_enable(struct arasan_cf_dev *acdev, u32 mask, bool enable)
258{
259 u32 val = readl(acdev->vbase + IRQ_EN);
260 /* clear & enable/disable irqs */
261 if (enable) {
262 writel(mask, acdev->vbase + IRQ_STS);
263 writel(val | mask, acdev->vbase + IRQ_EN);
264 } else
265 writel(val & ~mask, acdev->vbase + IRQ_EN);
266}
267
268static inline void cf_card_reset(struct arasan_cf_dev *acdev)
269{
270 u32 val = readl(acdev->vbase + OP_MODE);
271
272 writel(val | CARD_RESET, acdev->vbase + OP_MODE);
273 udelay(200);
274 writel(val & ~CARD_RESET, acdev->vbase + OP_MODE);
275}
276
277static inline void cf_ctrl_reset(struct arasan_cf_dev *acdev)
278{
279 writel(readl(acdev->vbase + OP_MODE) & ~CFHOST_ENB,
280 acdev->vbase + OP_MODE);
281 writel(readl(acdev->vbase + OP_MODE) | CFHOST_ENB,
282 acdev->vbase + OP_MODE);
283}
284
285static void cf_card_detect(struct arasan_cf_dev *acdev, bool hotplugged)
286{
287 struct ata_port *ap = acdev->host->ports[0];
288 struct ata_eh_info *ehi = &ap->link.eh_info;
289 u32 val = readl(acdev->vbase + CFI_STS);
290
291 /* Both CD1 & CD2 should be low if card inserted completely */
292 if (!(val & (CARD_DETECT1 | CARD_DETECT2))) {
293 if (acdev->card_present)
294 return;
295 acdev->card_present = 1;
296 cf_card_reset(acdev);
297 } else {
298 if (!acdev->card_present)
299 return;
300 acdev->card_present = 0;
301 }
302
303 if (hotplugged) {
304 ata_ehi_hotplugged(ehi);
305 ata_port_freeze(ap);
306 }
307}
308
309static int cf_init(struct arasan_cf_dev *acdev)
310{
311 struct arasan_cf_pdata *pdata = dev_get_platdata(acdev->host->dev);
312 unsigned long flags;
313 int ret = 0;
314
315#ifdef CONFIG_HAVE_CLK
316 ret = clk_enable(acdev->clk);
317 if (ret) {
318 dev_dbg(acdev->host->dev, "clock enable failed");
319 return ret;
320 }
321#endif
322
323 spin_lock_irqsave(&acdev->host->lock, flags);
324 /* configure CF interface clock */
325 writel((pdata->cf_if_clk <= CF_IF_CLK_200M) ? pdata->cf_if_clk :
326 CF_IF_CLK_166M, acdev->vbase + CLK_CFG);
327
328 writel(TRUE_IDE_MODE | CFHOST_ENB, acdev->vbase + OP_MODE);
329 cf_interrupt_enable(acdev, CARD_DETECT_IRQ, 1);
330 cf_ginterrupt_enable(acdev, 1);
331 spin_unlock_irqrestore(&acdev->host->lock, flags);
332
333 return ret;
334}
335
336static void cf_exit(struct arasan_cf_dev *acdev)
337{
338 unsigned long flags;
339
340 spin_lock_irqsave(&acdev->host->lock, flags);
341 cf_ginterrupt_enable(acdev, 0);
342 cf_interrupt_enable(acdev, TRUE_IDE_IRQS, 0);
343 cf_card_reset(acdev);
344 writel(readl(acdev->vbase + OP_MODE) & ~CFHOST_ENB,
345 acdev->vbase + OP_MODE);
346 spin_unlock_irqrestore(&acdev->host->lock, flags);
347#ifdef CONFIG_HAVE_CLK
348 clk_disable(acdev->clk);
349#endif
350}
351
352static void dma_callback(void *dev)
353{
354 struct arasan_cf_dev *acdev = (struct arasan_cf_dev *) dev;
355
356 complete(&acdev->dma_completion);
357}
358
359static bool filter(struct dma_chan *chan, void *slave)
360{
361 chan->private = slave;
362 return true;
363}
364
365static inline void dma_complete(struct arasan_cf_dev *acdev)
366{
367 struct ata_queued_cmd *qc = acdev->qc;
368 unsigned long flags;
369
370 acdev->qc = NULL;
371 ata_sff_interrupt(acdev->irq, acdev->host);
372
373 spin_lock_irqsave(&acdev->host->lock, flags);
374 if (unlikely(qc->err_mask) && ata_is_dma(qc->tf.protocol))
375 ata_ehi_push_desc(&qc->ap->link.eh_info, "DMA Failed: Timeout");
376 spin_unlock_irqrestore(&acdev->host->lock, flags);
377}
378
379static inline int wait4buf(struct arasan_cf_dev *acdev)
380{
381 if (!wait_for_completion_timeout(&acdev->cf_completion, TIMEOUT)) {
382 u32 rw = acdev->qc->tf.flags & ATA_TFLAG_WRITE;
383
384 dev_err(acdev->host->dev, "%s TimeOut", rw ? "write" : "read");
385 return -ETIMEDOUT;
386 }
387
388 /* Check if PIO Error interrupt has occured */
389 if (acdev->dma_status & ATA_DMA_ERR)
390 return -EAGAIN;
391
392 return 0;
393}
394
395static int
396dma_xfer(struct arasan_cf_dev *acdev, dma_addr_t src, dma_addr_t dest, u32 len)
397{
398 struct dma_async_tx_descriptor *tx;
399 struct dma_chan *chan = acdev->dma_chan;
400 dma_cookie_t cookie;
401 unsigned long flags = DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
402 DMA_COMPL_SKIP_DEST_UNMAP;
403 int ret = 0;
404
405 tx = chan->device->device_prep_dma_memcpy(chan, dest, src, len, flags);
406 if (!tx) {
407 dev_err(acdev->host->dev, "device_prep_dma_memcpy failed\n");
408 return -EAGAIN;
409 }
410
411 tx->callback = dma_callback;
412 tx->callback_param = acdev;
413 cookie = tx->tx_submit(tx);
414
415 ret = dma_submit_error(cookie);
416 if (ret) {
417 dev_err(acdev->host->dev, "dma_submit_error\n");
418 return ret;
419 }
420
421 chan->device->device_issue_pending(chan);
422
423 /* Wait for DMA to complete */
424 if (!wait_for_completion_timeout(&acdev->dma_completion, TIMEOUT)) {
425 chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
426 dev_err(acdev->host->dev, "wait_for_completion_timeout\n");
427 return -ETIMEDOUT;
428 }
429
430 return ret;
431}
432
433static int sg_xfer(struct arasan_cf_dev *acdev, struct scatterlist *sg)
434{
435 dma_addr_t dest = 0, src = 0;
436 u32 xfer_cnt, sglen, dma_len, xfer_ctr;
437 u32 write = acdev->qc->tf.flags & ATA_TFLAG_WRITE;
438 unsigned long flags;
439 int ret = 0;
440
441 sglen = sg_dma_len(sg);
442 if (write) {
443 src = sg_dma_address(sg);
444 dest = acdev->pbase + EXT_WRITE_PORT;
445 } else {
446 dest = sg_dma_address(sg);
447 src = acdev->pbase + EXT_READ_PORT;
448 }
449
450 /*
451 * For each sg:
452 * MAX_XFER_COUNT data will be transferred before we get transfer
453 * complete interrupt. Inbetween after FIFO_SIZE data
454 * buffer available interrupt will be generated. At this time we will
455 * fill FIFO again: max FIFO_SIZE data.
456 */
457 while (sglen) {
458 xfer_cnt = min(sglen, MAX_XFER_COUNT);
459 spin_lock_irqsave(&acdev->host->lock, flags);
460 xfer_ctr = readl(acdev->vbase + XFER_CTR) &
461 ~XFER_COUNT_MASK;
462 writel(xfer_ctr | xfer_cnt | XFER_START,
463 acdev->vbase + XFER_CTR);
464 spin_unlock_irqrestore(&acdev->host->lock, flags);
465
466 /* continue dma xfers untill current sg is completed */
467 while (xfer_cnt) {
468 /* wait for read to complete */
469 if (!write) {
470 ret = wait4buf(acdev);
471 if (ret)
472 goto fail;
473 }
474
475 /* read/write FIFO in chunk of FIFO_SIZE */
476 dma_len = min(xfer_cnt, FIFO_SIZE);
477 ret = dma_xfer(acdev, src, dest, dma_len);
478 if (ret) {
479 dev_err(acdev->host->dev, "dma failed");
480 goto fail;
481 }
482
483 if (write)
484 src += dma_len;
485 else
486 dest += dma_len;
487
488 sglen -= dma_len;
489 xfer_cnt -= dma_len;
490
491 /* wait for write to complete */
492 if (write) {
493 ret = wait4buf(acdev);
494 if (ret)
495 goto fail;
496 }
497 }
498 }
499
500fail:
501 spin_lock_irqsave(&acdev->host->lock, flags);
502 writel(readl(acdev->vbase + XFER_CTR) & ~XFER_START,
503 acdev->vbase + XFER_CTR);
504 spin_unlock_irqrestore(&acdev->host->lock, flags);
505
506 return ret;
507}
508
509/*
510 * This routine uses External DMA controller to read/write data to FIFO of CF
511 * controller. There are two xfer related interrupt supported by CF controller:
512 * - buf_avail: This interrupt is generated as soon as we have buffer of 512
513 * bytes available for reading or empty buffer available for writing.
514 * - xfer_done: This interrupt is generated on transfer of "xfer_size" amount of
515 * data to/from FIFO. xfer_size is programmed in XFER_CTR register.
516 *
517 * Max buffer size = FIFO_SIZE = 512 Bytes.
518 * Max xfer_size = MAX_XFER_COUNT = 256 KB.
519 */
520static void data_xfer(struct work_struct *work)
521{
522 struct arasan_cf_dev *acdev = container_of(work, struct arasan_cf_dev,
523 work);
524 struct ata_queued_cmd *qc = acdev->qc;
525 struct scatterlist *sg;
526 unsigned long flags;
527 u32 temp;
528 int ret = 0;
529
530 /* request dma channels */
531 /* dma_request_channel may sleep, so calling from process context */
532 acdev->dma_chan = dma_request_channel(acdev->mask, filter,
533 acdev->dma_priv);
534 if (!acdev->dma_chan) {
535 dev_err(acdev->host->dev, "Unable to get dma_chan\n");
536 goto chan_request_fail;
537 }
538
539 for_each_sg(qc->sg, sg, qc->n_elem, temp) {
540 ret = sg_xfer(acdev, sg);
541 if (ret)
542 break;
543 }
544
545 dma_release_channel(acdev->dma_chan);
546
547 /* data xferred successfully */
548 if (!ret) {
549 u32 status;
550
551 spin_lock_irqsave(&acdev->host->lock, flags);
552 status = ioread8(qc->ap->ioaddr.altstatus_addr);
553 spin_unlock_irqrestore(&acdev->host->lock, flags);
554 if (status & (ATA_BUSY | ATA_DRQ)) {
555 ata_sff_queue_delayed_work(&acdev->dwork, 1);
556 return;
557 }
558
559 goto sff_intr;
560 }
561
562 cf_dumpregs(acdev);
563
564chan_request_fail:
565 spin_lock_irqsave(&acdev->host->lock, flags);
566 /* error when transfering data to/from memory */
567 qc->err_mask |= AC_ERR_HOST_BUS;
568 qc->ap->hsm_task_state = HSM_ST_ERR;
569
570 cf_ctrl_reset(acdev);
571 spin_unlock_irqrestore(qc->ap->lock, flags);
572sff_intr:
573 dma_complete(acdev);
574}
575
576static void delayed_finish(struct work_struct *work)
577{
578 struct arasan_cf_dev *acdev = container_of(work, struct arasan_cf_dev,
579 dwork.work);
580 struct ata_queued_cmd *qc = acdev->qc;
581 unsigned long flags;
582 u8 status;
583
584 spin_lock_irqsave(&acdev->host->lock, flags);
585 status = ioread8(qc->ap->ioaddr.altstatus_addr);
586 spin_unlock_irqrestore(&acdev->host->lock, flags);
587
588 if (status & (ATA_BUSY | ATA_DRQ))
589 ata_sff_queue_delayed_work(&acdev->dwork, 1);
590 else
591 dma_complete(acdev);
592}
593
594static irqreturn_t arasan_cf_interrupt(int irq, void *dev)
595{
596 struct arasan_cf_dev *acdev = ((struct ata_host *)dev)->private_data;
597 unsigned long flags;
598 u32 irqsts;
599
600 irqsts = readl(acdev->vbase + GIRQ_STS);
601 if (!(irqsts & GIRQ_CF))
602 return IRQ_NONE;
603
604 spin_lock_irqsave(&acdev->host->lock, flags);
605 irqsts = readl(acdev->vbase + IRQ_STS);
606 writel(irqsts, acdev->vbase + IRQ_STS); /* clear irqs */
607 writel(GIRQ_CF, acdev->vbase + GIRQ_STS); /* clear girqs */
608
609 /* handle only relevant interrupts */
610 irqsts &= ~IGNORED_IRQS;
611
612 if (irqsts & CARD_DETECT_IRQ) {
613 cf_card_detect(acdev, 1);
614 spin_unlock_irqrestore(&acdev->host->lock, flags);
615 return IRQ_HANDLED;
616 }
617
618 if (irqsts & PIO_XFER_ERR_IRQ) {
619 acdev->dma_status = ATA_DMA_ERR;
620 writel(readl(acdev->vbase + XFER_CTR) & ~XFER_START,
621 acdev->vbase + XFER_CTR);
622 spin_unlock_irqrestore(&acdev->host->lock, flags);
623 complete(&acdev->cf_completion);
624 dev_err(acdev->host->dev, "pio xfer err irq\n");
625 return IRQ_HANDLED;
626 }
627
628 spin_unlock_irqrestore(&acdev->host->lock, flags);
629
630 if (irqsts & BUF_AVAIL_IRQ) {
631 complete(&acdev->cf_completion);
632 return IRQ_HANDLED;
633 }
634
635 if (irqsts & XFER_DONE_IRQ) {
636 struct ata_queued_cmd *qc = acdev->qc;
637
638 /* Send Complete only for write */
639 if (qc->tf.flags & ATA_TFLAG_WRITE)
640 complete(&acdev->cf_completion);
641 }
642
643 return IRQ_HANDLED;
644}
645
646static void arasan_cf_freeze(struct ata_port *ap)
647{
648 struct arasan_cf_dev *acdev = ap->host->private_data;
649
650 /* stop transfer and reset controller */
651 writel(readl(acdev->vbase + XFER_CTR) & ~XFER_START,
652 acdev->vbase + XFER_CTR);
653 cf_ctrl_reset(acdev);
654 acdev->dma_status = ATA_DMA_ERR;
655
656 ata_sff_dma_pause(ap);
657 ata_sff_freeze(ap);
658}
659
660void arasan_cf_error_handler(struct ata_port *ap)
661{
662 struct arasan_cf_dev *acdev = ap->host->private_data;
663
664 /*
665 * DMA transfers using an external DMA controller may be scheduled.
666 * Abort them before handling error. Refer data_xfer() for further
667 * details.
668 */
669 cancel_work_sync(&acdev->work);
670 cancel_delayed_work_sync(&acdev->dwork);
671 return ata_sff_error_handler(ap);
672}
673
674static void arasan_cf_dma_start(struct arasan_cf_dev *acdev)
675{
676 u32 xfer_ctr = readl(acdev->vbase + XFER_CTR) & ~XFER_DIR_MASK;
677 u32 write = acdev->qc->tf.flags & ATA_TFLAG_WRITE;
678
679 xfer_ctr |= write ? XFER_WRITE : XFER_READ;
680 writel(xfer_ctr, acdev->vbase + XFER_CTR);
681
682 acdev->qc->ap->ops->sff_exec_command(acdev->qc->ap, &acdev->qc->tf);
683 ata_sff_queue_work(&acdev->work);
684}
685
686unsigned int arasan_cf_qc_issue(struct ata_queued_cmd *qc)
687{
688 struct ata_port *ap = qc->ap;
689 struct arasan_cf_dev *acdev = ap->host->private_data;
690
691 /* defer PIO handling to sff_qc_issue */
692 if (!ata_is_dma(qc->tf.protocol))
693 return ata_sff_qc_issue(qc);
694
695 /* select the device */
696 ata_wait_idle(ap);
697 ata_sff_dev_select(ap, qc->dev->devno);
698 ata_wait_idle(ap);
699
700 /* start the command */
701 switch (qc->tf.protocol) {
702 case ATA_PROT_DMA:
703 WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
704
705 ap->ops->sff_tf_load(ap, &qc->tf);
706 acdev->dma_status = 0;
707 acdev->qc = qc;
708 arasan_cf_dma_start(acdev);
709 ap->hsm_task_state = HSM_ST_LAST;
710 break;
711
712 default:
713 WARN_ON(1);
714 return AC_ERR_SYSTEM;
715 }
716
717 return 0;
718}
719
720static void arasan_cf_set_piomode(struct ata_port *ap, struct ata_device *adev)
721{
722 struct arasan_cf_dev *acdev = ap->host->private_data;
723 u8 pio = adev->pio_mode - XFER_PIO_0;
724 unsigned long flags;
725 u32 val;
726
727 /* Arasan ctrl supports Mode0 -> Mode6 */
728 if (pio > 6) {
729 dev_err(ap->dev, "Unknown PIO mode\n");
730 return;
731 }
732
733 spin_lock_irqsave(&acdev->host->lock, flags);
734 val = readl(acdev->vbase + OP_MODE) &
735 ~(ULTRA_DMA_ENB | MULTI_WORD_DMA_ENB | DRQ_BLOCK_SIZE_MASK);
736 writel(val, acdev->vbase + OP_MODE);
737 val = readl(acdev->vbase + TM_CFG) & ~TRUEIDE_PIO_TIMING_MASK;
738 val |= pio << TRUEIDE_PIO_TIMING_SHIFT;
739 writel(val, acdev->vbase + TM_CFG);
740
741 cf_interrupt_enable(acdev, BUF_AVAIL_IRQ | XFER_DONE_IRQ, 0);
742 cf_interrupt_enable(acdev, PIO_XFER_ERR_IRQ, 1);
743 spin_unlock_irqrestore(&acdev->host->lock, flags);
744}
745
746static void arasan_cf_set_dmamode(struct ata_port *ap, struct ata_device *adev)
747{
748 struct arasan_cf_dev *acdev = ap->host->private_data;
749 u32 opmode, tmcfg, dma_mode = adev->dma_mode;
750 unsigned long flags;
751
752 spin_lock_irqsave(&acdev->host->lock, flags);
753 opmode = readl(acdev->vbase + OP_MODE) &
754 ~(MULTI_WORD_DMA_ENB | ULTRA_DMA_ENB);
755 tmcfg = readl(acdev->vbase + TM_CFG);
756
757 if ((dma_mode >= XFER_UDMA_0) && (dma_mode <= XFER_UDMA_6)) {
758 opmode |= ULTRA_DMA_ENB;
759 tmcfg &= ~ULTRA_DMA_TIMING_MASK;
760 tmcfg |= (dma_mode - XFER_UDMA_0) << ULTRA_DMA_TIMING_SHIFT;
761 } else if ((dma_mode >= XFER_MW_DMA_0) && (dma_mode <= XFER_MW_DMA_4)) {
762 opmode |= MULTI_WORD_DMA_ENB;
763 tmcfg &= ~TRUEIDE_MWORD_DMA_TIMING_MASK;
764 tmcfg |= (dma_mode - XFER_MW_DMA_0) <<
765 TRUEIDE_MWORD_DMA_TIMING_SHIFT;
766 } else {
767 dev_err(ap->dev, "Unknown DMA mode\n");
768 spin_unlock_irqrestore(&acdev->host->lock, flags);
769 return;
770 }
771
772 writel(opmode, acdev->vbase + OP_MODE);
773 writel(tmcfg, acdev->vbase + TM_CFG);
774 writel(DMA_XFER_MODE, acdev->vbase + XFER_CTR);
775
776 cf_interrupt_enable(acdev, PIO_XFER_ERR_IRQ, 0);
777 cf_interrupt_enable(acdev, BUF_AVAIL_IRQ | XFER_DONE_IRQ, 1);
778 spin_unlock_irqrestore(&acdev->host->lock, flags);
779}
780
781static struct ata_port_operations arasan_cf_ops = {
782 .inherits = &ata_sff_port_ops,
783 .freeze = arasan_cf_freeze,
784 .error_handler = arasan_cf_error_handler,
785 .qc_issue = arasan_cf_qc_issue,
786 .set_piomode = arasan_cf_set_piomode,
787 .set_dmamode = arasan_cf_set_dmamode,
788};
789
790static int __devinit arasan_cf_probe(struct platform_device *pdev)
791{
792 struct arasan_cf_dev *acdev;
793 struct arasan_cf_pdata *pdata = dev_get_platdata(&pdev->dev);
794 struct ata_host *host;
795 struct ata_port *ap;
796 struct resource *res;
797 irq_handler_t irq_handler = NULL;
798 int ret = 0;
799
800 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
801 if (!res)
802 return -EINVAL;
803
804 if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
805 DRIVER_NAME)) {
806 dev_warn(&pdev->dev, "Failed to get memory region resource\n");
807 return -ENOENT;
808 }
809
810 acdev = devm_kzalloc(&pdev->dev, sizeof(*acdev), GFP_KERNEL);
811 if (!acdev) {
812 dev_warn(&pdev->dev, "kzalloc fail\n");
813 return -ENOMEM;
814 }
815
816 /* if irq is 0, support only PIO */
817 acdev->irq = platform_get_irq(pdev, 0);
818 if (acdev->irq)
819 irq_handler = arasan_cf_interrupt;
820 else
821 pdata->quirk |= CF_BROKEN_MWDMA | CF_BROKEN_UDMA;
822
823 acdev->pbase = res->start;
824 acdev->vbase = devm_ioremap_nocache(&pdev->dev, res->start,
825 resource_size(res));
826 if (!acdev->vbase) {
827 dev_warn(&pdev->dev, "ioremap fail\n");
828 return -ENOMEM;
829 }
830
831#ifdef CONFIG_HAVE_CLK
832 acdev->clk = clk_get(&pdev->dev, NULL);
833 if (IS_ERR(acdev->clk)) {
834 dev_warn(&pdev->dev, "Clock not found\n");
835 return PTR_ERR(acdev->clk);
836 }
837#endif
838
839 /* allocate host */
840 host = ata_host_alloc(&pdev->dev, 1);
841 if (!host) {
842 ret = -ENOMEM;
843 dev_warn(&pdev->dev, "alloc host fail\n");
844 goto free_clk;
845 }
846
847 ap = host->ports[0];
848 host->private_data = acdev;
849 acdev->host = host;
850 ap->ops = &arasan_cf_ops;
851 ap->pio_mask = ATA_PIO6;
852 ap->mwdma_mask = ATA_MWDMA4;
853 ap->udma_mask = ATA_UDMA6;
854
855 init_completion(&acdev->cf_completion);
856 init_completion(&acdev->dma_completion);
857 INIT_WORK(&acdev->work, data_xfer);
858 INIT_DELAYED_WORK(&acdev->dwork, delayed_finish);
859 dma_cap_set(DMA_MEMCPY, acdev->mask);
860 acdev->dma_priv = pdata->dma_priv;
861
862 /* Handle platform specific quirks */
863 if (pdata->quirk) {
864 if (pdata->quirk & CF_BROKEN_PIO) {
865 ap->ops->set_piomode = NULL;
866 ap->pio_mask = 0;
867 }
868 if (pdata->quirk & CF_BROKEN_MWDMA)
869 ap->mwdma_mask = 0;
870 if (pdata->quirk & CF_BROKEN_UDMA)
871 ap->udma_mask = 0;
872 }
873 ap->flags |= ATA_FLAG_PIO_POLLING | ATA_FLAG_NO_ATAPI;
874
875 ap->ioaddr.cmd_addr = acdev->vbase + ATA_DATA_PORT;
876 ap->ioaddr.data_addr = acdev->vbase + ATA_DATA_PORT;
877 ap->ioaddr.error_addr = acdev->vbase + ATA_ERR_FTR;
878 ap->ioaddr.feature_addr = acdev->vbase + ATA_ERR_FTR;
879 ap->ioaddr.nsect_addr = acdev->vbase + ATA_SC;
880 ap->ioaddr.lbal_addr = acdev->vbase + ATA_SN;
881 ap->ioaddr.lbam_addr = acdev->vbase + ATA_CL;
882 ap->ioaddr.lbah_addr = acdev->vbase + ATA_CH;
883 ap->ioaddr.device_addr = acdev->vbase + ATA_SH;
884 ap->ioaddr.status_addr = acdev->vbase + ATA_STS_CMD;
885 ap->ioaddr.command_addr = acdev->vbase + ATA_STS_CMD;
886 ap->ioaddr.altstatus_addr = acdev->vbase + ATA_ASTS_DCTR;
887 ap->ioaddr.ctl_addr = acdev->vbase + ATA_ASTS_DCTR;
888
889 ata_port_desc(ap, "phy_addr %llx virt_addr %p",
890 (unsigned long long) res->start, acdev->vbase);
891
892 ret = cf_init(acdev);
893 if (ret)
894 goto free_clk;
895
896 cf_card_detect(acdev, 0);
897
898 return ata_host_activate(host, acdev->irq, irq_handler, 0,
899 &arasan_cf_sht);
900
901free_clk:
902#ifdef CONFIG_HAVE_CLK
903 clk_put(acdev->clk);
904#endif
905 return ret;
906}
907
908static int __devexit arasan_cf_remove(struct platform_device *pdev)
909{
910 struct ata_host *host = dev_get_drvdata(&pdev->dev);
911 struct arasan_cf_dev *acdev = host->ports[0]->private_data;
912
913 ata_host_detach(host);
914 cf_exit(acdev);
915#ifdef CONFIG_HAVE_CLK
916 clk_put(acdev->clk);
917#endif
918
919 return 0;
920}
921
922#ifdef CONFIG_PM
923static int arasan_cf_suspend(struct device *dev)
924{
925 struct platform_device *pdev = to_platform_device(dev);
926 struct ata_host *host = dev_get_drvdata(&pdev->dev);
927 struct arasan_cf_dev *acdev = host->ports[0]->private_data;
928
929 if (acdev->dma_chan) {
930 acdev->dma_chan->device->device_control(acdev->dma_chan,
931 DMA_TERMINATE_ALL, 0);
932 dma_release_channel(acdev->dma_chan);
933 }
934 cf_exit(acdev);
935 return ata_host_suspend(host, PMSG_SUSPEND);
936}
937
938static int arasan_cf_resume(struct device *dev)
939{
940 struct platform_device *pdev = to_platform_device(dev);
941 struct ata_host *host = dev_get_drvdata(&pdev->dev);
942 struct arasan_cf_dev *acdev = host->ports[0]->private_data;
943
944 cf_init(acdev);
945 ata_host_resume(host);
946
947 return 0;
948}
949
950static const struct dev_pm_ops arasan_cf_pm_ops = {
951 .suspend = arasan_cf_suspend,
952 .resume = arasan_cf_resume,
953};
954#endif
955
956static struct platform_driver arasan_cf_driver = {
957 .probe = arasan_cf_probe,
958 .remove = __devexit_p(arasan_cf_remove),
959 .driver = {
960 .name = DRIVER_NAME,
961 .owner = THIS_MODULE,
962#ifdef CONFIG_PM
963 .pm = &arasan_cf_pm_ops,
964#endif
965 },
966};
967
968static int __init arasan_cf_init(void)
969{
970 return platform_driver_register(&arasan_cf_driver);
971}
972module_init(arasan_cf_init);
973
974static void __exit arasan_cf_exit(void)
975{
976 platform_driver_unregister(&arasan_cf_driver);
977}
978module_exit(arasan_cf_exit);
979
980MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
981MODULE_DESCRIPTION("Arasan ATA Compact Flash driver");
982MODULE_LICENSE("GPL");
983MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/ata/pata_at32.c b/drivers/ata/pata_at32.c
index 66ce6a526f27..36f189c7ee8c 100644
--- a/drivers/ata/pata_at32.c
+++ b/drivers/ata/pata_at32.c
@@ -194,7 +194,7 @@ static int __init pata_at32_init_one(struct device *dev,
194 /* Setup ATA bindings */ 194 /* Setup ATA bindings */
195 ap->ops = &at32_port_ops; 195 ap->ops = &at32_port_ops;
196 ap->pio_mask = PIO_MASK; 196 ap->pio_mask = PIO_MASK;
197 ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_SLAVE_POSS; 197 ap->flags |= ATA_FLAG_SLAVE_POSS;
198 198
199 /* 199 /*
200 * Since all 8-bit taskfile transfers has to go on the lower 200 * Since all 8-bit taskfile transfers has to go on the lower
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index 7aed5c792597..e0b58b8dfe6f 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -1454,9 +1454,7 @@ static struct ata_port_operations bfin_pata_ops = {
1454 1454
1455static struct ata_port_info bfin_port_info[] = { 1455static struct ata_port_info bfin_port_info[] = {
1456 { 1456 {
1457 .flags = ATA_FLAG_SLAVE_POSS 1457 .flags = ATA_FLAG_SLAVE_POSS,
1458 | ATA_FLAG_MMIO
1459 | ATA_FLAG_NO_LEGACY,
1460 .pio_mask = ATA_PIO4, 1458 .pio_mask = ATA_PIO4,
1461 .mwdma_mask = 0, 1459 .mwdma_mask = 0,
1462 .udma_mask = 0, 1460 .udma_mask = 0,
diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index 538ec38ba995..6c77d68dbd05 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c
@@ -14,6 +14,7 @@
14 * Look into engine reset on timeout errors. Should not be required. 14 * Look into engine reset on timeout errors. Should not be required.
15 */ 15 */
16 16
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
17 18
18#include <linux/kernel.h> 19#include <linux/kernel.h>
19#include <linux/module.h> 20#include <linux/module.h>
@@ -25,7 +26,7 @@
25#include <linux/libata.h> 26#include <linux/libata.h>
26 27
27#define DRV_NAME "pata_hpt366" 28#define DRV_NAME "pata_hpt366"
28#define DRV_VERSION "0.6.10" 29#define DRV_VERSION "0.6.11"
29 30
30struct hpt_clock { 31struct hpt_clock {
31 u8 xfer_mode; 32 u8 xfer_mode;
@@ -160,8 +161,8 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr,
160 161
161 while (list[i] != NULL) { 162 while (list[i] != NULL) {
162 if (!strcmp(list[i], model_num)) { 163 if (!strcmp(list[i], model_num)) {
163 pr_warning(DRV_NAME ": %s is not supported for %s.\n", 164 pr_warn("%s is not supported for %s\n",
164 modestr, list[i]); 165 modestr, list[i]);
165 return 1; 166 return 1;
166 } 167 }
167 i++; 168 i++;
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 4c5b5183225e..9620636aa405 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -14,6 +14,8 @@
14 * Look into engine reset on timeout errors. Should not be required. 14 * Look into engine reset on timeout errors. Should not be required.
15 */ 15 */
16 16
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
17#include <linux/kernel.h> 19#include <linux/kernel.h>
18#include <linux/module.h> 20#include <linux/module.h>
19#include <linux/pci.h> 21#include <linux/pci.h>
@@ -24,7 +26,7 @@
24#include <linux/libata.h> 26#include <linux/libata.h>
25 27
26#define DRV_NAME "pata_hpt37x" 28#define DRV_NAME "pata_hpt37x"
27#define DRV_VERSION "0.6.22" 29#define DRV_VERSION "0.6.23"
28 30
29struct hpt_clock { 31struct hpt_clock {
30 u8 xfer_speed; 32 u8 xfer_speed;
@@ -229,8 +231,8 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr,
229 231
230 while (list[i] != NULL) { 232 while (list[i] != NULL) {
231 if (!strcmp(list[i], model_num)) { 233 if (!strcmp(list[i], model_num)) {
232 pr_warning(DRV_NAME ": %s is not supported for %s.\n", 234 pr_warn("%s is not supported for %s\n",
233 modestr, list[i]); 235 modestr, list[i]);
234 return 1; 236 return 1;
235 } 237 }
236 i++; 238 i++;
@@ -863,8 +865,8 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
863 chip_table = &hpt372; 865 chip_table = &hpt372;
864 break; 866 break;
865 default: 867 default:
866 pr_err(DRV_NAME ": Unknown HPT366 subtype, " 868 pr_err("Unknown HPT366 subtype, please report (%d)\n",
867 "please report (%d).\n", rev); 869 rev);
868 return -ENODEV; 870 return -ENODEV;
869 } 871 }
870 break; 872 break;
@@ -904,8 +906,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
904 *ppi = &info_hpt374_fn1; 906 *ppi = &info_hpt374_fn1;
905 break; 907 break;
906 default: 908 default:
907 pr_err(DRV_NAME ": PCI table is bogus, please report (%d).\n", 909 pr_err("PCI table is bogus, please report (%d)\n", dev->device);
908 dev->device);
909 return -ENODEV; 910 return -ENODEV;
910 } 911 }
911 /* Ok so this is a chip we support */ 912 /* Ok so this is a chip we support */
@@ -953,7 +954,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
953 u8 sr; 954 u8 sr;
954 u32 total = 0; 955 u32 total = 0;
955 956
956 pr_warning(DRV_NAME ": BIOS has not set timing clocks.\n"); 957 pr_warn("BIOS has not set timing clocks\n");
957 958
958 /* This is the process the HPT371 BIOS is reported to use */ 959 /* This is the process the HPT371 BIOS is reported to use */
959 for (i = 0; i < 128; i++) { 960 for (i = 0; i < 128; i++) {
@@ -1009,7 +1010,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
1009 (f_high << 16) | f_low | 0x100); 1010 (f_high << 16) | f_low | 0x100);
1010 } 1011 }
1011 if (adjust == 8) { 1012 if (adjust == 8) {
1012 pr_err(DRV_NAME ": DPLL did not stabilize!\n"); 1013 pr_err("DPLL did not stabilize!\n");
1013 return -ENODEV; 1014 return -ENODEV;
1014 } 1015 }
1015 if (dpll == 3) 1016 if (dpll == 3)
@@ -1017,7 +1018,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
1017 else 1018 else
1018 private_data = (void *)hpt37x_timings_50; 1019 private_data = (void *)hpt37x_timings_50;
1019 1020
1020 pr_info(DRV_NAME ": bus clock %dMHz, using %dMHz DPLL.\n", 1021 pr_info("bus clock %dMHz, using %dMHz DPLL\n",
1021 MHz[clock_slot], MHz[dpll]); 1022 MHz[clock_slot], MHz[dpll]);
1022 } else { 1023 } else {
1023 private_data = (void *)chip_table->clocks[clock_slot]; 1024 private_data = (void *)chip_table->clocks[clock_slot];
@@ -1032,7 +1033,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
1032 if (clock_slot < 2 && ppi[0] == &info_hpt370a) 1033 if (clock_slot < 2 && ppi[0] == &info_hpt370a)
1033 ppi[0] = &info_hpt370a_33; 1034 ppi[0] = &info_hpt370a_33;
1034 1035
1035 pr_info(DRV_NAME ": %s using %dMHz bus clock.\n", 1036 pr_info("%s using %dMHz bus clock\n",
1036 chip_table->name, MHz[clock_slot]); 1037 chip_table->name, MHz[clock_slot]);
1037 } 1038 }
1038 1039
diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c
index eca68caf5f46..765f136d8cd3 100644
--- a/drivers/ata/pata_hpt3x2n.c
+++ b/drivers/ata/pata_hpt3x2n.c
@@ -15,6 +15,8 @@
15 * Work out best PLL policy 15 * Work out best PLL policy
16 */ 16 */
17 17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
18#include <linux/kernel.h> 20#include <linux/kernel.h>
19#include <linux/module.h> 21#include <linux/module.h>
20#include <linux/pci.h> 22#include <linux/pci.h>
@@ -25,7 +27,7 @@
25#include <linux/libata.h> 27#include <linux/libata.h>
26 28
27#define DRV_NAME "pata_hpt3x2n" 29#define DRV_NAME "pata_hpt3x2n"
28#define DRV_VERSION "0.3.14" 30#define DRV_VERSION "0.3.15"
29 31
30enum { 32enum {
31 HPT_PCI_FAST = (1 << 31), 33 HPT_PCI_FAST = (1 << 31),
@@ -418,7 +420,7 @@ static int hpt3x2n_pci_clock(struct pci_dev *pdev)
418 u16 sr; 420 u16 sr;
419 u32 total = 0; 421 u32 total = 0;
420 422
421 pr_warning(DRV_NAME ": BIOS clock data not set.\n"); 423 pr_warn("BIOS clock data not set\n");
422 424
423 /* This is the process the HPT371 BIOS is reported to use */ 425 /* This is the process the HPT371 BIOS is reported to use */
424 for (i = 0; i < 128; i++) { 426 for (i = 0; i < 128; i++) {
@@ -528,8 +530,7 @@ hpt372n:
528 ppi[0] = &info_hpt372n; 530 ppi[0] = &info_hpt372n;
529 break; 531 break;
530 default: 532 default:
531 pr_err(DRV_NAME ": PCI table is bogus, please report (%d).\n", 533 pr_err("PCI table is bogus, please report (%d)\n", dev->device);
532 dev->device);
533 return -ENODEV; 534 return -ENODEV;
534 } 535 }
535 536
@@ -578,11 +579,11 @@ hpt372n:
578 pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low); 579 pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low);
579 } 580 }
580 if (adjust == 8) { 581 if (adjust == 8) {
581 pr_err(DRV_NAME ": DPLL did not stabilize!\n"); 582 pr_err("DPLL did not stabilize!\n");
582 return -ENODEV; 583 return -ENODEV;
583 } 584 }
584 585
585 pr_info(DRV_NAME ": bus clock %dMHz, using 66MHz DPLL.\n", pci_mhz); 586 pr_info("bus clock %dMHz, using 66MHz DPLL\n", pci_mhz);
586 587
587 /* 588 /*
588 * Set our private data up. We only need a few flags 589 * Set our private data up. We only need a few flags
diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c
index b63d5e2d4628..24d7df81546b 100644
--- a/drivers/ata/pata_hpt3x3.c
+++ b/drivers/ata/pata_hpt3x3.c
@@ -151,7 +151,7 @@ static struct ata_port_operations hpt3x3_port_ops = {
151 .check_atapi_dma= hpt3x3_atapi_dma, 151 .check_atapi_dma= hpt3x3_atapi_dma,
152 .freeze = hpt3x3_freeze, 152 .freeze = hpt3x3_freeze,
153#endif 153#endif
154 154
155}; 155};
156 156
157/** 157/**
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index aa0e0c51cc08..2d15f2548a10 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -616,7 +616,7 @@ static void it821x_display_disk(int n, u8 *buf)
616 if (buf[52] > 4) /* No Disk */ 616 if (buf[52] > 4) /* No Disk */
617 return; 617 return;
618 618
619 ata_id_c_string((u16 *)buf, id, 0, 41); 619 ata_id_c_string((u16 *)buf, id, 0, 41);
620 620
621 if (buf[51]) { 621 if (buf[51]) {
622 mode = ffs(buf[51]); 622 mode = ffs(buf[51]);
@@ -910,7 +910,7 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
910 rc = pcim_enable_device(pdev); 910 rc = pcim_enable_device(pdev);
911 if (rc) 911 if (rc)
912 return rc; 912 return rc;
913 913
914 if (pdev->vendor == PCI_VENDOR_ID_RDC) { 914 if (pdev->vendor == PCI_VENDOR_ID_RDC) {
915 /* Deal with Vortex86SX */ 915 /* Deal with Vortex86SX */
916 if (pdev->revision == 0x11) 916 if (pdev->revision == 0x11)
diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c
index ba54b089f98c..5253b271b3fe 100644
--- a/drivers/ata/pata_ixp4xx_cf.c
+++ b/drivers/ata/pata_ixp4xx_cf.c
@@ -177,7 +177,7 @@ static __devinit int ixp4xx_pata_probe(struct platform_device *pdev)
177 177
178 ap->ops = &ixp4xx_port_ops; 178 ap->ops = &ixp4xx_port_ops;
179 ap->pio_mask = ATA_PIO4; 179 ap->pio_mask = ATA_PIO4;
180 ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY | ATA_FLAG_NO_ATAPI; 180 ap->flags |= ATA_FLAG_NO_ATAPI;
181 181
182 ixp4xx_setup_port(ap, data, cs0->start, cs1->start); 182 ixp4xx_setup_port(ap, data, cs0->start, cs1->start);
183 183
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 75b49d01780b..46f589edccdb 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -1053,8 +1053,7 @@ static int __devinit pata_macio_common_init(struct pata_macio_priv *priv,
1053 /* Allocate libata host for 1 port */ 1053 /* Allocate libata host for 1 port */
1054 memset(&pinfo, 0, sizeof(struct ata_port_info)); 1054 memset(&pinfo, 0, sizeof(struct ata_port_info));
1055 pmac_macio_calc_timing_masks(priv, &pinfo); 1055 pmac_macio_calc_timing_masks(priv, &pinfo);
1056 pinfo.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_MMIO | 1056 pinfo.flags = ATA_FLAG_SLAVE_POSS;
1057 ATA_FLAG_NO_LEGACY;
1058 pinfo.port_ops = &pata_macio_ops; 1057 pinfo.port_ops = &pata_macio_ops;
1059 pinfo.private_data = priv; 1058 pinfo.private_data = priv;
1060 1059
diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
index dd38083dcbeb..75a6a0c0094f 100644
--- a/drivers/ata/pata_marvell.c
+++ b/drivers/ata/pata_marvell.c
@@ -38,7 +38,7 @@ static int marvell_pata_active(struct pci_dev *pdev)
38 38
39 /* We don't yet know how to do this for other devices */ 39 /* We don't yet know how to do this for other devices */
40 if (pdev->device != 0x6145) 40 if (pdev->device != 0x6145)
41 return 1; 41 return 1;
42 42
43 barp = pci_iomap(pdev, 5, 0x10); 43 barp = pci_iomap(pdev, 5, 0x10);
44 if (barp == NULL) 44 if (barp == NULL)
diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c
index cc50bd09aa26..e277a142138c 100644
--- a/drivers/ata/pata_ninja32.c
+++ b/drivers/ata/pata_ninja32.c
@@ -165,7 +165,7 @@ static int ninja32_reinit_one(struct pci_dev *pdev)
165 return rc; 165 return rc;
166 ninja32_program(host->iomap[0]); 166 ninja32_program(host->iomap[0]);
167 ata_host_resume(host); 167 ata_host_resume(host);
168 return 0; 168 return 0;
169} 169}
170#endif 170#endif
171 171
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index fa1b95a9a7ff..220ddc90608f 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -848,8 +848,7 @@ static int __devinit octeon_cf_probe(struct platform_device *pdev)
848 cf_port->ap = ap; 848 cf_port->ap = ap;
849 ap->ops = &octeon_cf_ops; 849 ap->ops = &octeon_cf_ops;
850 ap->pio_mask = ATA_PIO6; 850 ap->pio_mask = ATA_PIO6;
851 ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY 851 ap->flags |= ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING;
852 | ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING;
853 852
854 base = cs0 + ocd->base_region_bias; 853 base = cs0 + ocd->base_region_bias;
855 if (!ocd->is16bit) { 854 if (!ocd->is16bit) {
diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c
index 11fb4ccc74b4..a2a73d953840 100644
--- a/drivers/ata/pata_palmld.c
+++ b/drivers/ata/pata_palmld.c
@@ -85,7 +85,7 @@ static __devinit int palmld_pata_probe(struct platform_device *pdev)
85 ap = host->ports[0]; 85 ap = host->ports[0];
86 ap->ops = &palmld_port_ops; 86 ap->ops = &palmld_port_ops;
87 ap->pio_mask = ATA_PIO4; 87 ap->pio_mask = ATA_PIO4;
88 ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY | ATA_FLAG_PIO_POLLING; 88 ap->flags |= ATA_FLAG_PIO_POLLING;
89 89
90 /* memory mapping voodoo */ 90 /* memory mapping voodoo */
91 ap->ioaddr.cmd_addr = mem + 0x10; 91 ap->ioaddr.cmd_addr = mem + 0x10;
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index 806292160b3f..29af660d968b 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -124,7 +124,7 @@ static unsigned int ata_data_xfer_8bit(struct ata_device *dev,
124 * reset will recover the device. 124 * reset will recover the device.
125 * 125 *
126 */ 126 */
127 127
128static void pcmcia_8bit_drain_fifo(struct ata_queued_cmd *qc) 128static void pcmcia_8bit_drain_fifo(struct ata_queued_cmd *qc)
129{ 129{
130 int count; 130 int count;
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index b18351122525..9765ace16921 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -150,8 +150,7 @@ static struct ata_port_operations pdc2027x_pata133_ops = {
150static struct ata_port_info pdc2027x_port_info[] = { 150static struct ata_port_info pdc2027x_port_info[] = {
151 /* PDC_UDMA_100 */ 151 /* PDC_UDMA_100 */
152 { 152 {
153 .flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_SLAVE_POSS | 153 .flags = ATA_FLAG_SLAVE_POSS,
154 ATA_FLAG_MMIO,
155 .pio_mask = ATA_PIO4, 154 .pio_mask = ATA_PIO4,
156 .mwdma_mask = ATA_MWDMA2, 155 .mwdma_mask = ATA_MWDMA2,
157 .udma_mask = ATA_UDMA5, 156 .udma_mask = ATA_UDMA5,
@@ -159,8 +158,7 @@ static struct ata_port_info pdc2027x_port_info[] = {
159 }, 158 },
160 /* PDC_UDMA_133 */ 159 /* PDC_UDMA_133 */
161 { 160 {
162 .flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_SLAVE_POSS | 161 .flags = ATA_FLAG_SLAVE_POSS,
163 ATA_FLAG_MMIO,
164 .pio_mask = ATA_PIO4, 162 .pio_mask = ATA_PIO4,
165 .mwdma_mask = ATA_MWDMA2, 163 .mwdma_mask = ATA_MWDMA2,
166 .udma_mask = ATA_UDMA6, 164 .udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c
index 1898c6ed4b4e..b4ede40f8ae1 100644
--- a/drivers/ata/pata_pxa.c
+++ b/drivers/ata/pata_pxa.c
@@ -292,7 +292,6 @@ static int __devinit pxa_ata_probe(struct platform_device *pdev)
292 ap->ops = &pxa_ata_port_ops; 292 ap->ops = &pxa_ata_port_ops;
293 ap->pio_mask = ATA_PIO4; 293 ap->pio_mask = ATA_PIO4;
294 ap->mwdma_mask = ATA_MWDMA2; 294 ap->mwdma_mask = ATA_MWDMA2;
295 ap->flags = ATA_FLAG_MMIO;
296 295
297 ap->ioaddr.cmd_addr = devm_ioremap(&pdev->dev, cmd_res->start, 296 ap->ioaddr.cmd_addr = devm_ioremap(&pdev->dev, cmd_res->start,
298 resource_size(cmd_res)); 297 resource_size(cmd_res));
diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 0ffd631000b7..baeaf938d55b 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -91,7 +91,6 @@ static void rb532_pata_setup_ports(struct ata_host *ah)
91 91
92 ap->ops = &rb532_pata_port_ops; 92 ap->ops = &rb532_pata_port_ops;
93 ap->pio_mask = ATA_PIO4; 93 ap->pio_mask = ATA_PIO4;
94 ap->flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO;
95 94
96 ap->ioaddr.cmd_addr = info->iobase + RB500_CF_REG_BASE; 95 ap->ioaddr.cmd_addr = info->iobase + RB500_CF_REG_BASE;
97 ap->ioaddr.ctl_addr = info->iobase + RB500_CF_REG_CTRL; 96 ap->ioaddr.ctl_addr = info->iobase + RB500_CF_REG_CTRL;
diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c
index 8a51d673e5b2..c446ae6055a3 100644
--- a/drivers/ata/pata_samsung_cf.c
+++ b/drivers/ata/pata_samsung_cf.c
@@ -531,7 +531,6 @@ static int __init pata_s3c_probe(struct platform_device *pdev)
531 } 531 }
532 532
533 ap = host->ports[0]; 533 ap = host->ports[0];
534 ap->flags |= ATA_FLAG_MMIO;
535 ap->pio_mask = ATA_PIO4; 534 ap->pio_mask = ATA_PIO4;
536 535
537 if (cpu_type == TYPE_S3C64XX) { 536 if (cpu_type == TYPE_S3C64XX) {
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index 093715c3273a..88ea9b677b47 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -959,7 +959,7 @@ static struct ata_port_operations scc_pata_ops = {
959 959
960static struct ata_port_info scc_port_info[] = { 960static struct ata_port_info scc_port_info[] = {
961 { 961 {
962 .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY, 962 .flags = ATA_FLAG_SLAVE_POSS,
963 .pio_mask = ATA_PIO4, 963 .pio_mask = ATA_PIO4,
964 /* No MWDMA */ 964 /* No MWDMA */
965 .udma_mask = ATA_UDMA6, 965 .udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c
index 60cea13cccce..c04abc393fc5 100644
--- a/drivers/ata/pata_sis.c
+++ b/drivers/ata/pata_sis.c
@@ -593,7 +593,7 @@ static const struct ata_port_info sis_info133 = {
593 .port_ops = &sis_133_ops, 593 .port_ops = &sis_133_ops,
594}; 594};
595const struct ata_port_info sis_info133_for_sata = { 595const struct ata_port_info sis_info133_for_sata = {
596 .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, 596 .flags = ATA_FLAG_SLAVE_POSS,
597 .pio_mask = ATA_PIO4, 597 .pio_mask = ATA_PIO4,
598 /* No MWDMA */ 598 /* No MWDMA */
599 .udma_mask = ATA_UDMA6, 599 .udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index adbe0426c8f0..1111712b3d7d 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -166,9 +166,7 @@ static struct ata_port_operations adma_ata_ops = {
166static struct ata_port_info adma_port_info[] = { 166static struct ata_port_info adma_port_info[] = {
167 /* board_1841_idx */ 167 /* board_1841_idx */
168 { 168 {
169 .flags = ATA_FLAG_SLAVE_POSS | 169 .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_POLLING,
170 ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO |
171 ATA_FLAG_PIO_POLLING,
172 .pio_mask = ATA_PIO4_ONLY, 170 .pio_mask = ATA_PIO4_ONLY,
173 .udma_mask = ATA_UDMA4, 171 .udma_mask = ATA_UDMA4,
174 .port_ops = &adma_ata_ops, 172 .port_ops = &adma_ata_ops,
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index 6cf57c5c2b5f..712ab5a4922e 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -40,8 +40,11 @@
40#include <scsi/scsi_host.h> 40#include <scsi/scsi_host.h>
41#include <scsi/scsi_cmnd.h> 41#include <scsi/scsi_cmnd.h>
42 42
43/* These two are defined in "libata.h" */
44#undef DRV_NAME
45#undef DRV_VERSION
43#define DRV_NAME "sata-dwc" 46#define DRV_NAME "sata-dwc"
44#define DRV_VERSION "1.0" 47#define DRV_VERSION "1.3"
45 48
46/* SATA DMA driver Globals */ 49/* SATA DMA driver Globals */
47#define DMA_NUM_CHANS 1 50#define DMA_NUM_CHANS 1
@@ -333,11 +336,47 @@ static int dma_dwc_xfer_setup(struct scatterlist *sg, int num_elems,
333 void __iomem *addr, int dir); 336 void __iomem *addr, int dir);
334static void dma_dwc_xfer_start(int dma_ch); 337static void dma_dwc_xfer_start(int dma_ch);
335 338
339static const char *get_prot_descript(u8 protocol)
340{
341 switch ((enum ata_tf_protocols)protocol) {
342 case ATA_PROT_NODATA:
343 return "ATA no data";
344 case ATA_PROT_PIO:
345 return "ATA PIO";
346 case ATA_PROT_DMA:
347 return "ATA DMA";
348 case ATA_PROT_NCQ:
349 return "ATA NCQ";
350 case ATAPI_PROT_NODATA:
351 return "ATAPI no data";
352 case ATAPI_PROT_PIO:
353 return "ATAPI PIO";
354 case ATAPI_PROT_DMA:
355 return "ATAPI DMA";
356 default:
357 return "unknown";
358 }
359}
360
361static const char *get_dma_dir_descript(int dma_dir)
362{
363 switch ((enum dma_data_direction)dma_dir) {
364 case DMA_BIDIRECTIONAL:
365 return "bidirectional";
366 case DMA_TO_DEVICE:
367 return "to device";
368 case DMA_FROM_DEVICE:
369 return "from device";
370 default:
371 return "none";
372 }
373}
374
336static void sata_dwc_tf_dump(struct ata_taskfile *tf) 375static void sata_dwc_tf_dump(struct ata_taskfile *tf)
337{ 376{
338 dev_vdbg(host_pvt.dwc_dev, "taskfile cmd: 0x%02x protocol: %s flags:" 377 dev_vdbg(host_pvt.dwc_dev, "taskfile cmd: 0x%02x protocol: %s flags:"
339 "0x%lx device: %x\n", tf->command, ata_get_cmd_descript\ 378 "0x%lx device: %x\n", tf->command,
340 (tf->protocol), tf->flags, tf->device); 379 get_prot_descript(tf->protocol), tf->flags, tf->device);
341 dev_vdbg(host_pvt.dwc_dev, "feature: 0x%02x nsect: 0x%x lbal: 0x%x " 380 dev_vdbg(host_pvt.dwc_dev, "feature: 0x%02x nsect: 0x%x lbal: 0x%x "
342 "lbam: 0x%x lbah: 0x%x\n", tf->feature, tf->nsect, tf->lbal, 381 "lbam: 0x%x lbah: 0x%x\n", tf->feature, tf->nsect, tf->lbal,
343 tf->lbam, tf->lbah); 382 tf->lbam, tf->lbah);
@@ -715,7 +754,7 @@ static int dma_dwc_xfer_setup(struct scatterlist *sg, int num_elems,
715 /* Program the CTL register with src enable / dst enable */ 754 /* Program the CTL register with src enable / dst enable */
716 out_le32(&(host_pvt.sata_dma_regs->chan_regs[dma_ch].ctl.low), 755 out_le32(&(host_pvt.sata_dma_regs->chan_regs[dma_ch].ctl.low),
717 DMA_CTL_LLP_SRCEN | DMA_CTL_LLP_DSTEN); 756 DMA_CTL_LLP_SRCEN | DMA_CTL_LLP_DSTEN);
718 return 0; 757 return dma_ch;
719} 758}
720 759
721/* 760/*
@@ -967,7 +1006,7 @@ static irqreturn_t sata_dwc_isr(int irq, void *dev_instance)
967 } 1006 }
968 1007
969 dev_dbg(ap->dev, "%s non-NCQ cmd interrupt, protocol: %s\n", 1008 dev_dbg(ap->dev, "%s non-NCQ cmd interrupt, protocol: %s\n",
970 __func__, ata_get_cmd_descript(qc->tf.protocol)); 1009 __func__, get_prot_descript(qc->tf.protocol));
971DRVSTILLBUSY: 1010DRVSTILLBUSY:
972 if (ata_is_dma(qc->tf.protocol)) { 1011 if (ata_is_dma(qc->tf.protocol)) {
973 /* 1012 /*
@@ -1057,7 +1096,7 @@ DRVSTILLBUSY:
1057 1096
1058 /* Process completed command */ 1097 /* Process completed command */
1059 dev_dbg(ap->dev, "%s NCQ command, protocol: %s\n", __func__, 1098 dev_dbg(ap->dev, "%s NCQ command, protocol: %s\n", __func__,
1060 ata_get_cmd_descript(qc->tf.protocol)); 1099 get_prot_descript(qc->tf.protocol));
1061 if (ata_is_dma(qc->tf.protocol)) { 1100 if (ata_is_dma(qc->tf.protocol)) {
1062 host_pvt.dma_interrupt_count++; 1101 host_pvt.dma_interrupt_count++;
1063 if (hsdevp->dma_pending[tag] == \ 1102 if (hsdevp->dma_pending[tag] == \
@@ -1142,8 +1181,8 @@ static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status)
1142 if (tag > 0) { 1181 if (tag > 0) {
1143 dev_info(ap->dev, "%s tag=%u cmd=0x%02x dma dir=%s proto=%s " 1182 dev_info(ap->dev, "%s tag=%u cmd=0x%02x dma dir=%s proto=%s "
1144 "dmacr=0x%08x\n", __func__, qc->tag, qc->tf.command, 1183 "dmacr=0x%08x\n", __func__, qc->tag, qc->tf.command,
1145 ata_get_cmd_descript(qc->dma_dir), 1184 get_dma_dir_descript(qc->dma_dir),
1146 ata_get_cmd_descript(qc->tf.protocol), 1185 get_prot_descript(qc->tf.protocol),
1147 in_le32(&(hsdev->sata_dwc_regs->dmacr))); 1186 in_le32(&(hsdev->sata_dwc_regs->dmacr)));
1148 } 1187 }
1149#endif 1188#endif
@@ -1354,7 +1393,7 @@ static void sata_dwc_exec_command_by_tag(struct ata_port *ap,
1354 struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap); 1393 struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
1355 1394
1356 dev_dbg(ap->dev, "%s cmd(0x%02x): %s tag=%d\n", __func__, tf->command, 1395 dev_dbg(ap->dev, "%s cmd(0x%02x): %s tag=%d\n", __func__, tf->command,
1357 ata_get_cmd_descript(tf), tag); 1396 ata_get_cmd_descript(tf->command), tag);
1358 1397
1359 spin_lock_irqsave(&ap->host->lock, flags); 1398 spin_lock_irqsave(&ap->host->lock, flags);
1360 hsdevp->cmd_issued[tag] = cmd_issued; 1399 hsdevp->cmd_issued[tag] = cmd_issued;
@@ -1413,7 +1452,7 @@ static void sata_dwc_bmdma_start_by_tag(struct ata_queued_cmd *qc, u8 tag)
1413 1452
1414 dev_dbg(ap->dev, "%s qc=%p tag: %x cmd: 0x%02x dma_dir: %s " 1453 dev_dbg(ap->dev, "%s qc=%p tag: %x cmd: 0x%02x dma_dir: %s "
1415 "start_dma? %x\n", __func__, qc, tag, qc->tf.command, 1454 "start_dma? %x\n", __func__, qc, tag, qc->tf.command,
1416 ata_get_cmd_descript(qc->dma_dir), start_dma); 1455 get_dma_dir_descript(qc->dma_dir), start_dma);
1417 sata_dwc_tf_dump(&(qc->tf)); 1456 sata_dwc_tf_dump(&(qc->tf));
1418 1457
1419 if (start_dma) { 1458 if (start_dma) {
@@ -1462,10 +1501,9 @@ static void sata_dwc_qc_prep_by_tag(struct ata_queued_cmd *qc, u8 tag)
1462 int dma_chan; 1501 int dma_chan;
1463 struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap); 1502 struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap);
1464 struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap); 1503 struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
1465 int err;
1466 1504
1467 dev_dbg(ap->dev, "%s: port=%d dma dir=%s n_elem=%d\n", 1505 dev_dbg(ap->dev, "%s: port=%d dma dir=%s n_elem=%d\n",
1468 __func__, ap->port_no, ata_get_cmd_descript(qc->dma_dir), 1506 __func__, ap->port_no, get_dma_dir_descript(qc->dma_dir),
1469 qc->n_elem); 1507 qc->n_elem);
1470 1508
1471 dma_chan = dma_dwc_xfer_setup(sg, qc->n_elem, hsdevp->llit[tag], 1509 dma_chan = dma_dwc_xfer_setup(sg, qc->n_elem, hsdevp->llit[tag],
@@ -1474,7 +1512,7 @@ static void sata_dwc_qc_prep_by_tag(struct ata_queued_cmd *qc, u8 tag)
1474 dmadr), qc->dma_dir); 1512 dmadr), qc->dma_dir);
1475 if (dma_chan < 0) { 1513 if (dma_chan < 0) {
1476 dev_err(ap->dev, "%s: dma_dwc_xfer_setup returns err %d\n", 1514 dev_err(ap->dev, "%s: dma_dwc_xfer_setup returns err %d\n",
1477 __func__, err); 1515 __func__, dma_chan);
1478 return; 1516 return;
1479 } 1517 }
1480 hsdevp->dma_chan[tag] = dma_chan; 1518 hsdevp->dma_chan[tag] = dma_chan;
@@ -1491,8 +1529,8 @@ static unsigned int sata_dwc_qc_issue(struct ata_queued_cmd *qc)
1491 dev_info(ap->dev, "%s ap id=%d cmd(0x%02x)=%s qc tag=%d " 1529 dev_info(ap->dev, "%s ap id=%d cmd(0x%02x)=%s qc tag=%d "
1492 "prot=%s ap active_tag=0x%08x ap sactive=0x%08x\n", 1530 "prot=%s ap active_tag=0x%08x ap sactive=0x%08x\n",
1493 __func__, ap->print_id, qc->tf.command, 1531 __func__, ap->print_id, qc->tf.command,
1494 ata_get_cmd_descript(&qc->tf), 1532 ata_get_cmd_descript(qc->tf.command),
1495 qc->tag, ata_get_cmd_descript(qc->tf.protocol), 1533 qc->tag, get_prot_descript(qc->tf.protocol),
1496 ap->link.active_tag, ap->link.sactive); 1534 ap->link.active_tag, ap->link.sactive);
1497#endif 1535#endif
1498 1536
@@ -1533,7 +1571,7 @@ static void sata_dwc_qc_prep(struct ata_queued_cmd *qc)
1533#ifdef DEBUG_NCQ 1571#ifdef DEBUG_NCQ
1534 if (qc->tag > 0) 1572 if (qc->tag > 0)
1535 dev_info(qc->ap->dev, "%s: qc->tag=%d ap->active_tag=0x%08x\n", 1573 dev_info(qc->ap->dev, "%s: qc->tag=%d ap->active_tag=0x%08x\n",
1536 __func__, tag, qc->ap->link.active_tag); 1574 __func__, qc->tag, qc->ap->link.active_tag);
1537 1575
1538 return ; 1576 return ;
1539#endif 1577#endif
@@ -1580,9 +1618,8 @@ static struct ata_port_operations sata_dwc_ops = {
1580 1618
1581static const struct ata_port_info sata_dwc_port_info[] = { 1619static const struct ata_port_info sata_dwc_port_info[] = {
1582 { 1620 {
1583 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 1621 .flags = ATA_FLAG_SATA | ATA_FLAG_NCQ,
1584 ATA_FLAG_MMIO | ATA_FLAG_NCQ, 1622 .pio_mask = ATA_PIO4,
1585 .pio_mask = 0x1f, /* pio 0-4 */
1586 .udma_mask = ATA_UDMA6, 1623 .udma_mask = ATA_UDMA6,
1587 .port_ops = &sata_dwc_ops, 1624 .port_ops = &sata_dwc_ops,
1588 }, 1625 },
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index b0214d00d50b..7f9eab34a386 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -33,8 +33,7 @@ enum {
33 SATA_FSL_MAX_PRD_USABLE = SATA_FSL_MAX_PRD - 1, 33 SATA_FSL_MAX_PRD_USABLE = SATA_FSL_MAX_PRD - 1,
34 SATA_FSL_MAX_PRD_DIRECT = 16, /* Direct PRDT entries */ 34 SATA_FSL_MAX_PRD_DIRECT = 16, /* Direct PRDT entries */
35 35
36 SATA_FSL_HOST_FLAGS = (ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 36 SATA_FSL_HOST_FLAGS = (ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
37 ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA |
38 ATA_FLAG_PMP | ATA_FLAG_NCQ | ATA_FLAG_AN), 37 ATA_FLAG_PMP | ATA_FLAG_NCQ | ATA_FLAG_AN),
39 38
40 SATA_FSL_MAX_CMDS = SATA_FSL_QUEUE_DEPTH, 39 SATA_FSL_MAX_CMDS = SATA_FSL_QUEUE_DEPTH,
@@ -186,6 +185,11 @@ enum {
186 COMMANDSTAT = 0x20, 185 COMMANDSTAT = 0x20,
187}; 186};
188 187
188/* TRANSCFG (transport-layer) configuration control */
189enum {
190 TRANSCFG_RX_WATER_MARK = (1 << 4),
191};
192
189/* PHY (link-layer) configuration control */ 193/* PHY (link-layer) configuration control */
190enum { 194enum {
191 PHY_BIST_ENABLE = 0x01, 195 PHY_BIST_ENABLE = 0x01,
@@ -1040,12 +1044,15 @@ static void sata_fsl_error_intr(struct ata_port *ap)
1040 1044
1041 /* find out the offending link and qc */ 1045 /* find out the offending link and qc */
1042 if (ap->nr_pmp_links) { 1046 if (ap->nr_pmp_links) {
1047 unsigned int dev_num;
1048
1043 dereg = ioread32(hcr_base + DE); 1049 dereg = ioread32(hcr_base + DE);
1044 iowrite32(dereg, hcr_base + DE); 1050 iowrite32(dereg, hcr_base + DE);
1045 iowrite32(cereg, hcr_base + CE); 1051 iowrite32(cereg, hcr_base + CE);
1046 1052
1047 if (dereg < ap->nr_pmp_links) { 1053 dev_num = ffs(dereg) - 1;
1048 link = &ap->pmp_link[dereg]; 1054 if (dev_num < ap->nr_pmp_links && dereg != 0) {
1055 link = &ap->pmp_link[dev_num];
1049 ehi = &link->eh_info; 1056 ehi = &link->eh_info;
1050 qc = ata_qc_from_tag(ap, link->active_tag); 1057 qc = ata_qc_from_tag(ap, link->active_tag);
1051 /* 1058 /*
@@ -1303,6 +1310,7 @@ static int sata_fsl_probe(struct platform_device *ofdev,
1303 struct sata_fsl_host_priv *host_priv = NULL; 1310 struct sata_fsl_host_priv *host_priv = NULL;
1304 int irq; 1311 int irq;
1305 struct ata_host *host; 1312 struct ata_host *host;
1313 u32 temp;
1306 1314
1307 struct ata_port_info pi = sata_fsl_port_info[0]; 1315 struct ata_port_info pi = sata_fsl_port_info[0];
1308 const struct ata_port_info *ppi[] = { &pi, NULL }; 1316 const struct ata_port_info *ppi[] = { &pi, NULL };
@@ -1317,6 +1325,12 @@ static int sata_fsl_probe(struct platform_device *ofdev,
1317 ssr_base = hcr_base + 0x100; 1325 ssr_base = hcr_base + 0x100;
1318 csr_base = hcr_base + 0x140; 1326 csr_base = hcr_base + 0x140;
1319 1327
1328 if (!of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc8315-sata")) {
1329 temp = ioread32(csr_base + TRANSCFG);
1330 temp = temp & 0xffffffe0;
1331 iowrite32(temp | TRANSCFG_RX_WATER_MARK, csr_base + TRANSCFG);
1332 }
1333
1320 DPRINTK("@reset i/o = 0x%x\n", ioread32(csr_base + TRANSCFG)); 1334 DPRINTK("@reset i/o = 0x%x\n", ioread32(csr_base + TRANSCFG));
1321 DPRINTK("sizeof(cmd_desc) = %d\n", sizeof(struct command_desc)); 1335 DPRINTK("sizeof(cmd_desc) = %d\n", sizeof(struct command_desc));
1322 DPRINTK("sizeof(#define cmd_desc) = %d\n", SATA_FSL_CMD_DESC_SIZE); 1336 DPRINTK("sizeof(#define cmd_desc) = %d\n", SATA_FSL_CMD_DESC_SIZE);
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index bf74a36d3cc3..cd40651e9b72 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -160,8 +160,7 @@ enum {
160 /* Host Flags */ 160 /* Host Flags */
161 MV_FLAG_DUAL_HC = (1 << 30), /* two SATA Host Controllers */ 161 MV_FLAG_DUAL_HC = (1 << 30), /* two SATA Host Controllers */
162 162
163 MV_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 163 MV_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_PIO_POLLING,
164 ATA_FLAG_MMIO | ATA_FLAG_PIO_POLLING,
165 164
166 MV_GEN_I_FLAGS = MV_COMMON_FLAGS | ATA_FLAG_NO_ATAPI, 165 MV_GEN_I_FLAGS = MV_COMMON_FLAGS | ATA_FLAG_NO_ATAPI,
167 166
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 7254e255fd78..42344e3c686d 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -539,7 +539,7 @@ struct nv_pi_priv {
539static const struct ata_port_info nv_port_info[] = { 539static const struct ata_port_info nv_port_info[] = {
540 /* generic */ 540 /* generic */
541 { 541 {
542 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, 542 .flags = ATA_FLAG_SATA,
543 .pio_mask = NV_PIO_MASK, 543 .pio_mask = NV_PIO_MASK,
544 .mwdma_mask = NV_MWDMA_MASK, 544 .mwdma_mask = NV_MWDMA_MASK,
545 .udma_mask = NV_UDMA_MASK, 545 .udma_mask = NV_UDMA_MASK,
@@ -548,7 +548,7 @@ static const struct ata_port_info nv_port_info[] = {
548 }, 548 },
549 /* nforce2/3 */ 549 /* nforce2/3 */
550 { 550 {
551 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, 551 .flags = ATA_FLAG_SATA,
552 .pio_mask = NV_PIO_MASK, 552 .pio_mask = NV_PIO_MASK,
553 .mwdma_mask = NV_MWDMA_MASK, 553 .mwdma_mask = NV_MWDMA_MASK,
554 .udma_mask = NV_UDMA_MASK, 554 .udma_mask = NV_UDMA_MASK,
@@ -557,7 +557,7 @@ static const struct ata_port_info nv_port_info[] = {
557 }, 557 },
558 /* ck804 */ 558 /* ck804 */
559 { 559 {
560 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, 560 .flags = ATA_FLAG_SATA,
561 .pio_mask = NV_PIO_MASK, 561 .pio_mask = NV_PIO_MASK,
562 .mwdma_mask = NV_MWDMA_MASK, 562 .mwdma_mask = NV_MWDMA_MASK,
563 .udma_mask = NV_UDMA_MASK, 563 .udma_mask = NV_UDMA_MASK,
@@ -566,8 +566,7 @@ static const struct ata_port_info nv_port_info[] = {
566 }, 566 },
567 /* ADMA */ 567 /* ADMA */
568 { 568 {
569 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 569 .flags = ATA_FLAG_SATA | ATA_FLAG_NCQ,
570 ATA_FLAG_MMIO | ATA_FLAG_NCQ,
571 .pio_mask = NV_PIO_MASK, 570 .pio_mask = NV_PIO_MASK,
572 .mwdma_mask = NV_MWDMA_MASK, 571 .mwdma_mask = NV_MWDMA_MASK,
573 .udma_mask = NV_UDMA_MASK, 572 .udma_mask = NV_UDMA_MASK,
@@ -576,7 +575,7 @@ static const struct ata_port_info nv_port_info[] = {
576 }, 575 },
577 /* MCP5x */ 576 /* MCP5x */
578 { 577 {
579 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, 578 .flags = ATA_FLAG_SATA,
580 .pio_mask = NV_PIO_MASK, 579 .pio_mask = NV_PIO_MASK,
581 .mwdma_mask = NV_MWDMA_MASK, 580 .mwdma_mask = NV_MWDMA_MASK,
582 .udma_mask = NV_UDMA_MASK, 581 .udma_mask = NV_UDMA_MASK,
@@ -585,8 +584,7 @@ static const struct ata_port_info nv_port_info[] = {
585 }, 584 },
586 /* SWNCQ */ 585 /* SWNCQ */
587 { 586 {
588 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 587 .flags = ATA_FLAG_SATA | ATA_FLAG_NCQ,
589 ATA_FLAG_NCQ,
590 .pio_mask = NV_PIO_MASK, 588 .pio_mask = NV_PIO_MASK,
591 .mwdma_mask = NV_MWDMA_MASK, 589 .mwdma_mask = NV_MWDMA_MASK,
592 .udma_mask = NV_UDMA_MASK, 590 .udma_mask = NV_UDMA_MASK,
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index f03ad48273ff..a004b1e0ea6d 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -134,9 +134,7 @@ enum {
134 PDC_IRQ_DISABLE = (1 << 10), 134 PDC_IRQ_DISABLE = (1 << 10),
135 PDC_RESET = (1 << 11), /* HDMA reset */ 135 PDC_RESET = (1 << 11), /* HDMA reset */
136 136
137 PDC_COMMON_FLAGS = ATA_FLAG_NO_LEGACY | 137 PDC_COMMON_FLAGS = ATA_FLAG_PIO_POLLING,
138 ATA_FLAG_MMIO |
139 ATA_FLAG_PIO_POLLING,
140 138
141 /* ap->flags bits */ 139 /* ap->flags bits */
142 PDC_FLAG_GEN_II = (1 << 24), 140 PDC_FLAG_GEN_II = (1 << 24),
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index daeebf19a6a9..c5603265fa58 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -155,8 +155,7 @@ static struct ata_port_operations qs_ata_ops = {
155static const struct ata_port_info qs_port_info[] = { 155static const struct ata_port_info qs_port_info[] = {
156 /* board_2068_idx */ 156 /* board_2068_idx */
157 { 157 {
158 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 158 .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_POLLING,
159 ATA_FLAG_MMIO | ATA_FLAG_PIO_POLLING,
160 .pio_mask = ATA_PIO4_ONLY, 159 .pio_mask = ATA_PIO4_ONLY,
161 .udma_mask = ATA_UDMA6, 160 .udma_mask = ATA_UDMA6,
162 .port_ops = &qs_ata_ops, 161 .port_ops = &qs_ata_ops,
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 3a4f84219719..b42edaaf3a53 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -61,8 +61,7 @@ enum {
61 SIL_FLAG_RERR_ON_DMA_ACT = (1 << 29), 61 SIL_FLAG_RERR_ON_DMA_ACT = (1 << 29),
62 SIL_FLAG_MOD15WRITE = (1 << 30), 62 SIL_FLAG_MOD15WRITE = (1 << 30),
63 63
64 SIL_DFL_PORT_FLAGS = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 64 SIL_DFL_PORT_FLAGS = ATA_FLAG_SATA,
65 ATA_FLAG_MMIO,
66 65
67 /* 66 /*
68 * Controller IDs 67 * Controller IDs
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index af41c6fd1254..06c564e55051 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -244,8 +244,7 @@ enum {
244 BID_SIL3131 = 2, 244 BID_SIL3131 = 2,
245 245
246 /* host flags */ 246 /* host flags */
247 SIL24_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 247 SIL24_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
248 ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA |
249 ATA_FLAG_NCQ | ATA_FLAG_ACPI_SATA | 248 ATA_FLAG_NCQ | ATA_FLAG_ACPI_SATA |
250 ATA_FLAG_AN | ATA_FLAG_PMP, 249 ATA_FLAG_AN | ATA_FLAG_PMP,
251 SIL24_FLAG_PCIX_IRQ_WOC = (1 << 24), /* IRQ loss errata on PCI-X */ 250 SIL24_FLAG_PCIX_IRQ_WOC = (1 << 24), /* IRQ loss errata on PCI-X */
diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index 2bfe3ae03976..cdcc13e9cf51 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c
@@ -96,7 +96,7 @@ static struct ata_port_operations sis_ops = {
96}; 96};
97 97
98static const struct ata_port_info sis_port_info = { 98static const struct ata_port_info sis_port_info = {
99 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, 99 .flags = ATA_FLAG_SATA,
100 .pio_mask = ATA_PIO4, 100 .pio_mask = ATA_PIO4,
101 .mwdma_mask = ATA_MWDMA2, 101 .mwdma_mask = ATA_MWDMA2,
102 .udma_mask = ATA_UDMA6, 102 .udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index 7d9db4aaf07e..35eabcf34568 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -359,8 +359,7 @@ static struct ata_port_operations k2_sata_ops = {
359static const struct ata_port_info k2_port_info[] = { 359static const struct ata_port_info k2_port_info[] = {
360 /* chip_svw4 */ 360 /* chip_svw4 */
361 { 361 {
362 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 362 .flags = ATA_FLAG_SATA | K2_FLAG_NO_ATAPI_DMA,
363 ATA_FLAG_MMIO | K2_FLAG_NO_ATAPI_DMA,
364 .pio_mask = ATA_PIO4, 363 .pio_mask = ATA_PIO4,
365 .mwdma_mask = ATA_MWDMA2, 364 .mwdma_mask = ATA_MWDMA2,
366 .udma_mask = ATA_UDMA6, 365 .udma_mask = ATA_UDMA6,
@@ -368,8 +367,7 @@ static const struct ata_port_info k2_port_info[] = {
368 }, 367 },
369 /* chip_svw8 */ 368 /* chip_svw8 */
370 { 369 {
371 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 370 .flags = ATA_FLAG_SATA | K2_FLAG_NO_ATAPI_DMA |
372 ATA_FLAG_MMIO | K2_FLAG_NO_ATAPI_DMA |
373 K2_FLAG_SATA_8_PORTS, 371 K2_FLAG_SATA_8_PORTS,
374 .pio_mask = ATA_PIO4, 372 .pio_mask = ATA_PIO4,
375 .mwdma_mask = ATA_MWDMA2, 373 .mwdma_mask = ATA_MWDMA2,
@@ -378,8 +376,7 @@ static const struct ata_port_info k2_port_info[] = {
378 }, 376 },
379 /* chip_svw42 */ 377 /* chip_svw42 */
380 { 378 {
381 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 379 .flags = ATA_FLAG_SATA | K2_FLAG_BAR_POS_3,
382 ATA_FLAG_MMIO | K2_FLAG_BAR_POS_3,
383 .pio_mask = ATA_PIO4, 380 .pio_mask = ATA_PIO4,
384 .mwdma_mask = ATA_MWDMA2, 381 .mwdma_mask = ATA_MWDMA2,
385 .udma_mask = ATA_UDMA6, 382 .udma_mask = ATA_UDMA6,
@@ -387,8 +384,7 @@ static const struct ata_port_info k2_port_info[] = {
387 }, 384 },
388 /* chip_svw43 */ 385 /* chip_svw43 */
389 { 386 {
390 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 387 .flags = ATA_FLAG_SATA,
391 ATA_FLAG_MMIO,
392 .pio_mask = ATA_PIO4, 388 .pio_mask = ATA_PIO4,
393 .mwdma_mask = ATA_MWDMA2, 389 .mwdma_mask = ATA_MWDMA2,
394 .udma_mask = ATA_UDMA6, 390 .udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index bedd5188e5b0..8fd3b7252bda 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -273,9 +273,8 @@ static struct ata_port_operations pdc_20621_ops = {
273static const struct ata_port_info pdc_port_info[] = { 273static const struct ata_port_info pdc_port_info[] = {
274 /* board_20621 */ 274 /* board_20621 */
275 { 275 {
276 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 276 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_ATAPI |
277 ATA_FLAG_SRST | ATA_FLAG_MMIO | 277 ATA_FLAG_PIO_POLLING,
278 ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING,
279 .pio_mask = ATA_PIO4, 278 .pio_mask = ATA_PIO4,
280 .mwdma_mask = ATA_MWDMA2, 279 .mwdma_mask = ATA_MWDMA2,
281 .udma_mask = ATA_UDMA6, 280 .udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index b8578c32d344..235be717a713 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -88,8 +88,7 @@ static struct ata_port_operations uli_ops = {
88}; 88};
89 89
90static const struct ata_port_info uli_port_info = { 90static const struct ata_port_info uli_port_info = {
91 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 91 .flags = ATA_FLAG_SATA | ATA_FLAG_IGN_SIMPLEX,
92 ATA_FLAG_IGN_SIMPLEX,
93 .pio_mask = ATA_PIO4, 92 .pio_mask = ATA_PIO4,
94 .udma_mask = ATA_UDMA6, 93 .udma_mask = ATA_UDMA6,
95 .port_ops = &uli_ops, 94 .port_ops = &uli_ops,
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 8b677bbf2d37..21242c5709a0 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -148,7 +148,7 @@ static struct ata_port_operations vt8251_ops = {
148}; 148};
149 149
150static const struct ata_port_info vt6420_port_info = { 150static const struct ata_port_info vt6420_port_info = {
151 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, 151 .flags = ATA_FLAG_SATA,
152 .pio_mask = ATA_PIO4, 152 .pio_mask = ATA_PIO4,
153 .mwdma_mask = ATA_MWDMA2, 153 .mwdma_mask = ATA_MWDMA2,
154 .udma_mask = ATA_UDMA6, 154 .udma_mask = ATA_UDMA6,
@@ -156,7 +156,7 @@ static const struct ata_port_info vt6420_port_info = {
156}; 156};
157 157
158static struct ata_port_info vt6421_sport_info = { 158static struct ata_port_info vt6421_sport_info = {
159 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, 159 .flags = ATA_FLAG_SATA,
160 .pio_mask = ATA_PIO4, 160 .pio_mask = ATA_PIO4,
161 .mwdma_mask = ATA_MWDMA2, 161 .mwdma_mask = ATA_MWDMA2,
162 .udma_mask = ATA_UDMA6, 162 .udma_mask = ATA_UDMA6,
@@ -164,7 +164,7 @@ static struct ata_port_info vt6421_sport_info = {
164}; 164};
165 165
166static struct ata_port_info vt6421_pport_info = { 166static struct ata_port_info vt6421_pport_info = {
167 .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_LEGACY, 167 .flags = ATA_FLAG_SLAVE_POSS,
168 .pio_mask = ATA_PIO4, 168 .pio_mask = ATA_PIO4,
169 /* No MWDMA */ 169 /* No MWDMA */
170 .udma_mask = ATA_UDMA6, 170 .udma_mask = ATA_UDMA6,
@@ -172,8 +172,7 @@ static struct ata_port_info vt6421_pport_info = {
172}; 172};
173 173
174static struct ata_port_info vt8251_port_info = { 174static struct ata_port_info vt8251_port_info = {
175 .flags = ATA_FLAG_SATA | ATA_FLAG_SLAVE_POSS | 175 .flags = ATA_FLAG_SATA | ATA_FLAG_SLAVE_POSS,
176 ATA_FLAG_NO_LEGACY,
177 .pio_mask = ATA_PIO4, 176 .pio_mask = ATA_PIO4,
178 .mwdma_mask = ATA_MWDMA2, 177 .mwdma_mask = ATA_MWDMA2,
179 .udma_mask = ATA_UDMA6, 178 .udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
index e079cf29ed5d..7c987371136e 100644
--- a/drivers/ata/sata_vsc.c
+++ b/drivers/ata/sata_vsc.c
@@ -340,8 +340,7 @@ static int __devinit vsc_sata_init_one(struct pci_dev *pdev,
340 const struct pci_device_id *ent) 340 const struct pci_device_id *ent)
341{ 341{
342 static const struct ata_port_info pi = { 342 static const struct ata_port_info pi = {
343 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | 343 .flags = ATA_FLAG_SATA,
344 ATA_FLAG_MMIO,
345 .pio_mask = ATA_PIO4, 344 .pio_mask = ATA_PIO4,
346 .mwdma_mask = ATA_MWDMA2, 345 .mwdma_mask = ATA_MWDMA2,
347 .udma_mask = ATA_UDMA6, 346 .udma_mask = ATA_UDMA6,
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 73fb1c4f4cd4..25ef1a4556e6 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -866,8 +866,9 @@ static int popen(struct atm_vcc *vcc)
866 } 866 }
867 867
868 skb = alloc_skb(sizeof(*header), GFP_ATOMIC); 868 skb = alloc_skb(sizeof(*header), GFP_ATOMIC);
869 if (!skb && net_ratelimit()) { 869 if (!skb) {
870 dev_warn(&card->dev->dev, "Failed to allocate sk_buff in popen()\n"); 870 if (net_ratelimit())
871 dev_warn(&card->dev->dev, "Failed to allocate sk_buff in popen()\n");
871 return -ENOMEM; 872 return -ENOMEM;
872 } 873 }
873 header = (void *)skb_put(skb, sizeof(*header)); 874 header = (void *)skb_put(skb, sizeof(*header));
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 5f51c3b4451e..4c5701c15f53 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -1,6 +1,6 @@
1# Makefile for the Linux device tree 1# Makefile for the Linux device tree
2 2
3obj-y := core.o sys.o bus.o dd.o \ 3obj-y := core.o sys.o bus.o dd.o syscore.o \
4 driver.o class.o platform.o \ 4 driver.o class.o platform.o \
5 cpu.o firmware.o init.o map.o devres.o \ 5 cpu.o firmware.o init.o map.o devres.o \
6 attribute_container.o transport_class.o 6 attribute_container.o transport_class.o
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index abe46edfe5b4..118c1b92a511 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,7 +1,6 @@
1obj-$(CONFIG_PM) += sysfs.o 1obj-$(CONFIG_PM) += sysfs.o generic_ops.o
2obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o 2obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o
3obj-$(CONFIG_PM_RUNTIME) += runtime.o 3obj-$(CONFIG_PM_RUNTIME) += runtime.o
4obj-$(CONFIG_PM_OPS) += generic_ops.o
5obj-$(CONFIG_PM_TRACE_RTC) += trace.o 4obj-$(CONFIG_PM_TRACE_RTC) += trace.o
6obj-$(CONFIG_PM_OPP) += opp.o 5obj-$(CONFIG_PM_OPP) += opp.o
7 6
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 83404973f97a..052dc53eef38 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -423,26 +423,22 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
423 TRACE_DEVICE(dev); 423 TRACE_DEVICE(dev);
424 TRACE_RESUME(0); 424 TRACE_RESUME(0);
425 425
426 if (dev->bus && dev->bus->pm) { 426 if (dev->pwr_domain) {
427 pm_dev_dbg(dev, state, "EARLY "); 427 pm_dev_dbg(dev, state, "EARLY power domain ");
428 error = pm_noirq_op(dev, dev->bus->pm, state); 428 pm_noirq_op(dev, &dev->pwr_domain->ops, state);
429 if (error)
430 goto End;
431 } 429 }
432 430
433 if (dev->type && dev->type->pm) { 431 if (dev->type && dev->type->pm) {
434 pm_dev_dbg(dev, state, "EARLY type "); 432 pm_dev_dbg(dev, state, "EARLY type ");
435 error = pm_noirq_op(dev, dev->type->pm, state); 433 error = pm_noirq_op(dev, dev->type->pm, state);
436 if (error) 434 } else if (dev->class && dev->class->pm) {
437 goto End;
438 }
439
440 if (dev->class && dev->class->pm) {
441 pm_dev_dbg(dev, state, "EARLY class "); 435 pm_dev_dbg(dev, state, "EARLY class ");
442 error = pm_noirq_op(dev, dev->class->pm, state); 436 error = pm_noirq_op(dev, dev->class->pm, state);
437 } else if (dev->bus && dev->bus->pm) {
438 pm_dev_dbg(dev, state, "EARLY ");
439 error = pm_noirq_op(dev, dev->bus->pm, state);
443 } 440 }
444 441
445End:
446 TRACE_RESUME(error); 442 TRACE_RESUME(error);
447 return error; 443 return error;
448} 444}
@@ -518,36 +514,39 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
518 514
519 dev->power.in_suspend = false; 515 dev->power.in_suspend = false;
520 516
521 if (dev->bus) { 517 if (dev->pwr_domain) {
522 if (dev->bus->pm) { 518 pm_dev_dbg(dev, state, "power domain ");
523 pm_dev_dbg(dev, state, ""); 519 pm_op(dev, &dev->pwr_domain->ops, state);
524 error = pm_op(dev, dev->bus->pm, state);
525 } else if (dev->bus->resume) {
526 pm_dev_dbg(dev, state, "legacy ");
527 error = legacy_resume(dev, dev->bus->resume);
528 }
529 if (error)
530 goto End;
531 } 520 }
532 521
533 if (dev->type) { 522 if (dev->type && dev->type->pm) {
534 if (dev->type->pm) { 523 pm_dev_dbg(dev, state, "type ");
535 pm_dev_dbg(dev, state, "type "); 524 error = pm_op(dev, dev->type->pm, state);
536 error = pm_op(dev, dev->type->pm, state); 525 goto End;
537 }
538 if (error)
539 goto End;
540 } 526 }
541 527
542 if (dev->class) { 528 if (dev->class) {
543 if (dev->class->pm) { 529 if (dev->class->pm) {
544 pm_dev_dbg(dev, state, "class "); 530 pm_dev_dbg(dev, state, "class ");
545 error = pm_op(dev, dev->class->pm, state); 531 error = pm_op(dev, dev->class->pm, state);
532 goto End;
546 } else if (dev->class->resume) { 533 } else if (dev->class->resume) {
547 pm_dev_dbg(dev, state, "legacy class "); 534 pm_dev_dbg(dev, state, "legacy class ");
548 error = legacy_resume(dev, dev->class->resume); 535 error = legacy_resume(dev, dev->class->resume);
536 goto End;
549 } 537 }
550 } 538 }
539
540 if (dev->bus) {
541 if (dev->bus->pm) {
542 pm_dev_dbg(dev, state, "");
543 error = pm_op(dev, dev->bus->pm, state);
544 } else if (dev->bus->resume) {
545 pm_dev_dbg(dev, state, "legacy ");
546 error = legacy_resume(dev, dev->bus->resume);
547 }
548 }
549
551 End: 550 End:
552 device_unlock(dev); 551 device_unlock(dev);
553 complete_all(&dev->power.completion); 552 complete_all(&dev->power.completion);
@@ -629,19 +628,23 @@ static void device_complete(struct device *dev, pm_message_t state)
629{ 628{
630 device_lock(dev); 629 device_lock(dev);
631 630
632 if (dev->class && dev->class->pm && dev->class->pm->complete) { 631 if (dev->pwr_domain && dev->pwr_domain->ops.complete) {
633 pm_dev_dbg(dev, state, "completing class "); 632 pm_dev_dbg(dev, state, "completing power domain ");
634 dev->class->pm->complete(dev); 633 dev->pwr_domain->ops.complete(dev);
635 } 634 }
636 635
637 if (dev->type && dev->type->pm && dev->type->pm->complete) { 636 if (dev->type && dev->type->pm) {
638 pm_dev_dbg(dev, state, "completing type "); 637 pm_dev_dbg(dev, state, "completing type ");
639 dev->type->pm->complete(dev); 638 if (dev->type->pm->complete)
640 } 639 dev->type->pm->complete(dev);
641 640 } else if (dev->class && dev->class->pm) {
642 if (dev->bus && dev->bus->pm && dev->bus->pm->complete) { 641 pm_dev_dbg(dev, state, "completing class ");
642 if (dev->class->pm->complete)
643 dev->class->pm->complete(dev);
644 } else if (dev->bus && dev->bus->pm) {
643 pm_dev_dbg(dev, state, "completing "); 645 pm_dev_dbg(dev, state, "completing ");
644 dev->bus->pm->complete(dev); 646 if (dev->bus->pm->complete)
647 dev->bus->pm->complete(dev);
645 } 648 }
646 649
647 device_unlock(dev); 650 device_unlock(dev);
@@ -669,7 +672,6 @@ static void dpm_complete(pm_message_t state)
669 mutex_unlock(&dpm_list_mtx); 672 mutex_unlock(&dpm_list_mtx);
670 673
671 device_complete(dev, state); 674 device_complete(dev, state);
672 pm_runtime_put_sync(dev);
673 675
674 mutex_lock(&dpm_list_mtx); 676 mutex_lock(&dpm_list_mtx);
675 put_device(dev); 677 put_device(dev);
@@ -727,29 +729,31 @@ static pm_message_t resume_event(pm_message_t sleep_state)
727 */ 729 */
728static int device_suspend_noirq(struct device *dev, pm_message_t state) 730static int device_suspend_noirq(struct device *dev, pm_message_t state)
729{ 731{
730 int error = 0; 732 int error;
731
732 if (dev->class && dev->class->pm) {
733 pm_dev_dbg(dev, state, "LATE class ");
734 error = pm_noirq_op(dev, dev->class->pm, state);
735 if (error)
736 goto End;
737 }
738 733
739 if (dev->type && dev->type->pm) { 734 if (dev->type && dev->type->pm) {
740 pm_dev_dbg(dev, state, "LATE type "); 735 pm_dev_dbg(dev, state, "LATE type ");
741 error = pm_noirq_op(dev, dev->type->pm, state); 736 error = pm_noirq_op(dev, dev->type->pm, state);
742 if (error) 737 if (error)
743 goto End; 738 return error;
744 } 739 } else if (dev->class && dev->class->pm) {
745 740 pm_dev_dbg(dev, state, "LATE class ");
746 if (dev->bus && dev->bus->pm) { 741 error = pm_noirq_op(dev, dev->class->pm, state);
742 if (error)
743 return error;
744 } else if (dev->bus && dev->bus->pm) {
747 pm_dev_dbg(dev, state, "LATE "); 745 pm_dev_dbg(dev, state, "LATE ");
748 error = pm_noirq_op(dev, dev->bus->pm, state); 746 error = pm_noirq_op(dev, dev->bus->pm, state);
747 if (error)
748 return error;
749 } 749 }
750 750
751End: 751 if (dev->pwr_domain) {
752 return error; 752 pm_dev_dbg(dev, state, "LATE power domain ");
753 pm_noirq_op(dev, &dev->pwr_domain->ops, state);
754 }
755
756 return 0;
753} 757}
754 758
755/** 759/**
@@ -836,25 +840,22 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
836 goto End; 840 goto End;
837 } 841 }
838 842
843 if (dev->type && dev->type->pm) {
844 pm_dev_dbg(dev, state, "type ");
845 error = pm_op(dev, dev->type->pm, state);
846 goto Domain;
847 }
848
839 if (dev->class) { 849 if (dev->class) {
840 if (dev->class->pm) { 850 if (dev->class->pm) {
841 pm_dev_dbg(dev, state, "class "); 851 pm_dev_dbg(dev, state, "class ");
842 error = pm_op(dev, dev->class->pm, state); 852 error = pm_op(dev, dev->class->pm, state);
853 goto Domain;
843 } else if (dev->class->suspend) { 854 } else if (dev->class->suspend) {
844 pm_dev_dbg(dev, state, "legacy class "); 855 pm_dev_dbg(dev, state, "legacy class ");
845 error = legacy_suspend(dev, state, dev->class->suspend); 856 error = legacy_suspend(dev, state, dev->class->suspend);
857 goto Domain;
846 } 858 }
847 if (error)
848 goto End;
849 }
850
851 if (dev->type) {
852 if (dev->type->pm) {
853 pm_dev_dbg(dev, state, "type ");
854 error = pm_op(dev, dev->type->pm, state);
855 }
856 if (error)
857 goto End;
858 } 859 }
859 860
860 if (dev->bus) { 861 if (dev->bus) {
@@ -867,6 +868,12 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
867 } 868 }
868 } 869 }
869 870
871 Domain:
872 if (!error && dev->pwr_domain) {
873 pm_dev_dbg(dev, state, "power domain ");
874 pm_op(dev, &dev->pwr_domain->ops, state);
875 }
876
870 End: 877 End:
871 device_unlock(dev); 878 device_unlock(dev);
872 complete_all(&dev->power.completion); 879 complete_all(&dev->power.completion);
@@ -957,27 +964,34 @@ static int device_prepare(struct device *dev, pm_message_t state)
957 964
958 device_lock(dev); 965 device_lock(dev);
959 966
960 if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) { 967 if (dev->type && dev->type->pm) {
968 pm_dev_dbg(dev, state, "preparing type ");
969 if (dev->type->pm->prepare)
970 error = dev->type->pm->prepare(dev);
971 suspend_report_result(dev->type->pm->prepare, error);
972 if (error)
973 goto End;
974 } else if (dev->class && dev->class->pm) {
975 pm_dev_dbg(dev, state, "preparing class ");
976 if (dev->class->pm->prepare)
977 error = dev->class->pm->prepare(dev);
978 suspend_report_result(dev->class->pm->prepare, error);
979 if (error)
980 goto End;
981 } else if (dev->bus && dev->bus->pm) {
961 pm_dev_dbg(dev, state, "preparing "); 982 pm_dev_dbg(dev, state, "preparing ");
962 error = dev->bus->pm->prepare(dev); 983 if (dev->bus->pm->prepare)
984 error = dev->bus->pm->prepare(dev);
963 suspend_report_result(dev->bus->pm->prepare, error); 985 suspend_report_result(dev->bus->pm->prepare, error);
964 if (error) 986 if (error)
965 goto End; 987 goto End;
966 } 988 }
967 989
968 if (dev->type && dev->type->pm && dev->type->pm->prepare) { 990 if (dev->pwr_domain && dev->pwr_domain->ops.prepare) {
969 pm_dev_dbg(dev, state, "preparing type "); 991 pm_dev_dbg(dev, state, "preparing power domain ");
970 error = dev->type->pm->prepare(dev); 992 dev->pwr_domain->ops.prepare(dev);
971 suspend_report_result(dev->type->pm->prepare, error);
972 if (error)
973 goto End;
974 } 993 }
975 994
976 if (dev->class && dev->class->pm && dev->class->pm->prepare) {
977 pm_dev_dbg(dev, state, "preparing class ");
978 error = dev->class->pm->prepare(dev);
979 suspend_report_result(dev->class->pm->prepare, error);
980 }
981 End: 995 End:
982 device_unlock(dev); 996 device_unlock(dev);
983 997
@@ -1005,12 +1019,9 @@ static int dpm_prepare(pm_message_t state)
1005 if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) 1019 if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
1006 pm_wakeup_event(dev, 0); 1020 pm_wakeup_event(dev, 0);
1007 1021
1008 if (pm_wakeup_pending()) { 1022 pm_runtime_put_sync(dev);
1009 pm_runtime_put_sync(dev); 1023 error = pm_wakeup_pending() ?
1010 error = -EBUSY; 1024 -EBUSY : device_prepare(dev, state);
1011 } else {
1012 error = device_prepare(dev, state);
1013 }
1014 1025
1015 mutex_lock(&dpm_list_mtx); 1026 mutex_lock(&dpm_list_mtx);
1016 if (error) { 1027 if (error) {
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 2bb9b4cf59d7..56a6899f5e9e 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -222,7 +222,7 @@ int opp_get_opp_count(struct device *dev)
222 * opp_find_freq_exact() - search for an exact frequency 222 * opp_find_freq_exact() - search for an exact frequency
223 * @dev: device for which we do this operation 223 * @dev: device for which we do this operation
224 * @freq: frequency to search for 224 * @freq: frequency to search for
225 * @is_available: true/false - match for available opp 225 * @available: true/false - match for available opp
226 * 226 *
227 * Searches for exact match in the opp list and returns pointer to the matching 227 * Searches for exact match in the opp list and returns pointer to the matching
228 * opp if found, else returns ERR_PTR in case of error and should be handled 228 * opp if found, else returns ERR_PTR in case of error and should be handled
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 698dde742587..f2a25f18fde7 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -58,19 +58,18 @@ static inline void device_pm_move_last(struct device *dev) {}
58 * sysfs.c 58 * sysfs.c
59 */ 59 */
60 60
61extern int dpm_sysfs_add(struct device *); 61extern int dpm_sysfs_add(struct device *dev);
62extern void dpm_sysfs_remove(struct device *); 62extern void dpm_sysfs_remove(struct device *dev);
63extern void rpm_sysfs_remove(struct device *); 63extern void rpm_sysfs_remove(struct device *dev);
64extern int wakeup_sysfs_add(struct device *dev);
65extern void wakeup_sysfs_remove(struct device *dev);
64 66
65#else /* CONFIG_PM */ 67#else /* CONFIG_PM */
66 68
67static inline int dpm_sysfs_add(struct device *dev) 69static inline int dpm_sysfs_add(struct device *dev) { return 0; }
68{ 70static inline void dpm_sysfs_remove(struct device *dev) {}
69 return 0; 71static inline void rpm_sysfs_remove(struct device *dev) {}
70} 72static inline int wakeup_sysfs_add(struct device *dev) { return 0; }
71 73static inline void wakeup_sysfs_remove(struct device *dev) {}
72static inline void dpm_sysfs_remove(struct device *dev)
73{
74}
75 74
76#endif 75#endif
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 42615b419dfb..54597c859ecb 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -168,6 +168,7 @@ static int rpm_check_suspend_allowed(struct device *dev)
168static int rpm_idle(struct device *dev, int rpmflags) 168static int rpm_idle(struct device *dev, int rpmflags)
169{ 169{
170 int (*callback)(struct device *); 170 int (*callback)(struct device *);
171 int (*domain_callback)(struct device *);
171 int retval; 172 int retval;
172 173
173 retval = rpm_check_suspend_allowed(dev); 174 retval = rpm_check_suspend_allowed(dev);
@@ -213,19 +214,28 @@ static int rpm_idle(struct device *dev, int rpmflags)
213 214
214 dev->power.idle_notification = true; 215 dev->power.idle_notification = true;
215 216
216 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) 217 if (dev->type && dev->type->pm)
217 callback = dev->bus->pm->runtime_idle;
218 else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle)
219 callback = dev->type->pm->runtime_idle; 218 callback = dev->type->pm->runtime_idle;
220 else if (dev->class && dev->class->pm) 219 else if (dev->class && dev->class->pm)
221 callback = dev->class->pm->runtime_idle; 220 callback = dev->class->pm->runtime_idle;
221 else if (dev->bus && dev->bus->pm)
222 callback = dev->bus->pm->runtime_idle;
222 else 223 else
223 callback = NULL; 224 callback = NULL;
224 225
225 if (callback) { 226 if (dev->pwr_domain)
227 domain_callback = dev->pwr_domain->ops.runtime_idle;
228 else
229 domain_callback = NULL;
230
231 if (callback || domain_callback) {
226 spin_unlock_irq(&dev->power.lock); 232 spin_unlock_irq(&dev->power.lock);
227 233
228 callback(dev); 234 if (domain_callback)
235 retval = domain_callback(dev);
236
237 if (!retval && callback)
238 callback(dev);
229 239
230 spin_lock_irq(&dev->power.lock); 240 spin_lock_irq(&dev->power.lock);
231 } 241 }
@@ -372,12 +382,12 @@ static int rpm_suspend(struct device *dev, int rpmflags)
372 382
373 __update_runtime_status(dev, RPM_SUSPENDING); 383 __update_runtime_status(dev, RPM_SUSPENDING);
374 384
375 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) 385 if (dev->type && dev->type->pm)
376 callback = dev->bus->pm->runtime_suspend;
377 else if (dev->type && dev->type->pm && dev->type->pm->runtime_suspend)
378 callback = dev->type->pm->runtime_suspend; 386 callback = dev->type->pm->runtime_suspend;
379 else if (dev->class && dev->class->pm) 387 else if (dev->class && dev->class->pm)
380 callback = dev->class->pm->runtime_suspend; 388 callback = dev->class->pm->runtime_suspend;
389 else if (dev->bus && dev->bus->pm)
390 callback = dev->bus->pm->runtime_suspend;
381 else 391 else
382 callback = NULL; 392 callback = NULL;
383 393
@@ -390,6 +400,8 @@ static int rpm_suspend(struct device *dev, int rpmflags)
390 else 400 else
391 pm_runtime_cancel_pending(dev); 401 pm_runtime_cancel_pending(dev);
392 } else { 402 } else {
403 if (dev->pwr_domain)
404 rpm_callback(dev->pwr_domain->ops.runtime_suspend, dev);
393 no_callback: 405 no_callback:
394 __update_runtime_status(dev, RPM_SUSPENDED); 406 __update_runtime_status(dev, RPM_SUSPENDED);
395 pm_runtime_deactivate_timer(dev); 407 pm_runtime_deactivate_timer(dev);
@@ -569,12 +581,15 @@ static int rpm_resume(struct device *dev, int rpmflags)
569 581
570 __update_runtime_status(dev, RPM_RESUMING); 582 __update_runtime_status(dev, RPM_RESUMING);
571 583
572 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) 584 if (dev->pwr_domain)
573 callback = dev->bus->pm->runtime_resume; 585 rpm_callback(dev->pwr_domain->ops.runtime_resume, dev);
574 else if (dev->type && dev->type->pm && dev->type->pm->runtime_resume) 586
587 if (dev->type && dev->type->pm)
575 callback = dev->type->pm->runtime_resume; 588 callback = dev->type->pm->runtime_resume;
576 else if (dev->class && dev->class->pm) 589 else if (dev->class && dev->class->pm)
577 callback = dev->class->pm->runtime_resume; 590 callback = dev->class->pm->runtime_resume;
591 else if (dev->bus && dev->bus->pm)
592 callback = dev->bus->pm->runtime_resume;
578 else 593 else
579 callback = NULL; 594 callback = NULL;
580 595
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 0b1e46bf3e56..fff49bee781d 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -431,26 +431,18 @@ static ssize_t async_store(struct device *dev, struct device_attribute *attr,
431static DEVICE_ATTR(async, 0644, async_show, async_store); 431static DEVICE_ATTR(async, 0644, async_show, async_store);
432#endif /* CONFIG_PM_ADVANCED_DEBUG */ 432#endif /* CONFIG_PM_ADVANCED_DEBUG */
433 433
434static struct attribute * power_attrs[] = { 434static struct attribute *power_attrs[] = {
435 &dev_attr_wakeup.attr,
436#ifdef CONFIG_PM_SLEEP
437 &dev_attr_wakeup_count.attr,
438 &dev_attr_wakeup_active_count.attr,
439 &dev_attr_wakeup_hit_count.attr,
440 &dev_attr_wakeup_active.attr,
441 &dev_attr_wakeup_total_time_ms.attr,
442 &dev_attr_wakeup_max_time_ms.attr,
443 &dev_attr_wakeup_last_time_ms.attr,
444#endif
445#ifdef CONFIG_PM_ADVANCED_DEBUG 435#ifdef CONFIG_PM_ADVANCED_DEBUG
436#ifdef CONFIG_PM_SLEEP
446 &dev_attr_async.attr, 437 &dev_attr_async.attr,
438#endif
447#ifdef CONFIG_PM_RUNTIME 439#ifdef CONFIG_PM_RUNTIME
448 &dev_attr_runtime_status.attr, 440 &dev_attr_runtime_status.attr,
449 &dev_attr_runtime_usage.attr, 441 &dev_attr_runtime_usage.attr,
450 &dev_attr_runtime_active_kids.attr, 442 &dev_attr_runtime_active_kids.attr,
451 &dev_attr_runtime_enabled.attr, 443 &dev_attr_runtime_enabled.attr,
452#endif 444#endif
453#endif 445#endif /* CONFIG_PM_ADVANCED_DEBUG */
454 NULL, 446 NULL,
455}; 447};
456static struct attribute_group pm_attr_group = { 448static struct attribute_group pm_attr_group = {
@@ -458,9 +450,26 @@ static struct attribute_group pm_attr_group = {
458 .attrs = power_attrs, 450 .attrs = power_attrs,
459}; 451};
460 452
461#ifdef CONFIG_PM_RUNTIME 453static struct attribute *wakeup_attrs[] = {
454#ifdef CONFIG_PM_SLEEP
455 &dev_attr_wakeup.attr,
456 &dev_attr_wakeup_count.attr,
457 &dev_attr_wakeup_active_count.attr,
458 &dev_attr_wakeup_hit_count.attr,
459 &dev_attr_wakeup_active.attr,
460 &dev_attr_wakeup_total_time_ms.attr,
461 &dev_attr_wakeup_max_time_ms.attr,
462 &dev_attr_wakeup_last_time_ms.attr,
463#endif
464 NULL,
465};
466static struct attribute_group pm_wakeup_attr_group = {
467 .name = power_group_name,
468 .attrs = wakeup_attrs,
469};
462 470
463static struct attribute *runtime_attrs[] = { 471static struct attribute *runtime_attrs[] = {
472#ifdef CONFIG_PM_RUNTIME
464#ifndef CONFIG_PM_ADVANCED_DEBUG 473#ifndef CONFIG_PM_ADVANCED_DEBUG
465 &dev_attr_runtime_status.attr, 474 &dev_attr_runtime_status.attr,
466#endif 475#endif
@@ -468,6 +477,7 @@ static struct attribute *runtime_attrs[] = {
468 &dev_attr_runtime_suspended_time.attr, 477 &dev_attr_runtime_suspended_time.attr,
469 &dev_attr_runtime_active_time.attr, 478 &dev_attr_runtime_active_time.attr,
470 &dev_attr_autosuspend_delay_ms.attr, 479 &dev_attr_autosuspend_delay_ms.attr,
480#endif /* CONFIG_PM_RUNTIME */
471 NULL, 481 NULL,
472}; 482};
473static struct attribute_group pm_runtime_attr_group = { 483static struct attribute_group pm_runtime_attr_group = {
@@ -480,35 +490,49 @@ int dpm_sysfs_add(struct device *dev)
480 int rc; 490 int rc;
481 491
482 rc = sysfs_create_group(&dev->kobj, &pm_attr_group); 492 rc = sysfs_create_group(&dev->kobj, &pm_attr_group);
483 if (rc == 0 && !dev->power.no_callbacks) { 493 if (rc)
494 return rc;
495
496 if (pm_runtime_callbacks_present(dev)) {
484 rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group); 497 rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group);
485 if (rc) 498 if (rc)
486 sysfs_remove_group(&dev->kobj, &pm_attr_group); 499 goto err_out;
500 }
501
502 if (device_can_wakeup(dev)) {
503 rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group);
504 if (rc) {
505 if (pm_runtime_callbacks_present(dev))
506 sysfs_unmerge_group(&dev->kobj,
507 &pm_runtime_attr_group);
508 goto err_out;
509 }
487 } 510 }
511 return 0;
512
513 err_out:
514 sysfs_remove_group(&dev->kobj, &pm_attr_group);
488 return rc; 515 return rc;
489} 516}
490 517
491void rpm_sysfs_remove(struct device *dev) 518int wakeup_sysfs_add(struct device *dev)
492{ 519{
493 sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); 520 return sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group);
494} 521}
495 522
496void dpm_sysfs_remove(struct device *dev) 523void wakeup_sysfs_remove(struct device *dev)
497{ 524{
498 rpm_sysfs_remove(dev); 525 sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
499 sysfs_remove_group(&dev->kobj, &pm_attr_group);
500} 526}
501 527
502#else /* CONFIG_PM_RUNTIME */ 528void rpm_sysfs_remove(struct device *dev)
503
504int dpm_sysfs_add(struct device * dev)
505{ 529{
506 return sysfs_create_group(&dev->kobj, &pm_attr_group); 530 sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
507} 531}
508 532
509void dpm_sysfs_remove(struct device * dev) 533void dpm_sysfs_remove(struct device *dev)
510{ 534{
535 rpm_sysfs_remove(dev);
536 sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
511 sysfs_remove_group(&dev->kobj, &pm_attr_group); 537 sysfs_remove_group(&dev->kobj, &pm_attr_group);
512} 538}
513
514#endif
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 9f4258df4cfd..c80e138b62fe 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -112,7 +112,7 @@ static unsigned int read_magic_time(void)
112 unsigned int val; 112 unsigned int val;
113 113
114 get_rtc_time(&time); 114 get_rtc_time(&time);
115 printk("Time: %2d:%02d:%02d Date: %02d/%02d/%02d\n", 115 pr_info("Time: %2d:%02d:%02d Date: %02d/%02d/%02d\n",
116 time.tm_hour, time.tm_min, time.tm_sec, 116 time.tm_hour, time.tm_min, time.tm_sec,
117 time.tm_mon + 1, time.tm_mday, time.tm_year % 100); 117 time.tm_mon + 1, time.tm_mday, time.tm_year % 100);
118 val = time.tm_year; /* 100 years */ 118 val = time.tm_year; /* 100 years */
@@ -179,7 +179,7 @@ static int show_file_hash(unsigned int value)
179 unsigned int hash = hash_string(lineno, file, FILEHASH); 179 unsigned int hash = hash_string(lineno, file, FILEHASH);
180 if (hash != value) 180 if (hash != value)
181 continue; 181 continue;
182 printk(" hash matches %s:%u\n", file, lineno); 182 pr_info(" hash matches %s:%u\n", file, lineno);
183 match++; 183 match++;
184 } 184 }
185 return match; 185 return match;
@@ -255,7 +255,7 @@ static int late_resume_init(void)
255 val = val / FILEHASH; 255 val = val / FILEHASH;
256 dev = val /* % DEVHASH */; 256 dev = val /* % DEVHASH */;
257 257
258 printk(" Magic number: %d:%d:%d\n", user, file, dev); 258 pr_info(" Magic number: %d:%d:%d\n", user, file, dev);
259 show_file_hash(file); 259 show_file_hash(file);
260 show_dev_hash(dev); 260 show_dev_hash(dev);
261 return 0; 261 return 0;
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 8ec406d8f548..4573c83df6dd 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -24,12 +24,26 @@
24 */ 24 */
25bool events_check_enabled; 25bool events_check_enabled;
26 26
27/* The counter of registered wakeup events. */ 27/*
28static atomic_t event_count = ATOMIC_INIT(0); 28 * Combined counters of registered wakeup events and wakeup events in progress.
29/* A preserved old value of event_count. */ 29 * They need to be modified together atomically, so it's better to use one
30 * atomic variable to hold them both.
31 */
32static atomic_t combined_event_count = ATOMIC_INIT(0);
33
34#define IN_PROGRESS_BITS (sizeof(int) * 4)
35#define MAX_IN_PROGRESS ((1 << IN_PROGRESS_BITS) - 1)
36
37static void split_counters(unsigned int *cnt, unsigned int *inpr)
38{
39 unsigned int comb = atomic_read(&combined_event_count);
40
41 *cnt = (comb >> IN_PROGRESS_BITS);
42 *inpr = comb & MAX_IN_PROGRESS;
43}
44
45/* A preserved old value of the events counter. */
30static unsigned int saved_count; 46static unsigned int saved_count;
31/* The counter of wakeup events being processed. */
32static atomic_t events_in_progress = ATOMIC_INIT(0);
33 47
34static DEFINE_SPINLOCK(events_lock); 48static DEFINE_SPINLOCK(events_lock);
35 49
@@ -228,6 +242,35 @@ int device_wakeup_disable(struct device *dev)
228EXPORT_SYMBOL_GPL(device_wakeup_disable); 242EXPORT_SYMBOL_GPL(device_wakeup_disable);
229 243
230/** 244/**
245 * device_set_wakeup_capable - Set/reset device wakeup capability flag.
246 * @dev: Device to handle.
247 * @capable: Whether or not @dev is capable of waking up the system from sleep.
248 *
249 * If @capable is set, set the @dev's power.can_wakeup flag and add its
250 * wakeup-related attributes to sysfs. Otherwise, unset the @dev's
251 * power.can_wakeup flag and remove its wakeup-related attributes from sysfs.
252 *
253 * This function may sleep and it can't be called from any context where
254 * sleeping is not allowed.
255 */
256void device_set_wakeup_capable(struct device *dev, bool capable)
257{
258 if (!!dev->power.can_wakeup == !!capable)
259 return;
260
261 if (device_is_registered(dev)) {
262 if (capable) {
263 if (wakeup_sysfs_add(dev))
264 return;
265 } else {
266 wakeup_sysfs_remove(dev);
267 }
268 }
269 dev->power.can_wakeup = capable;
270}
271EXPORT_SYMBOL_GPL(device_set_wakeup_capable);
272
273/**
231 * device_init_wakeup - Device wakeup initialization. 274 * device_init_wakeup - Device wakeup initialization.
232 * @dev: Device to handle. 275 * @dev: Device to handle.
233 * @enable: Whether or not to enable @dev as a wakeup device. 276 * @enable: Whether or not to enable @dev as a wakeup device.
@@ -307,7 +350,8 @@ static void wakeup_source_activate(struct wakeup_source *ws)
307 ws->timer_expires = jiffies; 350 ws->timer_expires = jiffies;
308 ws->last_time = ktime_get(); 351 ws->last_time = ktime_get();
309 352
310 atomic_inc(&events_in_progress); 353 /* Increment the counter of events in progress. */
354 atomic_inc(&combined_event_count);
311} 355}
312 356
313/** 357/**
@@ -394,14 +438,10 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
394 del_timer(&ws->timer); 438 del_timer(&ws->timer);
395 439
396 /* 440 /*
397 * event_count has to be incremented before events_in_progress is 441 * Increment the counter of registered wakeup events and decrement the
398 * modified, so that the callers of pm_check_wakeup_events() and 442 * couter of wakeup events in progress simultaneously.
399 * pm_save_wakeup_count() don't see the old value of event_count and
400 * events_in_progress equal to zero at the same time.
401 */ 443 */
402 atomic_inc(&event_count); 444 atomic_add(MAX_IN_PROGRESS, &combined_event_count);
403 smp_mb__before_atomic_dec();
404 atomic_dec(&events_in_progress);
405} 445}
406 446
407/** 447/**
@@ -556,8 +596,10 @@ bool pm_wakeup_pending(void)
556 596
557 spin_lock_irqsave(&events_lock, flags); 597 spin_lock_irqsave(&events_lock, flags);
558 if (events_check_enabled) { 598 if (events_check_enabled) {
559 ret = ((unsigned int)atomic_read(&event_count) != saved_count) 599 unsigned int cnt, inpr;
560 || atomic_read(&events_in_progress); 600
601 split_counters(&cnt, &inpr);
602 ret = (cnt != saved_count || inpr > 0);
561 events_check_enabled = !ret; 603 events_check_enabled = !ret;
562 } 604 }
563 spin_unlock_irqrestore(&events_lock, flags); 605 spin_unlock_irqrestore(&events_lock, flags);
@@ -573,25 +615,25 @@ bool pm_wakeup_pending(void)
573 * Store the number of registered wakeup events at the address in @count. Block 615 * Store the number of registered wakeup events at the address in @count. Block
574 * if the current number of wakeup events being processed is nonzero. 616 * if the current number of wakeup events being processed is nonzero.
575 * 617 *
576 * Return false if the wait for the number of wakeup events being processed to 618 * Return 'false' if the wait for the number of wakeup events being processed to
577 * drop down to zero has been interrupted by a signal (and the current number 619 * drop down to zero has been interrupted by a signal (and the current number
578 * of wakeup events being processed is still nonzero). Otherwise return true. 620 * of wakeup events being processed is still nonzero). Otherwise return 'true'.
579 */ 621 */
580bool pm_get_wakeup_count(unsigned int *count) 622bool pm_get_wakeup_count(unsigned int *count)
581{ 623{
582 bool ret; 624 unsigned int cnt, inpr;
583
584 if (capable(CAP_SYS_ADMIN))
585 events_check_enabled = false;
586 625
587 while (atomic_read(&events_in_progress) && !signal_pending(current)) { 626 for (;;) {
627 split_counters(&cnt, &inpr);
628 if (inpr == 0 || signal_pending(current))
629 break;
588 pm_wakeup_update_hit_counts(); 630 pm_wakeup_update_hit_counts();
589 schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT)); 631 schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
590 } 632 }
591 633
592 ret = !atomic_read(&events_in_progress); 634 split_counters(&cnt, &inpr);
593 *count = atomic_read(&event_count); 635 *count = cnt;
594 return ret; 636 return !inpr;
595} 637}
596 638
597/** 639/**
@@ -600,24 +642,25 @@ bool pm_get_wakeup_count(unsigned int *count)
600 * 642 *
601 * If @count is equal to the current number of registered wakeup events and the 643 * If @count is equal to the current number of registered wakeup events and the
602 * current number of wakeup events being processed is zero, store @count as the 644 * current number of wakeup events being processed is zero, store @count as the
603 * old number of registered wakeup events to be used by pm_check_wakeup_events() 645 * old number of registered wakeup events for pm_check_wakeup_events(), enable
604 * and return true. Otherwise return false. 646 * wakeup events detection and return 'true'. Otherwise disable wakeup events
647 * detection and return 'false'.
605 */ 648 */
606bool pm_save_wakeup_count(unsigned int count) 649bool pm_save_wakeup_count(unsigned int count)
607{ 650{
608 bool ret = false; 651 unsigned int cnt, inpr;
609 652
653 events_check_enabled = false;
610 spin_lock_irq(&events_lock); 654 spin_lock_irq(&events_lock);
611 if (count == (unsigned int)atomic_read(&event_count) 655 split_counters(&cnt, &inpr);
612 && !atomic_read(&events_in_progress)) { 656 if (cnt == count && inpr == 0) {
613 saved_count = count; 657 saved_count = count;
614 events_check_enabled = true; 658 events_check_enabled = true;
615 ret = true;
616 } 659 }
617 spin_unlock_irq(&events_lock); 660 spin_unlock_irq(&events_lock);
618 if (!ret) 661 if (!events_check_enabled)
619 pm_wakeup_update_hit_counts(); 662 pm_wakeup_update_hit_counts();
620 return ret; 663 return events_check_enabled;
621} 664}
622 665
623static struct dentry *wakeup_sources_stats_dentry; 666static struct dentry *wakeup_sources_stats_dentry;
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
new file mode 100644
index 000000000000..90af2943f9e4
--- /dev/null
+++ b/drivers/base/syscore.c
@@ -0,0 +1,117 @@
1/*
2 * syscore.c - Execution of system core operations.
3 *
4 * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
5 *
6 * This file is released under the GPLv2.
7 */
8
9#include <linux/syscore_ops.h>
10#include <linux/mutex.h>
11#include <linux/module.h>
12
13static LIST_HEAD(syscore_ops_list);
14static DEFINE_MUTEX(syscore_ops_lock);
15
16/**
17 * register_syscore_ops - Register a set of system core operations.
18 * @ops: System core operations to register.
19 */
20void register_syscore_ops(struct syscore_ops *ops)
21{
22 mutex_lock(&syscore_ops_lock);
23 list_add_tail(&ops->node, &syscore_ops_list);
24 mutex_unlock(&syscore_ops_lock);
25}
26EXPORT_SYMBOL_GPL(register_syscore_ops);
27
28/**
29 * unregister_syscore_ops - Unregister a set of system core operations.
30 * @ops: System core operations to unregister.
31 */
32void unregister_syscore_ops(struct syscore_ops *ops)
33{
34 mutex_lock(&syscore_ops_lock);
35 list_del(&ops->node);
36 mutex_unlock(&syscore_ops_lock);
37}
38EXPORT_SYMBOL_GPL(unregister_syscore_ops);
39
40#ifdef CONFIG_PM_SLEEP
41/**
42 * syscore_suspend - Execute all the registered system core suspend callbacks.
43 *
44 * This function is executed with one CPU on-line and disabled interrupts.
45 */
46int syscore_suspend(void)
47{
48 struct syscore_ops *ops;
49 int ret = 0;
50
51 WARN_ONCE(!irqs_disabled(),
52 "Interrupts enabled before system core suspend.\n");
53
54 list_for_each_entry_reverse(ops, &syscore_ops_list, node)
55 if (ops->suspend) {
56 if (initcall_debug)
57 pr_info("PM: Calling %pF\n", ops->suspend);
58 ret = ops->suspend();
59 if (ret)
60 goto err_out;
61 WARN_ONCE(!irqs_disabled(),
62 "Interrupts enabled after %pF\n", ops->suspend);
63 }
64
65 return 0;
66
67 err_out:
68 pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend);
69
70 list_for_each_entry_continue(ops, &syscore_ops_list, node)
71 if (ops->resume)
72 ops->resume();
73
74 return ret;
75}
76
77/**
78 * syscore_resume - Execute all the registered system core resume callbacks.
79 *
80 * This function is executed with one CPU on-line and disabled interrupts.
81 */
82void syscore_resume(void)
83{
84 struct syscore_ops *ops;
85
86 WARN_ONCE(!irqs_disabled(),
87 "Interrupts enabled before system core resume.\n");
88
89 list_for_each_entry(ops, &syscore_ops_list, node)
90 if (ops->resume) {
91 if (initcall_debug)
92 pr_info("PM: Calling %pF\n", ops->resume);
93 ops->resume();
94 WARN_ONCE(!irqs_disabled(),
95 "Interrupts enabled after %pF\n", ops->resume);
96 }
97}
98#endif /* CONFIG_PM_SLEEP */
99
100/**
101 * syscore_shutdown - Execute all the registered system core shutdown callbacks.
102 */
103void syscore_shutdown(void)
104{
105 struct syscore_ops *ops;
106
107 mutex_lock(&syscore_ops_lock);
108
109 list_for_each_entry_reverse(ops, &syscore_ops_list, node)
110 if (ops->shutdown) {
111 if (initcall_debug)
112 pr_info("PM: Calling %pF\n", ops->shutdown);
113 ops->shutdown();
114 }
115
116 mutex_unlock(&syscore_ops_lock);
117}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index b9ba04fc2b34..77fc76f8aea9 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3281,7 +3281,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
3281 struct block_device *bdev = opened_bdev[cnt]; 3281 struct block_device *bdev = opened_bdev[cnt];
3282 if (!bdev || ITYPE(drive_state[cnt].fd_device) != type) 3282 if (!bdev || ITYPE(drive_state[cnt].fd_device) != type)
3283 continue; 3283 continue;
3284 __invalidate_device(bdev); 3284 __invalidate_device(bdev, true);
3285 } 3285 }
3286 mutex_unlock(&open_lock); 3286 mutex_unlock(&open_lock);
3287 } else { 3287 } else {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 49e6a545eb63..dbf31ec9114d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -78,7 +78,6 @@
78 78
79#include <asm/uaccess.h> 79#include <asm/uaccess.h>
80 80
81static DEFINE_MUTEX(loop_mutex);
82static LIST_HEAD(loop_devices); 81static LIST_HEAD(loop_devices);
83static DEFINE_MUTEX(loop_devices_mutex); 82static DEFINE_MUTEX(loop_devices_mutex);
84 83
@@ -1501,11 +1500,9 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
1501{ 1500{
1502 struct loop_device *lo = bdev->bd_disk->private_data; 1501 struct loop_device *lo = bdev->bd_disk->private_data;
1503 1502
1504 mutex_lock(&loop_mutex);
1505 mutex_lock(&lo->lo_ctl_mutex); 1503 mutex_lock(&lo->lo_ctl_mutex);
1506 lo->lo_refcnt++; 1504 lo->lo_refcnt++;
1507 mutex_unlock(&lo->lo_ctl_mutex); 1505 mutex_unlock(&lo->lo_ctl_mutex);
1508 mutex_unlock(&loop_mutex);
1509 1506
1510 return 0; 1507 return 0;
1511} 1508}
@@ -1515,7 +1512,6 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
1515 struct loop_device *lo = disk->private_data; 1512 struct loop_device *lo = disk->private_data;
1516 int err; 1513 int err;
1517 1514
1518 mutex_lock(&loop_mutex);
1519 mutex_lock(&lo->lo_ctl_mutex); 1515 mutex_lock(&lo->lo_ctl_mutex);
1520 1516
1521 if (--lo->lo_refcnt) 1517 if (--lo->lo_refcnt)
@@ -1540,7 +1536,6 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
1540out: 1536out:
1541 mutex_unlock(&lo->lo_ctl_mutex); 1537 mutex_unlock(&lo->lo_ctl_mutex);
1542out_unlocked: 1538out_unlocked:
1543 mutex_unlock(&loop_mutex);
1544 return 0; 1539 return 0;
1545} 1540}
1546 1541
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d7aa39e349a6..9cb8668ff5f4 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -120,6 +120,10 @@ static DEFINE_SPINLOCK(minor_lock);
120#define EXTENDED (1<<EXT_SHIFT) 120#define EXTENDED (1<<EXT_SHIFT)
121#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) 121#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
122#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 122#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
123#define EMULATED_HD_DISK_MINOR_OFFSET (0)
124#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
125#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
126#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
123 127
124#define DEV_NAME "xvd" /* name in /dev */ 128#define DEV_NAME "xvd" /* name in /dev */
125 129
@@ -281,7 +285,7 @@ static int blkif_queue_request(struct request *req)
281 info->shadow[id].request = req; 285 info->shadow[id].request = req;
282 286
283 ring_req->id = id; 287 ring_req->id = id;
284 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); 288 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
285 ring_req->handle = info->handle; 289 ring_req->handle = info->handle;
286 290
287 ring_req->operation = rq_data_dir(req) ? 291 ring_req->operation = rq_data_dir(req) ?
@@ -317,7 +321,7 @@ static int blkif_queue_request(struct request *req)
317 rq_data_dir(req) ); 321 rq_data_dir(req) );
318 322
319 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 323 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
320 ring_req->seg[i] = 324 ring_req->u.rw.seg[i] =
321 (struct blkif_request_segment) { 325 (struct blkif_request_segment) {
322 .gref = ref, 326 .gref = ref,
323 .first_sect = fsect, 327 .first_sect = fsect,
@@ -434,6 +438,65 @@ static void xlvbd_flush(struct blkfront_info *info)
434 info->feature_flush ? "enabled" : "disabled"); 438 info->feature_flush ? "enabled" : "disabled");
435} 439}
436 440
441static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
442{
443 int major;
444 major = BLKIF_MAJOR(vdevice);
445 *minor = BLKIF_MINOR(vdevice);
446 switch (major) {
447 case XEN_IDE0_MAJOR:
448 *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
449 *minor = ((*minor / 64) * PARTS_PER_DISK) +
450 EMULATED_HD_DISK_MINOR_OFFSET;
451 break;
452 case XEN_IDE1_MAJOR:
453 *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
454 *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
455 EMULATED_HD_DISK_MINOR_OFFSET;
456 break;
457 case XEN_SCSI_DISK0_MAJOR:
458 *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
459 *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
460 break;
461 case XEN_SCSI_DISK1_MAJOR:
462 case XEN_SCSI_DISK2_MAJOR:
463 case XEN_SCSI_DISK3_MAJOR:
464 case XEN_SCSI_DISK4_MAJOR:
465 case XEN_SCSI_DISK5_MAJOR:
466 case XEN_SCSI_DISK6_MAJOR:
467 case XEN_SCSI_DISK7_MAJOR:
468 *offset = (*minor / PARTS_PER_DISK) +
469 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
470 EMULATED_SD_DISK_NAME_OFFSET;
471 *minor = *minor +
472 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
473 EMULATED_SD_DISK_MINOR_OFFSET;
474 break;
475 case XEN_SCSI_DISK8_MAJOR:
476 case XEN_SCSI_DISK9_MAJOR:
477 case XEN_SCSI_DISK10_MAJOR:
478 case XEN_SCSI_DISK11_MAJOR:
479 case XEN_SCSI_DISK12_MAJOR:
480 case XEN_SCSI_DISK13_MAJOR:
481 case XEN_SCSI_DISK14_MAJOR:
482 case XEN_SCSI_DISK15_MAJOR:
483 *offset = (*minor / PARTS_PER_DISK) +
484 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
485 EMULATED_SD_DISK_NAME_OFFSET;
486 *minor = *minor +
487 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
488 EMULATED_SD_DISK_MINOR_OFFSET;
489 break;
490 case XENVBD_MAJOR:
491 *offset = *minor / PARTS_PER_DISK;
492 break;
493 default:
494 printk(KERN_WARNING "blkfront: your disk configuration is "
495 "incorrect, please use an xvd device instead\n");
496 return -ENODEV;
497 }
498 return 0;
499}
437 500
438static int xlvbd_alloc_gendisk(blkif_sector_t capacity, 501static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
439 struct blkfront_info *info, 502 struct blkfront_info *info,
@@ -441,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
441{ 504{
442 struct gendisk *gd; 505 struct gendisk *gd;
443 int nr_minors = 1; 506 int nr_minors = 1;
444 int err = -ENODEV; 507 int err;
445 unsigned int offset; 508 unsigned int offset;
446 int minor; 509 int minor;
447 int nr_parts; 510 int nr_parts;
@@ -456,12 +519,20 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
456 } 519 }
457 520
458 if (!VDEV_IS_EXTENDED(info->vdevice)) { 521 if (!VDEV_IS_EXTENDED(info->vdevice)) {
459 minor = BLKIF_MINOR(info->vdevice); 522 err = xen_translate_vdev(info->vdevice, &minor, &offset);
460 nr_parts = PARTS_PER_DISK; 523 if (err)
524 return err;
525 nr_parts = PARTS_PER_DISK;
461 } else { 526 } else {
462 minor = BLKIF_MINOR_EXT(info->vdevice); 527 minor = BLKIF_MINOR_EXT(info->vdevice);
463 nr_parts = PARTS_PER_EXT_DISK; 528 nr_parts = PARTS_PER_EXT_DISK;
529 offset = minor / nr_parts;
530 if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
531 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
532 "emulated IDE disks,\n\t choose an xvd device name"
533 "from xvde on\n", info->vdevice);
464 } 534 }
535 err = -ENODEV;
465 536
466 if ((minor % nr_parts) == 0) 537 if ((minor % nr_parts) == 0)
467 nr_minors = nr_parts; 538 nr_minors = nr_parts;
@@ -475,8 +546,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
475 if (gd == NULL) 546 if (gd == NULL)
476 goto release; 547 goto release;
477 548
478 offset = minor / nr_parts;
479
480 if (nr_minors > 1) { 549 if (nr_minors > 1) {
481 if (offset < 26) 550 if (offset < 26)
482 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); 551 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
@@ -615,7 +684,7 @@ static void blkif_completion(struct blk_shadow *s)
615{ 684{
616 int i; 685 int i;
617 for (i = 0; i < s->req.nr_segments; i++) 686 for (i = 0; i < s->req.nr_segments; i++)
618 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); 687 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
619} 688}
620 689
621static irqreturn_t blkif_interrupt(int irq, void *dev_id) 690static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -932,7 +1001,7 @@ static int blkif_recover(struct blkfront_info *info)
932 /* Rewrite any grant references invalidated by susp/resume. */ 1001 /* Rewrite any grant references invalidated by susp/resume. */
933 for (j = 0; j < req->nr_segments; j++) 1002 for (j = 0; j < req->nr_segments; j++)
934 gnttab_grant_foreign_access_ref( 1003 gnttab_grant_foreign_access_ref(
935 req->seg[j].gref, 1004 req->u.rw.seg[j].gref,
936 info->xbdev->otherend_id, 1005 info->xbdev->otherend_id,
937 pfn_to_mfn(info->shadow[req->id].frame[j]), 1006 pfn_to_mfn(info->shadow[req->id].frame[j]),
938 rq_data_dir(info->shadow[req->id].request)); 1007 rq_data_dir(info->shadow[req->id].request));
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index a126e614601f..6dcd55a74c0a 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -39,6 +39,11 @@ static struct usb_device_id ath3k_table[] = {
39 /* Atheros AR3011 with sflash firmware*/ 39 /* Atheros AR3011 with sflash firmware*/
40 { USB_DEVICE(0x0CF3, 0x3002) }, 40 { USB_DEVICE(0x0CF3, 0x3002) },
41 41
42 /* Atheros AR9285 Malbec with sflash firmware */
43 { USB_DEVICE(0x03F0, 0x311D) },
44
45 /* Atheros AR5BBU12 with sflash firmware */
46 { USB_DEVICE(0x0489, 0xE02C) },
42 { } /* Terminating entry */ 47 { } /* Terminating entry */
43}; 48};
44 49
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 1da773f899a2..700a3840fddc 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -102,6 +102,12 @@ static struct usb_device_id blacklist_table[] = {
102 /* Atheros 3011 with sflash firmware */ 102 /* Atheros 3011 with sflash firmware */
103 { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE }, 103 { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE },
104 104
105 /* Atheros AR9285 Malbec with sflash firmware */
106 { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE },
107
108 /* Atheros AR5BBU12 with sflash firmware */
109 { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE },
110
105 /* Broadcom BCM2035 */ 111 /* Broadcom BCM2035 */
106 { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU }, 112 { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU },
107 { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU }, 113 { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU },
@@ -826,7 +832,7 @@ static void btusb_work(struct work_struct *work)
826 832
827 if (hdev->conn_hash.sco_num > 0) { 833 if (hdev->conn_hash.sco_num > 0) {
828 if (!test_bit(BTUSB_DID_ISO_RESUME, &data->flags)) { 834 if (!test_bit(BTUSB_DID_ISO_RESUME, &data->flags)) {
829 err = usb_autopm_get_interface(data->isoc); 835 err = usb_autopm_get_interface(data->isoc ? data->isoc : data->intf);
830 if (err < 0) { 836 if (err < 0) {
831 clear_bit(BTUSB_ISOC_RUNNING, &data->flags); 837 clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
832 usb_kill_anchored_urbs(&data->isoc_anchor); 838 usb_kill_anchored_urbs(&data->isoc_anchor);
@@ -855,7 +861,7 @@ static void btusb_work(struct work_struct *work)
855 861
856 __set_isoc_interface(hdev, 0); 862 __set_isoc_interface(hdev, 0);
857 if (test_and_clear_bit(BTUSB_DID_ISO_RESUME, &data->flags)) 863 if (test_and_clear_bit(BTUSB_DID_ISO_RESUME, &data->flags))
858 usb_autopm_put_interface(data->isoc); 864 usb_autopm_put_interface(data->isoc ? data->isoc : data->intf);
859 } 865 }
860} 866}
861 867
@@ -1038,8 +1044,6 @@ static int btusb_probe(struct usb_interface *intf,
1038 1044
1039 usb_set_intfdata(intf, data); 1045 usb_set_intfdata(intf, data);
1040 1046
1041 usb_enable_autosuspend(interface_to_usbdev(intf));
1042
1043 return 0; 1047 return 0;
1044} 1048}
1045 1049
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 9252e85706ef..780498d76581 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -773,18 +773,23 @@ int __init agp_amd64_init(void)
773#else 773#else
774 printk(KERN_INFO PFX "You can boot with agp=try_unsupported\n"); 774 printk(KERN_INFO PFX "You can boot with agp=try_unsupported\n");
775#endif 775#endif
776 pci_unregister_driver(&agp_amd64_pci_driver);
776 return -ENODEV; 777 return -ENODEV;
777 } 778 }
778 779
779 /* First check that we have at least one AMD64 NB */ 780 /* First check that we have at least one AMD64 NB */
780 if (!pci_dev_present(amd_nb_misc_ids)) 781 if (!pci_dev_present(amd_nb_misc_ids)) {
782 pci_unregister_driver(&agp_amd64_pci_driver);
781 return -ENODEV; 783 return -ENODEV;
784 }
782 785
783 /* Look for any AGP bridge */ 786 /* Look for any AGP bridge */
784 agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table; 787 agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table;
785 err = driver_attach(&agp_amd64_pci_driver.driver); 788 err = driver_attach(&agp_amd64_pci_driver.driver);
786 if (err == 0 && agp_bridges_found == 0) 789 if (err == 0 && agp_bridges_found == 0) {
790 pci_unregister_driver(&agp_amd64_pci_driver);
787 err = -ENODEV; 791 err = -ENODEV;
792 }
788 } 793 }
789 return err; 794 return err;
790} 795}
diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h
index c195bfeade11..5feebe2800e9 100644
--- a/drivers/char/agp/intel-agp.h
+++ b/drivers/char/agp/intel-agp.h
@@ -130,6 +130,7 @@
130#define INTEL_GMCH_GMS_STOLEN_352M (0xd << 4) 130#define INTEL_GMCH_GMS_STOLEN_352M (0xd << 4)
131 131
132#define I915_IFPADDR 0x60 132#define I915_IFPADDR 0x60
133#define I830_HIC 0x70
133 134
134/* Intel 965G registers */ 135/* Intel 965G registers */
135#define I965_MSAC 0x62 136#define I965_MSAC 0x62
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index fab3d3265adb..0d09b537bb9a 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -21,6 +21,7 @@
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/agp_backend.h> 23#include <linux/agp_backend.h>
24#include <linux/delay.h>
24#include <asm/smp.h> 25#include <asm/smp.h>
25#include "agp.h" 26#include "agp.h"
26#include "intel-agp.h" 27#include "intel-agp.h"
@@ -70,12 +71,8 @@ static struct _intel_private {
70 u32 __iomem *gtt; /* I915G */ 71 u32 __iomem *gtt; /* I915G */
71 bool clear_fake_agp; /* on first access via agp, fill with scratch */ 72 bool clear_fake_agp; /* on first access via agp, fill with scratch */
72 int num_dcache_entries; 73 int num_dcache_entries;
73 union { 74 void __iomem *i9xx_flush_page;
74 void __iomem *i9xx_flush_page;
75 void *i8xx_flush_page;
76 };
77 char *i81x_gtt_table; 75 char *i81x_gtt_table;
78 struct page *i8xx_page;
79 struct resource ifp_resource; 76 struct resource ifp_resource;
80 int resource_valid; 77 int resource_valid;
81 struct page *scratch_page; 78 struct page *scratch_page;
@@ -722,28 +719,6 @@ static int intel_fake_agp_fetch_size(void)
722 719
723static void i830_cleanup(void) 720static void i830_cleanup(void)
724{ 721{
725 if (intel_private.i8xx_flush_page) {
726 kunmap(intel_private.i8xx_flush_page);
727 intel_private.i8xx_flush_page = NULL;
728 }
729
730 __free_page(intel_private.i8xx_page);
731 intel_private.i8xx_page = NULL;
732}
733
734static void intel_i830_setup_flush(void)
735{
736 /* return if we've already set the flush mechanism up */
737 if (intel_private.i8xx_page)
738 return;
739
740 intel_private.i8xx_page = alloc_page(GFP_KERNEL);
741 if (!intel_private.i8xx_page)
742 return;
743
744 intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
745 if (!intel_private.i8xx_flush_page)
746 i830_cleanup();
747} 722}
748 723
749/* The chipset_flush interface needs to get data that has already been 724/* The chipset_flush interface needs to get data that has already been
@@ -758,14 +733,27 @@ static void intel_i830_setup_flush(void)
758 */ 733 */
759static void i830_chipset_flush(void) 734static void i830_chipset_flush(void)
760{ 735{
761 unsigned int *pg = intel_private.i8xx_flush_page; 736 unsigned long timeout = jiffies + msecs_to_jiffies(1000);
737
738 /* Forcibly evict everything from the CPU write buffers.
739 * clflush appears to be insufficient.
740 */
741 wbinvd_on_all_cpus();
742
743 /* Now we've only seen documents for this magic bit on 855GM,
744 * we hope it exists for the other gen2 chipsets...
745 *
746 * Also works as advertised on my 845G.
747 */
748 writel(readl(intel_private.registers+I830_HIC) | (1<<31),
749 intel_private.registers+I830_HIC);
762 750
763 memset(pg, 0, 1024); 751 while (readl(intel_private.registers+I830_HIC) & (1<<31)) {
752 if (time_after(jiffies, timeout))
753 break;
764 754
765 if (cpu_has_clflush) 755 udelay(50);
766 clflush_cache_range(pg, 1024); 756 }
767 else if (wbinvd_on_all_cpus() != 0)
768 printk(KERN_ERR "Timed out waiting for cache flush.\n");
769} 757}
770 758
771static void i830_write_entry(dma_addr_t addr, unsigned int entry, 759static void i830_write_entry(dma_addr_t addr, unsigned int entry,
@@ -849,8 +837,6 @@ static int i830_setup(void)
849 837
850 intel_private.gtt_bus_addr = reg_addr + I810_PTE_BASE; 838 intel_private.gtt_bus_addr = reg_addr + I810_PTE_BASE;
851 839
852 intel_i830_setup_flush();
853
854 return 0; 840 return 0;
855} 841}
856 842
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index d31483c54883..beecd1cf9b99 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -198,3 +198,15 @@ config HW_RANDOM_NOMADIK
198 module will be called nomadik-rng. 198 module will be called nomadik-rng.
199 199
200 If unsure, say Y. 200 If unsure, say Y.
201
202config HW_RANDOM_PICOXCELL
203 tristate "Picochip picoXcell true random number generator support"
204 depends on HW_RANDOM && ARCH_PICOXCELL && PICOXCELL_PC3X3
205 ---help---
206 This driver provides kernel-side support for the Random Number
207 Generator hardware found on Picochip PC3x3 and later devices.
208
209 To compile this driver as a module, choose M here: the
210 module will be called picoxcell-rng.
211
212 If unsure, say Y.
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 4273308aa1e3..3db4eb8b19c0 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o
19obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o 19obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
20obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o 20obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o
21obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o 21obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
22obj-$(CONFIG_HW_RANDOM_PICOXCELL) += picoxcell-rng.o
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 06aad0831c73..2cc755a64302 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -91,7 +91,7 @@ static struct hwrng omap_rng_ops = {
91 91
92static int __devinit omap_rng_probe(struct platform_device *pdev) 92static int __devinit omap_rng_probe(struct platform_device *pdev)
93{ 93{
94 struct resource *res, *mem; 94 struct resource *res;
95 int ret; 95 int ret;
96 96
97 /* 97 /*
@@ -116,14 +116,12 @@ static int __devinit omap_rng_probe(struct platform_device *pdev)
116 if (!res) 116 if (!res)
117 return -ENOENT; 117 return -ENOENT;
118 118
119 mem = request_mem_region(res->start, resource_size(res), 119 if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
120 pdev->name);
121 if (mem == NULL) {
122 ret = -EBUSY; 120 ret = -EBUSY;
123 goto err_region; 121 goto err_region;
124 } 122 }
125 123
126 dev_set_drvdata(&pdev->dev, mem); 124 dev_set_drvdata(&pdev->dev, res);
127 rng_base = ioremap(res->start, resource_size(res)); 125 rng_base = ioremap(res->start, resource_size(res));
128 if (!rng_base) { 126 if (!rng_base) {
129 ret = -ENOMEM; 127 ret = -ENOMEM;
@@ -146,7 +144,7 @@ err_register:
146 iounmap(rng_base); 144 iounmap(rng_base);
147 rng_base = NULL; 145 rng_base = NULL;
148err_ioremap: 146err_ioremap:
149 release_resource(mem); 147 release_mem_region(res->start, resource_size(res));
150err_region: 148err_region:
151 if (cpu_is_omap24xx()) { 149 if (cpu_is_omap24xx()) {
152 clk_disable(rng_ick); 150 clk_disable(rng_ick);
@@ -157,7 +155,7 @@ err_region:
157 155
158static int __exit omap_rng_remove(struct platform_device *pdev) 156static int __exit omap_rng_remove(struct platform_device *pdev)
159{ 157{
160 struct resource *mem = dev_get_drvdata(&pdev->dev); 158 struct resource *res = dev_get_drvdata(&pdev->dev);
161 159
162 hwrng_unregister(&omap_rng_ops); 160 hwrng_unregister(&omap_rng_ops);
163 161
@@ -170,7 +168,7 @@ static int __exit omap_rng_remove(struct platform_device *pdev)
170 clk_put(rng_ick); 168 clk_put(rng_ick);
171 } 169 }
172 170
173 release_resource(mem); 171 release_mem_region(res->start, resource_size(res));
174 rng_base = NULL; 172 rng_base = NULL;
175 173
176 return 0; 174 return 0;
diff --git a/drivers/char/hw_random/picoxcell-rng.c b/drivers/char/hw_random/picoxcell-rng.c
new file mode 100644
index 000000000000..990d55a5e3e8
--- /dev/null
+++ b/drivers/char/hw_random/picoxcell-rng.c
@@ -0,0 +1,208 @@
1/*
2 * Copyright (c) 2010-2011 Picochip Ltd., Jamie Iles
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * All enquiries to support@picochip.com
9 */
10#include <linux/clk.h>
11#include <linux/delay.h>
12#include <linux/err.h>
13#include <linux/hw_random.h>
14#include <linux/io.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/platform_device.h>
18
19#define DATA_REG_OFFSET 0x0200
20#define CSR_REG_OFFSET 0x0278
21#define CSR_OUT_EMPTY_MASK (1 << 24)
22#define CSR_FAULT_MASK (1 << 1)
23#define TRNG_BLOCK_RESET_MASK (1 << 0)
24#define TAI_REG_OFFSET 0x0380
25
26/*
27 * The maximum amount of time in microseconds to spend waiting for data if the
28 * core wants us to wait. The TRNG should generate 32 bits every 320ns so a
29 * timeout of 20us seems reasonable. The TRNG does builtin tests of the data
30 * for randomness so we can't always assume there is data present.
31 */
32#define PICO_TRNG_TIMEOUT 20
33
34static void __iomem *rng_base;
35static struct clk *rng_clk;
36struct device *rng_dev;
37
38static inline u32 picoxcell_trng_read_csr(void)
39{
40 return __raw_readl(rng_base + CSR_REG_OFFSET);
41}
42
43static inline bool picoxcell_trng_is_empty(void)
44{
45 return picoxcell_trng_read_csr() & CSR_OUT_EMPTY_MASK;
46}
47
48/*
49 * Take the random number generator out of reset and make sure the interrupts
50 * are masked. We shouldn't need to get large amounts of random bytes so just
51 * poll the status register. The hardware generates 32 bits every 320ns so we
52 * shouldn't have to wait long enough to warrant waiting for an IRQ.
53 */
54static void picoxcell_trng_start(void)
55{
56 __raw_writel(0, rng_base + TAI_REG_OFFSET);
57 __raw_writel(0, rng_base + CSR_REG_OFFSET);
58}
59
60static void picoxcell_trng_reset(void)
61{
62 __raw_writel(TRNG_BLOCK_RESET_MASK, rng_base + CSR_REG_OFFSET);
63 __raw_writel(TRNG_BLOCK_RESET_MASK, rng_base + TAI_REG_OFFSET);
64 picoxcell_trng_start();
65}
66
67/*
68 * Get some random data from the random number generator. The hw_random core
69 * layer provides us with locking.
70 */
71static int picoxcell_trng_read(struct hwrng *rng, void *buf, size_t max,
72 bool wait)
73{
74 int i;
75
76 /* Wait for some data to become available. */
77 for (i = 0; i < PICO_TRNG_TIMEOUT && picoxcell_trng_is_empty(); ++i) {
78 if (!wait)
79 return 0;
80
81 udelay(1);
82 }
83
84 if (picoxcell_trng_read_csr() & CSR_FAULT_MASK) {
85 dev_err(rng_dev, "fault detected, resetting TRNG\n");
86 picoxcell_trng_reset();
87 return -EIO;
88 }
89
90 if (i == PICO_TRNG_TIMEOUT)
91 return 0;
92
93 *(u32 *)buf = __raw_readl(rng_base + DATA_REG_OFFSET);
94 return sizeof(u32);
95}
96
97static struct hwrng picoxcell_trng = {
98 .name = "picoxcell",
99 .read = picoxcell_trng_read,
100};
101
102static int picoxcell_trng_probe(struct platform_device *pdev)
103{
104 int ret;
105 struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
106
107 if (!mem) {
108 dev_warn(&pdev->dev, "no memory resource\n");
109 return -ENOMEM;
110 }
111
112 if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
113 "picoxcell_trng")) {
114 dev_warn(&pdev->dev, "unable to request io mem\n");
115 return -EBUSY;
116 }
117
118 rng_base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
119 if (!rng_base) {
120 dev_warn(&pdev->dev, "unable to remap io mem\n");
121 return -ENOMEM;
122 }
123
124 rng_clk = clk_get(&pdev->dev, NULL);
125 if (IS_ERR(rng_clk)) {
126 dev_warn(&pdev->dev, "no clk\n");
127 return PTR_ERR(rng_clk);
128 }
129
130 ret = clk_enable(rng_clk);
131 if (ret) {
132 dev_warn(&pdev->dev, "unable to enable clk\n");
133 goto err_enable;
134 }
135
136 picoxcell_trng_start();
137 ret = hwrng_register(&picoxcell_trng);
138 if (ret)
139 goto err_register;
140
141 rng_dev = &pdev->dev;
142 dev_info(&pdev->dev, "pixoxcell random number generator active\n");
143
144 return 0;
145
146err_register:
147 clk_disable(rng_clk);
148err_enable:
149 clk_put(rng_clk);
150
151 return ret;
152}
153
154static int __devexit picoxcell_trng_remove(struct platform_device *pdev)
155{
156 hwrng_unregister(&picoxcell_trng);
157 clk_disable(rng_clk);
158 clk_put(rng_clk);
159
160 return 0;
161}
162
163#ifdef CONFIG_PM
164static int picoxcell_trng_suspend(struct device *dev)
165{
166 clk_disable(rng_clk);
167
168 return 0;
169}
170
171static int picoxcell_trng_resume(struct device *dev)
172{
173 return clk_enable(rng_clk);
174}
175
176static const struct dev_pm_ops picoxcell_trng_pm_ops = {
177 .suspend = picoxcell_trng_suspend,
178 .resume = picoxcell_trng_resume,
179};
180#endif /* CONFIG_PM */
181
182static struct platform_driver picoxcell_trng_driver = {
183 .probe = picoxcell_trng_probe,
184 .remove = __devexit_p(picoxcell_trng_remove),
185 .driver = {
186 .name = "picoxcell-trng",
187 .owner = THIS_MODULE,
188#ifdef CONFIG_PM
189 .pm = &picoxcell_trng_pm_ops,
190#endif /* CONFIG_PM */
191 },
192};
193
194static int __init picoxcell_trng_init(void)
195{
196 return platform_driver_register(&picoxcell_trng_driver);
197}
198module_init(picoxcell_trng_init);
199
200static void __exit picoxcell_trng_exit(void)
201{
202 platform_driver_unregister(&picoxcell_trng_driver);
203}
204module_exit(picoxcell_trng_exit);
205
206MODULE_LICENSE("GPL");
207MODULE_AUTHOR("Jamie Iles");
208MODULE_DESCRIPTION("Picochip picoXcell TRNG driver");
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 7855f9f45b8e..62787e30d508 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -900,6 +900,14 @@ static void sender(void *send_info,
900 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec); 900 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
901#endif 901#endif
902 902
903 /*
904 * last_timeout_jiffies is updated here to avoid
905 * smi_timeout() handler passing very large time_diff
906 * value to smi_event_handler() that causes
907 * the send command to abort.
908 */
909 smi_info->last_timeout_jiffies = jiffies;
910
903 mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES); 911 mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES);
904 912
905 if (smi_info->thread) 913 if (smi_info->thread)
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index e6d75627c6c8..33dc2298af73 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -53,6 +53,8 @@ MODULE_LICENSE("GPL");
53 53
54#define RTC_BITS 55 /* 55 bits for this implementation */ 54#define RTC_BITS 55 /* 55 bits for this implementation */
55 55
56static struct k_clock sgi_clock;
57
56extern unsigned long sn_rtc_cycles_per_second; 58extern unsigned long sn_rtc_cycles_per_second;
57 59
58#define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC)) 60#define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC))
@@ -487,7 +489,7 @@ static int sgi_clock_get(clockid_t clockid, struct timespec *tp)
487 return 0; 489 return 0;
488}; 490};
489 491
490static int sgi_clock_set(clockid_t clockid, struct timespec *tp) 492static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp)
491{ 493{
492 494
493 u64 nsec; 495 u64 nsec;
@@ -763,15 +765,21 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
763 return err; 765 return err;
764} 766}
765 767
768static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp)
769{
770 tp->tv_sec = 0;
771 tp->tv_nsec = sgi_clock_period;
772 return 0;
773}
774
766static struct k_clock sgi_clock = { 775static struct k_clock sgi_clock = {
767 .res = 0, 776 .clock_set = sgi_clock_set,
768 .clock_set = sgi_clock_set, 777 .clock_get = sgi_clock_get,
769 .clock_get = sgi_clock_get, 778 .clock_getres = sgi_clock_getres,
770 .timer_create = sgi_timer_create, 779 .timer_create = sgi_timer_create,
771 .nsleep = do_posix_clock_nonanosleep, 780 .timer_set = sgi_timer_set,
772 .timer_set = sgi_timer_set, 781 .timer_del = sgi_timer_del,
773 .timer_del = sgi_timer_del, 782 .timer_get = sgi_timer_get
774 .timer_get = sgi_timer_get
775}; 783};
776 784
777/** 785/**
@@ -831,8 +839,8 @@ static int __init mmtimer_init(void)
831 (unsigned long) node); 839 (unsigned long) node);
832 } 840 }
833 841
834 sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second; 842 sgi_clock_period = NSEC_PER_SEC / sn_rtc_cycles_per_second;
835 register_posix_clock(CLOCK_SGI_CYCLE, &sgi_clock); 843 posix_timers_register_clock(CLOCK_SGI_CYCLE, &sgi_clock);
836 844
837 printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION, 845 printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION,
838 sn_rtc_cycles_per_second/(unsigned long)1E6); 846 sn_rtc_cycles_per_second/(unsigned long)1E6);
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 777181a2e603..bcbbc71febb7 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -830,8 +830,7 @@ static void monitor_card(unsigned long p)
830 test_bit(IS_ANY_T1, &dev->flags))) { 830 test_bit(IS_ANY_T1, &dev->flags))) {
831 DEBUGP(4, dev, "Perform AUTOPPS\n"); 831 DEBUGP(4, dev, "Perform AUTOPPS\n");
832 set_bit(IS_AUTOPPS_ACT, &dev->flags); 832 set_bit(IS_AUTOPPS_ACT, &dev->flags);
833 ptsreq.protocol = ptsreq.protocol = 833 ptsreq.protocol = (0x01 << dev->proto);
834 (0x01 << dev->proto);
835 ptsreq.flags = 0x01; 834 ptsreq.flags = 0x01;
836 ptsreq.pts1 = 0x00; 835 ptsreq.pts1 = 0x00;
837 ptsreq.pts2 = 0x00; 836 ptsreq.pts2 = 0x00;
diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 94b8eb4d691d..444155a305ae 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -78,7 +78,6 @@ static void signalled_reboot_callback(void *callback_data)
78static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data) 78static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
79{ 79{
80 struct ipw_dev *ipw = priv_data; 80 struct ipw_dev *ipw = priv_data;
81 struct resource *io_resource;
82 int ret; 81 int ret;
83 82
84 p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH; 83 p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
@@ -92,9 +91,12 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
92 if (ret) 91 if (ret)
93 return ret; 92 return ret;
94 93
95 io_resource = request_region(p_dev->resource[0]->start, 94 if (!request_region(p_dev->resource[0]->start,
96 resource_size(p_dev->resource[0]), 95 resource_size(p_dev->resource[0]),
97 IPWIRELESS_PCCARD_NAME); 96 IPWIRELESS_PCCARD_NAME)) {
97 ret = -EBUSY;
98 goto exit;
99 }
98 100
99 p_dev->resource[2]->flags |= 101 p_dev->resource[2]->flags |=
100 WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE; 102 WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE;
@@ -105,22 +107,25 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
105 107
106 ret = pcmcia_map_mem_page(p_dev, p_dev->resource[2], p_dev->card_addr); 108 ret = pcmcia_map_mem_page(p_dev, p_dev->resource[2], p_dev->card_addr);
107 if (ret != 0) 109 if (ret != 0)
108 goto exit2; 110 goto exit1;
109 111
110 ipw->is_v2_card = resource_size(p_dev->resource[2]) == 0x100; 112 ipw->is_v2_card = resource_size(p_dev->resource[2]) == 0x100;
111 113
112 ipw->attr_memory = ioremap(p_dev->resource[2]->start, 114 ipw->common_memory = ioremap(p_dev->resource[2]->start,
113 resource_size(p_dev->resource[2])); 115 resource_size(p_dev->resource[2]));
114 request_mem_region(p_dev->resource[2]->start, 116 if (!request_mem_region(p_dev->resource[2]->start,
115 resource_size(p_dev->resource[2]), 117 resource_size(p_dev->resource[2]),
116 IPWIRELESS_PCCARD_NAME); 118 IPWIRELESS_PCCARD_NAME)) {
119 ret = -EBUSY;
120 goto exit2;
121 }
117 122
118 p_dev->resource[3]->flags |= WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM | 123 p_dev->resource[3]->flags |= WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM |
119 WIN_ENABLE; 124 WIN_ENABLE;
120 p_dev->resource[3]->end = 0; /* this used to be 0x1000 */ 125 p_dev->resource[3]->end = 0; /* this used to be 0x1000 */
121 ret = pcmcia_request_window(p_dev, p_dev->resource[3], 0); 126 ret = pcmcia_request_window(p_dev, p_dev->resource[3], 0);
122 if (ret != 0) 127 if (ret != 0)
123 goto exit2; 128 goto exit3;
124 129
125 ret = pcmcia_map_mem_page(p_dev, p_dev->resource[3], 0); 130 ret = pcmcia_map_mem_page(p_dev, p_dev->resource[3], 0);
126 if (ret != 0) 131 if (ret != 0)
@@ -128,23 +133,28 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
128 133
129 ipw->attr_memory = ioremap(p_dev->resource[3]->start, 134 ipw->attr_memory = ioremap(p_dev->resource[3]->start,
130 resource_size(p_dev->resource[3])); 135 resource_size(p_dev->resource[3]));
131 request_mem_region(p_dev->resource[3]->start, 136 if (!request_mem_region(p_dev->resource[3]->start,
132 resource_size(p_dev->resource[3]), 137 resource_size(p_dev->resource[3]),
133 IPWIRELESS_PCCARD_NAME); 138 IPWIRELESS_PCCARD_NAME)) {
139 ret = -EBUSY;
140 goto exit4;
141 }
134 142
135 return 0; 143 return 0;
136 144
145exit4:
146 iounmap(ipw->attr_memory);
137exit3: 147exit3:
148 release_mem_region(p_dev->resource[2]->start,
149 resource_size(p_dev->resource[2]));
138exit2: 150exit2:
139 if (ipw->common_memory) { 151 iounmap(ipw->common_memory);
140 release_mem_region(p_dev->resource[2]->start,
141 resource_size(p_dev->resource[2]));
142 iounmap(ipw->common_memory);
143 }
144exit1: 152exit1:
145 release_resource(io_resource); 153 release_region(p_dev->resource[0]->start,
154 resource_size(p_dev->resource[0]));
155exit:
146 pcmcia_disable_device(p_dev); 156 pcmcia_disable_device(p_dev);
147 return -1; 157 return ret;
148} 158}
149 159
150static int config_ipwireless(struct ipw_dev *ipw) 160static int config_ipwireless(struct ipw_dev *ipw)
@@ -219,6 +229,8 @@ exit:
219 229
220static void release_ipwireless(struct ipw_dev *ipw) 230static void release_ipwireless(struct ipw_dev *ipw)
221{ 231{
232 release_region(ipw->link->resource[0]->start,
233 resource_size(ipw->link->resource[0]));
222 if (ipw->common_memory) { 234 if (ipw->common_memory) {
223 release_mem_region(ipw->link->resource[2]->start, 235 release_mem_region(ipw->link->resource[2]->start,
224 resource_size(ipw->link->resource[2])); 236 resource_size(ipw->link->resource[2]));
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 72a4fcb17745..5e29e8031bbc 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -128,6 +128,7 @@
128 * void add_input_randomness(unsigned int type, unsigned int code, 128 * void add_input_randomness(unsigned int type, unsigned int code,
129 * unsigned int value); 129 * unsigned int value);
130 * void add_interrupt_randomness(int irq); 130 * void add_interrupt_randomness(int irq);
131 * void add_disk_randomness(struct gendisk *disk);
131 * 132 *
132 * add_input_randomness() uses the input layer interrupt timing, as well as 133 * add_input_randomness() uses the input layer interrupt timing, as well as
133 * the event type information from the hardware. 134 * the event type information from the hardware.
@@ -136,9 +137,15 @@
136 * inputs to the entropy pool. Note that not all interrupts are good 137 * inputs to the entropy pool. Note that not all interrupts are good
137 * sources of randomness! For example, the timer interrupts is not a 138 * sources of randomness! For example, the timer interrupts is not a
138 * good choice, because the periodicity of the interrupts is too 139 * good choice, because the periodicity of the interrupts is too
139 * regular, and hence predictable to an attacker. Disk interrupts are 140 * regular, and hence predictable to an attacker. Network Interface
140 * a better measure, since the timing of the disk interrupts are more 141 * Controller interrupts are a better measure, since the timing of the
141 * unpredictable. 142 * NIC interrupts are more unpredictable.
143 *
144 * add_disk_randomness() uses what amounts to the seek time of block
145 * layer request events, on a per-disk_devt basis, as input to the
146 * entropy pool. Note that high-speed solid state drives with very low
147 * seek times do not make for good sources of entropy, as their seek
148 * times are usually fairly consistent.
142 * 149 *
143 * All of these routines try to estimate how many bits of randomness a 150 * All of these routines try to estimate how many bits of randomness a
144 * particular randomness source. They do this by keeping track of the 151 * particular randomness source. They do this by keeping track of the
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index faf5a2c65926..1f46f1cd9225 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -364,14 +364,12 @@ unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip,
364 tpm_protected_ordinal_duration[ordinal & 364 tpm_protected_ordinal_duration[ordinal &
365 TPM_PROTECTED_ORDINAL_MASK]; 365 TPM_PROTECTED_ORDINAL_MASK];
366 366
367 if (duration_idx != TPM_UNDEFINED) { 367 if (duration_idx != TPM_UNDEFINED)
368 duration = chip->vendor.duration[duration_idx]; 368 duration = chip->vendor.duration[duration_idx];
369 /* if duration is 0, it's because chip->vendor.duration wasn't */ 369 if (duration <= 0)
370 /* filled yet, so we set the lowest timeout just to give enough */
371 /* time for tpm_get_timeouts() to succeed */
372 return (duration <= 0 ? HZ : duration);
373 } else
374 return 2 * 60 * HZ; 370 return 2 * 60 * HZ;
371 else
372 return duration;
375} 373}
376EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration); 374EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
377 375
@@ -577,11 +575,9 @@ duration:
577 if (rc) 575 if (rc)
578 return; 576 return;
579 577
580 if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || 578 if (be32_to_cpu(tpm_cmd.header.out.return_code)
581 be32_to_cpu(tpm_cmd.header.out.length) 579 != 3 * sizeof(u32))
582 != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
583 return; 580 return;
584
585 duration_cap = &tpm_cmd.params.getcap_out.cap.duration; 581 duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
586 chip->vendor.duration[TPM_SHORT] = 582 chip->vendor.duration[TPM_SHORT] =
587 usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short)); 583 usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
@@ -941,18 +937,6 @@ ssize_t tpm_show_caps_1_2(struct device * dev,
941} 937}
942EXPORT_SYMBOL_GPL(tpm_show_caps_1_2); 938EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
943 939
944ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
945 char *buf)
946{
947 struct tpm_chip *chip = dev_get_drvdata(dev);
948
949 return sprintf(buf, "%d %d %d\n",
950 jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
951 jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
952 jiffies_to_usecs(chip->vendor.duration[TPM_LONG]));
953}
954EXPORT_SYMBOL_GPL(tpm_show_timeouts);
955
956ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr, 940ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
957 const char *buf, size_t count) 941 const char *buf, size_t count)
958{ 942{
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index d84ff772c26f..72ddb031b69a 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -56,8 +56,6 @@ extern ssize_t tpm_show_owned(struct device *, struct device_attribute *attr,
56 char *); 56 char *);
57extern ssize_t tpm_show_temp_deactivated(struct device *, 57extern ssize_t tpm_show_temp_deactivated(struct device *,
58 struct device_attribute *attr, char *); 58 struct device_attribute *attr, char *);
59extern ssize_t tpm_show_timeouts(struct device *,
60 struct device_attribute *attr, char *);
61 59
62struct tpm_chip; 60struct tpm_chip;
63 61
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 0d1d38e5f266..dd21df55689d 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -376,7 +376,6 @@ static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
376 NULL); 376 NULL);
377static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); 377static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
378static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); 378static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
379static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
380 379
381static struct attribute *tis_attrs[] = { 380static struct attribute *tis_attrs[] = {
382 &dev_attr_pubek.attr, 381 &dev_attr_pubek.attr,
@@ -386,8 +385,7 @@ static struct attribute *tis_attrs[] = {
386 &dev_attr_owned.attr, 385 &dev_attr_owned.attr,
387 &dev_attr_temp_deactivated.attr, 386 &dev_attr_temp_deactivated.attr,
388 &dev_attr_caps.attr, 387 &dev_attr_caps.attr,
389 &dev_attr_cancel.attr, 388 &dev_attr_cancel.attr, NULL,
390 &dev_attr_timeouts.attr, NULL,
391}; 389};
392 390
393static struct attribute_group tis_attr_grp = { 391static struct attribute_group tis_attr_grp = {
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 490393186338..84b164d1eb2b 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -388,6 +388,10 @@ static void discard_port_data(struct port *port)
388 unsigned int len; 388 unsigned int len;
389 int ret; 389 int ret;
390 390
391 if (!port->portdev) {
392 /* Device has been unplugged. vqs are already gone. */
393 return;
394 }
391 vq = port->in_vq; 395 vq = port->in_vq;
392 if (port->inbuf) 396 if (port->inbuf)
393 buf = port->inbuf; 397 buf = port->inbuf;
@@ -470,6 +474,10 @@ static void reclaim_consumed_buffers(struct port *port)
470 void *buf; 474 void *buf;
471 unsigned int len; 475 unsigned int len;
472 476
477 if (!port->portdev) {
478 /* Device has been unplugged. vqs are already gone. */
479 return;
480 }
473 while ((buf = virtqueue_get_buf(port->out_vq, &len))) { 481 while ((buf = virtqueue_get_buf(port->out_vq, &len))) {
474 kfree(buf); 482 kfree(buf);
475 port->outvq_full = false; 483 port->outvq_full = false;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1109f6848a43..5cb4d09919d6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1919,8 +1919,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1919 1919
1920 ret = sysdev_driver_register(&cpu_sysdev_class, 1920 ret = sysdev_driver_register(&cpu_sysdev_class,
1921 &cpufreq_sysdev_driver); 1921 &cpufreq_sysdev_driver);
1922 if (ret)
1923 goto err_null_driver;
1922 1924
1923 if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) { 1925 if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1924 int i; 1926 int i;
1925 ret = -ENODEV; 1927 ret = -ENODEV;
1926 1928
@@ -1935,21 +1937,22 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1935 if (ret) { 1937 if (ret) {
1936 dprintk("no CPU initialized for driver %s\n", 1938 dprintk("no CPU initialized for driver %s\n",
1937 driver_data->name); 1939 driver_data->name);
1938 sysdev_driver_unregister(&cpu_sysdev_class, 1940 goto err_sysdev_unreg;
1939 &cpufreq_sysdev_driver);
1940
1941 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1942 cpufreq_driver = NULL;
1943 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1944 } 1941 }
1945 } 1942 }
1946 1943
1947 if (!ret) { 1944 register_hotcpu_notifier(&cpufreq_cpu_notifier);
1948 register_hotcpu_notifier(&cpufreq_cpu_notifier); 1945 dprintk("driver %s up and running\n", driver_data->name);
1949 dprintk("driver %s up and running\n", driver_data->name); 1946 cpufreq_debug_enable_ratelimit();
1950 cpufreq_debug_enable_ratelimit();
1951 }
1952 1947
1948 return 0;
1949err_sysdev_unreg:
1950 sysdev_driver_unregister(&cpu_sysdev_class,
1951 &cpufreq_sysdev_driver);
1952err_null_driver:
1953 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1954 cpufreq_driver = NULL;
1955 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1953 return ret; 1956 return ret;
1954} 1957}
1955EXPORT_SYMBOL_GPL(cpufreq_register_driver); 1958EXPORT_SYMBOL_GPL(cpufreq_register_driver);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 526bfbf69611..94284c8473b1 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -81,8 +81,6 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
81 */ 81 */
82static DEFINE_MUTEX(dbs_mutex); 82static DEFINE_MUTEX(dbs_mutex);
83 83
84static struct workqueue_struct *kconservative_wq;
85
86static struct dbs_tuners { 84static struct dbs_tuners {
87 unsigned int sampling_rate; 85 unsigned int sampling_rate;
88 unsigned int sampling_down_factor; 86 unsigned int sampling_down_factor;
@@ -560,7 +558,7 @@ static void do_dbs_timer(struct work_struct *work)
560 558
561 dbs_check_cpu(dbs_info); 559 dbs_check_cpu(dbs_info);
562 560
563 queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay); 561 schedule_delayed_work_on(cpu, &dbs_info->work, delay);
564 mutex_unlock(&dbs_info->timer_mutex); 562 mutex_unlock(&dbs_info->timer_mutex);
565} 563}
566 564
@@ -572,8 +570,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
572 570
573 dbs_info->enable = 1; 571 dbs_info->enable = 1;
574 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); 572 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
575 queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, 573 schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
576 delay);
577} 574}
578 575
579static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 576static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
@@ -716,25 +713,12 @@ struct cpufreq_governor cpufreq_gov_conservative = {
716 713
717static int __init cpufreq_gov_dbs_init(void) 714static int __init cpufreq_gov_dbs_init(void)
718{ 715{
719 int err; 716 return cpufreq_register_governor(&cpufreq_gov_conservative);
720
721 kconservative_wq = create_workqueue("kconservative");
722 if (!kconservative_wq) {
723 printk(KERN_ERR "Creation of kconservative failed\n");
724 return -EFAULT;
725 }
726
727 err = cpufreq_register_governor(&cpufreq_gov_conservative);
728 if (err)
729 destroy_workqueue(kconservative_wq);
730
731 return err;
732} 717}
733 718
734static void __exit cpufreq_gov_dbs_exit(void) 719static void __exit cpufreq_gov_dbs_exit(void)
735{ 720{
736 cpufreq_unregister_governor(&cpufreq_gov_conservative); 721 cpufreq_unregister_governor(&cpufreq_gov_conservative);
737 destroy_workqueue(kconservative_wq);
738} 722}
739 723
740 724
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index c631f27a3dcc..58aa85ea5ec6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -104,8 +104,6 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
104 */ 104 */
105static DEFINE_MUTEX(dbs_mutex); 105static DEFINE_MUTEX(dbs_mutex);
106 106
107static struct workqueue_struct *kondemand_wq;
108
109static struct dbs_tuners { 107static struct dbs_tuners {
110 unsigned int sampling_rate; 108 unsigned int sampling_rate;
111 unsigned int up_threshold; 109 unsigned int up_threshold;
@@ -667,7 +665,7 @@ static void do_dbs_timer(struct work_struct *work)
667 __cpufreq_driver_target(dbs_info->cur_policy, 665 __cpufreq_driver_target(dbs_info->cur_policy,
668 dbs_info->freq_lo, CPUFREQ_RELATION_H); 666 dbs_info->freq_lo, CPUFREQ_RELATION_H);
669 } 667 }
670 queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); 668 schedule_delayed_work_on(cpu, &dbs_info->work, delay);
671 mutex_unlock(&dbs_info->timer_mutex); 669 mutex_unlock(&dbs_info->timer_mutex);
672} 670}
673 671
@@ -681,8 +679,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
681 679
682 dbs_info->sample_type = DBS_NORMAL_SAMPLE; 680 dbs_info->sample_type = DBS_NORMAL_SAMPLE;
683 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); 681 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
684 queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work, 682 schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
685 delay);
686} 683}
687 684
688static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 685static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
@@ -814,7 +811,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
814 811
815static int __init cpufreq_gov_dbs_init(void) 812static int __init cpufreq_gov_dbs_init(void)
816{ 813{
817 int err;
818 cputime64_t wall; 814 cputime64_t wall;
819 u64 idle_time; 815 u64 idle_time;
820 int cpu = get_cpu(); 816 int cpu = get_cpu();
@@ -838,22 +834,12 @@ static int __init cpufreq_gov_dbs_init(void)
838 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); 834 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
839 } 835 }
840 836
841 kondemand_wq = create_workqueue("kondemand"); 837 return cpufreq_register_governor(&cpufreq_gov_ondemand);
842 if (!kondemand_wq) {
843 printk(KERN_ERR "Creation of kondemand failed\n");
844 return -EFAULT;
845 }
846 err = cpufreq_register_governor(&cpufreq_gov_ondemand);
847 if (err)
848 destroy_workqueue(kondemand_wq);
849
850 return err;
851} 838}
852 839
853static void __exit cpufreq_gov_dbs_exit(void) 840static void __exit cpufreq_gov_dbs_exit(void)
854{ 841{
855 cpufreq_unregister_governor(&cpufreq_gov_ondemand); 842 cpufreq_unregister_governor(&cpufreq_gov_ondemand);
856 destroy_workqueue(kondemand_wq);
857} 843}
858 844
859 845
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index eab2cf7a0269..e54185223c8c 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -252,4 +252,21 @@ config CRYPTO_DEV_OMAP_AES
252 OMAP processors have AES module accelerator. Select this if you 252 OMAP processors have AES module accelerator. Select this if you
253 want to use the OMAP module for AES algorithms. 253 want to use the OMAP module for AES algorithms.
254 254
255config CRYPTO_DEV_PICOXCELL
256 tristate "Support for picoXcell IPSEC and Layer2 crypto engines"
257 depends on ARCH_PICOXCELL
258 select CRYPTO_AES
259 select CRYPTO_AUTHENC
260 select CRYPTO_ALGAPI
261 select CRYPTO_DES
262 select CRYPTO_CBC
263 select CRYPTO_ECB
264 select CRYPTO_SEQIV
265 help
266 This option enables support for the hardware offload engines in the
267 Picochip picoXcell SoC devices. Select this for IPSEC ESP offload
268 and for 3gpp Layer 2 ciphering support.
269
270 Saying m here will build a module named pipcoxcell_crypto.
271
255endif # CRYPTO_HW 272endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 256697330a41..5203e34248d7 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
10obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ 10obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
11obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o 11obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
12obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o 12obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
13 13obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index add2a1a72ba4..5b970d9e9956 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -839,9 +839,9 @@ static int omap_aes_probe(struct platform_device *pdev)
839 839
840 /* Initializing the clock */ 840 /* Initializing the clock */
841 dd->iclk = clk_get(dev, "ick"); 841 dd->iclk = clk_get(dev, "ick");
842 if (!dd->iclk) { 842 if (IS_ERR(dd->iclk)) {
843 dev_err(dev, "clock intialization failed.\n"); 843 dev_err(dev, "clock intialization failed.\n");
844 err = -ENODEV; 844 err = PTR_ERR(dd->iclk);
845 goto err_res; 845 goto err_res;
846 } 846 }
847 847
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 2e71123516e0..465cde3e4f60 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -1206,9 +1206,9 @@ static int __devinit omap_sham_probe(struct platform_device *pdev)
1206 1206
1207 /* Initializing the clock */ 1207 /* Initializing the clock */
1208 dd->iclk = clk_get(dev, "ick"); 1208 dd->iclk = clk_get(dev, "ick");
1209 if (!dd->iclk) { 1209 if (IS_ERR(dd->iclk)) {
1210 dev_err(dev, "clock intialization failed.\n"); 1210 dev_err(dev, "clock intialization failed.\n");
1211 err = -ENODEV; 1211 err = PTR_ERR(dd->iclk);
1212 goto clk_err; 1212 goto clk_err;
1213 } 1213 }
1214 1214
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
new file mode 100644
index 000000000000..b092d0a65837
--- /dev/null
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -0,0 +1,1867 @@
1/*
2 * Copyright (c) 2010-2011 Picochip Ltd., Jamie Iles
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18#include <crypto/aead.h>
19#include <crypto/aes.h>
20#include <crypto/algapi.h>
21#include <crypto/authenc.h>
22#include <crypto/des.h>
23#include <crypto/md5.h>
24#include <crypto/sha.h>
25#include <crypto/internal/skcipher.h>
26#include <linux/clk.h>
27#include <linux/crypto.h>
28#include <linux/delay.h>
29#include <linux/dma-mapping.h>
30#include <linux/dmapool.h>
31#include <linux/err.h>
32#include <linux/init.h>
33#include <linux/interrupt.h>
34#include <linux/io.h>
35#include <linux/list.h>
36#include <linux/module.h>
37#include <linux/platform_device.h>
38#include <linux/pm.h>
39#include <linux/rtnetlink.h>
40#include <linux/scatterlist.h>
41#include <linux/sched.h>
42#include <linux/slab.h>
43#include <linux/timer.h>
44
45#include "picoxcell_crypto_regs.h"
46
47/*
48 * The threshold for the number of entries in the CMD FIFO available before
49 * the CMD0_CNT interrupt is raised. Increasing this value will reduce the
50 * number of interrupts raised to the CPU.
51 */
52#define CMD0_IRQ_THRESHOLD 1
53
54/*
55 * The timeout period (in jiffies) for a PDU. When the the number of PDUs in
56 * flight is greater than the STAT_IRQ_THRESHOLD or 0 the timer is disabled.
57 * When there are packets in flight but lower than the threshold, we enable
58 * the timer and at expiry, attempt to remove any processed packets from the
59 * queue and if there are still packets left, schedule the timer again.
60 */
61#define PACKET_TIMEOUT 1
62
63/* The priority to register each algorithm with. */
64#define SPACC_CRYPTO_ALG_PRIORITY 10000
65
66#define SPACC_CRYPTO_KASUMI_F8_KEY_LEN 16
67#define SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ 64
68#define SPACC_CRYPTO_IPSEC_HASH_PG_SZ 64
69#define SPACC_CRYPTO_IPSEC_MAX_CTXS 32
70#define SPACC_CRYPTO_IPSEC_FIFO_SZ 32
71#define SPACC_CRYPTO_L2_CIPHER_PG_SZ 64
72#define SPACC_CRYPTO_L2_HASH_PG_SZ 64
73#define SPACC_CRYPTO_L2_MAX_CTXS 128
74#define SPACC_CRYPTO_L2_FIFO_SZ 128
75
76#define MAX_DDT_LEN 16
77
78/* DDT format. This must match the hardware DDT format exactly. */
79struct spacc_ddt {
80 dma_addr_t p;
81 u32 len;
82};
83
84/*
85 * Asynchronous crypto request structure.
86 *
87 * This structure defines a request that is either queued for processing or
88 * being processed.
89 */
90struct spacc_req {
91 struct list_head list;
92 struct spacc_engine *engine;
93 struct crypto_async_request *req;
94 int result;
95 bool is_encrypt;
96 unsigned ctx_id;
97 dma_addr_t src_addr, dst_addr;
98 struct spacc_ddt *src_ddt, *dst_ddt;
99 void (*complete)(struct spacc_req *req);
100
101 /* AEAD specific bits. */
102 u8 *giv;
103 size_t giv_len;
104 dma_addr_t giv_pa;
105};
106
107struct spacc_engine {
108 void __iomem *regs;
109 struct list_head pending;
110 int next_ctx;
111 spinlock_t hw_lock;
112 int in_flight;
113 struct list_head completed;
114 struct list_head in_progress;
115 struct tasklet_struct complete;
116 unsigned long fifo_sz;
117 void __iomem *cipher_ctx_base;
118 void __iomem *hash_key_base;
119 struct spacc_alg *algs;
120 unsigned num_algs;
121 struct list_head registered_algs;
122 size_t cipher_pg_sz;
123 size_t hash_pg_sz;
124 const char *name;
125 struct clk *clk;
126 struct device *dev;
127 unsigned max_ctxs;
128 struct timer_list packet_timeout;
129 unsigned stat_irq_thresh;
130 struct dma_pool *req_pool;
131};
132
133/* Algorithm type mask. */
134#define SPACC_CRYPTO_ALG_MASK 0x7
135
136/* SPACC definition of a crypto algorithm. */
137struct spacc_alg {
138 unsigned long ctrl_default;
139 unsigned long type;
140 struct crypto_alg alg;
141 struct spacc_engine *engine;
142 struct list_head entry;
143 int key_offs;
144 int iv_offs;
145};
146
147/* Generic context structure for any algorithm type. */
148struct spacc_generic_ctx {
149 struct spacc_engine *engine;
150 int flags;
151 int key_offs;
152 int iv_offs;
153};
154
155/* Block cipher context. */
156struct spacc_ablk_ctx {
157 struct spacc_generic_ctx generic;
158 u8 key[AES_MAX_KEY_SIZE];
159 u8 key_len;
160 /*
161 * The fallback cipher. If the operation can't be done in hardware,
162 * fallback to a software version.
163 */
164 struct crypto_ablkcipher *sw_cipher;
165};
166
167/* AEAD cipher context. */
168struct spacc_aead_ctx {
169 struct spacc_generic_ctx generic;
170 u8 cipher_key[AES_MAX_KEY_SIZE];
171 u8 hash_ctx[SPACC_CRYPTO_IPSEC_HASH_PG_SZ];
172 u8 cipher_key_len;
173 u8 hash_key_len;
174 struct crypto_aead *sw_cipher;
175 size_t auth_size;
176 u8 salt[AES_BLOCK_SIZE];
177};
178
179static inline struct spacc_alg *to_spacc_alg(struct crypto_alg *alg)
180{
181 return alg ? container_of(alg, struct spacc_alg, alg) : NULL;
182}
183
184static inline int spacc_fifo_cmd_full(struct spacc_engine *engine)
185{
186 u32 fifo_stat = readl(engine->regs + SPA_FIFO_STAT_REG_OFFSET);
187
188 return fifo_stat & SPA_FIFO_CMD_FULL;
189}
190
191/*
192 * Given a cipher context, and a context number, get the base address of the
193 * context page.
194 *
195 * Returns the address of the context page where the key/context may
196 * be written.
197 */
198static inline void __iomem *spacc_ctx_page_addr(struct spacc_generic_ctx *ctx,
199 unsigned indx,
200 bool is_cipher_ctx)
201{
202 return is_cipher_ctx ? ctx->engine->cipher_ctx_base +
203 (indx * ctx->engine->cipher_pg_sz) :
204 ctx->engine->hash_key_base + (indx * ctx->engine->hash_pg_sz);
205}
206
207/* The context pages can only be written with 32-bit accesses. */
208static inline void memcpy_toio32(u32 __iomem *dst, const void *src,
209 unsigned count)
210{
211 const u32 *src32 = (const u32 *) src;
212
213 while (count--)
214 writel(*src32++, dst++);
215}
216
217static void spacc_cipher_write_ctx(struct spacc_generic_ctx *ctx,
218 void __iomem *page_addr, const u8 *key,
219 size_t key_len, const u8 *iv, size_t iv_len)
220{
221 void __iomem *key_ptr = page_addr + ctx->key_offs;
222 void __iomem *iv_ptr = page_addr + ctx->iv_offs;
223
224 memcpy_toio32(key_ptr, key, key_len / 4);
225 memcpy_toio32(iv_ptr, iv, iv_len / 4);
226}
227
228/*
229 * Load a context into the engines context memory.
230 *
231 * Returns the index of the context page where the context was loaded.
232 */
233static unsigned spacc_load_ctx(struct spacc_generic_ctx *ctx,
234 const u8 *ciph_key, size_t ciph_len,
235 const u8 *iv, size_t ivlen, const u8 *hash_key,
236 size_t hash_len)
237{
238 unsigned indx = ctx->engine->next_ctx++;
239 void __iomem *ciph_page_addr, *hash_page_addr;
240
241 ciph_page_addr = spacc_ctx_page_addr(ctx, indx, 1);
242 hash_page_addr = spacc_ctx_page_addr(ctx, indx, 0);
243
244 ctx->engine->next_ctx &= ctx->engine->fifo_sz - 1;
245 spacc_cipher_write_ctx(ctx, ciph_page_addr, ciph_key, ciph_len, iv,
246 ivlen);
247 writel(ciph_len | (indx << SPA_KEY_SZ_CTX_INDEX_OFFSET) |
248 (1 << SPA_KEY_SZ_CIPHER_OFFSET),
249 ctx->engine->regs + SPA_KEY_SZ_REG_OFFSET);
250
251 if (hash_key) {
252 memcpy_toio32(hash_page_addr, hash_key, hash_len / 4);
253 writel(hash_len | (indx << SPA_KEY_SZ_CTX_INDEX_OFFSET),
254 ctx->engine->regs + SPA_KEY_SZ_REG_OFFSET);
255 }
256
257 return indx;
258}
259
260/* Count the number of scatterlist entries in a scatterlist. */
261static int sg_count(struct scatterlist *sg_list, int nbytes)
262{
263 struct scatterlist *sg = sg_list;
264 int sg_nents = 0;
265
266 while (nbytes > 0) {
267 ++sg_nents;
268 nbytes -= sg->length;
269 sg = sg_next(sg);
270 }
271
272 return sg_nents;
273}
274
275static inline void ddt_set(struct spacc_ddt *ddt, dma_addr_t phys, size_t len)
276{
277 ddt->p = phys;
278 ddt->len = len;
279}
280
281/*
282 * Take a crypto request and scatterlists for the data and turn them into DDTs
283 * for passing to the crypto engines. This also DMA maps the data so that the
284 * crypto engines can DMA to/from them.
285 */
286static struct spacc_ddt *spacc_sg_to_ddt(struct spacc_engine *engine,
287 struct scatterlist *payload,
288 unsigned nbytes,
289 enum dma_data_direction dir,
290 dma_addr_t *ddt_phys)
291{
292 unsigned nents, mapped_ents;
293 struct scatterlist *cur;
294 struct spacc_ddt *ddt;
295 int i;
296
297 nents = sg_count(payload, nbytes);
298 mapped_ents = dma_map_sg(engine->dev, payload, nents, dir);
299
300 if (mapped_ents + 1 > MAX_DDT_LEN)
301 goto out;
302
303 ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, ddt_phys);
304 if (!ddt)
305 goto out;
306
307 for_each_sg(payload, cur, mapped_ents, i)
308 ddt_set(&ddt[i], sg_dma_address(cur), sg_dma_len(cur));
309 ddt_set(&ddt[mapped_ents], 0, 0);
310
311 return ddt;
312
313out:
314 dma_unmap_sg(engine->dev, payload, nents, dir);
315 return NULL;
316}
317
318static int spacc_aead_make_ddts(struct spacc_req *req, u8 *giv)
319{
320 struct aead_request *areq = container_of(req->req, struct aead_request,
321 base);
322 struct spacc_engine *engine = req->engine;
323 struct spacc_ddt *src_ddt, *dst_ddt;
324 unsigned ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(areq));
325 unsigned nents = sg_count(areq->src, areq->cryptlen);
326 dma_addr_t iv_addr;
327 struct scatterlist *cur;
328 int i, dst_ents, src_ents, assoc_ents;
329 u8 *iv = giv ? giv : areq->iv;
330
331 src_ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, &req->src_addr);
332 if (!src_ddt)
333 return -ENOMEM;
334
335 dst_ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, &req->dst_addr);
336 if (!dst_ddt) {
337 dma_pool_free(engine->req_pool, src_ddt, req->src_addr);
338 return -ENOMEM;
339 }
340
341 req->src_ddt = src_ddt;
342 req->dst_ddt = dst_ddt;
343
344 assoc_ents = dma_map_sg(engine->dev, areq->assoc,
345 sg_count(areq->assoc, areq->assoclen), DMA_TO_DEVICE);
346 if (areq->src != areq->dst) {
347 src_ents = dma_map_sg(engine->dev, areq->src, nents,
348 DMA_TO_DEVICE);
349 dst_ents = dma_map_sg(engine->dev, areq->dst, nents,
350 DMA_FROM_DEVICE);
351 } else {
352 src_ents = dma_map_sg(engine->dev, areq->src, nents,
353 DMA_BIDIRECTIONAL);
354 dst_ents = 0;
355 }
356
357 /*
358 * Map the IV/GIV. For the GIV it needs to be bidirectional as it is
359 * formed by the crypto block and sent as the ESP IV for IPSEC.
360 */
361 iv_addr = dma_map_single(engine->dev, iv, ivsize,
362 giv ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
363 req->giv_pa = iv_addr;
364
365 /*
366 * Map the associated data. For decryption we don't copy the
367 * associated data.
368 */
369 for_each_sg(areq->assoc, cur, assoc_ents, i) {
370 ddt_set(src_ddt++, sg_dma_address(cur), sg_dma_len(cur));
371 if (req->is_encrypt)
372 ddt_set(dst_ddt++, sg_dma_address(cur),
373 sg_dma_len(cur));
374 }
375 ddt_set(src_ddt++, iv_addr, ivsize);
376
377 if (giv || req->is_encrypt)
378 ddt_set(dst_ddt++, iv_addr, ivsize);
379
380 /*
381 * Now map in the payload for the source and destination and terminate
382 * with the NULL pointers.
383 */
384 for_each_sg(areq->src, cur, src_ents, i) {
385 ddt_set(src_ddt++, sg_dma_address(cur), sg_dma_len(cur));
386 if (areq->src == areq->dst)
387 ddt_set(dst_ddt++, sg_dma_address(cur),
388 sg_dma_len(cur));
389 }
390
391 for_each_sg(areq->dst, cur, dst_ents, i)
392 ddt_set(dst_ddt++, sg_dma_address(cur),
393 sg_dma_len(cur));
394
395 ddt_set(src_ddt, 0, 0);
396 ddt_set(dst_ddt, 0, 0);
397
398 return 0;
399}
400
401static void spacc_aead_free_ddts(struct spacc_req *req)
402{
403 struct aead_request *areq = container_of(req->req, struct aead_request,
404 base);
405 struct spacc_alg *alg = to_spacc_alg(req->req->tfm->__crt_alg);
406 struct spacc_ablk_ctx *aead_ctx = crypto_tfm_ctx(req->req->tfm);
407 struct spacc_engine *engine = aead_ctx->generic.engine;
408 unsigned ivsize = alg->alg.cra_aead.ivsize;
409 unsigned nents = sg_count(areq->src, areq->cryptlen);
410
411 if (areq->src != areq->dst) {
412 dma_unmap_sg(engine->dev, areq->src, nents, DMA_TO_DEVICE);
413 dma_unmap_sg(engine->dev, areq->dst,
414 sg_count(areq->dst, areq->cryptlen),
415 DMA_FROM_DEVICE);
416 } else
417 dma_unmap_sg(engine->dev, areq->src, nents, DMA_BIDIRECTIONAL);
418
419 dma_unmap_sg(engine->dev, areq->assoc,
420 sg_count(areq->assoc, areq->assoclen), DMA_TO_DEVICE);
421
422 dma_unmap_single(engine->dev, req->giv_pa, ivsize, DMA_BIDIRECTIONAL);
423
424 dma_pool_free(engine->req_pool, req->src_ddt, req->src_addr);
425 dma_pool_free(engine->req_pool, req->dst_ddt, req->dst_addr);
426}
427
428static void spacc_free_ddt(struct spacc_req *req, struct spacc_ddt *ddt,
429 dma_addr_t ddt_addr, struct scatterlist *payload,
430 unsigned nbytes, enum dma_data_direction dir)
431{
432 unsigned nents = sg_count(payload, nbytes);
433
434 dma_unmap_sg(req->engine->dev, payload, nents, dir);
435 dma_pool_free(req->engine->req_pool, ddt, ddt_addr);
436}
437
438/*
439 * Set key for a DES operation in an AEAD cipher. This also performs weak key
440 * checking if required.
441 */
442static int spacc_aead_des_setkey(struct crypto_aead *aead, const u8 *key,
443 unsigned int len)
444{
445 struct crypto_tfm *tfm = crypto_aead_tfm(aead);
446 struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
447 u32 tmp[DES_EXPKEY_WORDS];
448
449 if (unlikely(!des_ekey(tmp, key)) &&
450 (crypto_aead_get_flags(aead)) & CRYPTO_TFM_REQ_WEAK_KEY) {
451 tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
452 return -EINVAL;
453 }
454
455 memcpy(ctx->cipher_key, key, len);
456 ctx->cipher_key_len = len;
457
458 return 0;
459}
460
461/* Set the key for the AES block cipher component of the AEAD transform. */
462static int spacc_aead_aes_setkey(struct crypto_aead *aead, const u8 *key,
463 unsigned int len)
464{
465 struct crypto_tfm *tfm = crypto_aead_tfm(aead);
466 struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
467
468 /*
469 * IPSec engine only supports 128 and 256 bit AES keys. If we get a
470 * request for any other size (192 bits) then we need to do a software
471 * fallback.
472 */
473 if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) {
474 /*
475 * Set the fallback transform to use the same request flags as
476 * the hardware transform.
477 */
478 ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
479 ctx->sw_cipher->base.crt_flags |=
480 tfm->crt_flags & CRYPTO_TFM_REQ_MASK;
481 return crypto_aead_setkey(ctx->sw_cipher, key, len);
482 }
483
484 memcpy(ctx->cipher_key, key, len);
485 ctx->cipher_key_len = len;
486
487 return 0;
488}
489
490static int spacc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
491 unsigned int keylen)
492{
493 struct spacc_aead_ctx *ctx = crypto_aead_ctx(tfm);
494 struct spacc_alg *alg = to_spacc_alg(tfm->base.__crt_alg);
495 struct rtattr *rta = (void *)key;
496 struct crypto_authenc_key_param *param;
497 unsigned int authkeylen, enckeylen;
498 int err = -EINVAL;
499
500 if (!RTA_OK(rta, keylen))
501 goto badkey;
502
503 if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM)
504 goto badkey;
505
506 if (RTA_PAYLOAD(rta) < sizeof(*param))
507 goto badkey;
508
509 param = RTA_DATA(rta);
510 enckeylen = be32_to_cpu(param->enckeylen);
511
512 key += RTA_ALIGN(rta->rta_len);
513 keylen -= RTA_ALIGN(rta->rta_len);
514
515 if (keylen < enckeylen)
516 goto badkey;
517
518 authkeylen = keylen - enckeylen;
519
520 if (enckeylen > AES_MAX_KEY_SIZE)
521 goto badkey;
522
523 if ((alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) ==
524 SPA_CTRL_CIPH_ALG_AES)
525 err = spacc_aead_aes_setkey(tfm, key + authkeylen, enckeylen);
526 else
527 err = spacc_aead_des_setkey(tfm, key + authkeylen, enckeylen);
528
529 if (err)
530 goto badkey;
531
532 memcpy(ctx->hash_ctx, key, authkeylen);
533 ctx->hash_key_len = authkeylen;
534
535 return 0;
536
537badkey:
538 crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
539 return -EINVAL;
540}
541
542static int spacc_aead_setauthsize(struct crypto_aead *tfm,
543 unsigned int authsize)
544{
545 struct spacc_aead_ctx *ctx = crypto_tfm_ctx(crypto_aead_tfm(tfm));
546
547 ctx->auth_size = authsize;
548
549 return 0;
550}
551
552/*
553 * Check if an AEAD request requires a fallback operation. Some requests can't
554 * be completed in hardware because the hardware may not support certain key
555 * sizes. In these cases we need to complete the request in software.
556 */
557static int spacc_aead_need_fallback(struct spacc_req *req)
558{
559 struct aead_request *aead_req;
560 struct crypto_tfm *tfm = req->req->tfm;
561 struct crypto_alg *alg = req->req->tfm->__crt_alg;
562 struct spacc_alg *spacc_alg = to_spacc_alg(alg);
563 struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
564
565 aead_req = container_of(req->req, struct aead_request, base);
566 /*
567 * If we have a non-supported key-length, then we need to do a
568 * software fallback.
569 */
570 if ((spacc_alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) ==
571 SPA_CTRL_CIPH_ALG_AES &&
572 ctx->cipher_key_len != AES_KEYSIZE_128 &&
573 ctx->cipher_key_len != AES_KEYSIZE_256)
574 return 1;
575
576 return 0;
577}
578
579static int spacc_aead_do_fallback(struct aead_request *req, unsigned alg_type,
580 bool is_encrypt)
581{
582 struct crypto_tfm *old_tfm = crypto_aead_tfm(crypto_aead_reqtfm(req));
583 struct spacc_aead_ctx *ctx = crypto_tfm_ctx(old_tfm);
584 int err;
585
586 if (ctx->sw_cipher) {
587 /*
588 * Change the request to use the software fallback transform,
589 * and once the ciphering has completed, put the old transform
590 * back into the request.
591 */
592 aead_request_set_tfm(req, ctx->sw_cipher);
593 err = is_encrypt ? crypto_aead_encrypt(req) :
594 crypto_aead_decrypt(req);
595 aead_request_set_tfm(req, __crypto_aead_cast(old_tfm));
596 } else
597 err = -EINVAL;
598
599 return err;
600}
601
602static void spacc_aead_complete(struct spacc_req *req)
603{
604 spacc_aead_free_ddts(req);
605 req->req->complete(req->req, req->result);
606}
607
608static int spacc_aead_submit(struct spacc_req *req)
609{
610 struct crypto_tfm *tfm = req->req->tfm;
611 struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
612 struct crypto_alg *alg = req->req->tfm->__crt_alg;
613 struct spacc_alg *spacc_alg = to_spacc_alg(alg);
614 struct spacc_engine *engine = ctx->generic.engine;
615 u32 ctrl, proc_len, assoc_len;
616 struct aead_request *aead_req =
617 container_of(req->req, struct aead_request, base);
618
619 req->result = -EINPROGRESS;
620 req->ctx_id = spacc_load_ctx(&ctx->generic, ctx->cipher_key,
621 ctx->cipher_key_len, aead_req->iv, alg->cra_aead.ivsize,
622 ctx->hash_ctx, ctx->hash_key_len);
623
624 /* Set the source and destination DDT pointers. */
625 writel(req->src_addr, engine->regs + SPA_SRC_PTR_REG_OFFSET);
626 writel(req->dst_addr, engine->regs + SPA_DST_PTR_REG_OFFSET);
627 writel(0, engine->regs + SPA_OFFSET_REG_OFFSET);
628
629 assoc_len = aead_req->assoclen;
630 proc_len = aead_req->cryptlen + assoc_len;
631
632 /*
633 * If we aren't generating an IV, then we need to include the IV in the
634 * associated data so that it is included in the hash.
635 */
636 if (!req->giv) {
637 assoc_len += crypto_aead_ivsize(crypto_aead_reqtfm(aead_req));
638 proc_len += crypto_aead_ivsize(crypto_aead_reqtfm(aead_req));
639 } else
640 proc_len += req->giv_len;
641
642 /*
643 * If we are decrypting, we need to take the length of the ICV out of
644 * the processing length.
645 */
646 if (!req->is_encrypt)
647 proc_len -= ctx->auth_size;
648
649 writel(proc_len, engine->regs + SPA_PROC_LEN_REG_OFFSET);
650 writel(assoc_len, engine->regs + SPA_AAD_LEN_REG_OFFSET);
651 writel(ctx->auth_size, engine->regs + SPA_ICV_LEN_REG_OFFSET);
652 writel(0, engine->regs + SPA_ICV_OFFSET_REG_OFFSET);
653 writel(0, engine->regs + SPA_AUX_INFO_REG_OFFSET);
654
655 ctrl = spacc_alg->ctrl_default | (req->ctx_id << SPA_CTRL_CTX_IDX) |
656 (1 << SPA_CTRL_ICV_APPEND);
657 if (req->is_encrypt)
658 ctrl |= (1 << SPA_CTRL_ENCRYPT_IDX) | (1 << SPA_CTRL_AAD_COPY);
659 else
660 ctrl |= (1 << SPA_CTRL_KEY_EXP);
661
662 mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT);
663
664 writel(ctrl, engine->regs + SPA_CTRL_REG_OFFSET);
665
666 return -EINPROGRESS;
667}
668
669/*
670 * Setup an AEAD request for processing. This will configure the engine, load
671 * the context and then start the packet processing.
672 *
673 * @giv Pointer to destination address for a generated IV. If the
674 * request does not need to generate an IV then this should be set to NULL.
675 */
676static int spacc_aead_setup(struct aead_request *req, u8 *giv,
677 unsigned alg_type, bool is_encrypt)
678{
679 struct crypto_alg *alg = req->base.tfm->__crt_alg;
680 struct spacc_engine *engine = to_spacc_alg(alg)->engine;
681 struct spacc_req *dev_req = aead_request_ctx(req);
682 int err = -EINPROGRESS;
683 unsigned long flags;
684 unsigned ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(req));
685
686 dev_req->giv = giv;
687 dev_req->giv_len = ivsize;
688 dev_req->req = &req->base;
689 dev_req->is_encrypt = is_encrypt;
690 dev_req->result = -EBUSY;
691 dev_req->engine = engine;
692 dev_req->complete = spacc_aead_complete;
693
694 if (unlikely(spacc_aead_need_fallback(dev_req)))
695 return spacc_aead_do_fallback(req, alg_type, is_encrypt);
696
697 spacc_aead_make_ddts(dev_req, dev_req->giv);
698
699 err = -EINPROGRESS;
700 spin_lock_irqsave(&engine->hw_lock, flags);
701 if (unlikely(spacc_fifo_cmd_full(engine))) {
702 if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
703 err = -EBUSY;
704 spin_unlock_irqrestore(&engine->hw_lock, flags);
705 goto out_free_ddts;
706 }
707 list_add_tail(&dev_req->list, &engine->pending);
708 } else {
709 ++engine->in_flight;
710 list_add_tail(&dev_req->list, &engine->in_progress);
711 spacc_aead_submit(dev_req);
712 }
713 spin_unlock_irqrestore(&engine->hw_lock, flags);
714
715 goto out;
716
717out_free_ddts:
718 spacc_aead_free_ddts(dev_req);
719out:
720 return err;
721}
722
723static int spacc_aead_encrypt(struct aead_request *req)
724{
725 struct crypto_aead *aead = crypto_aead_reqtfm(req);
726 struct crypto_tfm *tfm = crypto_aead_tfm(aead);
727 struct spacc_alg *alg = to_spacc_alg(tfm->__crt_alg);
728
729 return spacc_aead_setup(req, NULL, alg->type, 1);
730}
731
732static int spacc_aead_givencrypt(struct aead_givcrypt_request *req)
733{
734 struct crypto_aead *tfm = aead_givcrypt_reqtfm(req);
735 struct spacc_aead_ctx *ctx = crypto_aead_ctx(tfm);
736 size_t ivsize = crypto_aead_ivsize(tfm);
737 struct spacc_alg *alg = to_spacc_alg(tfm->base.__crt_alg);
738 unsigned len;
739 __be64 seq;
740
741 memcpy(req->areq.iv, ctx->salt, ivsize);
742 len = ivsize;
743 if (ivsize > sizeof(u64)) {
744 memset(req->giv, 0, ivsize - sizeof(u64));
745 len = sizeof(u64);
746 }
747 seq = cpu_to_be64(req->seq);
748 memcpy(req->giv + ivsize - len, &seq, len);
749
750 return spacc_aead_setup(&req->areq, req->giv, alg->type, 1);
751}
752
753static int spacc_aead_decrypt(struct aead_request *req)
754{
755 struct crypto_aead *aead = crypto_aead_reqtfm(req);
756 struct crypto_tfm *tfm = crypto_aead_tfm(aead);
757 struct spacc_alg *alg = to_spacc_alg(tfm->__crt_alg);
758
759 return spacc_aead_setup(req, NULL, alg->type, 0);
760}
761
762/*
763 * Initialise a new AEAD context. This is responsible for allocating the
764 * fallback cipher and initialising the context.
765 */
766static int spacc_aead_cra_init(struct crypto_tfm *tfm)
767{
768 struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
769 struct crypto_alg *alg = tfm->__crt_alg;
770 struct spacc_alg *spacc_alg = to_spacc_alg(alg);
771 struct spacc_engine *engine = spacc_alg->engine;
772
773 ctx->generic.flags = spacc_alg->type;
774 ctx->generic.engine = engine;
775 ctx->sw_cipher = crypto_alloc_aead(alg->cra_name, 0,
776 CRYPTO_ALG_ASYNC |
777 CRYPTO_ALG_NEED_FALLBACK);
778 if (IS_ERR(ctx->sw_cipher)) {
779 dev_warn(engine->dev, "failed to allocate fallback for %s\n",
780 alg->cra_name);
781 ctx->sw_cipher = NULL;
782 }
783 ctx->generic.key_offs = spacc_alg->key_offs;
784 ctx->generic.iv_offs = spacc_alg->iv_offs;
785
786 get_random_bytes(ctx->salt, sizeof(ctx->salt));
787
788 tfm->crt_aead.reqsize = sizeof(struct spacc_req);
789
790 return 0;
791}
792
793/*
794 * Destructor for an AEAD context. This is called when the transform is freed
795 * and must free the fallback cipher.
796 */
797static void spacc_aead_cra_exit(struct crypto_tfm *tfm)
798{
799 struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
800
801 if (ctx->sw_cipher)
802 crypto_free_aead(ctx->sw_cipher);
803 ctx->sw_cipher = NULL;
804}
805
806/*
807 * Set the DES key for a block cipher transform. This also performs weak key
808 * checking if the transform has requested it.
809 */
810static int spacc_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
811 unsigned int len)
812{
813 struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
814 struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
815 u32 tmp[DES_EXPKEY_WORDS];
816
817 if (len > DES3_EDE_KEY_SIZE) {
818 crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
819 return -EINVAL;
820 }
821
822 if (unlikely(!des_ekey(tmp, key)) &&
823 (crypto_ablkcipher_get_flags(cipher) & CRYPTO_TFM_REQ_WEAK_KEY)) {
824 tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
825 return -EINVAL;
826 }
827
828 memcpy(ctx->key, key, len);
829 ctx->key_len = len;
830
831 return 0;
832}
833
834/*
835 * Set the key for an AES block cipher. Some key lengths are not supported in
836 * hardware so this must also check whether a fallback is needed.
837 */
838static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
839 unsigned int len)
840{
841 struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
842 struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
843 int err = 0;
844
845 if (len > AES_MAX_KEY_SIZE) {
846 crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
847 return -EINVAL;
848 }
849
850 /*
851 * IPSec engine only supports 128 and 256 bit AES keys. If we get a
852 * request for any other size (192 bits) then we need to do a software
853 * fallback.
854 */
855 if ((len != AES_KEYSIZE_128 || len != AES_KEYSIZE_256) &&
856 ctx->sw_cipher) {
857 /*
858 * Set the fallback transform to use the same request flags as
859 * the hardware transform.
860 */
861 ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
862 ctx->sw_cipher->base.crt_flags |=
863 cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK;
864
865 err = crypto_ablkcipher_setkey(ctx->sw_cipher, key, len);
866 if (err)
867 goto sw_setkey_failed;
868 } else if ((len != AES_KEYSIZE_128 || len != AES_KEYSIZE_256) &&
869 !ctx->sw_cipher)
870 err = -EINVAL;
871
872 memcpy(ctx->key, key, len);
873 ctx->key_len = len;
874
875sw_setkey_failed:
876 if (err && ctx->sw_cipher) {
877 tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
878 tfm->crt_flags |=
879 ctx->sw_cipher->base.crt_flags & CRYPTO_TFM_RES_MASK;
880 }
881
882 return err;
883}
884
885static int spacc_kasumi_f8_setkey(struct crypto_ablkcipher *cipher,
886 const u8 *key, unsigned int len)
887{
888 struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
889 struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
890 int err = 0;
891
892 if (len > AES_MAX_KEY_SIZE) {
893 crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
894 err = -EINVAL;
895 goto out;
896 }
897
898 memcpy(ctx->key, key, len);
899 ctx->key_len = len;
900
901out:
902 return err;
903}
904
905static int spacc_ablk_need_fallback(struct spacc_req *req)
906{
907 struct spacc_ablk_ctx *ctx;
908 struct crypto_tfm *tfm = req->req->tfm;
909 struct crypto_alg *alg = req->req->tfm->__crt_alg;
910 struct spacc_alg *spacc_alg = to_spacc_alg(alg);
911
912 ctx = crypto_tfm_ctx(tfm);
913
914 return (spacc_alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) ==
915 SPA_CTRL_CIPH_ALG_AES &&
916 ctx->key_len != AES_KEYSIZE_128 &&
917 ctx->key_len != AES_KEYSIZE_256;
918}
919
920static void spacc_ablk_complete(struct spacc_req *req)
921{
922 struct ablkcipher_request *ablk_req =
923 container_of(req->req, struct ablkcipher_request, base);
924
925 if (ablk_req->src != ablk_req->dst) {
926 spacc_free_ddt(req, req->src_ddt, req->src_addr, ablk_req->src,
927 ablk_req->nbytes, DMA_TO_DEVICE);
928 spacc_free_ddt(req, req->dst_ddt, req->dst_addr, ablk_req->dst,
929 ablk_req->nbytes, DMA_FROM_DEVICE);
930 } else
931 spacc_free_ddt(req, req->dst_ddt, req->dst_addr, ablk_req->dst,
932 ablk_req->nbytes, DMA_BIDIRECTIONAL);
933
934 req->req->complete(req->req, req->result);
935}
936
937static int spacc_ablk_submit(struct spacc_req *req)
938{
939 struct crypto_tfm *tfm = req->req->tfm;
940 struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
941 struct ablkcipher_request *ablk_req = ablkcipher_request_cast(req->req);
942 struct crypto_alg *alg = req->req->tfm->__crt_alg;
943 struct spacc_alg *spacc_alg = to_spacc_alg(alg);
944 struct spacc_engine *engine = ctx->generic.engine;
945 u32 ctrl;
946
947 req->ctx_id = spacc_load_ctx(&ctx->generic, ctx->key,
948 ctx->key_len, ablk_req->info, alg->cra_ablkcipher.ivsize,
949 NULL, 0);
950
951 writel(req->src_addr, engine->regs + SPA_SRC_PTR_REG_OFFSET);
952 writel(req->dst_addr, engine->regs + SPA_DST_PTR_REG_OFFSET);
953 writel(0, engine->regs + SPA_OFFSET_REG_OFFSET);
954
955 writel(ablk_req->nbytes, engine->regs + SPA_PROC_LEN_REG_OFFSET);
956 writel(0, engine->regs + SPA_ICV_OFFSET_REG_OFFSET);
957 writel(0, engine->regs + SPA_AUX_INFO_REG_OFFSET);
958 writel(0, engine->regs + SPA_AAD_LEN_REG_OFFSET);
959
960 ctrl = spacc_alg->ctrl_default | (req->ctx_id << SPA_CTRL_CTX_IDX) |
961 (req->is_encrypt ? (1 << SPA_CTRL_ENCRYPT_IDX) :
962 (1 << SPA_CTRL_KEY_EXP));
963
964 mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT);
965
966 writel(ctrl, engine->regs + SPA_CTRL_REG_OFFSET);
967
968 return -EINPROGRESS;
969}
970
971static int spacc_ablk_do_fallback(struct ablkcipher_request *req,
972 unsigned alg_type, bool is_encrypt)
973{
974 struct crypto_tfm *old_tfm =
975 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
976 struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm);
977 int err;
978
979 if (!ctx->sw_cipher)
980 return -EINVAL;
981
982 /*
983 * Change the request to use the software fallback transform, and once
984 * the ciphering has completed, put the old transform back into the
985 * request.
986 */
987 ablkcipher_request_set_tfm(req, ctx->sw_cipher);
988 err = is_encrypt ? crypto_ablkcipher_encrypt(req) :
989 crypto_ablkcipher_decrypt(req);
990 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(old_tfm));
991
992 return err;
993}
994
995static int spacc_ablk_setup(struct ablkcipher_request *req, unsigned alg_type,
996 bool is_encrypt)
997{
998 struct crypto_alg *alg = req->base.tfm->__crt_alg;
999 struct spacc_engine *engine = to_spacc_alg(alg)->engine;
1000 struct spacc_req *dev_req = ablkcipher_request_ctx(req);
1001 unsigned long flags;
1002 int err = -ENOMEM;
1003
1004 dev_req->req = &req->base;
1005 dev_req->is_encrypt = is_encrypt;
1006 dev_req->engine = engine;
1007 dev_req->complete = spacc_ablk_complete;
1008 dev_req->result = -EINPROGRESS;
1009
1010 if (unlikely(spacc_ablk_need_fallback(dev_req)))
1011 return spacc_ablk_do_fallback(req, alg_type, is_encrypt);
1012
1013 /*
1014 * Create the DDT's for the engine. If we share the same source and
1015 * destination then we can optimize by reusing the DDT's.
1016 */
1017 if (req->src != req->dst) {
1018 dev_req->src_ddt = spacc_sg_to_ddt(engine, req->src,
1019 req->nbytes, DMA_TO_DEVICE, &dev_req->src_addr);
1020 if (!dev_req->src_ddt)
1021 goto out;
1022
1023 dev_req->dst_ddt = spacc_sg_to_ddt(engine, req->dst,
1024 req->nbytes, DMA_FROM_DEVICE, &dev_req->dst_addr);
1025 if (!dev_req->dst_ddt)
1026 goto out_free_src;
1027 } else {
1028 dev_req->dst_ddt = spacc_sg_to_ddt(engine, req->dst,
1029 req->nbytes, DMA_BIDIRECTIONAL, &dev_req->dst_addr);
1030 if (!dev_req->dst_ddt)
1031 goto out;
1032
1033 dev_req->src_ddt = NULL;
1034 dev_req->src_addr = dev_req->dst_addr;
1035 }
1036
1037 err = -EINPROGRESS;
1038 spin_lock_irqsave(&engine->hw_lock, flags);
1039 /*
1040 * Check if the engine will accept the operation now. If it won't then
1041 * we either stick it on the end of a pending list if we can backlog,
1042 * or bailout with an error if not.
1043 */
1044 if (unlikely(spacc_fifo_cmd_full(engine))) {
1045 if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
1046 err = -EBUSY;
1047 spin_unlock_irqrestore(&engine->hw_lock, flags);
1048 goto out_free_ddts;
1049 }
1050 list_add_tail(&dev_req->list, &engine->pending);
1051 } else {
1052 ++engine->in_flight;
1053 list_add_tail(&dev_req->list, &engine->in_progress);
1054 spacc_ablk_submit(dev_req);
1055 }
1056 spin_unlock_irqrestore(&engine->hw_lock, flags);
1057
1058 goto out;
1059
1060out_free_ddts:
1061 spacc_free_ddt(dev_req, dev_req->dst_ddt, dev_req->dst_addr, req->dst,
1062 req->nbytes, req->src == req->dst ?
1063 DMA_BIDIRECTIONAL : DMA_FROM_DEVICE);
1064out_free_src:
1065 if (req->src != req->dst)
1066 spacc_free_ddt(dev_req, dev_req->src_ddt, dev_req->src_addr,
1067 req->src, req->nbytes, DMA_TO_DEVICE);
1068out:
1069 return err;
1070}
1071
1072static int spacc_ablk_cra_init(struct crypto_tfm *tfm)
1073{
1074 struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
1075 struct crypto_alg *alg = tfm->__crt_alg;
1076 struct spacc_alg *spacc_alg = to_spacc_alg(alg);
1077 struct spacc_engine *engine = spacc_alg->engine;
1078
1079 ctx->generic.flags = spacc_alg->type;
1080 ctx->generic.engine = engine;
1081 if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
1082 ctx->sw_cipher = crypto_alloc_ablkcipher(alg->cra_name, 0,
1083 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
1084 if (IS_ERR(ctx->sw_cipher)) {
1085 dev_warn(engine->dev, "failed to allocate fallback for %s\n",
1086 alg->cra_name);
1087 ctx->sw_cipher = NULL;
1088 }
1089 }
1090 ctx->generic.key_offs = spacc_alg->key_offs;
1091 ctx->generic.iv_offs = spacc_alg->iv_offs;
1092
1093 tfm->crt_ablkcipher.reqsize = sizeof(struct spacc_req);
1094
1095 return 0;
1096}
1097
1098static void spacc_ablk_cra_exit(struct crypto_tfm *tfm)
1099{
1100 struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
1101
1102 if (ctx->sw_cipher)
1103 crypto_free_ablkcipher(ctx->sw_cipher);
1104 ctx->sw_cipher = NULL;
1105}
1106
1107static int spacc_ablk_encrypt(struct ablkcipher_request *req)
1108{
1109 struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req);
1110 struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
1111 struct spacc_alg *alg = to_spacc_alg(tfm->__crt_alg);
1112
1113 return spacc_ablk_setup(req, alg->type, 1);
1114}
1115
1116static int spacc_ablk_decrypt(struct ablkcipher_request *req)
1117{
1118 struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req);
1119 struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
1120 struct spacc_alg *alg = to_spacc_alg(tfm->__crt_alg);
1121
1122 return spacc_ablk_setup(req, alg->type, 0);
1123}
1124
1125static inline int spacc_fifo_stat_empty(struct spacc_engine *engine)
1126{
1127 return readl(engine->regs + SPA_FIFO_STAT_REG_OFFSET) &
1128 SPA_FIFO_STAT_EMPTY;
1129}
1130
1131static void spacc_process_done(struct spacc_engine *engine)
1132{
1133 struct spacc_req *req;
1134 unsigned long flags;
1135
1136 spin_lock_irqsave(&engine->hw_lock, flags);
1137
1138 while (!spacc_fifo_stat_empty(engine)) {
1139 req = list_first_entry(&engine->in_progress, struct spacc_req,
1140 list);
1141 list_move_tail(&req->list, &engine->completed);
1142
1143 /* POP the status register. */
1144 writel(~0, engine->regs + SPA_STAT_POP_REG_OFFSET);
1145 req->result = (readl(engine->regs + SPA_STATUS_REG_OFFSET) &
1146 SPA_STATUS_RES_CODE_MASK) >> SPA_STATUS_RES_CODE_OFFSET;
1147
1148 /*
1149 * Convert the SPAcc error status into the standard POSIX error
1150 * codes.
1151 */
1152 if (unlikely(req->result)) {
1153 switch (req->result) {
1154 case SPA_STATUS_ICV_FAIL:
1155 req->result = -EBADMSG;
1156 break;
1157
1158 case SPA_STATUS_MEMORY_ERROR:
1159 dev_warn(engine->dev,
1160 "memory error triggered\n");
1161 req->result = -EFAULT;
1162 break;
1163
1164 case SPA_STATUS_BLOCK_ERROR:
1165 dev_warn(engine->dev,
1166 "block error triggered\n");
1167 req->result = -EIO;
1168 break;
1169 }
1170 }
1171 }
1172
1173 tasklet_schedule(&engine->complete);
1174
1175 spin_unlock_irqrestore(&engine->hw_lock, flags);
1176}
1177
1178static irqreturn_t spacc_spacc_irq(int irq, void *dev)
1179{
1180 struct spacc_engine *engine = (struct spacc_engine *)dev;
1181 u32 spacc_irq_stat = readl(engine->regs + SPA_IRQ_STAT_REG_OFFSET);
1182
1183 writel(spacc_irq_stat, engine->regs + SPA_IRQ_STAT_REG_OFFSET);
1184 spacc_process_done(engine);
1185
1186 return IRQ_HANDLED;
1187}
1188
1189static void spacc_packet_timeout(unsigned long data)
1190{
1191 struct spacc_engine *engine = (struct spacc_engine *)data;
1192
1193 spacc_process_done(engine);
1194}
1195
1196static int spacc_req_submit(struct spacc_req *req)
1197{
1198 struct crypto_alg *alg = req->req->tfm->__crt_alg;
1199
1200 if (CRYPTO_ALG_TYPE_AEAD == (CRYPTO_ALG_TYPE_MASK & alg->cra_flags))
1201 return spacc_aead_submit(req);
1202 else
1203 return spacc_ablk_submit(req);
1204}
1205
1206static void spacc_spacc_complete(unsigned long data)
1207{
1208 struct spacc_engine *engine = (struct spacc_engine *)data;
1209 struct spacc_req *req, *tmp;
1210 unsigned long flags;
1211 int num_removed = 0;
1212 LIST_HEAD(completed);
1213
1214 spin_lock_irqsave(&engine->hw_lock, flags);
1215 list_splice_init(&engine->completed, &completed);
1216 spin_unlock_irqrestore(&engine->hw_lock, flags);
1217
1218 list_for_each_entry_safe(req, tmp, &completed, list) {
1219 ++num_removed;
1220 req->complete(req);
1221 }
1222
1223 /* Try and fill the engine back up again. */
1224 spin_lock_irqsave(&engine->hw_lock, flags);
1225
1226 engine->in_flight -= num_removed;
1227
1228 list_for_each_entry_safe(req, tmp, &engine->pending, list) {
1229 if (spacc_fifo_cmd_full(engine))
1230 break;
1231
1232 list_move_tail(&req->list, &engine->in_progress);
1233 ++engine->in_flight;
1234 req->result = spacc_req_submit(req);
1235 }
1236
1237 if (engine->in_flight)
1238 mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT);
1239
1240 spin_unlock_irqrestore(&engine->hw_lock, flags);
1241}
1242
1243#ifdef CONFIG_PM
1244static int spacc_suspend(struct device *dev)
1245{
1246 struct platform_device *pdev = to_platform_device(dev);
1247 struct spacc_engine *engine = platform_get_drvdata(pdev);
1248
1249 /*
1250 * We only support standby mode. All we have to do is gate the clock to
1251 * the spacc. The hardware will preserve state until we turn it back
1252 * on again.
1253 */
1254 clk_disable(engine->clk);
1255
1256 return 0;
1257}
1258
1259static int spacc_resume(struct device *dev)
1260{
1261 struct platform_device *pdev = to_platform_device(dev);
1262 struct spacc_engine *engine = platform_get_drvdata(pdev);
1263
1264 return clk_enable(engine->clk);
1265}
1266
1267static const struct dev_pm_ops spacc_pm_ops = {
1268 .suspend = spacc_suspend,
1269 .resume = spacc_resume,
1270};
1271#endif /* CONFIG_PM */
1272
1273static inline struct spacc_engine *spacc_dev_to_engine(struct device *dev)
1274{
1275 return dev ? platform_get_drvdata(to_platform_device(dev)) : NULL;
1276}
1277
1278static ssize_t spacc_stat_irq_thresh_show(struct device *dev,
1279 struct device_attribute *attr,
1280 char *buf)
1281{
1282 struct spacc_engine *engine = spacc_dev_to_engine(dev);
1283
1284 return snprintf(buf, PAGE_SIZE, "%u\n", engine->stat_irq_thresh);
1285}
1286
1287static ssize_t spacc_stat_irq_thresh_store(struct device *dev,
1288 struct device_attribute *attr,
1289 const char *buf, size_t len)
1290{
1291 struct spacc_engine *engine = spacc_dev_to_engine(dev);
1292 unsigned long thresh;
1293
1294 if (strict_strtoul(buf, 0, &thresh))
1295 return -EINVAL;
1296
1297 thresh = clamp(thresh, 1UL, engine->fifo_sz - 1);
1298
1299 engine->stat_irq_thresh = thresh;
1300 writel(engine->stat_irq_thresh << SPA_IRQ_CTRL_STAT_CNT_OFFSET,
1301 engine->regs + SPA_IRQ_CTRL_REG_OFFSET);
1302
1303 return len;
1304}
1305static DEVICE_ATTR(stat_irq_thresh, 0644, spacc_stat_irq_thresh_show,
1306 spacc_stat_irq_thresh_store);
1307
1308static struct spacc_alg ipsec_engine_algs[] = {
1309 {
1310 .ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC,
1311 .key_offs = 0,
1312 .iv_offs = AES_MAX_KEY_SIZE,
1313 .alg = {
1314 .cra_name = "cbc(aes)",
1315 .cra_driver_name = "cbc-aes-picoxcell",
1316 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1317 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
1318 CRYPTO_ALG_ASYNC |
1319 CRYPTO_ALG_NEED_FALLBACK,
1320 .cra_blocksize = AES_BLOCK_SIZE,
1321 .cra_ctxsize = sizeof(struct spacc_ablk_ctx),
1322 .cra_type = &crypto_ablkcipher_type,
1323 .cra_module = THIS_MODULE,
1324 .cra_ablkcipher = {
1325 .setkey = spacc_aes_setkey,
1326 .encrypt = spacc_ablk_encrypt,
1327 .decrypt = spacc_ablk_decrypt,
1328 .min_keysize = AES_MIN_KEY_SIZE,
1329 .max_keysize = AES_MAX_KEY_SIZE,
1330 .ivsize = AES_BLOCK_SIZE,
1331 },
1332 .cra_init = spacc_ablk_cra_init,
1333 .cra_exit = spacc_ablk_cra_exit,
1334 },
1335 },
1336 {
1337 .key_offs = 0,
1338 .iv_offs = AES_MAX_KEY_SIZE,
1339 .ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_ECB,
1340 .alg = {
1341 .cra_name = "ecb(aes)",
1342 .cra_driver_name = "ecb-aes-picoxcell",
1343 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1344 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
1345 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
1346 .cra_blocksize = AES_BLOCK_SIZE,
1347 .cra_ctxsize = sizeof(struct spacc_ablk_ctx),
1348 .cra_type = &crypto_ablkcipher_type,
1349 .cra_module = THIS_MODULE,
1350 .cra_ablkcipher = {
1351 .setkey = spacc_aes_setkey,
1352 .encrypt = spacc_ablk_encrypt,
1353 .decrypt = spacc_ablk_decrypt,
1354 .min_keysize = AES_MIN_KEY_SIZE,
1355 .max_keysize = AES_MAX_KEY_SIZE,
1356 },
1357 .cra_init = spacc_ablk_cra_init,
1358 .cra_exit = spacc_ablk_cra_exit,
1359 },
1360 },
1361 {
1362 .key_offs = DES_BLOCK_SIZE,
1363 .iv_offs = 0,
1364 .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC,
1365 .alg = {
1366 .cra_name = "cbc(des)",
1367 .cra_driver_name = "cbc-des-picoxcell",
1368 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1369 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
1370 .cra_blocksize = DES_BLOCK_SIZE,
1371 .cra_ctxsize = sizeof(struct spacc_ablk_ctx),
1372 .cra_type = &crypto_ablkcipher_type,
1373 .cra_module = THIS_MODULE,
1374 .cra_ablkcipher = {
1375 .setkey = spacc_des_setkey,
1376 .encrypt = spacc_ablk_encrypt,
1377 .decrypt = spacc_ablk_decrypt,
1378 .min_keysize = DES_KEY_SIZE,
1379 .max_keysize = DES_KEY_SIZE,
1380 .ivsize = DES_BLOCK_SIZE,
1381 },
1382 .cra_init = spacc_ablk_cra_init,
1383 .cra_exit = spacc_ablk_cra_exit,
1384 },
1385 },
1386 {
1387 .key_offs = DES_BLOCK_SIZE,
1388 .iv_offs = 0,
1389 .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_ECB,
1390 .alg = {
1391 .cra_name = "ecb(des)",
1392 .cra_driver_name = "ecb-des-picoxcell",
1393 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1394 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
1395 .cra_blocksize = DES_BLOCK_SIZE,
1396 .cra_ctxsize = sizeof(struct spacc_ablk_ctx),
1397 .cra_type = &crypto_ablkcipher_type,
1398 .cra_module = THIS_MODULE,
1399 .cra_ablkcipher = {
1400 .setkey = spacc_des_setkey,
1401 .encrypt = spacc_ablk_encrypt,
1402 .decrypt = spacc_ablk_decrypt,
1403 .min_keysize = DES_KEY_SIZE,
1404 .max_keysize = DES_KEY_SIZE,
1405 },
1406 .cra_init = spacc_ablk_cra_init,
1407 .cra_exit = spacc_ablk_cra_exit,
1408 },
1409 },
1410 {
1411 .key_offs = DES_BLOCK_SIZE,
1412 .iv_offs = 0,
1413 .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC,
1414 .alg = {
1415 .cra_name = "cbc(des3_ede)",
1416 .cra_driver_name = "cbc-des3-ede-picoxcell",
1417 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1418 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
1419 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
1420 .cra_ctxsize = sizeof(struct spacc_ablk_ctx),
1421 .cra_type = &crypto_ablkcipher_type,
1422 .cra_module = THIS_MODULE,
1423 .cra_ablkcipher = {
1424 .setkey = spacc_des_setkey,
1425 .encrypt = spacc_ablk_encrypt,
1426 .decrypt = spacc_ablk_decrypt,
1427 .min_keysize = DES3_EDE_KEY_SIZE,
1428 .max_keysize = DES3_EDE_KEY_SIZE,
1429 .ivsize = DES3_EDE_BLOCK_SIZE,
1430 },
1431 .cra_init = spacc_ablk_cra_init,
1432 .cra_exit = spacc_ablk_cra_exit,
1433 },
1434 },
1435 {
1436 .key_offs = DES_BLOCK_SIZE,
1437 .iv_offs = 0,
1438 .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_ECB,
1439 .alg = {
1440 .cra_name = "ecb(des3_ede)",
1441 .cra_driver_name = "ecb-des3-ede-picoxcell",
1442 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1443 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
1444 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
1445 .cra_ctxsize = sizeof(struct spacc_ablk_ctx),
1446 .cra_type = &crypto_ablkcipher_type,
1447 .cra_module = THIS_MODULE,
1448 .cra_ablkcipher = {
1449 .setkey = spacc_des_setkey,
1450 .encrypt = spacc_ablk_encrypt,
1451 .decrypt = spacc_ablk_decrypt,
1452 .min_keysize = DES3_EDE_KEY_SIZE,
1453 .max_keysize = DES3_EDE_KEY_SIZE,
1454 },
1455 .cra_init = spacc_ablk_cra_init,
1456 .cra_exit = spacc_ablk_cra_exit,
1457 },
1458 },
1459 {
1460 .ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC |
1461 SPA_CTRL_HASH_ALG_SHA | SPA_CTRL_HASH_MODE_HMAC,
1462 .key_offs = 0,
1463 .iv_offs = AES_MAX_KEY_SIZE,
1464 .alg = {
1465 .cra_name = "authenc(hmac(sha1),cbc(aes))",
1466 .cra_driver_name = "authenc-hmac-sha1-cbc-aes-picoxcell",
1467 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1468 .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
1469 .cra_blocksize = AES_BLOCK_SIZE,
1470 .cra_ctxsize = sizeof(struct spacc_aead_ctx),
1471 .cra_type = &crypto_aead_type,
1472 .cra_module = THIS_MODULE,
1473 .cra_aead = {
1474 .setkey = spacc_aead_setkey,
1475 .setauthsize = spacc_aead_setauthsize,
1476 .encrypt = spacc_aead_encrypt,
1477 .decrypt = spacc_aead_decrypt,
1478 .givencrypt = spacc_aead_givencrypt,
1479 .ivsize = AES_BLOCK_SIZE,
1480 .maxauthsize = SHA1_DIGEST_SIZE,
1481 },
1482 .cra_init = spacc_aead_cra_init,
1483 .cra_exit = spacc_aead_cra_exit,
1484 },
1485 },
1486 {
1487 .ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC |
1488 SPA_CTRL_HASH_ALG_SHA256 |
1489 SPA_CTRL_HASH_MODE_HMAC,
1490 .key_offs = 0,
1491 .iv_offs = AES_MAX_KEY_SIZE,
1492 .alg = {
1493 .cra_name = "authenc(hmac(sha256),cbc(aes))",
1494 .cra_driver_name = "authenc-hmac-sha256-cbc-aes-picoxcell",
1495 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1496 .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
1497 .cra_blocksize = AES_BLOCK_SIZE,
1498 .cra_ctxsize = sizeof(struct spacc_aead_ctx),
1499 .cra_type = &crypto_aead_type,
1500 .cra_module = THIS_MODULE,
1501 .cra_aead = {
1502 .setkey = spacc_aead_setkey,
1503 .setauthsize = spacc_aead_setauthsize,
1504 .encrypt = spacc_aead_encrypt,
1505 .decrypt = spacc_aead_decrypt,
1506 .givencrypt = spacc_aead_givencrypt,
1507 .ivsize = AES_BLOCK_SIZE,
1508 .maxauthsize = SHA256_DIGEST_SIZE,
1509 },
1510 .cra_init = spacc_aead_cra_init,
1511 .cra_exit = spacc_aead_cra_exit,
1512 },
1513 },
1514 {
1515 .key_offs = 0,
1516 .iv_offs = AES_MAX_KEY_SIZE,
1517 .ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC |
1518 SPA_CTRL_HASH_ALG_MD5 | SPA_CTRL_HASH_MODE_HMAC,
1519 .alg = {
1520 .cra_name = "authenc(hmac(md5),cbc(aes))",
1521 .cra_driver_name = "authenc-hmac-md5-cbc-aes-picoxcell",
1522 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1523 .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
1524 .cra_blocksize = AES_BLOCK_SIZE,
1525 .cra_ctxsize = sizeof(struct spacc_aead_ctx),
1526 .cra_type = &crypto_aead_type,
1527 .cra_module = THIS_MODULE,
1528 .cra_aead = {
1529 .setkey = spacc_aead_setkey,
1530 .setauthsize = spacc_aead_setauthsize,
1531 .encrypt = spacc_aead_encrypt,
1532 .decrypt = spacc_aead_decrypt,
1533 .givencrypt = spacc_aead_givencrypt,
1534 .ivsize = AES_BLOCK_SIZE,
1535 .maxauthsize = MD5_DIGEST_SIZE,
1536 },
1537 .cra_init = spacc_aead_cra_init,
1538 .cra_exit = spacc_aead_cra_exit,
1539 },
1540 },
1541 {
1542 .key_offs = DES_BLOCK_SIZE,
1543 .iv_offs = 0,
1544 .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC |
1545 SPA_CTRL_HASH_ALG_SHA | SPA_CTRL_HASH_MODE_HMAC,
1546 .alg = {
1547 .cra_name = "authenc(hmac(sha1),cbc(des3_ede))",
1548 .cra_driver_name = "authenc-hmac-sha1-cbc-3des-picoxcell",
1549 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1550 .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
1551 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
1552 .cra_ctxsize = sizeof(struct spacc_aead_ctx),
1553 .cra_type = &crypto_aead_type,
1554 .cra_module = THIS_MODULE,
1555 .cra_aead = {
1556 .setkey = spacc_aead_setkey,
1557 .setauthsize = spacc_aead_setauthsize,
1558 .encrypt = spacc_aead_encrypt,
1559 .decrypt = spacc_aead_decrypt,
1560 .givencrypt = spacc_aead_givencrypt,
1561 .ivsize = DES3_EDE_BLOCK_SIZE,
1562 .maxauthsize = SHA1_DIGEST_SIZE,
1563 },
1564 .cra_init = spacc_aead_cra_init,
1565 .cra_exit = spacc_aead_cra_exit,
1566 },
1567 },
1568 {
1569 .key_offs = DES_BLOCK_SIZE,
1570 .iv_offs = 0,
1571 .ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC |
1572 SPA_CTRL_HASH_ALG_SHA256 |
1573 SPA_CTRL_HASH_MODE_HMAC,
1574 .alg = {
1575 .cra_name = "authenc(hmac(sha256),cbc(des3_ede))",
1576 .cra_driver_name = "authenc-hmac-sha256-cbc-3des-picoxcell",
1577 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1578 .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
1579 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
1580 .cra_ctxsize = sizeof(struct spacc_aead_ctx),
1581 .cra_type = &crypto_aead_type,
1582 .cra_module = THIS_MODULE,
1583 .cra_aead = {
1584 .setkey = spacc_aead_setkey,
1585 .setauthsize = spacc_aead_setauthsize,
1586 .encrypt = spacc_aead_encrypt,
1587 .decrypt = spacc_aead_decrypt,
1588 .givencrypt = spacc_aead_givencrypt,
1589 .ivsize = DES3_EDE_BLOCK_SIZE,
1590 .maxauthsize = SHA256_DIGEST_SIZE,
1591 },
1592 .cra_init = spacc_aead_cra_init,
1593 .cra_exit = spacc_aead_cra_exit,
1594 },
1595 },
1596 {
1597 .key_offs = DES_BLOCK_SIZE,
1598 .iv_offs = 0,
1599 .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC |
1600 SPA_CTRL_HASH_ALG_MD5 | SPA_CTRL_HASH_MODE_HMAC,
1601 .alg = {
1602 .cra_name = "authenc(hmac(md5),cbc(des3_ede))",
1603 .cra_driver_name = "authenc-hmac-md5-cbc-3des-picoxcell",
1604 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1605 .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
1606 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
1607 .cra_ctxsize = sizeof(struct spacc_aead_ctx),
1608 .cra_type = &crypto_aead_type,
1609 .cra_module = THIS_MODULE,
1610 .cra_aead = {
1611 .setkey = spacc_aead_setkey,
1612 .setauthsize = spacc_aead_setauthsize,
1613 .encrypt = spacc_aead_encrypt,
1614 .decrypt = spacc_aead_decrypt,
1615 .givencrypt = spacc_aead_givencrypt,
1616 .ivsize = DES3_EDE_BLOCK_SIZE,
1617 .maxauthsize = MD5_DIGEST_SIZE,
1618 },
1619 .cra_init = spacc_aead_cra_init,
1620 .cra_exit = spacc_aead_cra_exit,
1621 },
1622 },
1623};
1624
1625static struct spacc_alg l2_engine_algs[] = {
1626 {
1627 .key_offs = 0,
1628 .iv_offs = SPACC_CRYPTO_KASUMI_F8_KEY_LEN,
1629 .ctrl_default = SPA_CTRL_CIPH_ALG_KASUMI |
1630 SPA_CTRL_CIPH_MODE_F8,
1631 .alg = {
1632 .cra_name = "f8(kasumi)",
1633 .cra_driver_name = "f8-kasumi-picoxcell",
1634 .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
1635 .cra_flags = CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_ASYNC,
1636 .cra_blocksize = 8,
1637 .cra_ctxsize = sizeof(struct spacc_ablk_ctx),
1638 .cra_type = &crypto_ablkcipher_type,
1639 .cra_module = THIS_MODULE,
1640 .cra_ablkcipher = {
1641 .setkey = spacc_kasumi_f8_setkey,
1642 .encrypt = spacc_ablk_encrypt,
1643 .decrypt = spacc_ablk_decrypt,
1644 .min_keysize = 16,
1645 .max_keysize = 16,
1646 .ivsize = 8,
1647 },
1648 .cra_init = spacc_ablk_cra_init,
1649 .cra_exit = spacc_ablk_cra_exit,
1650 },
1651 },
1652};
1653
1654static int __devinit spacc_probe(struct platform_device *pdev,
1655 unsigned max_ctxs, size_t cipher_pg_sz,
1656 size_t hash_pg_sz, size_t fifo_sz,
1657 struct spacc_alg *algs, size_t num_algs)
1658{
1659 int i, err, ret = -EINVAL;
1660 struct resource *mem, *irq;
1661 struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine),
1662 GFP_KERNEL);
1663 if (!engine)
1664 return -ENOMEM;
1665
1666 engine->max_ctxs = max_ctxs;
1667 engine->cipher_pg_sz = cipher_pg_sz;
1668 engine->hash_pg_sz = hash_pg_sz;
1669 engine->fifo_sz = fifo_sz;
1670 engine->algs = algs;
1671 engine->num_algs = num_algs;
1672 engine->name = dev_name(&pdev->dev);
1673
1674 mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1675 irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
1676 if (!mem || !irq) {
1677 dev_err(&pdev->dev, "no memory/irq resource for engine\n");
1678 return -ENXIO;
1679 }
1680
1681 if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
1682 engine->name))
1683 return -ENOMEM;
1684
1685 engine->regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
1686 if (!engine->regs) {
1687 dev_err(&pdev->dev, "memory map failed\n");
1688 return -ENOMEM;
1689 }
1690
1691 if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0,
1692 engine->name, engine)) {
1693 dev_err(engine->dev, "failed to request IRQ\n");
1694 return -EBUSY;
1695 }
1696
1697 engine->dev = &pdev->dev;
1698 engine->cipher_ctx_base = engine->regs + SPA_CIPH_KEY_BASE_REG_OFFSET;
1699 engine->hash_key_base = engine->regs + SPA_HASH_KEY_BASE_REG_OFFSET;
1700
1701 engine->req_pool = dmam_pool_create(engine->name, engine->dev,
1702 MAX_DDT_LEN * sizeof(struct spacc_ddt), 8, SZ_64K);
1703 if (!engine->req_pool)
1704 return -ENOMEM;
1705
1706 spin_lock_init(&engine->hw_lock);
1707
1708 engine->clk = clk_get(&pdev->dev, NULL);
1709 if (IS_ERR(engine->clk)) {
1710 dev_info(&pdev->dev, "clk unavailable\n");
1711 device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh);
1712 return PTR_ERR(engine->clk);
1713 }
1714
1715 if (clk_enable(engine->clk)) {
1716 dev_info(&pdev->dev, "unable to enable clk\n");
1717 clk_put(engine->clk);
1718 return -EIO;
1719 }
1720
1721 err = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh);
1722 if (err) {
1723 clk_disable(engine->clk);
1724 clk_put(engine->clk);
1725 return err;
1726 }
1727
1728
1729 /*
1730 * Use an IRQ threshold of 50% as a default. This seems to be a
1731 * reasonable trade off of latency against throughput but can be
1732 * changed at runtime.
1733 */
1734 engine->stat_irq_thresh = (engine->fifo_sz / 2);
1735
1736 /*
1737 * Configure the interrupts. We only use the STAT_CNT interrupt as we
1738 * only submit a new packet for processing when we complete another in
1739 * the queue. This minimizes time spent in the interrupt handler.
1740 */
1741 writel(engine->stat_irq_thresh << SPA_IRQ_CTRL_STAT_CNT_OFFSET,
1742 engine->regs + SPA_IRQ_CTRL_REG_OFFSET);
1743 writel(SPA_IRQ_EN_STAT_EN | SPA_IRQ_EN_GLBL_EN,
1744 engine->regs + SPA_IRQ_EN_REG_OFFSET);
1745
1746 setup_timer(&engine->packet_timeout, spacc_packet_timeout,
1747 (unsigned long)engine);
1748
1749 INIT_LIST_HEAD(&engine->pending);
1750 INIT_LIST_HEAD(&engine->completed);
1751 INIT_LIST_HEAD(&engine->in_progress);
1752 engine->in_flight = 0;
1753 tasklet_init(&engine->complete, spacc_spacc_complete,
1754 (unsigned long)engine);
1755
1756 platform_set_drvdata(pdev, engine);
1757
1758 INIT_LIST_HEAD(&engine->registered_algs);
1759 for (i = 0; i < engine->num_algs; ++i) {
1760 engine->algs[i].engine = engine;
1761 err = crypto_register_alg(&engine->algs[i].alg);
1762 if (!err) {
1763 list_add_tail(&engine->algs[i].entry,
1764 &engine->registered_algs);
1765 ret = 0;
1766 }
1767 if (err)
1768 dev_err(engine->dev, "failed to register alg \"%s\"\n",
1769 engine->algs[i].alg.cra_name);
1770 else
1771 dev_dbg(engine->dev, "registered alg \"%s\"\n",
1772 engine->algs[i].alg.cra_name);
1773 }
1774
1775 return ret;
1776}
1777
1778static int __devexit spacc_remove(struct platform_device *pdev)
1779{
1780 struct spacc_alg *alg, *next;
1781 struct spacc_engine *engine = platform_get_drvdata(pdev);
1782
1783 del_timer_sync(&engine->packet_timeout);
1784 device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh);
1785
1786 list_for_each_entry_safe(alg, next, &engine->registered_algs, entry) {
1787 list_del(&alg->entry);
1788 crypto_unregister_alg(&alg->alg);
1789 }
1790
1791 clk_disable(engine->clk);
1792 clk_put(engine->clk);
1793
1794 return 0;
1795}
1796
1797static int __devinit ipsec_probe(struct platform_device *pdev)
1798{
1799 return spacc_probe(pdev, SPACC_CRYPTO_IPSEC_MAX_CTXS,
1800 SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ,
1801 SPACC_CRYPTO_IPSEC_HASH_PG_SZ,
1802 SPACC_CRYPTO_IPSEC_FIFO_SZ, ipsec_engine_algs,
1803 ARRAY_SIZE(ipsec_engine_algs));
1804}
1805
1806static struct platform_driver ipsec_driver = {
1807 .probe = ipsec_probe,
1808 .remove = __devexit_p(spacc_remove),
1809 .driver = {
1810 .name = "picoxcell-ipsec",
1811#ifdef CONFIG_PM
1812 .pm = &spacc_pm_ops,
1813#endif /* CONFIG_PM */
1814 },
1815};
1816
1817static int __devinit l2_probe(struct platform_device *pdev)
1818{
1819 return spacc_probe(pdev, SPACC_CRYPTO_L2_MAX_CTXS,
1820 SPACC_CRYPTO_L2_CIPHER_PG_SZ,
1821 SPACC_CRYPTO_L2_HASH_PG_SZ, SPACC_CRYPTO_L2_FIFO_SZ,
1822 l2_engine_algs, ARRAY_SIZE(l2_engine_algs));
1823}
1824
1825static struct platform_driver l2_driver = {
1826 .probe = l2_probe,
1827 .remove = __devexit_p(spacc_remove),
1828 .driver = {
1829 .name = "picoxcell-l2",
1830#ifdef CONFIG_PM
1831 .pm = &spacc_pm_ops,
1832#endif /* CONFIG_PM */
1833 },
1834};
1835
1836static int __init spacc_init(void)
1837{
1838 int ret = platform_driver_register(&ipsec_driver);
1839 if (ret) {
1840 pr_err("failed to register ipsec spacc driver");
1841 goto out;
1842 }
1843
1844 ret = platform_driver_register(&l2_driver);
1845 if (ret) {
1846 pr_err("failed to register l2 spacc driver");
1847 goto l2_failed;
1848 }
1849
1850 return 0;
1851
1852l2_failed:
1853 platform_driver_unregister(&ipsec_driver);
1854out:
1855 return ret;
1856}
1857module_init(spacc_init);
1858
1859static void __exit spacc_exit(void)
1860{
1861 platform_driver_unregister(&ipsec_driver);
1862 platform_driver_unregister(&l2_driver);
1863}
1864module_exit(spacc_exit);
1865
1866MODULE_LICENSE("GPL");
1867MODULE_AUTHOR("Jamie Iles");
diff --git a/drivers/crypto/picoxcell_crypto_regs.h b/drivers/crypto/picoxcell_crypto_regs.h
new file mode 100644
index 000000000000..af93442564c9
--- /dev/null
+++ b/drivers/crypto/picoxcell_crypto_regs.h
@@ -0,0 +1,128 @@
1/*
2 * Copyright (c) 2010 Picochip Ltd., Jamie Iles
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18#ifndef __PICOXCELL_CRYPTO_REGS_H__
19#define __PICOXCELL_CRYPTO_REGS_H__
20
21#define SPA_STATUS_OK 0
22#define SPA_STATUS_ICV_FAIL 1
23#define SPA_STATUS_MEMORY_ERROR 2
24#define SPA_STATUS_BLOCK_ERROR 3
25
26#define SPA_IRQ_CTRL_STAT_CNT_OFFSET 16
27#define SPA_IRQ_STAT_STAT_MASK (1 << 4)
28#define SPA_FIFO_STAT_STAT_OFFSET 16
29#define SPA_FIFO_STAT_STAT_CNT_MASK (0x3F << SPA_FIFO_STAT_STAT_OFFSET)
30#define SPA_STATUS_RES_CODE_OFFSET 24
31#define SPA_STATUS_RES_CODE_MASK (0x3 << SPA_STATUS_RES_CODE_OFFSET)
32#define SPA_KEY_SZ_CTX_INDEX_OFFSET 8
33#define SPA_KEY_SZ_CIPHER_OFFSET 31
34
35#define SPA_IRQ_EN_REG_OFFSET 0x00000000
36#define SPA_IRQ_STAT_REG_OFFSET 0x00000004
37#define SPA_IRQ_CTRL_REG_OFFSET 0x00000008
38#define SPA_FIFO_STAT_REG_OFFSET 0x0000000C
39#define SPA_SDMA_BRST_SZ_REG_OFFSET 0x00000010
40#define SPA_SRC_PTR_REG_OFFSET 0x00000020
41#define SPA_DST_PTR_REG_OFFSET 0x00000024
42#define SPA_OFFSET_REG_OFFSET 0x00000028
43#define SPA_AAD_LEN_REG_OFFSET 0x0000002C
44#define SPA_PROC_LEN_REG_OFFSET 0x00000030
45#define SPA_ICV_LEN_REG_OFFSET 0x00000034
46#define SPA_ICV_OFFSET_REG_OFFSET 0x00000038
47#define SPA_SW_CTRL_REG_OFFSET 0x0000003C
48#define SPA_CTRL_REG_OFFSET 0x00000040
49#define SPA_AUX_INFO_REG_OFFSET 0x0000004C
50#define SPA_STAT_POP_REG_OFFSET 0x00000050
51#define SPA_STATUS_REG_OFFSET 0x00000054
52#define SPA_KEY_SZ_REG_OFFSET 0x00000100
53#define SPA_CIPH_KEY_BASE_REG_OFFSET 0x00004000
54#define SPA_HASH_KEY_BASE_REG_OFFSET 0x00008000
55#define SPA_RC4_CTX_BASE_REG_OFFSET 0x00020000
56
57#define SPA_IRQ_EN_REG_RESET 0x00000000
58#define SPA_IRQ_CTRL_REG_RESET 0x00000000
59#define SPA_FIFO_STAT_REG_RESET 0x00000000
60#define SPA_SDMA_BRST_SZ_REG_RESET 0x00000000
61#define SPA_SRC_PTR_REG_RESET 0x00000000
62#define SPA_DST_PTR_REG_RESET 0x00000000
63#define SPA_OFFSET_REG_RESET 0x00000000
64#define SPA_AAD_LEN_REG_RESET 0x00000000
65#define SPA_PROC_LEN_REG_RESET 0x00000000
66#define SPA_ICV_LEN_REG_RESET 0x00000000
67#define SPA_ICV_OFFSET_REG_RESET 0x00000000
68#define SPA_SW_CTRL_REG_RESET 0x00000000
69#define SPA_CTRL_REG_RESET 0x00000000
70#define SPA_AUX_INFO_REG_RESET 0x00000000
71#define SPA_STAT_POP_REG_RESET 0x00000000
72#define SPA_STATUS_REG_RESET 0x00000000
73#define SPA_KEY_SZ_REG_RESET 0x00000000
74
75#define SPA_CTRL_HASH_ALG_IDX 4
76#define SPA_CTRL_CIPH_MODE_IDX 8
77#define SPA_CTRL_HASH_MODE_IDX 12
78#define SPA_CTRL_CTX_IDX 16
79#define SPA_CTRL_ENCRYPT_IDX 24
80#define SPA_CTRL_AAD_COPY 25
81#define SPA_CTRL_ICV_PT 26
82#define SPA_CTRL_ICV_ENC 27
83#define SPA_CTRL_ICV_APPEND 28
84#define SPA_CTRL_KEY_EXP 29
85
86#define SPA_KEY_SZ_CXT_IDX 8
87#define SPA_KEY_SZ_CIPHER_IDX 31
88
89#define SPA_IRQ_EN_CMD0_EN (1 << 0)
90#define SPA_IRQ_EN_STAT_EN (1 << 4)
91#define SPA_IRQ_EN_GLBL_EN (1 << 31)
92
93#define SPA_CTRL_CIPH_ALG_NULL 0x00
94#define SPA_CTRL_CIPH_ALG_DES 0x01
95#define SPA_CTRL_CIPH_ALG_AES 0x02
96#define SPA_CTRL_CIPH_ALG_RC4 0x03
97#define SPA_CTRL_CIPH_ALG_MULTI2 0x04
98#define SPA_CTRL_CIPH_ALG_KASUMI 0x05
99
100#define SPA_CTRL_HASH_ALG_NULL (0x00 << SPA_CTRL_HASH_ALG_IDX)
101#define SPA_CTRL_HASH_ALG_MD5 (0x01 << SPA_CTRL_HASH_ALG_IDX)
102#define SPA_CTRL_HASH_ALG_SHA (0x02 << SPA_CTRL_HASH_ALG_IDX)
103#define SPA_CTRL_HASH_ALG_SHA224 (0x03 << SPA_CTRL_HASH_ALG_IDX)
104#define SPA_CTRL_HASH_ALG_SHA256 (0x04 << SPA_CTRL_HASH_ALG_IDX)
105#define SPA_CTRL_HASH_ALG_SHA384 (0x05 << SPA_CTRL_HASH_ALG_IDX)
106#define SPA_CTRL_HASH_ALG_SHA512 (0x06 << SPA_CTRL_HASH_ALG_IDX)
107#define SPA_CTRL_HASH_ALG_AESMAC (0x07 << SPA_CTRL_HASH_ALG_IDX)
108#define SPA_CTRL_HASH_ALG_AESCMAC (0x08 << SPA_CTRL_HASH_ALG_IDX)
109#define SPA_CTRL_HASH_ALG_KASF9 (0x09 << SPA_CTRL_HASH_ALG_IDX)
110
111#define SPA_CTRL_CIPH_MODE_NULL (0x00 << SPA_CTRL_CIPH_MODE_IDX)
112#define SPA_CTRL_CIPH_MODE_ECB (0x00 << SPA_CTRL_CIPH_MODE_IDX)
113#define SPA_CTRL_CIPH_MODE_CBC (0x01 << SPA_CTRL_CIPH_MODE_IDX)
114#define SPA_CTRL_CIPH_MODE_CTR (0x02 << SPA_CTRL_CIPH_MODE_IDX)
115#define SPA_CTRL_CIPH_MODE_CCM (0x03 << SPA_CTRL_CIPH_MODE_IDX)
116#define SPA_CTRL_CIPH_MODE_GCM (0x05 << SPA_CTRL_CIPH_MODE_IDX)
117#define SPA_CTRL_CIPH_MODE_OFB (0x07 << SPA_CTRL_CIPH_MODE_IDX)
118#define SPA_CTRL_CIPH_MODE_CFB (0x08 << SPA_CTRL_CIPH_MODE_IDX)
119#define SPA_CTRL_CIPH_MODE_F8 (0x09 << SPA_CTRL_CIPH_MODE_IDX)
120
121#define SPA_CTRL_HASH_MODE_RAW (0x00 << SPA_CTRL_HASH_MODE_IDX)
122#define SPA_CTRL_HASH_MODE_SSLMAC (0x01 << SPA_CTRL_HASH_MODE_IDX)
123#define SPA_CTRL_HASH_MODE_HMAC (0x02 << SPA_CTRL_HASH_MODE_IDX)
124
125#define SPA_FIFO_STAT_EMPTY (1 << 31)
126#define SPA_FIFO_CMD_FULL (1 << 7)
127
128#endif /* __PICOXCELL_CRYPTO_REGS_H__ */
diff --git a/drivers/gpio/ml_ioh_gpio.c b/drivers/gpio/ml_ioh_gpio.c
index cead8e6ff345..7f6f01a4b145 100644
--- a/drivers/gpio/ml_ioh_gpio.c
+++ b/drivers/gpio/ml_ioh_gpio.c
@@ -326,6 +326,7 @@ static DEFINE_PCI_DEVICE_TABLE(ioh_gpio_pcidev_id) = {
326 { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) }, 326 { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) },
327 { 0, } 327 { 0, }
328}; 328};
329MODULE_DEVICE_TABLE(pci, ioh_gpio_pcidev_id);
329 330
330static struct pci_driver ioh_gpio_driver = { 331static struct pci_driver ioh_gpio_driver = {
331 .name = "ml_ioh_gpio", 332 .name = "ml_ioh_gpio",
diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c
index 0eba0a75c804..2c6af8705103 100644
--- a/drivers/gpio/pch_gpio.c
+++ b/drivers/gpio/pch_gpio.c
@@ -286,6 +286,7 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = {
286 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) }, 286 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) },
287 { 0, } 287 { 0, }
288}; 288};
289MODULE_DEVICE_TABLE(pci, pch_gpio_pcidev_id);
289 290
290static struct pci_driver pch_gpio_driver = { 291static struct pci_driver pch_gpio_driver = {
291 .name = "pch_gpio", 292 .name = "pch_gpio",
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 6977a1ce9d98..f73ef4390db6 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -672,7 +672,7 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info)
672 struct drm_crtc_helper_funcs *crtc_funcs; 672 struct drm_crtc_helper_funcs *crtc_funcs;
673 u16 *red, *green, *blue, *transp; 673 u16 *red, *green, *blue, *transp;
674 struct drm_crtc *crtc; 674 struct drm_crtc *crtc;
675 int i, rc = 0; 675 int i, j, rc = 0;
676 int start; 676 int start;
677 677
678 for (i = 0; i < fb_helper->crtc_count; i++) { 678 for (i = 0; i < fb_helper->crtc_count; i++) {
@@ -685,7 +685,7 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info)
685 transp = cmap->transp; 685 transp = cmap->transp;
686 start = cmap->start; 686 start = cmap->start;
687 687
688 for (i = 0; i < cmap->len; i++) { 688 for (j = 0; j < cmap->len; j++) {
689 u16 hred, hgreen, hblue, htransp = 0xffff; 689 u16 hred, hgreen, hblue, htransp = 0xffff;
690 690
691 hred = *red++; 691 hred = *red++;
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 3dadfa2a8528..28d1d3c24d65 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -164,8 +164,10 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
164 * available. In that case we can't account for this and just 164 * available. In that case we can't account for this and just
165 * hope for the best. 165 * hope for the best.
166 */ 166 */
167 if ((vblrc > 0) && (abs(diff_ns) > 1000000)) 167 if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) {
168 atomic_inc(&dev->_vblank_count[crtc]); 168 atomic_inc(&dev->_vblank_count[crtc]);
169 smp_mb__after_atomic_inc();
170 }
169 171
170 /* Invalidate all timestamps while vblank irq's are off. */ 172 /* Invalidate all timestamps while vblank irq's are off. */
171 clear_vblank_timestamps(dev, crtc); 173 clear_vblank_timestamps(dev, crtc);
@@ -491,6 +493,12 @@ void drm_calc_timestamping_constants(struct drm_crtc *crtc)
491 /* Dot clock in Hz: */ 493 /* Dot clock in Hz: */
492 dotclock = (u64) crtc->hwmode.clock * 1000; 494 dotclock = (u64) crtc->hwmode.clock * 1000;
493 495
496 /* Fields of interlaced scanout modes are only halve a frame duration.
497 * Double the dotclock to get halve the frame-/line-/pixelduration.
498 */
499 if (crtc->hwmode.flags & DRM_MODE_FLAG_INTERLACE)
500 dotclock *= 2;
501
494 /* Valid dotclock? */ 502 /* Valid dotclock? */
495 if (dotclock > 0) { 503 if (dotclock > 0) {
496 /* Convert scanline length in pixels and video dot clock to 504 /* Convert scanline length in pixels and video dot clock to
@@ -603,14 +611,6 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc,
603 return -EAGAIN; 611 return -EAGAIN;
604 } 612 }
605 613
606 /* Don't know yet how to handle interlaced or
607 * double scan modes. Just no-op for now.
608 */
609 if (mode->flags & (DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLSCAN)) {
610 DRM_DEBUG("crtc %d: Noop due to unsupported mode.\n", crtc);
611 return -ENOTSUPP;
612 }
613
614 /* Get current scanout position with system timestamp. 614 /* Get current scanout position with system timestamp.
615 * Repeat query up to DRM_TIMESTAMP_MAXRETRIES times 615 * Repeat query up to DRM_TIMESTAMP_MAXRETRIES times
616 * if single query takes longer than max_error nanoseconds. 616 * if single query takes longer than max_error nanoseconds.
@@ -858,10 +858,11 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
858 if (rc) { 858 if (rc) {
859 tslot = atomic_read(&dev->_vblank_count[crtc]) + diff; 859 tslot = atomic_read(&dev->_vblank_count[crtc]) + diff;
860 vblanktimestamp(dev, crtc, tslot) = t_vblank; 860 vblanktimestamp(dev, crtc, tslot) = t_vblank;
861 smp_wmb();
862 } 861 }
863 862
863 smp_mb__before_atomic_inc();
864 atomic_add(diff, &dev->_vblank_count[crtc]); 864 atomic_add(diff, &dev->_vblank_count[crtc]);
865 smp_mb__after_atomic_inc();
865} 866}
866 867
867/** 868/**
@@ -1011,7 +1012,8 @@ int drm_modeset_ctl(struct drm_device *dev, void *data,
1011 struct drm_file *file_priv) 1012 struct drm_file *file_priv)
1012{ 1013{
1013 struct drm_modeset_ctl *modeset = data; 1014 struct drm_modeset_ctl *modeset = data;
1014 int crtc, ret = 0; 1015 int ret = 0;
1016 unsigned int crtc;
1015 1017
1016 /* If drm_vblank_init() hasn't been called yet, just no-op */ 1018 /* If drm_vblank_init() hasn't been called yet, just no-op */
1017 if (!dev->num_crtcs) 1019 if (!dev->num_crtcs)
@@ -1293,15 +1295,16 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc)
1293 * e.g., due to spurious vblank interrupts. We need to 1295 * e.g., due to spurious vblank interrupts. We need to
1294 * ignore those for accounting. 1296 * ignore those for accounting.
1295 */ 1297 */
1296 if (abs(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) { 1298 if (abs64(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) {
1297 /* Store new timestamp in ringbuffer. */ 1299 /* Store new timestamp in ringbuffer. */
1298 vblanktimestamp(dev, crtc, vblcount + 1) = tvblank; 1300 vblanktimestamp(dev, crtc, vblcount + 1) = tvblank;
1299 smp_wmb();
1300 1301
1301 /* Increment cooked vblank count. This also atomically commits 1302 /* Increment cooked vblank count. This also atomically commits
1302 * the timestamp computed above. 1303 * the timestamp computed above.
1303 */ 1304 */
1305 smp_mb__before_atomic_inc();
1304 atomic_inc(&dev->_vblank_count[crtc]); 1306 atomic_inc(&dev->_vblank_count[crtc]);
1307 smp_mb__after_atomic_inc();
1305 } else { 1308 } else {
1306 DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n", 1309 DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n",
1307 crtc, (int) diff_ns); 1310 crtc, (int) diff_ns);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3601466c5502..4ff9b6cc973f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -865,7 +865,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
865 int max_freq; 865 int max_freq;
866 866
867 /* RPSTAT1 is in the GT power well */ 867 /* RPSTAT1 is in the GT power well */
868 __gen6_force_wake_get(dev_priv); 868 __gen6_gt_force_wake_get(dev_priv);
869 869
870 seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); 870 seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
871 seq_printf(m, "RPSTAT1: 0x%08x\n", I915_READ(GEN6_RPSTAT1)); 871 seq_printf(m, "RPSTAT1: 0x%08x\n", I915_READ(GEN6_RPSTAT1));
@@ -888,7 +888,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
888 seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", 888 seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
889 max_freq * 100); 889 max_freq * 100);
890 890
891 __gen6_force_wake_put(dev_priv); 891 __gen6_gt_force_wake_put(dev_priv);
892 } else { 892 } else {
893 seq_printf(m, "no P-state info available\n"); 893 seq_printf(m, "no P-state info available\n");
894 } 894 }
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 17bd766f2081..e33d9be7df3b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1895,6 +1895,17 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
1895 if (IS_GEN2(dev)) 1895 if (IS_GEN2(dev))
1896 dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30)); 1896 dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30));
1897 1897
1898 /* 965GM sometimes incorrectly writes to hardware status page (HWS)
1899 * using 32bit addressing, overwriting memory if HWS is located
1900 * above 4GB.
1901 *
1902 * The documentation also mentions an issue with undefined
1903 * behaviour if any general state is accessed within a page above 4GB,
1904 * which also needs to be handled carefully.
1905 */
1906 if (IS_BROADWATER(dev) || IS_CRESTLINE(dev))
1907 dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(32));
1908
1898 mmio_bar = IS_GEN2(dev) ? 1 : 0; 1909 mmio_bar = IS_GEN2(dev) ? 1 : 0;
1899 dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, 0); 1910 dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, 0);
1900 if (!dev_priv->regs) { 1911 if (!dev_priv->regs) {
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0ad533f06af9..22ec066adae6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -46,6 +46,9 @@ module_param_named(fbpercrtc, i915_fbpercrtc, int, 0400);
46unsigned int i915_powersave = 1; 46unsigned int i915_powersave = 1;
47module_param_named(powersave, i915_powersave, int, 0600); 47module_param_named(powersave, i915_powersave, int, 0600);
48 48
49unsigned int i915_semaphores = 0;
50module_param_named(semaphores, i915_semaphores, int, 0600);
51
49unsigned int i915_enable_rc6 = 0; 52unsigned int i915_enable_rc6 = 0;
50module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600); 53module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600);
51 54
@@ -254,7 +257,7 @@ void intel_detect_pch (struct drm_device *dev)
254 } 257 }
255} 258}
256 259
257void __gen6_force_wake_get(struct drm_i915_private *dev_priv) 260void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
258{ 261{
259 int count; 262 int count;
260 263
@@ -270,12 +273,22 @@ void __gen6_force_wake_get(struct drm_i915_private *dev_priv)
270 udelay(10); 273 udelay(10);
271} 274}
272 275
273void __gen6_force_wake_put(struct drm_i915_private *dev_priv) 276void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
274{ 277{
275 I915_WRITE_NOTRACE(FORCEWAKE, 0); 278 I915_WRITE_NOTRACE(FORCEWAKE, 0);
276 POSTING_READ(FORCEWAKE); 279 POSTING_READ(FORCEWAKE);
277} 280}
278 281
282void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
283{
284 int loop = 500;
285 u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
286 while (fifo < 20 && loop--) {
287 udelay(10);
288 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
289 }
290}
291
279static int i915_drm_freeze(struct drm_device *dev) 292static int i915_drm_freeze(struct drm_device *dev)
280{ 293{
281 struct drm_i915_private *dev_priv = dev->dev_private; 294 struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 65dfe81d0035..456f40484838 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -956,6 +956,7 @@ extern struct drm_ioctl_desc i915_ioctls[];
956extern int i915_max_ioctl; 956extern int i915_max_ioctl;
957extern unsigned int i915_fbpercrtc; 957extern unsigned int i915_fbpercrtc;
958extern unsigned int i915_powersave; 958extern unsigned int i915_powersave;
959extern unsigned int i915_semaphores;
959extern unsigned int i915_lvds_downclock; 960extern unsigned int i915_lvds_downclock;
960extern unsigned int i915_panel_use_ssc; 961extern unsigned int i915_panel_use_ssc;
961extern unsigned int i915_enable_rc6; 962extern unsigned int i915_enable_rc6;
@@ -1177,6 +1178,9 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
1177void i915_gem_free_all_phys_object(struct drm_device *dev); 1178void i915_gem_free_all_phys_object(struct drm_device *dev);
1178void i915_gem_release(struct drm_device *dev, struct drm_file *file); 1179void i915_gem_release(struct drm_device *dev, struct drm_file *file);
1179 1180
1181uint32_t
1182i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
1183
1180/* i915_gem_gtt.c */ 1184/* i915_gem_gtt.c */
1181void i915_gem_restore_gtt_mappings(struct drm_device *dev); 1185void i915_gem_restore_gtt_mappings(struct drm_device *dev);
1182int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj); 1186int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
@@ -1353,22 +1357,32 @@ __i915_write(64, q)
1353 * must be set to prevent GT core from power down and stale values being 1357 * must be set to prevent GT core from power down and stale values being
1354 * returned. 1358 * returned.
1355 */ 1359 */
1356void __gen6_force_wake_get(struct drm_i915_private *dev_priv); 1360void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
1357void __gen6_force_wake_put (struct drm_i915_private *dev_priv); 1361void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
1358static inline u32 i915_safe_read(struct drm_i915_private *dev_priv, u32 reg) 1362void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
1363
1364static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
1359{ 1365{
1360 u32 val; 1366 u32 val;
1361 1367
1362 if (dev_priv->info->gen >= 6) { 1368 if (dev_priv->info->gen >= 6) {
1363 __gen6_force_wake_get(dev_priv); 1369 __gen6_gt_force_wake_get(dev_priv);
1364 val = I915_READ(reg); 1370 val = I915_READ(reg);
1365 __gen6_force_wake_put(dev_priv); 1371 __gen6_gt_force_wake_put(dev_priv);
1366 } else 1372 } else
1367 val = I915_READ(reg); 1373 val = I915_READ(reg);
1368 1374
1369 return val; 1375 return val;
1370} 1376}
1371 1377
1378static inline void i915_gt_write(struct drm_i915_private *dev_priv,
1379 u32 reg, u32 val)
1380{
1381 if (dev_priv->info->gen >= 6)
1382 __gen6_gt_wait_for_fifo(dev_priv);
1383 I915_WRITE(reg, val);
1384}
1385
1372static inline void 1386static inline void
1373i915_write(struct drm_i915_private *dev_priv, u32 reg, u64 val, int len) 1387i915_write(struct drm_i915_private *dev_priv, u32 reg, u64 val, int len)
1374{ 1388{
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cf4f74c7c6fb..36e66cc5225e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1398,7 +1398,7 @@ i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
1398 * Return the required GTT alignment for an object, only taking into account 1398 * Return the required GTT alignment for an object, only taking into account
1399 * unfenced tiled surface requirements. 1399 * unfenced tiled surface requirements.
1400 */ 1400 */
1401static uint32_t 1401uint32_t
1402i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj) 1402i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
1403{ 1403{
1404 struct drm_device *dev = obj->base.dev; 1404 struct drm_device *dev = obj->base.dev;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d2f445e825f2..50ab1614571c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -772,8 +772,8 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
772 if (from == NULL || to == from) 772 if (from == NULL || to == from)
773 return 0; 773 return 0;
774 774
775 /* XXX gpu semaphores are currently causing hard hangs on SNB mobile */ 775 /* XXX gpu semaphores are implicated in various hard hangs on SNB */
776 if (INTEL_INFO(obj->base.dev)->gen < 6 || IS_MOBILE(obj->base.dev)) 776 if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
777 return i915_gem_object_wait_rendering(obj, true); 777 return i915_gem_object_wait_rendering(obj, true);
778 778
779 idx = intel_ring_sync_index(from, to); 779 idx = intel_ring_sync_index(from, to);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 22a32b9932c5..d64843e18df2 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -349,14 +349,27 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
349 (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && 349 (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end &&
350 i915_gem_object_fence_ok(obj, args->tiling_mode)); 350 i915_gem_object_fence_ok(obj, args->tiling_mode));
351 351
352 obj->tiling_changed = true; 352 /* Rebind if we need a change of alignment */
353 obj->tiling_mode = args->tiling_mode; 353 if (!obj->map_and_fenceable) {
354 obj->stride = args->stride; 354 u32 unfenced_alignment =
355 i915_gem_get_unfenced_gtt_alignment(obj);
356 if (obj->gtt_offset & (unfenced_alignment - 1))
357 ret = i915_gem_object_unbind(obj);
358 }
359
360 if (ret == 0) {
361 obj->tiling_changed = true;
362 obj->tiling_mode = args->tiling_mode;
363 obj->stride = args->stride;
364 }
355 } 365 }
366 /* we have to maintain this existing ABI... */
367 args->stride = obj->stride;
368 args->tiling_mode = obj->tiling_mode;
356 drm_gem_object_unreference(&obj->base); 369 drm_gem_object_unreference(&obj->base);
357 mutex_unlock(&dev->struct_mutex); 370 mutex_unlock(&dev->struct_mutex);
358 371
359 return 0; 372 return ret;
360} 373}
361 374
362/** 375/**
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 97f946dcc1aa..8a9e08bf1cf7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -316,6 +316,8 @@ static void i915_hotplug_work_func(struct work_struct *work)
316 struct drm_mode_config *mode_config = &dev->mode_config; 316 struct drm_mode_config *mode_config = &dev->mode_config;
317 struct intel_encoder *encoder; 317 struct intel_encoder *encoder;
318 318
319 DRM_DEBUG_KMS("running encoder hotplug functions\n");
320
319 list_for_each_entry(encoder, &mode_config->encoder_list, base.head) 321 list_for_each_entry(encoder, &mode_config->encoder_list, base.head)
320 if (encoder->hot_plug) 322 if (encoder->hot_plug)
321 encoder->hot_plug(encoder); 323 encoder->hot_plug(encoder);
@@ -1649,9 +1651,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
1649 } else { 1651 } else {
1650 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG | 1652 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG |
1651 SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG; 1653 SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG;
1652 hotplug_mask |= SDE_AUX_MASK | SDE_FDI_MASK | SDE_TRANS_MASK; 1654 hotplug_mask |= SDE_AUX_MASK;
1653 I915_WRITE(FDI_RXA_IMR, 0);
1654 I915_WRITE(FDI_RXB_IMR, 0);
1655 } 1655 }
1656 1656
1657 dev_priv->pch_irq_mask = ~hotplug_mask; 1657 dev_priv->pch_irq_mask = ~hotplug_mask;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 15d94c63918c..2abe240dae58 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3271,6 +3271,8 @@
3271#define FORCEWAKE 0xA18C 3271#define FORCEWAKE 0xA18C
3272#define FORCEWAKE_ACK 0x130090 3272#define FORCEWAKE_ACK 0x130090
3273 3273
3274#define GT_FIFO_FREE_ENTRIES 0x120008
3275
3274#define GEN6_RPNSWREQ 0xA008 3276#define GEN6_RPNSWREQ 0xA008
3275#define GEN6_TURBO_DISABLE (1<<31) 3277#define GEN6_TURBO_DISABLE (1<<31)
3276#define GEN6_FREQUENCY(x) ((x)<<25) 3278#define GEN6_FREQUENCY(x) ((x)<<25)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3b006536b3d2..49fb54fd9a18 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1219,7 +1219,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
1219 u32 blt_ecoskpd; 1219 u32 blt_ecoskpd;
1220 1220
1221 /* Make sure blitter notifies FBC of writes */ 1221 /* Make sure blitter notifies FBC of writes */
1222 __gen6_force_wake_get(dev_priv); 1222 __gen6_gt_force_wake_get(dev_priv);
1223 blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD); 1223 blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
1224 blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY << 1224 blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
1225 GEN6_BLITTER_LOCK_SHIFT; 1225 GEN6_BLITTER_LOCK_SHIFT;
@@ -1230,7 +1230,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
1230 GEN6_BLITTER_LOCK_SHIFT); 1230 GEN6_BLITTER_LOCK_SHIFT);
1231 I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); 1231 I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
1232 POSTING_READ(GEN6_BLITTER_ECOSKPD); 1232 POSTING_READ(GEN6_BLITTER_ECOSKPD);
1233 __gen6_force_wake_put(dev_priv); 1233 __gen6_gt_force_wake_put(dev_priv);
1234} 1234}
1235 1235
1236static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) 1236static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
@@ -1630,19 +1630,19 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
1630 struct drm_i915_gem_object *obj = to_intel_framebuffer(old_fb)->obj; 1630 struct drm_i915_gem_object *obj = to_intel_framebuffer(old_fb)->obj;
1631 1631
1632 wait_event(dev_priv->pending_flip_queue, 1632 wait_event(dev_priv->pending_flip_queue,
1633 atomic_read(&dev_priv->mm.wedged) ||
1633 atomic_read(&obj->pending_flip) == 0); 1634 atomic_read(&obj->pending_flip) == 0);
1634 1635
1635 /* Big Hammer, we also need to ensure that any pending 1636 /* Big Hammer, we also need to ensure that any pending
1636 * MI_WAIT_FOR_EVENT inside a user batch buffer on the 1637 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
1637 * current scanout is retired before unpinning the old 1638 * current scanout is retired before unpinning the old
1638 * framebuffer. 1639 * framebuffer.
1640 *
1641 * This should only fail upon a hung GPU, in which case we
1642 * can safely continue.
1639 */ 1643 */
1640 ret = i915_gem_object_flush_gpu(obj, false); 1644 ret = i915_gem_object_flush_gpu(obj, false);
1641 if (ret) { 1645 (void) ret;
1642 i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
1643 mutex_unlock(&dev->struct_mutex);
1644 return ret;
1645 }
1646 } 1646 }
1647 1647
1648 ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y, 1648 ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
@@ -2045,6 +2045,31 @@ static void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
2045 atomic_read(&obj->pending_flip) == 0); 2045 atomic_read(&obj->pending_flip) == 0);
2046} 2046}
2047 2047
2048static bool intel_crtc_driving_pch(struct drm_crtc *crtc)
2049{
2050 struct drm_device *dev = crtc->dev;
2051 struct drm_mode_config *mode_config = &dev->mode_config;
2052 struct intel_encoder *encoder;
2053
2054 /*
2055 * If there's a non-PCH eDP on this crtc, it must be DP_A, and that
2056 * must be driven by its own crtc; no sharing is possible.
2057 */
2058 list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
2059 if (encoder->base.crtc != crtc)
2060 continue;
2061
2062 switch (encoder->type) {
2063 case INTEL_OUTPUT_EDP:
2064 if (!intel_encoder_is_pch_edp(&encoder->base))
2065 return false;
2066 continue;
2067 }
2068 }
2069
2070 return true;
2071}
2072
2048static void ironlake_crtc_enable(struct drm_crtc *crtc) 2073static void ironlake_crtc_enable(struct drm_crtc *crtc)
2049{ 2074{
2050 struct drm_device *dev = crtc->dev; 2075 struct drm_device *dev = crtc->dev;
@@ -2053,6 +2078,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2053 int pipe = intel_crtc->pipe; 2078 int pipe = intel_crtc->pipe;
2054 int plane = intel_crtc->plane; 2079 int plane = intel_crtc->plane;
2055 u32 reg, temp; 2080 u32 reg, temp;
2081 bool is_pch_port = false;
2056 2082
2057 if (intel_crtc->active) 2083 if (intel_crtc->active)
2058 return; 2084 return;
@@ -2066,7 +2092,56 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2066 I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN); 2092 I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN);
2067 } 2093 }
2068 2094
2069 ironlake_fdi_enable(crtc); 2095 is_pch_port = intel_crtc_driving_pch(crtc);
2096
2097 if (is_pch_port)
2098 ironlake_fdi_enable(crtc);
2099 else {
2100 /* disable CPU FDI tx and PCH FDI rx */
2101 reg = FDI_TX_CTL(pipe);
2102 temp = I915_READ(reg);
2103 I915_WRITE(reg, temp & ~FDI_TX_ENABLE);
2104 POSTING_READ(reg);
2105
2106 reg = FDI_RX_CTL(pipe);
2107 temp = I915_READ(reg);
2108 temp &= ~(0x7 << 16);
2109 temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11;
2110 I915_WRITE(reg, temp & ~FDI_RX_ENABLE);
2111
2112 POSTING_READ(reg);
2113 udelay(100);
2114
2115 /* Ironlake workaround, disable clock pointer after downing FDI */
2116 if (HAS_PCH_IBX(dev))
2117 I915_WRITE(FDI_RX_CHICKEN(pipe),
2118 I915_READ(FDI_RX_CHICKEN(pipe) &
2119 ~FDI_RX_PHASE_SYNC_POINTER_ENABLE));
2120
2121 /* still set train pattern 1 */
2122 reg = FDI_TX_CTL(pipe);
2123 temp = I915_READ(reg);
2124 temp &= ~FDI_LINK_TRAIN_NONE;
2125 temp |= FDI_LINK_TRAIN_PATTERN_1;
2126 I915_WRITE(reg, temp);
2127
2128 reg = FDI_RX_CTL(pipe);
2129 temp = I915_READ(reg);
2130 if (HAS_PCH_CPT(dev)) {
2131 temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
2132 temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
2133 } else {
2134 temp &= ~FDI_LINK_TRAIN_NONE;
2135 temp |= FDI_LINK_TRAIN_PATTERN_1;
2136 }
2137 /* BPC in FDI rx is consistent with that in PIPECONF */
2138 temp &= ~(0x07 << 16);
2139 temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11;
2140 I915_WRITE(reg, temp);
2141
2142 POSTING_READ(reg);
2143 udelay(100);
2144 }
2070 2145
2071 /* Enable panel fitting for LVDS */ 2146 /* Enable panel fitting for LVDS */
2072 if (dev_priv->pch_pf_size && 2147 if (dev_priv->pch_pf_size &&
@@ -2100,6 +2175,10 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2100 intel_flush_display_plane(dev, plane); 2175 intel_flush_display_plane(dev, plane);
2101 } 2176 }
2102 2177
2178 /* Skip the PCH stuff if possible */
2179 if (!is_pch_port)
2180 goto done;
2181
2103 /* For PCH output, training FDI link */ 2182 /* For PCH output, training FDI link */
2104 if (IS_GEN6(dev)) 2183 if (IS_GEN6(dev))
2105 gen6_fdi_link_train(crtc); 2184 gen6_fdi_link_train(crtc);
@@ -2184,7 +2263,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2184 I915_WRITE(reg, temp | TRANS_ENABLE); 2263 I915_WRITE(reg, temp | TRANS_ENABLE);
2185 if (wait_for(I915_READ(reg) & TRANS_STATE_ENABLE, 100)) 2264 if (wait_for(I915_READ(reg) & TRANS_STATE_ENABLE, 100))
2186 DRM_ERROR("failed to enable transcoder %d\n", pipe); 2265 DRM_ERROR("failed to enable transcoder %d\n", pipe);
2187 2266done:
2188 intel_crtc_load_lut(crtc); 2267 intel_crtc_load_lut(crtc);
2189 intel_update_fbc(dev); 2268 intel_update_fbc(dev);
2190 intel_crtc_update_cursor(crtc, true); 2269 intel_crtc_update_cursor(crtc, true);
@@ -6203,7 +6282,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
6203 * userspace... 6282 * userspace...
6204 */ 6283 */
6205 I915_WRITE(GEN6_RC_STATE, 0); 6284 I915_WRITE(GEN6_RC_STATE, 0);
6206 __gen6_force_wake_get(dev_priv); 6285 __gen6_gt_force_wake_get(dev_priv);
6207 6286
6208 /* disable the counters and set deterministic thresholds */ 6287 /* disable the counters and set deterministic thresholds */
6209 I915_WRITE(GEN6_RC_CONTROL, 0); 6288 I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -6301,7 +6380,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
6301 /* enable all PM interrupts */ 6380 /* enable all PM interrupts */
6302 I915_WRITE(GEN6_PMINTRMSK, 0); 6381 I915_WRITE(GEN6_PMINTRMSK, 0);
6303 6382
6304 __gen6_force_wake_put(dev_priv); 6383 __gen6_gt_force_wake_put(dev_priv);
6305} 6384}
6306 6385
6307void intel_enable_clock_gating(struct drm_device *dev) 6386void intel_enable_clock_gating(struct drm_device *dev)
@@ -6496,7 +6575,7 @@ static void ironlake_disable_rc6(struct drm_device *dev)
6496 POSTING_READ(RSTDBYCTL); 6575 POSTING_READ(RSTDBYCTL);
6497 } 6576 }
6498 6577
6499 ironlake_disable_rc6(dev); 6578 ironlake_teardown_rc6(dev);
6500} 6579}
6501 6580
6502static int ironlake_setup_rc6(struct drm_device *dev) 6581static int ironlake_setup_rc6(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index c65992df458d..f8f86e57df22 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -208,7 +208,6 @@ u32 intel_panel_get_backlight(struct drm_device *dev)
208 val &= ~1; 208 val &= ~1;
209 pci_read_config_byte(dev->pdev, PCI_LBPC, &lbpc); 209 pci_read_config_byte(dev->pdev, PCI_LBPC, &lbpc);
210 val *= lbpc; 210 val *= lbpc;
211 val >>= 1;
212 } 211 }
213 } 212 }
214 213
@@ -235,11 +234,11 @@ void intel_panel_set_backlight(struct drm_device *dev, u32 level)
235 234
236 if (is_backlight_combination_mode(dev)){ 235 if (is_backlight_combination_mode(dev)){
237 u32 max = intel_panel_get_max_backlight(dev); 236 u32 max = intel_panel_get_max_backlight(dev);
238 u8 lpbc; 237 u8 lbpc;
239 238
240 lpbc = level * 0xfe / max + 1; 239 lbpc = level * 0xfe / max + 1;
241 level /= lpbc; 240 level /= lbpc;
242 pci_write_config_byte(dev->pdev, PCI_LBPC, lpbc); 241 pci_write_config_byte(dev->pdev, PCI_LBPC, lbpc);
243 } 242 }
244 243
245 tmp = I915_READ(BLC_PWM_CTL); 244 tmp = I915_READ(BLC_PWM_CTL);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6d6fde85a636..34306865a5df 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,22 +14,23 @@ struct intel_hw_status_page {
14 struct drm_i915_gem_object *obj; 14 struct drm_i915_gem_object *obj;
15}; 15};
16 16
17#define I915_RING_READ(reg) i915_safe_read(dev_priv, reg) 17#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
18#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
18 19
19#define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base)) 20#define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
20#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) 21#define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
21 22
22#define I915_READ_START(ring) I915_RING_READ(RING_START((ring)->mmio_base)) 23#define I915_READ_START(ring) I915_RING_READ(RING_START((ring)->mmio_base))
23#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val) 24#define I915_WRITE_START(ring, val) I915_RING_WRITE(RING_START((ring)->mmio_base), val)
24 25
25#define I915_READ_HEAD(ring) I915_RING_READ(RING_HEAD((ring)->mmio_base)) 26#define I915_READ_HEAD(ring) I915_RING_READ(RING_HEAD((ring)->mmio_base))
26#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val) 27#define I915_WRITE_HEAD(ring, val) I915_RING_WRITE(RING_HEAD((ring)->mmio_base), val)
27 28
28#define I915_READ_CTL(ring) I915_RING_READ(RING_CTL((ring)->mmio_base)) 29#define I915_READ_CTL(ring) I915_RING_READ(RING_CTL((ring)->mmio_base))
29#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val) 30#define I915_WRITE_CTL(ring, val) I915_RING_WRITE(RING_CTL((ring)->mmio_base), val)
30 31
31#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
32#define I915_READ_IMR(ring) I915_RING_READ(RING_IMR((ring)->mmio_base)) 32#define I915_READ_IMR(ring) I915_RING_READ(RING_IMR((ring)->mmio_base))
33#define I915_WRITE_IMR(ring, val) I915_RING_WRITE(RING_IMR((ring)->mmio_base), val)
33 34
34#define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID((ring)->mmio_base)) 35#define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID((ring)->mmio_base))
35#define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0((ring)->mmio_base)) 36#define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0((ring)->mmio_base))
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 49e5e99917e2..6bdab891c64e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -6228,7 +6228,7 @@ parse_dcb15_entry(struct drm_device *dev, struct dcb_table *dcb,
6228 entry->tvconf.has_component_output = false; 6228 entry->tvconf.has_component_output = false;
6229 break; 6229 break;
6230 case OUTPUT_LVDS: 6230 case OUTPUT_LVDS:
6231 if ((conn & 0x00003f00) != 0x10) 6231 if ((conn & 0x00003f00) >> 8 != 0x10)
6232 entry->lvdsconf.use_straps_for_mode = true; 6232 entry->lvdsconf.use_straps_for_mode = true;
6233 entry->lvdsconf.use_power_scripts = true; 6233 entry->lvdsconf.use_power_scripts = true;
6234 break; 6234 break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index a7fae26f4654..a52184007f5f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -49,7 +49,10 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
49 DRM_ERROR("bo %p still attached to GEM object\n", bo); 49 DRM_ERROR("bo %p still attached to GEM object\n", bo);
50 50
51 nv10_mem_put_tile_region(dev, nvbo->tile, NULL); 51 nv10_mem_put_tile_region(dev, nvbo->tile, NULL);
52 nouveau_vm_put(&nvbo->vma); 52 if (nvbo->vma.node) {
53 nouveau_vm_unmap(&nvbo->vma);
54 nouveau_vm_put(&nvbo->vma);
55 }
53 kfree(nvbo); 56 kfree(nvbo);
54} 57}
55 58
@@ -128,6 +131,7 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
128 } 131 }
129 } 132 }
130 133
134 nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
131 nouveau_bo_placement_set(nvbo, flags, 0); 135 nouveau_bo_placement_set(nvbo, flags, 0);
132 136
133 nvbo->channel = chan; 137 nvbo->channel = chan;
@@ -166,17 +170,17 @@ static void
166set_placement_range(struct nouveau_bo *nvbo, uint32_t type) 170set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
167{ 171{
168 struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev); 172 struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev);
173 int vram_pages = dev_priv->vram_size >> PAGE_SHIFT;
169 174
170 if (dev_priv->card_type == NV_10 && 175 if (dev_priv->card_type == NV_10 &&
171 nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM)) { 176 nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) &&
177 nvbo->bo.mem.num_pages < vram_pages / 2) {
172 /* 178 /*
173 * Make sure that the color and depth buffers are handled 179 * Make sure that the color and depth buffers are handled
174 * by independent memory controller units. Up to a 9x 180 * by independent memory controller units. Up to a 9x
175 * speed up when alpha-blending and depth-test are enabled 181 * speed up when alpha-blending and depth-test are enabled
176 * at the same time. 182 * at the same time.
177 */ 183 */
178 int vram_pages = dev_priv->vram_size >> PAGE_SHIFT;
179
180 if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) { 184 if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) {
181 nvbo->placement.fpfn = vram_pages / 2; 185 nvbo->placement.fpfn = vram_pages / 2;
182 nvbo->placement.lpfn = ~0; 186 nvbo->placement.lpfn = ~0;
@@ -785,7 +789,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
785 if (ret) 789 if (ret)
786 goto out; 790 goto out;
787 791
788 ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, new_mem); 792 ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
789out: 793out:
790 ttm_bo_mem_put(bo, &tmp_mem); 794 ttm_bo_mem_put(bo, &tmp_mem);
791 return ret; 795 return ret;
@@ -811,11 +815,11 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
811 if (ret) 815 if (ret)
812 return ret; 816 return ret;
813 817
814 ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, &tmp_mem); 818 ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
815 if (ret) 819 if (ret)
816 goto out; 820 goto out;
817 821
818 ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem); 822 ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, new_mem);
819 if (ret) 823 if (ret)
820 goto out; 824 goto out;
821 825
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index a21e00076839..390d82c3c4b0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -507,6 +507,7 @@ nouveau_connector_native_mode(struct drm_connector *connector)
507 int high_w = 0, high_h = 0, high_v = 0; 507 int high_w = 0, high_h = 0, high_v = 0;
508 508
509 list_for_each_entry(mode, &nv_connector->base.probed_modes, head) { 509 list_for_each_entry(mode, &nv_connector->base.probed_modes, head) {
510 mode->vrefresh = drm_mode_vrefresh(mode);
510 if (helper->mode_valid(connector, mode) != MODE_OK || 511 if (helper->mode_valid(connector, mode) != MODE_OK ||
511 (mode->flags & DRM_MODE_FLAG_INTERLACE)) 512 (mode->flags & DRM_MODE_FLAG_INTERLACE))
512 continue; 513 continue;
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 65699bfaaaea..b368ed74aad7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -83,7 +83,8 @@ nouveau_dma_init(struct nouveau_channel *chan)
83 return ret; 83 return ret;
84 84
85 /* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */ 85 /* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
86 ret = nouveau_notifier_alloc(chan, NvNotify0, 32, &chan->m2mf_ntfy); 86 ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfd0, 0x1000,
87 &chan->m2mf_ntfy);
87 if (ret) 88 if (ret)
88 return ret; 89 return ret;
89 90
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 9821fcacc3d2..982d70b12722 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -852,7 +852,8 @@ extern const struct ttm_mem_type_manager_func nouveau_vram_manager;
852extern int nouveau_notifier_init_channel(struct nouveau_channel *); 852extern int nouveau_notifier_init_channel(struct nouveau_channel *);
853extern void nouveau_notifier_takedown_channel(struct nouveau_channel *); 853extern void nouveau_notifier_takedown_channel(struct nouveau_channel *);
854extern int nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle, 854extern int nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle,
855 int cout, uint32_t *offset); 855 int cout, uint32_t start, uint32_t end,
856 uint32_t *offset);
856extern int nouveau_notifier_offset(struct nouveau_gpuobj *, uint32_t *); 857extern int nouveau_notifier_offset(struct nouveau_gpuobj *, uint32_t *);
857extern int nouveau_ioctl_notifier_alloc(struct drm_device *, void *data, 858extern int nouveau_ioctl_notifier_alloc(struct drm_device *, void *data,
858 struct drm_file *); 859 struct drm_file *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 26347b7cd872..b0fb9bdcddb7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -725,8 +725,10 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
725 ret = vram->get(dev, mem->num_pages << PAGE_SHIFT, 725 ret = vram->get(dev, mem->num_pages << PAGE_SHIFT,
726 mem->page_alignment << PAGE_SHIFT, size_nc, 726 mem->page_alignment << PAGE_SHIFT, size_nc,
727 (nvbo->tile_flags >> 8) & 0xff, &node); 727 (nvbo->tile_flags >> 8) & 0xff, &node);
728 if (ret) 728 if (ret) {
729 return ret; 729 mem->mm_node = NULL;
730 return (ret == -ENOSPC) ? 0 : ret;
731 }
730 732
731 node->page_shift = 12; 733 node->page_shift = 12;
732 if (nvbo->vma.node) 734 if (nvbo->vma.node)
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.c b/drivers/gpu/drm/nouveau/nouveau_mm.c
index 8844b50c3e54..7609756b6faf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.c
@@ -123,7 +123,7 @@ nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
123 return 0; 123 return 0;
124 } 124 }
125 125
126 return -ENOMEM; 126 return -ENOSPC;
127} 127}
128 128
129int 129int
diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
index fe29d604b820..5ea167623a82 100644
--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
@@ -96,7 +96,8 @@ nouveau_notifier_gpuobj_dtor(struct drm_device *dev,
96 96
97int 97int
98nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle, 98nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
99 int size, uint32_t *b_offset) 99 int size, uint32_t start, uint32_t end,
100 uint32_t *b_offset)
100{ 101{
101 struct drm_device *dev = chan->dev; 102 struct drm_device *dev = chan->dev;
102 struct nouveau_gpuobj *nobj = NULL; 103 struct nouveau_gpuobj *nobj = NULL;
@@ -104,9 +105,10 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
104 uint32_t offset; 105 uint32_t offset;
105 int target, ret; 106 int target, ret;
106 107
107 mem = drm_mm_search_free(&chan->notifier_heap, size, 0, 0); 108 mem = drm_mm_search_free_in_range(&chan->notifier_heap, size, 0,
109 start, end, 0);
108 if (mem) 110 if (mem)
109 mem = drm_mm_get_block(mem, size, 0); 111 mem = drm_mm_get_block_range(mem, size, 0, start, end);
110 if (!mem) { 112 if (!mem) {
111 NV_ERROR(dev, "Channel %d notifier block full\n", chan->id); 113 NV_ERROR(dev, "Channel %d notifier block full\n", chan->id);
112 return -ENOMEM; 114 return -ENOMEM;
@@ -177,7 +179,8 @@ nouveau_ioctl_notifier_alloc(struct drm_device *dev, void *data,
177 if (IS_ERR(chan)) 179 if (IS_ERR(chan))
178 return PTR_ERR(chan); 180 return PTR_ERR(chan);
179 181
180 ret = nouveau_notifier_alloc(chan, na->handle, na->size, &na->offset); 182 ret = nouveau_notifier_alloc(chan, na->handle, na->size, 0, 0x1000,
183 &na->offset);
181 nouveau_channel_put(&chan); 184 nouveau_channel_put(&chan);
182 return ret; 185 return ret;
183} 186}
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index f05c0cddfeca..4399e2f34db4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -543,7 +543,7 @@ nouveau_pm_resume(struct drm_device *dev)
543 struct nouveau_pm_engine *pm = &dev_priv->engine.pm; 543 struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
544 struct nouveau_pm_level *perflvl; 544 struct nouveau_pm_level *perflvl;
545 545
546 if (pm->cur == &pm->boot) 546 if (!pm->cur || pm->cur == &pm->boot)
547 return; 547 return;
548 548
549 perflvl = pm->cur; 549 perflvl = pm->cur;
diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
index ef23550407b5..c82db37d9f41 100644
--- a/drivers/gpu/drm/nouveau/nv04_dfp.c
+++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
@@ -342,8 +342,8 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
342 if (nv_encoder->dcb->type == OUTPUT_LVDS) { 342 if (nv_encoder->dcb->type == OUTPUT_LVDS) {
343 bool duallink, dummy; 343 bool duallink, dummy;
344 344
345 nouveau_bios_parse_lvds_table(dev, nv_connector->native_mode-> 345 nouveau_bios_parse_lvds_table(dev, output_mode->clock,
346 clock, &duallink, &dummy); 346 &duallink, &dummy);
347 if (duallink) 347 if (duallink)
348 regp->fp_control |= (8 << 28); 348 regp->fp_control |= (8 << 28);
349 } else 349 } else
@@ -518,8 +518,6 @@ static void nv04_lvds_dpms(struct drm_encoder *encoder, int mode)
518 return; 518 return;
519 519
520 if (nv_encoder->dcb->lvdsconf.use_power_scripts) { 520 if (nv_encoder->dcb->lvdsconf.use_power_scripts) {
521 struct nouveau_connector *nv_connector = nouveau_encoder_connector_get(nv_encoder);
522
523 /* when removing an output, crtc may not be set, but PANEL_OFF 521 /* when removing an output, crtc may not be set, but PANEL_OFF
524 * must still be run 522 * must still be run
525 */ 523 */
@@ -527,12 +525,8 @@ static void nv04_lvds_dpms(struct drm_encoder *encoder, int mode)
527 nv04_dfp_get_bound_head(dev, nv_encoder->dcb); 525 nv04_dfp_get_bound_head(dev, nv_encoder->dcb);
528 526
529 if (mode == DRM_MODE_DPMS_ON) { 527 if (mode == DRM_MODE_DPMS_ON) {
530 if (!nv_connector->native_mode) {
531 NV_ERROR(dev, "Not turning on LVDS without native mode\n");
532 return;
533 }
534 call_lvds_script(dev, nv_encoder->dcb, head, 528 call_lvds_script(dev, nv_encoder->dcb, head,
535 LVDS_PANEL_ON, nv_connector->native_mode->clock); 529 LVDS_PANEL_ON, nv_encoder->mode.clock);
536 } else 530 } else
537 /* pxclk of 0 is fine for PANEL_OFF, and for a 531 /* pxclk of 0 is fine for PANEL_OFF, and for a
538 * disconnected LVDS encoder there is no native_mode 532 * disconnected LVDS encoder there is no native_mode
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index 8870d72388c8..18d30c2c1aa6 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -211,18 +211,32 @@ nv40_graph_set_tile_region(struct drm_device *dev, int i)
211 struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; 211 struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
212 212
213 switch (dev_priv->chipset) { 213 switch (dev_priv->chipset) {
214 case 0x40:
215 case 0x41: /* guess */
216 case 0x42:
217 case 0x43:
218 case 0x45: /* guess */
219 case 0x4e:
220 nv_wr32(dev, NV20_PGRAPH_TSIZE(i), tile->pitch);
221 nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), tile->limit);
222 nv_wr32(dev, NV20_PGRAPH_TILE(i), tile->addr);
223 nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tile->pitch);
224 nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tile->limit);
225 nv_wr32(dev, NV40_PGRAPH_TILE1(i), tile->addr);
226 break;
214 case 0x44: 227 case 0x44:
215 case 0x4a: 228 case 0x4a:
216 case 0x4e:
217 nv_wr32(dev, NV20_PGRAPH_TSIZE(i), tile->pitch); 229 nv_wr32(dev, NV20_PGRAPH_TSIZE(i), tile->pitch);
218 nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), tile->limit); 230 nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), tile->limit);
219 nv_wr32(dev, NV20_PGRAPH_TILE(i), tile->addr); 231 nv_wr32(dev, NV20_PGRAPH_TILE(i), tile->addr);
220 break; 232 break;
221
222 case 0x46: 233 case 0x46:
223 case 0x47: 234 case 0x47:
224 case 0x49: 235 case 0x49:
225 case 0x4b: 236 case 0x4b:
237 case 0x4c:
238 case 0x67:
239 default:
226 nv_wr32(dev, NV47_PGRAPH_TSIZE(i), tile->pitch); 240 nv_wr32(dev, NV47_PGRAPH_TSIZE(i), tile->pitch);
227 nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), tile->limit); 241 nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), tile->limit);
228 nv_wr32(dev, NV47_PGRAPH_TILE(i), tile->addr); 242 nv_wr32(dev, NV47_PGRAPH_TILE(i), tile->addr);
@@ -230,15 +244,6 @@ nv40_graph_set_tile_region(struct drm_device *dev, int i)
230 nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tile->limit); 244 nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tile->limit);
231 nv_wr32(dev, NV40_PGRAPH_TILE1(i), tile->addr); 245 nv_wr32(dev, NV40_PGRAPH_TILE1(i), tile->addr);
232 break; 246 break;
233
234 default:
235 nv_wr32(dev, NV20_PGRAPH_TSIZE(i), tile->pitch);
236 nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), tile->limit);
237 nv_wr32(dev, NV20_PGRAPH_TILE(i), tile->addr);
238 nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tile->pitch);
239 nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tile->limit);
240 nv_wr32(dev, NV40_PGRAPH_TILE1(i), tile->addr);
241 break;
242 } 247 }
243} 248}
244 249
@@ -396,17 +401,20 @@ nv40_graph_init(struct drm_device *dev)
396 break; 401 break;
397 default: 402 default:
398 switch (dev_priv->chipset) { 403 switch (dev_priv->chipset) {
399 case 0x46: 404 case 0x41:
400 case 0x47: 405 case 0x42:
401 case 0x49: 406 case 0x43:
402 case 0x4b: 407 case 0x45:
403 nv_wr32(dev, 0x400DF0, nv_rd32(dev, NV04_PFB_CFG0)); 408 case 0x4e:
404 nv_wr32(dev, 0x400DF4, nv_rd32(dev, NV04_PFB_CFG1)); 409 case 0x44:
405 break; 410 case 0x4a:
406 default:
407 nv_wr32(dev, 0x4009F0, nv_rd32(dev, NV04_PFB_CFG0)); 411 nv_wr32(dev, 0x4009F0, nv_rd32(dev, NV04_PFB_CFG0));
408 nv_wr32(dev, 0x4009F4, nv_rd32(dev, NV04_PFB_CFG1)); 412 nv_wr32(dev, 0x4009F4, nv_rd32(dev, NV04_PFB_CFG1));
409 break; 413 break;
414 default:
415 nv_wr32(dev, 0x400DF0, nv_rd32(dev, NV04_PFB_CFG0));
416 nv_wr32(dev, 0x400DF4, nv_rd32(dev, NV04_PFB_CFG1));
417 break;
410 } 418 }
411 nv_wr32(dev, 0x4069F0, nv_rd32(dev, NV04_PFB_CFG0)); 419 nv_wr32(dev, 0x4069F0, nv_rd32(dev, NV04_PFB_CFG0));
412 nv_wr32(dev, 0x4069F4, nv_rd32(dev, NV04_PFB_CFG1)); 420 nv_wr32(dev, 0x4069F4, nv_rd32(dev, NV04_PFB_CFG1));
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index ea0041810ae3..e57caa2a00e3 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -403,16 +403,24 @@ nv50_instmem_unmap(struct nouveau_gpuobj *gpuobj)
403void 403void
404nv50_instmem_flush(struct drm_device *dev) 404nv50_instmem_flush(struct drm_device *dev)
405{ 405{
406 struct drm_nouveau_private *dev_priv = dev->dev_private;
407
408 spin_lock(&dev_priv->ramin_lock);
406 nv_wr32(dev, 0x00330c, 0x00000001); 409 nv_wr32(dev, 0x00330c, 0x00000001);
407 if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000)) 410 if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000))
408 NV_ERROR(dev, "PRAMIN flush timeout\n"); 411 NV_ERROR(dev, "PRAMIN flush timeout\n");
412 spin_unlock(&dev_priv->ramin_lock);
409} 413}
410 414
411void 415void
412nv84_instmem_flush(struct drm_device *dev) 416nv84_instmem_flush(struct drm_device *dev)
413{ 417{
418 struct drm_nouveau_private *dev_priv = dev->dev_private;
419
420 spin_lock(&dev_priv->ramin_lock);
414 nv_wr32(dev, 0x070000, 0x00000001); 421 nv_wr32(dev, 0x070000, 0x00000001);
415 if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000)) 422 if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000))
416 NV_ERROR(dev, "PRAMIN flush timeout\n"); 423 NV_ERROR(dev, "PRAMIN flush timeout\n");
424 spin_unlock(&dev_priv->ramin_lock);
417} 425}
418 426
diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c
index 459ff08241e5..6144156f255a 100644
--- a/drivers/gpu/drm/nouveau/nv50_vm.c
+++ b/drivers/gpu/drm/nouveau/nv50_vm.c
@@ -169,7 +169,11 @@ nv50_vm_flush(struct nouveau_vm *vm)
169void 169void
170nv50_vm_flush_engine(struct drm_device *dev, int engine) 170nv50_vm_flush_engine(struct drm_device *dev, int engine)
171{ 171{
172 struct drm_nouveau_private *dev_priv = dev->dev_private;
173
174 spin_lock(&dev_priv->ramin_lock);
172 nv_wr32(dev, 0x100c80, (engine << 16) | 1); 175 nv_wr32(dev, 0x100c80, (engine << 16) | 1);
173 if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000)) 176 if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000))
174 NV_ERROR(dev, "vm flush timeout: engine %d\n", engine); 177 NV_ERROR(dev, "vm flush timeout: engine %d\n", engine);
178 spin_unlock(&dev_priv->ramin_lock);
175} 179}
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 095bc507fb16..a4e5e53e0a62 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -557,9 +557,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
557 557
558 /* use recommended ref_div for ss */ 558 /* use recommended ref_div for ss */
559 if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { 559 if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
560 pll->flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
561 if (ss_enabled) { 560 if (ss_enabled) {
562 if (ss->refdiv) { 561 if (ss->refdiv) {
562 pll->flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
563 pll->flags |= RADEON_PLL_USE_REF_DIV; 563 pll->flags |= RADEON_PLL_USE_REF_DIV;
564 pll->reference_div = ss->refdiv; 564 pll->reference_div = ss->refdiv;
565 if (ASIC_IS_AVIVO(rdev)) 565 if (ASIC_IS_AVIVO(rdev))
@@ -662,10 +662,12 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
662 index, (uint32_t *)&args); 662 index, (uint32_t *)&args);
663 adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10; 663 adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
664 if (args.v3.sOutput.ucRefDiv) { 664 if (args.v3.sOutput.ucRefDiv) {
665 pll->flags |= RADEON_PLL_USE_FRAC_FB_DIV;
665 pll->flags |= RADEON_PLL_USE_REF_DIV; 666 pll->flags |= RADEON_PLL_USE_REF_DIV;
666 pll->reference_div = args.v3.sOutput.ucRefDiv; 667 pll->reference_div = args.v3.sOutput.ucRefDiv;
667 } 668 }
668 if (args.v3.sOutput.ucPostDiv) { 669 if (args.v3.sOutput.ucPostDiv) {
670 pll->flags |= RADEON_PLL_USE_FRAC_FB_DIV;
669 pll->flags |= RADEON_PLL_USE_POST_DIV; 671 pll->flags |= RADEON_PLL_USE_POST_DIV;
670 pll->post_div = args.v3.sOutput.ucPostDiv; 672 pll->post_div = args.v3.sOutput.ucPostDiv;
671 } 673 }
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index d270b3ff896b..6140ea1de45a 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2194,7 +2194,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
2194 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; 2194 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2195 } 2195 }
2196 rdev->mc.visible_vram_size = rdev->mc.aper_size; 2196 rdev->mc.visible_vram_size = rdev->mc.aper_size;
2197 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
2198 r700_vram_gtt_location(rdev, &rdev->mc); 2197 r700_vram_gtt_location(rdev, &rdev->mc);
2199 radeon_update_bandwidth_info(rdev); 2198 radeon_update_bandwidth_info(rdev);
2200 2199
@@ -2934,7 +2933,7 @@ static int evergreen_startup(struct radeon_device *rdev)
2934 /* XXX: ontario has problems blitting to gart at the moment */ 2933 /* XXX: ontario has problems blitting to gart at the moment */
2935 if (rdev->family == CHIP_PALM) { 2934 if (rdev->family == CHIP_PALM) {
2936 rdev->asic->copy = NULL; 2935 rdev->asic->copy = NULL;
2937 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 2936 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2938 } 2937 }
2939 2938
2940 /* allocate wb buffer */ 2939 /* allocate wb buffer */
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2adfb03f479b..2be698e78ff2 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -623,7 +623,7 @@ done:
623 dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 623 dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
624 return r; 624 return r;
625 } 625 }
626 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 626 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
627 return 0; 627 return 0;
628} 628}
629 629
@@ -631,7 +631,7 @@ void evergreen_blit_fini(struct radeon_device *rdev)
631{ 631{
632 int r; 632 int r;
633 633
634 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 634 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
635 if (rdev->r600_blit.shader_obj == NULL) 635 if (rdev->r600_blit.shader_obj == NULL)
636 return; 636 return;
637 /* If we can't reserve the bo, unref should be enough to destroy 637 /* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 56deae5bf02e..e372f9e1e5ce 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -70,23 +70,6 @@ MODULE_FIRMWARE(FIRMWARE_R520);
70 70
71void r100_pre_page_flip(struct radeon_device *rdev, int crtc) 71void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
72{ 72{
73 struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc];
74 u32 tmp;
75
76 /* make sure flip is at vb rather than hb */
77 tmp = RREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset);
78 tmp &= ~RADEON_CRTC_OFFSET_FLIP_CNTL;
79 /* make sure pending bit is asserted */
80 tmp |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
81 WREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset, tmp);
82
83 /* set pageflip to happen as late as possible in the vblank interval.
84 * same field for crtc1/2
85 */
86 tmp = RREG32(RADEON_CRTC_GEN_CNTL);
87 tmp &= ~RADEON_CRTC_VSTAT_MODE_MASK;
88 WREG32(RADEON_CRTC_GEN_CNTL, tmp);
89
90 /* enable the pflip int */ 73 /* enable the pflip int */
91 radeon_irq_kms_pflip_irq_get(rdev, crtc); 74 radeon_irq_kms_pflip_irq_get(rdev, crtc);
92} 75}
@@ -1041,7 +1024,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
1041 return r; 1024 return r;
1042 } 1025 }
1043 rdev->cp.ready = true; 1026 rdev->cp.ready = true;
1044 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 1027 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1045 return 0; 1028 return 0;
1046} 1029}
1047 1030
@@ -1059,7 +1042,7 @@ void r100_cp_fini(struct radeon_device *rdev)
1059void r100_cp_disable(struct radeon_device *rdev) 1042void r100_cp_disable(struct radeon_device *rdev)
1060{ 1043{
1061 /* Disable ring */ 1044 /* Disable ring */
1062 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 1045 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1063 rdev->cp.ready = false; 1046 rdev->cp.ready = false;
1064 WREG32(RADEON_CP_CSQ_MODE, 0); 1047 WREG32(RADEON_CP_CSQ_MODE, 0);
1065 WREG32(RADEON_CP_CSQ_CNTL, 0); 1048 WREG32(RADEON_CP_CSQ_CNTL, 0);
@@ -2329,7 +2312,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
2329 /* FIXME we don't use the second aperture yet when we could use it */ 2312 /* FIXME we don't use the second aperture yet when we could use it */
2330 if (rdev->mc.visible_vram_size > rdev->mc.aper_size) 2313 if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2331 rdev->mc.visible_vram_size = rdev->mc.aper_size; 2314 rdev->mc.visible_vram_size = rdev->mc.aper_size;
2332 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
2333 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 2315 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2334 if (rdev->flags & RADEON_IS_IGP) { 2316 if (rdev->flags & RADEON_IS_IGP) {
2335 uint32_t tom; 2317 uint32_t tom;
@@ -3490,7 +3472,7 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
3490 track->num_texture = 16; 3472 track->num_texture = 16;
3491 track->maxy = 4096; 3473 track->maxy = 4096;
3492 track->separate_cube = 0; 3474 track->separate_cube = 0;
3493 track->aaresolve = true; 3475 track->aaresolve = false;
3494 track->aa.robj = NULL; 3476 track->aa.robj = NULL;
3495 } 3477 }
3496 3478
@@ -3801,8 +3783,6 @@ static int r100_startup(struct radeon_device *rdev)
3801 r100_mc_program(rdev); 3783 r100_mc_program(rdev);
3802 /* Resume clock */ 3784 /* Resume clock */
3803 r100_clock_startup(rdev); 3785 r100_clock_startup(rdev);
3804 /* Initialize GPU configuration (# pipes, ...) */
3805// r100_gpu_init(rdev);
3806 /* Initialize GART (initialize after TTM so we can allocate 3786 /* Initialize GART (initialize after TTM so we can allocate
3807 * memory through TTM but finalize after TTM) */ 3787 * memory through TTM but finalize after TTM) */
3808 r100_enable_bm(rdev); 3788 r100_enable_bm(rdev);
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 768c60ee4ab6..069efa8c8ecf 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -910,6 +910,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
910 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 910 track->textures[i].compress_format = R100_TRACK_COMP_NONE;
911 break; 911 break;
912 case R300_TX_FORMAT_X16: 912 case R300_TX_FORMAT_X16:
913 case R300_TX_FORMAT_FL_I16:
913 case R300_TX_FORMAT_Y8X8: 914 case R300_TX_FORMAT_Y8X8:
914 case R300_TX_FORMAT_Z5Y6X5: 915 case R300_TX_FORMAT_Z5Y6X5:
915 case R300_TX_FORMAT_Z6Y5X5: 916 case R300_TX_FORMAT_Z6Y5X5:
@@ -922,6 +923,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
922 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 923 track->textures[i].compress_format = R100_TRACK_COMP_NONE;
923 break; 924 break;
924 case R300_TX_FORMAT_Y16X16: 925 case R300_TX_FORMAT_Y16X16:
926 case R300_TX_FORMAT_FL_I16A16:
925 case R300_TX_FORMAT_Z11Y11X10: 927 case R300_TX_FORMAT_Z11Y11X10:
926 case R300_TX_FORMAT_Z10Y11X11: 928 case R300_TX_FORMAT_Z10Y11X11:
927 case R300_TX_FORMAT_W8Z8Y8X8: 929 case R300_TX_FORMAT_W8Z8Y8X8:
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index de88624d5f87..9b3fad23b76c 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1255,7 +1255,6 @@ int r600_mc_init(struct radeon_device *rdev)
1255 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); 1255 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
1256 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); 1256 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
1257 rdev->mc.visible_vram_size = rdev->mc.aper_size; 1257 rdev->mc.visible_vram_size = rdev->mc.aper_size;
1258 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
1259 r600_vram_gtt_location(rdev, &rdev->mc); 1258 r600_vram_gtt_location(rdev, &rdev->mc);
1260 1259
1261 if (rdev->flags & RADEON_IS_IGP) { 1260 if (rdev->flags & RADEON_IS_IGP) {
@@ -1937,7 +1936,7 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
1937 */ 1936 */
1938void r600_cp_stop(struct radeon_device *rdev) 1937void r600_cp_stop(struct radeon_device *rdev)
1939{ 1938{
1940 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 1939 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1941 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); 1940 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
1942 WREG32(SCRATCH_UMSK, 0); 1941 WREG32(SCRATCH_UMSK, 0);
1943} 1942}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 41f7aafc97c4..df68d91e8190 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -558,7 +558,7 @@ done:
558 dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 558 dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
559 return r; 559 return r;
560 } 560 }
561 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 561 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
562 return 0; 562 return 0;
563} 563}
564 564
@@ -566,7 +566,7 @@ void r600_blit_fini(struct radeon_device *rdev)
566{ 566{
567 int r; 567 int r;
568 568
569 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 569 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
570 if (rdev->r600_blit.shader_obj == NULL) 570 if (rdev->r600_blit.shader_obj == NULL)
571 return; 571 return;
572 /* If we can't reserve the bo, unref should be enough to destroy 572 /* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 56c48b67ef3d..6b3429495118 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -345,7 +345,6 @@ struct radeon_mc {
345 * about vram size near mc fb location */ 345 * about vram size near mc fb location */
346 u64 mc_vram_size; 346 u64 mc_vram_size;
347 u64 visible_vram_size; 347 u64 visible_vram_size;
348 u64 active_vram_size;
349 u64 gtt_size; 348 u64 gtt_size;
350 u64 gtt_start; 349 u64 gtt_start;
351 u64 gtt_end; 350 u64 gtt_end;
@@ -1448,6 +1447,7 @@ extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *m
1448extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); 1447extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
1449extern int radeon_resume_kms(struct drm_device *dev); 1448extern int radeon_resume_kms(struct drm_device *dev);
1450extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state); 1449extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
1450extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
1451 1451
1452/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */ 1452/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */
1453extern bool r600_card_posted(struct radeon_device *rdev); 1453extern bool r600_card_posted(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index e75d63b8e21d..793c5e6026ad 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -834,6 +834,9 @@ static struct radeon_asic sumo_asic = {
834 .pm_finish = &evergreen_pm_finish, 834 .pm_finish = &evergreen_pm_finish,
835 .pm_init_profile = &rs780_pm_init_profile, 835 .pm_init_profile = &rs780_pm_init_profile,
836 .pm_get_dynpm_state = &r600_pm_get_dynpm_state, 836 .pm_get_dynpm_state = &r600_pm_get_dynpm_state,
837 .pre_page_flip = &evergreen_pre_page_flip,
838 .page_flip = &evergreen_page_flip,
839 .post_page_flip = &evergreen_post_page_flip,
837}; 840};
838 841
839static struct radeon_asic btc_asic = { 842static struct radeon_asic btc_asic = {
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 0e657095de7c..3e7e7f9eb781 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -971,7 +971,7 @@ void radeon_compute_pll_legacy(struct radeon_pll *pll,
971 max_fractional_feed_div = pll->max_frac_feedback_div; 971 max_fractional_feed_div = pll->max_frac_feedback_div;
972 } 972 }
973 973
974 for (post_div = min_post_div; post_div <= max_post_div; ++post_div) { 974 for (post_div = max_post_div; post_div >= min_post_div; --post_div) {
975 uint32_t ref_div; 975 uint32_t ref_div;
976 976
977 if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1)) 977 if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1))
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 66324b5bb5ba..cc44bdfec80f 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -113,11 +113,14 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev,
113 u32 tiling_flags = 0; 113 u32 tiling_flags = 0;
114 int ret; 114 int ret;
115 int aligned_size, size; 115 int aligned_size, size;
116 int height = mode_cmd->height;
116 117
117 /* need to align pitch with crtc limits */ 118 /* need to align pitch with crtc limits */
118 mode_cmd->pitch = radeon_align_pitch(rdev, mode_cmd->width, mode_cmd->bpp, fb_tiled) * ((mode_cmd->bpp + 1) / 8); 119 mode_cmd->pitch = radeon_align_pitch(rdev, mode_cmd->width, mode_cmd->bpp, fb_tiled) * ((mode_cmd->bpp + 1) / 8);
119 120
120 size = mode_cmd->pitch * mode_cmd->height; 121 if (rdev->family >= CHIP_R600)
122 height = ALIGN(mode_cmd->height, 8);
123 size = mode_cmd->pitch * height;
121 aligned_size = ALIGN(size, PAGE_SIZE); 124 aligned_size = ALIGN(size, PAGE_SIZE);
122 ret = radeon_gem_object_create(rdev, aligned_size, 0, 125 ret = radeon_gem_object_create(rdev, aligned_size, 0,
123 RADEON_GEM_DOMAIN_VRAM, 126 RADEON_GEM_DOMAIN_VRAM,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index df95eb83dac6..1fe95dfe48c9 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -156,9 +156,12 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
156{ 156{
157 struct radeon_device *rdev = dev->dev_private; 157 struct radeon_device *rdev = dev->dev_private;
158 struct drm_radeon_gem_info *args = data; 158 struct drm_radeon_gem_info *args = data;
159 struct ttm_mem_type_manager *man;
160
161 man = &rdev->mman.bdev.man[TTM_PL_VRAM];
159 162
160 args->vram_size = rdev->mc.real_vram_size; 163 args->vram_size = rdev->mc.real_vram_size;
161 args->vram_visible = rdev->mc.real_vram_size; 164 args->vram_visible = (u64)man->size << PAGE_SHIFT;
162 if (rdev->stollen_vga_memory) 165 if (rdev->stollen_vga_memory)
163 args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory); 166 args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory);
164 args->vram_visible -= radeon_fbdev_total_size(rdev); 167 args->vram_visible -= radeon_fbdev_total_size(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index cf0638c3b7c7..78968b738e88 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -443,7 +443,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
443 (target_fb->bits_per_pixel * 8)); 443 (target_fb->bits_per_pixel * 8));
444 crtc_pitch |= crtc_pitch << 16; 444 crtc_pitch |= crtc_pitch << 16;
445 445
446 446 crtc_offset_cntl |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
447 if (tiling_flags & RADEON_TILING_MACRO) { 447 if (tiling_flags & RADEON_TILING_MACRO) {
448 if (ASIC_IS_R300(rdev)) 448 if (ASIC_IS_R300(rdev))
449 crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN | 449 crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN |
@@ -502,6 +502,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
502 gen_cntl_val = RREG32(gen_cntl_reg); 502 gen_cntl_val = RREG32(gen_cntl_reg);
503 gen_cntl_val &= ~(0xf << 8); 503 gen_cntl_val &= ~(0xf << 8);
504 gen_cntl_val |= (format << 8); 504 gen_cntl_val |= (format << 8);
505 gen_cntl_val &= ~RADEON_CRTC_VSTAT_MODE_MASK;
505 WREG32(gen_cntl_reg, gen_cntl_val); 506 WREG32(gen_cntl_reg, gen_cntl_val);
506 507
507 crtc_offset = (u32)base; 508 crtc_offset = (u32)base;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e5b2cf10cbf4..8389b4c63d12 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -589,6 +589,20 @@ void radeon_ttm_fini(struct radeon_device *rdev)
589 DRM_INFO("radeon: ttm finalized\n"); 589 DRM_INFO("radeon: ttm finalized\n");
590} 590}
591 591
592/* this should only be called at bootup or when userspace
593 * isn't running */
594void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
595{
596 struct ttm_mem_type_manager *man;
597
598 if (!rdev->mman.initialized)
599 return;
600
601 man = &rdev->mman.bdev.man[TTM_PL_VRAM];
602 /* this just adjusts TTM size idea, which sets lpfn to the correct value */
603 man->size = size >> PAGE_SHIFT;
604}
605
592static struct vm_operations_struct radeon_ttm_vm_ops; 606static struct vm_operations_struct radeon_ttm_vm_ops;
593static const struct vm_operations_struct *ttm_vm_ops = NULL; 607static const struct vm_operations_struct *ttm_vm_ops = NULL;
594 608
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 5afe294ed51f..8af4679db23e 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -751,7 +751,6 @@ void rs600_mc_init(struct radeon_device *rdev)
751 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); 751 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
752 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 752 rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
753 rdev->mc.visible_vram_size = rdev->mc.aper_size; 753 rdev->mc.visible_vram_size = rdev->mc.aper_size;
754 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
755 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); 754 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
756 base = RREG32_MC(R_000004_MC_FB_LOCATION); 755 base = RREG32_MC(R_000004_MC_FB_LOCATION);
757 base = G_000004_MC_FB_START(base) << 16; 756 base = G_000004_MC_FB_START(base) << 16;
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 6638c8e4c81b..66c949b7c18c 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -157,7 +157,6 @@ void rs690_mc_init(struct radeon_device *rdev)
157 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); 157 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
158 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); 158 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
159 rdev->mc.visible_vram_size = rdev->mc.aper_size; 159 rdev->mc.visible_vram_size = rdev->mc.aper_size;
160 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
161 base = RREG32_MC(R_000100_MCCFG_FB_LOCATION); 160 base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
162 base = G_000100_MC_FB_START(base) << 16; 161 base = G_000100_MC_FB_START(base) << 16;
163 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); 162 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index d8ba67690656..714ad45757d0 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -307,7 +307,7 @@ static void rv770_mc_program(struct radeon_device *rdev)
307 */ 307 */
308void r700_cp_stop(struct radeon_device *rdev) 308void r700_cp_stop(struct radeon_device *rdev)
309{ 309{
310 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 310 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
311 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 311 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
312 WREG32(SCRATCH_UMSK, 0); 312 WREG32(SCRATCH_UMSK, 0);
313} 313}
@@ -1123,7 +1123,6 @@ int rv770_mc_init(struct radeon_device *rdev)
1123 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); 1123 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
1124 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); 1124 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
1125 rdev->mc.visible_vram_size = rdev->mc.aper_size; 1125 rdev->mc.visible_vram_size = rdev->mc.aper_size;
1126 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
1127 r700_vram_gtt_location(rdev, &rdev->mc); 1126 r700_vram_gtt_location(rdev, &rdev->mc);
1128 radeon_update_bandwidth_info(rdev); 1127 radeon_update_bandwidth_info(rdev);
1129 1128
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 773e484f1646..297bc9a7d6e6 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -238,13 +238,13 @@ config SENSORS_K8TEMP
238 will be called k8temp. 238 will be called k8temp.
239 239
240config SENSORS_K10TEMP 240config SENSORS_K10TEMP
241 tristate "AMD Phenom/Sempron/Turion/Opteron temperature sensor" 241 tristate "AMD Family 10h/11h/12h/14h temperature sensor"
242 depends on X86 && PCI 242 depends on X86 && PCI
243 help 243 help
244 If you say yes here you get support for the temperature 244 If you say yes here you get support for the temperature
245 sensor(s) inside your CPU. Supported are later revisions of 245 sensor(s) inside your CPU. Supported are later revisions of
246 the AMD Family 10h and all revisions of the AMD Family 11h 246 the AMD Family 10h and all revisions of the AMD Family 11h,
247 microarchitectures. 247 12h (Llano), and 14h (Brazos) microarchitectures.
248 248
249 This driver can also be built as a module. If so, the module 249 This driver can also be built as a module. If so, the module
250 will be called k10temp. 250 will be called k10temp.
@@ -455,13 +455,14 @@ config SENSORS_JZ4740
455 called jz4740-hwmon. 455 called jz4740-hwmon.
456 456
457config SENSORS_JC42 457config SENSORS_JC42
458 tristate "JEDEC JC42.4 compliant temperature sensors" 458 tristate "JEDEC JC42.4 compliant memory module temperature sensors"
459 depends on I2C 459 depends on I2C
460 help 460 help
461 If you say yes here you get support for Jedec JC42.4 compliant 461 If you say yes here, you get support for JEDEC JC42.4 compliant
462 temperature sensors. Support will include, but not be limited to, 462 temperature sensors, which are used on many DDR3 memory modules for
463 ADT7408, CAT34TS02,, CAT6095, MAX6604, MCP9805, MCP98242, MCP98243, 463 mobile devices and servers. Support will include, but not be limited
464 MCP9843, SE97, SE98, STTS424, TSE2002B3, and TS3000B3. 464 to, ADT7408, CAT34TS02, CAT6095, MAX6604, MCP9805, MCP98242, MCP98243,
465 MCP9843, SE97, SE98, STTS424(E), TSE2002B3, and TS3000B3.
465 466
466 This driver can also be built as a module. If so, the module 467 This driver can also be built as a module. If so, the module
467 will be called jc42. 468 will be called jc42.
@@ -574,7 +575,7 @@ config SENSORS_LM85
574 help 575 help
575 If you say yes here you get support for National Semiconductor LM85 576 If you say yes here you get support for National Semiconductor LM85
576 sensor chips and clones: ADM1027, ADT7463, ADT7468, EMC6D100, 577 sensor chips and clones: ADM1027, ADT7463, ADT7468, EMC6D100,
577 EMC6D101 and EMC6D102. 578 EMC6D101, EMC6D102, and EMC6D103.
578 579
579 This driver can also be built as a module. If so, the module 580 This driver can also be built as a module. If so, the module
580 will be called lm85. 581 will be called lm85.
diff --git a/drivers/hwmon/ad7414.c b/drivers/hwmon/ad7414.c
index 86d822aa9bbf..d46c0c758ddf 100644
--- a/drivers/hwmon/ad7414.c
+++ b/drivers/hwmon/ad7414.c
@@ -242,6 +242,7 @@ static const struct i2c_device_id ad7414_id[] = {
242 { "ad7414", 0 }, 242 { "ad7414", 0 },
243 {} 243 {}
244}; 244};
245MODULE_DEVICE_TABLE(i2c, ad7414_id);
245 246
246static struct i2c_driver ad7414_driver = { 247static struct i2c_driver ad7414_driver = {
247 .driver = { 248 .driver = {
diff --git a/drivers/hwmon/adt7411.c b/drivers/hwmon/adt7411.c
index f13c843a2964..5cc3e3784b42 100644
--- a/drivers/hwmon/adt7411.c
+++ b/drivers/hwmon/adt7411.c
@@ -334,6 +334,7 @@ static const struct i2c_device_id adt7411_id[] = {
334 { "adt7411", 0 }, 334 { "adt7411", 0 },
335 { } 335 { }
336}; 336};
337MODULE_DEVICE_TABLE(i2c, adt7411_id);
337 338
338static struct i2c_driver adt7411_driver = { 339static struct i2c_driver adt7411_driver = {
339 .driver = { 340 .driver = {
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index 3f49dd376f02..6e06019015a5 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -37,7 +37,7 @@
37#define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */ 37#define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */
38#define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */ 38#define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */
39#define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */ 39#define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */
40#define SIO_LOCK_KEY 0xAA /* Key to diasble Super-I/O */ 40#define SIO_LOCK_KEY 0xAA /* Key to disable Super-I/O */
41 41
42#define SIO_REG_LDSEL 0x07 /* Logical device select */ 42#define SIO_REG_LDSEL 0x07 /* Logical device select */
43#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ 43#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */
@@ -2111,7 +2111,6 @@ static int f71882fg_remove(struct platform_device *pdev)
2111 int nr_fans = (data->type == f71882fg) ? 4 : 3; 2111 int nr_fans = (data->type == f71882fg) ? 4 : 3;
2112 u8 start_reg = f71882fg_read8(data, F71882FG_REG_START); 2112 u8 start_reg = f71882fg_read8(data, F71882FG_REG_START);
2113 2113
2114 platform_set_drvdata(pdev, NULL);
2115 if (data->hwmon_dev) 2114 if (data->hwmon_dev)
2116 hwmon_device_unregister(data->hwmon_dev); 2115 hwmon_device_unregister(data->hwmon_dev);
2117 2116
@@ -2178,6 +2177,7 @@ static int f71882fg_remove(struct platform_device *pdev)
2178 } 2177 }
2179 } 2178 }
2180 2179
2180 platform_set_drvdata(pdev, NULL);
2181 kfree(data); 2181 kfree(data);
2182 2182
2183 return 0; 2183 return 0;
diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c
index 340fc78c8dde..934991237061 100644
--- a/drivers/hwmon/jc42.c
+++ b/drivers/hwmon/jc42.c
@@ -53,6 +53,8 @@ static const unsigned short normal_i2c[] = {
53 53
54/* Configuration register defines */ 54/* Configuration register defines */
55#define JC42_CFG_CRIT_ONLY (1 << 2) 55#define JC42_CFG_CRIT_ONLY (1 << 2)
56#define JC42_CFG_TCRIT_LOCK (1 << 6)
57#define JC42_CFG_EVENT_LOCK (1 << 7)
56#define JC42_CFG_SHUTDOWN (1 << 8) 58#define JC42_CFG_SHUTDOWN (1 << 8)
57#define JC42_CFG_HYST_SHIFT 9 59#define JC42_CFG_HYST_SHIFT 9
58#define JC42_CFG_HYST_MASK 0x03 60#define JC42_CFG_HYST_MASK 0x03
@@ -332,7 +334,7 @@ static ssize_t set_temp_crit_hyst(struct device *dev,
332{ 334{
333 struct i2c_client *client = to_i2c_client(dev); 335 struct i2c_client *client = to_i2c_client(dev);
334 struct jc42_data *data = i2c_get_clientdata(client); 336 struct jc42_data *data = i2c_get_clientdata(client);
335 long val; 337 unsigned long val;
336 int diff, hyst; 338 int diff, hyst;
337 int err; 339 int err;
338 int ret = count; 340 int ret = count;
@@ -380,14 +382,14 @@ static ssize_t show_alarm(struct device *dev,
380 382
381static DEVICE_ATTR(temp1_input, S_IRUGO, 383static DEVICE_ATTR(temp1_input, S_IRUGO,
382 show_temp_input, NULL); 384 show_temp_input, NULL);
383static DEVICE_ATTR(temp1_crit, S_IWUSR | S_IRUGO, 385static DEVICE_ATTR(temp1_crit, S_IRUGO,
384 show_temp_crit, set_temp_crit); 386 show_temp_crit, set_temp_crit);
385static DEVICE_ATTR(temp1_min, S_IWUSR | S_IRUGO, 387static DEVICE_ATTR(temp1_min, S_IRUGO,
386 show_temp_min, set_temp_min); 388 show_temp_min, set_temp_min);
387static DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, 389static DEVICE_ATTR(temp1_max, S_IRUGO,
388 show_temp_max, set_temp_max); 390 show_temp_max, set_temp_max);
389 391
390static DEVICE_ATTR(temp1_crit_hyst, S_IWUSR | S_IRUGO, 392static DEVICE_ATTR(temp1_crit_hyst, S_IRUGO,
391 show_temp_crit_hyst, set_temp_crit_hyst); 393 show_temp_crit_hyst, set_temp_crit_hyst);
392static DEVICE_ATTR(temp1_max_hyst, S_IRUGO, 394static DEVICE_ATTR(temp1_max_hyst, S_IRUGO,
393 show_temp_max_hyst, NULL); 395 show_temp_max_hyst, NULL);
@@ -412,8 +414,31 @@ static struct attribute *jc42_attributes[] = {
412 NULL 414 NULL
413}; 415};
414 416
417static mode_t jc42_attribute_mode(struct kobject *kobj,
418 struct attribute *attr, int index)
419{
420 struct device *dev = container_of(kobj, struct device, kobj);
421 struct i2c_client *client = to_i2c_client(dev);
422 struct jc42_data *data = i2c_get_clientdata(client);
423 unsigned int config = data->config;
424 bool readonly;
425
426 if (attr == &dev_attr_temp1_crit.attr)
427 readonly = config & JC42_CFG_TCRIT_LOCK;
428 else if (attr == &dev_attr_temp1_min.attr ||
429 attr == &dev_attr_temp1_max.attr)
430 readonly = config & JC42_CFG_EVENT_LOCK;
431 else if (attr == &dev_attr_temp1_crit_hyst.attr)
432 readonly = config & (JC42_CFG_EVENT_LOCK | JC42_CFG_TCRIT_LOCK);
433 else
434 readonly = true;
435
436 return S_IRUGO | (readonly ? 0 : S_IWUSR);
437}
438
415static const struct attribute_group jc42_group = { 439static const struct attribute_group jc42_group = {
416 .attrs = jc42_attributes, 440 .attrs = jc42_attributes,
441 .is_visible = jc42_attribute_mode,
417}; 442};
418 443
419/* Return 0 if detection is successful, -ENODEV otherwise */ 444/* Return 0 if detection is successful, -ENODEV otherwise */
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index da5a2404cd3e..82bf65aa2968 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * k10temp.c - AMD Family 10h/11h processor hardware monitoring 2 * k10temp.c - AMD Family 10h/11h/12h/14h processor hardware monitoring
3 * 3 *
4 * Copyright (c) 2009 Clemens Ladisch <clemens@ladisch.de> 4 * Copyright (c) 2009 Clemens Ladisch <clemens@ladisch.de>
5 * 5 *
@@ -25,7 +25,7 @@
25#include <linux/pci.h> 25#include <linux/pci.h>
26#include <asm/processor.h> 26#include <asm/processor.h>
27 27
28MODULE_DESCRIPTION("AMD Family 10h/11h CPU core temperature monitor"); 28MODULE_DESCRIPTION("AMD Family 10h/11h/12h/14h CPU core temperature monitor");
29MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>"); 29MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
30MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
31 31
@@ -208,6 +208,7 @@ static void __devexit k10temp_remove(struct pci_dev *pdev)
208static const struct pci_device_id k10temp_id_table[] = { 208static const struct pci_device_id k10temp_id_table[] = {
209 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 209 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
210 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, 210 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
211 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
211 {} 212 {}
212}; 213};
213MODULE_DEVICE_TABLE(pci, k10temp_id_table); 214MODULE_DEVICE_TABLE(pci, k10temp_id_table);
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 1e229847f37a..d2cc28660816 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -41,7 +41,7 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
41enum chips { 41enum chips {
42 any_chip, lm85b, lm85c, 42 any_chip, lm85b, lm85c,
43 adm1027, adt7463, adt7468, 43 adm1027, adt7463, adt7468,
44 emc6d100, emc6d102 44 emc6d100, emc6d102, emc6d103
45}; 45};
46 46
47/* The LM85 registers */ 47/* The LM85 registers */
@@ -90,6 +90,9 @@ enum chips {
90#define LM85_VERSTEP_EMC6D100_A0 0x60 90#define LM85_VERSTEP_EMC6D100_A0 0x60
91#define LM85_VERSTEP_EMC6D100_A1 0x61 91#define LM85_VERSTEP_EMC6D100_A1 0x61
92#define LM85_VERSTEP_EMC6D102 0x65 92#define LM85_VERSTEP_EMC6D102 0x65
93#define LM85_VERSTEP_EMC6D103_A0 0x68
94#define LM85_VERSTEP_EMC6D103_A1 0x69
95#define LM85_VERSTEP_EMC6D103S 0x6A /* Also known as EMC6D103:A2 */
93 96
94#define LM85_REG_CONFIG 0x40 97#define LM85_REG_CONFIG 0x40
95 98
@@ -348,6 +351,7 @@ static const struct i2c_device_id lm85_id[] = {
348 { "emc6d100", emc6d100 }, 351 { "emc6d100", emc6d100 },
349 { "emc6d101", emc6d100 }, 352 { "emc6d101", emc6d100 },
350 { "emc6d102", emc6d102 }, 353 { "emc6d102", emc6d102 },
354 { "emc6d103", emc6d103 },
351 { } 355 { }
352}; 356};
353MODULE_DEVICE_TABLE(i2c, lm85_id); 357MODULE_DEVICE_TABLE(i2c, lm85_id);
@@ -1250,6 +1254,20 @@ static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info)
1250 case LM85_VERSTEP_EMC6D102: 1254 case LM85_VERSTEP_EMC6D102:
1251 type_name = "emc6d102"; 1255 type_name = "emc6d102";
1252 break; 1256 break;
1257 case LM85_VERSTEP_EMC6D103_A0:
1258 case LM85_VERSTEP_EMC6D103_A1:
1259 type_name = "emc6d103";
1260 break;
1261 /*
1262 * Registers apparently missing in EMC6D103S/EMC6D103:A2
1263 * compared to EMC6D103:A0, EMC6D103:A1, and EMC6D102
1264 * (according to the data sheets), but used unconditionally
1265 * in the driver: 62[5:7], 6D[0:7], and 6E[0:7].
1266 * So skip EMC6D103S for now.
1267 case LM85_VERSTEP_EMC6D103S:
1268 type_name = "emc6d103s";
1269 break;
1270 */
1253 } 1271 }
1254 } else { 1272 } else {
1255 dev_dbg(&adapter->dev, 1273 dev_dbg(&adapter->dev,
@@ -1283,6 +1301,7 @@ static int lm85_probe(struct i2c_client *client,
1283 case adt7468: 1301 case adt7468:
1284 case emc6d100: 1302 case emc6d100:
1285 case emc6d102: 1303 case emc6d102:
1304 case emc6d103:
1286 data->freq_map = adm1027_freq_map; 1305 data->freq_map = adm1027_freq_map;
1287 break; 1306 break;
1288 default: 1307 default:
@@ -1468,7 +1487,7 @@ static struct lm85_data *lm85_update_device(struct device *dev)
1468 /* More alarm bits */ 1487 /* More alarm bits */
1469 data->alarms |= lm85_read_value(client, 1488 data->alarms |= lm85_read_value(client,
1470 EMC6D100_REG_ALARM3) << 16; 1489 EMC6D100_REG_ALARM3) << 16;
1471 } else if (data->type == emc6d102) { 1490 } else if (data->type == emc6d102 || data->type == emc6d103) {
1472 /* Have to read LSB bits after the MSB ones because 1491 /* Have to read LSB bits after the MSB ones because
1473 the reading of the MSB bits has frozen the 1492 the reading of the MSB bits has frozen the
1474 LSBs (backward from the ADM1027). 1493 LSBs (backward from the ADM1027).
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 2e067dd2ee51..50ea1f43bdc1 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -29,6 +29,7 @@
29#include <linux/pci.h> 29#include <linux/pci.h>
30#include <linux/mutex.h> 30#include <linux/mutex.h>
31#include <linux/ktime.h> 31#include <linux/ktime.h>
32#include <linux/slab.h>
32 33
33#define PCH_EVENT_SET 0 /* I2C Interrupt Event Set Status */ 34#define PCH_EVENT_SET 0 /* I2C Interrupt Event Set Status */
34#define PCH_EVENT_NONE 1 /* I2C Interrupt Event Clear Status */ 35#define PCH_EVENT_NONE 1 /* I2C Interrupt Event Clear Status */
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index ef3bcb1ce864..1b46a9d9f907 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -249,7 +249,7 @@ static struct i2c_adapter ocores_adapter = {
249static int ocores_i2c_of_probe(struct platform_device* pdev, 249static int ocores_i2c_of_probe(struct platform_device* pdev,
250 struct ocores_i2c* i2c) 250 struct ocores_i2c* i2c)
251{ 251{
252 __be32* val; 252 const __be32* val;
253 253
254 val = of_get_property(pdev->dev.of_node, "regstep", NULL); 254 val = of_get_property(pdev->dev.of_node, "regstep", NULL);
255 if (!val) { 255 if (!val) {
@@ -330,9 +330,7 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
330 i2c->adap = ocores_adapter; 330 i2c->adap = ocores_adapter;
331 i2c_set_adapdata(&i2c->adap, i2c); 331 i2c_set_adapdata(&i2c->adap, i2c);
332 i2c->adap.dev.parent = &pdev->dev; 332 i2c->adap.dev.parent = &pdev->dev;
333#ifdef CONFIG_OF
334 i2c->adap.dev.of_node = pdev->dev.of_node; 333 i2c->adap.dev.of_node = pdev->dev.of_node;
335#endif
336 334
337 /* add i2c adapter to i2c tree */ 335 /* add i2c adapter to i2c tree */
338 ret = i2c_add_adapter(&i2c->adap); 336 ret = i2c_add_adapter(&i2c->adap);
@@ -390,15 +388,11 @@ static int ocores_i2c_resume(struct platform_device *pdev)
390#define ocores_i2c_resume NULL 388#define ocores_i2c_resume NULL
391#endif 389#endif
392 390
393#ifdef CONFIG_OF
394static struct of_device_id ocores_i2c_match[] = { 391static struct of_device_id ocores_i2c_match[] = {
395 { 392 { .compatible = "opencores,i2c-ocores", },
396 .compatible = "opencores,i2c-ocores", 393 {},
397 },
398 {},
399}; 394};
400MODULE_DEVICE_TABLE(of, ocores_i2c_match); 395MODULE_DEVICE_TABLE(of, ocores_i2c_match);
401#endif
402 396
403/* work with hotplug and coldplug */ 397/* work with hotplug and coldplug */
404MODULE_ALIAS("platform:ocores-i2c"); 398MODULE_ALIAS("platform:ocores-i2c");
@@ -411,9 +405,7 @@ static struct platform_driver ocores_i2c_driver = {
411 .driver = { 405 .driver = {
412 .owner = THIS_MODULE, 406 .owner = THIS_MODULE,
413 .name = "ocores-i2c", 407 .name = "ocores-i2c",
414#ifdef CONFIG_OF 408 .of_match_table = ocores_i2c_match,
415 .of_match_table = ocores_i2c_match,
416#endif
417 }, 409 },
418}; 410};
419 411
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index b605ff3a1fa0..58a58c7eaa17 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -378,9 +378,7 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
378 * REVISIT: Some wkup sources might not be needed. 378 * REVISIT: Some wkup sources might not be needed.
379 */ 379 */
380 dev->westate = OMAP_I2C_WE_ALL; 380 dev->westate = OMAP_I2C_WE_ALL;
381 if (dev->rev < OMAP_I2C_REV_ON_4430) 381 omap_i2c_write_reg(dev, OMAP_I2C_WE_REG, dev->westate);
382 omap_i2c_write_reg(dev, OMAP_I2C_WE_REG,
383 dev->westate);
384 } 382 }
385 } 383 }
386 omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0); 384 omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0);
@@ -847,11 +845,15 @@ complete:
847 dev_err(dev->dev, "Arbitration lost\n"); 845 dev_err(dev->dev, "Arbitration lost\n");
848 err |= OMAP_I2C_STAT_AL; 846 err |= OMAP_I2C_STAT_AL;
849 } 847 }
848 /*
849 * ProDB0017052: Clear ARDY bit twice
850 */
850 if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK | 851 if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK |
851 OMAP_I2C_STAT_AL)) { 852 OMAP_I2C_STAT_AL)) {
852 omap_i2c_ack_stat(dev, stat & 853 omap_i2c_ack_stat(dev, stat &
853 (OMAP_I2C_STAT_RRDY | OMAP_I2C_STAT_RDR | 854 (OMAP_I2C_STAT_RRDY | OMAP_I2C_STAT_RDR |
854 OMAP_I2C_STAT_XRDY | OMAP_I2C_STAT_XDR)); 855 OMAP_I2C_STAT_XRDY | OMAP_I2C_STAT_XDR |
856 OMAP_I2C_STAT_ARDY));
855 omap_i2c_complete_cmd(dev, err); 857 omap_i2c_complete_cmd(dev, err);
856 return IRQ_HANDLED; 858 return IRQ_HANDLED;
857 } 859 }
@@ -1137,12 +1139,41 @@ omap_i2c_remove(struct platform_device *pdev)
1137 return 0; 1139 return 0;
1138} 1140}
1139 1141
1142#ifdef CONFIG_SUSPEND
1143static int omap_i2c_suspend(struct device *dev)
1144{
1145 if (!pm_runtime_suspended(dev))
1146 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
1147 dev->bus->pm->runtime_suspend(dev);
1148
1149 return 0;
1150}
1151
1152static int omap_i2c_resume(struct device *dev)
1153{
1154 if (!pm_runtime_suspended(dev))
1155 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
1156 dev->bus->pm->runtime_resume(dev);
1157
1158 return 0;
1159}
1160
1161static struct dev_pm_ops omap_i2c_pm_ops = {
1162 .suspend = omap_i2c_suspend,
1163 .resume = omap_i2c_resume,
1164};
1165#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops)
1166#else
1167#define OMAP_I2C_PM_OPS NULL
1168#endif
1169
1140static struct platform_driver omap_i2c_driver = { 1170static struct platform_driver omap_i2c_driver = {
1141 .probe = omap_i2c_probe, 1171 .probe = omap_i2c_probe,
1142 .remove = omap_i2c_remove, 1172 .remove = omap_i2c_remove,
1143 .driver = { 1173 .driver = {
1144 .name = "omap_i2c", 1174 .name = "omap_i2c",
1145 .owner = THIS_MODULE, 1175 .owner = THIS_MODULE,
1176 .pm = OMAP_I2C_PM_OPS,
1146 }, 1177 },
1147}; 1178};
1148 1179
diff --git a/drivers/i2c/busses/i2c-stu300.c b/drivers/i2c/busses/i2c-stu300.c
index 495be451d326..266135ddf7fa 100644
--- a/drivers/i2c/busses/i2c-stu300.c
+++ b/drivers/i2c/busses/i2c-stu300.c
@@ -942,7 +942,7 @@ stu300_probe(struct platform_device *pdev)
942 adap->owner = THIS_MODULE; 942 adap->owner = THIS_MODULE;
943 /* DDC class but actually often used for more generic I2C */ 943 /* DDC class but actually often used for more generic I2C */
944 adap->class = I2C_CLASS_DDC; 944 adap->class = I2C_CLASS_DDC;
945 strncpy(adap->name, "ST Microelectronics DDC I2C adapter", 945 strlcpy(adap->name, "ST Microelectronics DDC I2C adapter",
946 sizeof(adap->name)); 946 sizeof(adap->name));
947 adap->nr = bus_nr; 947 adap->nr = bus_nr;
948 adap->algo = &stu300_algo; 948 adap->algo = &stu300_algo;
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index f0bd5bcdf563..045ba6efea48 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -537,9 +537,7 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
537 client->dev.parent = &client->adapter->dev; 537 client->dev.parent = &client->adapter->dev;
538 client->dev.bus = &i2c_bus_type; 538 client->dev.bus = &i2c_bus_type;
539 client->dev.type = &i2c_client_type; 539 client->dev.type = &i2c_client_type;
540#ifdef CONFIG_OF
541 client->dev.of_node = info->of_node; 540 client->dev.of_node = info->of_node;
542#endif
543 541
544 dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap), 542 dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
545 client->addr); 543 client->addr);
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 1fa091e05690..4a5c4a44ffb1 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -62,6 +62,7 @@
62#include <linux/notifier.h> 62#include <linux/notifier.h>
63#include <linux/cpu.h> 63#include <linux/cpu.h>
64#include <asm/mwait.h> 64#include <asm/mwait.h>
65#include <asm/msr.h>
65 66
66#define INTEL_IDLE_VERSION "0.4" 67#define INTEL_IDLE_VERSION "0.4"
67#define PREFIX "intel_idle: " 68#define PREFIX "intel_idle: "
@@ -85,6 +86,12 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
85static struct cpuidle_state *cpuidle_state_table; 86static struct cpuidle_state *cpuidle_state_table;
86 87
87/* 88/*
89 * Hardware C-state auto-demotion may not always be optimal.
90 * Indicate which enable bits to clear here.
91 */
92static unsigned long long auto_demotion_disable_flags;
93
94/*
88 * Set this flag for states where the HW flushes the TLB for us 95 * Set this flag for states where the HW flushes the TLB for us
89 * and so we don't need cross-calls to keep it consistent. 96 * and so we don't need cross-calls to keep it consistent.
90 * If this flag is set, SW flushes the TLB, so even if the 97 * If this flag is set, SW flushes the TLB, so even if the
@@ -281,6 +288,15 @@ static struct notifier_block setup_broadcast_notifier = {
281 .notifier_call = setup_broadcast_cpuhp_notify, 288 .notifier_call = setup_broadcast_cpuhp_notify,
282}; 289};
283 290
291static void auto_demotion_disable(void *dummy)
292{
293 unsigned long long msr_bits;
294
295 rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
296 msr_bits &= ~auto_demotion_disable_flags;
297 wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
298}
299
284/* 300/*
285 * intel_idle_probe() 301 * intel_idle_probe()
286 */ 302 */
@@ -324,11 +340,17 @@ static int intel_idle_probe(void)
324 case 0x25: /* Westmere */ 340 case 0x25: /* Westmere */
325 case 0x2C: /* Westmere */ 341 case 0x2C: /* Westmere */
326 cpuidle_state_table = nehalem_cstates; 342 cpuidle_state_table = nehalem_cstates;
343 auto_demotion_disable_flags =
344 (NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE);
327 break; 345 break;
328 346
329 case 0x1C: /* 28 - Atom Processor */ 347 case 0x1C: /* 28 - Atom Processor */
348 cpuidle_state_table = atom_cstates;
349 break;
350
330 case 0x26: /* 38 - Lincroft Atom Processor */ 351 case 0x26: /* 38 - Lincroft Atom Processor */
331 cpuidle_state_table = atom_cstates; 352 cpuidle_state_table = atom_cstates;
353 auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE;
332 break; 354 break;
333 355
334 case 0x2A: /* SNB */ 356 case 0x2A: /* SNB */
@@ -436,6 +458,8 @@ static int intel_idle_cpuidle_devices_init(void)
436 return -EIO; 458 return -EIO;
437 } 459 }
438 } 460 }
461 if (auto_demotion_disable_flags)
462 smp_call_function(auto_demotion_disable, NULL, 1);
439 463
440 return 0; 464 return 0;
441} 465}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 64e0903091a8..f804e28e1ebb 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1988,6 +1988,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1988 goto out; 1988 goto out;
1989 } 1989 }
1990 1990
1991 if (cm_id->lap_state == IB_CM_LAP_SENT ||
1992 cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
1993 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1994
1991 ret = cm_alloc_msg(cm_id_priv, &msg); 1995 ret = cm_alloc_msg(cm_id_priv, &msg);
1992 if (ret) { 1996 if (ret) {
1993 cm_enter_timewait(cm_id_priv); 1997 cm_enter_timewait(cm_id_priv);
@@ -2129,6 +2133,10 @@ static int cm_dreq_handler(struct cm_work *work)
2129 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); 2133 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2130 break; 2134 break;
2131 case IB_CM_ESTABLISHED: 2135 case IB_CM_ESTABLISHED:
2136 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2137 cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2138 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2139 break;
2132 case IB_CM_MRA_REP_RCVD: 2140 case IB_CM_MRA_REP_RCVD:
2133 break; 2141 break;
2134 case IB_CM_TIMEWAIT: 2142 case IB_CM_TIMEWAIT:
@@ -2349,9 +2357,18 @@ static int cm_rej_handler(struct cm_work *work)
2349 /* fall through */ 2357 /* fall through */
2350 case IB_CM_REP_RCVD: 2358 case IB_CM_REP_RCVD:
2351 case IB_CM_MRA_REP_SENT: 2359 case IB_CM_MRA_REP_SENT:
2352 case IB_CM_ESTABLISHED:
2353 cm_enter_timewait(cm_id_priv); 2360 cm_enter_timewait(cm_id_priv);
2354 break; 2361 break;
2362 case IB_CM_ESTABLISHED:
2363 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2364 cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2365 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2366 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2367 cm_id_priv->msg);
2368 cm_enter_timewait(cm_id_priv);
2369 break;
2370 }
2371 /* fall through */
2355 default: 2372 default:
2356 spin_unlock_irq(&cm_id_priv->lock); 2373 spin_unlock_irq(&cm_id_priv->lock);
2357 ret = -EINVAL; 2374 ret = -EINVAL;
@@ -2989,6 +3006,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
2989 goto out; /* No match. */ 3006 goto out; /* No match. */
2990 } 3007 }
2991 atomic_inc(&cur_cm_id_priv->refcount); 3008 atomic_inc(&cur_cm_id_priv->refcount);
3009 atomic_inc(&cm_id_priv->refcount);
2992 spin_unlock_irq(&cm.lock); 3010 spin_unlock_irq(&cm.lock);
2993 3011
2994 cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; 3012 cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da24fde1..5ed9d25d021a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -308,11 +308,13 @@ static inline void release_mc(struct kref *kref)
308 kfree(mc); 308 kfree(mc);
309} 309}
310 310
311static void cma_detach_from_dev(struct rdma_id_private *id_priv) 311static void cma_release_dev(struct rdma_id_private *id_priv)
312{ 312{
313 mutex_lock(&lock);
313 list_del(&id_priv->list); 314 list_del(&id_priv->list);
314 cma_deref_dev(id_priv->cma_dev); 315 cma_deref_dev(id_priv->cma_dev);
315 id_priv->cma_dev = NULL; 316 id_priv->cma_dev = NULL;
317 mutex_unlock(&lock);
316} 318}
317 319
318static int cma_set_qkey(struct rdma_id_private *id_priv) 320static int cma_set_qkey(struct rdma_id_private *id_priv)
@@ -373,6 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
373 enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? 375 enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
374 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; 376 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
375 377
378 mutex_lock(&lock);
376 iboe_addr_get_sgid(dev_addr, &iboe_gid); 379 iboe_addr_get_sgid(dev_addr, &iboe_gid);
377 memcpy(&gid, dev_addr->src_dev_addr + 380 memcpy(&gid, dev_addr->src_dev_addr +
378 rdma_addr_gid_offset(dev_addr), sizeof gid); 381 rdma_addr_gid_offset(dev_addr), sizeof gid);
@@ -398,6 +401,7 @@ out:
398 if (!ret) 401 if (!ret)
399 cma_attach_to_dev(id_priv, cma_dev); 402 cma_attach_to_dev(id_priv, cma_dev);
400 403
404 mutex_unlock(&lock);
401 return ret; 405 return ret;
402} 406}
403 407
@@ -904,9 +908,14 @@ void rdma_destroy_id(struct rdma_cm_id *id)
904 state = cma_exch(id_priv, CMA_DESTROYING); 908 state = cma_exch(id_priv, CMA_DESTROYING);
905 cma_cancel_operation(id_priv, state); 909 cma_cancel_operation(id_priv, state);
906 910
907 mutex_lock(&lock); 911 /*
912 * Wait for any active callback to finish. New callbacks will find
913 * the id_priv state set to destroying and abort.
914 */
915 mutex_lock(&id_priv->handler_mutex);
916 mutex_unlock(&id_priv->handler_mutex);
917
908 if (id_priv->cma_dev) { 918 if (id_priv->cma_dev) {
909 mutex_unlock(&lock);
910 switch (rdma_node_get_transport(id_priv->id.device->node_type)) { 919 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
911 case RDMA_TRANSPORT_IB: 920 case RDMA_TRANSPORT_IB:
912 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) 921 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
@@ -920,10 +929,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
920 break; 929 break;
921 } 930 }
922 cma_leave_mc_groups(id_priv); 931 cma_leave_mc_groups(id_priv);
923 mutex_lock(&lock); 932 cma_release_dev(id_priv);
924 cma_detach_from_dev(id_priv);
925 } 933 }
926 mutex_unlock(&lock);
927 934
928 cma_release_port(id_priv); 935 cma_release_port(id_priv);
929 cma_deref_id(id_priv); 936 cma_deref_id(id_priv);
@@ -1200,9 +1207,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1200 } 1207 }
1201 1208
1202 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 1209 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1203 mutex_lock(&lock);
1204 ret = cma_acquire_dev(conn_id); 1210 ret = cma_acquire_dev(conn_id);
1205 mutex_unlock(&lock);
1206 if (ret) 1211 if (ret)
1207 goto release_conn_id; 1212 goto release_conn_id;
1208 1213
@@ -1210,6 +1215,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1210 cm_id->context = conn_id; 1215 cm_id->context = conn_id;
1211 cm_id->cm_handler = cma_ib_handler; 1216 cm_id->cm_handler = cma_ib_handler;
1212 1217
1218 /*
1219 * Protect against the user destroying conn_id from another thread
1220 * until we're done accessing it.
1221 */
1222 atomic_inc(&conn_id->refcount);
1213 ret = conn_id->id.event_handler(&conn_id->id, &event); 1223 ret = conn_id->id.event_handler(&conn_id->id, &event);
1214 if (!ret) { 1224 if (!ret) {
1215 /* 1225 /*
@@ -1222,8 +1232,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1222 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1232 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1223 mutex_unlock(&lock); 1233 mutex_unlock(&lock);
1224 mutex_unlock(&conn_id->handler_mutex); 1234 mutex_unlock(&conn_id->handler_mutex);
1235 cma_deref_id(conn_id);
1225 goto out; 1236 goto out;
1226 } 1237 }
1238 cma_deref_id(conn_id);
1227 1239
1228 /* Destroy the CM ID by returning a non-zero value. */ 1240 /* Destroy the CM ID by returning a non-zero value. */
1229 conn_id->cm_id.ib = NULL; 1241 conn_id->cm_id.ib = NULL;
@@ -1394,9 +1406,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1394 goto out; 1406 goto out;
1395 } 1407 }
1396 1408
1397 mutex_lock(&lock);
1398 ret = cma_acquire_dev(conn_id); 1409 ret = cma_acquire_dev(conn_id);
1399 mutex_unlock(&lock);
1400 if (ret) { 1410 if (ret) {
1401 mutex_unlock(&conn_id->handler_mutex); 1411 mutex_unlock(&conn_id->handler_mutex);
1402 rdma_destroy_id(new_cm_id); 1412 rdma_destroy_id(new_cm_id);
@@ -1425,17 +1435,25 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1425 event.param.conn.private_data_len = iw_event->private_data_len; 1435 event.param.conn.private_data_len = iw_event->private_data_len;
1426 event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; 1436 event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1427 event.param.conn.responder_resources = attr.max_qp_rd_atom; 1437 event.param.conn.responder_resources = attr.max_qp_rd_atom;
1438
1439 /*
1440 * Protect against the user destroying conn_id from another thread
1441 * until we're done accessing it.
1442 */
1443 atomic_inc(&conn_id->refcount);
1428 ret = conn_id->id.event_handler(&conn_id->id, &event); 1444 ret = conn_id->id.event_handler(&conn_id->id, &event);
1429 if (ret) { 1445 if (ret) {
1430 /* User wants to destroy the CM ID */ 1446 /* User wants to destroy the CM ID */
1431 conn_id->cm_id.iw = NULL; 1447 conn_id->cm_id.iw = NULL;
1432 cma_exch(conn_id, CMA_DESTROYING); 1448 cma_exch(conn_id, CMA_DESTROYING);
1433 mutex_unlock(&conn_id->handler_mutex); 1449 mutex_unlock(&conn_id->handler_mutex);
1450 cma_deref_id(conn_id);
1434 rdma_destroy_id(&conn_id->id); 1451 rdma_destroy_id(&conn_id->id);
1435 goto out; 1452 goto out;
1436 } 1453 }
1437 1454
1438 mutex_unlock(&conn_id->handler_mutex); 1455 mutex_unlock(&conn_id->handler_mutex);
1456 cma_deref_id(conn_id);
1439 1457
1440out: 1458out:
1441 if (dev) 1459 if (dev)
@@ -1951,20 +1969,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
1951 1969
1952 memset(&event, 0, sizeof event); 1970 memset(&event, 0, sizeof event);
1953 mutex_lock(&id_priv->handler_mutex); 1971 mutex_lock(&id_priv->handler_mutex);
1954 1972 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
1955 /*
1956 * Grab mutex to block rdma_destroy_id() from removing the device while
1957 * we're trying to acquire it.
1958 */
1959 mutex_lock(&lock);
1960 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1961 mutex_unlock(&lock);
1962 goto out; 1973 goto out;
1963 }
1964 1974
1965 if (!status && !id_priv->cma_dev) 1975 if (!status && !id_priv->cma_dev)
1966 status = cma_acquire_dev(id_priv); 1976 status = cma_acquire_dev(id_priv);
1967 mutex_unlock(&lock);
1968 1977
1969 if (status) { 1978 if (status) {
1970 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) 1979 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
@@ -2265,9 +2274,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2265 if (ret) 2274 if (ret)
2266 goto err1; 2275 goto err1;
2267 2276
2268 mutex_lock(&lock);
2269 ret = cma_acquire_dev(id_priv); 2277 ret = cma_acquire_dev(id_priv);
2270 mutex_unlock(&lock);
2271 if (ret) 2278 if (ret)
2272 goto err1; 2279 goto err1;
2273 } 2280 }
@@ -2279,11 +2286,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2279 2286
2280 return 0; 2287 return 0;
2281err2: 2288err2:
2282 if (id_priv->cma_dev) { 2289 if (id_priv->cma_dev)
2283 mutex_lock(&lock); 2290 cma_release_dev(id_priv);
2284 cma_detach_from_dev(id_priv);
2285 mutex_unlock(&lock);
2286 }
2287err1: 2291err1:
2288 cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); 2292 cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2289 return ret; 2293 return ret;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8b00e6c46f01..b4d9e4caf3c9 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -61,9 +61,9 @@ static char *states[] = {
61 NULL, 61 NULL,
62}; 62};
63 63
64static int dack_mode; 64static int dack_mode = 1;
65module_param(dack_mode, int, 0644); 65module_param(dack_mode, int, 0644);
66MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)"); 66MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
67 67
68int c4iw_max_read_depth = 8; 68int c4iw_max_read_depth = 8;
69module_param(c4iw_max_read_depth, int, 0644); 69module_param(c4iw_max_read_depth, int, 0644);
@@ -482,6 +482,7 @@ static int send_connect(struct c4iw_ep *ep)
482 TX_CHAN(ep->tx_chan) | 482 TX_CHAN(ep->tx_chan) |
483 SMAC_SEL(ep->smac_idx) | 483 SMAC_SEL(ep->smac_idx) |
484 DSCP(ep->tos) | 484 DSCP(ep->tos) |
485 ULP_MODE(ULP_MODE_TCPDDP) |
485 RCV_BUFSIZ(rcv_win>>10); 486 RCV_BUFSIZ(rcv_win>>10);
486 opt2 = RX_CHANNEL(0) | 487 opt2 = RX_CHANNEL(0) |
487 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); 488 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -1274,6 +1275,7 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
1274 TX_CHAN(ep->tx_chan) | 1275 TX_CHAN(ep->tx_chan) |
1275 SMAC_SEL(ep->smac_idx) | 1276 SMAC_SEL(ep->smac_idx) |
1276 DSCP(ep->tos) | 1277 DSCP(ep->tos) |
1278 ULP_MODE(ULP_MODE_TCPDDP) |
1277 RCV_BUFSIZ(rcv_win>>10); 1279 RCV_BUFSIZ(rcv_win>>10);
1278 opt2 = RX_CHANNEL(0) | 1280 opt2 = RX_CHANNEL(0) |
1279 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); 1281 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 54fbc1118abe..e29172c2afcb 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -87,17 +87,22 @@ static int dump_qp(int id, void *p, void *data)
87 return 1; 87 return 1;
88 88
89 if (qp->ep) 89 if (qp->ep)
90 cc = snprintf(qpd->buf + qpd->pos, space, "qp id %u state %u " 90 cc = snprintf(qpd->buf + qpd->pos, space,
91 "qp sq id %u rq id %u state %u onchip %u "
91 "ep tid %u state %u %pI4:%u->%pI4:%u\n", 92 "ep tid %u state %u %pI4:%u->%pI4:%u\n",
92 qp->wq.sq.qid, (int)qp->attr.state, 93 qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
94 qp->wq.sq.flags & T4_SQ_ONCHIP,
93 qp->ep->hwtid, (int)qp->ep->com.state, 95 qp->ep->hwtid, (int)qp->ep->com.state,
94 &qp->ep->com.local_addr.sin_addr.s_addr, 96 &qp->ep->com.local_addr.sin_addr.s_addr,
95 ntohs(qp->ep->com.local_addr.sin_port), 97 ntohs(qp->ep->com.local_addr.sin_port),
96 &qp->ep->com.remote_addr.sin_addr.s_addr, 98 &qp->ep->com.remote_addr.sin_addr.s_addr,
97 ntohs(qp->ep->com.remote_addr.sin_port)); 99 ntohs(qp->ep->com.remote_addr.sin_port));
98 else 100 else
99 cc = snprintf(qpd->buf + qpd->pos, space, "qp id %u state %u\n", 101 cc = snprintf(qpd->buf + qpd->pos, space,
100 qp->wq.sq.qid, (int)qp->attr.state); 102 "qp sq id %u rq id %u state %u onchip %u\n",
103 qp->wq.sq.qid, qp->wq.rq.qid,
104 (int)qp->attr.state,
105 qp->wq.sq.flags & T4_SQ_ONCHIP);
101 if (cc < space) 106 if (cc < space)
102 qpd->pos += cc; 107 qpd->pos += cc;
103 return 0; 108 return 0;
@@ -368,7 +373,6 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
368static void c4iw_remove(struct c4iw_dev *dev) 373static void c4iw_remove(struct c4iw_dev *dev)
369{ 374{
370 PDBG("%s c4iw_dev %p\n", __func__, dev); 375 PDBG("%s c4iw_dev %p\n", __func__, dev);
371 cancel_delayed_work_sync(&dev->db_drop_task);
372 list_del(&dev->entry); 376 list_del(&dev->entry);
373 if (dev->registered) 377 if (dev->registered)
374 c4iw_unregister_device(dev); 378 c4iw_unregister_device(dev);
@@ -523,8 +527,16 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
523 case CXGB4_STATE_START_RECOVERY: 527 case CXGB4_STATE_START_RECOVERY:
524 printk(KERN_INFO MOD "%s: Fatal Error\n", 528 printk(KERN_INFO MOD "%s: Fatal Error\n",
525 pci_name(dev->rdev.lldi.pdev)); 529 pci_name(dev->rdev.lldi.pdev));
526 if (dev->registered) 530 dev->rdev.flags |= T4_FATAL_ERROR;
531 if (dev->registered) {
532 struct ib_event event;
533
534 memset(&event, 0, sizeof event);
535 event.event = IB_EVENT_DEVICE_FATAL;
536 event.device = &dev->ibdev;
537 ib_dispatch_event(&event);
527 c4iw_unregister_device(dev); 538 c4iw_unregister_device(dev);
539 }
528 break; 540 break;
529 case CXGB4_STATE_DETACH: 541 case CXGB4_STATE_DETACH:
530 printk(KERN_INFO MOD "%s: Detach\n", 542 printk(KERN_INFO MOD "%s: Detach\n",
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 2fe19ec9ba60..9f6166f59268 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -176,7 +176,6 @@ struct c4iw_dev {
176 struct idr mmidr; 176 struct idr mmidr;
177 spinlock_t lock; 177 spinlock_t lock;
178 struct list_head entry; 178 struct list_head entry;
179 struct delayed_work db_drop_task;
180 struct dentry *debugfs_root; 179 struct dentry *debugfs_root;
181 u8 registered; 180 u8 registered;
182}; 181};
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 4f0be25cab1a..70a5a3c646da 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -31,9 +31,9 @@
31 */ 31 */
32#include "iw_cxgb4.h" 32#include "iw_cxgb4.h"
33 33
34static int ocqp_support; 34static int ocqp_support = 1;
35module_param(ocqp_support, int, 0644); 35module_param(ocqp_support, int, 0644);
36MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=0)"); 36MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
37 37
38static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) 38static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
39{ 39{
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 70004425d695..24af12fc8228 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -507,8 +507,14 @@ static inline void t4_swcq_consume(struct t4_cq *cq)
507static inline void t4_hwcq_consume(struct t4_cq *cq) 507static inline void t4_hwcq_consume(struct t4_cq *cq)
508{ 508{
509 cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts; 509 cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
510 if (++cq->cidx_inc == cq->size) 510 if (++cq->cidx_inc == (cq->size >> 4)) {
511 u32 val;
512
513 val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
514 INGRESSQID(cq->cqid);
515 writel(val, cq->gts);
511 cq->cidx_inc = 0; 516 cq->cidx_inc = 0;
517 }
512 if (++cq->cidx == cq->size) { 518 if (++cq->cidx == cq->size) {
513 cq->cidx = 0; 519 cq->cidx = 0;
514 cq->gen ^= 1; 520 cq->gen ^= 1;
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b8cb2f145ae4..8991677e9a08 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -557,6 +557,7 @@ static ssize_t store_reset(struct device *dev,
557 dev_info(dev,"Unit %d is disabled, can't reset\n", 557 dev_info(dev,"Unit %d is disabled, can't reset\n",
558 dd->ipath_unit); 558 dd->ipath_unit);
559 ret = -EINVAL; 559 ret = -EINVAL;
560 goto bail;
560 } 561 }
561 ret = ipath_reset_device(dd->ipath_unit); 562 ret = ipath_reset_device(dd->ipath_unit);
562bail: 563bail:
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 8b606fd64022..08c194861af5 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2610,9 +2610,11 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
2610 netif_carrier_on(nesvnic->netdev); 2610 netif_carrier_on(nesvnic->netdev);
2611 2611
2612 spin_lock(&nesvnic->port_ibevent_lock); 2612 spin_lock(&nesvnic->port_ibevent_lock);
2613 if (nesdev->iw_status == 0) { 2613 if (nesvnic->of_device_registered) {
2614 nesdev->iw_status = 1; 2614 if (nesdev->iw_status == 0) {
2615 nes_port_ibevent(nesvnic); 2615 nesdev->iw_status = 1;
2616 nes_port_ibevent(nesvnic);
2617 }
2616 } 2618 }
2617 spin_unlock(&nesvnic->port_ibevent_lock); 2619 spin_unlock(&nesvnic->port_ibevent_lock);
2618 } 2620 }
@@ -2642,9 +2644,11 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
2642 netif_carrier_off(nesvnic->netdev); 2644 netif_carrier_off(nesvnic->netdev);
2643 2645
2644 spin_lock(&nesvnic->port_ibevent_lock); 2646 spin_lock(&nesvnic->port_ibevent_lock);
2645 if (nesdev->iw_status == 1) { 2647 if (nesvnic->of_device_registered) {
2646 nesdev->iw_status = 0; 2648 if (nesdev->iw_status == 1) {
2647 nes_port_ibevent(nesvnic); 2649 nesdev->iw_status = 0;
2650 nes_port_ibevent(nesvnic);
2651 }
2648 } 2652 }
2649 spin_unlock(&nesvnic->port_ibevent_lock); 2653 spin_unlock(&nesvnic->port_ibevent_lock);
2650 } 2654 }
@@ -2703,9 +2707,11 @@ void nes_recheck_link_status(struct work_struct *work)
2703 netif_carrier_on(nesvnic->netdev); 2707 netif_carrier_on(nesvnic->netdev);
2704 2708
2705 spin_lock(&nesvnic->port_ibevent_lock); 2709 spin_lock(&nesvnic->port_ibevent_lock);
2706 if (nesdev->iw_status == 0) { 2710 if (nesvnic->of_device_registered) {
2707 nesdev->iw_status = 1; 2711 if (nesdev->iw_status == 0) {
2708 nes_port_ibevent(nesvnic); 2712 nesdev->iw_status = 1;
2713 nes_port_ibevent(nesvnic);
2714 }
2709 } 2715 }
2710 spin_unlock(&nesvnic->port_ibevent_lock); 2716 spin_unlock(&nesvnic->port_ibevent_lock);
2711 } 2717 }
@@ -2723,9 +2729,11 @@ void nes_recheck_link_status(struct work_struct *work)
2723 netif_carrier_off(nesvnic->netdev); 2729 netif_carrier_off(nesvnic->netdev);
2724 2730
2725 spin_lock(&nesvnic->port_ibevent_lock); 2731 spin_lock(&nesvnic->port_ibevent_lock);
2726 if (nesdev->iw_status == 1) { 2732 if (nesvnic->of_device_registered) {
2727 nesdev->iw_status = 0; 2733 if (nesdev->iw_status == 1) {
2728 nes_port_ibevent(nesvnic); 2734 nesdev->iw_status = 0;
2735 nes_port_ibevent(nesvnic);
2736 }
2729 } 2737 }
2730 spin_unlock(&nesvnic->port_ibevent_lock); 2738 spin_unlock(&nesvnic->port_ibevent_lock);
2731 } 2739 }
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index b01809a82cb0..4a2d21e15a70 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -5582,9 +5582,16 @@ static void qsfp_7322_event(struct work_struct *work)
5582 * even on failure to read cable information. We don't 5582 * even on failure to read cable information. We don't
5583 * get here for QME, so IS_QME check not needed here. 5583 * get here for QME, so IS_QME check not needed here.
5584 */ 5584 */
5585 le2 = (!ret && qd->cache.atten[1] >= qib_long_atten && 5585 if (!ret && !ppd->dd->cspec->r1) {
5586 !ppd->dd->cspec->r1 && QSFP_IS_CU(qd->cache.tech)) ? 5586 if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
5587 LE2_5m : LE2_DEFAULT; 5587 le2 = LE2_QME;
5588 else if (qd->cache.atten[1] >= qib_long_atten &&
5589 QSFP_IS_CU(qd->cache.tech))
5590 le2 = LE2_5m;
5591 else
5592 le2 = LE2_DEFAULT;
5593 } else
5594 le2 = LE2_DEFAULT;
5588 ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); 5595 ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
5589 init_txdds_table(ppd, 0); 5596 init_txdds_table(ppd, 0);
5590} 5597}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 5ad224e4a38b..8fd3df5bf04d 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -464,8 +464,9 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
464 memset(smp->data, 0, sizeof(smp->data)); 464 memset(smp->data, 0, sizeof(smp->data));
465 465
466 /* Only return the mkey if the protection field allows it. */ 466 /* Only return the mkey if the protection field allows it. */
467 if (smp->method == IB_MGMT_METHOD_SET || ibp->mkey == smp->mkey || 467 if (!(smp->method == IB_MGMT_METHOD_GET &&
468 ibp->mkeyprot == 0) 468 ibp->mkey != smp->mkey &&
469 ibp->mkeyprot == 1))
469 pip->mkey = ibp->mkey; 470 pip->mkey = ibp->mkey;
470 pip->gid_prefix = ibp->gid_prefix; 471 pip->gid_prefix = ibp->gid_prefix;
471 lid = ppd->lid; 472 lid = ppd->lid;
@@ -705,7 +706,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
705 lwe = pip->link_width_enabled; 706 lwe = pip->link_width_enabled;
706 if (lwe) { 707 if (lwe) {
707 if (lwe == 0xFF) 708 if (lwe == 0xFF)
708 lwe = ppd->link_width_supported; 709 set_link_width_enabled(ppd, ppd->link_width_supported);
709 else if (lwe >= 16 || (lwe & ~ppd->link_width_supported)) 710 else if (lwe >= 16 || (lwe & ~ppd->link_width_supported))
710 smp->status |= IB_SMP_INVALID_FIELD; 711 smp->status |= IB_SMP_INVALID_FIELD;
711 else if (lwe != ppd->link_width_enabled) 712 else if (lwe != ppd->link_width_enabled)
@@ -720,7 +721,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
720 * speeds. 721 * speeds.
721 */ 722 */
722 if (lse == 15) 723 if (lse == 15)
723 lse = ppd->link_speed_supported; 724 set_link_speed_enabled(ppd,
725 ppd->link_speed_supported);
724 else if (lse >= 8 || (lse & ~ppd->link_speed_supported)) 726 else if (lse >= 8 || (lse & ~ppd->link_speed_supported))
725 smp->status |= IB_SMP_INVALID_FIELD; 727 smp->status |= IB_SMP_INVALID_FIELD;
726 else if (lse != ppd->link_speed_enabled) 728 else if (lse != ppd->link_speed_enabled)
@@ -849,7 +851,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
849 if (clientrereg) 851 if (clientrereg)
850 pip->clientrereg_resv_subnetto |= 0x80; 852 pip->clientrereg_resv_subnetto |= 0x80;
851 853
852 goto done; 854 goto get_only;
853 855
854err: 856err:
855 smp->status |= IB_SMP_INVALID_FIELD; 857 smp->status |= IB_SMP_INVALID_FIELD;
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
index 19b527bafd57..c109bbdc90ac 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.h
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -79,6 +79,8 @@
79extern const char *const qib_qsfp_devtech[16]; 79extern const char *const qib_qsfp_devtech[16];
80/* Active Equalization includes fiber, copper full EQ, and copper near Eq */ 80/* Active Equalization includes fiber, copper full EQ, and copper near Eq */
81#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1) 81#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
82/* Active Equalization includes fiber, copper full EQ, and copper far Eq */
83#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1)
82/* Attenuation should be valid for copper other than full/near Eq */ 84/* Attenuation should be valid for copper other than full/near Eq */
83#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1) 85#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
84/* Length is only valid if technology is "copper" */ 86/* Length is only valid if technology is "copper" */
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 8245237b67ce..eca0c41f1226 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -1005,7 +1005,8 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
1005 * there are still requests that haven't been acked. 1005 * there are still requests that haven't been acked.
1006 */ 1006 */
1007 if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && 1007 if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
1008 !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN))) 1008 !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
1009 (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
1009 start_timer(qp); 1010 start_timer(qp);
1010 1011
1011 while (qp->s_last != qp->s_acked) { 1012 while (qp->s_last != qp->s_acked) {
@@ -1439,6 +1440,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
1439 } 1440 }
1440 1441
1441 spin_lock_irqsave(&qp->s_lock, flags); 1442 spin_lock_irqsave(&qp->s_lock, flags);
1443 if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
1444 goto ack_done;
1442 1445
1443 /* Ignore invalid responses. */ 1446 /* Ignore invalid responses. */
1444 if (qib_cmp24(psn, qp->s_next_psn) >= 0) 1447 if (qib_cmp24(psn, qp->s_next_psn) >= 0)
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index 23cf8fc933ec..5b8f59d6c3e8 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -360,7 +360,7 @@ static int gameport_queue_event(void *object, struct module *owner,
360 event->owner = owner; 360 event->owner = owner;
361 361
362 list_add_tail(&event->node, &gameport_event_list); 362 list_add_tail(&event->node, &gameport_event_list);
363 schedule_work(&gameport_event_work); 363 queue_work(system_long_wq, &gameport_event_work);
364 364
365out: 365out:
366 spin_unlock_irqrestore(&gameport_event_lock, flags); 366 spin_unlock_irqrestore(&gameport_event_lock, flags);
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index ac471b77c18e..99ce9032d08c 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -71,8 +71,9 @@ struct tegra_kbc {
71 spinlock_t lock; 71 spinlock_t lock;
72 unsigned int repoll_dly; 72 unsigned int repoll_dly;
73 unsigned long cp_dly_jiffies; 73 unsigned long cp_dly_jiffies;
74 bool use_fn_map;
74 const struct tegra_kbc_platform_data *pdata; 75 const struct tegra_kbc_platform_data *pdata;
75 unsigned short keycode[KBC_MAX_KEY]; 76 unsigned short keycode[KBC_MAX_KEY * 2];
76 unsigned short current_keys[KBC_MAX_KPENT]; 77 unsigned short current_keys[KBC_MAX_KPENT];
77 unsigned int num_pressed_keys; 78 unsigned int num_pressed_keys;
78 struct timer_list timer; 79 struct timer_list timer;
@@ -178,6 +179,40 @@ static const u32 tegra_kbc_default_keymap[] = {
178 KEY(15, 5, KEY_F2), 179 KEY(15, 5, KEY_F2),
179 KEY(15, 6, KEY_CAPSLOCK), 180 KEY(15, 6, KEY_CAPSLOCK),
180 KEY(15, 7, KEY_F6), 181 KEY(15, 7, KEY_F6),
182
183 /* Software Handled Function Keys */
184 KEY(20, 0, KEY_KP7),
185
186 KEY(21, 0, KEY_KP9),
187 KEY(21, 1, KEY_KP8),
188 KEY(21, 2, KEY_KP4),
189 KEY(21, 4, KEY_KP1),
190
191 KEY(22, 1, KEY_KPSLASH),
192 KEY(22, 2, KEY_KP6),
193 KEY(22, 3, KEY_KP5),
194 KEY(22, 4, KEY_KP3),
195 KEY(22, 5, KEY_KP2),
196 KEY(22, 7, KEY_KP0),
197
198 KEY(27, 1, KEY_KPASTERISK),
199 KEY(27, 3, KEY_KPMINUS),
200 KEY(27, 4, KEY_KPPLUS),
201 KEY(27, 5, KEY_KPDOT),
202
203 KEY(28, 5, KEY_VOLUMEUP),
204
205 KEY(29, 3, KEY_HOME),
206 KEY(29, 4, KEY_END),
207 KEY(29, 5, KEY_BRIGHTNESSDOWN),
208 KEY(29, 6, KEY_VOLUMEDOWN),
209 KEY(29, 7, KEY_BRIGHTNESSUP),
210
211 KEY(30, 0, KEY_NUMLOCK),
212 KEY(30, 1, KEY_SCROLLLOCK),
213 KEY(30, 2, KEY_MUTE),
214
215 KEY(31, 4, KEY_HELP),
181}; 216};
182 217
183static const struct matrix_keymap_data tegra_kbc_default_keymap_data = { 218static const struct matrix_keymap_data tegra_kbc_default_keymap_data = {
@@ -224,6 +259,7 @@ static void tegra_kbc_report_keys(struct tegra_kbc *kbc)
224 unsigned int i; 259 unsigned int i;
225 unsigned int num_down = 0; 260 unsigned int num_down = 0;
226 unsigned long flags; 261 unsigned long flags;
262 bool fn_keypress = false;
227 263
228 spin_lock_irqsave(&kbc->lock, flags); 264 spin_lock_irqsave(&kbc->lock, flags);
229 for (i = 0; i < KBC_MAX_KPENT; i++) { 265 for (i = 0; i < KBC_MAX_KPENT; i++) {
@@ -237,11 +273,28 @@ static void tegra_kbc_report_keys(struct tegra_kbc *kbc)
237 MATRIX_SCAN_CODE(row, col, KBC_ROW_SHIFT); 273 MATRIX_SCAN_CODE(row, col, KBC_ROW_SHIFT);
238 274
239 scancodes[num_down] = scancode; 275 scancodes[num_down] = scancode;
240 keycodes[num_down++] = kbc->keycode[scancode]; 276 keycodes[num_down] = kbc->keycode[scancode];
277 /* If driver uses Fn map, do not report the Fn key. */
278 if ((keycodes[num_down] == KEY_FN) && kbc->use_fn_map)
279 fn_keypress = true;
280 else
281 num_down++;
241 } 282 }
242 283
243 val >>= 8; 284 val >>= 8;
244 } 285 }
286
287 /*
288 * If the platform uses Fn keymaps, translate keys on a Fn keypress.
289 * Function keycodes are KBC_MAX_KEY apart from the plain keycodes.
290 */
291 if (fn_keypress) {
292 for (i = 0; i < num_down; i++) {
293 scancodes[i] += KBC_MAX_KEY;
294 keycodes[i] = kbc->keycode[scancodes[i]];
295 }
296 }
297
245 spin_unlock_irqrestore(&kbc->lock, flags); 298 spin_unlock_irqrestore(&kbc->lock, flags);
246 299
247 tegra_kbc_report_released_keys(kbc->idev, 300 tegra_kbc_report_released_keys(kbc->idev,
@@ -594,8 +647,11 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev)
594 647
595 input_dev->keycode = kbc->keycode; 648 input_dev->keycode = kbc->keycode;
596 input_dev->keycodesize = sizeof(kbc->keycode[0]); 649 input_dev->keycodesize = sizeof(kbc->keycode[0]);
597 input_dev->keycodemax = ARRAY_SIZE(kbc->keycode); 650 input_dev->keycodemax = KBC_MAX_KEY;
651 if (pdata->use_fn_map)
652 input_dev->keycodemax *= 2;
598 653
654 kbc->use_fn_map = pdata->use_fn_map;
599 keymap_data = pdata->keymap_data ?: &tegra_kbc_default_keymap_data; 655 keymap_data = pdata->keymap_data ?: &tegra_kbc_default_keymap_data;
600 matrix_keypad_build_keymap(keymap_data, KBC_ROW_SHIFT, 656 matrix_keypad_build_keymap(keymap_data, KBC_ROW_SHIFT,
601 input_dev->keycode, input_dev->keybit); 657 input_dev->keycode, input_dev->keybit);
diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h
index 25e5d042a72c..7453938bf5ef 100644
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -51,6 +51,29 @@
51#define SYN_EXT_CAP_REQUESTS(c) (((c) & 0x700000) >> 20) 51#define SYN_EXT_CAP_REQUESTS(c) (((c) & 0x700000) >> 20)
52#define SYN_CAP_MULTI_BUTTON_NO(ec) (((ec) & 0x00f000) >> 12) 52#define SYN_CAP_MULTI_BUTTON_NO(ec) (((ec) & 0x00f000) >> 12)
53#define SYN_CAP_PRODUCT_ID(ec) (((ec) & 0xff0000) >> 16) 53#define SYN_CAP_PRODUCT_ID(ec) (((ec) & 0xff0000) >> 16)
54
55/*
56 * The following describes response for the 0x0c query.
57 *
58 * byte mask name meaning
59 * ---- ---- ------- ------------
60 * 1 0x01 adjustable threshold capacitive button sensitivity
61 * can be adjusted
62 * 1 0x02 report max query 0x0d gives max coord reported
63 * 1 0x04 clearpad sensor is ClearPad product
64 * 1 0x08 advanced gesture not particularly meaningful
65 * 1 0x10 clickpad bit 0 1-button ClickPad
66 * 1 0x60 multifinger mode identifies firmware finger counting
67 * (not reporting!) algorithm.
68 * Not particularly meaningful
69 * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered
70 * 2 0x01 clickpad bit 1 2-button ClickPad
71 * 2 0x02 deluxe LED controls touchpad support LED commands
72 * ala multimedia control bar
73 * 2 0x04 reduced filtering firmware does less filtering on
74 * position data, driver should watch
75 * for noise.
76 */
54#define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */ 77#define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */
55#define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */ 78#define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */
56#define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & 0x020000) 79#define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & 0x020000)
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 7c38d1fbabf2..ba70058e2be3 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -299,7 +299,7 @@ static int serio_queue_event(void *object, struct module *owner,
299 event->owner = owner; 299 event->owner = owner;
300 300
301 list_add_tail(&event->node, &serio_event_list); 301 list_add_tail(&event->node, &serio_event_list);
302 schedule_work(&serio_event_work); 302 queue_work(system_long_wq, &serio_event_work);
303 303
304out: 304out:
305 spin_unlock_irqrestore(&serio_event_lock, flags); 305 spin_unlock_irqrestore(&serio_event_lock, flags);
diff --git a/drivers/input/touchscreen/tps6507x-ts.c b/drivers/input/touchscreen/tps6507x-ts.c
index c8c136cf7bbc..43031492d733 100644
--- a/drivers/input/touchscreen/tps6507x-ts.c
+++ b/drivers/input/touchscreen/tps6507x-ts.c
@@ -43,7 +43,6 @@ struct tps6507x_ts {
43 struct input_dev *input_dev; 43 struct input_dev *input_dev;
44 struct device *dev; 44 struct device *dev;
45 char phys[32]; 45 char phys[32];
46 struct workqueue_struct *wq;
47 struct delayed_work work; 46 struct delayed_work work;
48 unsigned polling; /* polling is active */ 47 unsigned polling; /* polling is active */
49 struct ts_event tc; 48 struct ts_event tc;
@@ -220,8 +219,8 @@ done:
220 poll = 1; 219 poll = 1;
221 220
222 if (poll) { 221 if (poll) {
223 schd = queue_delayed_work(tsc->wq, &tsc->work, 222 schd = schedule_delayed_work(&tsc->work,
224 msecs_to_jiffies(tsc->poll_period)); 223 msecs_to_jiffies(tsc->poll_period));
225 if (schd) 224 if (schd)
226 tsc->polling = 1; 225 tsc->polling = 1;
227 else { 226 else {
@@ -303,7 +302,6 @@ static int tps6507x_ts_probe(struct platform_device *pdev)
303 tsc->input_dev = input_dev; 302 tsc->input_dev = input_dev;
304 303
305 INIT_DELAYED_WORK(&tsc->work, tps6507x_ts_handler); 304 INIT_DELAYED_WORK(&tsc->work, tps6507x_ts_handler);
306 tsc->wq = create_workqueue("TPS6507x Touchscreen");
307 305
308 if (init_data) { 306 if (init_data) {
309 tsc->poll_period = init_data->poll_period; 307 tsc->poll_period = init_data->poll_period;
@@ -325,8 +323,8 @@ static int tps6507x_ts_probe(struct platform_device *pdev)
325 if (error) 323 if (error)
326 goto err2; 324 goto err2;
327 325
328 schd = queue_delayed_work(tsc->wq, &tsc->work, 326 schd = schedule_delayed_work(&tsc->work,
329 msecs_to_jiffies(tsc->poll_period)); 327 msecs_to_jiffies(tsc->poll_period));
330 328
331 if (schd) 329 if (schd)
332 tsc->polling = 1; 330 tsc->polling = 1;
@@ -341,7 +339,6 @@ static int tps6507x_ts_probe(struct platform_device *pdev)
341 339
342err2: 340err2:
343 cancel_delayed_work_sync(&tsc->work); 341 cancel_delayed_work_sync(&tsc->work);
344 destroy_workqueue(tsc->wq);
345 input_free_device(input_dev); 342 input_free_device(input_dev);
346err1: 343err1:
347 kfree(tsc); 344 kfree(tsc);
@@ -357,7 +354,6 @@ static int __devexit tps6507x_ts_remove(struct platform_device *pdev)
357 struct input_dev *input_dev = tsc->input_dev; 354 struct input_dev *input_dev = tsc->input_dev;
358 355
359 cancel_delayed_work_sync(&tsc->work); 356 cancel_delayed_work_sync(&tsc->work);
360 destroy_workqueue(tsc->wq);
361 357
362 input_unregister_device(input_dev); 358 input_unregister_device(input_dev);
363 359
diff --git a/drivers/isdn/hardware/eicon/istream.c b/drivers/isdn/hardware/eicon/istream.c
index 18f8798442fa..7bd5baa547be 100644
--- a/drivers/isdn/hardware/eicon/istream.c
+++ b/drivers/isdn/hardware/eicon/istream.c
@@ -62,7 +62,7 @@ void diva_xdi_provide_istream_info (ADAPTER* a,
62 stream interface. 62 stream interface.
63 If synchronous service was requested, then function 63 If synchronous service was requested, then function
64 does return amount of data written to stream. 64 does return amount of data written to stream.
65 'final' does indicate that pice of data to be written is 65 'final' does indicate that piece of data to be written is
66 final part of frame (necessary only by structured datatransfer) 66 final part of frame (necessary only by structured datatransfer)
67 return 0 if zero lengh packet was written 67 return 0 if zero lengh packet was written
68 return -1 if stream is full 68 return -1 if stream is full
diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c
index 0858791978d8..cfff0c41d298 100644
--- a/drivers/isdn/hisax/isdnl2.c
+++ b/drivers/isdn/hisax/isdnl2.c
@@ -1247,10 +1247,10 @@ static void
1247l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) 1247l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
1248{ 1248{
1249 struct PStack *st = fi->userdata; 1249 struct PStack *st = fi->userdata;
1250 struct sk_buff *skb, *oskb; 1250 struct sk_buff *skb;
1251 struct Layer2 *l2 = &st->l2; 1251 struct Layer2 *l2 = &st->l2;
1252 u_char header[MAX_HEADER_LEN]; 1252 u_char header[MAX_HEADER_LEN];
1253 int i; 1253 int i, hdr_space_needed;
1254 int unsigned p1; 1254 int unsigned p1;
1255 u_long flags; 1255 u_long flags;
1256 1256
@@ -1261,6 +1261,16 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
1261 if (!skb) 1261 if (!skb)
1262 return; 1262 return;
1263 1263
1264 hdr_space_needed = l2headersize(l2, 0);
1265 if (hdr_space_needed > skb_headroom(skb)) {
1266 struct sk_buff *orig_skb = skb;
1267
1268 skb = skb_realloc_headroom(skb, hdr_space_needed);
1269 if (!skb) {
1270 dev_kfree_skb(orig_skb);
1271 return;
1272 }
1273 }
1264 spin_lock_irqsave(&l2->lock, flags); 1274 spin_lock_irqsave(&l2->lock, flags);
1265 if(test_bit(FLG_MOD128, &l2->flag)) 1275 if(test_bit(FLG_MOD128, &l2->flag))
1266 p1 = (l2->vs - l2->va) % 128; 1276 p1 = (l2->vs - l2->va) % 128;
@@ -1285,19 +1295,7 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
1285 l2->vs = (l2->vs + 1) % 8; 1295 l2->vs = (l2->vs + 1) % 8;
1286 } 1296 }
1287 spin_unlock_irqrestore(&l2->lock, flags); 1297 spin_unlock_irqrestore(&l2->lock, flags);
1288 p1 = skb->data - skb->head; 1298 memcpy(skb_push(skb, i), header, i);
1289 if (p1 >= i)
1290 memcpy(skb_push(skb, i), header, i);
1291 else {
1292 printk(KERN_WARNING
1293 "isdl2 pull_iqueue skb header(%d/%d) too short\n", i, p1);
1294 oskb = skb;
1295 skb = alloc_skb(oskb->len + i, GFP_ATOMIC);
1296 memcpy(skb_put(skb, i), header, i);
1297 skb_copy_from_linear_data(oskb,
1298 skb_put(skb, oskb->len), oskb->len);
1299 dev_kfree_skb(oskb);
1300 }
1301 st->l2.l2l1(st, PH_PULL | INDICATION, skb); 1299 st->l2.l2l1(st, PH_PULL | INDICATION, skb);
1302 test_and_clear_bit(FLG_ACK_PEND, &st->l2.flag); 1300 test_and_clear_bit(FLG_ACK_PEND, &st->l2.flag);
1303 if (!test_and_set_bit(FLG_T200_RUN, &st->l2.flag)) { 1301 if (!test_and_set_bit(FLG_T200_RUN, &st->l2.flag)) {
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 8a2f767f26d8..0ed7f6bc2a7f 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -216,7 +216,6 @@ static int linear_run (mddev_t *mddev)
216 216
217 if (md_check_no_bitmap(mddev)) 217 if (md_check_no_bitmap(mddev))
218 return -EINVAL; 218 return -EINVAL;
219 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
220 conf = linear_conf(mddev, mddev->raid_disks); 219 conf = linear_conf(mddev, mddev->raid_disks);
221 220
222 if (!conf) 221 if (!conf)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0cc30ecda4c1..d5ad7723b172 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -553,6 +553,9 @@ static mddev_t * mddev_find(dev_t unit)
553{ 553{
554 mddev_t *mddev, *new = NULL; 554 mddev_t *mddev, *new = NULL;
555 555
556 if (unit && MAJOR(unit) != MD_MAJOR)
557 unit &= ~((1<<MdpMinorShift)-1);
558
556 retry: 559 retry:
557 spin_lock(&all_mddevs_lock); 560 spin_lock(&all_mddevs_lock);
558 561
@@ -4138,10 +4141,10 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
4138 } 4141 }
4139 4142
4140 mddev->array_sectors = sectors; 4143 mddev->array_sectors = sectors;
4141 set_capacity(mddev->gendisk, mddev->array_sectors); 4144 if (mddev->pers) {
4142 if (mddev->pers) 4145 set_capacity(mddev->gendisk, mddev->array_sectors);
4143 revalidate_disk(mddev->gendisk); 4146 revalidate_disk(mddev->gendisk);
4144 4147 }
4145 return len; 4148 return len;
4146} 4149}
4147 4150
@@ -4624,6 +4627,7 @@ static int do_md_run(mddev_t *mddev)
4624 } 4627 }
4625 set_capacity(mddev->gendisk, mddev->array_sectors); 4628 set_capacity(mddev->gendisk, mddev->array_sectors);
4626 revalidate_disk(mddev->gendisk); 4629 revalidate_disk(mddev->gendisk);
4630 mddev->changed = 1;
4627 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4631 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4628out: 4632out:
4629 return err; 4633 return err;
@@ -4712,6 +4716,7 @@ static void md_clean(mddev_t *mddev)
4712 mddev->sync_speed_min = mddev->sync_speed_max = 0; 4716 mddev->sync_speed_min = mddev->sync_speed_max = 0;
4713 mddev->recovery = 0; 4717 mddev->recovery = 0;
4714 mddev->in_sync = 0; 4718 mddev->in_sync = 0;
4719 mddev->changed = 0;
4715 mddev->degraded = 0; 4720 mddev->degraded = 0;
4716 mddev->safemode = 0; 4721 mddev->safemode = 0;
4717 mddev->bitmap_info.offset = 0; 4722 mddev->bitmap_info.offset = 0;
@@ -4827,6 +4832,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4827 4832
4828 set_capacity(disk, 0); 4833 set_capacity(disk, 0);
4829 mutex_unlock(&mddev->open_mutex); 4834 mutex_unlock(&mddev->open_mutex);
4835 mddev->changed = 1;
4830 revalidate_disk(disk); 4836 revalidate_disk(disk);
4831 4837
4832 if (mddev->ro) 4838 if (mddev->ro)
@@ -6011,7 +6017,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
6011 atomic_inc(&mddev->openers); 6017 atomic_inc(&mddev->openers);
6012 mutex_unlock(&mddev->open_mutex); 6018 mutex_unlock(&mddev->open_mutex);
6013 6019
6014 check_disk_size_change(mddev->gendisk, bdev); 6020 check_disk_change(bdev);
6015 out: 6021 out:
6016 return err; 6022 return err;
6017} 6023}
@@ -6026,6 +6032,21 @@ static int md_release(struct gendisk *disk, fmode_t mode)
6026 6032
6027 return 0; 6033 return 0;
6028} 6034}
6035
6036static int md_media_changed(struct gendisk *disk)
6037{
6038 mddev_t *mddev = disk->private_data;
6039
6040 return mddev->changed;
6041}
6042
6043static int md_revalidate(struct gendisk *disk)
6044{
6045 mddev_t *mddev = disk->private_data;
6046
6047 mddev->changed = 0;
6048 return 0;
6049}
6029static const struct block_device_operations md_fops = 6050static const struct block_device_operations md_fops =
6030{ 6051{
6031 .owner = THIS_MODULE, 6052 .owner = THIS_MODULE,
@@ -6036,6 +6057,8 @@ static const struct block_device_operations md_fops =
6036 .compat_ioctl = md_compat_ioctl, 6057 .compat_ioctl = md_compat_ioctl,
6037#endif 6058#endif
6038 .getgeo = md_getgeo, 6059 .getgeo = md_getgeo,
6060 .media_changed = md_media_changed,
6061 .revalidate_disk= md_revalidate,
6039}; 6062};
6040 6063
6041static int md_thread(void * arg) 6064static int md_thread(void * arg)
@@ -7338,7 +7361,7 @@ static int __init md_init(void)
7338{ 7361{
7339 int ret = -ENOMEM; 7362 int ret = -ENOMEM;
7340 7363
7341 md_wq = alloc_workqueue("md", WQ_RESCUER, 0); 7364 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
7342 if (!md_wq) 7365 if (!md_wq)
7343 goto err_wq; 7366 goto err_wq;
7344 7367
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 7e90b8593b2a..12215d437fcc 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -274,6 +274,8 @@ struct mddev_s
274 atomic_t active; /* general refcount */ 274 atomic_t active; /* general refcount */
275 atomic_t openers; /* number of active opens */ 275 atomic_t openers; /* number of active opens */
276 276
277 int changed; /* True if we might need to
278 * reread partition info */
277 int degraded; /* whether md should consider 279 int degraded; /* whether md should consider
278 * adding a spare 280 * adding a spare
279 */ 281 */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 6d7ddf32ef2e..3a62d440e27b 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -435,7 +435,6 @@ static int multipath_run (mddev_t *mddev)
435 * bookkeeping area. [whatever we allocate in multipath_run(), 435 * bookkeeping area. [whatever we allocate in multipath_run(),
436 * should be freed in multipath_stop()] 436 * should be freed in multipath_stop()]
437 */ 437 */
438 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
439 438
440 conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); 439 conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
441 mddev->private = conf; 440 mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 637a96855edb..c0ac457f1218 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -361,7 +361,6 @@ static int raid0_run(mddev_t *mddev)
361 if (md_check_no_bitmap(mddev)) 361 if (md_check_no_bitmap(mddev))
362 return -EINVAL; 362 return -EINVAL;
363 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); 363 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
364 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
365 364
366 /* if private is not null, we are here after takeover */ 365 /* if private is not null, we are here after takeover */
367 if (mddev->private == NULL) { 366 if (mddev->private == NULL) {
@@ -670,6 +669,7 @@ static void *raid0_takeover_raid1(mddev_t *mddev)
670 mddev->new_layout = 0; 669 mddev->new_layout = 0;
671 mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */ 670 mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
672 mddev->delta_disks = 1 - mddev->raid_disks; 671 mddev->delta_disks = 1 - mddev->raid_disks;
672 mddev->raid_disks = 1;
673 /* make sure it will be not marked as dirty */ 673 /* make sure it will be not marked as dirty */
674 mddev->recovery_cp = MaxSector; 674 mddev->recovery_cp = MaxSector;
675 675
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a23ffa397ba9..06cd712807d0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -593,7 +593,10 @@ static int flush_pending_writes(conf_t *conf)
593 if (conf->pending_bio_list.head) { 593 if (conf->pending_bio_list.head) {
594 struct bio *bio; 594 struct bio *bio;
595 bio = bio_list_get(&conf->pending_bio_list); 595 bio = bio_list_get(&conf->pending_bio_list);
596 /* Only take the spinlock to quiet a warning */
597 spin_lock(conf->mddev->queue->queue_lock);
596 blk_remove_plug(conf->mddev->queue); 598 blk_remove_plug(conf->mddev->queue);
599 spin_unlock(conf->mddev->queue->queue_lock);
597 spin_unlock_irq(&conf->device_lock); 600 spin_unlock_irq(&conf->device_lock);
598 /* flush any pending bitmap writes to 601 /* flush any pending bitmap writes to
599 * disk before proceeding w/ I/O */ 602 * disk before proceeding w/ I/O */
@@ -959,7 +962,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
959 atomic_inc(&r1_bio->remaining); 962 atomic_inc(&r1_bio->remaining);
960 spin_lock_irqsave(&conf->device_lock, flags); 963 spin_lock_irqsave(&conf->device_lock, flags);
961 bio_list_add(&conf->pending_bio_list, mbio); 964 bio_list_add(&conf->pending_bio_list, mbio);
962 blk_plug_device(mddev->queue); 965 blk_plug_device_unlocked(mddev->queue);
963 spin_unlock_irqrestore(&conf->device_lock, flags); 966 spin_unlock_irqrestore(&conf->device_lock, flags);
964 } 967 }
965 r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); 968 r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
@@ -2021,7 +2024,6 @@ static int run(mddev_t *mddev)
2021 if (IS_ERR(conf)) 2024 if (IS_ERR(conf))
2022 return PTR_ERR(conf); 2025 return PTR_ERR(conf);
2023 2026
2024 mddev->queue->queue_lock = &conf->device_lock;
2025 list_for_each_entry(rdev, &mddev->disks, same_set) { 2027 list_for_each_entry(rdev, &mddev->disks, same_set) {
2026 disk_stack_limits(mddev->gendisk, rdev->bdev, 2028 disk_stack_limits(mddev->gendisk, rdev->bdev,
2027 rdev->data_offset << 9); 2029 rdev->data_offset << 9);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3b607b28741b..747d061d8e05 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -662,7 +662,10 @@ static int flush_pending_writes(conf_t *conf)
662 if (conf->pending_bio_list.head) { 662 if (conf->pending_bio_list.head) {
663 struct bio *bio; 663 struct bio *bio;
664 bio = bio_list_get(&conf->pending_bio_list); 664 bio = bio_list_get(&conf->pending_bio_list);
665 /* Spinlock only taken to quiet a warning */
666 spin_lock(conf->mddev->queue->queue_lock);
665 blk_remove_plug(conf->mddev->queue); 667 blk_remove_plug(conf->mddev->queue);
668 spin_unlock(conf->mddev->queue->queue_lock);
666 spin_unlock_irq(&conf->device_lock); 669 spin_unlock_irq(&conf->device_lock);
667 /* flush any pending bitmap writes to disk 670 /* flush any pending bitmap writes to disk
668 * before proceeding w/ I/O */ 671 * before proceeding w/ I/O */
@@ -971,7 +974,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
971 atomic_inc(&r10_bio->remaining); 974 atomic_inc(&r10_bio->remaining);
972 spin_lock_irqsave(&conf->device_lock, flags); 975 spin_lock_irqsave(&conf->device_lock, flags);
973 bio_list_add(&conf->pending_bio_list, mbio); 976 bio_list_add(&conf->pending_bio_list, mbio);
974 blk_plug_device(mddev->queue); 977 blk_plug_device_unlocked(mddev->queue);
975 spin_unlock_irqrestore(&conf->device_lock, flags); 978 spin_unlock_irqrestore(&conf->device_lock, flags);
976 } 979 }
977 980
@@ -2304,8 +2307,6 @@ static int run(mddev_t *mddev)
2304 if (!conf) 2307 if (!conf)
2305 goto out; 2308 goto out;
2306 2309
2307 mddev->queue->queue_lock = &conf->device_lock;
2308
2309 mddev->thread = conf->thread; 2310 mddev->thread = conf->thread;
2310 conf->thread = NULL; 2311 conf->thread = NULL;
2311 2312
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 702812824195..78536fdbd87f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5204,7 +5204,6 @@ static int run(mddev_t *mddev)
5204 5204
5205 mddev->queue->backing_dev_info.congested_data = mddev; 5205 mddev->queue->backing_dev_info.congested_data = mddev;
5206 mddev->queue->backing_dev_info.congested_fn = raid5_congested; 5206 mddev->queue->backing_dev_info.congested_fn = raid5_congested;
5207 mddev->queue->queue_lock = &conf->device_lock;
5208 mddev->queue->unplug_fn = raid5_unplug_queue; 5207 mddev->queue->unplug_fn = raid5_unplug_queue;
5209 5208
5210 chunk_size = mddev->chunk_sectors << 9; 5209 chunk_size = mddev->chunk_sectors << 9;
diff --git a/drivers/media/common/tuners/tda8290.c b/drivers/media/common/tuners/tda8290.c
index bc6a67768af1..8c4852114eeb 100644
--- a/drivers/media/common/tuners/tda8290.c
+++ b/drivers/media/common/tuners/tda8290.c
@@ -658,13 +658,13 @@ static int tda8290_probe(struct tuner_i2c_props *i2c_props)
658#define TDA8290_ID 0x89 658#define TDA8290_ID 0x89
659 u8 reg = 0x1f, id; 659 u8 reg = 0x1f, id;
660 struct i2c_msg msg_read[] = { 660 struct i2c_msg msg_read[] = {
661 { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg }, 661 { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
662 { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id }, 662 { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
663 }; 663 };
664 664
665 /* detect tda8290 */ 665 /* detect tda8290 */
666 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) { 666 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
667 printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n", 667 printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
668 __func__, reg); 668 __func__, reg);
669 return -ENODEV; 669 return -ENODEV;
670 } 670 }
@@ -685,13 +685,13 @@ static int tda8295_probe(struct tuner_i2c_props *i2c_props)
685#define TDA8295C2_ID 0x8b 685#define TDA8295C2_ID 0x8b
686 u8 reg = 0x2f, id; 686 u8 reg = 0x2f, id;
687 struct i2c_msg msg_read[] = { 687 struct i2c_msg msg_read[] = {
688 { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg }, 688 { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
689 { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id }, 689 { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
690 }; 690 };
691 691
692 /* detect tda8290 */ 692 /* detect tda8295 */
693 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) { 693 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
694 printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n", 694 printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
695 __func__, reg); 695 __func__, reg);
696 return -ENODEV; 696 return -ENODEV;
697 } 697 }
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index defd83964ce2..193cdb77b76a 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -870,6 +870,23 @@ static int dib7070p_tuner_attach(struct dvb_usb_adapter *adap)
870 return 0; 870 return 0;
871} 871}
872 872
873static int stk7700p_pid_filter(struct dvb_usb_adapter *adapter, int index,
874 u16 pid, int onoff)
875{
876 struct dib0700_state *st = adapter->dev->priv;
877 if (st->is_dib7000pc)
878 return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
879 return dib7000m_pid_filter(adapter->fe, index, pid, onoff);
880}
881
882static int stk7700p_pid_filter_ctrl(struct dvb_usb_adapter *adapter, int onoff)
883{
884 struct dib0700_state *st = adapter->dev->priv;
885 if (st->is_dib7000pc)
886 return dib7000p_pid_filter_ctrl(adapter->fe, onoff);
887 return dib7000m_pid_filter_ctrl(adapter->fe, onoff);
888}
889
873static int stk70x0p_pid_filter(struct dvb_usb_adapter *adapter, int index, u16 pid, int onoff) 890static int stk70x0p_pid_filter(struct dvb_usb_adapter *adapter, int index, u16 pid, int onoff)
874{ 891{
875 return dib7000p_pid_filter(adapter->fe, index, pid, onoff); 892 return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
@@ -1875,8 +1892,8 @@ struct dvb_usb_device_properties dib0700_devices[] = {
1875 { 1892 {
1876 .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, 1893 .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF,
1877 .pid_filter_count = 32, 1894 .pid_filter_count = 32,
1878 .pid_filter = stk70x0p_pid_filter, 1895 .pid_filter = stk7700p_pid_filter,
1879 .pid_filter_ctrl = stk70x0p_pid_filter_ctrl, 1896 .pid_filter_ctrl = stk7700p_pid_filter_ctrl,
1880 .frontend_attach = stk7700p_frontend_attach, 1897 .frontend_attach = stk7700p_frontend_attach,
1881 .tuner_attach = stk7700p_tuner_attach, 1898 .tuner_attach = stk7700p_tuner_attach,
1882 1899
diff --git a/drivers/media/dvb/dvb-usb/lmedm04.c b/drivers/media/dvb/dvb-usb/lmedm04.c
index 9eea4188303b..46ccd01a7696 100644
--- a/drivers/media/dvb/dvb-usb/lmedm04.c
+++ b/drivers/media/dvb/dvb-usb/lmedm04.c
@@ -659,7 +659,7 @@ static int lme2510_download_firmware(struct usb_device *dev,
659} 659}
660 660
661/* Default firmware for LME2510C */ 661/* Default firmware for LME2510C */
662const char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw"; 662char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw";
663 663
664static void lme_coldreset(struct usb_device *dev) 664static void lme_coldreset(struct usb_device *dev)
665{ 665{
@@ -1006,7 +1006,7 @@ static struct dvb_usb_device_properties lme2510c_properties = {
1006 .caps = DVB_USB_IS_AN_I2C_ADAPTER, 1006 .caps = DVB_USB_IS_AN_I2C_ADAPTER,
1007 .usb_ctrl = DEVICE_SPECIFIC, 1007 .usb_ctrl = DEVICE_SPECIFIC,
1008 .download_firmware = lme2510_download_firmware, 1008 .download_firmware = lme2510_download_firmware,
1009 .firmware = lme_firmware, 1009 .firmware = (const char *)&lme_firmware,
1010 .size_of_priv = sizeof(struct lme2510_state), 1010 .size_of_priv = sizeof(struct lme2510_state),
1011 .num_adapters = 1, 1011 .num_adapters = 1,
1012 .adapter = { 1012 .adapter = {
@@ -1109,5 +1109,5 @@ module_exit(lme2510_module_exit);
1109 1109
1110MODULE_AUTHOR("Malcolm Priestley <tvboxspy@gmail.com>"); 1110MODULE_AUTHOR("Malcolm Priestley <tvboxspy@gmail.com>");
1111MODULE_DESCRIPTION("LME2510(C) DVB-S USB2.0"); 1111MODULE_DESCRIPTION("LME2510(C) DVB-S USB2.0");
1112MODULE_VERSION("1.74"); 1112MODULE_VERSION("1.75");
1113MODULE_LICENSE("GPL"); 1113MODULE_LICENSE("GPL");
diff --git a/drivers/media/dvb/frontends/dib7000m.c b/drivers/media/dvb/frontends/dib7000m.c
index c7f5ccf54aa5..289a79837f24 100644
--- a/drivers/media/dvb/frontends/dib7000m.c
+++ b/drivers/media/dvb/frontends/dib7000m.c
@@ -1285,6 +1285,25 @@ struct i2c_adapter * dib7000m_get_i2c_master(struct dvb_frontend *demod, enum di
1285} 1285}
1286EXPORT_SYMBOL(dib7000m_get_i2c_master); 1286EXPORT_SYMBOL(dib7000m_get_i2c_master);
1287 1287
1288int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff)
1289{
1290 struct dib7000m_state *state = fe->demodulator_priv;
1291 u16 val = dib7000m_read_word(state, 294 + state->reg_offs) & 0xffef;
1292 val |= (onoff & 0x1) << 4;
1293 dprintk("PID filter enabled %d", onoff);
1294 return dib7000m_write_word(state, 294 + state->reg_offs, val);
1295}
1296EXPORT_SYMBOL(dib7000m_pid_filter_ctrl);
1297
1298int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id, u16 pid, u8 onoff)
1299{
1300 struct dib7000m_state *state = fe->demodulator_priv;
1301 dprintk("PID filter: index %x, PID %d, OnOff %d", id, pid, onoff);
1302 return dib7000m_write_word(state, 300 + state->reg_offs + id,
1303 onoff ? (1 << 13) | pid : 0);
1304}
1305EXPORT_SYMBOL(dib7000m_pid_filter);
1306
1288#if 0 1307#if 0
1289/* used with some prototype boards */ 1308/* used with some prototype boards */
1290int dib7000m_i2c_enumeration(struct i2c_adapter *i2c, int no_of_demods, 1309int dib7000m_i2c_enumeration(struct i2c_adapter *i2c, int no_of_demods,
diff --git a/drivers/media/dvb/frontends/dib7000m.h b/drivers/media/dvb/frontends/dib7000m.h
index 113819ce9f0d..81fcf2241c64 100644
--- a/drivers/media/dvb/frontends/dib7000m.h
+++ b/drivers/media/dvb/frontends/dib7000m.h
@@ -46,6 +46,8 @@ extern struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
46extern struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *, 46extern struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *,
47 enum dibx000_i2c_interface, 47 enum dibx000_i2c_interface,
48 int); 48 int);
49extern int dib7000m_pid_filter(struct dvb_frontend *, u8 id, u16 pid, u8 onoff);
50extern int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff);
49#else 51#else
50static inline 52static inline
51struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap, 53struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
@@ -63,6 +65,19 @@ struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *demod,
63 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__); 65 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
64 return NULL; 66 return NULL;
65} 67}
68static inline int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id,
69 u16 pid, u8 onoff)
70{
71 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
72 return -ENODEV;
73}
74
75static inline int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe,
76 uint8_t onoff)
77{
78 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
79 return -ENODEV;
80}
66#endif 81#endif
67 82
68/* TODO 83/* TODO
diff --git a/drivers/media/dvb/mantis/mantis_pci.c b/drivers/media/dvb/mantis/mantis_pci.c
index 59feeb84aec7..10a432a79d00 100644
--- a/drivers/media/dvb/mantis/mantis_pci.c
+++ b/drivers/media/dvb/mantis/mantis_pci.c
@@ -22,7 +22,6 @@
22#include <linux/moduleparam.h> 22#include <linux/moduleparam.h>
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <asm/io.h> 24#include <asm/io.h>
25#include <asm/pgtable.h>
26#include <asm/page.h> 25#include <asm/page.h>
27#include <linux/kmod.h> 26#include <linux/kmod.h>
28#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
diff --git a/drivers/media/rc/ir-raw.c b/drivers/media/rc/ir-raw.c
index 73230ff93b8a..01f258a2a57a 100644
--- a/drivers/media/rc/ir-raw.c
+++ b/drivers/media/rc/ir-raw.c
@@ -112,7 +112,7 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
112{ 112{
113 ktime_t now; 113 ktime_t now;
114 s64 delta; /* ns */ 114 s64 delta; /* ns */
115 struct ir_raw_event ev; 115 DEFINE_IR_RAW_EVENT(ev);
116 int rc = 0; 116 int rc = 0;
117 117
118 if (!dev->raw) 118 if (!dev->raw)
@@ -125,7 +125,6 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
125 * being called for the first time, note that delta can't 125 * being called for the first time, note that delta can't
126 * possibly be negative. 126 * possibly be negative.
127 */ 127 */
128 ev.duration = 0;
129 if (delta > IR_MAX_DURATION || !dev->raw->last_type) 128 if (delta > IR_MAX_DURATION || !dev->raw->last_type)
130 type |= IR_START_EVENT; 129 type |= IR_START_EVENT;
131 else 130 else
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 6df0a4980645..e4f8eac7f717 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -148,6 +148,7 @@ enum mceusb_model_type {
148 MCE_GEN2_TX_INV, 148 MCE_GEN2_TX_INV,
149 POLARIS_EVK, 149 POLARIS_EVK,
150 CX_HYBRID_TV, 150 CX_HYBRID_TV,
151 MULTIFUNCTION,
151}; 152};
152 153
153struct mceusb_model { 154struct mceusb_model {
@@ -155,9 +156,10 @@ struct mceusb_model {
155 u32 mce_gen2:1; 156 u32 mce_gen2:1;
156 u32 mce_gen3:1; 157 u32 mce_gen3:1;
157 u32 tx_mask_normal:1; 158 u32 tx_mask_normal:1;
158 u32 is_polaris:1;
159 u32 no_tx:1; 159 u32 no_tx:1;
160 160
161 int ir_intfnum;
162
161 const char *rc_map; /* Allow specify a per-board map */ 163 const char *rc_map; /* Allow specify a per-board map */
162 const char *name; /* per-board name */ 164 const char *name; /* per-board name */
163}; 165};
@@ -179,7 +181,6 @@ static const struct mceusb_model mceusb_model[] = {
179 .tx_mask_normal = 1, 181 .tx_mask_normal = 1,
180 }, 182 },
181 [POLARIS_EVK] = { 183 [POLARIS_EVK] = {
182 .is_polaris = 1,
183 /* 184 /*
184 * In fact, the EVK is shipped without 185 * In fact, the EVK is shipped without
185 * remotes, but we should have something handy, 186 * remotes, but we should have something handy,
@@ -189,10 +190,13 @@ static const struct mceusb_model mceusb_model[] = {
189 .name = "Conexant Hybrid TV (cx231xx) MCE IR", 190 .name = "Conexant Hybrid TV (cx231xx) MCE IR",
190 }, 191 },
191 [CX_HYBRID_TV] = { 192 [CX_HYBRID_TV] = {
192 .is_polaris = 1,
193 .no_tx = 1, /* tx isn't wired up at all */ 193 .no_tx = 1, /* tx isn't wired up at all */
194 .name = "Conexant Hybrid TV (cx231xx) MCE IR", 194 .name = "Conexant Hybrid TV (cx231xx) MCE IR",
195 }, 195 },
196 [MULTIFUNCTION] = {
197 .mce_gen2 = 1,
198 .ir_intfnum = 2,
199 },
196}; 200};
197 201
198static struct usb_device_id mceusb_dev_table[] = { 202static struct usb_device_id mceusb_dev_table[] = {
@@ -216,8 +220,9 @@ static struct usb_device_id mceusb_dev_table[] = {
216 { USB_DEVICE(VENDOR_PHILIPS, 0x206c) }, 220 { USB_DEVICE(VENDOR_PHILIPS, 0x206c) },
217 /* Philips/Spinel plus IR transceiver for ASUS */ 221 /* Philips/Spinel plus IR transceiver for ASUS */
218 { USB_DEVICE(VENDOR_PHILIPS, 0x2088) }, 222 { USB_DEVICE(VENDOR_PHILIPS, 0x2088) },
219 /* Realtek MCE IR Receiver */ 223 /* Realtek MCE IR Receiver and card reader */
220 { USB_DEVICE(VENDOR_REALTEK, 0x0161) }, 224 { USB_DEVICE(VENDOR_REALTEK, 0x0161),
225 .driver_info = MULTIFUNCTION },
221 /* SMK/Toshiba G83C0004D410 */ 226 /* SMK/Toshiba G83C0004D410 */
222 { USB_DEVICE(VENDOR_SMK, 0x031d), 227 { USB_DEVICE(VENDOR_SMK, 0x031d),
223 .driver_info = MCE_GEN2_TX_INV }, 228 .driver_info = MCE_GEN2_TX_INV },
@@ -1101,7 +1106,7 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1101 bool is_gen3; 1106 bool is_gen3;
1102 bool is_microsoft_gen1; 1107 bool is_microsoft_gen1;
1103 bool tx_mask_normal; 1108 bool tx_mask_normal;
1104 bool is_polaris; 1109 int ir_intfnum;
1105 1110
1106 dev_dbg(&intf->dev, "%s called\n", __func__); 1111 dev_dbg(&intf->dev, "%s called\n", __func__);
1107 1112
@@ -1110,13 +1115,11 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1110 is_gen3 = mceusb_model[model].mce_gen3; 1115 is_gen3 = mceusb_model[model].mce_gen3;
1111 is_microsoft_gen1 = mceusb_model[model].mce_gen1; 1116 is_microsoft_gen1 = mceusb_model[model].mce_gen1;
1112 tx_mask_normal = mceusb_model[model].tx_mask_normal; 1117 tx_mask_normal = mceusb_model[model].tx_mask_normal;
1113 is_polaris = mceusb_model[model].is_polaris; 1118 ir_intfnum = mceusb_model[model].ir_intfnum;
1114 1119
1115 if (is_polaris) { 1120 /* There are multi-function devices with non-IR interfaces */
1116 /* Interface 0 is IR */ 1121 if (idesc->desc.bInterfaceNumber != ir_intfnum)
1117 if (idesc->desc.bInterfaceNumber) 1122 return -ENODEV;
1118 return -ENODEV;
1119 }
1120 1123
1121 /* step through the endpoints to find first bulk in and out endpoint */ 1124 /* step through the endpoints to find first bulk in and out endpoint */
1122 for (i = 0; i < idesc->desc.bNumEndpoints; ++i) { 1125 for (i = 0; i < idesc->desc.bNumEndpoints; ++i) {
diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c
index 273d9d674792..d4d64492a057 100644
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -385,8 +385,9 @@ static void nvt_cir_regs_init(struct nvt_dev *nvt)
385 385
386static void nvt_cir_wake_regs_init(struct nvt_dev *nvt) 386static void nvt_cir_wake_regs_init(struct nvt_dev *nvt)
387{ 387{
388 /* set number of bytes needed for wake key comparison (default 67) */ 388 /* set number of bytes needed for wake from s3 (default 65) */
389 nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_LEN, CIR_WAKE_FIFO_CMP_DEEP); 389 nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_CMP_BYTES,
390 CIR_WAKE_FIFO_CMP_DEEP);
390 391
391 /* set tolerance/variance allowed per byte during wake compare */ 392 /* set tolerance/variance allowed per byte during wake compare */
392 nvt_cir_wake_reg_write(nvt, CIR_WAKE_CMP_TOLERANCE, 393 nvt_cir_wake_reg_write(nvt, CIR_WAKE_CMP_TOLERANCE,
diff --git a/drivers/media/rc/nuvoton-cir.h b/drivers/media/rc/nuvoton-cir.h
index 1df82351cb03..048135eea702 100644
--- a/drivers/media/rc/nuvoton-cir.h
+++ b/drivers/media/rc/nuvoton-cir.h
@@ -305,8 +305,11 @@ struct nvt_dev {
305#define CIR_WAKE_IRFIFOSTS_RX_EMPTY 0x20 305#define CIR_WAKE_IRFIFOSTS_RX_EMPTY 0x20
306#define CIR_WAKE_IRFIFOSTS_RX_FULL 0x10 306#define CIR_WAKE_IRFIFOSTS_RX_FULL 0x10
307 307
308/* CIR Wake FIFO buffer is 67 bytes long */ 308/*
309#define CIR_WAKE_FIFO_LEN 67 309 * The CIR Wake FIFO buffer is 67 bytes long, but the stock remote wakes
310 * the system comparing only 65 bytes (fails with this set to 67)
311 */
312#define CIR_WAKE_FIFO_CMP_BYTES 65
310/* CIR Wake byte comparison tolerance */ 313/* CIR Wake byte comparison tolerance */
311#define CIR_WAKE_CMP_TOLERANCE 5 314#define CIR_WAKE_CMP_TOLERANCE 5
312 315
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 512a2f4ada0e..5b4422ef4e6d 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -850,7 +850,7 @@ static ssize_t store_protocols(struct device *device,
850 count++; 850 count++;
851 } else { 851 } else {
852 for (i = 0; i < ARRAY_SIZE(proto_names); i++) { 852 for (i = 0; i < ARRAY_SIZE(proto_names); i++) {
853 if (!strncasecmp(tmp, proto_names[i].name, strlen(proto_names[i].name))) { 853 if (!strcasecmp(tmp, proto_names[i].name)) {
854 tmp += strlen(proto_names[i].name); 854 tmp += strlen(proto_names[i].name);
855 mask = proto_names[i].type; 855 mask = proto_names[i].type;
856 break; 856 break;
diff --git a/drivers/media/video/au0828/au0828-video.c b/drivers/media/video/au0828/au0828-video.c
index e41e4ad5cc40..9c475c600fc9 100644
--- a/drivers/media/video/au0828/au0828-video.c
+++ b/drivers/media/video/au0828/au0828-video.c
@@ -1758,7 +1758,12 @@ static int vidioc_reqbufs(struct file *file, void *priv,
1758 if (rc < 0) 1758 if (rc < 0)
1759 return rc; 1759 return rc;
1760 1760
1761 return videobuf_reqbufs(&fh->vb_vidq, rb); 1761 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1762 rc = videobuf_reqbufs(&fh->vb_vidq, rb);
1763 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1764 rc = videobuf_reqbufs(&fh->vb_vbiq, rb);
1765
1766 return rc;
1762} 1767}
1763 1768
1764static int vidioc_querybuf(struct file *file, void *priv, 1769static int vidioc_querybuf(struct file *file, void *priv,
@@ -1772,7 +1777,12 @@ static int vidioc_querybuf(struct file *file, void *priv,
1772 if (rc < 0) 1777 if (rc < 0)
1773 return rc; 1778 return rc;
1774 1779
1775 return videobuf_querybuf(&fh->vb_vidq, b); 1780 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1781 rc = videobuf_querybuf(&fh->vb_vidq, b);
1782 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1783 rc = videobuf_querybuf(&fh->vb_vbiq, b);
1784
1785 return rc;
1776} 1786}
1777 1787
1778static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b) 1788static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1785,7 +1795,12 @@ static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
1785 if (rc < 0) 1795 if (rc < 0)
1786 return rc; 1796 return rc;
1787 1797
1788 return videobuf_qbuf(&fh->vb_vidq, b); 1798 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1799 rc = videobuf_qbuf(&fh->vb_vidq, b);
1800 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1801 rc = videobuf_qbuf(&fh->vb_vbiq, b);
1802
1803 return rc;
1789} 1804}
1790 1805
1791static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b) 1806static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1806,7 +1821,12 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
1806 dev->greenscreen_detected = 0; 1821 dev->greenscreen_detected = 0;
1807 } 1822 }
1808 1823
1809 return videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK); 1824 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1825 rc = videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
1826 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1827 rc = videobuf_dqbuf(&fh->vb_vbiq, b, file->f_flags & O_NONBLOCK);
1828
1829 return rc;
1810} 1830}
1811 1831
1812static struct v4l2_file_operations au0828_v4l_fops = { 1832static struct v4l2_file_operations au0828_v4l_fops = {
diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c
index 87177733cf92..68ad1963f421 100644
--- a/drivers/media/video/cx18/cx18-cards.c
+++ b/drivers/media/video/cx18/cx18-cards.c
@@ -95,6 +95,53 @@ static const struct cx18_card cx18_card_hvr1600_esmt = {
95 .i2c = &cx18_i2c_std, 95 .i2c = &cx18_i2c_std,
96}; 96};
97 97
98static const struct cx18_card cx18_card_hvr1600_s5h1411 = {
99 .type = CX18_CARD_HVR_1600_S5H1411,
100 .name = "Hauppauge HVR-1600",
101 .comment = "Simultaneous Digital and Analog TV capture supported\n",
102 .v4l2_capabilities = CX18_CAP_ENCODER,
103 .hw_audio_ctrl = CX18_HW_418_AV,
104 .hw_muxer = CX18_HW_CS5345,
105 .hw_all = CX18_HW_TVEEPROM | CX18_HW_418_AV | CX18_HW_TUNER |
106 CX18_HW_CS5345 | CX18_HW_DVB | CX18_HW_GPIO_RESET_CTRL |
107 CX18_HW_Z8F0811_IR_HAUP,
108 .video_inputs = {
109 { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE7 },
110 { CX18_CARD_INPUT_SVIDEO1, 1, CX18_AV_SVIDEO1 },
111 { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE3 },
112 { CX18_CARD_INPUT_SVIDEO2, 2, CX18_AV_SVIDEO2 },
113 { CX18_CARD_INPUT_COMPOSITE2, 2, CX18_AV_COMPOSITE4 },
114 },
115 .audio_inputs = {
116 { CX18_CARD_INPUT_AUD_TUNER,
117 CX18_AV_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 },
118 { CX18_CARD_INPUT_LINE_IN1,
119 CX18_AV_AUDIO_SERIAL1, CS5345_IN_2 },
120 { CX18_CARD_INPUT_LINE_IN2,
121 CX18_AV_AUDIO_SERIAL1, CS5345_IN_3 },
122 },
123 .radio_input = { CX18_CARD_INPUT_AUD_TUNER,
124 CX18_AV_AUDIO_SERIAL1, CS5345_IN_4 },
125 .ddr = {
126 /* ESMT M13S128324A-5B memory */
127 .chip_config = 0x003,
128 .refresh = 0x30c,
129 .timing1 = 0x44220e82,
130 .timing2 = 0x08,
131 .tune_lane = 0,
132 .initial_emrs = 0,
133 },
134 .gpio_init.initial_value = 0x3001,
135 .gpio_init.direction = 0x3001,
136 .gpio_i2c_slave_reset = {
137 .active_lo_mask = 0x3001,
138 .msecs_asserted = 10,
139 .msecs_recovery = 40,
140 .ir_reset_mask = 0x0001,
141 },
142 .i2c = &cx18_i2c_std,
143};
144
98static const struct cx18_card cx18_card_hvr1600_samsung = { 145static const struct cx18_card cx18_card_hvr1600_samsung = {
99 .type = CX18_CARD_HVR_1600_SAMSUNG, 146 .type = CX18_CARD_HVR_1600_SAMSUNG,
100 .name = "Hauppauge HVR-1600 (Preproduction)", 147 .name = "Hauppauge HVR-1600 (Preproduction)",
@@ -523,7 +570,8 @@ static const struct cx18_card *cx18_card_list[] = {
523 &cx18_card_toshiba_qosmio_dvbt, 570 &cx18_card_toshiba_qosmio_dvbt,
524 &cx18_card_leadtek_pvr2100, 571 &cx18_card_leadtek_pvr2100,
525 &cx18_card_leadtek_dvr3100h, 572 &cx18_card_leadtek_dvr3100h,
526 &cx18_card_gotview_dvd3 573 &cx18_card_gotview_dvd3,
574 &cx18_card_hvr1600_s5h1411
527}; 575};
528 576
529const struct cx18_card *cx18_get_card(u16 index) 577const struct cx18_card *cx18_get_card(u16 index)
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index 944af8adbe0c..b1c3cbd92743 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -157,6 +157,7 @@ MODULE_PARM_DESC(cardtype,
157 "\t\t\t 7 = Leadtek WinFast PVR2100\n" 157 "\t\t\t 7 = Leadtek WinFast PVR2100\n"
158 "\t\t\t 8 = Leadtek WinFast DVR3100 H\n" 158 "\t\t\t 8 = Leadtek WinFast DVR3100 H\n"
159 "\t\t\t 9 = GoTView PCI DVD3 Hybrid\n" 159 "\t\t\t 9 = GoTView PCI DVD3 Hybrid\n"
160 "\t\t\t 10 = Hauppauge HVR 1600 (S5H1411)\n"
160 "\t\t\t 0 = Autodetect (default)\n" 161 "\t\t\t 0 = Autodetect (default)\n"
161 "\t\t\t-1 = Ignore this card\n\t\t"); 162 "\t\t\t-1 = Ignore this card\n\t\t");
162MODULE_PARM_DESC(pal, "Set PAL standard: B, G, H, D, K, I, M, N, Nc, 60"); 163MODULE_PARM_DESC(pal, "Set PAL standard: B, G, H, D, K, I, M, N, Nc, 60");
@@ -337,6 +338,7 @@ void cx18_read_eeprom(struct cx18 *cx, struct tveeprom *tv)
337 switch (cx->card->type) { 338 switch (cx->card->type) {
338 case CX18_CARD_HVR_1600_ESMT: 339 case CX18_CARD_HVR_1600_ESMT:
339 case CX18_CARD_HVR_1600_SAMSUNG: 340 case CX18_CARD_HVR_1600_SAMSUNG:
341 case CX18_CARD_HVR_1600_S5H1411:
340 tveeprom_hauppauge_analog(&c, tv, eedata); 342 tveeprom_hauppauge_analog(&c, tv, eedata);
341 break; 343 break;
342 case CX18_CARD_YUAN_MPC718: 344 case CX18_CARD_YUAN_MPC718:
@@ -365,7 +367,25 @@ static void cx18_process_eeprom(struct cx18 *cx)
365 from the model number. Use the cardtype module option if you 367 from the model number. Use the cardtype module option if you
366 have one of these preproduction models. */ 368 have one of these preproduction models. */
367 switch (tv.model) { 369 switch (tv.model) {
368 case 74000 ... 74999: 370 case 74301: /* Retail models */
371 case 74321:
372 case 74351: /* OEM models */
373 case 74361:
374 /* Digital side is s5h1411/tda18271 */
375 cx->card = cx18_get_card(CX18_CARD_HVR_1600_S5H1411);
376 break;
377 case 74021: /* Retail models */
378 case 74031:
379 case 74041:
380 case 74141:
381 case 74541: /* OEM models */
382 case 74551:
383 case 74591:
384 case 74651:
385 case 74691:
386 case 74751:
387 case 74891:
388 /* Digital side is s5h1409/mxl5005s */
369 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT); 389 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
370 break; 390 break;
371 case 0x718: 391 case 0x718:
@@ -377,7 +397,8 @@ static void cx18_process_eeprom(struct cx18 *cx)
377 CX18_ERR("Invalid EEPROM\n"); 397 CX18_ERR("Invalid EEPROM\n");
378 return; 398 return;
379 default: 399 default:
380 CX18_ERR("Unknown model %d, defaulting to HVR-1600\n", tv.model); 400 CX18_ERR("Unknown model %d, defaulting to original HVR-1600 "
401 "(cardtype=1)\n", tv.model);
381 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT); 402 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
382 break; 403 break;
383 } 404 }
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index 306caac6d3fc..f736679d2517 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -85,7 +85,8 @@
85#define CX18_CARD_LEADTEK_PVR2100 6 /* Leadtek WinFast PVR2100 */ 85#define CX18_CARD_LEADTEK_PVR2100 6 /* Leadtek WinFast PVR2100 */
86#define CX18_CARD_LEADTEK_DVR3100H 7 /* Leadtek WinFast DVR3100 H */ 86#define CX18_CARD_LEADTEK_DVR3100H 7 /* Leadtek WinFast DVR3100 H */
87#define CX18_CARD_GOTVIEW_PCI_DVD3 8 /* GoTView PCI DVD3 Hybrid */ 87#define CX18_CARD_GOTVIEW_PCI_DVD3 8 /* GoTView PCI DVD3 Hybrid */
88#define CX18_CARD_LAST 8 88#define CX18_CARD_HVR_1600_S5H1411 9 /* Hauppauge HVR 1600 s5h1411/tda18271*/
89#define CX18_CARD_LAST 9
89 90
90#define CX18_ENC_STREAM_TYPE_MPG 0 91#define CX18_ENC_STREAM_TYPE_MPG 0
91#define CX18_ENC_STREAM_TYPE_TS 1 92#define CX18_ENC_STREAM_TYPE_TS 1
diff --git a/drivers/media/video/cx18/cx18-dvb.c b/drivers/media/video/cx18/cx18-dvb.c
index f0381d62518d..f41922bd4020 100644
--- a/drivers/media/video/cx18/cx18-dvb.c
+++ b/drivers/media/video/cx18/cx18-dvb.c
@@ -29,6 +29,8 @@
29#include "cx18-gpio.h" 29#include "cx18-gpio.h"
30#include "s5h1409.h" 30#include "s5h1409.h"
31#include "mxl5005s.h" 31#include "mxl5005s.h"
32#include "s5h1411.h"
33#include "tda18271.h"
32#include "zl10353.h" 34#include "zl10353.h"
33 35
34#include <linux/firmware.h> 36#include <linux/firmware.h>
@@ -77,6 +79,32 @@ static struct s5h1409_config hauppauge_hvr1600_config = {
77}; 79};
78 80
79/* 81/*
82 * CX18_CARD_HVR_1600_S5H1411
83 */
84static struct s5h1411_config hcw_s5h1411_config = {
85 .output_mode = S5H1411_SERIAL_OUTPUT,
86 .gpio = S5H1411_GPIO_OFF,
87 .vsb_if = S5H1411_IF_44000,
88 .qam_if = S5H1411_IF_4000,
89 .inversion = S5H1411_INVERSION_ON,
90 .status_mode = S5H1411_DEMODLOCKING,
91 .mpeg_timing = S5H1411_MPEGTIMING_CONTINOUS_NONINVERTING_CLOCK,
92};
93
94static struct tda18271_std_map hauppauge_tda18271_std_map = {
95 .atsc_6 = { .if_freq = 5380, .agc_mode = 3, .std = 3,
96 .if_lvl = 6, .rfagc_top = 0x37 },
97 .qam_6 = { .if_freq = 4000, .agc_mode = 3, .std = 0,
98 .if_lvl = 6, .rfagc_top = 0x37 },
99};
100
101static struct tda18271_config hauppauge_tda18271_config = {
102 .std_map = &hauppauge_tda18271_std_map,
103 .gate = TDA18271_GATE_DIGITAL,
104 .output_opt = TDA18271_OUTPUT_LT_OFF,
105};
106
107/*
80 * CX18_CARD_LEADTEK_DVR3100H 108 * CX18_CARD_LEADTEK_DVR3100H
81 */ 109 */
82/* Information/confirmation of proper config values provided by Terry Wu */ 110/* Information/confirmation of proper config values provided by Terry Wu */
@@ -244,6 +272,7 @@ static int cx18_dvb_start_feed(struct dvb_demux_feed *feed)
244 switch (cx->card->type) { 272 switch (cx->card->type) {
245 case CX18_CARD_HVR_1600_ESMT: 273 case CX18_CARD_HVR_1600_ESMT:
246 case CX18_CARD_HVR_1600_SAMSUNG: 274 case CX18_CARD_HVR_1600_SAMSUNG:
275 case CX18_CARD_HVR_1600_S5H1411:
247 v = cx18_read_reg(cx, CX18_REG_DMUX_NUM_PORT_0_CONTROL); 276 v = cx18_read_reg(cx, CX18_REG_DMUX_NUM_PORT_0_CONTROL);
248 v |= 0x00400000; /* Serial Mode */ 277 v |= 0x00400000; /* Serial Mode */
249 v |= 0x00002000; /* Data Length - Byte */ 278 v |= 0x00002000; /* Data Length - Byte */
@@ -455,6 +484,15 @@ static int dvb_register(struct cx18_stream *stream)
455 ret = 0; 484 ret = 0;
456 } 485 }
457 break; 486 break;
487 case CX18_CARD_HVR_1600_S5H1411:
488 dvb->fe = dvb_attach(s5h1411_attach,
489 &hcw_s5h1411_config,
490 &cx->i2c_adap[0]);
491 if (dvb->fe != NULL)
492 dvb_attach(tda18271_attach, dvb->fe,
493 0x60, &cx->i2c_adap[0],
494 &hauppauge_tda18271_config);
495 break;
458 case CX18_CARD_LEADTEK_DVR3100H: 496 case CX18_CARD_LEADTEK_DVR3100H:
459 dvb->fe = dvb_attach(zl10353_attach, 497 dvb->fe = dvb_attach(zl10353_attach,
460 &leadtek_dvr3100h_demod, 498 &leadtek_dvr3100h_demod,
diff --git a/drivers/media/video/cx23885/cx23885-i2c.c b/drivers/media/video/cx23885/cx23885-i2c.c
index ed3d8f55029b..307ff543c254 100644
--- a/drivers/media/video/cx23885/cx23885-i2c.c
+++ b/drivers/media/video/cx23885/cx23885-i2c.c
@@ -122,10 +122,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
122 122
123 if (!i2c_wait_done(i2c_adap)) 123 if (!i2c_wait_done(i2c_adap))
124 goto eio; 124 goto eio;
125 if (!i2c_slave_did_ack(i2c_adap)) {
126 retval = -ENXIO;
127 goto err;
128 }
129 if (i2c_debug) { 125 if (i2c_debug) {
130 printk(" <W %02x %02x", msg->addr << 1, msg->buf[0]); 126 printk(" <W %02x %02x", msg->addr << 1, msg->buf[0]);
131 if (!(ctrl & I2C_NOSTOP)) 127 if (!(ctrl & I2C_NOSTOP))
@@ -158,7 +154,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
158 154
159 eio: 155 eio:
160 retval = -EIO; 156 retval = -EIO;
161 err:
162 if (i2c_debug) 157 if (i2c_debug)
163 printk(KERN_ERR " ERR: %d\n", retval); 158 printk(KERN_ERR " ERR: %d\n", retval);
164 return retval; 159 return retval;
@@ -209,10 +204,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
209 204
210 if (!i2c_wait_done(i2c_adap)) 205 if (!i2c_wait_done(i2c_adap))
211 goto eio; 206 goto eio;
212 if (cnt == 0 && !i2c_slave_did_ack(i2c_adap)) {
213 retval = -ENXIO;
214 goto err;
215 }
216 msg->buf[cnt] = cx_read(bus->reg_rdata) & 0xff; 207 msg->buf[cnt] = cx_read(bus->reg_rdata) & 0xff;
217 if (i2c_debug) { 208 if (i2c_debug) {
218 dprintk(1, " %02x", msg->buf[cnt]); 209 dprintk(1, " %02x", msg->buf[cnt]);
@@ -224,7 +215,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
224 215
225 eio: 216 eio:
226 retval = -EIO; 217 retval = -EIO;
227 err:
228 if (i2c_debug) 218 if (i2c_debug)
229 printk(KERN_ERR " ERR: %d\n", retval); 219 printk(KERN_ERR " ERR: %d\n", retval);
230 return retval; 220 return retval;
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index 6fc09dd41b9d..35796e035247 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -2015,7 +2015,8 @@ static int cx25840_probe(struct i2c_client *client,
2015 kfree(state); 2015 kfree(state);
2016 return err; 2016 return err;
2017 } 2017 }
2018 v4l2_ctrl_cluster(2, &state->volume); 2018 if (!is_cx2583x(state))
2019 v4l2_ctrl_cluster(2, &state->volume);
2019 v4l2_ctrl_handler_setup(&state->hdl); 2020 v4l2_ctrl_handler_setup(&state->hdl);
2020 2021
2021 if (client->dev.platform_data) { 2022 if (client->dev.platform_data) {
diff --git a/drivers/media/video/ivtv/ivtv-irq.c b/drivers/media/video/ivtv/ivtv-irq.c
index 9b4faf009196..9c29e964d400 100644
--- a/drivers/media/video/ivtv/ivtv-irq.c
+++ b/drivers/media/video/ivtv/ivtv-irq.c
@@ -628,22 +628,66 @@ static void ivtv_irq_enc_pio_complete(struct ivtv *itv)
628static void ivtv_irq_dma_err(struct ivtv *itv) 628static void ivtv_irq_dma_err(struct ivtv *itv)
629{ 629{
630 u32 data[CX2341X_MBOX_MAX_DATA]; 630 u32 data[CX2341X_MBOX_MAX_DATA];
631 u32 status;
631 632
632 del_timer(&itv->dma_timer); 633 del_timer(&itv->dma_timer);
634
633 ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data); 635 ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data);
636 status = read_reg(IVTV_REG_DMASTATUS);
634 IVTV_DEBUG_WARN("DMA ERROR %08x %08x %08x %d\n", data[0], data[1], 637 IVTV_DEBUG_WARN("DMA ERROR %08x %08x %08x %d\n", data[0], data[1],
635 read_reg(IVTV_REG_DMASTATUS), itv->cur_dma_stream); 638 status, itv->cur_dma_stream);
636 write_reg(read_reg(IVTV_REG_DMASTATUS) & 3, IVTV_REG_DMASTATUS); 639 /*
640 * We do *not* write back to the IVTV_REG_DMASTATUS register to
641 * clear the error status, if either the encoder write (0x02) or
642 * decoder read (0x01) bus master DMA operation do not indicate
643 * completed. We can race with the DMA engine, which may have
644 * transitioned to completed status *after* we read the register.
645 * Setting a IVTV_REG_DMASTATUS flag back to "busy" status, after the
646 * DMA engine has completed, will cause the DMA engine to stop working.
647 */
648 status &= 0x3;
649 if (status == 0x3)
650 write_reg(status, IVTV_REG_DMASTATUS);
651
637 if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) && 652 if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) &&
638 itv->cur_dma_stream >= 0 && itv->cur_dma_stream < IVTV_MAX_STREAMS) { 653 itv->cur_dma_stream >= 0 && itv->cur_dma_stream < IVTV_MAX_STREAMS) {
639 struct ivtv_stream *s = &itv->streams[itv->cur_dma_stream]; 654 struct ivtv_stream *s = &itv->streams[itv->cur_dma_stream];
640 655
641 /* retry */ 656 if (s->type >= IVTV_DEC_STREAM_TYPE_MPG) {
642 if (s->type >= IVTV_DEC_STREAM_TYPE_MPG) 657 /* retry */
658 /*
659 * FIXME - handle cases of DMA error similar to
660 * encoder below, except conditioned on status & 0x1
661 */
643 ivtv_dma_dec_start(s); 662 ivtv_dma_dec_start(s);
644 else 663 return;
645 ivtv_dma_enc_start(s); 664 } else {
646 return; 665 if ((status & 0x2) == 0) {
666 /*
667 * CX2341x Bus Master DMA write is ongoing.
668 * Reset the timer and let it complete.
669 */
670 itv->dma_timer.expires =
671 jiffies + msecs_to_jiffies(600);
672 add_timer(&itv->dma_timer);
673 return;
674 }
675
676 if (itv->dma_retries < 3) {
677 /*
678 * CX2341x Bus Master DMA write has ended.
679 * Retry the write, starting with the first
680 * xfer segment. Just retrying the current
681 * segment is not sufficient.
682 */
683 s->sg_processed = 0;
684 itv->dma_retries++;
685 ivtv_dma_enc_start_xfer(s);
686 return;
687 }
688 /* Too many retries, give up on this one */
689 }
690
647 } 691 }
648 if (test_bit(IVTV_F_I_UDMA, &itv->i_flags)) { 692 if (test_bit(IVTV_F_I_UDMA, &itv->i_flags)) {
649 ivtv_udma_start(itv); 693 ivtv_udma_start(itv);
diff --git a/drivers/media/video/mem2mem_testdev.c b/drivers/media/video/mem2mem_testdev.c
index c179041d91f8..e7e717800ee2 100644
--- a/drivers/media/video/mem2mem_testdev.c
+++ b/drivers/media/video/mem2mem_testdev.c
@@ -1011,7 +1011,6 @@ static int m2mtest_remove(struct platform_device *pdev)
1011 v4l2_m2m_release(dev->m2m_dev); 1011 v4l2_m2m_release(dev->m2m_dev);
1012 del_timer_sync(&dev->timer); 1012 del_timer_sync(&dev->timer);
1013 video_unregister_device(dev->vfd); 1013 video_unregister_device(dev->vfd);
1014 video_device_release(dev->vfd);
1015 v4l2_device_unregister(&dev->v4l2_dev); 1014 v4l2_device_unregister(&dev->v4l2_dev);
1016 kfree(dev); 1015 kfree(dev);
1017 1016
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index b63f8cafa671..561909b65ce6 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -57,7 +57,7 @@
57#include <linux/usb.h> 57#include <linux/usb.h>
58 58
59#define S2255_MAJOR_VERSION 1 59#define S2255_MAJOR_VERSION 1
60#define S2255_MINOR_VERSION 20 60#define S2255_MINOR_VERSION 21
61#define S2255_RELEASE 0 61#define S2255_RELEASE 0
62#define S2255_VERSION KERNEL_VERSION(S2255_MAJOR_VERSION, \ 62#define S2255_VERSION KERNEL_VERSION(S2255_MAJOR_VERSION, \
63 S2255_MINOR_VERSION, \ 63 S2255_MINOR_VERSION, \
@@ -312,9 +312,9 @@ struct s2255_fh {
312}; 312};
313 313
314/* current cypress EEPROM firmware version */ 314/* current cypress EEPROM firmware version */
315#define S2255_CUR_USB_FWVER ((3 << 8) | 6) 315#define S2255_CUR_USB_FWVER ((3 << 8) | 11)
316/* current DSP FW version */ 316/* current DSP FW version */
317#define S2255_CUR_DSP_FWVER 8 317#define S2255_CUR_DSP_FWVER 10102
318/* Need DSP version 5+ for video status feature */ 318/* Need DSP version 5+ for video status feature */
319#define S2255_MIN_DSP_STATUS 5 319#define S2255_MIN_DSP_STATUS 5
320#define S2255_MIN_DSP_COLORFILTER 8 320#define S2255_MIN_DSP_COLORFILTER 8
@@ -492,9 +492,11 @@ static void planar422p_to_yuv_packed(const unsigned char *in,
492 492
493static void s2255_reset_dsppower(struct s2255_dev *dev) 493static void s2255_reset_dsppower(struct s2255_dev *dev)
494{ 494{
495 s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b0b, NULL, 0, 1); 495 s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b01, NULL, 0, 1);
496 msleep(10); 496 msleep(10);
497 s2255_vendor_req(dev, 0x50, 0x0000, 0x0000, NULL, 0, 1); 497 s2255_vendor_req(dev, 0x50, 0x0000, 0x0000, NULL, 0, 1);
498 msleep(600);
499 s2255_vendor_req(dev, 0x10, 0x0000, 0x0000, NULL, 0, 1);
498 return; 500 return;
499} 501}
500 502
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index e9a3eab7b0cf..8c1d85e27be4 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -621,7 +621,7 @@ static int __init memstick_init(void)
621{ 621{
622 int rc; 622 int rc;
623 623
624 workqueue = create_freezeable_workqueue("kmemstick"); 624 workqueue = create_freezable_workqueue("kmemstick");
625 if (!workqueue) 625 if (!workqueue)
626 return -ENOMEM; 626 return -ENOMEM;
627 627
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index f71f22948477..1735c84ff757 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -76,8 +76,8 @@
76#define COPYRIGHT "Copyright (c) 1999-2008 " MODULEAUTHOR 76#define COPYRIGHT "Copyright (c) 1999-2008 " MODULEAUTHOR
77#endif 77#endif
78 78
79#define MPT_LINUX_VERSION_COMMON "3.04.17" 79#define MPT_LINUX_VERSION_COMMON "3.04.18"
80#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.17" 80#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.18"
81#define WHAT_MAGIC_STRING "@" "(" "#" ")" 81#define WHAT_MAGIC_STRING "@" "(" "#" ")"
82 82
83#define show_mptmod_ver(s,ver) \ 83#define show_mptmod_ver(s,ver) \
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index a3856ed90aef..e8deb8ed0499 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -597,6 +597,13 @@ mptctl_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
597} 597}
598 598
599static int 599static int
600mptctl_release(struct inode *inode, struct file *filep)
601{
602 fasync_helper(-1, filep, 0, &async_queue);
603 return 0;
604}
605
606static int
600mptctl_fasync(int fd, struct file *filep, int mode) 607mptctl_fasync(int fd, struct file *filep, int mode)
601{ 608{
602 MPT_ADAPTER *ioc; 609 MPT_ADAPTER *ioc;
@@ -2815,6 +2822,7 @@ static const struct file_operations mptctl_fops = {
2815 .llseek = no_llseek, 2822 .llseek = no_llseek,
2816 .fasync = mptctl_fasync, 2823 .fasync = mptctl_fasync,
2817 .unlocked_ioctl = mptctl_ioctl, 2824 .unlocked_ioctl = mptctl_ioctl,
2825 .release = mptctl_release,
2818#ifdef CONFIG_COMPAT 2826#ifdef CONFIG_COMPAT
2819 .compat_ioctl = compat_mpctl_ioctl, 2827 .compat_ioctl = compat_mpctl_ioctl,
2820#endif 2828#endif
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 59b8f53d1ece..0d9b82a44540 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -1873,8 +1873,9 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
1873 } 1873 }
1874 1874
1875 out: 1875 out:
1876 printk(MYIOC_s_INFO_FMT "task abort: %s (sc=%p)\n", 1876 printk(MYIOC_s_INFO_FMT "task abort: %s (rv=%04x) (sc=%p) (sn=%ld)\n",
1877 ioc->name, ((retval == SUCCESS) ? "SUCCESS" : "FAILED"), SCpnt); 1877 ioc->name, ((retval == SUCCESS) ? "SUCCESS" : "FAILED"), retval,
1878 SCpnt, SCpnt->serial_number);
1878 1879
1879 return retval; 1880 return retval;
1880} 1881}
@@ -1911,7 +1912,7 @@ mptscsih_dev_reset(struct scsi_cmnd * SCpnt)
1911 1912
1912 vdevice = SCpnt->device->hostdata; 1913 vdevice = SCpnt->device->hostdata;
1913 if (!vdevice || !vdevice->vtarget) { 1914 if (!vdevice || !vdevice->vtarget) {
1914 retval = SUCCESS; 1915 retval = 0;
1915 goto out; 1916 goto out;
1916 } 1917 }
1917 1918
diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c
index a0421efe04ca..8a5b2d8f4daf 100644
--- a/drivers/message/i2o/driver.c
+++ b/drivers/message/i2o/driver.c
@@ -84,7 +84,8 @@ int i2o_driver_register(struct i2o_driver *drv)
84 osm_debug("Register driver %s\n", drv->name); 84 osm_debug("Register driver %s\n", drv->name);
85 85
86 if (drv->event) { 86 if (drv->event) {
87 drv->event_queue = create_workqueue(drv->name); 87 drv->event_queue = alloc_workqueue(drv->name,
88 WQ_MEM_RECLAIM, 1);
88 if (!drv->event_queue) { 89 if (!drv->event_queue) {
89 osm_err("Could not initialize event queue for driver " 90 osm_err("Could not initialize event queue for driver "
90 "%s\n", drv->name); 91 "%s\n", drv->name);
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 6a1f94042612..c45e6305b26f 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -143,9 +143,9 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
143 unsigned long flags; 143 unsigned long flags;
144 struct asic3 *asic; 144 struct asic3 *asic;
145 145
146 desc->chip->ack(irq); 146 desc->irq_data.chip->irq_ack(&desc->irq_data);
147 147
148 asic = desc->handler_data; 148 asic = get_irq_data(irq);
149 149
150 for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) { 150 for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) {
151 u32 status; 151 u32 status;
diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
index 33c923d215c7..fdd8a1b8bc67 100644
--- a/drivers/mfd/davinci_voicecodec.c
+++ b/drivers/mfd/davinci_voicecodec.c
@@ -118,12 +118,12 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
118 118
119 /* Voice codec interface client */ 119 /* Voice codec interface client */
120 cell = &davinci_vc->cells[DAVINCI_VC_VCIF_CELL]; 120 cell = &davinci_vc->cells[DAVINCI_VC_VCIF_CELL];
121 cell->name = "davinci_vcif"; 121 cell->name = "davinci-vcif";
122 cell->driver_data = davinci_vc; 122 cell->driver_data = davinci_vc;
123 123
124 /* Voice codec CQ93VC client */ 124 /* Voice codec CQ93VC client */
125 cell = &davinci_vc->cells[DAVINCI_VC_CQ93VC_CELL]; 125 cell = &davinci_vc->cells[DAVINCI_VC_CQ93VC_CELL];
126 cell->name = "cq93vc"; 126 cell->name = "cq93vc-codec";
127 cell->driver_data = davinci_vc; 127 cell->driver_data = davinci_vc;
128 128
129 ret = mfd_add_devices(&pdev->dev, pdev->id, davinci_vc->cells, 129 ret = mfd_add_devices(&pdev->dev, pdev->id, davinci_vc->cells,
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 627cf577b16d..e9018d1394ee 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -150,12 +150,12 @@ static inline int __tps6586x_write(struct i2c_client *client,
150static inline int __tps6586x_writes(struct i2c_client *client, int reg, 150static inline int __tps6586x_writes(struct i2c_client *client, int reg,
151 int len, uint8_t *val) 151 int len, uint8_t *val)
152{ 152{
153 int ret; 153 int ret, i;
154 154
155 ret = i2c_smbus_write_i2c_block_data(client, reg, len, val); 155 for (i = 0; i < len; i++) {
156 if (ret < 0) { 156 ret = __tps6586x_write(client, reg + i, *(val + i));
157 dev_err(&client->dev, "failed writings to 0x%02x\n", reg); 157 if (ret < 0)
158 return ret; 158 return ret;
159 } 159 }
160 160
161 return 0; 161 return 0;
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 000cb414a78a..92b85e28a15e 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -385,12 +385,18 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev)
385 idev->close = ucb1x00_ts_close; 385 idev->close = ucb1x00_ts_close;
386 386
387 __set_bit(EV_ABS, idev->evbit); 387 __set_bit(EV_ABS, idev->evbit);
388 __set_bit(ABS_X, idev->absbit);
389 __set_bit(ABS_Y, idev->absbit);
390 __set_bit(ABS_PRESSURE, idev->absbit);
391 388
392 input_set_drvdata(idev, ts); 389 input_set_drvdata(idev, ts);
393 390
391 ucb1x00_adc_enable(ts->ucb);
392 ts->x_res = ucb1x00_ts_read_xres(ts);
393 ts->y_res = ucb1x00_ts_read_yres(ts);
394 ucb1x00_adc_disable(ts->ucb);
395
396 input_set_abs_params(idev, ABS_X, 0, ts->x_res, 0, 0);
397 input_set_abs_params(idev, ABS_Y, 0, ts->y_res, 0, 0);
398 input_set_abs_params(idev, ABS_PRESSURE, 0, 0, 0, 0);
399
394 err = input_register_device(idev); 400 err = input_register_device(idev);
395 if (err) 401 if (err)
396 goto fail; 402 goto fail;
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 41233c7fa581..f4016a075fd6 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -246,6 +246,16 @@ static int wm8994_suspend(struct device *dev)
246 struct wm8994 *wm8994 = dev_get_drvdata(dev); 246 struct wm8994 *wm8994 = dev_get_drvdata(dev);
247 int ret; 247 int ret;
248 248
249 /* Don't actually go through with the suspend if the CODEC is
250 * still active (eg, for audio passthrough from CP. */
251 ret = wm8994_reg_read(wm8994, WM8994_POWER_MANAGEMENT_1);
252 if (ret < 0) {
253 dev_err(dev, "Failed to read power status: %d\n", ret);
254 } else if (ret & WM8994_VMID_SEL_MASK) {
255 dev_dbg(dev, "CODEC still active, ignoring suspend\n");
256 return 0;
257 }
258
249 /* GPIO configuration state is saved here since we may be configuring 259 /* GPIO configuration state is saved here since we may be configuring
250 * the GPIO alternate functions even if we're not using the gpiolib 260 * the GPIO alternate functions even if we're not using the gpiolib
251 * driver for them. 261 * driver for them.
@@ -261,6 +271,8 @@ static int wm8994_suspend(struct device *dev)
261 if (ret < 0) 271 if (ret < 0)
262 dev_err(dev, "Failed to save LDO registers: %d\n", ret); 272 dev_err(dev, "Failed to save LDO registers: %d\n", ret);
263 273
274 wm8994->suspended = true;
275
264 ret = regulator_bulk_disable(wm8994->num_supplies, 276 ret = regulator_bulk_disable(wm8994->num_supplies,
265 wm8994->supplies); 277 wm8994->supplies);
266 if (ret != 0) { 278 if (ret != 0) {
@@ -276,6 +288,10 @@ static int wm8994_resume(struct device *dev)
276 struct wm8994 *wm8994 = dev_get_drvdata(dev); 288 struct wm8994 *wm8994 = dev_get_drvdata(dev);
277 int ret; 289 int ret;
278 290
291 /* We may have lied to the PM core about suspending */
292 if (!wm8994->suspended)
293 return 0;
294
279 ret = regulator_bulk_enable(wm8994->num_supplies, 295 ret = regulator_bulk_enable(wm8994->num_supplies,
280 wm8994->supplies); 296 wm8994->supplies);
281 if (ret != 0) { 297 if (ret != 0) {
@@ -298,6 +314,8 @@ static int wm8994_resume(struct device *dev)
298 if (ret < 0) 314 if (ret < 0)
299 dev_err(dev, "Failed to restore GPIO registers: %d\n", ret); 315 dev_err(dev, "Failed to restore GPIO registers: %d\n", ret);
300 316
317 wm8994->suspended = false;
318
301 return 0; 319 return 0;
302} 320}
303#endif 321#endif
diff --git a/drivers/misc/bmp085.c b/drivers/misc/bmp085.c
index 63ee4c1a5315..b6e1c9a6679e 100644
--- a/drivers/misc/bmp085.c
+++ b/drivers/misc/bmp085.c
@@ -449,6 +449,7 @@ static const struct i2c_device_id bmp085_id[] = {
449 { "bmp085", 0 }, 449 { "bmp085", 0 },
450 { } 450 { }
451}; 451};
452MODULE_DEVICE_TABLE(i2c, bmp085_id);
452 453
453static struct i2c_driver bmp085_driver = { 454static struct i2c_driver bmp085_driver = {
454 .driver = { 455 .driver = {
diff --git a/drivers/misc/iwmc3200top/iwmc3200top.h b/drivers/misc/iwmc3200top/iwmc3200top.h
index 740ff0738ea8..620973ed8bf9 100644
--- a/drivers/misc/iwmc3200top/iwmc3200top.h
+++ b/drivers/misc/iwmc3200top/iwmc3200top.h
@@ -183,9 +183,7 @@ struct iwmct_priv {
183 u32 barker; 183 u32 barker;
184 struct iwmct_dbg dbg; 184 struct iwmct_dbg dbg;
185 185
186 /* drivers work queue */ 186 /* drivers work items */
187 struct workqueue_struct *wq;
188 struct workqueue_struct *bus_rescan_wq;
189 struct work_struct bus_rescan_worker; 187 struct work_struct bus_rescan_worker;
190 struct work_struct isr_worker; 188 struct work_struct isr_worker;
191 189
diff --git a/drivers/misc/iwmc3200top/main.c b/drivers/misc/iwmc3200top/main.c
index c73cef2c3c5e..727af07f1fbd 100644
--- a/drivers/misc/iwmc3200top/main.c
+++ b/drivers/misc/iwmc3200top/main.c
@@ -89,7 +89,7 @@ static void op_top_message(struct iwmct_priv *priv, struct top_msg *msg)
89 switch (msg->hdr.opcode) { 89 switch (msg->hdr.opcode) {
90 case OP_OPR_ALIVE: 90 case OP_OPR_ALIVE:
91 LOG_INFO(priv, FW_MSG, "Got ALIVE from device, wake rescan\n"); 91 LOG_INFO(priv, FW_MSG, "Got ALIVE from device, wake rescan\n");
92 queue_work(priv->bus_rescan_wq, &priv->bus_rescan_worker); 92 schedule_work(&priv->bus_rescan_worker);
93 break; 93 break;
94 default: 94 default:
95 LOG_INFO(priv, FW_MSG, "Received msg opcode 0x%X\n", 95 LOG_INFO(priv, FW_MSG, "Received msg opcode 0x%X\n",
@@ -360,7 +360,7 @@ static void iwmct_irq(struct sdio_func *func)
360 /* clear the function's interrupt request bit (write 1 to clear) */ 360 /* clear the function's interrupt request bit (write 1 to clear) */
361 sdio_writeb(func, 1, IWMC_SDIO_INTR_CLEAR_ADDR, &ret); 361 sdio_writeb(func, 1, IWMC_SDIO_INTR_CLEAR_ADDR, &ret);
362 362
363 queue_work(priv->wq, &priv->isr_worker); 363 schedule_work(&priv->isr_worker);
364 364
365 LOG_TRACE(priv, IRQ, "exit iwmct_irq\n"); 365 LOG_TRACE(priv, IRQ, "exit iwmct_irq\n");
366 366
@@ -506,10 +506,6 @@ static int iwmct_probe(struct sdio_func *func,
506 priv->func = func; 506 priv->func = func;
507 sdio_set_drvdata(func, priv); 507 sdio_set_drvdata(func, priv);
508 508
509
510 /* create drivers work queue */
511 priv->wq = create_workqueue(DRV_NAME "_wq");
512 priv->bus_rescan_wq = create_workqueue(DRV_NAME "_rescan_wq");
513 INIT_WORK(&priv->bus_rescan_worker, iwmct_rescan_worker); 509 INIT_WORK(&priv->bus_rescan_worker, iwmct_rescan_worker);
514 INIT_WORK(&priv->isr_worker, iwmct_irq_read_worker); 510 INIT_WORK(&priv->isr_worker, iwmct_irq_read_worker);
515 511
@@ -604,9 +600,9 @@ static void iwmct_remove(struct sdio_func *func)
604 sdio_release_irq(func); 600 sdio_release_irq(func);
605 sdio_release_host(func); 601 sdio_release_host(func);
606 602
607 /* Safely destroy osc workqueue */ 603 /* Make sure works are finished */
608 destroy_workqueue(priv->bus_rescan_wq); 604 flush_work_sync(&priv->bus_rescan_worker);
609 destroy_workqueue(priv->wq); 605 flush_work_sync(&priv->isr_worker);
610 606
611 sdio_claim_host(func); 607 sdio_claim_host(func);
612 sdio_disable_func(func); 608 sdio_disable_func(func);
diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c
index 5f6852dff40b..44d4475a09dd 100644
--- a/drivers/misc/tifm_core.c
+++ b/drivers/misc/tifm_core.c
@@ -329,7 +329,7 @@ static int __init tifm_init(void)
329{ 329{
330 int rc; 330 int rc;
331 331
332 workqueue = create_freezeable_workqueue("tifm"); 332 workqueue = create_freezable_workqueue("tifm");
333 if (!workqueue) 333 if (!workqueue)
334 return -ENOMEM; 334 return -ENOMEM;
335 335
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 4d2ea8e80140..6df5a55da110 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -785,7 +785,7 @@ static int __init vmballoon_init(void)
785 if (x86_hyper != &x86_hyper_vmware) 785 if (x86_hyper != &x86_hyper_vmware)
786 return -ENODEV; 786 return -ENODEV;
787 787
788 vmballoon_wq = create_freezeable_workqueue("vmmemctl"); 788 vmballoon_wq = create_freezable_workqueue("vmmemctl");
789 if (!vmballoon_wq) { 789 if (!vmballoon_wq) {
790 pr_err("failed to create workqueue\n"); 790 pr_err("failed to create workqueue\n");
791 return -ENOMEM; 791 return -ENOMEM;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 6625c057be05..150b5f3cd401 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1529,7 +1529,7 @@ void mmc_rescan(struct work_struct *work)
1529 * still present 1529 * still present
1530 */ 1530 */
1531 if (host->bus_ops && host->bus_ops->detect && !host->bus_dead 1531 if (host->bus_ops && host->bus_ops->detect && !host->bus_dead
1532 && mmc_card_is_removable(host)) 1532 && !(host->caps & MMC_CAP_NONREMOVABLE))
1533 host->bus_ops->detect(host); 1533 host->bus_ops->detect(host);
1534 1534
1535 /* 1535 /*
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 5c4a54d9b6a4..ebc62ad4cc56 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -792,7 +792,6 @@ int mmc_attach_sdio(struct mmc_host *host)
792 */ 792 */
793 mmc_release_host(host); 793 mmc_release_host(host);
794 err = mmc_add_card(host->card); 794 err = mmc_add_card(host->card);
795 mmc_claim_host(host);
796 if (err) 795 if (err)
797 goto remove_added; 796 goto remove_added;
798 797
@@ -805,12 +804,12 @@ int mmc_attach_sdio(struct mmc_host *host)
805 goto remove_added; 804 goto remove_added;
806 } 805 }
807 806
807 mmc_claim_host(host);
808 return 0; 808 return 0;
809 809
810 810
811remove_added: 811remove_added:
812 /* Remove without lock if the device has been added. */ 812 /* Remove without lock if the device has been added. */
813 mmc_release_host(host);
814 mmc_sdio_remove(host); 813 mmc_sdio_remove(host);
815 mmc_claim_host(host); 814 mmc_claim_host(host);
816remove: 815remove:
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index fd877f633dd2..2f7fc0c5146f 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1516,21 +1516,17 @@ static int __devexit mmc_spi_remove(struct spi_device *spi)
1516 return 0; 1516 return 0;
1517} 1517}
1518 1518
1519#if defined(CONFIG_OF)
1520static struct of_device_id mmc_spi_of_match_table[] __devinitdata = { 1519static struct of_device_id mmc_spi_of_match_table[] __devinitdata = {
1521 { .compatible = "mmc-spi-slot", }, 1520 { .compatible = "mmc-spi-slot", },
1522 {}, 1521 {},
1523}; 1522};
1524#endif
1525 1523
1526static struct spi_driver mmc_spi_driver = { 1524static struct spi_driver mmc_spi_driver = {
1527 .driver = { 1525 .driver = {
1528 .name = "mmc_spi", 1526 .name = "mmc_spi",
1529 .bus = &spi_bus_type, 1527 .bus = &spi_bus_type,
1530 .owner = THIS_MODULE, 1528 .owner = THIS_MODULE,
1531#if defined(CONFIG_OF)
1532 .of_match_table = mmc_spi_of_match_table, 1529 .of_match_table = mmc_spi_of_match_table,
1533#endif
1534 }, 1530 },
1535 .probe = mmc_spi_probe, 1531 .probe = mmc_spi_probe,
1536 .remove = __devexit_p(mmc_spi_remove), 1532 .remove = __devexit_p(mmc_spi_remove),
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index a8c3e1c9b02a..4aaa88f8ab5f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1230,10 +1230,32 @@ static int inval_cache_and_wait_for_operation(
1230 sleep_time = chip_op_time / 2; 1230 sleep_time = chip_op_time / 2;
1231 1231
1232 for (;;) { 1232 for (;;) {
1233 if (chip->state != chip_state) {
1234 /* Someone's suspended the operation: sleep */
1235 DECLARE_WAITQUEUE(wait, current);
1236 set_current_state(TASK_UNINTERRUPTIBLE);
1237 add_wait_queue(&chip->wq, &wait);
1238 mutex_unlock(&chip->mutex);
1239 schedule();
1240 remove_wait_queue(&chip->wq, &wait);
1241 mutex_lock(&chip->mutex);
1242 continue;
1243 }
1244
1233 status = map_read(map, cmd_adr); 1245 status = map_read(map, cmd_adr);
1234 if (map_word_andequal(map, status, status_OK, status_OK)) 1246 if (map_word_andequal(map, status, status_OK, status_OK))
1235 break; 1247 break;
1236 1248
1249 if (chip->erase_suspended && chip_state == FL_ERASING) {
1250 /* Erase suspend occured while sleep: reset timeout */
1251 timeo = reset_timeo;
1252 chip->erase_suspended = 0;
1253 }
1254 if (chip->write_suspended && chip_state == FL_WRITING) {
1255 /* Write suspend occured while sleep: reset timeout */
1256 timeo = reset_timeo;
1257 chip->write_suspended = 0;
1258 }
1237 if (!timeo) { 1259 if (!timeo) {
1238 map_write(map, CMD(0x70), cmd_adr); 1260 map_write(map, CMD(0x70), cmd_adr);
1239 chip->state = FL_STATUS; 1261 chip->state = FL_STATUS;
@@ -1257,27 +1279,6 @@ static int inval_cache_and_wait_for_operation(
1257 timeo--; 1279 timeo--;
1258 } 1280 }
1259 mutex_lock(&chip->mutex); 1281 mutex_lock(&chip->mutex);
1260
1261 while (chip->state != chip_state) {
1262 /* Someone's suspended the operation: sleep */
1263 DECLARE_WAITQUEUE(wait, current);
1264 set_current_state(TASK_UNINTERRUPTIBLE);
1265 add_wait_queue(&chip->wq, &wait);
1266 mutex_unlock(&chip->mutex);
1267 schedule();
1268 remove_wait_queue(&chip->wq, &wait);
1269 mutex_lock(&chip->mutex);
1270 }
1271 if (chip->erase_suspended && chip_state == FL_ERASING) {
1272 /* Erase suspend occured while sleep: reset timeout */
1273 timeo = reset_timeo;
1274 chip->erase_suspended = 0;
1275 }
1276 if (chip->write_suspended && chip_state == FL_WRITING) {
1277 /* Write suspend occured while sleep: reset timeout */
1278 timeo = reset_timeo;
1279 chip->write_suspended = 0;
1280 }
1281 } 1282 }
1282 1283
1283 /* Done and happy. */ 1284 /* Done and happy. */
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index d72a5fb2d041..4e1be51cc122 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -1935,14 +1935,14 @@ static void jedec_reset(u32 base, struct map_info *map, struct cfi_private *cfi)
1935} 1935}
1936 1936
1937 1937
1938static int cfi_jedec_setup(struct cfi_private *p_cfi, int index) 1938static int cfi_jedec_setup(struct map_info *map, struct cfi_private *cfi, int index)
1939{ 1939{
1940 int i,num_erase_regions; 1940 int i,num_erase_regions;
1941 uint8_t uaddr; 1941 uint8_t uaddr;
1942 1942
1943 if (! (jedec_table[index].devtypes & p_cfi->device_type)) { 1943 if (!(jedec_table[index].devtypes & cfi->device_type)) {
1944 DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n", 1944 DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n",
1945 jedec_table[index].name, 4 * (1<<p_cfi->device_type)); 1945 jedec_table[index].name, 4 * (1<<cfi->device_type));
1946 return 0; 1946 return 0;
1947 } 1947 }
1948 1948
@@ -1950,27 +1950,28 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
1950 1950
1951 num_erase_regions = jedec_table[index].nr_regions; 1951 num_erase_regions = jedec_table[index].nr_regions;
1952 1952
1953 p_cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL); 1953 cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL);
1954 if (!p_cfi->cfiq) { 1954 if (!cfi->cfiq) {
1955 //xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name); 1955 //xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name);
1956 return 0; 1956 return 0;
1957 } 1957 }
1958 1958
1959 memset(p_cfi->cfiq,0,sizeof(struct cfi_ident)); 1959 memset(cfi->cfiq, 0, sizeof(struct cfi_ident));
1960 1960
1961 p_cfi->cfiq->P_ID = jedec_table[index].cmd_set; 1961 cfi->cfiq->P_ID = jedec_table[index].cmd_set;
1962 p_cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions; 1962 cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions;
1963 p_cfi->cfiq->DevSize = jedec_table[index].dev_size; 1963 cfi->cfiq->DevSize = jedec_table[index].dev_size;
1964 p_cfi->cfi_mode = CFI_MODE_JEDEC; 1964 cfi->cfi_mode = CFI_MODE_JEDEC;
1965 cfi->sector_erase_cmd = CMD(0x30);
1965 1966
1966 for (i=0; i<num_erase_regions; i++){ 1967 for (i=0; i<num_erase_regions; i++){
1967 p_cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i]; 1968 cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i];
1968 } 1969 }
1969 p_cfi->cmdset_priv = NULL; 1970 cfi->cmdset_priv = NULL;
1970 1971
1971 /* This may be redundant for some cases, but it doesn't hurt */ 1972 /* This may be redundant for some cases, but it doesn't hurt */
1972 p_cfi->mfr = jedec_table[index].mfr_id; 1973 cfi->mfr = jedec_table[index].mfr_id;
1973 p_cfi->id = jedec_table[index].dev_id; 1974 cfi->id = jedec_table[index].dev_id;
1974 1975
1975 uaddr = jedec_table[index].uaddr; 1976 uaddr = jedec_table[index].uaddr;
1976 1977
@@ -1978,8 +1979,8 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
1978 our brains explode when we see the datasheets talking about address 1979 our brains explode when we see the datasheets talking about address
1979 lines numbered from A-1 to A18. The CFI table has unlock addresses 1980 lines numbered from A-1 to A18. The CFI table has unlock addresses
1980 in device-words according to the mode the device is connected in */ 1981 in device-words according to the mode the device is connected in */
1981 p_cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / p_cfi->device_type; 1982 cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / cfi->device_type;
1982 p_cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / p_cfi->device_type; 1983 cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / cfi->device_type;
1983 1984
1984 return 1; /* ok */ 1985 return 1; /* ok */
1985} 1986}
@@ -2175,7 +2176,7 @@ static int jedec_probe_chip(struct map_info *map, __u32 base,
2175 "MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n", 2176 "MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n",
2176 __func__, cfi->mfr, cfi->id, 2177 __func__, cfi->mfr, cfi->id,
2177 cfi->addr_unlock1, cfi->addr_unlock2 ); 2178 cfi->addr_unlock1, cfi->addr_unlock2 );
2178 if (!cfi_jedec_setup(cfi, i)) 2179 if (!cfi_jedec_setup(map, cfi, i))
2179 return 0; 2180 return 0;
2180 goto ok_out; 2181 goto ok_out;
2181 } 2182 }
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 77d64ce19e9f..92de7e3a49a5 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -151,6 +151,7 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
151 printk(KERN_ERR MOD_NAME 151 printk(KERN_ERR MOD_NAME
152 " %s(): Unable to register resource %pR - kernel bug?\n", 152 " %s(): Unable to register resource %pR - kernel bug?\n",
153 __func__, &window->rsrc); 153 __func__, &window->rsrc);
154 return -EBUSY;
154 } 155 }
155 156
156 157
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index cb20c67995d8..e0a2373bf0e2 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -413,7 +413,6 @@ error3:
413error2: 413error2:
414 list_del(&new->list); 414 list_del(&new->list);
415error1: 415error1:
416 kfree(new);
417 return ret; 416 return ret;
418} 417}
419 418
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 15682ec8530e..28af71c61834 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -968,6 +968,6 @@ static void __exit omap_nand_exit(void)
968module_init(omap_nand_init); 968module_init(omap_nand_init);
969module_exit(omap_nand_exit); 969module_exit(omap_nand_exit);
970 970
971MODULE_ALIAS(DRIVER_NAME); 971MODULE_ALIAS("platform:" DRIVER_NAME);
972MODULE_LICENSE("GPL"); 972MODULE_LICENSE("GPL");
973MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards"); 973MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards");
diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c
index d9d7efbc77cc..6322d1fb5d62 100644
--- a/drivers/mtd/nand/r852.c
+++ b/drivers/mtd/nand/r852.c
@@ -930,7 +930,7 @@ int r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
930 930
931 init_completion(&dev->dma_done); 931 init_completion(&dev->dma_done);
932 932
933 dev->card_workqueue = create_freezeable_workqueue(DRV_NAME); 933 dev->card_workqueue = create_freezable_workqueue(DRV_NAME);
934 934
935 if (!dev->card_workqueue) 935 if (!dev->card_workqueue)
936 goto error9; 936 goto error9;
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index e78914938c5c..ac08750748a3 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -131,7 +131,7 @@ static struct platform_driver generic_onenand_driver = {
131 .remove = __devexit_p(generic_onenand_remove), 131 .remove = __devexit_p(generic_onenand_remove),
132}; 132};
133 133
134MODULE_ALIAS(DRIVER_NAME); 134MODULE_ALIAS("platform:" DRIVER_NAME);
135 135
136static int __init generic_onenand_init(void) 136static int __init generic_onenand_init(void)
137{ 137{
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index ac31f461cc1c..c849cacf4b2f 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -860,7 +860,7 @@ static void __exit omap2_onenand_exit(void)
860module_init(omap2_onenand_init); 860module_init(omap2_onenand_init);
861module_exit(omap2_onenand_exit); 861module_exit(omap2_onenand_exit);
862 862
863MODULE_ALIAS(DRIVER_NAME); 863MODULE_ALIAS("platform:" DRIVER_NAME);
864MODULE_LICENSE("GPL"); 864MODULE_LICENSE("GPL");
865MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 865MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
866MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3"); 866MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 67822cf6c025..ac0d6a8613b5 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -1258,7 +1258,7 @@ static struct mtd_blktrans_ops sm_ftl_ops = {
1258static __init int sm_module_init(void) 1258static __init int sm_module_init(void)
1259{ 1259{
1260 int error = 0; 1260 int error = 0;
1261 cache_flush_workqueue = create_freezeable_workqueue("smflush"); 1261 cache_flush_workqueue = create_freezable_workqueue("smflush");
1262 1262
1263 if (IS_ERR(cache_flush_workqueue)) 1263 if (IS_ERR(cache_flush_workqueue))
1264 return PTR_ERR(cache_flush_workqueue); 1264 return PTR_ERR(cache_flush_workqueue);
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 39214e512452..7ca0eded2561 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -425,11 +425,6 @@ static irqreturn_t ariadne_interrupt(int irq, void *data)
425 int csr0, boguscnt; 425 int csr0, boguscnt;
426 int handled = 0; 426 int handled = 0;
427 427
428 if (dev == NULL) {
429 printk(KERN_WARNING "ariadne_interrupt(): irq for unknown device.\n");
430 return IRQ_NONE;
431 }
432
433 lance->RAP = CSR0; /* PCnet-ISA Controller Status */ 428 lance->RAP = CSR0; /* PCnet-ISA Controller Status */
434 429
435 if (!(lance->RDP & INTR)) /* Check if any interrupt has been */ 430 if (!(lance->RDP & INTR)) /* Check if any interrupt has been */
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 653c62475cb6..8849699c66c4 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -22,7 +22,7 @@
22 * (you will need to reboot afterwards) */ 22 * (you will need to reboot afterwards) */
23/* #define BNX2X_STOP_ON_ERROR */ 23/* #define BNX2X_STOP_ON_ERROR */
24 24
25#define DRV_MODULE_VERSION "1.62.00-5" 25#define DRV_MODULE_VERSION "1.62.00-6"
26#define DRV_MODULE_RELDATE "2011/01/30" 26#define DRV_MODULE_RELDATE "2011/01/30"
27#define BNX2X_BC_VER 0x040200 27#define BNX2X_BC_VER 0x040200
28 28
@@ -1211,6 +1211,7 @@ struct bnx2x {
1211 /* DCBX Negotation results */ 1211 /* DCBX Negotation results */
1212 struct dcbx_features dcbx_local_feat; 1212 struct dcbx_features dcbx_local_feat;
1213 u32 dcbx_error; 1213 u32 dcbx_error;
1214 u32 pending_max;
1214}; 1215};
1215 1216
1216/** 1217/**
@@ -1613,19 +1614,23 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
1613#define BNX2X_BTR 4 1614#define BNX2X_BTR 4
1614#define MAX_SPQ_PENDING 8 1615#define MAX_SPQ_PENDING 8
1615 1616
1616 1617/* CMNG constants, as derived from system spec calculations */
1617/* CMNG constants 1618/* default MIN rate in case VNIC min rate is configured to zero - 100Mbps */
1618 derived from lab experiments, and not from system spec calculations !!! */ 1619#define DEF_MIN_RATE 100
1619#define DEF_MIN_RATE 100 1620/* resolution of the rate shaping timer - 400 usec */
1620/* resolution of the rate shaping timer - 100 usec */ 1621#define RS_PERIODIC_TIMEOUT_USEC 400
1621#define RS_PERIODIC_TIMEOUT_USEC 100
1622/* resolution of fairness algorithm in usecs -
1623 coefficient for calculating the actual t fair */
1624#define T_FAIR_COEF 10000000
1625/* number of bytes in single QM arbitration cycle - 1622/* number of bytes in single QM arbitration cycle -
1626 coefficient for calculating the fairness timer */ 1623 * coefficient for calculating the fairness timer */
1627#define QM_ARB_BYTES 40000 1624#define QM_ARB_BYTES 160000
1628#define FAIR_MEM 2 1625/* resolution of Min algorithm 1:100 */
1626#define MIN_RES 100
1627/* how many bytes above threshold for the minimal credit of Min algorithm*/
1628#define MIN_ABOVE_THRESH 32768
1629/* Fairness algorithm integration time coefficient -
1630 * for calculating the actual Tfair */
1631#define T_FAIR_COEF ((MIN_ABOVE_THRESH + QM_ARB_BYTES) * 8 * MIN_RES)
1632/* Memory of fairness algorithm . 2 cycles */
1633#define FAIR_MEM 2
1629 1634
1630 1635
1631#define ATTN_NIG_FOR_FUNC (1L << 8) 1636#define ATTN_NIG_FOR_FUNC (1L << 8)
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 710ce5d04c53..a71b32940533 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -259,10 +259,44 @@ static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
259#endif 259#endif
260} 260}
261 261
262/* Timestamp option length allowed for TPA aggregation:
263 *
264 * nop nop kind length echo val
265 */
266#define TPA_TSTAMP_OPT_LEN 12
267/**
268 * Calculate the approximate value of the MSS for this
269 * aggregation using the first packet of it.
270 *
271 * @param bp
272 * @param parsing_flags Parsing flags from the START CQE
273 * @param len_on_bd Total length of the first packet for the
274 * aggregation.
275 */
276static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
277 u16 len_on_bd)
278{
279 /* TPA arrgregation won't have an IP options and TCP options
280 * other than timestamp.
281 */
282 u16 hdrs_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct tcphdr);
283
284
285 /* Check if there was a TCP timestamp, if there is it's will
286 * always be 12 bytes length: nop nop kind length echo val.
287 *
288 * Otherwise FW would close the aggregation.
289 */
290 if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
291 hdrs_len += TPA_TSTAMP_OPT_LEN;
292
293 return len_on_bd - hdrs_len;
294}
295
262static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, 296static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
263 struct sk_buff *skb, 297 struct sk_buff *skb,
264 struct eth_fast_path_rx_cqe *fp_cqe, 298 struct eth_fast_path_rx_cqe *fp_cqe,
265 u16 cqe_idx) 299 u16 cqe_idx, u16 parsing_flags)
266{ 300{
267 struct sw_rx_page *rx_pg, old_rx_pg; 301 struct sw_rx_page *rx_pg, old_rx_pg;
268 u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd); 302 u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd);
@@ -275,8 +309,8 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
275 309
276 /* This is needed in order to enable forwarding support */ 310 /* This is needed in order to enable forwarding support */
277 if (frag_size) 311 if (frag_size)
278 skb_shinfo(skb)->gso_size = min((u32)SGE_PAGE_SIZE, 312 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp, parsing_flags,
279 max(frag_size, (u32)len_on_bd)); 313 len_on_bd);
280 314
281#ifdef BNX2X_STOP_ON_ERROR 315#ifdef BNX2X_STOP_ON_ERROR
282 if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) { 316 if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
@@ -344,6 +378,8 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
344 if (likely(new_skb)) { 378 if (likely(new_skb)) {
345 /* fix ip xsum and give it to the stack */ 379 /* fix ip xsum and give it to the stack */
346 /* (no need to map the new skb) */ 380 /* (no need to map the new skb) */
381 u16 parsing_flags =
382 le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags);
347 383
348 prefetch(skb); 384 prefetch(skb);
349 prefetch(((char *)(skb)) + L1_CACHE_BYTES); 385 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
@@ -373,9 +409,9 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
373 } 409 }
374 410
375 if (!bnx2x_fill_frag_skb(bp, fp, skb, 411 if (!bnx2x_fill_frag_skb(bp, fp, skb,
376 &cqe->fast_path_cqe, cqe_idx)) { 412 &cqe->fast_path_cqe, cqe_idx,
377 if ((le16_to_cpu(cqe->fast_path_cqe. 413 parsing_flags)) {
378 pars_flags.flags) & PARSING_FLAGS_VLAN)) 414 if (parsing_flags & PARSING_FLAGS_VLAN)
379 __vlan_hwaccel_put_tag(skb, 415 __vlan_hwaccel_put_tag(skb,
380 le16_to_cpu(cqe->fast_path_cqe. 416 le16_to_cpu(cqe->fast_path_cqe.
381 vlan_tag)); 417 vlan_tag));
@@ -703,19 +739,20 @@ u16 bnx2x_get_mf_speed(struct bnx2x *bp)
703{ 739{
704 u16 line_speed = bp->link_vars.line_speed; 740 u16 line_speed = bp->link_vars.line_speed;
705 if (IS_MF(bp)) { 741 if (IS_MF(bp)) {
706 u16 maxCfg = (bp->mf_config[BP_VN(bp)] & 742 u16 maxCfg = bnx2x_extract_max_cfg(bp,
707 FUNC_MF_CFG_MAX_BW_MASK) >> 743 bp->mf_config[BP_VN(bp)]);
708 FUNC_MF_CFG_MAX_BW_SHIFT; 744
709 /* Calculate the current MAX line speed limit for the DCC 745 /* Calculate the current MAX line speed limit for the MF
710 * capable devices 746 * devices
711 */ 747 */
712 if (IS_MF_SD(bp)) { 748 if (IS_MF_SI(bp))
749 line_speed = (line_speed * maxCfg) / 100;
750 else { /* SD mode */
713 u16 vn_max_rate = maxCfg * 100; 751 u16 vn_max_rate = maxCfg * 100;
714 752
715 if (vn_max_rate < line_speed) 753 if (vn_max_rate < line_speed)
716 line_speed = vn_max_rate; 754 line_speed = vn_max_rate;
717 } else /* IS_MF_SI(bp)) */ 755 }
718 line_speed = (line_speed * maxCfg) / 100;
719 } 756 }
720 757
721 return line_speed; 758 return line_speed;
@@ -959,6 +996,23 @@ void bnx2x_free_skbs(struct bnx2x *bp)
959 bnx2x_free_rx_skbs(bp); 996 bnx2x_free_rx_skbs(bp);
960} 997}
961 998
999void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
1000{
1001 /* load old values */
1002 u32 mf_cfg = bp->mf_config[BP_VN(bp)];
1003
1004 if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
1005 /* leave all but MAX value */
1006 mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
1007
1008 /* set new MAX value */
1009 mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
1010 & FUNC_MF_CFG_MAX_BW_MASK;
1011
1012 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
1013 }
1014}
1015
962static void bnx2x_free_msix_irqs(struct bnx2x *bp) 1016static void bnx2x_free_msix_irqs(struct bnx2x *bp)
963{ 1017{
964 int i, offset = 1; 1018 int i, offset = 1;
@@ -1427,6 +1481,11 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
1427 1481
1428 bnx2x_set_eth_mac(bp, 1); 1482 bnx2x_set_eth_mac(bp, 1);
1429 1483
1484 if (bp->pending_max) {
1485 bnx2x_update_max_mf_config(bp, bp->pending_max);
1486 bp->pending_max = 0;
1487 }
1488
1430 if (bp->port.pmf) 1489 if (bp->port.pmf)
1431 bnx2x_initial_phy_init(bp, load_mode); 1490 bnx2x_initial_phy_init(bp, load_mode);
1432 1491
diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
index 03eb4d68e6bb..85ea7f26b51f 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/bnx2x/bnx2x_cmn.h
@@ -341,6 +341,15 @@ void bnx2x_dcbx_init(struct bnx2x *bp);
341 */ 341 */
342int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state); 342int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state);
343 343
344/**
345 * Updates MAX part of MF configuration in HW
346 * (if required)
347 *
348 * @param bp
349 * @param value
350 */
351void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value);
352
344/* dev_close main block */ 353/* dev_close main block */
345int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode); 354int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode);
346 355
@@ -1044,4 +1053,24 @@ static inline void storm_memset_cmng(struct bnx2x *bp,
1044void bnx2x_acquire_phy_lock(struct bnx2x *bp); 1053void bnx2x_acquire_phy_lock(struct bnx2x *bp);
1045void bnx2x_release_phy_lock(struct bnx2x *bp); 1054void bnx2x_release_phy_lock(struct bnx2x *bp);
1046 1055
1056/**
1057 * Extracts MAX BW part from MF configuration.
1058 *
1059 * @param bp
1060 * @param mf_cfg
1061 *
1062 * @return u16
1063 */
1064static inline u16 bnx2x_extract_max_cfg(struct bnx2x *bp, u32 mf_cfg)
1065{
1066 u16 max_cfg = (mf_cfg & FUNC_MF_CFG_MAX_BW_MASK) >>
1067 FUNC_MF_CFG_MAX_BW_SHIFT;
1068 if (!max_cfg) {
1069 BNX2X_ERR("Illegal configuration detected for Max BW - "
1070 "using 100 instead\n");
1071 max_cfg = 100;
1072 }
1073 return max_cfg;
1074}
1075
1047#endif /* BNX2X_CMN_H */ 1076#endif /* BNX2X_CMN_H */
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index 5b44a8b48509..7e92f9d0dcfd 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -238,7 +238,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
238 speed |= (cmd->speed_hi << 16); 238 speed |= (cmd->speed_hi << 16);
239 239
240 if (IS_MF_SI(bp)) { 240 if (IS_MF_SI(bp)) {
241 u32 param = 0; 241 u32 part;
242 u32 line_speed = bp->link_vars.line_speed; 242 u32 line_speed = bp->link_vars.line_speed;
243 243
244 /* use 10G if no link detected */ 244 /* use 10G if no link detected */
@@ -251,23 +251,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
251 REQ_BC_VER_4_SET_MF_BW); 251 REQ_BC_VER_4_SET_MF_BW);
252 return -EINVAL; 252 return -EINVAL;
253 } 253 }
254 if (line_speed < speed) { 254
255 BNX2X_DEV_INFO("New speed should be less or equal " 255 part = (speed * 100) / line_speed;
256 "to actual line speed\n"); 256
257 if (line_speed < speed || !part) {
258 BNX2X_DEV_INFO("Speed setting should be in a range "
259 "from 1%% to 100%% "
260 "of actual line speed\n");
257 return -EINVAL; 261 return -EINVAL;
258 } 262 }
259 /* load old values */
260 param = bp->mf_config[BP_VN(bp)];
261
262 /* leave only MIN value */
263 param &= FUNC_MF_CFG_MIN_BW_MASK;
264 263
265 /* set new MAX value */ 264 if (bp->state != BNX2X_STATE_OPEN)
266 param |= (((speed * 100) / line_speed) 265 /* store value for following "load" */
267 << FUNC_MF_CFG_MAX_BW_SHIFT) 266 bp->pending_max = part;
268 & FUNC_MF_CFG_MAX_BW_MASK; 267 else
268 bnx2x_update_max_mf_config(bp, part);
269 269
270 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, param);
271 return 0; 270 return 0;
272 } 271 }
273 272
@@ -1781,9 +1780,7 @@ static int bnx2x_test_nvram(struct bnx2x *bp)
1781 { 0x100, 0x350 }, /* manuf_info */ 1780 { 0x100, 0x350 }, /* manuf_info */
1782 { 0x450, 0xf0 }, /* feature_info */ 1781 { 0x450, 0xf0 }, /* feature_info */
1783 { 0x640, 0x64 }, /* upgrade_key_info */ 1782 { 0x640, 0x64 }, /* upgrade_key_info */
1784 { 0x6a4, 0x64 },
1785 { 0x708, 0x70 }, /* manuf_key_info */ 1783 { 0x708, 0x70 }, /* manuf_key_info */
1786 { 0x778, 0x70 },
1787 { 0, 0 } 1784 { 0, 0 }
1788 }; 1785 };
1789 __be32 buf[0x350 / 4]; 1786 __be32 buf[0x350 / 4];
@@ -1933,11 +1930,11 @@ static void bnx2x_self_test(struct net_device *dev,
1933 buf[4] = 1; 1930 buf[4] = 1;
1934 etest->flags |= ETH_TEST_FL_FAILED; 1931 etest->flags |= ETH_TEST_FL_FAILED;
1935 } 1932 }
1936 if (bp->port.pmf) 1933
1937 if (bnx2x_link_test(bp, is_serdes) != 0) { 1934 if (bnx2x_link_test(bp, is_serdes) != 0) {
1938 buf[5] = 1; 1935 buf[5] = 1;
1939 etest->flags |= ETH_TEST_FL_FAILED; 1936 etest->flags |= ETH_TEST_FL_FAILED;
1940 } 1937 }
1941 1938
1942#ifdef BNX2X_EXTRA_DEBUG 1939#ifdef BNX2X_EXTRA_DEBUG
1943 bnx2x_panic_dump(bp); 1940 bnx2x_panic_dump(bp);
diff --git a/drivers/net/bnx2x/bnx2x_init.h b/drivers/net/bnx2x/bnx2x_init.h
index 5a268e9a0895..fa6dbe3f2058 100644
--- a/drivers/net/bnx2x/bnx2x_init.h
+++ b/drivers/net/bnx2x/bnx2x_init.h
@@ -241,7 +241,7 @@ static const struct {
241 /* Block IGU, MISC, PXP and PXP2 parity errors as long as we don't 241 /* Block IGU, MISC, PXP and PXP2 parity errors as long as we don't
242 * want to handle "system kill" flow at the moment. 242 * want to handle "system kill" flow at the moment.
243 */ 243 */
244 BLOCK_PRTY_INFO(PXP, 0x3ffffff, 0x3ffffff, 0x3ffffff, 0x3ffffff), 244 BLOCK_PRTY_INFO(PXP, 0x7ffffff, 0x3ffffff, 0x3ffffff, 0x7ffffff),
245 BLOCK_PRTY_INFO_0(PXP2, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), 245 BLOCK_PRTY_INFO_0(PXP2, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
246 BLOCK_PRTY_INFO_1(PXP2, 0x7ff, 0x7f, 0x7f, 0x7ff), 246 BLOCK_PRTY_INFO_1(PXP2, 0x7ff, 0x7f, 0x7f, 0x7ff),
247 BLOCK_PRTY_INFO(HC, 0x7, 0x7, 0x7, 0), 247 BLOCK_PRTY_INFO(HC, 0x7, 0x7, 0x7, 0),
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index d584d32c747d..aa032339e321 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -1974,13 +1974,22 @@ static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
1974 vn_max_rate = 0; 1974 vn_max_rate = 0;
1975 1975
1976 } else { 1976 } else {
1977 u32 maxCfg = bnx2x_extract_max_cfg(bp, vn_cfg);
1978
1977 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >> 1979 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
1978 FUNC_MF_CFG_MIN_BW_SHIFT) * 100; 1980 FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
1979 /* If min rate is zero - set it to 1 */ 1981 /* If fairness is enabled (not all min rates are zeroes) and
1982 if current min rate is zero - set it to 1.
1983 This is a requirement of the algorithm. */
1980 if (bp->vn_weight_sum && (vn_min_rate == 0)) 1984 if (bp->vn_weight_sum && (vn_min_rate == 0))
1981 vn_min_rate = DEF_MIN_RATE; 1985 vn_min_rate = DEF_MIN_RATE;
1982 vn_max_rate = ((vn_cfg & FUNC_MF_CFG_MAX_BW_MASK) >> 1986
1983 FUNC_MF_CFG_MAX_BW_SHIFT) * 100; 1987 if (IS_MF_SI(bp))
1988 /* maxCfg in percents of linkspeed */
1989 vn_max_rate = (bp->link_vars.line_speed * maxCfg) / 100;
1990 else
1991 /* maxCfg is absolute in 100Mb units */
1992 vn_max_rate = maxCfg * 100;
1984 } 1993 }
1985 1994
1986 DP(NETIF_MSG_IFUP, 1995 DP(NETIF_MSG_IFUP,
@@ -2006,7 +2015,8 @@ static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
2006 m_fair_vn.vn_credit_delta = 2015 m_fair_vn.vn_credit_delta =
2007 max_t(u32, (vn_min_rate * (T_FAIR_COEF / 2016 max_t(u32, (vn_min_rate * (T_FAIR_COEF /
2008 (8 * bp->vn_weight_sum))), 2017 (8 * bp->vn_weight_sum))),
2009 (bp->cmng.fair_vars.fair_threshold * 2)); 2018 (bp->cmng.fair_vars.fair_threshold +
2019 MIN_ABOVE_THRESH));
2010 DP(NETIF_MSG_IFUP, "m_fair_vn.vn_credit_delta %d\n", 2020 DP(NETIF_MSG_IFUP, "m_fair_vn.vn_credit_delta %d\n",
2011 m_fair_vn.vn_credit_delta); 2021 m_fair_vn.vn_credit_delta);
2012 } 2022 }
@@ -2082,8 +2092,9 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
2082 bnx2x_calc_vn_weight_sum(bp); 2092 bnx2x_calc_vn_weight_sum(bp);
2083 2093
2084 /* calculate and set min-max rate for each vn */ 2094 /* calculate and set min-max rate for each vn */
2085 for (vn = VN_0; vn < E1HVN_MAX; vn++) 2095 if (bp->port.pmf)
2086 bnx2x_init_vn_minmax(bp, vn); 2096 for (vn = VN_0; vn < E1HVN_MAX; vn++)
2097 bnx2x_init_vn_minmax(bp, vn);
2087 2098
2088 /* always enable rate shaping and fairness */ 2099 /* always enable rate shaping and fairness */
2089 bp->cmng.flags.cmng_enables |= 2100 bp->cmng.flags.cmng_enables |=
@@ -2152,13 +2163,6 @@ static void bnx2x_link_attn(struct bnx2x *bp)
2152 bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP); 2163 bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
2153 } 2164 }
2154 2165
2155 /* indicate link status only if link status actually changed */
2156 if (prev_link_status != bp->link_vars.link_status)
2157 bnx2x_link_report(bp);
2158
2159 if (IS_MF(bp))
2160 bnx2x_link_sync_notify(bp);
2161
2162 if (bp->link_vars.link_up && bp->link_vars.line_speed) { 2166 if (bp->link_vars.link_up && bp->link_vars.line_speed) {
2163 int cmng_fns = bnx2x_get_cmng_fns_mode(bp); 2167 int cmng_fns = bnx2x_get_cmng_fns_mode(bp);
2164 2168
@@ -2170,6 +2174,13 @@ static void bnx2x_link_attn(struct bnx2x *bp)
2170 DP(NETIF_MSG_IFUP, 2174 DP(NETIF_MSG_IFUP,
2171 "single function mode without fairness\n"); 2175 "single function mode without fairness\n");
2172 } 2176 }
2177
2178 if (IS_MF(bp))
2179 bnx2x_link_sync_notify(bp);
2180
2181 /* indicate link status only if link status actually changed */
2182 if (prev_link_status != bp->link_vars.link_status)
2183 bnx2x_link_report(bp);
2173} 2184}
2174 2185
2175void bnx2x__link_status_update(struct bnx2x *bp) 2186void bnx2x__link_status_update(struct bnx2x *bp)
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c
index bda60d590fa8..3445ded6674f 100644
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -1239,14 +1239,14 @@ void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
1239 if (unlikely(bp->panic)) 1239 if (unlikely(bp->panic))
1240 return; 1240 return;
1241 1241
1242 bnx2x_stats_stm[bp->stats_state][event].action(bp);
1243
1242 /* Protect a state change flow */ 1244 /* Protect a state change flow */
1243 spin_lock_bh(&bp->stats_lock); 1245 spin_lock_bh(&bp->stats_lock);
1244 state = bp->stats_state; 1246 state = bp->stats_state;
1245 bp->stats_state = bnx2x_stats_stm[state][event].next_state; 1247 bp->stats_state = bnx2x_stats_stm[state][event].next_state;
1246 spin_unlock_bh(&bp->stats_lock); 1248 spin_unlock_bh(&bp->stats_lock);
1247 1249
1248 bnx2x_stats_stm[state][event].action(bp);
1249
1250 if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp)) 1250 if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp))
1251 DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n", 1251 DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
1252 state, event, bp->stats_state); 1252 state, event, bp->stats_state);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 1024ae158227..a5d5d0b5b155 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -281,23 +281,23 @@ static inline int __check_agg_selection_timer(struct port *port)
281} 281}
282 282
283/** 283/**
284 * __get_rx_machine_lock - lock the port's RX machine 284 * __get_state_machine_lock - lock the port's state machines
285 * @port: the port we're looking at 285 * @port: the port we're looking at
286 * 286 *
287 */ 287 */
288static inline void __get_rx_machine_lock(struct port *port) 288static inline void __get_state_machine_lock(struct port *port)
289{ 289{
290 spin_lock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 290 spin_lock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
291} 291}
292 292
293/** 293/**
294 * __release_rx_machine_lock - unlock the port's RX machine 294 * __release_state_machine_lock - unlock the port's state machines
295 * @port: the port we're looking at 295 * @port: the port we're looking at
296 * 296 *
297 */ 297 */
298static inline void __release_rx_machine_lock(struct port *port) 298static inline void __release_state_machine_lock(struct port *port)
299{ 299{
300 spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 300 spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
301} 301}
302 302
303/** 303/**
@@ -388,14 +388,14 @@ static u8 __get_duplex(struct port *port)
388} 388}
389 389
390/** 390/**
391 * __initialize_port_locks - initialize a port's RX machine spinlock 391 * __initialize_port_locks - initialize a port's STATE machine spinlock
392 * @port: the port we're looking at 392 * @port: the port we're looking at
393 * 393 *
394 */ 394 */
395static inline void __initialize_port_locks(struct port *port) 395static inline void __initialize_port_locks(struct port *port)
396{ 396{
397 // make sure it isn't called twice 397 // make sure it isn't called twice
398 spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 398 spin_lock_init(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
399} 399}
400 400
401//conversions 401//conversions
@@ -1025,9 +1025,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1025{ 1025{
1026 rx_states_t last_state; 1026 rx_states_t last_state;
1027 1027
1028 // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback)
1029 __get_rx_machine_lock(port);
1030
1031 // keep current State Machine state to compare later if it was changed 1028 // keep current State Machine state to compare later if it was changed
1032 last_state = port->sm_rx_state; 1029 last_state = port->sm_rx_state;
1033 1030
@@ -1133,7 +1130,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1133 pr_err("%s: An illegal loopback occurred on adapter (%s).\n" 1130 pr_err("%s: An illegal loopback occurred on adapter (%s).\n"
1134 "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n", 1131 "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n",
1135 port->slave->dev->master->name, port->slave->dev->name); 1132 port->slave->dev->master->name, port->slave->dev->name);
1136 __release_rx_machine_lock(port);
1137 return; 1133 return;
1138 } 1134 }
1139 __update_selected(lacpdu, port); 1135 __update_selected(lacpdu, port);
@@ -1153,7 +1149,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1153 break; 1149 break;
1154 } 1150 }
1155 } 1151 }
1156 __release_rx_machine_lock(port);
1157} 1152}
1158 1153
1159/** 1154/**
@@ -2155,6 +2150,12 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2155 goto re_arm; 2150 goto re_arm;
2156 } 2151 }
2157 2152
2153 /* Lock around state machines to protect data accessed
2154 * by all (e.g., port->sm_vars). ad_rx_machine may run
2155 * concurrently due to incoming LACPDU.
2156 */
2157 __get_state_machine_lock(port);
2158
2158 ad_rx_machine(NULL, port); 2159 ad_rx_machine(NULL, port);
2159 ad_periodic_machine(port); 2160 ad_periodic_machine(port);
2160 ad_port_selection_logic(port); 2161 ad_port_selection_logic(port);
@@ -2164,6 +2165,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2164 // turn off the BEGIN bit, since we already handled it 2165 // turn off the BEGIN bit, since we already handled it
2165 if (port->sm_vars & AD_PORT_BEGIN) 2166 if (port->sm_vars & AD_PORT_BEGIN)
2166 port->sm_vars &= ~AD_PORT_BEGIN; 2167 port->sm_vars &= ~AD_PORT_BEGIN;
2168
2169 __release_state_machine_lock(port);
2167 } 2170 }
2168 2171
2169re_arm: 2172re_arm:
@@ -2200,7 +2203,10 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
2200 case AD_TYPE_LACPDU: 2203 case AD_TYPE_LACPDU:
2201 pr_debug("Received LACPDU on port %d\n", 2204 pr_debug("Received LACPDU on port %d\n",
2202 port->actor_port_number); 2205 port->actor_port_number);
2206 /* Protect against concurrent state machines */
2207 __get_state_machine_lock(port);
2203 ad_rx_machine(lacpdu, port); 2208 ad_rx_machine(lacpdu, port);
2209 __release_state_machine_lock(port);
2204 break; 2210 break;
2205 2211
2206 case AD_TYPE_MARKER: 2212 case AD_TYPE_MARKER:
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index 2c46a154f2c6..b28baff70864 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -264,7 +264,8 @@ struct ad_bond_info {
264struct ad_slave_info { 264struct ad_slave_info {
265 struct aggregator aggregator; // 802.3ad aggregator structure 265 struct aggregator aggregator; // 802.3ad aggregator structure
266 struct port port; // 802.3ad port structure 266 struct port port; // 802.3ad port structure
267 spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt 267 spinlock_t state_machine_lock; /* mutex state machines vs.
268 incoming LACPDU */
268 u16 id; 269 u16 id;
269}; 270};
270 271
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index 7ab534aee452..7513c4523ac4 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -940,7 +940,7 @@ static int mcp251x_open(struct net_device *net)
940 goto open_unlock; 940 goto open_unlock;
941 } 941 }
942 942
943 priv->wq = create_freezeable_workqueue("mcp251x_wq"); 943 priv->wq = create_freezable_workqueue("mcp251x_wq");
944 INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler); 944 INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler);
945 INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler); 945 INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler);
946 946
diff --git a/drivers/net/can/softing/Kconfig b/drivers/net/can/softing/Kconfig
index 8ba81b3ddd90..5de46a9a77bb 100644
--- a/drivers/net/can/softing/Kconfig
+++ b/drivers/net/can/softing/Kconfig
@@ -18,7 +18,7 @@ config CAN_SOFTING
18config CAN_SOFTING_CS 18config CAN_SOFTING_CS
19 tristate "Softing Gmbh CAN pcmcia cards" 19 tristate "Softing Gmbh CAN pcmcia cards"
20 depends on PCMCIA 20 depends on PCMCIA
21 select CAN_SOFTING 21 depends on CAN_SOFTING
22 ---help--- 22 ---help---
23 Support for PCMCIA cards from Softing Gmbh & some cards 23 Support for PCMCIA cards from Softing Gmbh & some cards
24 from Vector Gmbh. 24 from Vector Gmbh.
diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index 5157e15e96eb..aeea9f9ff6e8 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c
@@ -633,6 +633,7 @@ static const struct net_device_ops softing_netdev_ops = {
633}; 633};
634 634
635static const struct can_bittiming_const softing_btr_const = { 635static const struct can_bittiming_const softing_btr_const = {
636 .name = "softing",
636 .tseg1_min = 1, 637 .tseg1_min = 1,
637 .tseg1_max = 16, 638 .tseg1_max = 16,
638 .tseg2_min = 1, 639 .tseg2_min = 1,
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index 7ff170cbc7dc..302be4aa69d6 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -2760,6 +2760,8 @@ static u32 cnic_service_bnx2_queues(struct cnic_dev *dev)
2760 u32 status_idx = (u16) *cp->kcq1.status_idx_ptr; 2760 u32 status_idx = (u16) *cp->kcq1.status_idx_ptr;
2761 int kcqe_cnt; 2761 int kcqe_cnt;
2762 2762
2763 /* status block index must be read before reading other fields */
2764 rmb();
2763 cp->kwq_con_idx = *cp->kwq_con_idx_ptr; 2765 cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
2764 2766
2765 while ((kcqe_cnt = cnic_get_kcqes(dev, &cp->kcq1))) { 2767 while ((kcqe_cnt = cnic_get_kcqes(dev, &cp->kcq1))) {
@@ -2770,6 +2772,8 @@ static u32 cnic_service_bnx2_queues(struct cnic_dev *dev)
2770 barrier(); 2772 barrier();
2771 if (status_idx != *cp->kcq1.status_idx_ptr) { 2773 if (status_idx != *cp->kcq1.status_idx_ptr) {
2772 status_idx = (u16) *cp->kcq1.status_idx_ptr; 2774 status_idx = (u16) *cp->kcq1.status_idx_ptr;
2775 /* status block index must be read first */
2776 rmb();
2773 cp->kwq_con_idx = *cp->kwq_con_idx_ptr; 2777 cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
2774 } else 2778 } else
2775 break; 2779 break;
@@ -2888,6 +2892,8 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
2888 u32 last_status = *info->status_idx_ptr; 2892 u32 last_status = *info->status_idx_ptr;
2889 int kcqe_cnt; 2893 int kcqe_cnt;
2890 2894
2895 /* status block index must be read before reading the KCQ */
2896 rmb();
2891 while ((kcqe_cnt = cnic_get_kcqes(dev, info))) { 2897 while ((kcqe_cnt = cnic_get_kcqes(dev, info))) {
2892 2898
2893 service_kcqes(dev, kcqe_cnt); 2899 service_kcqes(dev, kcqe_cnt);
@@ -2898,6 +2904,8 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
2898 break; 2904 break;
2899 2905
2900 last_status = *info->status_idx_ptr; 2906 last_status = *info->status_idx_ptr;
2907 /* status block index must be read before reading the KCQ */
2908 rmb();
2901 } 2909 }
2902 return last_status; 2910 return last_status;
2903} 2911}
@@ -2906,26 +2914,35 @@ static void cnic_service_bnx2x_bh(unsigned long data)
2906{ 2914{
2907 struct cnic_dev *dev = (struct cnic_dev *) data; 2915 struct cnic_dev *dev = (struct cnic_dev *) data;
2908 struct cnic_local *cp = dev->cnic_priv; 2916 struct cnic_local *cp = dev->cnic_priv;
2909 u32 status_idx; 2917 u32 status_idx, new_status_idx;
2910 2918
2911 if (unlikely(!test_bit(CNIC_F_CNIC_UP, &dev->flags))) 2919 if (unlikely(!test_bit(CNIC_F_CNIC_UP, &dev->flags)))
2912 return; 2920 return;
2913 2921
2914 status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq1); 2922 while (1) {
2923 status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq1);
2915 2924
2916 CNIC_WR16(dev, cp->kcq1.io_addr, cp->kcq1.sw_prod_idx + MAX_KCQ_IDX); 2925 CNIC_WR16(dev, cp->kcq1.io_addr,
2926 cp->kcq1.sw_prod_idx + MAX_KCQ_IDX);
2917 2927
2918 if (BNX2X_CHIP_IS_E2(cp->chip_id)) { 2928 if (!BNX2X_CHIP_IS_E2(cp->chip_id)) {
2919 status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq2); 2929 cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, USTORM_ID,
2930 status_idx, IGU_INT_ENABLE, 1);
2931 break;
2932 }
2933
2934 new_status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq2);
2935
2936 if (new_status_idx != status_idx)
2937 continue;
2920 2938
2921 CNIC_WR16(dev, cp->kcq2.io_addr, cp->kcq2.sw_prod_idx + 2939 CNIC_WR16(dev, cp->kcq2.io_addr, cp->kcq2.sw_prod_idx +
2922 MAX_KCQ_IDX); 2940 MAX_KCQ_IDX);
2923 2941
2924 cnic_ack_igu_sb(dev, cp->bnx2x_igu_sb_id, IGU_SEG_ACCESS_DEF, 2942 cnic_ack_igu_sb(dev, cp->bnx2x_igu_sb_id, IGU_SEG_ACCESS_DEF,
2925 status_idx, IGU_INT_ENABLE, 1); 2943 status_idx, IGU_INT_ENABLE, 1);
2926 } else { 2944
2927 cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, USTORM_ID, 2945 break;
2928 status_idx, IGU_INT_ENABLE, 1);
2929 } 2946 }
2930} 2947}
2931 2948
diff --git a/drivers/net/cxgb4/t4_msg.h b/drivers/net/cxgb4/t4_msg.h
index a550d0c706f3..eb71b8250b91 100644
--- a/drivers/net/cxgb4/t4_msg.h
+++ b/drivers/net/cxgb4/t4_msg.h
@@ -123,6 +123,7 @@ enum {
123 ULP_MODE_NONE = 0, 123 ULP_MODE_NONE = 0,
124 ULP_MODE_ISCSI = 2, 124 ULP_MODE_ISCSI = 2,
125 ULP_MODE_RDMA = 4, 125 ULP_MODE_RDMA = 4,
126 ULP_MODE_TCPDDP = 5,
126 ULP_MODE_FCOE = 6, 127 ULP_MODE_FCOE = 6,
127}; 128};
128 129
diff --git a/drivers/net/cxgb4vf/cxgb4vf_main.c b/drivers/net/cxgb4vf/cxgb4vf_main.c
index 56166ae2059f..6aad64df4dcb 100644
--- a/drivers/net/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/cxgb4vf/cxgb4vf_main.c
@@ -2040,7 +2040,7 @@ static int __devinit setup_debugfs(struct adapter *adapter)
2040{ 2040{
2041 int i; 2041 int i;
2042 2042
2043 BUG_ON(adapter->debugfs_root == NULL); 2043 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2044 2044
2045 /* 2045 /*
2046 * Debugfs support is best effort. 2046 * Debugfs support is best effort.
@@ -2061,7 +2061,7 @@ static int __devinit setup_debugfs(struct adapter *adapter)
2061 */ 2061 */
2062static void cleanup_debugfs(struct adapter *adapter) 2062static void cleanup_debugfs(struct adapter *adapter)
2063{ 2063{
2064 BUG_ON(adapter->debugfs_root == NULL); 2064 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2065 2065
2066 /* 2066 /*
2067 * Unlike our sister routine cleanup_proc(), we don't need to remove 2067 * Unlike our sister routine cleanup_proc(), we don't need to remove
@@ -2489,17 +2489,6 @@ static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2489 struct net_device *netdev; 2489 struct net_device *netdev;
2490 2490
2491 /* 2491 /*
2492 * Vet our module parameters.
2493 */
2494 if (msi != MSI_MSIX && msi != MSI_MSI) {
2495 dev_err(&pdev->dev, "bad module parameter msi=%d; must be %d"
2496 " (MSI-X or MSI) or %d (MSI)\n", msi, MSI_MSIX,
2497 MSI_MSI);
2498 err = -EINVAL;
2499 goto err_out;
2500 }
2501
2502 /*
2503 * Print our driver banner the first time we're called to initialize a 2492 * Print our driver banner the first time we're called to initialize a
2504 * device. 2493 * device.
2505 */ 2494 */
@@ -2711,11 +2700,11 @@ static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2711 /* 2700 /*
2712 * Set up our debugfs entries. 2701 * Set up our debugfs entries.
2713 */ 2702 */
2714 if (cxgb4vf_debugfs_root) { 2703 if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2715 adapter->debugfs_root = 2704 adapter->debugfs_root =
2716 debugfs_create_dir(pci_name(pdev), 2705 debugfs_create_dir(pci_name(pdev),
2717 cxgb4vf_debugfs_root); 2706 cxgb4vf_debugfs_root);
2718 if (adapter->debugfs_root == NULL) 2707 if (IS_ERR_OR_NULL(adapter->debugfs_root))
2719 dev_warn(&pdev->dev, "could not create debugfs" 2708 dev_warn(&pdev->dev, "could not create debugfs"
2720 " directory"); 2709 " directory");
2721 else 2710 else
@@ -2770,7 +2759,7 @@ static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2770 */ 2759 */
2771 2760
2772err_free_debugfs: 2761err_free_debugfs:
2773 if (adapter->debugfs_root) { 2762 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2774 cleanup_debugfs(adapter); 2763 cleanup_debugfs(adapter);
2775 debugfs_remove_recursive(adapter->debugfs_root); 2764 debugfs_remove_recursive(adapter->debugfs_root);
2776 } 2765 }
@@ -2802,7 +2791,6 @@ err_release_regions:
2802err_disable_device: 2791err_disable_device:
2803 pci_disable_device(pdev); 2792 pci_disable_device(pdev);
2804 2793
2805err_out:
2806 return err; 2794 return err;
2807} 2795}
2808 2796
@@ -2840,7 +2828,7 @@ static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
2840 /* 2828 /*
2841 * Tear down our debugfs entries. 2829 * Tear down our debugfs entries.
2842 */ 2830 */
2843 if (adapter->debugfs_root) { 2831 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2844 cleanup_debugfs(adapter); 2832 cleanup_debugfs(adapter);
2845 debugfs_remove_recursive(adapter->debugfs_root); 2833 debugfs_remove_recursive(adapter->debugfs_root);
2846 } 2834 }
@@ -2874,6 +2862,46 @@ static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
2874} 2862}
2875 2863
2876/* 2864/*
2865 * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
2866 * delivery.
2867 */
2868static void __devexit cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2869{
2870 struct adapter *adapter;
2871 int pidx;
2872
2873 adapter = pci_get_drvdata(pdev);
2874 if (!adapter)
2875 return;
2876
2877 /*
2878 * Disable all Virtual Interfaces. This will shut down the
2879 * delivery of all ingress packets into the chip for these
2880 * Virtual Interfaces.
2881 */
2882 for_each_port(adapter, pidx) {
2883 struct net_device *netdev;
2884 struct port_info *pi;
2885
2886 if (!test_bit(pidx, &adapter->registered_device_map))
2887 continue;
2888
2889 netdev = adapter->port[pidx];
2890 if (!netdev)
2891 continue;
2892
2893 pi = netdev_priv(netdev);
2894 t4vf_enable_vi(adapter, pi->viid, false, false);
2895 }
2896
2897 /*
2898 * Free up all Queues which will prevent further DMA and
2899 * Interrupts allowing various internal pathways to drain.
2900 */
2901 t4vf_free_sge_resources(adapter);
2902}
2903
2904/*
2877 * PCI Device registration data structures. 2905 * PCI Device registration data structures.
2878 */ 2906 */
2879#define CH_DEVICE(devid, idx) \ 2907#define CH_DEVICE(devid, idx) \
@@ -2906,6 +2934,7 @@ static struct pci_driver cxgb4vf_driver = {
2906 .id_table = cxgb4vf_pci_tbl, 2934 .id_table = cxgb4vf_pci_tbl,
2907 .probe = cxgb4vf_pci_probe, 2935 .probe = cxgb4vf_pci_probe,
2908 .remove = __devexit_p(cxgb4vf_pci_remove), 2936 .remove = __devexit_p(cxgb4vf_pci_remove),
2937 .shutdown = __devexit_p(cxgb4vf_pci_shutdown),
2909}; 2938};
2910 2939
2911/* 2940/*
@@ -2915,14 +2944,25 @@ static int __init cxgb4vf_module_init(void)
2915{ 2944{
2916 int ret; 2945 int ret;
2917 2946
2947 /*
2948 * Vet our module parameters.
2949 */
2950 if (msi != MSI_MSIX && msi != MSI_MSI) {
2951 printk(KERN_WARNING KBUILD_MODNAME
2952 ": bad module parameter msi=%d; must be %d"
2953 " (MSI-X or MSI) or %d (MSI)\n",
2954 msi, MSI_MSIX, MSI_MSI);
2955 return -EINVAL;
2956 }
2957
2918 /* Debugfs support is optional, just warn if this fails */ 2958 /* Debugfs support is optional, just warn if this fails */
2919 cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL); 2959 cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
2920 if (!cxgb4vf_debugfs_root) 2960 if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2921 printk(KERN_WARNING KBUILD_MODNAME ": could not create" 2961 printk(KERN_WARNING KBUILD_MODNAME ": could not create"
2922 " debugfs entry, continuing\n"); 2962 " debugfs entry, continuing\n");
2923 2963
2924 ret = pci_register_driver(&cxgb4vf_driver); 2964 ret = pci_register_driver(&cxgb4vf_driver);
2925 if (ret < 0) 2965 if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2926 debugfs_remove(cxgb4vf_debugfs_root); 2966 debugfs_remove(cxgb4vf_debugfs_root);
2927 return ret; 2967 return ret;
2928} 2968}
diff --git a/drivers/net/cxgb4vf/t4vf_hw.c b/drivers/net/cxgb4vf/t4vf_hw.c
index 0f51c80475ce..192db226ec7f 100644
--- a/drivers/net/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/cxgb4vf/t4vf_hw.c
@@ -171,7 +171,7 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
171 delay_idx = 0; 171 delay_idx = 0;
172 ms = delay[0]; 172 ms = delay[0];
173 173
174 for (i = 0; i < 500; i += ms) { 174 for (i = 0; i < FW_CMD_MAX_TIMEOUT; i += ms) {
175 if (sleep_ok) { 175 if (sleep_ok) {
176 ms = delay[delay_idx]; 176 ms = delay[delay_idx];
177 if (delay_idx < ARRAY_SIZE(delay) - 1) 177 if (delay_idx < ARRAY_SIZE(delay) - 1)
diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 2a628d17d178..7018bfe408a4 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -1008,7 +1008,7 @@ static void emac_rx_handler(void *token, int len, int status)
1008 int ret; 1008 int ret;
1009 1009
1010 /* free and bail if we are shutting down */ 1010 /* free and bail if we are shutting down */
1011 if (unlikely(!netif_running(ndev))) { 1011 if (unlikely(!netif_running(ndev) || !netif_carrier_ok(ndev))) {
1012 dev_kfree_skb_any(skb); 1012 dev_kfree_skb_any(skb);
1013 return; 1013 return;
1014 } 1014 }
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 2d4c4fc1d900..461dd6f905f7 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -802,10 +802,7 @@ dm9000_init_dm9000(struct net_device *dev)
802 /* Checksum mode */ 802 /* Checksum mode */
803 dm9000_set_rx_csum_unlocked(dev, db->rx_csum); 803 dm9000_set_rx_csum_unlocked(dev, db->rx_csum);
804 804
805 /* GPIO0 on pre-activate PHY */
806 iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
807 iow(db, DM9000_GPCR, GPCR_GEP_CNTL); /* Let GPIO0 output */ 805 iow(db, DM9000_GPCR, GPCR_GEP_CNTL); /* Let GPIO0 output */
808 iow(db, DM9000_GPR, 0); /* Enable PHY */
809 806
810 ncr = (db->flags & DM9000_PLATF_EXT_PHY) ? NCR_EXT_PHY : 0; 807 ncr = (db->flags & DM9000_PLATF_EXT_PHY) ? NCR_EXT_PHY : 0;
811 808
@@ -852,8 +849,8 @@ static void dm9000_timeout(struct net_device *dev)
852 unsigned long flags; 849 unsigned long flags;
853 850
854 /* Save previous register address */ 851 /* Save previous register address */
855 reg_save = readb(db->io_addr);
856 spin_lock_irqsave(&db->lock, flags); 852 spin_lock_irqsave(&db->lock, flags);
853 reg_save = readb(db->io_addr);
857 854
858 netif_stop_queue(dev); 855 netif_stop_queue(dev);
859 dm9000_reset(db); 856 dm9000_reset(db);
@@ -1194,6 +1191,10 @@ dm9000_open(struct net_device *dev)
1194 if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev)) 1191 if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev))
1195 return -EAGAIN; 1192 return -EAGAIN;
1196 1193
1194 /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
1195 iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
1196 mdelay(1); /* delay needs by DM9000B */
1197
1197 /* Initialize DM9000 board */ 1198 /* Initialize DM9000 board */
1198 dm9000_reset(db); 1199 dm9000_reset(db);
1199 dm9000_init_dm9000(dev); 1200 dm9000_init_dm9000(dev);
diff --git a/drivers/net/dnet.c b/drivers/net/dnet.c
index 9d8a20b72fa9..8318ea06cb6d 100644
--- a/drivers/net/dnet.c
+++ b/drivers/net/dnet.c
@@ -337,8 +337,6 @@ static int dnet_mii_init(struct dnet *bp)
337 for (i = 0; i < PHY_MAX_ADDR; i++) 337 for (i = 0; i < PHY_MAX_ADDR; i++)
338 bp->mii_bus->irq[i] = PHY_POLL; 338 bp->mii_bus->irq[i] = PHY_POLL;
339 339
340 platform_set_drvdata(bp->dev, bp->mii_bus);
341
342 if (mdiobus_register(bp->mii_bus)) { 340 if (mdiobus_register(bp->mii_bus)) {
343 err = -ENXIO; 341 err = -ENXIO;
344 goto err_out_free_mdio_irq; 342 goto err_out_free_mdio_irq;
@@ -863,6 +861,7 @@ static int __devinit dnet_probe(struct platform_device *pdev)
863 bp = netdev_priv(dev); 861 bp = netdev_priv(dev);
864 bp->dev = dev; 862 bp->dev = dev;
865 863
864 platform_set_drvdata(pdev, dev);
866 SET_NETDEV_DEV(dev, &pdev->dev); 865 SET_NETDEV_DEV(dev, &pdev->dev);
867 866
868 spin_lock_init(&bp->lock); 867 spin_lock_init(&bp->lock);
diff --git a/drivers/net/e1000/e1000_osdep.h b/drivers/net/e1000/e1000_osdep.h
index 55c1711f1688..33e7c45a4fe4 100644
--- a/drivers/net/e1000/e1000_osdep.h
+++ b/drivers/net/e1000/e1000_osdep.h
@@ -42,7 +42,8 @@
42#define GBE_CONFIG_RAM_BASE \ 42#define GBE_CONFIG_RAM_BASE \
43 ((unsigned int)(CONFIG_RAM_BASE + GBE_CONFIG_OFFSET)) 43 ((unsigned int)(CONFIG_RAM_BASE + GBE_CONFIG_OFFSET))
44 44
45#define GBE_CONFIG_BASE_VIRT phys_to_virt(GBE_CONFIG_RAM_BASE) 45#define GBE_CONFIG_BASE_VIRT \
46 ((void __iomem *)phys_to_virt(GBE_CONFIG_RAM_BASE))
46 47
47#define GBE_CONFIG_FLASH_WRITE(base, offset, count, data) \ 48#define GBE_CONFIG_FLASH_WRITE(base, offset, count, data) \
48 (iowrite16_rep(base + offset, data, count)) 49 (iowrite16_rep(base + offset, data, count))
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 3065870cf2a7..6d513a383340 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -937,6 +937,9 @@ static void e1000_print_hw_hang(struct work_struct *work)
937 u16 phy_status, phy_1000t_status, phy_ext_status; 937 u16 phy_status, phy_1000t_status, phy_ext_status;
938 u16 pci_status; 938 u16 pci_status;
939 939
940 if (test_bit(__E1000_DOWN, &adapter->state))
941 return;
942
940 e1e_rphy(hw, PHY_STATUS, &phy_status); 943 e1e_rphy(hw, PHY_STATUS, &phy_status);
941 e1e_rphy(hw, PHY_1000T_STATUS, &phy_1000t_status); 944 e1e_rphy(hw, PHY_1000T_STATUS, &phy_1000t_status);
942 e1e_rphy(hw, PHY_EXT_STATUS, &phy_ext_status); 945 e1e_rphy(hw, PHY_EXT_STATUS, &phy_ext_status);
@@ -1506,6 +1509,9 @@ static void e1000e_downshift_workaround(struct work_struct *work)
1506 struct e1000_adapter *adapter = container_of(work, 1509 struct e1000_adapter *adapter = container_of(work,
1507 struct e1000_adapter, downshift_task); 1510 struct e1000_adapter, downshift_task);
1508 1511
1512 if (test_bit(__E1000_DOWN, &adapter->state))
1513 return;
1514
1509 e1000e_gig_downshift_workaround_ich8lan(&adapter->hw); 1515 e1000e_gig_downshift_workaround_ich8lan(&adapter->hw);
1510} 1516}
1511 1517
@@ -3338,6 +3344,21 @@ int e1000e_up(struct e1000_adapter *adapter)
3338 return 0; 3344 return 0;
3339} 3345}
3340 3346
3347static void e1000e_flush_descriptors(struct e1000_adapter *adapter)
3348{
3349 struct e1000_hw *hw = &adapter->hw;
3350
3351 if (!(adapter->flags2 & FLAG2_DMA_BURST))
3352 return;
3353
3354 /* flush pending descriptor writebacks to memory */
3355 ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
3356 ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD);
3357
3358 /* execute the writes immediately */
3359 e1e_flush();
3360}
3361
3341void e1000e_down(struct e1000_adapter *adapter) 3362void e1000e_down(struct e1000_adapter *adapter)
3342{ 3363{
3343 struct net_device *netdev = adapter->netdev; 3364 struct net_device *netdev = adapter->netdev;
@@ -3377,6 +3398,9 @@ void e1000e_down(struct e1000_adapter *adapter)
3377 3398
3378 if (!pci_channel_offline(adapter->pdev)) 3399 if (!pci_channel_offline(adapter->pdev))
3379 e1000e_reset(adapter); 3400 e1000e_reset(adapter);
3401
3402 e1000e_flush_descriptors(adapter);
3403
3380 e1000_clean_tx_ring(adapter); 3404 e1000_clean_tx_ring(adapter);
3381 e1000_clean_rx_ring(adapter); 3405 e1000_clean_rx_ring(adapter);
3382 3406
@@ -3765,6 +3789,10 @@ static void e1000e_update_phy_task(struct work_struct *work)
3765{ 3789{
3766 struct e1000_adapter *adapter = container_of(work, 3790 struct e1000_adapter *adapter = container_of(work,
3767 struct e1000_adapter, update_phy_task); 3791 struct e1000_adapter, update_phy_task);
3792
3793 if (test_bit(__E1000_DOWN, &adapter->state))
3794 return;
3795
3768 e1000_get_phy_info(&adapter->hw); 3796 e1000_get_phy_info(&adapter->hw);
3769} 3797}
3770 3798
@@ -3775,6 +3803,10 @@ static void e1000e_update_phy_task(struct work_struct *work)
3775static void e1000_update_phy_info(unsigned long data) 3803static void e1000_update_phy_info(unsigned long data)
3776{ 3804{
3777 struct e1000_adapter *adapter = (struct e1000_adapter *) data; 3805 struct e1000_adapter *adapter = (struct e1000_adapter *) data;
3806
3807 if (test_bit(__E1000_DOWN, &adapter->state))
3808 return;
3809
3778 schedule_work(&adapter->update_phy_task); 3810 schedule_work(&adapter->update_phy_task);
3779} 3811}
3780 3812
@@ -4149,6 +4181,9 @@ static void e1000_watchdog_task(struct work_struct *work)
4149 u32 link, tctl; 4181 u32 link, tctl;
4150 int tx_pending = 0; 4182 int tx_pending = 0;
4151 4183
4184 if (test_bit(__E1000_DOWN, &adapter->state))
4185 return;
4186
4152 link = e1000e_has_link(adapter); 4187 link = e1000e_has_link(adapter);
4153 if ((netif_carrier_ok(netdev)) && link) { 4188 if ((netif_carrier_ok(netdev)) && link) {
4154 /* Cancel scheduled suspend requests. */ 4189 /* Cancel scheduled suspend requests. */
@@ -4337,19 +4372,12 @@ link_up:
4337 else 4372 else
4338 ew32(ICS, E1000_ICS_RXDMT0); 4373 ew32(ICS, E1000_ICS_RXDMT0);
4339 4374
4375 /* flush pending descriptors to memory before detecting Tx hang */
4376 e1000e_flush_descriptors(adapter);
4377
4340 /* Force detection of hung controller every watchdog period */ 4378 /* Force detection of hung controller every watchdog period */
4341 adapter->detect_tx_hung = 1; 4379 adapter->detect_tx_hung = 1;
4342 4380
4343 /* flush partial descriptors to memory before detecting Tx hang */
4344 if (adapter->flags2 & FLAG2_DMA_BURST) {
4345 ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
4346 ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD);
4347 /*
4348 * no need to flush the writes because the timeout code does
4349 * an er32 first thing
4350 */
4351 }
4352
4353 /* 4381 /*
4354 * With 82571 controllers, LAA may be overwritten due to controller 4382 * With 82571 controllers, LAA may be overwritten due to controller
4355 * reset from the other port. Set the appropriate LAA in RAR[0] 4383 * reset from the other port. Set the appropriate LAA in RAR[0]
@@ -4887,6 +4915,10 @@ static void e1000_reset_task(struct work_struct *work)
4887 struct e1000_adapter *adapter; 4915 struct e1000_adapter *adapter;
4888 adapter = container_of(work, struct e1000_adapter, reset_task); 4916 adapter = container_of(work, struct e1000_adapter, reset_task);
4889 4917
4918 /* don't run the task if already down */
4919 if (test_bit(__E1000_DOWN, &adapter->state))
4920 return;
4921
4890 if (!((adapter->flags & FLAG_RX_NEEDS_RESTART) && 4922 if (!((adapter->flags & FLAG_RX_NEEDS_RESTART) &&
4891 (adapter->flags & FLAG_RX_RESTART_NOW))) { 4923 (adapter->flags & FLAG_RX_RESTART_NOW))) {
4892 e1000e_dump(adapter); 4924 e1000e_dump(adapter);
@@ -5306,7 +5338,7 @@ void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
5306 __e1000e_disable_aspm(pdev, state); 5338 __e1000e_disable_aspm(pdev, state);
5307} 5339}
5308 5340
5309#ifdef CONFIG_PM_OPS 5341#ifdef CONFIG_PM
5310static bool e1000e_pm_ready(struct e1000_adapter *adapter) 5342static bool e1000e_pm_ready(struct e1000_adapter *adapter)
5311{ 5343{
5312 return !!adapter->tx_ring->buffer_info; 5344 return !!adapter->tx_ring->buffer_info;
@@ -5457,7 +5489,7 @@ static int e1000_runtime_resume(struct device *dev)
5457 return __e1000_resume(pdev); 5489 return __e1000_resume(pdev);
5458} 5490}
5459#endif /* CONFIG_PM_RUNTIME */ 5491#endif /* CONFIG_PM_RUNTIME */
5460#endif /* CONFIG_PM_OPS */ 5492#endif /* CONFIG_PM */
5461 5493
5462static void e1000_shutdown(struct pci_dev *pdev) 5494static void e1000_shutdown(struct pci_dev *pdev)
5463{ 5495{
@@ -5935,7 +5967,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
5935 /* APME bit in EEPROM is mapped to WUC.APME */ 5967 /* APME bit in EEPROM is mapped to WUC.APME */
5936 eeprom_data = er32(WUC); 5968 eeprom_data = er32(WUC);
5937 eeprom_apme_mask = E1000_WUC_APME; 5969 eeprom_apme_mask = E1000_WUC_APME;
5938 if (eeprom_data & E1000_WUC_PHY_WAKE) 5970 if ((hw->mac.type > e1000_ich10lan) &&
5971 (eeprom_data & E1000_WUC_PHY_WAKE))
5939 adapter->flags2 |= FLAG2_HAS_PHY_WAKEUP; 5972 adapter->flags2 |= FLAG2_HAS_PHY_WAKEUP;
5940 } else if (adapter->flags & FLAG_APME_IN_CTRL3) { 5973 } else if (adapter->flags & FLAG_APME_IN_CTRL3) {
5941 if (adapter->flags & FLAG_APME_CHECK_PORT_B && 5974 if (adapter->flags & FLAG_APME_CHECK_PORT_B &&
@@ -6163,7 +6196,7 @@ static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = {
6163}; 6196};
6164MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); 6197MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
6165 6198
6166#ifdef CONFIG_PM_OPS 6199#ifdef CONFIG_PM
6167static const struct dev_pm_ops e1000_pm_ops = { 6200static const struct dev_pm_ops e1000_pm_ops = {
6168 SET_SYSTEM_SLEEP_PM_OPS(e1000_suspend, e1000_resume) 6201 SET_SYSTEM_SLEEP_PM_OPS(e1000_suspend, e1000_resume)
6169 SET_RUNTIME_PM_OPS(e1000_runtime_suspend, 6202 SET_RUNTIME_PM_OPS(e1000_runtime_suspend,
@@ -6177,7 +6210,7 @@ static struct pci_driver e1000_driver = {
6177 .id_table = e1000_pci_tbl, 6210 .id_table = e1000_pci_tbl,
6178 .probe = e1000_probe, 6211 .probe = e1000_probe,
6179 .remove = __devexit_p(e1000_remove), 6212 .remove = __devexit_p(e1000_remove),
6180#ifdef CONFIG_PM_OPS 6213#ifdef CONFIG_PM
6181 .driver.pm = &e1000_pm_ops, 6214 .driver.pm = &e1000_pm_ops,
6182#endif 6215#endif
6183 .shutdown = e1000_shutdown, 6216 .shutdown = e1000_shutdown,
diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index b79d7e1555d5..db0290f05bdf 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -1163,15 +1163,11 @@ static int ethoc_resume(struct platform_device *pdev)
1163# define ethoc_resume NULL 1163# define ethoc_resume NULL
1164#endif 1164#endif
1165 1165
1166#ifdef CONFIG_OF
1167static struct of_device_id ethoc_match[] = { 1166static struct of_device_id ethoc_match[] = {
1168 { 1167 { .compatible = "opencores,ethoc", },
1169 .compatible = "opencores,ethoc",
1170 },
1171 {}, 1168 {},
1172}; 1169};
1173MODULE_DEVICE_TABLE(of, ethoc_match); 1170MODULE_DEVICE_TABLE(of, ethoc_match);
1174#endif
1175 1171
1176static struct platform_driver ethoc_driver = { 1172static struct platform_driver ethoc_driver = {
1177 .probe = ethoc_probe, 1173 .probe = ethoc_probe,
@@ -1181,9 +1177,7 @@ static struct platform_driver ethoc_driver = {
1181 .driver = { 1177 .driver = {
1182 .name = "ethoc", 1178 .name = "ethoc",
1183 .owner = THIS_MODULE, 1179 .owner = THIS_MODULE,
1184#ifdef CONFIG_OF
1185 .of_match_table = ethoc_match, 1180 .of_match_table = ethoc_match,
1186#endif
1187 }, 1181 },
1188}; 1182};
1189 1183
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 2a71373719ae..cd0282d5d40f 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -74,7 +74,8 @@ static struct platform_device_id fec_devtype[] = {
74 }, { 74 }, {
75 .name = "imx28-fec", 75 .name = "imx28-fec",
76 .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME, 76 .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME,
77 } 77 },
78 { }
78}; 79};
79 80
80static unsigned char macaddr[ETH_ALEN]; 81static unsigned char macaddr[ETH_ALEN];
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index af09296ef0dd..9c0b1bac6af6 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -5645,6 +5645,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
5645 goto out_error; 5645 goto out_error;
5646 } 5646 }
5647 5647
5648 netif_carrier_off(dev);
5649
5648 dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n", 5650 dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
5649 dev->name, np->phy_oui, np->phyaddr, dev->dev_addr); 5651 dev->name, np->phy_oui, np->phyaddr, dev->dev_addr);
5650 5652
diff --git a/drivers/net/igbvf/vf.c b/drivers/net/igbvf/vf.c
index 74486a8b009a..af3822f9ea9a 100644
--- a/drivers/net/igbvf/vf.c
+++ b/drivers/net/igbvf/vf.c
@@ -220,7 +220,7 @@ static u32 e1000_hash_mc_addr_vf(struct e1000_hw *hw, u8 *mc_addr)
220 * The parameter rar_count will usually be hw->mac.rar_entry_count 220 * The parameter rar_count will usually be hw->mac.rar_entry_count
221 * unless there are workarounds that change this. 221 * unless there are workarounds that change this.
222 **/ 222 **/
223void e1000_update_mc_addr_list_vf(struct e1000_hw *hw, 223static void e1000_update_mc_addr_list_vf(struct e1000_hw *hw,
224 u8 *mc_addr_list, u32 mc_addr_count, 224 u8 *mc_addr_list, u32 mc_addr_count,
225 u32 rar_used_count, u32 rar_count) 225 u32 rar_used_count, u32 rar_count)
226{ 226{
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
index 8753980668c7..c54a88274d51 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ixgbe/ixgbe_fcoe.c
@@ -159,7 +159,7 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
159 struct scatterlist *sg; 159 struct scatterlist *sg;
160 unsigned int i, j, dmacount; 160 unsigned int i, j, dmacount;
161 unsigned int len; 161 unsigned int len;
162 static const unsigned int bufflen = 4096; 162 static const unsigned int bufflen = IXGBE_FCBUFF_MIN;
163 unsigned int firstoff = 0; 163 unsigned int firstoff = 0;
164 unsigned int lastsize; 164 unsigned int lastsize;
165 unsigned int thisoff = 0; 165 unsigned int thisoff = 0;
@@ -254,6 +254,24 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
254 /* only the last buffer may have non-full bufflen */ 254 /* only the last buffer may have non-full bufflen */
255 lastsize = thisoff + thislen; 255 lastsize = thisoff + thislen;
256 256
257 /*
258 * lastsize can not be buffer len.
259 * If it is then adding another buffer with lastsize = 1.
260 */
261 if (lastsize == bufflen) {
262 if (j >= IXGBE_BUFFCNT_MAX) {
263 e_err(drv, "xid=%x:%d,%d,%d:addr=%llx "
264 "not enough user buffers. We need an extra "
265 "buffer because lastsize is bufflen.\n",
266 xid, i, j, dmacount, (u64)addr);
267 goto out_noddp_free;
268 }
269
270 ddp->udl[j] = (u64)(fcoe->extra_ddp_buffer_dma);
271 j++;
272 lastsize = 1;
273 }
274
257 fcbuff = (IXGBE_FCBUFF_4KB << IXGBE_FCBUFF_BUFFSIZE_SHIFT); 275 fcbuff = (IXGBE_FCBUFF_4KB << IXGBE_FCBUFF_BUFFSIZE_SHIFT);
258 fcbuff |= ((j & 0xff) << IXGBE_FCBUFF_BUFFCNT_SHIFT); 276 fcbuff |= ((j & 0xff) << IXGBE_FCBUFF_BUFFCNT_SHIFT);
259 fcbuff |= (firstoff << IXGBE_FCBUFF_OFFSET_SHIFT); 277 fcbuff |= (firstoff << IXGBE_FCBUFF_OFFSET_SHIFT);
@@ -532,6 +550,24 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
532 e_err(drv, "failed to allocated FCoE DDP pool\n"); 550 e_err(drv, "failed to allocated FCoE DDP pool\n");
533 551
534 spin_lock_init(&fcoe->lock); 552 spin_lock_init(&fcoe->lock);
553
554 /* Extra buffer to be shared by all DDPs for HW work around */
555 fcoe->extra_ddp_buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC);
556 if (fcoe->extra_ddp_buffer == NULL) {
557 e_err(drv, "failed to allocated extra DDP buffer\n");
558 goto out_extra_ddp_buffer_alloc;
559 }
560
561 fcoe->extra_ddp_buffer_dma =
562 dma_map_single(&adapter->pdev->dev,
563 fcoe->extra_ddp_buffer,
564 IXGBE_FCBUFF_MIN,
565 DMA_FROM_DEVICE);
566 if (dma_mapping_error(&adapter->pdev->dev,
567 fcoe->extra_ddp_buffer_dma)) {
568 e_err(drv, "failed to map extra DDP buffer\n");
569 goto out_extra_ddp_buffer_dma;
570 }
535 } 571 }
536 572
537 /* Enable L2 eth type filter for FCoE */ 573 /* Enable L2 eth type filter for FCoE */
@@ -581,6 +617,14 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
581 } 617 }
582 } 618 }
583#endif 619#endif
620
621 return;
622
623out_extra_ddp_buffer_dma:
624 kfree(fcoe->extra_ddp_buffer);
625out_extra_ddp_buffer_alloc:
626 pci_pool_destroy(fcoe->pool);
627 fcoe->pool = NULL;
584} 628}
585 629
586/** 630/**
@@ -600,6 +644,11 @@ void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter)
600 if (fcoe->pool) { 644 if (fcoe->pool) {
601 for (i = 0; i < IXGBE_FCOE_DDP_MAX; i++) 645 for (i = 0; i < IXGBE_FCOE_DDP_MAX; i++)
602 ixgbe_fcoe_ddp_put(adapter->netdev, i); 646 ixgbe_fcoe_ddp_put(adapter->netdev, i);
647 dma_unmap_single(&adapter->pdev->dev,
648 fcoe->extra_ddp_buffer_dma,
649 IXGBE_FCBUFF_MIN,
650 DMA_FROM_DEVICE);
651 kfree(fcoe->extra_ddp_buffer);
603 pci_pool_destroy(fcoe->pool); 652 pci_pool_destroy(fcoe->pool);
604 fcoe->pool = NULL; 653 fcoe->pool = NULL;
605 } 654 }
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.h b/drivers/net/ixgbe/ixgbe_fcoe.h
index 4bc2c551c8db..65cc8fb14fe7 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ixgbe/ixgbe_fcoe.h
@@ -70,6 +70,8 @@ struct ixgbe_fcoe {
70 spinlock_t lock; 70 spinlock_t lock;
71 struct pci_pool *pool; 71 struct pci_pool *pool;
72 struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX]; 72 struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
73 unsigned char *extra_ddp_buffer;
74 dma_addr_t extra_ddp_buffer_dma;
73}; 75};
74 76
75#endif /* _IXGBE_FCOE_H */ 77#endif /* _IXGBE_FCOE_H */
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index fbae703b46d7..30f9ccfb4f87 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3728,7 +3728,8 @@ static void ixgbe_sfp_link_config(struct ixgbe_adapter *adapter)
3728 * We need to try and force an autonegotiation 3728 * We need to try and force an autonegotiation
3729 * session, then bring up link. 3729 * session, then bring up link.
3730 */ 3730 */
3731 hw->mac.ops.setup_sfp(hw); 3731 if (hw->mac.ops.setup_sfp)
3732 hw->mac.ops.setup_sfp(hw);
3732 if (!(adapter->flags & IXGBE_FLAG_IN_SFP_LINK_TASK)) 3733 if (!(adapter->flags & IXGBE_FLAG_IN_SFP_LINK_TASK))
3733 schedule_work(&adapter->multispeed_fiber_task); 3734 schedule_work(&adapter->multispeed_fiber_task);
3734 } else { 3735 } else {
@@ -5968,7 +5969,8 @@ static void ixgbe_sfp_config_module_task(struct work_struct *work)
5968 unregister_netdev(adapter->netdev); 5969 unregister_netdev(adapter->netdev);
5969 return; 5970 return;
5970 } 5971 }
5971 hw->mac.ops.setup_sfp(hw); 5972 if (hw->mac.ops.setup_sfp)
5973 hw->mac.ops.setup_sfp(hw);
5972 5974
5973 if (!(adapter->flags & IXGBE_FLAG_IN_SFP_LINK_TASK)) 5975 if (!(adapter->flags & IXGBE_FLAG_IN_SFP_LINK_TASK))
5974 /* This will also work for DA Twinax connections */ 5976 /* This will also work for DA Twinax connections */
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index f69e73e2191e..79ccb54ab00c 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -260,7 +260,7 @@ static int macb_mii_init(struct macb *bp)
260 for (i = 0; i < PHY_MAX_ADDR; i++) 260 for (i = 0; i < PHY_MAX_ADDR; i++)
261 bp->mii_bus->irq[i] = PHY_POLL; 261 bp->mii_bus->irq[i] = PHY_POLL;
262 262
263 platform_set_drvdata(bp->dev, bp->mii_bus); 263 dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
264 264
265 if (mdiobus_register(bp->mii_bus)) 265 if (mdiobus_register(bp->mii_bus))
266 goto err_out_free_mdio_irq; 266 goto err_out_free_mdio_irq;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 5933621ac3ff..fc27a9926d9e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -528,8 +528,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
528 vnet_hdr_len = q->vnet_hdr_sz; 528 vnet_hdr_len = q->vnet_hdr_sz;
529 529
530 err = -EINVAL; 530 err = -EINVAL;
531 if ((len -= vnet_hdr_len) < 0) 531 if (len < vnet_hdr_len)
532 goto err; 532 goto err;
533 len -= vnet_hdr_len;
533 534
534 err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0, 535 err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0,
535 sizeof(vnet_hdr)); 536 sizeof(vnet_hdr));
diff --git a/drivers/net/pch_gbe/pch_gbe.h b/drivers/net/pch_gbe/pch_gbe.h
index a0c26a99520f..e1e33c80fb25 100644
--- a/drivers/net/pch_gbe/pch_gbe.h
+++ b/drivers/net/pch_gbe/pch_gbe.h
@@ -73,7 +73,7 @@ struct pch_gbe_regs {
73 struct pch_gbe_regs_mac_adr mac_adr[16]; 73 struct pch_gbe_regs_mac_adr mac_adr[16];
74 u32 ADDR_MASK; 74 u32 ADDR_MASK;
75 u32 MIIM; 75 u32 MIIM;
76 u32 reserve2; 76 u32 MAC_ADDR_LOAD;
77 u32 RGMII_ST; 77 u32 RGMII_ST;
78 u32 RGMII_CTRL; 78 u32 RGMII_CTRL;
79 u32 reserve3[3]; 79 u32 reserve3[3];
diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c
index 4c9a7d4f3fca..8c66e22c3a0a 100644
--- a/drivers/net/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/pch_gbe/pch_gbe_main.c
@@ -29,6 +29,7 @@ const char pch_driver_version[] = DRV_VERSION;
29#define PCH_GBE_SHORT_PKT 64 29#define PCH_GBE_SHORT_PKT 64
30#define DSC_INIT16 0xC000 30#define DSC_INIT16 0xC000
31#define PCH_GBE_DMA_ALIGN 0 31#define PCH_GBE_DMA_ALIGN 0
32#define PCH_GBE_DMA_PADDING 2
32#define PCH_GBE_WATCHDOG_PERIOD (1 * HZ) /* watchdog time */ 33#define PCH_GBE_WATCHDOG_PERIOD (1 * HZ) /* watchdog time */
33#define PCH_GBE_COPYBREAK_DEFAULT 256 34#define PCH_GBE_COPYBREAK_DEFAULT 256
34#define PCH_GBE_PCI_BAR 1 35#define PCH_GBE_PCI_BAR 1
@@ -88,6 +89,12 @@ static unsigned int copybreak __read_mostly = PCH_GBE_COPYBREAK_DEFAULT;
88static int pch_gbe_mdio_read(struct net_device *netdev, int addr, int reg); 89static int pch_gbe_mdio_read(struct net_device *netdev, int addr, int reg);
89static void pch_gbe_mdio_write(struct net_device *netdev, int addr, int reg, 90static void pch_gbe_mdio_write(struct net_device *netdev, int addr, int reg,
90 int data); 91 int data);
92
93inline void pch_gbe_mac_load_mac_addr(struct pch_gbe_hw *hw)
94{
95 iowrite32(0x01, &hw->reg->MAC_ADDR_LOAD);
96}
97
91/** 98/**
92 * pch_gbe_mac_read_mac_addr - Read MAC address 99 * pch_gbe_mac_read_mac_addr - Read MAC address
93 * @hw: Pointer to the HW structure 100 * @hw: Pointer to the HW structure
@@ -1365,16 +1372,13 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
1365 struct pch_gbe_buffer *buffer_info; 1372 struct pch_gbe_buffer *buffer_info;
1366 struct pch_gbe_rx_desc *rx_desc; 1373 struct pch_gbe_rx_desc *rx_desc;
1367 u32 length; 1374 u32 length;
1368 unsigned char tmp_packet[ETH_HLEN];
1369 unsigned int i; 1375 unsigned int i;
1370 unsigned int cleaned_count = 0; 1376 unsigned int cleaned_count = 0;
1371 bool cleaned = false; 1377 bool cleaned = false;
1372 struct sk_buff *skb; 1378 struct sk_buff *skb, *new_skb;
1373 u8 dma_status; 1379 u8 dma_status;
1374 u16 gbec_status; 1380 u16 gbec_status;
1375 u32 tcp_ip_status; 1381 u32 tcp_ip_status;
1376 u8 skb_copy_flag = 0;
1377 u8 skb_padding_flag = 0;
1378 1382
1379 i = rx_ring->next_to_clean; 1383 i = rx_ring->next_to_clean;
1380 1384
@@ -1418,55 +1422,70 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
1418 pr_err("Receive CRC Error\n"); 1422 pr_err("Receive CRC Error\n");
1419 } else { 1423 } else {
1420 /* get receive length */ 1424 /* get receive length */
1421 /* length convert[-3], padding[-2] */ 1425 /* length convert[-3] */
1422 length = (rx_desc->rx_words_eob) - 3 - 2; 1426 length = (rx_desc->rx_words_eob) - 3;
1423 1427
1424 /* Decide the data conversion method */ 1428 /* Decide the data conversion method */
1425 if (!adapter->rx_csum) { 1429 if (!adapter->rx_csum) {
1426 /* [Header:14][payload] */ 1430 /* [Header:14][payload] */
1427 skb_padding_flag = 0; 1431 if (NET_IP_ALIGN) {
1428 skb_copy_flag = 1; 1432 /* Because alignment differs,
1433 * the new_skb is newly allocated,
1434 * and data is copied to new_skb.*/
1435 new_skb = netdev_alloc_skb(netdev,
1436 length + NET_IP_ALIGN);
1437 if (!new_skb) {
1438 /* dorrop error */
1439 pr_err("New skb allocation "
1440 "Error\n");
1441 goto dorrop;
1442 }
1443 skb_reserve(new_skb, NET_IP_ALIGN);
1444 memcpy(new_skb->data, skb->data,
1445 length);
1446 skb = new_skb;
1447 } else {
1448 /* DMA buffer is used as SKB as it is.*/
1449 buffer_info->skb = NULL;
1450 }
1429 } else { 1451 } else {
1430 /* [Header:14][padding:2][payload] */ 1452 /* [Header:14][padding:2][payload] */
1431 skb_padding_flag = 1; 1453 /* The length includes padding length */
1432 if (length < copybreak) 1454 length = length - PCH_GBE_DMA_PADDING;
1433 skb_copy_flag = 1; 1455 if ((length < copybreak) ||
1434 else 1456 (NET_IP_ALIGN != PCH_GBE_DMA_PADDING)) {
1435 skb_copy_flag = 0; 1457 /* Because alignment differs,
1436 } 1458 * the new_skb is newly allocated,
1437 1459 * and data is copied to new_skb.
1438 /* Data conversion */ 1460 * Padding data is deleted
1439 if (skb_copy_flag) { /* recycle skb */ 1461 * at the time of a copy.*/
1440 struct sk_buff *new_skb; 1462 new_skb = netdev_alloc_skb(netdev,
1441 new_skb = 1463 length + NET_IP_ALIGN);
1442 netdev_alloc_skb(netdev, 1464 if (!new_skb) {
1443 length + NET_IP_ALIGN); 1465 /* dorrop error */
1444 if (new_skb) { 1466 pr_err("New skb allocation "
1445 if (!skb_padding_flag) { 1467 "Error\n");
1446 skb_reserve(new_skb, 1468 goto dorrop;
1447 NET_IP_ALIGN);
1448 } 1469 }
1470 skb_reserve(new_skb, NET_IP_ALIGN);
1449 memcpy(new_skb->data, skb->data, 1471 memcpy(new_skb->data, skb->data,
1450 length); 1472 ETH_HLEN);
1451 /* save the skb 1473 memcpy(&new_skb->data[ETH_HLEN],
1452 * in buffer_info as good */ 1474 &skb->data[ETH_HLEN +
1475 PCH_GBE_DMA_PADDING],
1476 length - ETH_HLEN);
1453 skb = new_skb; 1477 skb = new_skb;
1454 } else if (!skb_padding_flag) { 1478 } else {
1455 /* dorrop error */ 1479 /* Padding data is deleted
1456 pr_err("New skb allocation Error\n"); 1480 * by moving header data.*/
1457 goto dorrop; 1481 memmove(&skb->data[PCH_GBE_DMA_PADDING],
1482 &skb->data[0], ETH_HLEN);
1483 skb_reserve(skb, NET_IP_ALIGN);
1484 buffer_info->skb = NULL;
1458 } 1485 }
1459 } else {
1460 buffer_info->skb = NULL;
1461 } 1486 }
1462 if (skb_padding_flag) { 1487 /* The length includes FCS length */
1463 memcpy(&tmp_packet[0], &skb->data[0], ETH_HLEN); 1488 length = length - ETH_FCS_LEN;
1464 memcpy(&skb->data[NET_IP_ALIGN], &tmp_packet[0],
1465 ETH_HLEN);
1466 skb_reserve(skb, NET_IP_ALIGN);
1467
1468 }
1469
1470 /* update status of driver */ 1489 /* update status of driver */
1471 adapter->stats.rx_bytes += length; 1490 adapter->stats.rx_bytes += length;
1472 adapter->stats.rx_packets++; 1491 adapter->stats.rx_packets++;
@@ -2318,6 +2337,7 @@ static int pch_gbe_probe(struct pci_dev *pdev,
2318 netdev->features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_GRO; 2337 netdev->features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_GRO;
2319 pch_gbe_set_ethtool_ops(netdev); 2338 pch_gbe_set_ethtool_ops(netdev);
2320 2339
2340 pch_gbe_mac_load_mac_addr(&adapter->hw);
2321 pch_gbe_mac_reset_hw(&adapter->hw); 2341 pch_gbe_mac_reset_hw(&adapter->hw);
2322 2342
2323 /* setup the private structure */ 2343 /* setup the private structure */
@@ -2426,7 +2446,7 @@ static struct pci_driver pch_gbe_pcidev = {
2426 .id_table = pch_gbe_pcidev_id, 2446 .id_table = pch_gbe_pcidev_id,
2427 .probe = pch_gbe_probe, 2447 .probe = pch_gbe_probe,
2428 .remove = pch_gbe_remove, 2448 .remove = pch_gbe_remove,
2429#ifdef CONFIG_PM_OPS 2449#ifdef CONFIG_PM
2430 .driver.pm = &pch_gbe_pm_ops, 2450 .driver.pm = &pch_gbe_pm_ops,
2431#endif 2451#endif
2432 .shutdown = pch_gbe_shutdown, 2452 .shutdown = pch_gbe_shutdown,
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 9226cda4d054..530ab5a10bd3 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -691,6 +691,7 @@ static struct pcmcia_device_id fmvj18x_ids[] = {
691 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0105, 0x0e0a), 691 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0105, 0x0e0a),
692 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0e01), 692 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0e01),
693 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0a05), 693 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0a05),
694 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0b05),
694 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x1101), 695 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x1101),
695 PCMCIA_DEVICE_NULL, 696 PCMCIA_DEVICE_NULL,
696}; 697};
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 27e6f6d43cac..e3ebd90ae651 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -49,8 +49,8 @@
49#include <asm/processor.h> 49#include <asm/processor.h>
50 50
51#define DRV_NAME "r6040" 51#define DRV_NAME "r6040"
52#define DRV_VERSION "0.26" 52#define DRV_VERSION "0.27"
53#define DRV_RELDATE "30May2010" 53#define DRV_RELDATE "23Feb2011"
54 54
55/* PHY CHIP Address */ 55/* PHY CHIP Address */
56#define PHY1_ADDR 1 /* For MAC1 */ 56#define PHY1_ADDR 1 /* For MAC1 */
@@ -69,6 +69,8 @@
69 69
70/* MAC registers */ 70/* MAC registers */
71#define MCR0 0x00 /* Control register 0 */ 71#define MCR0 0x00 /* Control register 0 */
72#define MCR0_PROMISC 0x0020 /* Promiscuous mode */
73#define MCR0_HASH_EN 0x0100 /* Enable multicast hash table function */
72#define MCR1 0x04 /* Control register 1 */ 74#define MCR1 0x04 /* Control register 1 */
73#define MAC_RST 0x0001 /* Reset the MAC */ 75#define MAC_RST 0x0001 /* Reset the MAC */
74#define MBCR 0x08 /* Bus control */ 76#define MBCR 0x08 /* Bus control */
@@ -851,77 +853,92 @@ static void r6040_multicast_list(struct net_device *dev)
851{ 853{
852 struct r6040_private *lp = netdev_priv(dev); 854 struct r6040_private *lp = netdev_priv(dev);
853 void __iomem *ioaddr = lp->base; 855 void __iomem *ioaddr = lp->base;
854 u16 *adrp;
855 u16 reg;
856 unsigned long flags; 856 unsigned long flags;
857 struct netdev_hw_addr *ha; 857 struct netdev_hw_addr *ha;
858 int i; 858 int i;
859 u16 *adrp;
860 u16 hash_table[4] = { 0 };
861
862 spin_lock_irqsave(&lp->lock, flags);
859 863
860 /* MAC Address */ 864 /* Keep our MAC Address */
861 adrp = (u16 *)dev->dev_addr; 865 adrp = (u16 *)dev->dev_addr;
862 iowrite16(adrp[0], ioaddr + MID_0L); 866 iowrite16(adrp[0], ioaddr + MID_0L);
863 iowrite16(adrp[1], ioaddr + MID_0M); 867 iowrite16(adrp[1], ioaddr + MID_0M);
864 iowrite16(adrp[2], ioaddr + MID_0H); 868 iowrite16(adrp[2], ioaddr + MID_0H);
865 869
866 /* Promiscous Mode */
867 spin_lock_irqsave(&lp->lock, flags);
868
869 /* Clear AMCP & PROM bits */ 870 /* Clear AMCP & PROM bits */
870 reg = ioread16(ioaddr) & ~0x0120; 871 lp->mcr0 = ioread16(ioaddr + MCR0) & ~(MCR0_PROMISC | MCR0_HASH_EN);
871 if (dev->flags & IFF_PROMISC) {
872 reg |= 0x0020;
873 lp->mcr0 |= 0x0020;
874 }
875 /* Too many multicast addresses
876 * accept all traffic */
877 else if ((netdev_mc_count(dev) > MCAST_MAX) ||
878 (dev->flags & IFF_ALLMULTI))
879 reg |= 0x0020;
880 872
881 iowrite16(reg, ioaddr); 873 /* Promiscuous mode */
882 spin_unlock_irqrestore(&lp->lock, flags); 874 if (dev->flags & IFF_PROMISC)
875 lp->mcr0 |= MCR0_PROMISC;
883 876
884 /* Build the hash table */ 877 /* Enable multicast hash table function to
885 if (netdev_mc_count(dev) > MCAST_MAX) { 878 * receive all multicast packets. */
886 u16 hash_table[4]; 879 else if (dev->flags & IFF_ALLMULTI) {
887 u32 crc; 880 lp->mcr0 |= MCR0_HASH_EN;
888 881
889 for (i = 0; i < 4; i++) 882 for (i = 0; i < MCAST_MAX ; i++) {
890 hash_table[i] = 0; 883 iowrite16(0, ioaddr + MID_1L + 8 * i);
884 iowrite16(0, ioaddr + MID_1M + 8 * i);
885 iowrite16(0, ioaddr + MID_1H + 8 * i);
886 }
891 887
888 for (i = 0; i < 4; i++)
889 hash_table[i] = 0xffff;
890 }
891 /* Use internal multicast address registers if the number of
892 * multicast addresses is not greater than MCAST_MAX. */
893 else if (netdev_mc_count(dev) <= MCAST_MAX) {
894 i = 0;
892 netdev_for_each_mc_addr(ha, dev) { 895 netdev_for_each_mc_addr(ha, dev) {
893 char *addrs = ha->addr; 896 u16 *adrp = (u16 *) ha->addr;
897 iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
898 iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
899 iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
900 i++;
901 }
902 while (i < MCAST_MAX) {
903 iowrite16(0, ioaddr + MID_1L + 8 * i);
904 iowrite16(0, ioaddr + MID_1M + 8 * i);
905 iowrite16(0, ioaddr + MID_1H + 8 * i);
906 i++;
907 }
908 }
909 /* Otherwise, Enable multicast hash table function. */
910 else {
911 u32 crc;
894 912
895 if (!(*addrs & 1)) 913 lp->mcr0 |= MCR0_HASH_EN;
896 continue; 914
915 for (i = 0; i < MCAST_MAX ; i++) {
916 iowrite16(0, ioaddr + MID_1L + 8 * i);
917 iowrite16(0, ioaddr + MID_1M + 8 * i);
918 iowrite16(0, ioaddr + MID_1H + 8 * i);
919 }
897 920
898 crc = ether_crc_le(6, addrs); 921 /* Build multicast hash table */
922 netdev_for_each_mc_addr(ha, dev) {
923 u8 *addrs = ha->addr;
924
925 crc = ether_crc(ETH_ALEN, addrs);
899 crc >>= 26; 926 crc >>= 26;
900 hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf)); 927 hash_table[crc >> 4] |= 1 << (crc & 0xf);
901 } 928 }
902 /* Fill the MAC hash tables with their values */ 929 }
930
931 iowrite16(lp->mcr0, ioaddr + MCR0);
932
933 /* Fill the MAC hash tables with their values */
934 if (lp->mcr0 && MCR0_HASH_EN) {
903 iowrite16(hash_table[0], ioaddr + MAR0); 935 iowrite16(hash_table[0], ioaddr + MAR0);
904 iowrite16(hash_table[1], ioaddr + MAR1); 936 iowrite16(hash_table[1], ioaddr + MAR1);
905 iowrite16(hash_table[2], ioaddr + MAR2); 937 iowrite16(hash_table[2], ioaddr + MAR2);
906 iowrite16(hash_table[3], ioaddr + MAR3); 938 iowrite16(hash_table[3], ioaddr + MAR3);
907 } 939 }
908 /* Multicast Address 1~4 case */ 940
909 i = 0; 941 spin_unlock_irqrestore(&lp->lock, flags);
910 netdev_for_each_mc_addr(ha, dev) {
911 if (i >= MCAST_MAX)
912 break;
913 adrp = (u16 *) ha->addr;
914 iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
915 iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
916 iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
917 i++;
918 }
919 while (i < MCAST_MAX) {
920 iowrite16(0xffff, ioaddr + MID_1L + 8 * i);
921 iowrite16(0xffff, ioaddr + MID_1M + 8 * i);
922 iowrite16(0xffff, ioaddr + MID_1H + 8 * i);
923 i++;
924 }
925} 942}
926 943
927static void netdev_get_drvinfo(struct net_device *dev, 944static void netdev_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 59ccf0c5c610..7ffdb80adf40 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -25,6 +25,7 @@
25#include <linux/dma-mapping.h> 25#include <linux/dma-mapping.h>
26#include <linux/pm_runtime.h> 26#include <linux/pm_runtime.h>
27#include <linux/firmware.h> 27#include <linux/firmware.h>
28#include <linux/pci-aspm.h>
28 29
29#include <asm/system.h> 30#include <asm/system.h>
30#include <asm/io.h> 31#include <asm/io.h>
@@ -617,8 +618,9 @@ static void ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg, u32 data)
617 } 618 }
618} 619}
619 620
620static void rtl8168_oob_notify(void __iomem *ioaddr, u8 cmd) 621static void rtl8168_oob_notify(struct rtl8169_private *tp, u8 cmd)
621{ 622{
623 void __iomem *ioaddr = tp->mmio_addr;
622 int i; 624 int i;
623 625
624 RTL_W8(ERIDR, cmd); 626 RTL_W8(ERIDR, cmd);
@@ -630,7 +632,7 @@ static void rtl8168_oob_notify(void __iomem *ioaddr, u8 cmd)
630 break; 632 break;
631 } 633 }
632 634
633 ocp_write(ioaddr, 0x1, 0x30, 0x00000001); 635 ocp_write(tp, 0x1, 0x30, 0x00000001);
634} 636}
635 637
636#define OOB_CMD_RESET 0x00 638#define OOB_CMD_RESET 0x00
@@ -2868,8 +2870,11 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
2868{ 2870{
2869 void __iomem *ioaddr = tp->mmio_addr; 2871 void __iomem *ioaddr = tp->mmio_addr;
2870 2872
2871 if (tp->mac_version == RTL_GIGA_MAC_VER_27) 2873 if (((tp->mac_version == RTL_GIGA_MAC_VER_27) ||
2874 (tp->mac_version == RTL_GIGA_MAC_VER_28)) &&
2875 (ocp_read(tp, 0x0f, 0x0010) & 0x00008000)) {
2872 return; 2876 return;
2877 }
2873 2878
2874 if (((tp->mac_version == RTL_GIGA_MAC_VER_23) || 2879 if (((tp->mac_version == RTL_GIGA_MAC_VER_23) ||
2875 (tp->mac_version == RTL_GIGA_MAC_VER_24)) && 2880 (tp->mac_version == RTL_GIGA_MAC_VER_24)) &&
@@ -2891,6 +2896,8 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
2891 switch (tp->mac_version) { 2896 switch (tp->mac_version) {
2892 case RTL_GIGA_MAC_VER_25: 2897 case RTL_GIGA_MAC_VER_25:
2893 case RTL_GIGA_MAC_VER_26: 2898 case RTL_GIGA_MAC_VER_26:
2899 case RTL_GIGA_MAC_VER_27:
2900 case RTL_GIGA_MAC_VER_28:
2894 RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80); 2901 RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
2895 break; 2902 break;
2896 } 2903 }
@@ -2900,12 +2907,17 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
2900{ 2907{
2901 void __iomem *ioaddr = tp->mmio_addr; 2908 void __iomem *ioaddr = tp->mmio_addr;
2902 2909
2903 if (tp->mac_version == RTL_GIGA_MAC_VER_27) 2910 if (((tp->mac_version == RTL_GIGA_MAC_VER_27) ||
2911 (tp->mac_version == RTL_GIGA_MAC_VER_28)) &&
2912 (ocp_read(tp, 0x0f, 0x0010) & 0x00008000)) {
2904 return; 2913 return;
2914 }
2905 2915
2906 switch (tp->mac_version) { 2916 switch (tp->mac_version) {
2907 case RTL_GIGA_MAC_VER_25: 2917 case RTL_GIGA_MAC_VER_25:
2908 case RTL_GIGA_MAC_VER_26: 2918 case RTL_GIGA_MAC_VER_26:
2919 case RTL_GIGA_MAC_VER_27:
2920 case RTL_GIGA_MAC_VER_28:
2909 RTL_W8(PMCH, RTL_R8(PMCH) | 0x80); 2921 RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
2910 break; 2922 break;
2911 } 2923 }
@@ -3009,6 +3021,11 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3009 mii->reg_num_mask = 0x1f; 3021 mii->reg_num_mask = 0x1f;
3010 mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII); 3022 mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
3011 3023
3024 /* disable ASPM completely as that cause random device stop working
3025 * problems as well as full system hangs for some PCIe devices users */
3026 pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
3027 PCIE_LINK_STATE_CLKPM);
3028
3012 /* enable device (incl. PCI PM wakeup and hotplug setup) */ 3029 /* enable device (incl. PCI PM wakeup and hotplug setup) */
3013 rc = pci_enable_device(pdev); 3030 rc = pci_enable_device(pdev);
3014 if (rc < 0) { 3031 if (rc < 0) {
@@ -3042,7 +3059,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3042 goto err_out_mwi_2; 3059 goto err_out_mwi_2;
3043 } 3060 }
3044 3061
3045 tp->cp_cmd = PCIMulRW | RxChkSum; 3062 tp->cp_cmd = RxChkSum;
3046 3063
3047 if ((sizeof(dma_addr_t) > 4) && 3064 if ((sizeof(dma_addr_t) > 4) &&
3048 !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && use_dac) { 3065 !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && use_dac) {
@@ -3190,6 +3207,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3190 if (pci_dev_run_wake(pdev)) 3207 if (pci_dev_run_wake(pdev))
3191 pm_runtime_put_noidle(&pdev->dev); 3208 pm_runtime_put_noidle(&pdev->dev);
3192 3209
3210 netif_carrier_off(dev);
3211
3193out: 3212out:
3194 return rc; 3213 return rc;
3195 3214
@@ -3316,7 +3335,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
3316 /* Disable interrupts */ 3335 /* Disable interrupts */
3317 rtl8169_irq_mask_and_ack(ioaddr); 3336 rtl8169_irq_mask_and_ack(ioaddr);
3318 3337
3319 if (tp->mac_version == RTL_GIGA_MAC_VER_28) { 3338 if (tp->mac_version == RTL_GIGA_MAC_VER_27 ||
3339 tp->mac_version == RTL_GIGA_MAC_VER_28) {
3320 while (RTL_R8(TxPoll) & NPQ) 3340 while (RTL_R8(TxPoll) & NPQ)
3321 udelay(20); 3341 udelay(20);
3322 3342
@@ -3845,8 +3865,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
3845 Cxpl_dbg_sel | \ 3865 Cxpl_dbg_sel | \
3846 ASF | \ 3866 ASF | \
3847 PktCntrDisable | \ 3867 PktCntrDisable | \
3848 PCIDAC | \ 3868 Mac_dbgo_sel)
3849 PCIMulRW)
3850 3869
3851static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev) 3870static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev)
3852{ 3871{
@@ -3876,8 +3895,6 @@ static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev)
3876 if ((cfg1 & LEDS0) && (cfg1 & LEDS1)) 3895 if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
3877 RTL_W8(Config1, cfg1 & ~LEDS0); 3896 RTL_W8(Config1, cfg1 & ~LEDS0);
3878 3897
3879 RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK);
3880
3881 rtl_ephy_init(ioaddr, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1)); 3898 rtl_ephy_init(ioaddr, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
3882} 3899}
3883 3900
@@ -3889,8 +3906,6 @@ static void rtl_hw_start_8102e_2(void __iomem *ioaddr, struct pci_dev *pdev)
3889 3906
3890 RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable); 3907 RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
3891 RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en); 3908 RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
3892
3893 RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK);
3894} 3909}
3895 3910
3896static void rtl_hw_start_8102e_3(void __iomem *ioaddr, struct pci_dev *pdev) 3911static void rtl_hw_start_8102e_3(void __iomem *ioaddr, struct pci_dev *pdev)
@@ -3916,6 +3931,8 @@ static void rtl_hw_start_8101(struct net_device *dev)
3916 } 3931 }
3917 } 3932 }
3918 3933
3934 RTL_W8(Cfg9346, Cfg9346_Unlock);
3935
3919 switch (tp->mac_version) { 3936 switch (tp->mac_version) {
3920 case RTL_GIGA_MAC_VER_07: 3937 case RTL_GIGA_MAC_VER_07:
3921 rtl_hw_start_8102e_1(ioaddr, pdev); 3938 rtl_hw_start_8102e_1(ioaddr, pdev);
@@ -3930,14 +3947,13 @@ static void rtl_hw_start_8101(struct net_device *dev)
3930 break; 3947 break;
3931 } 3948 }
3932 3949
3933 RTL_W8(Cfg9346, Cfg9346_Unlock); 3950 RTL_W8(Cfg9346, Cfg9346_Lock);
3934 3951
3935 RTL_W8(MaxTxPacketSize, TxPacketMax); 3952 RTL_W8(MaxTxPacketSize, TxPacketMax);
3936 3953
3937 rtl_set_rx_max_size(ioaddr, rx_buf_sz); 3954 rtl_set_rx_max_size(ioaddr, rx_buf_sz);
3938 3955
3939 tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW; 3956 tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
3940
3941 RTL_W16(CPlusCmd, tp->cp_cmd); 3957 RTL_W16(CPlusCmd, tp->cp_cmd);
3942 3958
3943 RTL_W16(IntrMitigate, 0x0000); 3959 RTL_W16(IntrMitigate, 0x0000);
@@ -3947,14 +3963,10 @@ static void rtl_hw_start_8101(struct net_device *dev)
3947 RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); 3963 RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
3948 rtl_set_rx_tx_config_registers(tp); 3964 rtl_set_rx_tx_config_registers(tp);
3949 3965
3950 RTL_W8(Cfg9346, Cfg9346_Lock);
3951
3952 RTL_R8(IntrMask); 3966 RTL_R8(IntrMask);
3953 3967
3954 rtl_set_rx_mode(dev); 3968 rtl_set_rx_mode(dev);
3955 3969
3956 RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
3957
3958 RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000); 3970 RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
3959 3971
3960 RTL_W16(IntrMask, tp->intr_event); 3972 RTL_W16(IntrMask, tp->intr_event);
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index 0e8bb19ed60d..ca886d98bdc7 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -569,9 +569,14 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
569 struct ethtool_test *test, u64 *data) 569 struct ethtool_test *test, u64 *data)
570{ 570{
571 struct efx_nic *efx = netdev_priv(net_dev); 571 struct efx_nic *efx = netdev_priv(net_dev);
572 struct efx_self_tests efx_tests; 572 struct efx_self_tests *efx_tests;
573 int already_up; 573 int already_up;
574 int rc; 574 int rc = -ENOMEM;
575
576 efx_tests = kzalloc(sizeof(*efx_tests), GFP_KERNEL);
577 if (!efx_tests)
578 goto fail;
579
575 580
576 ASSERT_RTNL(); 581 ASSERT_RTNL();
577 if (efx->state != STATE_RUNNING) { 582 if (efx->state != STATE_RUNNING) {
@@ -589,13 +594,11 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
589 if (rc) { 594 if (rc) {
590 netif_err(efx, drv, efx->net_dev, 595 netif_err(efx, drv, efx->net_dev,
591 "failed opening device.\n"); 596 "failed opening device.\n");
592 goto fail2; 597 goto fail1;
593 } 598 }
594 } 599 }
595 600
596 memset(&efx_tests, 0, sizeof(efx_tests)); 601 rc = efx_selftest(efx, efx_tests, test->flags);
597
598 rc = efx_selftest(efx, &efx_tests, test->flags);
599 602
600 if (!already_up) 603 if (!already_up)
601 dev_close(efx->net_dev); 604 dev_close(efx->net_dev);
@@ -604,10 +607,11 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
604 rc == 0 ? "passed" : "failed", 607 rc == 0 ? "passed" : "failed",
605 (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on"); 608 (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
606 609
607 fail2: 610fail1:
608 fail1:
609 /* Fill ethtool results structures */ 611 /* Fill ethtool results structures */
610 efx_ethtool_fill_self_tests(efx, &efx_tests, NULL, data); 612 efx_ethtool_fill_self_tests(efx, efx_tests, NULL, data);
613 kfree(efx_tests);
614fail:
611 if (rc) 615 if (rc)
612 test->flags |= ETH_TEST_FL_FAILED; 616 test->flags |= ETH_TEST_FL_FAILED;
613} 617}
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 42daf98ba736..35b28f42d208 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -3856,9 +3856,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
3856 memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN); 3856 memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN);
3857 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); 3857 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
3858 3858
3859 /* device is off until link detection */
3860 netif_carrier_off(dev);
3861
3862 return dev; 3859 return dev;
3863} 3860}
3864 3861
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 64bfdae5956f..d70bde95460b 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1178,6 +1178,11 @@ static int smsc911x_open(struct net_device *dev)
1178 smsc911x_reg_write(pdata, HW_CFG, 0x00050000); 1178 smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
1179 smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740); 1179 smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740);
1180 1180
1181 /* Increase the legal frame size of VLAN tagged frames to 1522 bytes */
1182 spin_lock_irq(&pdata->mac_lock);
1183 smsc911x_mac_write(pdata, VLAN1, ETH_P_8021Q);
1184 spin_unlock_irq(&pdata->mac_lock);
1185
1181 /* Make sure EEPROM has finished loading before setting GPIO_CFG */ 1186 /* Make sure EEPROM has finished loading before setting GPIO_CFG */
1182 timeout = 50; 1187 timeout = 50;
1183 while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) && 1188 while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) &&
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index 34a0af3837f9..0e5f03135b50 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1560,8 +1560,10 @@ static int stmmac_mac_device_setup(struct net_device *dev)
1560 1560
1561 priv->hw = device; 1561 priv->hw = device;
1562 1562
1563 if (device_can_wakeup(priv->device)) 1563 if (device_can_wakeup(priv->device)) {
1564 priv->wolopts = WAKE_MAGIC; /* Magic Frame as default */ 1564 priv->wolopts = WAKE_MAGIC; /* Magic Frame as default */
1565 enable_irq_wake(dev->irq);
1566 }
1565 1567
1566 return 0; 1568 return 0;
1567} 1569}
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 93b32d366611..06c0e5033656 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -11158,7 +11158,9 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
11158 if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES) 11158 if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES)
11159 break; /* We have no PHY */ 11159 break; /* We have no PHY */
11160 11160
11161 if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) 11161 if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) ||
11162 ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
11163 !netif_running(dev)))
11162 return -EAGAIN; 11164 return -EAGAIN;
11163 11165
11164 spin_lock_bh(&tp->lock); 11166 spin_lock_bh(&tp->lock);
@@ -11174,7 +11176,9 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
11174 if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES) 11176 if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES)
11175 break; /* We have no PHY */ 11177 break; /* We have no PHY */
11176 11178
11177 if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) 11179 if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) ||
11180 ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
11181 !netif_running(dev)))
11178 return -EAGAIN; 11182 return -EAGAIN;
11179 11183
11180 spin_lock_bh(&tp->lock); 11184 spin_lock_bh(&tp->lock);
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 02b622e3b9fb..5002f5be47be 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -651,6 +651,10 @@ static const struct usb_device_id products[] = {
651 .driver_info = (unsigned long)&dm9601_info, 651 .driver_info = (unsigned long)&dm9601_info,
652 }, 652 },
653 { 653 {
654 USB_DEVICE(0x0fe6, 0x9700), /* DM9601 USB to Fast Ethernet Adapter */
655 .driver_info = (unsigned long)&dm9601_info,
656 },
657 {
654 USB_DEVICE(0x0a46, 0x9000), /* DM9000E */ 658 USB_DEVICE(0x0a46, 0x9000), /* DM9000E */
655 .driver_info = (unsigned long)&dm9601_info, 659 .driver_info = (unsigned long)&dm9601_info,
656 }, 660 },
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index bed8fcedff49..6d83812603b6 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2628,15 +2628,15 @@ exit:
2628 2628
2629static void hso_free_tiomget(struct hso_serial *serial) 2629static void hso_free_tiomget(struct hso_serial *serial)
2630{ 2630{
2631 struct hso_tiocmget *tiocmget = serial->tiocmget; 2631 struct hso_tiocmget *tiocmget;
2632 if (!serial)
2633 return;
2634 tiocmget = serial->tiocmget;
2632 if (tiocmget) { 2635 if (tiocmget) {
2633 if (tiocmget->urb) { 2636 usb_free_urb(tiocmget->urb);
2634 usb_free_urb(tiocmget->urb); 2637 tiocmget->urb = NULL;
2635 tiocmget->urb = NULL;
2636 }
2637 serial->tiocmget = NULL; 2638 serial->tiocmget = NULL;
2638 kfree(tiocmget); 2639 kfree(tiocmget);
2639
2640 } 2640 }
2641} 2641}
2642 2642
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index ed9a41643ff4..95c41d56631c 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -931,8 +931,10 @@ fail_halt:
931 if (urb != NULL) { 931 if (urb != NULL) {
932 clear_bit (EVENT_RX_MEMORY, &dev->flags); 932 clear_bit (EVENT_RX_MEMORY, &dev->flags);
933 status = usb_autopm_get_interface(dev->intf); 933 status = usb_autopm_get_interface(dev->intf);
934 if (status < 0) 934 if (status < 0) {
935 usb_free_urb(urb);
935 goto fail_lowmem; 936 goto fail_lowmem;
937 }
936 if (rx_submit (dev, urb, GFP_KERNEL) == -ENOLINK) 938 if (rx_submit (dev, urb, GFP_KERNEL) == -ENOLINK)
937 resched = 0; 939 resched = 0;
938 usb_autopm_put_interface(dev->intf); 940 usb_autopm_put_interface(dev->intf);
diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c
index 78c26fdccad1..62ce2f4e8605 100644
--- a/drivers/net/wireless/ath/ath5k/phy.c
+++ b/drivers/net/wireless/ath/ath5k/phy.c
@@ -282,6 +282,34 @@ int ath5k_hw_phy_disable(struct ath5k_hw *ah)
282 return 0; 282 return 0;
283} 283}
284 284
285/*
286 * Wait for synth to settle
287 */
288static void ath5k_hw_wait_for_synth(struct ath5k_hw *ah,
289 struct ieee80211_channel *channel)
290{
291 /*
292 * On 5211+ read activation -> rx delay
293 * and use it (100ns steps).
294 */
295 if (ah->ah_version != AR5K_AR5210) {
296 u32 delay;
297 delay = ath5k_hw_reg_read(ah, AR5K_PHY_RX_DELAY) &
298 AR5K_PHY_RX_DELAY_M;
299 delay = (channel->hw_value & CHANNEL_CCK) ?
300 ((delay << 2) / 22) : (delay / 10);
301 if (ah->ah_bwmode == AR5K_BWMODE_10MHZ)
302 delay = delay << 1;
303 if (ah->ah_bwmode == AR5K_BWMODE_5MHZ)
304 delay = delay << 2;
305 /* XXX: /2 on turbo ? Let's be safe
306 * for now */
307 udelay(100 + delay);
308 } else {
309 mdelay(1);
310 }
311}
312
285 313
286/**********************\ 314/**********************\
287* RF Gain optimization * 315* RF Gain optimization *
@@ -1253,6 +1281,7 @@ static int ath5k_hw_channel(struct ath5k_hw *ah,
1253 case AR5K_RF5111: 1281 case AR5K_RF5111:
1254 ret = ath5k_hw_rf5111_channel(ah, channel); 1282 ret = ath5k_hw_rf5111_channel(ah, channel);
1255 break; 1283 break;
1284 case AR5K_RF2317:
1256 case AR5K_RF2425: 1285 case AR5K_RF2425:
1257 ret = ath5k_hw_rf2425_channel(ah, channel); 1286 ret = ath5k_hw_rf2425_channel(ah, channel);
1258 break; 1287 break;
@@ -3237,6 +3266,13 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
3237 /* Failed */ 3266 /* Failed */
3238 if (i >= 100) 3267 if (i >= 100)
3239 return -EIO; 3268 return -EIO;
3269
3270 /* Set channel and wait for synth */
3271 ret = ath5k_hw_channel(ah, channel);
3272 if (ret)
3273 return ret;
3274
3275 ath5k_hw_wait_for_synth(ah, channel);
3240 } 3276 }
3241 3277
3242 /* 3278 /*
@@ -3251,13 +3287,53 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
3251 if (ret) 3287 if (ret)
3252 return ret; 3288 return ret;
3253 3289
3290 /* Write OFDM timings on 5212*/
3291 if (ah->ah_version == AR5K_AR5212 &&
3292 channel->hw_value & CHANNEL_OFDM) {
3293
3294 ret = ath5k_hw_write_ofdm_timings(ah, channel);
3295 if (ret)
3296 return ret;
3297
3298 /* Spur info is available only from EEPROM versions
3299 * greater than 5.3, but the EEPROM routines will use
3300 * static values for older versions */
3301 if (ah->ah_mac_srev >= AR5K_SREV_AR5424)
3302 ath5k_hw_set_spur_mitigation_filter(ah,
3303 channel);
3304 }
3305
3306 /* If we used fast channel switching
3307 * we are done, release RF bus and
3308 * fire up NF calibration.
3309 *
3310 * Note: Only NF calibration due to
3311 * channel change, not AGC calibration
3312 * since AGC is still running !
3313 */
3314 if (fast) {
3315 /*
3316 * Release RF Bus grant
3317 */
3318 AR5K_REG_DISABLE_BITS(ah, AR5K_PHY_RFBUS_REQ,
3319 AR5K_PHY_RFBUS_REQ_REQUEST);
3320
3321 /*
3322 * Start NF calibration
3323 */
3324 AR5K_REG_ENABLE_BITS(ah, AR5K_PHY_AGCCTL,
3325 AR5K_PHY_AGCCTL_NF);
3326
3327 return ret;
3328 }
3329
3254 /* 3330 /*
3255 * For 5210 we do all initialization using 3331 * For 5210 we do all initialization using
3256 * initvals, so we don't have to modify 3332 * initvals, so we don't have to modify
3257 * any settings (5210 also only supports 3333 * any settings (5210 also only supports
3258 * a/aturbo modes) 3334 * a/aturbo modes)
3259 */ 3335 */
3260 if ((ah->ah_version != AR5K_AR5210) && !fast) { 3336 if (ah->ah_version != AR5K_AR5210) {
3261 3337
3262 /* 3338 /*
3263 * Write initial RF gain settings 3339 * Write initial RF gain settings
@@ -3276,22 +3352,6 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
3276 if (ret) 3352 if (ret)
3277 return ret; 3353 return ret;
3278 3354
3279 /* Write OFDM timings on 5212*/
3280 if (ah->ah_version == AR5K_AR5212 &&
3281 channel->hw_value & CHANNEL_OFDM) {
3282
3283 ret = ath5k_hw_write_ofdm_timings(ah, channel);
3284 if (ret)
3285 return ret;
3286
3287 /* Spur info is available only from EEPROM versions
3288 * greater than 5.3, but the EEPROM routines will use
3289 * static values for older versions */
3290 if (ah->ah_mac_srev >= AR5K_SREV_AR5424)
3291 ath5k_hw_set_spur_mitigation_filter(ah,
3292 channel);
3293 }
3294
3295 /*Enable/disable 802.11b mode on 5111 3355 /*Enable/disable 802.11b mode on 5111
3296 (enable 2111 frequency converter + CCK)*/ 3356 (enable 2111 frequency converter + CCK)*/
3297 if (ah->ah_radio == AR5K_RF5111) { 3357 if (ah->ah_radio == AR5K_RF5111) {
@@ -3322,47 +3382,20 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
3322 */ 3382 */
3323 ath5k_hw_reg_write(ah, AR5K_PHY_ACT_ENABLE, AR5K_PHY_ACT); 3383 ath5k_hw_reg_write(ah, AR5K_PHY_ACT_ENABLE, AR5K_PHY_ACT);
3324 3384
3385 ath5k_hw_wait_for_synth(ah, channel);
3386
3325 /* 3387 /*
3326 * On 5211+ read activation -> rx delay 3388 * Perform ADC test to see if baseband is ready
3327 * and use it. 3389 * Set tx hold and check adc test register
3328 */ 3390 */
3329 if (ah->ah_version != AR5K_AR5210) { 3391 phy_tst1 = ath5k_hw_reg_read(ah, AR5K_PHY_TST1);
3330 u32 delay; 3392 ath5k_hw_reg_write(ah, AR5K_PHY_TST1_TXHOLD, AR5K_PHY_TST1);
3331 delay = ath5k_hw_reg_read(ah, AR5K_PHY_RX_DELAY) & 3393 for (i = 0; i <= 20; i++) {
3332 AR5K_PHY_RX_DELAY_M; 3394 if (!(ath5k_hw_reg_read(ah, AR5K_PHY_ADC_TEST) & 0x10))
3333 delay = (channel->hw_value & CHANNEL_CCK) ? 3395 break;
3334 ((delay << 2) / 22) : (delay / 10); 3396 udelay(200);
3335 if (ah->ah_bwmode == AR5K_BWMODE_10MHZ)
3336 delay = delay << 1;
3337 if (ah->ah_bwmode == AR5K_BWMODE_5MHZ)
3338 delay = delay << 2;
3339 /* XXX: /2 on turbo ? Let's be safe
3340 * for now */
3341 udelay(100 + delay);
3342 } else {
3343 mdelay(1);
3344 }
3345
3346 if (fast)
3347 /*
3348 * Release RF Bus grant
3349 */
3350 AR5K_REG_DISABLE_BITS(ah, AR5K_PHY_RFBUS_REQ,
3351 AR5K_PHY_RFBUS_REQ_REQUEST);
3352 else {
3353 /*
3354 * Perform ADC test to see if baseband is ready
3355 * Set tx hold and check adc test register
3356 */
3357 phy_tst1 = ath5k_hw_reg_read(ah, AR5K_PHY_TST1);
3358 ath5k_hw_reg_write(ah, AR5K_PHY_TST1_TXHOLD, AR5K_PHY_TST1);
3359 for (i = 0; i <= 20; i++) {
3360 if (!(ath5k_hw_reg_read(ah, AR5K_PHY_ADC_TEST) & 0x10))
3361 break;
3362 udelay(200);
3363 }
3364 ath5k_hw_reg_write(ah, phy_tst1, AR5K_PHY_TST1);
3365 } 3397 }
3398 ath5k_hw_reg_write(ah, phy_tst1, AR5K_PHY_TST1);
3366 3399
3367 /* 3400 /*
3368 * Start automatic gain control calibration 3401 * Start automatic gain control calibration
diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index 23838e37d45f..1a7fa6ea4cf5 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -21,7 +21,6 @@
21#include <linux/device.h> 21#include <linux/device.h>
22#include <linux/leds.h> 22#include <linux/leds.h>
23#include <linux/completion.h> 23#include <linux/completion.h>
24#include <linux/pm_qos_params.h>
25 24
26#include "debug.h" 25#include "debug.h"
27#include "common.h" 26#include "common.h"
@@ -57,8 +56,6 @@ struct ath_node;
57 56
58#define A_MAX(a, b) ((a) > (b) ? (a) : (b)) 57#define A_MAX(a, b) ((a) > (b) ? (a) : (b))
59 58
60#define ATH9K_PM_QOS_DEFAULT_VALUE 55
61
62#define TSF_TO_TU(_h,_l) \ 59#define TSF_TO_TU(_h,_l) \
63 ((((u32)(_h)) << 22) | (((u32)(_l)) >> 10)) 60 ((((u32)(_h)) << 22) | (((u32)(_l)) >> 10))
64 61
@@ -633,8 +630,6 @@ struct ath_softc {
633 struct ath_descdma txsdma; 630 struct ath_descdma txsdma;
634 631
635 struct ath_ant_comb ant_comb; 632 struct ath_ant_comb ant_comb;
636
637 struct pm_qos_request_list pm_qos_req;
638}; 633};
639 634
640struct ath_wiphy { 635struct ath_wiphy {
@@ -666,7 +661,6 @@ static inline void ath_read_cachesize(struct ath_common *common, int *csz)
666extern struct ieee80211_ops ath9k_ops; 661extern struct ieee80211_ops ath9k_ops;
667extern int ath9k_modparam_nohwcrypt; 662extern int ath9k_modparam_nohwcrypt;
668extern int led_blink; 663extern int led_blink;
669extern int ath9k_pm_qos_value;
670extern bool is_ath9k_unloaded; 664extern bool is_ath9k_unloaded;
671 665
672irqreturn_t ath_isr(int irq, void *dev); 666irqreturn_t ath_isr(int irq, void *dev);
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 5ab3084eb9cb..07b1633b7f3f 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -219,8 +219,9 @@ static int __hif_usb_tx(struct hif_device_usb *hif_dev)
219 struct tx_buf *tx_buf = NULL; 219 struct tx_buf *tx_buf = NULL;
220 struct sk_buff *nskb = NULL; 220 struct sk_buff *nskb = NULL;
221 int ret = 0, i; 221 int ret = 0, i;
222 u16 *hdr, tx_skb_cnt = 0; 222 u16 tx_skb_cnt = 0;
223 u8 *buf; 223 u8 *buf;
224 __le16 *hdr;
224 225
225 if (hif_dev->tx.tx_skb_cnt == 0) 226 if (hif_dev->tx.tx_skb_cnt == 0)
226 return 0; 227 return 0;
@@ -245,9 +246,9 @@ static int __hif_usb_tx(struct hif_device_usb *hif_dev)
245 246
246 buf = tx_buf->buf; 247 buf = tx_buf->buf;
247 buf += tx_buf->offset; 248 buf += tx_buf->offset;
248 hdr = (u16 *)buf; 249 hdr = (__le16 *)buf;
249 *hdr++ = nskb->len; 250 *hdr++ = cpu_to_le16(nskb->len);
250 *hdr++ = ATH_USB_TX_STREAM_MODE_TAG; 251 *hdr++ = cpu_to_le16(ATH_USB_TX_STREAM_MODE_TAG);
251 buf += 4; 252 buf += 4;
252 memcpy(buf, nskb->data, nskb->len); 253 memcpy(buf, nskb->data, nskb->len);
253 tx_buf->len = nskb->len + 4; 254 tx_buf->len = nskb->len + 4;
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 087a6a95edd5..a033d01bf8a0 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -41,10 +41,6 @@ static int ath9k_btcoex_enable;
41module_param_named(btcoex_enable, ath9k_btcoex_enable, int, 0444); 41module_param_named(btcoex_enable, ath9k_btcoex_enable, int, 0444);
42MODULE_PARM_DESC(btcoex_enable, "Enable wifi-BT coexistence"); 42MODULE_PARM_DESC(btcoex_enable, "Enable wifi-BT coexistence");
43 43
44int ath9k_pm_qos_value = ATH9K_PM_QOS_DEFAULT_VALUE;
45module_param_named(pmqos, ath9k_pm_qos_value, int, S_IRUSR | S_IRGRP | S_IROTH);
46MODULE_PARM_DESC(pmqos, "User specified PM-QOS value");
47
48bool is_ath9k_unloaded; 44bool is_ath9k_unloaded;
49/* We use the hw_value as an index into our private channel structure */ 45/* We use the hw_value as an index into our private channel structure */
50 46
@@ -762,9 +758,6 @@ int ath9k_init_device(u16 devid, struct ath_softc *sc, u16 subsysid,
762 ath_init_leds(sc); 758 ath_init_leds(sc);
763 ath_start_rfkill_poll(sc); 759 ath_start_rfkill_poll(sc);
764 760
765 pm_qos_add_request(&sc->pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
766 PM_QOS_DEFAULT_VALUE);
767
768 return 0; 761 return 0;
769 762
770error_world: 763error_world:
@@ -831,7 +824,6 @@ void ath9k_deinit_device(struct ath_softc *sc)
831 } 824 }
832 825
833 ieee80211_unregister_hw(hw); 826 ieee80211_unregister_hw(hw);
834 pm_qos_remove_request(&sc->pm_qos_req);
835 ath_rx_cleanup(sc); 827 ath_rx_cleanup(sc);
836 ath_tx_cleanup(sc); 828 ath_tx_cleanup(sc);
837 ath9k_deinit_softc(sc); 829 ath9k_deinit_softc(sc);
diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c
index 180170d3ce25..2915b11edefb 100644
--- a/drivers/net/wireless/ath/ath9k/mac.c
+++ b/drivers/net/wireless/ath/ath9k/mac.c
@@ -885,7 +885,7 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah, enum ath9k_int ints)
885 struct ath_common *common = ath9k_hw_common(ah); 885 struct ath_common *common = ath9k_hw_common(ah);
886 886
887 if (!(ints & ATH9K_INT_GLOBAL)) 887 if (!(ints & ATH9K_INT_GLOBAL))
888 ath9k_hw_enable_interrupts(ah); 888 ath9k_hw_disable_interrupts(ah);
889 889
890 ath_dbg(common, ATH_DBG_INTERRUPT, "0x%x => 0x%x\n", omask, ints); 890 ath_dbg(common, ATH_DBG_INTERRUPT, "0x%x => 0x%x\n", omask, ints);
891 891
@@ -963,7 +963,8 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah, enum ath9k_int ints)
963 REG_CLR_BIT(ah, AR_IMR_S5, AR_IMR_S5_TIM_TIMER); 963 REG_CLR_BIT(ah, AR_IMR_S5, AR_IMR_S5_TIM_TIMER);
964 } 964 }
965 965
966 ath9k_hw_enable_interrupts(ah); 966 if (ints & ATH9K_INT_GLOBAL)
967 ath9k_hw_enable_interrupts(ah);
967 968
968 return; 969 return;
969} 970}
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index da5c64597c1f..a09d15f7aa6e 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1173,12 +1173,6 @@ static int ath9k_start(struct ieee80211_hw *hw)
1173 ath9k_btcoex_timer_resume(sc); 1173 ath9k_btcoex_timer_resume(sc);
1174 } 1174 }
1175 1175
1176 /* User has the option to provide pm-qos value as a module
1177 * parameter rather than using the default value of
1178 * 'ATH9K_PM_QOS_DEFAULT_VALUE'.
1179 */
1180 pm_qos_update_request(&sc->pm_qos_req, ath9k_pm_qos_value);
1181
1182 if (ah->caps.pcie_lcr_extsync_en && common->bus_ops->extn_synch_en) 1176 if (ah->caps.pcie_lcr_extsync_en && common->bus_ops->extn_synch_en)
1183 common->bus_ops->extn_synch_en(common); 1177 common->bus_ops->extn_synch_en(common);
1184 1178
@@ -1345,8 +1339,6 @@ static void ath9k_stop(struct ieee80211_hw *hw)
1345 1339
1346 sc->sc_flags |= SC_OP_INVALID; 1340 sc->sc_flags |= SC_OP_INVALID;
1347 1341
1348 pm_qos_update_request(&sc->pm_qos_req, PM_QOS_DEFAULT_VALUE);
1349
1350 mutex_unlock(&sc->mutex); 1342 mutex_unlock(&sc->mutex);
1351 1343
1352 ath_dbg(common, ATH_DBG_CONFIG, "Driver halt\n"); 1344 ath_dbg(common, ATH_DBG_CONFIG, "Driver halt\n");
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index 537732e5964f..f82c400be288 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -118,6 +118,8 @@ static struct usb_device_id carl9170_usb_ids[] = {
118 { USB_DEVICE(0x057c, 0x8402) }, 118 { USB_DEVICE(0x057c, 0x8402) },
119 /* Qwest/Actiontec 802AIN Wireless N USB Network Adapter */ 119 /* Qwest/Actiontec 802AIN Wireless N USB Network Adapter */
120 { USB_DEVICE(0x1668, 0x1200) }, 120 { USB_DEVICE(0x1668, 0x1200) },
121 /* Airlive X.USB a/b/g/n */
122 { USB_DEVICE(0x1b75, 0x9170) },
121 123
122 /* terminate */ 124 /* terminate */
123 {} 125 {}
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 61915f371416..471a52a2f8d4 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -706,11 +706,10 @@ static void schedule_reset(struct ipw2100_priv *priv)
706 netif_stop_queue(priv->net_dev); 706 netif_stop_queue(priv->net_dev);
707 priv->status |= STATUS_RESET_PENDING; 707 priv->status |= STATUS_RESET_PENDING;
708 if (priv->reset_backoff) 708 if (priv->reset_backoff)
709 queue_delayed_work(priv->workqueue, &priv->reset_work, 709 schedule_delayed_work(&priv->reset_work,
710 priv->reset_backoff * HZ); 710 priv->reset_backoff * HZ);
711 else 711 else
712 queue_delayed_work(priv->workqueue, &priv->reset_work, 712 schedule_delayed_work(&priv->reset_work, 0);
713 0);
714 713
715 if (priv->reset_backoff < MAX_RESET_BACKOFF) 714 if (priv->reset_backoff < MAX_RESET_BACKOFF)
716 priv->reset_backoff++; 715 priv->reset_backoff++;
@@ -1474,7 +1473,7 @@ static int ipw2100_enable_adapter(struct ipw2100_priv *priv)
1474 1473
1475 if (priv->stop_hang_check) { 1474 if (priv->stop_hang_check) {
1476 priv->stop_hang_check = 0; 1475 priv->stop_hang_check = 0;
1477 queue_delayed_work(priv->workqueue, &priv->hang_check, HZ / 2); 1476 schedule_delayed_work(&priv->hang_check, HZ / 2);
1478 } 1477 }
1479 1478
1480 fail_up: 1479 fail_up:
@@ -1808,8 +1807,8 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
1808 1807
1809 if (priv->stop_rf_kill) { 1808 if (priv->stop_rf_kill) {
1810 priv->stop_rf_kill = 0; 1809 priv->stop_rf_kill = 0;
1811 queue_delayed_work(priv->workqueue, &priv->rf_kill, 1810 schedule_delayed_work(&priv->rf_kill,
1812 round_jiffies_relative(HZ)); 1811 round_jiffies_relative(HZ));
1813 } 1812 }
1814 1813
1815 deferred = 1; 1814 deferred = 1;
@@ -2086,7 +2085,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status)
2086 priv->status |= STATUS_ASSOCIATING; 2085 priv->status |= STATUS_ASSOCIATING;
2087 priv->connect_start = get_seconds(); 2086 priv->connect_start = get_seconds();
2088 2087
2089 queue_delayed_work(priv->workqueue, &priv->wx_event_work, HZ / 10); 2088 schedule_delayed_work(&priv->wx_event_work, HZ / 10);
2090} 2089}
2091 2090
2092static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid, 2091static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid,
@@ -2166,9 +2165,9 @@ static void isr_indicate_association_lost(struct ipw2100_priv *priv, u32 status)
2166 return; 2165 return;
2167 2166
2168 if (priv->status & STATUS_SECURITY_UPDATED) 2167 if (priv->status & STATUS_SECURITY_UPDATED)
2169 queue_delayed_work(priv->workqueue, &priv->security_work, 0); 2168 schedule_delayed_work(&priv->security_work, 0);
2170 2169
2171 queue_delayed_work(priv->workqueue, &priv->wx_event_work, 0); 2170 schedule_delayed_work(&priv->wx_event_work, 0);
2172} 2171}
2173 2172
2174static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status) 2173static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status)
@@ -2183,8 +2182,7 @@ static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status)
2183 /* Make sure the RF Kill check timer is running */ 2182 /* Make sure the RF Kill check timer is running */
2184 priv->stop_rf_kill = 0; 2183 priv->stop_rf_kill = 0;
2185 cancel_delayed_work(&priv->rf_kill); 2184 cancel_delayed_work(&priv->rf_kill);
2186 queue_delayed_work(priv->workqueue, &priv->rf_kill, 2185 schedule_delayed_work(&priv->rf_kill, round_jiffies_relative(HZ));
2187 round_jiffies_relative(HZ));
2188} 2186}
2189 2187
2190static void send_scan_event(void *data) 2188static void send_scan_event(void *data)
@@ -2219,13 +2217,12 @@ static void isr_scan_complete(struct ipw2100_priv *priv, u32 status)
2219 /* Only userspace-requested scan completion events go out immediately */ 2217 /* Only userspace-requested scan completion events go out immediately */
2220 if (!priv->user_requested_scan) { 2218 if (!priv->user_requested_scan) {
2221 if (!delayed_work_pending(&priv->scan_event_later)) 2219 if (!delayed_work_pending(&priv->scan_event_later))
2222 queue_delayed_work(priv->workqueue, 2220 schedule_delayed_work(&priv->scan_event_later,
2223 &priv->scan_event_later, 2221 round_jiffies_relative(msecs_to_jiffies(4000)));
2224 round_jiffies_relative(msecs_to_jiffies(4000)));
2225 } else { 2222 } else {
2226 priv->user_requested_scan = 0; 2223 priv->user_requested_scan = 0;
2227 cancel_delayed_work(&priv->scan_event_later); 2224 cancel_delayed_work(&priv->scan_event_later);
2228 queue_work(priv->workqueue, &priv->scan_event_now); 2225 schedule_work(&priv->scan_event_now);
2229 } 2226 }
2230} 2227}
2231 2228
@@ -4329,8 +4326,8 @@ static int ipw_radio_kill_sw(struct ipw2100_priv *priv, int disable_radio)
4329 /* Make sure the RF_KILL check timer is running */ 4326 /* Make sure the RF_KILL check timer is running */
4330 priv->stop_rf_kill = 0; 4327 priv->stop_rf_kill = 0;
4331 cancel_delayed_work(&priv->rf_kill); 4328 cancel_delayed_work(&priv->rf_kill);
4332 queue_delayed_work(priv->workqueue, &priv->rf_kill, 4329 schedule_delayed_work(&priv->rf_kill,
4333 round_jiffies_relative(HZ)); 4330 round_jiffies_relative(HZ));
4334 } else 4331 } else
4335 schedule_reset(priv); 4332 schedule_reset(priv);
4336 } 4333 }
@@ -4461,20 +4458,17 @@ static void bd_queue_initialize(struct ipw2100_priv *priv,
4461 IPW_DEBUG_INFO("exit\n"); 4458 IPW_DEBUG_INFO("exit\n");
4462} 4459}
4463 4460
4464static void ipw2100_kill_workqueue(struct ipw2100_priv *priv) 4461static void ipw2100_kill_works(struct ipw2100_priv *priv)
4465{ 4462{
4466 if (priv->workqueue) { 4463 priv->stop_rf_kill = 1;
4467 priv->stop_rf_kill = 1; 4464 priv->stop_hang_check = 1;
4468 priv->stop_hang_check = 1; 4465 cancel_delayed_work_sync(&priv->reset_work);
4469 cancel_delayed_work(&priv->reset_work); 4466 cancel_delayed_work_sync(&priv->security_work);
4470 cancel_delayed_work(&priv->security_work); 4467 cancel_delayed_work_sync(&priv->wx_event_work);
4471 cancel_delayed_work(&priv->wx_event_work); 4468 cancel_delayed_work_sync(&priv->hang_check);
4472 cancel_delayed_work(&priv->hang_check); 4469 cancel_delayed_work_sync(&priv->rf_kill);
4473 cancel_delayed_work(&priv->rf_kill); 4470 cancel_work_sync(&priv->scan_event_now);
4474 cancel_delayed_work(&priv->scan_event_later); 4471 cancel_delayed_work_sync(&priv->scan_event_later);
4475 destroy_workqueue(priv->workqueue);
4476 priv->workqueue = NULL;
4477 }
4478} 4472}
4479 4473
4480static int ipw2100_tx_allocate(struct ipw2100_priv *priv) 4474static int ipw2100_tx_allocate(struct ipw2100_priv *priv)
@@ -6046,7 +6040,7 @@ static void ipw2100_hang_check(struct work_struct *work)
6046 priv->last_rtc = rtc; 6040 priv->last_rtc = rtc;
6047 6041
6048 if (!priv->stop_hang_check) 6042 if (!priv->stop_hang_check)
6049 queue_delayed_work(priv->workqueue, &priv->hang_check, HZ / 2); 6043 schedule_delayed_work(&priv->hang_check, HZ / 2);
6050 6044
6051 spin_unlock_irqrestore(&priv->low_lock, flags); 6045 spin_unlock_irqrestore(&priv->low_lock, flags);
6052} 6046}
@@ -6062,8 +6056,8 @@ static void ipw2100_rf_kill(struct work_struct *work)
6062 if (rf_kill_active(priv)) { 6056 if (rf_kill_active(priv)) {
6063 IPW_DEBUG_RF_KILL("RF Kill active, rescheduling GPIO check\n"); 6057 IPW_DEBUG_RF_KILL("RF Kill active, rescheduling GPIO check\n");
6064 if (!priv->stop_rf_kill) 6058 if (!priv->stop_rf_kill)
6065 queue_delayed_work(priv->workqueue, &priv->rf_kill, 6059 schedule_delayed_work(&priv->rf_kill,
6066 round_jiffies_relative(HZ)); 6060 round_jiffies_relative(HZ));
6067 goto exit_unlock; 6061 goto exit_unlock;
6068 } 6062 }
6069 6063
@@ -6209,8 +6203,6 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev,
6209 INIT_LIST_HEAD(&priv->fw_pend_list); 6203 INIT_LIST_HEAD(&priv->fw_pend_list);
6210 INIT_STAT(&priv->fw_pend_stat); 6204 INIT_STAT(&priv->fw_pend_stat);
6211 6205
6212 priv->workqueue = create_workqueue(DRV_NAME);
6213
6214 INIT_DELAYED_WORK(&priv->reset_work, ipw2100_reset_adapter); 6206 INIT_DELAYED_WORK(&priv->reset_work, ipw2100_reset_adapter);
6215 INIT_DELAYED_WORK(&priv->security_work, ipw2100_security_work); 6207 INIT_DELAYED_WORK(&priv->security_work, ipw2100_security_work);
6216 INIT_DELAYED_WORK(&priv->wx_event_work, ipw2100_wx_event_work); 6208 INIT_DELAYED_WORK(&priv->wx_event_work, ipw2100_wx_event_work);
@@ -6410,7 +6402,7 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev,
6410 if (dev->irq) 6402 if (dev->irq)
6411 free_irq(dev->irq, priv); 6403 free_irq(dev->irq, priv);
6412 6404
6413 ipw2100_kill_workqueue(priv); 6405 ipw2100_kill_works(priv);
6414 6406
6415 /* These are safe to call even if they weren't allocated */ 6407 /* These are safe to call even if they weren't allocated */
6416 ipw2100_queues_free(priv); 6408 ipw2100_queues_free(priv);
@@ -6460,9 +6452,7 @@ static void __devexit ipw2100_pci_remove_one(struct pci_dev *pci_dev)
6460 * first, then close() will crash. */ 6452 * first, then close() will crash. */
6461 unregister_netdev(dev); 6453 unregister_netdev(dev);
6462 6454
6463 /* ipw2100_down will ensure that there is no more pending work 6455 ipw2100_kill_works(priv);
6464 * in the workqueue's, so we can safely remove them now. */
6465 ipw2100_kill_workqueue(priv);
6466 6456
6467 ipw2100_queues_free(priv); 6457 ipw2100_queues_free(priv);
6468 6458
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.h b/drivers/net/wireless/ipw2x00/ipw2100.h
index 838002b4881e..99cba968aa58 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.h
+++ b/drivers/net/wireless/ipw2x00/ipw2100.h
@@ -580,7 +580,6 @@ struct ipw2100_priv {
580 580
581 struct tasklet_struct irq_tasklet; 581 struct tasklet_struct irq_tasklet;
582 582
583 struct workqueue_struct *workqueue;
584 struct delayed_work reset_work; 583 struct delayed_work reset_work;
585 struct delayed_work security_work; 584 struct delayed_work security_work;
586 struct delayed_work wx_event_work; 585 struct delayed_work wx_event_work;
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index ae438ed80c2f..160881f234cc 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -894,9 +894,8 @@ static void ipw_led_link_on(struct ipw_priv *priv)
894 894
895 /* If we aren't associated, schedule turning the LED off */ 895 /* If we aren't associated, schedule turning the LED off */
896 if (!(priv->status & STATUS_ASSOCIATED)) 896 if (!(priv->status & STATUS_ASSOCIATED))
897 queue_delayed_work(priv->workqueue, 897 schedule_delayed_work(&priv->led_link_off,
898 &priv->led_link_off, 898 LD_TIME_LINK_ON);
899 LD_TIME_LINK_ON);
900 } 899 }
901 900
902 spin_unlock_irqrestore(&priv->lock, flags); 901 spin_unlock_irqrestore(&priv->lock, flags);
@@ -939,8 +938,8 @@ static void ipw_led_link_off(struct ipw_priv *priv)
939 * turning the LED on (blink while unassociated) */ 938 * turning the LED on (blink while unassociated) */
940 if (!(priv->status & STATUS_RF_KILL_MASK) && 939 if (!(priv->status & STATUS_RF_KILL_MASK) &&
941 !(priv->status & STATUS_ASSOCIATED)) 940 !(priv->status & STATUS_ASSOCIATED))
942 queue_delayed_work(priv->workqueue, &priv->led_link_on, 941 schedule_delayed_work(&priv->led_link_on,
943 LD_TIME_LINK_OFF); 942 LD_TIME_LINK_OFF);
944 943
945 } 944 }
946 945
@@ -980,13 +979,11 @@ static void __ipw_led_activity_on(struct ipw_priv *priv)
980 priv->status |= STATUS_LED_ACT_ON; 979 priv->status |= STATUS_LED_ACT_ON;
981 980
982 cancel_delayed_work(&priv->led_act_off); 981 cancel_delayed_work(&priv->led_act_off);
983 queue_delayed_work(priv->workqueue, &priv->led_act_off, 982 schedule_delayed_work(&priv->led_act_off, LD_TIME_ACT_ON);
984 LD_TIME_ACT_ON);
985 } else { 983 } else {
986 /* Reschedule LED off for full time period */ 984 /* Reschedule LED off for full time period */
987 cancel_delayed_work(&priv->led_act_off); 985 cancel_delayed_work(&priv->led_act_off);
988 queue_delayed_work(priv->workqueue, &priv->led_act_off, 986 schedule_delayed_work(&priv->led_act_off, LD_TIME_ACT_ON);
989 LD_TIME_ACT_ON);
990 } 987 }
991} 988}
992 989
@@ -1795,13 +1792,11 @@ static int ipw_radio_kill_sw(struct ipw_priv *priv, int disable_radio)
1795 if (disable_radio) { 1792 if (disable_radio) {
1796 priv->status |= STATUS_RF_KILL_SW; 1793 priv->status |= STATUS_RF_KILL_SW;
1797 1794
1798 if (priv->workqueue) { 1795 cancel_delayed_work(&priv->request_scan);
1799 cancel_delayed_work(&priv->request_scan); 1796 cancel_delayed_work(&priv->request_direct_scan);
1800 cancel_delayed_work(&priv->request_direct_scan); 1797 cancel_delayed_work(&priv->request_passive_scan);
1801 cancel_delayed_work(&priv->request_passive_scan); 1798 cancel_delayed_work(&priv->scan_event);
1802 cancel_delayed_work(&priv->scan_event); 1799 schedule_work(&priv->down);
1803 }
1804 queue_work(priv->workqueue, &priv->down);
1805 } else { 1800 } else {
1806 priv->status &= ~STATUS_RF_KILL_SW; 1801 priv->status &= ~STATUS_RF_KILL_SW;
1807 if (rf_kill_active(priv)) { 1802 if (rf_kill_active(priv)) {
@@ -1809,10 +1804,10 @@ static int ipw_radio_kill_sw(struct ipw_priv *priv, int disable_radio)
1809 "disabled by HW switch\n"); 1804 "disabled by HW switch\n");
1810 /* Make sure the RF_KILL check timer is running */ 1805 /* Make sure the RF_KILL check timer is running */
1811 cancel_delayed_work(&priv->rf_kill); 1806 cancel_delayed_work(&priv->rf_kill);
1812 queue_delayed_work(priv->workqueue, &priv->rf_kill, 1807 schedule_delayed_work(&priv->rf_kill,
1813 round_jiffies_relative(2 * HZ)); 1808 round_jiffies_relative(2 * HZ));
1814 } else 1809 } else
1815 queue_work(priv->workqueue, &priv->up); 1810 schedule_work(&priv->up);
1816 } 1811 }
1817 1812
1818 return 1; 1813 return 1;
@@ -2063,7 +2058,7 @@ static void ipw_irq_tasklet(struct ipw_priv *priv)
2063 cancel_delayed_work(&priv->request_passive_scan); 2058 cancel_delayed_work(&priv->request_passive_scan);
2064 cancel_delayed_work(&priv->scan_event); 2059 cancel_delayed_work(&priv->scan_event);
2065 schedule_work(&priv->link_down); 2060 schedule_work(&priv->link_down);
2066 queue_delayed_work(priv->workqueue, &priv->rf_kill, 2 * HZ); 2061 schedule_delayed_work(&priv->rf_kill, 2 * HZ);
2067 handled |= IPW_INTA_BIT_RF_KILL_DONE; 2062 handled |= IPW_INTA_BIT_RF_KILL_DONE;
2068 } 2063 }
2069 2064
@@ -2103,7 +2098,7 @@ static void ipw_irq_tasklet(struct ipw_priv *priv)
2103 priv->status &= ~STATUS_HCMD_ACTIVE; 2098 priv->status &= ~STATUS_HCMD_ACTIVE;
2104 wake_up_interruptible(&priv->wait_command_queue); 2099 wake_up_interruptible(&priv->wait_command_queue);
2105 2100
2106 queue_work(priv->workqueue, &priv->adapter_restart); 2101 schedule_work(&priv->adapter_restart);
2107 handled |= IPW_INTA_BIT_FATAL_ERROR; 2102 handled |= IPW_INTA_BIT_FATAL_ERROR;
2108 } 2103 }
2109 2104
@@ -2323,11 +2318,6 @@ static int ipw_send_adapter_address(struct ipw_priv *priv, u8 * mac)
2323 return ipw_send_cmd_pdu(priv, IPW_CMD_ADAPTER_ADDRESS, ETH_ALEN, mac); 2318 return ipw_send_cmd_pdu(priv, IPW_CMD_ADAPTER_ADDRESS, ETH_ALEN, mac);
2324} 2319}
2325 2320
2326/*
2327 * NOTE: This must be executed from our workqueue as it results in udelay
2328 * being called which may corrupt the keyboard if executed on default
2329 * workqueue
2330 */
2331static void ipw_adapter_restart(void *adapter) 2321static void ipw_adapter_restart(void *adapter)
2332{ 2322{
2333 struct ipw_priv *priv = adapter; 2323 struct ipw_priv *priv = adapter;
@@ -2368,13 +2358,13 @@ static void ipw_scan_check(void *data)
2368 IPW_DEBUG_SCAN("Scan completion watchdog resetting " 2358 IPW_DEBUG_SCAN("Scan completion watchdog resetting "
2369 "adapter after (%dms).\n", 2359 "adapter after (%dms).\n",
2370 jiffies_to_msecs(IPW_SCAN_CHECK_WATCHDOG)); 2360 jiffies_to_msecs(IPW_SCAN_CHECK_WATCHDOG));
2371 queue_work(priv->workqueue, &priv->adapter_restart); 2361 schedule_work(&priv->adapter_restart);
2372 } else if (priv->status & STATUS_SCANNING) { 2362 } else if (priv->status & STATUS_SCANNING) {
2373 IPW_DEBUG_SCAN("Scan completion watchdog aborting scan " 2363 IPW_DEBUG_SCAN("Scan completion watchdog aborting scan "
2374 "after (%dms).\n", 2364 "after (%dms).\n",
2375 jiffies_to_msecs(IPW_SCAN_CHECK_WATCHDOG)); 2365 jiffies_to_msecs(IPW_SCAN_CHECK_WATCHDOG));
2376 ipw_abort_scan(priv); 2366 ipw_abort_scan(priv);
2377 queue_delayed_work(priv->workqueue, &priv->scan_check, HZ); 2367 schedule_delayed_work(&priv->scan_check, HZ);
2378 } 2368 }
2379} 2369}
2380 2370
@@ -3943,7 +3933,7 @@ static void ipw_send_disassociate(struct ipw_priv *priv, int quiet)
3943 3933
3944 if (priv->status & STATUS_ASSOCIATING) { 3934 if (priv->status & STATUS_ASSOCIATING) {
3945 IPW_DEBUG_ASSOC("Disassociating while associating.\n"); 3935 IPW_DEBUG_ASSOC("Disassociating while associating.\n");
3946 queue_work(priv->workqueue, &priv->disassociate); 3936 schedule_work(&priv->disassociate);
3947 return; 3937 return;
3948 } 3938 }
3949 3939
@@ -4360,8 +4350,7 @@ static void ipw_gather_stats(struct ipw_priv *priv)
4360 4350
4361 priv->quality = quality; 4351 priv->quality = quality;
4362 4352
4363 queue_delayed_work(priv->workqueue, &priv->gather_stats, 4353 schedule_delayed_work(&priv->gather_stats, IPW_STATS_INTERVAL);
4364 IPW_STATS_INTERVAL);
4365} 4354}
4366 4355
4367static void ipw_bg_gather_stats(struct work_struct *work) 4356static void ipw_bg_gather_stats(struct work_struct *work)
@@ -4396,10 +4385,10 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv,
4396 IPW_DEBUG(IPW_DL_INFO | IPW_DL_NOTIF | 4385 IPW_DEBUG(IPW_DL_INFO | IPW_DL_NOTIF |
4397 IPW_DL_STATE, 4386 IPW_DL_STATE,
4398 "Aborting scan with missed beacon.\n"); 4387 "Aborting scan with missed beacon.\n");
4399 queue_work(priv->workqueue, &priv->abort_scan); 4388 schedule_work(&priv->abort_scan);
4400 } 4389 }
4401 4390
4402 queue_work(priv->workqueue, &priv->disassociate); 4391 schedule_work(&priv->disassociate);
4403 return; 4392 return;
4404 } 4393 }
4405 4394
@@ -4425,8 +4414,7 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv,
4425 if (!(priv->status & STATUS_ROAMING)) { 4414 if (!(priv->status & STATUS_ROAMING)) {
4426 priv->status |= STATUS_ROAMING; 4415 priv->status |= STATUS_ROAMING;
4427 if (!(priv->status & STATUS_SCANNING)) 4416 if (!(priv->status & STATUS_SCANNING))
4428 queue_delayed_work(priv->workqueue, 4417 schedule_delayed_work(&priv->request_scan, 0);
4429 &priv->request_scan, 0);
4430 } 4418 }
4431 return; 4419 return;
4432 } 4420 }
@@ -4439,7 +4427,7 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv,
4439 * channels..) */ 4427 * channels..) */
4440 IPW_DEBUG(IPW_DL_INFO | IPW_DL_NOTIF | IPW_DL_STATE, 4428 IPW_DEBUG(IPW_DL_INFO | IPW_DL_NOTIF | IPW_DL_STATE,
4441 "Aborting scan with missed beacon.\n"); 4429 "Aborting scan with missed beacon.\n");
4442 queue_work(priv->workqueue, &priv->abort_scan); 4430 schedule_work(&priv->abort_scan);
4443 } 4431 }
4444 4432
4445 IPW_DEBUG_NOTIF("Missed beacon: %d\n", missed_count); 4433 IPW_DEBUG_NOTIF("Missed beacon: %d\n", missed_count);
@@ -4462,8 +4450,8 @@ static void handle_scan_event(struct ipw_priv *priv)
4462 /* Only userspace-requested scan completion events go out immediately */ 4450 /* Only userspace-requested scan completion events go out immediately */
4463 if (!priv->user_requested_scan) { 4451 if (!priv->user_requested_scan) {
4464 if (!delayed_work_pending(&priv->scan_event)) 4452 if (!delayed_work_pending(&priv->scan_event))
4465 queue_delayed_work(priv->workqueue, &priv->scan_event, 4453 schedule_delayed_work(&priv->scan_event,
4466 round_jiffies_relative(msecs_to_jiffies(4000))); 4454 round_jiffies_relative(msecs_to_jiffies(4000)));
4467 } else { 4455 } else {
4468 union iwreq_data wrqu; 4456 union iwreq_data wrqu;
4469 4457
@@ -4516,20 +4504,17 @@ static void ipw_rx_notification(struct ipw_priv *priv,
4516 4504
4517 IPW_DEBUG_ASSOC 4505 IPW_DEBUG_ASSOC
4518 ("queueing adhoc check\n"); 4506 ("queueing adhoc check\n");
4519 queue_delayed_work(priv-> 4507 schedule_delayed_work(
4520 workqueue, 4508 &priv->adhoc_check,
4521 &priv-> 4509 le16_to_cpu(priv->
4522 adhoc_check, 4510 assoc_request.
4523 le16_to_cpu(priv-> 4511 beacon_interval));
4524 assoc_request.
4525 beacon_interval));
4526 break; 4512 break;
4527 } 4513 }
4528 4514
4529 priv->status &= ~STATUS_ASSOCIATING; 4515 priv->status &= ~STATUS_ASSOCIATING;
4530 priv->status |= STATUS_ASSOCIATED; 4516 priv->status |= STATUS_ASSOCIATED;
4531 queue_work(priv->workqueue, 4517 schedule_work(&priv->system_config);
4532 &priv->system_config);
4533 4518
4534#ifdef CONFIG_IPW2200_QOS 4519#ifdef CONFIG_IPW2200_QOS
4535#define IPW_GET_PACKET_STYPE(x) WLAN_FC_GET_STYPE( \ 4520#define IPW_GET_PACKET_STYPE(x) WLAN_FC_GET_STYPE( \
@@ -4792,43 +4777,37 @@ static void ipw_rx_notification(struct ipw_priv *priv,
4792#ifdef CONFIG_IPW2200_MONITOR 4777#ifdef CONFIG_IPW2200_MONITOR
4793 if (priv->ieee->iw_mode == IW_MODE_MONITOR) { 4778 if (priv->ieee->iw_mode == IW_MODE_MONITOR) {
4794 priv->status |= STATUS_SCAN_FORCED; 4779 priv->status |= STATUS_SCAN_FORCED;
4795 queue_delayed_work(priv->workqueue, 4780 schedule_delayed_work(&priv->request_scan, 0);
4796 &priv->request_scan, 0);
4797 break; 4781 break;
4798 } 4782 }
4799 priv->status &= ~STATUS_SCAN_FORCED; 4783 priv->status &= ~STATUS_SCAN_FORCED;
4800#endif /* CONFIG_IPW2200_MONITOR */ 4784#endif /* CONFIG_IPW2200_MONITOR */
4801 4785
4802 /* Do queued direct scans first */ 4786 /* Do queued direct scans first */
4803 if (priv->status & STATUS_DIRECT_SCAN_PENDING) { 4787 if (priv->status & STATUS_DIRECT_SCAN_PENDING)
4804 queue_delayed_work(priv->workqueue, 4788 schedule_delayed_work(&priv->request_direct_scan, 0);
4805 &priv->request_direct_scan, 0);
4806 }
4807 4789
4808 if (!(priv->status & (STATUS_ASSOCIATED | 4790 if (!(priv->status & (STATUS_ASSOCIATED |
4809 STATUS_ASSOCIATING | 4791 STATUS_ASSOCIATING |
4810 STATUS_ROAMING | 4792 STATUS_ROAMING |
4811 STATUS_DISASSOCIATING))) 4793 STATUS_DISASSOCIATING)))
4812 queue_work(priv->workqueue, &priv->associate); 4794 schedule_work(&priv->associate);
4813 else if (priv->status & STATUS_ROAMING) { 4795 else if (priv->status & STATUS_ROAMING) {
4814 if (x->status == SCAN_COMPLETED_STATUS_COMPLETE) 4796 if (x->status == SCAN_COMPLETED_STATUS_COMPLETE)
4815 /* If a scan completed and we are in roam mode, then 4797 /* If a scan completed and we are in roam mode, then
4816 * the scan that completed was the one requested as a 4798 * the scan that completed was the one requested as a
4817 * result of entering roam... so, schedule the 4799 * result of entering roam... so, schedule the
4818 * roam work */ 4800 * roam work */
4819 queue_work(priv->workqueue, 4801 schedule_work(&priv->roam);
4820 &priv->roam);
4821 else 4802 else
4822 /* Don't schedule if we aborted the scan */ 4803 /* Don't schedule if we aborted the scan */
4823 priv->status &= ~STATUS_ROAMING; 4804 priv->status &= ~STATUS_ROAMING;
4824 } else if (priv->status & STATUS_SCAN_PENDING) 4805 } else if (priv->status & STATUS_SCAN_PENDING)
4825 queue_delayed_work(priv->workqueue, 4806 schedule_delayed_work(&priv->request_scan, 0);
4826 &priv->request_scan, 0);
4827 else if (priv->config & CFG_BACKGROUND_SCAN 4807 else if (priv->config & CFG_BACKGROUND_SCAN
4828 && priv->status & STATUS_ASSOCIATED) 4808 && priv->status & STATUS_ASSOCIATED)
4829 queue_delayed_work(priv->workqueue, 4809 schedule_delayed_work(&priv->request_scan,
4830 &priv->request_scan, 4810 round_jiffies_relative(HZ));
4831 round_jiffies_relative(HZ));
4832 4811
4833 /* Send an empty event to user space. 4812 /* Send an empty event to user space.
4834 * We don't send the received data on the event because 4813 * We don't send the received data on the event because
@@ -5192,7 +5171,7 @@ static void ipw_rx_queue_restock(struct ipw_priv *priv)
5192 /* If the pre-allocated buffer pool is dropping low, schedule to 5171 /* If the pre-allocated buffer pool is dropping low, schedule to
5193 * refill it */ 5172 * refill it */
5194 if (rxq->free_count <= RX_LOW_WATERMARK) 5173 if (rxq->free_count <= RX_LOW_WATERMARK)
5195 queue_work(priv->workqueue, &priv->rx_replenish); 5174 schedule_work(&priv->rx_replenish);
5196 5175
5197 /* If we've added more space for the firmware to place data, tell it */ 5176 /* If we've added more space for the firmware to place data, tell it */
5198 if (write != rxq->write) 5177 if (write != rxq->write)
@@ -6133,8 +6112,8 @@ static void ipw_adhoc_check(void *data)
6133 return; 6112 return;
6134 } 6113 }
6135 6114
6136 queue_delayed_work(priv->workqueue, &priv->adhoc_check, 6115 schedule_delayed_work(&priv->adhoc_check,
6137 le16_to_cpu(priv->assoc_request.beacon_interval)); 6116 le16_to_cpu(priv->assoc_request.beacon_interval));
6138} 6117}
6139 6118
6140static void ipw_bg_adhoc_check(struct work_struct *work) 6119static void ipw_bg_adhoc_check(struct work_struct *work)
@@ -6523,8 +6502,7 @@ send_request:
6523 } else 6502 } else
6524 priv->status &= ~STATUS_SCAN_PENDING; 6503 priv->status &= ~STATUS_SCAN_PENDING;
6525 6504
6526 queue_delayed_work(priv->workqueue, &priv->scan_check, 6505 schedule_delayed_work(&priv->scan_check, IPW_SCAN_CHECK_WATCHDOG);
6527 IPW_SCAN_CHECK_WATCHDOG);
6528done: 6506done:
6529 mutex_unlock(&priv->mutex); 6507 mutex_unlock(&priv->mutex);
6530 return err; 6508 return err;
@@ -6994,8 +6972,7 @@ static int ipw_qos_handle_probe_response(struct ipw_priv *priv,
6994 !memcmp(network->ssid, 6972 !memcmp(network->ssid,
6995 priv->assoc_network->ssid, 6973 priv->assoc_network->ssid,
6996 network->ssid_len)) { 6974 network->ssid_len)) {
6997 queue_work(priv->workqueue, 6975 schedule_work(&priv->merge_networks);
6998 &priv->merge_networks);
6999 } 6976 }
7000 } 6977 }
7001 6978
@@ -7663,7 +7640,7 @@ static int ipw_associate(void *data)
7663 if (priv->status & STATUS_DISASSOCIATING) { 7640 if (priv->status & STATUS_DISASSOCIATING) {
7664 IPW_DEBUG_ASSOC("Not attempting association (in " 7641 IPW_DEBUG_ASSOC("Not attempting association (in "
7665 "disassociating)\n "); 7642 "disassociating)\n ");
7666 queue_work(priv->workqueue, &priv->associate); 7643 schedule_work(&priv->associate);
7667 return 0; 7644 return 0;
7668 } 7645 }
7669 7646
@@ -7731,12 +7708,10 @@ static int ipw_associate(void *data)
7731 7708
7732 if (!(priv->status & STATUS_SCANNING)) { 7709 if (!(priv->status & STATUS_SCANNING)) {
7733 if (!(priv->config & CFG_SPEED_SCAN)) 7710 if (!(priv->config & CFG_SPEED_SCAN))
7734 queue_delayed_work(priv->workqueue, 7711 schedule_delayed_work(&priv->request_scan,
7735 &priv->request_scan, 7712 SCAN_INTERVAL);
7736 SCAN_INTERVAL);
7737 else 7713 else
7738 queue_delayed_work(priv->workqueue, 7714 schedule_delayed_work(&priv->request_scan, 0);
7739 &priv->request_scan, 0);
7740 } 7715 }
7741 7716
7742 return 0; 7717 return 0;
@@ -8899,7 +8874,7 @@ static int ipw_wx_set_mode(struct net_device *dev,
8899 8874
8900 priv->ieee->iw_mode = wrqu->mode; 8875 priv->ieee->iw_mode = wrqu->mode;
8901 8876
8902 queue_work(priv->workqueue, &priv->adapter_restart); 8877 schedule_work(&priv->adapter_restart);
8903 mutex_unlock(&priv->mutex); 8878 mutex_unlock(&priv->mutex);
8904 return err; 8879 return err;
8905} 8880}
@@ -9598,7 +9573,7 @@ static int ipw_wx_set_scan(struct net_device *dev,
9598 9573
9599 IPW_DEBUG_WX("Start scan\n"); 9574 IPW_DEBUG_WX("Start scan\n");
9600 9575
9601 queue_delayed_work(priv->workqueue, work, 0); 9576 schedule_delayed_work(work, 0);
9602 9577
9603 return 0; 9578 return 0;
9604} 9579}
@@ -9937,7 +9912,7 @@ static int ipw_wx_set_monitor(struct net_device *dev,
9937#else 9912#else
9938 priv->net_dev->type = ARPHRD_IEEE80211; 9913 priv->net_dev->type = ARPHRD_IEEE80211;
9939#endif 9914#endif
9940 queue_work(priv->workqueue, &priv->adapter_restart); 9915 schedule_work(&priv->adapter_restart);
9941 } 9916 }
9942 9917
9943 ipw_set_channel(priv, parms[1]); 9918 ipw_set_channel(priv, parms[1]);
@@ -9947,7 +9922,7 @@ static int ipw_wx_set_monitor(struct net_device *dev,
9947 return 0; 9922 return 0;
9948 } 9923 }
9949 priv->net_dev->type = ARPHRD_ETHER; 9924 priv->net_dev->type = ARPHRD_ETHER;
9950 queue_work(priv->workqueue, &priv->adapter_restart); 9925 schedule_work(&priv->adapter_restart);
9951 } 9926 }
9952 mutex_unlock(&priv->mutex); 9927 mutex_unlock(&priv->mutex);
9953 return 0; 9928 return 0;
@@ -9961,7 +9936,7 @@ static int ipw_wx_reset(struct net_device *dev,
9961{ 9936{
9962 struct ipw_priv *priv = libipw_priv(dev); 9937 struct ipw_priv *priv = libipw_priv(dev);
9963 IPW_DEBUG_WX("RESET\n"); 9938 IPW_DEBUG_WX("RESET\n");
9964 queue_work(priv->workqueue, &priv->adapter_restart); 9939 schedule_work(&priv->adapter_restart);
9965 return 0; 9940 return 0;
9966} 9941}
9967 9942
@@ -10551,7 +10526,7 @@ static int ipw_net_set_mac_address(struct net_device *dev, void *p)
10551 memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN); 10526 memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN);
10552 printk(KERN_INFO "%s: Setting MAC to %pM\n", 10527 printk(KERN_INFO "%s: Setting MAC to %pM\n",
10553 priv->net_dev->name, priv->mac_addr); 10528 priv->net_dev->name, priv->mac_addr);
10554 queue_work(priv->workqueue, &priv->adapter_restart); 10529 schedule_work(&priv->adapter_restart);
10555 mutex_unlock(&priv->mutex); 10530 mutex_unlock(&priv->mutex);
10556 return 0; 10531 return 0;
10557} 10532}
@@ -10684,9 +10659,7 @@ static void ipw_rf_kill(void *adapter)
10684 10659
10685 if (rf_kill_active(priv)) { 10660 if (rf_kill_active(priv)) {
10686 IPW_DEBUG_RF_KILL("RF Kill active, rescheduling GPIO check\n"); 10661 IPW_DEBUG_RF_KILL("RF Kill active, rescheduling GPIO check\n");
10687 if (priv->workqueue) 10662 schedule_delayed_work(&priv->rf_kill, 2 * HZ);
10688 queue_delayed_work(priv->workqueue,
10689 &priv->rf_kill, 2 * HZ);
10690 goto exit_unlock; 10663 goto exit_unlock;
10691 } 10664 }
10692 10665
@@ -10697,7 +10670,7 @@ static void ipw_rf_kill(void *adapter)
10697 "device\n"); 10670 "device\n");
10698 10671
10699 /* we can not do an adapter restart while inside an irq lock */ 10672 /* we can not do an adapter restart while inside an irq lock */
10700 queue_work(priv->workqueue, &priv->adapter_restart); 10673 schedule_work(&priv->adapter_restart);
10701 } else 10674 } else
10702 IPW_DEBUG_RF_KILL("HW RF Kill deactivated. SW RF Kill still " 10675 IPW_DEBUG_RF_KILL("HW RF Kill deactivated. SW RF Kill still "
10703 "enabled\n"); 10676 "enabled\n");
@@ -10735,7 +10708,7 @@ static void ipw_link_up(struct ipw_priv *priv)
10735 notify_wx_assoc_event(priv); 10708 notify_wx_assoc_event(priv);
10736 10709
10737 if (priv->config & CFG_BACKGROUND_SCAN) 10710 if (priv->config & CFG_BACKGROUND_SCAN)
10738 queue_delayed_work(priv->workqueue, &priv->request_scan, HZ); 10711 schedule_delayed_work(&priv->request_scan, HZ);
10739} 10712}
10740 10713
10741static void ipw_bg_link_up(struct work_struct *work) 10714static void ipw_bg_link_up(struct work_struct *work)
@@ -10764,7 +10737,7 @@ static void ipw_link_down(struct ipw_priv *priv)
10764 10737
10765 if (!(priv->status & STATUS_EXIT_PENDING)) { 10738 if (!(priv->status & STATUS_EXIT_PENDING)) {
10766 /* Queue up another scan... */ 10739 /* Queue up another scan... */
10767 queue_delayed_work(priv->workqueue, &priv->request_scan, 0); 10740 schedule_delayed_work(&priv->request_scan, 0);
10768 } else 10741 } else
10769 cancel_delayed_work(&priv->scan_event); 10742 cancel_delayed_work(&priv->scan_event);
10770} 10743}
@@ -10782,7 +10755,6 @@ static int __devinit ipw_setup_deferred_work(struct ipw_priv *priv)
10782{ 10755{
10783 int ret = 0; 10756 int ret = 0;
10784 10757
10785 priv->workqueue = create_workqueue(DRV_NAME);
10786 init_waitqueue_head(&priv->wait_command_queue); 10758 init_waitqueue_head(&priv->wait_command_queue);
10787 init_waitqueue_head(&priv->wait_state); 10759 init_waitqueue_head(&priv->wait_state);
10788 10760
@@ -11339,8 +11311,7 @@ static int ipw_up(struct ipw_priv *priv)
11339 IPW_WARNING("Radio Frequency Kill Switch is On:\n" 11311 IPW_WARNING("Radio Frequency Kill Switch is On:\n"
11340 "Kill switch must be turned off for " 11312 "Kill switch must be turned off for "
11341 "wireless networking to work.\n"); 11313 "wireless networking to work.\n");
11342 queue_delayed_work(priv->workqueue, &priv->rf_kill, 11314 schedule_delayed_work(&priv->rf_kill, 2 * HZ);
11343 2 * HZ);
11344 return 0; 11315 return 0;
11345 } 11316 }
11346 11317
@@ -11350,8 +11321,7 @@ static int ipw_up(struct ipw_priv *priv)
11350 11321
11351 /* If configure to try and auto-associate, kick 11322 /* If configure to try and auto-associate, kick
11352 * off a scan. */ 11323 * off a scan. */
11353 queue_delayed_work(priv->workqueue, 11324 schedule_delayed_work(&priv->request_scan, 0);
11354 &priv->request_scan, 0);
11355 11325
11356 return 0; 11326 return 0;
11357 } 11327 }
@@ -11817,7 +11787,7 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
11817 err = request_irq(pdev->irq, ipw_isr, IRQF_SHARED, DRV_NAME, priv); 11787 err = request_irq(pdev->irq, ipw_isr, IRQF_SHARED, DRV_NAME, priv);
11818 if (err) { 11788 if (err) {
11819 IPW_ERROR("Error allocating IRQ %d\n", pdev->irq); 11789 IPW_ERROR("Error allocating IRQ %d\n", pdev->irq);
11820 goto out_destroy_workqueue; 11790 goto out_iounmap;
11821 } 11791 }
11822 11792
11823 SET_NETDEV_DEV(net_dev, &pdev->dev); 11793 SET_NETDEV_DEV(net_dev, &pdev->dev);
@@ -11885,9 +11855,6 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
11885 sysfs_remove_group(&pdev->dev.kobj, &ipw_attribute_group); 11855 sysfs_remove_group(&pdev->dev.kobj, &ipw_attribute_group);
11886 out_release_irq: 11856 out_release_irq:
11887 free_irq(pdev->irq, priv); 11857 free_irq(pdev->irq, priv);
11888 out_destroy_workqueue:
11889 destroy_workqueue(priv->workqueue);
11890 priv->workqueue = NULL;
11891 out_iounmap: 11858 out_iounmap:
11892 iounmap(priv->hw_base); 11859 iounmap(priv->hw_base);
11893 out_pci_release_regions: 11860 out_pci_release_regions:
@@ -11930,18 +11897,31 @@ static void __devexit ipw_pci_remove(struct pci_dev *pdev)
11930 kfree(priv->cmdlog); 11897 kfree(priv->cmdlog);
11931 priv->cmdlog = NULL; 11898 priv->cmdlog = NULL;
11932 } 11899 }
11933 /* ipw_down will ensure that there is no more pending work 11900
11934 * in the workqueue's, so we can safely remove them now. */ 11901 /* make sure all works are inactive */
11935 cancel_delayed_work(&priv->adhoc_check); 11902 cancel_delayed_work_sync(&priv->adhoc_check);
11936 cancel_delayed_work(&priv->gather_stats); 11903 cancel_work_sync(&priv->associate);
11937 cancel_delayed_work(&priv->request_scan); 11904 cancel_work_sync(&priv->disassociate);
11938 cancel_delayed_work(&priv->request_direct_scan); 11905 cancel_work_sync(&priv->system_config);
11939 cancel_delayed_work(&priv->request_passive_scan); 11906 cancel_work_sync(&priv->rx_replenish);
11940 cancel_delayed_work(&priv->scan_event); 11907 cancel_work_sync(&priv->adapter_restart);
11941 cancel_delayed_work(&priv->rf_kill); 11908 cancel_delayed_work_sync(&priv->rf_kill);
11942 cancel_delayed_work(&priv->scan_check); 11909 cancel_work_sync(&priv->up);
11943 destroy_workqueue(priv->workqueue); 11910 cancel_work_sync(&priv->down);
11944 priv->workqueue = NULL; 11911 cancel_delayed_work_sync(&priv->request_scan);
11912 cancel_delayed_work_sync(&priv->request_direct_scan);
11913 cancel_delayed_work_sync(&priv->request_passive_scan);
11914 cancel_delayed_work_sync(&priv->scan_event);
11915 cancel_delayed_work_sync(&priv->gather_stats);
11916 cancel_work_sync(&priv->abort_scan);
11917 cancel_work_sync(&priv->roam);
11918 cancel_delayed_work_sync(&priv->scan_check);
11919 cancel_work_sync(&priv->link_up);
11920 cancel_work_sync(&priv->link_down);
11921 cancel_delayed_work_sync(&priv->led_link_on);
11922 cancel_delayed_work_sync(&priv->led_link_off);
11923 cancel_delayed_work_sync(&priv->led_act_off);
11924 cancel_work_sync(&priv->merge_networks);
11945 11925
11946 /* Free MAC hash list for ADHOC */ 11926 /* Free MAC hash list for ADHOC */
11947 for (i = 0; i < IPW_IBSS_MAC_HASH_SIZE; i++) { 11927 for (i = 0; i < IPW_IBSS_MAC_HASH_SIZE; i++) {
@@ -12029,7 +12009,7 @@ static int ipw_pci_resume(struct pci_dev *pdev)
12029 priv->suspend_time = get_seconds() - priv->suspend_at; 12009 priv->suspend_time = get_seconds() - priv->suspend_at;
12030 12010
12031 /* Bring the device back up */ 12011 /* Bring the device back up */
12032 queue_work(priv->workqueue, &priv->up); 12012 schedule_work(&priv->up);
12033 12013
12034 return 0; 12014 return 0;
12035} 12015}
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.h b/drivers/net/wireless/ipw2x00/ipw2200.h
index d7d049c7a4fa..0441445b8bfa 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.h
+++ b/drivers/net/wireless/ipw2x00/ipw2200.h
@@ -1299,8 +1299,6 @@ struct ipw_priv {
1299 u8 direct_scan_ssid[IW_ESSID_MAX_SIZE]; 1299 u8 direct_scan_ssid[IW_ESSID_MAX_SIZE];
1300 u8 direct_scan_ssid_len; 1300 u8 direct_scan_ssid_len;
1301 1301
1302 struct workqueue_struct *workqueue;
1303
1304 struct delayed_work adhoc_check; 1302 struct delayed_work adhoc_check;
1305 struct work_struct associate; 1303 struct work_struct associate;
1306 struct work_struct disassociate; 1304 struct work_struct disassociate;
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index a9b852be4509..39b6f16c87fa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -402,72 +402,6 @@ static void iwl3945_accumulative_statistics(struct iwl_priv *priv,
402} 402}
403#endif 403#endif
404 404
405/**
406 * iwl3945_good_plcp_health - checks for plcp error.
407 *
408 * When the plcp error is exceeding the thresholds, reset the radio
409 * to improve the throughput.
410 */
411static bool iwl3945_good_plcp_health(struct iwl_priv *priv,
412 struct iwl_rx_packet *pkt)
413{
414 bool rc = true;
415 struct iwl3945_notif_statistics current_stat;
416 int combined_plcp_delta;
417 unsigned int plcp_msec;
418 unsigned long plcp_received_jiffies;
419
420 if (priv->cfg->base_params->plcp_delta_threshold ==
421 IWL_MAX_PLCP_ERR_THRESHOLD_DISABLE) {
422 IWL_DEBUG_RADIO(priv, "plcp_err check disabled\n");
423 return rc;
424 }
425 memcpy(&current_stat, pkt->u.raw, sizeof(struct
426 iwl3945_notif_statistics));
427 /*
428 * check for plcp_err and trigger radio reset if it exceeds
429 * the plcp error threshold plcp_delta.
430 */
431 plcp_received_jiffies = jiffies;
432 plcp_msec = jiffies_to_msecs((long) plcp_received_jiffies -
433 (long) priv->plcp_jiffies);
434 priv->plcp_jiffies = plcp_received_jiffies;
435 /*
436 * check to make sure plcp_msec is not 0 to prevent division
437 * by zero.
438 */
439 if (plcp_msec) {
440 combined_plcp_delta =
441 (le32_to_cpu(current_stat.rx.ofdm.plcp_err) -
442 le32_to_cpu(priv->_3945.statistics.rx.ofdm.plcp_err));
443
444 if ((combined_plcp_delta > 0) &&
445 ((combined_plcp_delta * 100) / plcp_msec) >
446 priv->cfg->base_params->plcp_delta_threshold) {
447 /*
448 * if plcp_err exceed the threshold, the following
449 * data is printed in csv format:
450 * Text: plcp_err exceeded %d,
451 * Received ofdm.plcp_err,
452 * Current ofdm.plcp_err,
453 * combined_plcp_delta,
454 * plcp_msec
455 */
456 IWL_DEBUG_RADIO(priv, "plcp_err exceeded %u, "
457 "%u, %d, %u mSecs\n",
458 priv->cfg->base_params->plcp_delta_threshold,
459 le32_to_cpu(current_stat.rx.ofdm.plcp_err),
460 combined_plcp_delta, plcp_msec);
461 /*
462 * Reset the RF radio due to the high plcp
463 * error rate
464 */
465 rc = false;
466 }
467 }
468 return rc;
469}
470
471void iwl3945_hw_rx_statistics(struct iwl_priv *priv, 405void iwl3945_hw_rx_statistics(struct iwl_priv *priv,
472 struct iwl_rx_mem_buffer *rxb) 406 struct iwl_rx_mem_buffer *rxb)
473{ 407{
@@ -2734,7 +2668,6 @@ static struct iwl_lib_ops iwl3945_lib = {
2734 .isr_ops = { 2668 .isr_ops = {
2735 .isr = iwl_isr_legacy, 2669 .isr = iwl_isr_legacy,
2736 }, 2670 },
2737 .check_plcp_health = iwl3945_good_plcp_health,
2738 2671
2739 .debugfs_ops = { 2672 .debugfs_ops = {
2740 .rx_stats_read = iwl3945_ucode_rx_stats_read, 2673 .rx_stats_read = iwl3945_ucode_rx_stats_read,
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index 79ab0a6b1386..537fb8c84e3a 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -51,7 +51,7 @@
51#include "iwl-agn-debugfs.h" 51#include "iwl-agn-debugfs.h"
52 52
53/* Highest firmware API version supported */ 53/* Highest firmware API version supported */
54#define IWL5000_UCODE_API_MAX 2 54#define IWL5000_UCODE_API_MAX 5
55#define IWL5150_UCODE_API_MAX 2 55#define IWL5150_UCODE_API_MAX 2
56 56
57/* Lowest firmware API version supported */ 57/* Lowest firmware API version supported */
diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
index 1eacba4daa5b..0494d7b102d4 100644
--- a/drivers/net/wireless/p54/p54pci.c
+++ b/drivers/net/wireless/p54/p54pci.c
@@ -199,6 +199,7 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
199 while (i != idx) { 199 while (i != idx) {
200 u16 len; 200 u16 len;
201 struct sk_buff *skb; 201 struct sk_buff *skb;
202 dma_addr_t dma_addr;
202 desc = &ring[i]; 203 desc = &ring[i];
203 len = le16_to_cpu(desc->len); 204 len = le16_to_cpu(desc->len);
204 skb = rx_buf[i]; 205 skb = rx_buf[i];
@@ -216,17 +217,20 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
216 217
217 len = priv->common.rx_mtu; 218 len = priv->common.rx_mtu;
218 } 219 }
220 dma_addr = le32_to_cpu(desc->host_addr);
221 pci_dma_sync_single_for_cpu(priv->pdev, dma_addr,
222 priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
219 skb_put(skb, len); 223 skb_put(skb, len);
220 224
221 if (p54_rx(dev, skb)) { 225 if (p54_rx(dev, skb)) {
222 pci_unmap_single(priv->pdev, 226 pci_unmap_single(priv->pdev, dma_addr,
223 le32_to_cpu(desc->host_addr), 227 priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
224 priv->common.rx_mtu + 32,
225 PCI_DMA_FROMDEVICE);
226 rx_buf[i] = NULL; 228 rx_buf[i] = NULL;
227 desc->host_addr = 0; 229 desc->host_addr = cpu_to_le32(0);
228 } else { 230 } else {
229 skb_trim(skb, 0); 231 skb_trim(skb, 0);
232 pci_dma_sync_single_for_device(priv->pdev, dma_addr,
233 priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
230 desc->len = cpu_to_le16(priv->common.rx_mtu + 32); 234 desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
231 } 235 }
232 236
diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index 21713a7638c4..9b344a921e74 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -98,6 +98,7 @@ static struct usb_device_id p54u_table[] __devinitdata = {
98 {USB_DEVICE(0x1413, 0x5400)}, /* Telsey 802.11g USB2.0 Adapter */ 98 {USB_DEVICE(0x1413, 0x5400)}, /* Telsey 802.11g USB2.0 Adapter */
99 {USB_DEVICE(0x1435, 0x0427)}, /* Inventel UR054G */ 99 {USB_DEVICE(0x1435, 0x0427)}, /* Inventel UR054G */
100 {USB_DEVICE(0x1668, 0x1050)}, /* Actiontec 802UIG-1 */ 100 {USB_DEVICE(0x1668, 0x1050)}, /* Actiontec 802UIG-1 */
101 {USB_DEVICE(0x1740, 0x1000)}, /* Senao NUB-350 */
101 {USB_DEVICE(0x2001, 0x3704)}, /* DLink DWL-G122 rev A2 */ 102 {USB_DEVICE(0x2001, 0x3704)}, /* DLink DWL-G122 rev A2 */
102 {USB_DEVICE(0x2001, 0x3705)}, /* D-Link DWL-G120 rev C1 */ 103 {USB_DEVICE(0x2001, 0x3705)}, /* D-Link DWL-G120 rev C1 */
103 {USB_DEVICE(0x413c, 0x5513)}, /* Dell WLA3310 USB Wireless Adapter */ 104 {USB_DEVICE(0x413c, 0x5513)}, /* Dell WLA3310 USB Wireless Adapter */
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 848cc2cce247..518542b4bf9e 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -2597,6 +2597,9 @@ static int rndis_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
2597 __le32 mode; 2597 __le32 mode;
2598 int ret; 2598 int ret;
2599 2599
2600 if (priv->device_type != RNDIS_BCM4320B)
2601 return -ENOTSUPP;
2602
2600 netdev_dbg(usbdev->net, "%s(): %s, %d\n", __func__, 2603 netdev_dbg(usbdev->net, "%s(): %s, %d\n", __func__,
2601 enabled ? "enabled" : "disabled", 2604 enabled ? "enabled" : "disabled",
2602 timeout); 2605 timeout);
diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c
index aa97971a38af..3b3f1e45ab3e 100644
--- a/drivers/net/wireless/rt2x00/rt2800pci.c
+++ b/drivers/net/wireless/rt2x00/rt2800pci.c
@@ -652,6 +652,12 @@ static void rt2800pci_fill_rxdone(struct queue_entry *entry,
652 */ 652 */
653 rxdesc->flags |= RX_FLAG_IV_STRIPPED; 653 rxdesc->flags |= RX_FLAG_IV_STRIPPED;
654 654
655 /*
656 * The hardware has already checked the Michael Mic and has
657 * stripped it from the frame. Signal this to mac80211.
658 */
659 rxdesc->flags |= RX_FLAG_MMIC_STRIPPED;
660
655 if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS) 661 if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS)
656 rxdesc->flags |= RX_FLAG_DECRYPTED; 662 rxdesc->flags |= RX_FLAG_DECRYPTED;
657 else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC) 663 else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC)
@@ -1065,6 +1071,8 @@ static DEFINE_PCI_DEVICE_TABLE(rt2800pci_device_table) = {
1065 { PCI_DEVICE(0x1814, 0x3390), PCI_DEVICE_DATA(&rt2800pci_ops) }, 1071 { PCI_DEVICE(0x1814, 0x3390), PCI_DEVICE_DATA(&rt2800pci_ops) },
1066#endif 1072#endif
1067#ifdef CONFIG_RT2800PCI_RT35XX 1073#ifdef CONFIG_RT2800PCI_RT35XX
1074 { PCI_DEVICE(0x1432, 0x7711), PCI_DEVICE_DATA(&rt2800pci_ops) },
1075 { PCI_DEVICE(0x1432, 0x7722), PCI_DEVICE_DATA(&rt2800pci_ops) },
1068 { PCI_DEVICE(0x1814, 0x3060), PCI_DEVICE_DATA(&rt2800pci_ops) }, 1076 { PCI_DEVICE(0x1814, 0x3060), PCI_DEVICE_DATA(&rt2800pci_ops) },
1069 { PCI_DEVICE(0x1814, 0x3062), PCI_DEVICE_DATA(&rt2800pci_ops) }, 1077 { PCI_DEVICE(0x1814, 0x3062), PCI_DEVICE_DATA(&rt2800pci_ops) },
1070 { PCI_DEVICE(0x1814, 0x3562), PCI_DEVICE_DATA(&rt2800pci_ops) }, 1078 { PCI_DEVICE(0x1814, 0x3562), PCI_DEVICE_DATA(&rt2800pci_ops) },
diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index b97a4a54ff4c..197a36c05fda 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -486,6 +486,12 @@ static void rt2800usb_fill_rxdone(struct queue_entry *entry,
486 */ 486 */
487 rxdesc->flags |= RX_FLAG_IV_STRIPPED; 487 rxdesc->flags |= RX_FLAG_IV_STRIPPED;
488 488
489 /*
490 * The hardware has already checked the Michael Mic and has
491 * stripped it from the frame. Signal this to mac80211.
492 */
493 rxdesc->flags |= RX_FLAG_MMIC_STRIPPED;
494
489 if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS) 495 if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS)
490 rxdesc->flags |= RX_FLAG_DECRYPTED; 496 rxdesc->flags |= RX_FLAG_DECRYPTED;
491 else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC) 497 else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC)
diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index ffedfd492754..ea1580085347 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4 4
5menuconfig NFC_DEVICES 5menuconfig NFC_DEVICES
6 bool "NFC devices" 6 bool "Near Field Communication (NFC) devices"
7 default n 7 default n
8 ---help--- 8 ---help---
9 You'll have to say Y if your computer contains an NFC device that 9 You'll have to say Y if your computer contains an NFC device that
diff --git a/drivers/nfc/pn544.c b/drivers/nfc/pn544.c
index bae647264dd6..724f65d8f9e4 100644
--- a/drivers/nfc/pn544.c
+++ b/drivers/nfc/pn544.c
@@ -60,7 +60,7 @@ enum pn544_irq {
60struct pn544_info { 60struct pn544_info {
61 struct miscdevice miscdev; 61 struct miscdevice miscdev;
62 struct i2c_client *i2c_dev; 62 struct i2c_client *i2c_dev;
63 struct regulator_bulk_data regs[2]; 63 struct regulator_bulk_data regs[3];
64 64
65 enum pn544_state state; 65 enum pn544_state state;
66 wait_queue_head_t read_wait; 66 wait_queue_head_t read_wait;
@@ -74,6 +74,7 @@ struct pn544_info {
74 74
75static const char reg_vdd_io[] = "Vdd_IO"; 75static const char reg_vdd_io[] = "Vdd_IO";
76static const char reg_vbat[] = "VBat"; 76static const char reg_vbat[] = "VBat";
77static const char reg_vsim[] = "VSim";
77 78
78/* sysfs interface */ 79/* sysfs interface */
79static ssize_t pn544_test(struct device *dev, 80static ssize_t pn544_test(struct device *dev,
@@ -740,6 +741,7 @@ static int __devinit pn544_probe(struct i2c_client *client,
740 741
741 info->regs[0].supply = reg_vdd_io; 742 info->regs[0].supply = reg_vdd_io;
742 info->regs[1].supply = reg_vbat; 743 info->regs[1].supply = reg_vbat;
744 info->regs[2].supply = reg_vsim;
743 r = regulator_bulk_get(&client->dev, ARRAY_SIZE(info->regs), 745 r = regulator_bulk_get(&client->dev, ARRAY_SIZE(info->regs),
744 info->regs); 746 info->regs);
745 if (r < 0) 747 if (r < 0)
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 3c6e100a3ad0..d06a6374ed6c 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -69,4 +69,10 @@ config OF_MDIO
69 help 69 help
70 OpenFirmware MDIO bus (Ethernet PHY) accessors 70 OpenFirmware MDIO bus (Ethernet PHY) accessors
71 71
72config OF_PCI
73 def_tristate PCI
74 depends on PCI && (PPC || MICROBLAZE || X86)
75 help
76 OpenFirmware PCI bus accessors
77
72endmenu # OF 78endmenu # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 3ab21a0a4907..f7861ed2f287 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_OF_I2C) += of_i2c.o
9obj-$(CONFIG_OF_NET) += of_net.o 9obj-$(CONFIG_OF_NET) += of_net.o
10obj-$(CONFIG_OF_SPI) += of_spi.o 10obj-$(CONFIG_OF_SPI) += of_spi.o
11obj-$(CONFIG_OF_MDIO) += of_mdio.o 11obj-$(CONFIG_OF_MDIO) += of_mdio.o
12obj-$(CONFIG_OF_PCI) += of_pci.o
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
new file mode 100644
index 000000000000..ac1ec54e4fd5
--- /dev/null
+++ b/drivers/of/of_pci.c
@@ -0,0 +1,92 @@
1#include <linux/kernel.h>
2#include <linux/of_pci.h>
3#include <linux/of_irq.h>
4#include <asm/prom.h>
5
6/**
7 * of_irq_map_pci - Resolve the interrupt for a PCI device
8 * @pdev: the device whose interrupt is to be resolved
9 * @out_irq: structure of_irq filled by this function
10 *
11 * This function resolves the PCI interrupt for a given PCI device. If a
12 * device-node exists for a given pci_dev, it will use normal OF tree
13 * walking. If not, it will implement standard swizzling and walk up the
14 * PCI tree until an device-node is found, at which point it will finish
15 * resolving using the OF tree walking.
16 */
17int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
18{
19 struct device_node *dn, *ppnode;
20 struct pci_dev *ppdev;
21 u32 lspec;
22 __be32 lspec_be;
23 __be32 laddr[3];
24 u8 pin;
25 int rc;
26
27 /* Check if we have a device node, if yes, fallback to standard
28 * device tree parsing
29 */
30 dn = pci_device_to_OF_node(pdev);
31 if (dn) {
32 rc = of_irq_map_one(dn, 0, out_irq);
33 if (!rc)
34 return rc;
35 }
36
37 /* Ok, we don't, time to have fun. Let's start by building up an
38 * interrupt spec. we assume #interrupt-cells is 1, which is standard
39 * for PCI. If you do different, then don't use that routine.
40 */
41 rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
42 if (rc != 0)
43 return rc;
44 /* No pin, exit */
45 if (pin == 0)
46 return -ENODEV;
47
48 /* Now we walk up the PCI tree */
49 lspec = pin;
50 for (;;) {
51 /* Get the pci_dev of our parent */
52 ppdev = pdev->bus->self;
53
54 /* Ouch, it's a host bridge... */
55 if (ppdev == NULL) {
56 ppnode = pci_bus_to_OF_node(pdev->bus);
57
58 /* No node for host bridge ? give up */
59 if (ppnode == NULL)
60 return -EINVAL;
61 } else {
62 /* We found a P2P bridge, check if it has a node */
63 ppnode = pci_device_to_OF_node(ppdev);
64 }
65
66 /* Ok, we have found a parent with a device-node, hand over to
67 * the OF parsing code.
68 * We build a unit address from the linux device to be used for
69 * resolution. Note that we use the linux bus number which may
70 * not match your firmware bus numbering.
71 * Fortunately, in most cases, interrupt-map-mask doesn't
72 * include the bus number as part of the matching.
73 * You should still be careful about that though if you intend
74 * to rely on this function (you ship a firmware that doesn't
75 * create device nodes for all PCI devices).
76 */
77 if (ppnode)
78 break;
79
80 /* We can only get here if we hit a P2P bridge with no node,
81 * let's do standard swizzling and try again
82 */
83 lspec = pci_swizzle_interrupt_pin(pdev, lspec);
84 pdev = ppdev;
85 }
86
87 lspec_be = cpu_to_be32(lspec);
88 laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
89 laddr[1] = laddr[2] = cpu_to_be32(0);
90 return of_irq_map_raw(ppnode, &lspec_be, 1, laddr, out_irq);
91}
92EXPORT_SYMBOL_GPL(of_irq_map_pci);
diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c
index 28295d0a50f6..4d87b5dc9284 100644
--- a/drivers/of/pdt.c
+++ b/drivers/of/pdt.c
@@ -36,19 +36,55 @@ unsigned int of_pdt_unique_id __initdata;
36 (p)->unique_id = of_pdt_unique_id++; \ 36 (p)->unique_id = of_pdt_unique_id++; \
37} while (0) 37} while (0)
38 38
39static inline const char *of_pdt_node_name(struct device_node *dp) 39static char * __init of_pdt_build_full_name(struct device_node *dp)
40{ 40{
41 return dp->path_component_name; 41 int len, ourlen, plen;
42 char *n;
43
44 dp->path_component_name = build_path_component(dp);
45
46 plen = strlen(dp->parent->full_name);
47 ourlen = strlen(dp->path_component_name);
48 len = ourlen + plen + 2;
49
50 n = prom_early_alloc(len);
51 strcpy(n, dp->parent->full_name);
52 if (!of_node_is_root(dp->parent)) {
53 strcpy(n + plen, "/");
54 plen++;
55 }
56 strcpy(n + plen, dp->path_component_name);
57
58 return n;
42} 59}
43 60
44#else 61#else /* CONFIG_SPARC */
45 62
46static inline void of_pdt_incr_unique_id(void *p) { } 63static inline void of_pdt_incr_unique_id(void *p) { }
47static inline void irq_trans_init(struct device_node *dp) { } 64static inline void irq_trans_init(struct device_node *dp) { }
48 65
49static inline const char *of_pdt_node_name(struct device_node *dp) 66static char * __init of_pdt_build_full_name(struct device_node *dp)
50{ 67{
51 return dp->name; 68 static int failsafe_id = 0; /* for generating unique names on failure */
69 char *buf;
70 int len;
71
72 if (of_pdt_prom_ops->pkg2path(dp->phandle, NULL, 0, &len))
73 goto failsafe;
74
75 buf = prom_early_alloc(len + 1);
76 if (of_pdt_prom_ops->pkg2path(dp->phandle, buf, len, &len))
77 goto failsafe;
78 return buf;
79
80 failsafe:
81 buf = prom_early_alloc(strlen(dp->parent->full_name) +
82 strlen(dp->name) + 16);
83 sprintf(buf, "%s/%s@unknown%i",
84 of_node_is_root(dp->parent) ? "" : dp->parent->full_name,
85 dp->name, failsafe_id++);
86 pr_err("%s: pkg2path failed; assigning %s\n", __func__, buf);
87 return buf;
52} 88}
53 89
54#endif /* !CONFIG_SPARC */ 90#endif /* !CONFIG_SPARC */
@@ -132,47 +168,6 @@ static char * __init of_pdt_get_one_property(phandle node, const char *name)
132 return buf; 168 return buf;
133} 169}
134 170
135static char * __init of_pdt_try_pkg2path(phandle node)
136{
137 char *res, *buf = NULL;
138 int len;
139
140 if (!of_pdt_prom_ops->pkg2path)
141 return NULL;
142
143 if (of_pdt_prom_ops->pkg2path(node, buf, 0, &len))
144 return NULL;
145 buf = prom_early_alloc(len + 1);
146 if (of_pdt_prom_ops->pkg2path(node, buf, len, &len)) {
147 pr_err("%s: package-to-path failed\n", __func__);
148 return NULL;
149 }
150
151 res = strrchr(buf, '/');
152 if (!res) {
153 pr_err("%s: couldn't find / in %s\n", __func__, buf);
154 return NULL;
155 }
156 return res+1;
157}
158
159/*
160 * When fetching the node's name, first try using package-to-path; if
161 * that fails (either because the arch hasn't supplied a PROM callback,
162 * or some other random failure), fall back to just looking at the node's
163 * 'name' property.
164 */
165static char * __init of_pdt_build_name(phandle node)
166{
167 char *buf;
168
169 buf = of_pdt_try_pkg2path(node);
170 if (!buf)
171 buf = of_pdt_get_one_property(node, "name");
172
173 return buf;
174}
175
176static struct device_node * __init of_pdt_create_node(phandle node, 171static struct device_node * __init of_pdt_create_node(phandle node,
177 struct device_node *parent) 172 struct device_node *parent)
178{ 173{
@@ -187,7 +182,7 @@ static struct device_node * __init of_pdt_create_node(phandle node,
187 182
188 kref_init(&dp->kref); 183 kref_init(&dp->kref);
189 184
190 dp->name = of_pdt_build_name(node); 185 dp->name = of_pdt_get_one_property(node, "name");
191 dp->type = of_pdt_get_one_property(node, "device_type"); 186 dp->type = of_pdt_get_one_property(node, "device_type");
192 dp->phandle = node; 187 dp->phandle = node;
193 188
@@ -198,26 +193,6 @@ static struct device_node * __init of_pdt_create_node(phandle node,
198 return dp; 193 return dp;
199} 194}
200 195
201static char * __init of_pdt_build_full_name(struct device_node *dp)
202{
203 int len, ourlen, plen;
204 char *n;
205
206 plen = strlen(dp->parent->full_name);
207 ourlen = strlen(of_pdt_node_name(dp));
208 len = ourlen + plen + 2;
209
210 n = prom_early_alloc(len);
211 strcpy(n, dp->parent->full_name);
212 if (!of_node_is_root(dp->parent)) {
213 strcpy(n + plen, "/");
214 plen++;
215 }
216 strcpy(n + plen, of_pdt_node_name(dp));
217
218 return n;
219}
220
221static struct device_node * __init of_pdt_build_tree(struct device_node *parent, 196static struct device_node * __init of_pdt_build_tree(struct device_node *parent,
222 phandle node, 197 phandle node,
223 struct device_node ***nextp) 198 struct device_node ***nextp)
@@ -240,9 +215,6 @@ static struct device_node * __init of_pdt_build_tree(struct device_node *parent,
240 *(*nextp) = dp; 215 *(*nextp) = dp;
241 *nextp = &dp->allnext; 216 *nextp = &dp->allnext;
242 217
243#if defined(CONFIG_SPARC)
244 dp->path_component_name = build_path_component(dp);
245#endif
246 dp->full_name = of_pdt_build_full_name(dp); 218 dp->full_name = of_pdt_build_full_name(dp);
247 219
248 dp->child = of_pdt_build_tree(dp, 220 dp->child = of_pdt_build_tree(dp,
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 88246dd46452..d86ea8b01137 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -431,7 +431,7 @@ static void pci_device_shutdown(struct device *dev)
431 pci_msix_shutdown(pci_dev); 431 pci_msix_shutdown(pci_dev);
432} 432}
433 433
434#ifdef CONFIG_PM_OPS 434#ifdef CONFIG_PM
435 435
436/* Auxiliary functions used for system resume and run-time resume. */ 436/* Auxiliary functions used for system resume and run-time resume. */
437 437
@@ -1059,7 +1059,7 @@ static int pci_pm_runtime_idle(struct device *dev)
1059 1059
1060#endif /* !CONFIG_PM_RUNTIME */ 1060#endif /* !CONFIG_PM_RUNTIME */
1061 1061
1062#ifdef CONFIG_PM_OPS 1062#ifdef CONFIG_PM
1063 1063
1064const struct dev_pm_ops pci_dev_pm_ops = { 1064const struct dev_pm_ops pci_dev_pm_ops = {
1065 .prepare = pci_pm_prepare, 1065 .prepare = pci_pm_prepare,
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 3a5a6fcc0ead..492b7d807fe8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = {
243 243
244#ifdef CONFIG_PCI_MSI 244#ifdef CONFIG_PCI_MSI
245static int pci_frontend_enable_msix(struct pci_dev *dev, 245static int pci_frontend_enable_msix(struct pci_dev *dev,
246 int **vector, int nvec) 246 int vector[], int nvec)
247{ 247{
248 int err; 248 int err;
249 int i; 249 int i;
@@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
277 if (likely(!err)) { 277 if (likely(!err)) {
278 if (likely(!op.value)) { 278 if (likely(!op.value)) {
279 /* we get the result */ 279 /* we get the result */
280 for (i = 0; i < nvec; i++) 280 for (i = 0; i < nvec; i++) {
281 *(*vector+i) = op.msix_entries[i].vector; 281 if (op.msix_entries[i].vector <= 0) {
282 return 0; 282 dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
283 i, op.msix_entries[i].vector);
284 err = -EINVAL;
285 vector[i] = -1;
286 continue;
287 }
288 vector[i] = op.msix_entries[i].vector;
289 }
283 } else { 290 } else {
284 printk(KERN_DEBUG "enable msix get value %x\n", 291 printk(KERN_DEBUG "enable msix get value %x\n",
285 op.value); 292 op.value);
286 return op.value;
287 } 293 }
288 } else { 294 } else {
289 dev_err(&dev->dev, "enable msix get err %x\n", err); 295 dev_err(&dev->dev, "enable msix get err %x\n", err);
290 return err;
291 } 296 }
297 return err;
292} 298}
293 299
294static void pci_frontend_disable_msix(struct pci_dev *dev) 300static void pci_frontend_disable_msix(struct pci_dev *dev)
@@ -310,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev)
310 dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); 316 dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
311} 317}
312 318
313static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) 319static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
314{ 320{
315 int err; 321 int err;
316 struct xen_pci_op op = { 322 struct xen_pci_op op = {
@@ -324,7 +330,13 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
324 330
325 err = do_pci_op(pdev, &op); 331 err = do_pci_op(pdev, &op);
326 if (likely(!err)) { 332 if (likely(!err)) {
327 *(*vector) = op.value; 333 vector[0] = op.value;
334 if (op.value <= 0) {
335 dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
336 op.value);
337 err = -EINVAL;
338 vector[0] = -1;
339 }
328 } else { 340 } else {
329 dev_err(&dev->dev, "pci frontend enable msi failed for dev " 341 dev_err(&dev->dev, "pci frontend enable msi failed for dev "
330 "%x:%x\n", op.bus, op.devfn); 342 "%x:%x\n", op.bus, op.devfn);
@@ -733,8 +745,7 @@ static void free_pdev(struct pcifront_device *pdev)
733 745
734 pcifront_free_roots(pdev); 746 pcifront_free_roots(pdev);
735 747
736 /*For PCIE_AER error handling job*/ 748 cancel_work_sync(&pdev->op_work);
737 flush_scheduled_work();
738 749
739 if (pdev->irq >= 0) 750 if (pdev->irq >= 0)
740 unbind_from_irqhandler(pdev->irq, pdev); 751 unbind_from_irqhandler(pdev->irq, pdev);
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 0bdda5b3ed55..42fbf1a75576 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -518,6 +518,8 @@ int pcmcia_enable_device(struct pcmcia_device *p_dev)
518 flags |= CONF_ENABLE_IOCARD; 518 flags |= CONF_ENABLE_IOCARD;
519 if (flags & CONF_ENABLE_IOCARD) 519 if (flags & CONF_ENABLE_IOCARD)
520 s->socket.flags |= SS_IOCARD; 520 s->socket.flags |= SS_IOCARD;
521 if (flags & CONF_ENABLE_ZVCARD)
522 s->socket.flags |= SS_ZVCARD | SS_IOCARD;
521 if (flags & CONF_ENABLE_SPKR) { 523 if (flags & CONF_ENABLE_SPKR) {
522 s->socket.flags |= SS_SPKR_ENA; 524 s->socket.flags |= SS_SPKR_ENA;
523 status = CCSR_AUDIO_ENA; 525 status = CCSR_AUDIO_ENA;
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index 3755e7c8c715..2c540542b5af 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -215,7 +215,7 @@ pxa2xx_pcmcia_frequency_change(struct soc_pcmcia_socket *skt,
215} 215}
216#endif 216#endif
217 217
218static void pxa2xx_configure_sockets(struct device *dev) 218void pxa2xx_configure_sockets(struct device *dev)
219{ 219{
220 struct pcmcia_low_level *ops = dev->platform_data; 220 struct pcmcia_low_level *ops = dev->platform_data;
221 /* 221 /*
diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h
index bb62ea87b8f9..b609b45469ed 100644
--- a/drivers/pcmcia/pxa2xx_base.h
+++ b/drivers/pcmcia/pxa2xx_base.h
@@ -1,3 +1,4 @@
1int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); 1int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt);
2void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); 2void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops);
3void pxa2xx_configure_sockets(struct device *dev);
3 4
diff --git a/drivers/pcmcia/pxa2xx_colibri.c b/drivers/pcmcia/pxa2xx_colibri.c
index c3f72192af66..a52039564e74 100644
--- a/drivers/pcmcia/pxa2xx_colibri.c
+++ b/drivers/pcmcia/pxa2xx_colibri.c
@@ -181,6 +181,9 @@ static int __init colibri_pcmcia_init(void)
181{ 181{
182 int ret; 182 int ret;
183 183
184 if (!machine_is_colibri() && !machine_is_colibri320())
185 return -ENODEV;
186
184 colibri_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1); 187 colibri_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1);
185 if (!colibri_pcmcia_device) 188 if (!colibri_pcmcia_device)
186 return -ENOMEM; 189 return -ENOMEM;
diff --git a/drivers/pcmcia/pxa2xx_lubbock.c b/drivers/pcmcia/pxa2xx_lubbock.c
index b9f8c8fb42bd..25afe637c657 100644
--- a/drivers/pcmcia/pxa2xx_lubbock.c
+++ b/drivers/pcmcia/pxa2xx_lubbock.c
@@ -226,6 +226,7 @@ int pcmcia_lubbock_init(struct sa1111_dev *sadev)
226 lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); 226 lubbock_set_misc_wr((1 << 15) | (1 << 14), 0);
227 227
228 pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); 228 pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops);
229 pxa2xx_configure_sockets(&sadev->dev);
229 ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, 230 ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops,
230 pxa2xx_drv_pcmcia_add_one); 231 pxa2xx_drv_pcmcia_add_one);
231 } 232 }
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index d163bc2e2b9e..a59af5b24f0a 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -227,7 +227,7 @@ config SONYPI_COMPAT
227config IDEAPAD_LAPTOP 227config IDEAPAD_LAPTOP
228 tristate "Lenovo IdeaPad Laptop Extras" 228 tristate "Lenovo IdeaPad Laptop Extras"
229 depends on ACPI 229 depends on ACPI
230 depends on RFKILL 230 depends on RFKILL && INPUT
231 select INPUT_SPARSEKMAP 231 select INPUT_SPARSEKMAP
232 help 232 help
233 This is a driver for the rfkill switches on Lenovo IdeaPad netbooks. 233 This is a driver for the rfkill switches on Lenovo IdeaPad netbooks.
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index c5c4b8c32eb8..38b34a73866a 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -84,7 +84,7 @@ MODULE_LICENSE("GPL");
84 */ 84 */
85#define AMW0_GUID1 "67C3371D-95A3-4C37-BB61-DD47B491DAAB" 85#define AMW0_GUID1 "67C3371D-95A3-4C37-BB61-DD47B491DAAB"
86#define AMW0_GUID2 "431F16ED-0C2B-444C-B267-27DEB140CF9C" 86#define AMW0_GUID2 "431F16ED-0C2B-444C-B267-27DEB140CF9C"
87#define WMID_GUID1 "6AF4F258-B401-42fd-BE91-3D4AC2D7C0D3" 87#define WMID_GUID1 "6AF4F258-B401-42FD-BE91-3D4AC2D7C0D3"
88#define WMID_GUID2 "95764E09-FB56-4e83-B31A-37761F60994A" 88#define WMID_GUID2 "95764E09-FB56-4e83-B31A-37761F60994A"
89#define WMID_GUID3 "61EF69EA-865C-4BC3-A502-A0DEBA0CB531" 89#define WMID_GUID3 "61EF69EA-865C-4BC3-A502-A0DEBA0CB531"
90 90
@@ -1280,7 +1280,7 @@ static ssize_t set_bool_threeg(struct device *dev,
1280 return -EINVAL; 1280 return -EINVAL;
1281 return count; 1281 return count;
1282} 1282}
1283static DEVICE_ATTR(threeg, S_IWUGO | S_IRUGO | S_IWUSR, show_bool_threeg, 1283static DEVICE_ATTR(threeg, S_IRUGO | S_IWUSR, show_bool_threeg,
1284 set_bool_threeg); 1284 set_bool_threeg);
1285 1285
1286static ssize_t show_interface(struct device *dev, struct device_attribute *attr, 1286static ssize_t show_interface(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c
index 4633fd8532cc..fe495939c307 100644
--- a/drivers/platform/x86/asus_acpi.c
+++ b/drivers/platform/x86/asus_acpi.c
@@ -1081,14 +1081,8 @@ static int asus_hotk_add_fs(struct acpi_device *device)
1081 struct proc_dir_entry *proc; 1081 struct proc_dir_entry *proc;
1082 mode_t mode; 1082 mode_t mode;
1083 1083
1084 /*
1085 * If parameter uid or gid is not changed, keep the default setting for
1086 * our proc entries (-rw-rw-rw-) else, it means we care about security,
1087 * and then set to -rw-rw----
1088 */
1089
1090 if ((asus_uid == 0) && (asus_gid == 0)) { 1084 if ((asus_uid == 0) && (asus_gid == 0)) {
1091 mode = S_IFREG | S_IRUGO | S_IWUGO; 1085 mode = S_IFREG | S_IRUGO | S_IWUSR | S_IWGRP;
1092 } else { 1086 } else {
1093 mode = S_IFREG | S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP; 1087 mode = S_IFREG | S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP;
1094 printk(KERN_WARNING " asus_uid and asus_gid parameters are " 1088 printk(KERN_WARNING " asus_uid and asus_gid parameters are "
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index 34657f96b5a5..ad24ef36f9f7 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -290,9 +290,12 @@ static int dell_rfkill_set(void *data, bool blocked)
290 dell_send_request(buffer, 17, 11); 290 dell_send_request(buffer, 17, 11);
291 291
292 /* If the hardware switch controls this radio, and the hardware 292 /* If the hardware switch controls this radio, and the hardware
293 switch is disabled, don't allow changing the software state */ 293 switch is disabled, don't allow changing the software state.
294 If the hardware switch is reported as not supported, always
295 fire the SMI to toggle the killswitch. */
294 if ((hwswitch_state & BIT(hwswitch_bit)) && 296 if ((hwswitch_state & BIT(hwswitch_bit)) &&
295 !(buffer->output[1] & BIT(16))) { 297 !(buffer->output[1] & BIT(16)) &&
298 (buffer->output[1] & BIT(0))) {
296 ret = -EINVAL; 299 ret = -EINVAL;
297 goto out; 300 goto out;
298 } 301 }
@@ -398,6 +401,23 @@ static const struct file_operations dell_debugfs_fops = {
398 401
399static void dell_update_rfkill(struct work_struct *ignored) 402static void dell_update_rfkill(struct work_struct *ignored)
400{ 403{
404 int status;
405
406 get_buffer();
407 dell_send_request(buffer, 17, 11);
408 status = buffer->output[1];
409 release_buffer();
410
411 /* if hardware rfkill is not supported, set it explicitly */
412 if (!(status & BIT(0))) {
413 if (wifi_rfkill)
414 dell_rfkill_set((void *)1, !((status & BIT(17)) >> 17));
415 if (bluetooth_rfkill)
416 dell_rfkill_set((void *)2, !((status & BIT(18)) >> 18));
417 if (wwan_rfkill)
418 dell_rfkill_set((void *)3, !((status & BIT(19)) >> 19));
419 }
420
401 if (wifi_rfkill) 421 if (wifi_rfkill)
402 dell_rfkill_query(wifi_rfkill, (void *)1); 422 dell_rfkill_query(wifi_rfkill, (void *)1);
403 if (bluetooth_rfkill) 423 if (bluetooth_rfkill)
diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c
index 930e62762365..61433d492862 100644
--- a/drivers/platform/x86/intel_pmic_gpio.c
+++ b/drivers/platform/x86/intel_pmic_gpio.c
@@ -60,69 +60,20 @@ enum pmic_gpio_register {
60#define GPOSW_DOU 0x08 60#define GPOSW_DOU 0x08
61#define GPOSW_RDRV 0x30 61#define GPOSW_RDRV 0x30
62 62
63#define GPIO_UPDATE_TYPE 0x80000000
63 64
64#define NUM_GPIO 24 65#define NUM_GPIO 24
65 66
66struct pmic_gpio_irq {
67 spinlock_t lock;
68 u32 trigger[NUM_GPIO];
69 u32 dirty;
70 struct work_struct work;
71};
72
73
74struct pmic_gpio { 67struct pmic_gpio {
68 struct mutex buslock;
75 struct gpio_chip chip; 69 struct gpio_chip chip;
76 struct pmic_gpio_irq irqtypes;
77 void *gpiointr; 70 void *gpiointr;
78 int irq; 71 int irq;
79 unsigned irq_base; 72 unsigned irq_base;
73 unsigned int update_type;
74 u32 trigger_type;
80}; 75};
81 76
82static void pmic_program_irqtype(int gpio, int type)
83{
84 if (type & IRQ_TYPE_EDGE_RISING)
85 intel_scu_ipc_update_register(GPIO0 + gpio, 0x20, 0x20);
86 else
87 intel_scu_ipc_update_register(GPIO0 + gpio, 0x00, 0x20);
88
89 if (type & IRQ_TYPE_EDGE_FALLING)
90 intel_scu_ipc_update_register(GPIO0 + gpio, 0x10, 0x10);
91 else
92 intel_scu_ipc_update_register(GPIO0 + gpio, 0x00, 0x10);
93};
94
95static void pmic_irqtype_work(struct work_struct *work)
96{
97 struct pmic_gpio_irq *t =
98 container_of(work, struct pmic_gpio_irq, work);
99 unsigned long flags;
100 int i;
101 u16 type;
102
103 spin_lock_irqsave(&t->lock, flags);
104 /* As we drop the lock, we may need multiple scans if we race the
105 pmic_irq_type function */
106 while (t->dirty) {
107 /*
108 * For each pin that has the dirty bit set send an IPC
109 * message to configure the hardware via the PMIC
110 */
111 for (i = 0; i < NUM_GPIO; i++) {
112 if (!(t->dirty & (1 << i)))
113 continue;
114 t->dirty &= ~(1 << i);
115 /* We can't trust the array entry or dirty
116 once the lock is dropped */
117 type = t->trigger[i];
118 spin_unlock_irqrestore(&t->lock, flags);
119 pmic_program_irqtype(i, type);
120 spin_lock_irqsave(&t->lock, flags);
121 }
122 }
123 spin_unlock_irqrestore(&t->lock, flags);
124}
125
126static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset) 77static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
127{ 78{
128 if (offset > 8) { 79 if (offset > 8) {
@@ -190,25 +141,24 @@ static void pmic_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
190 1 << (offset - 16)); 141 1 << (offset - 16));
191} 142}
192 143
193static int pmic_irq_type(unsigned irq, unsigned type) 144/*
145 * This is called from genirq with pg->buslock locked and
146 * irq_desc->lock held. We can not access the scu bus here, so we
147 * store the change and update in the bus_sync_unlock() function below
148 */
149static int pmic_irq_type(struct irq_data *data, unsigned type)
194{ 150{
195 struct pmic_gpio *pg = get_irq_chip_data(irq); 151 struct pmic_gpio *pg = irq_data_get_irq_chip_data(data);
196 u32 gpio = irq - pg->irq_base; 152 u32 gpio = data->irq - pg->irq_base;
197 unsigned long flags;
198 153
199 if (gpio >= pg->chip.ngpio) 154 if (gpio >= pg->chip.ngpio)
200 return -EINVAL; 155 return -EINVAL;
201 156
202 spin_lock_irqsave(&pg->irqtypes.lock, flags); 157 pg->trigger_type = type;
203 pg->irqtypes.trigger[gpio] = type; 158 pg->update_type = gpio | GPIO_UPDATE_TYPE;
204 pg->irqtypes.dirty |= (1 << gpio);
205 spin_unlock_irqrestore(&pg->irqtypes.lock, flags);
206 schedule_work(&pg->irqtypes.work);
207 return 0; 159 return 0;
208} 160}
209 161
210
211
212static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset) 162static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
213{ 163{
214 struct pmic_gpio *pg = container_of(chip, struct pmic_gpio, chip); 164 struct pmic_gpio *pg = container_of(chip, struct pmic_gpio, chip);
@@ -217,38 +167,32 @@ static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
217} 167}
218 168
219/* the gpiointr register is read-clear, so just do nothing. */ 169/* the gpiointr register is read-clear, so just do nothing. */
220static void pmic_irq_unmask(unsigned irq) 170static void pmic_irq_unmask(struct irq_data *data) { }
221{
222};
223 171
224static void pmic_irq_mask(unsigned irq) 172static void pmic_irq_mask(struct irq_data *data) { }
225{
226};
227 173
228static struct irq_chip pmic_irqchip = { 174static struct irq_chip pmic_irqchip = {
229 .name = "PMIC-GPIO", 175 .name = "PMIC-GPIO",
230 .mask = pmic_irq_mask, 176 .irq_mask = pmic_irq_mask,
231 .unmask = pmic_irq_unmask, 177 .irq_unmask = pmic_irq_unmask,
232 .set_type = pmic_irq_type, 178 .irq_set_type = pmic_irq_type,
233}; 179};
234 180
235static void pmic_irq_handler(unsigned irq, struct irq_desc *desc) 181static irqreturn_t pmic_irq_handler(int irq, void *data)
236{ 182{
237 struct pmic_gpio *pg = (struct pmic_gpio *)get_irq_data(irq); 183 struct pmic_gpio *pg = data;
238 u8 intsts = *((u8 *)pg->gpiointr + 4); 184 u8 intsts = *((u8 *)pg->gpiointr + 4);
239 int gpio; 185 int gpio;
186 irqreturn_t ret = IRQ_NONE;
240 187
241 for (gpio = 0; gpio < 8; gpio++) { 188 for (gpio = 0; gpio < 8; gpio++) {
242 if (intsts & (1 << gpio)) { 189 if (intsts & (1 << gpio)) {
243 pr_debug("pmic pin %d triggered\n", gpio); 190 pr_debug("pmic pin %d triggered\n", gpio);
244 generic_handle_irq(pg->irq_base + gpio); 191 generic_handle_irq(pg->irq_base + gpio);
192 ret = IRQ_HANDLED;
245 } 193 }
246 } 194 }
247 195 return ret;
248 if (desc->chip->irq_eoi)
249 desc->chip->irq_eoi(irq_get_irq_data(irq));
250 else
251 dev_warn(pg->chip.dev, "missing EOI handler for irq %d\n", irq);
252} 196}
253 197
254static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev) 198static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev)
@@ -297,8 +241,7 @@ static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev)
297 pg->chip.can_sleep = 1; 241 pg->chip.can_sleep = 1;
298 pg->chip.dev = dev; 242 pg->chip.dev = dev;
299 243
300 INIT_WORK(&pg->irqtypes.work, pmic_irqtype_work); 244 mutex_init(&pg->buslock);
301 spin_lock_init(&pg->irqtypes.lock);
302 245
303 pg->chip.dev = dev; 246 pg->chip.dev = dev;
304 retval = gpiochip_add(&pg->chip); 247 retval = gpiochip_add(&pg->chip);
@@ -306,8 +249,13 @@ static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev)
306 printk(KERN_ERR "%s: Can not add pmic gpio chip.\n", __func__); 249 printk(KERN_ERR "%s: Can not add pmic gpio chip.\n", __func__);
307 goto err; 250 goto err;
308 } 251 }
309 set_irq_data(pg->irq, pg); 252
310 set_irq_chained_handler(pg->irq, pmic_irq_handler); 253 retval = request_irq(pg->irq, pmic_irq_handler, 0, "pmic", pg);
254 if (retval) {
255 printk(KERN_WARNING "pmic: Interrupt request failed\n");
256 goto err;
257 }
258
311 for (i = 0; i < 8; i++) { 259 for (i = 0; i < 8; i++) {
312 set_irq_chip_and_handler_name(i + pg->irq_base, &pmic_irqchip, 260 set_irq_chip_and_handler_name(i + pg->irq_base, &pmic_irqchip,
313 handle_simple_irq, "demux"); 261 handle_simple_irq, "demux");
diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c
index 1fe0f1feff71..865ef78d6f1a 100644
--- a/drivers/platform/x86/tc1100-wmi.c
+++ b/drivers/platform/x86/tc1100-wmi.c
@@ -162,7 +162,7 @@ set_bool_##value(struct device *dev, struct device_attribute *attr, \
162 return -EINVAL; \ 162 return -EINVAL; \
163 return count; \ 163 return count; \
164} \ 164} \
165static DEVICE_ATTR(value, S_IWUGO | S_IRUGO | S_IWUSR, \ 165static DEVICE_ATTR(value, S_IRUGO | S_IWUSR, \
166 show_bool_##value, set_bool_##value); 166 show_bool_##value, set_bool_##value);
167 167
168show_set_bool(wireless, TC1100_INSTANCE_WIRELESS); 168show_set_bool(wireless, TC1100_INSTANCE_WIRELESS);
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index dd599585c6a9..eb9922385ef8 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2275,16 +2275,12 @@ static void tpacpi_input_send_key(const unsigned int scancode)
2275 if (keycode != KEY_RESERVED) { 2275 if (keycode != KEY_RESERVED) {
2276 mutex_lock(&tpacpi_inputdev_send_mutex); 2276 mutex_lock(&tpacpi_inputdev_send_mutex);
2277 2277
2278 input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN, scancode);
2278 input_report_key(tpacpi_inputdev, keycode, 1); 2279 input_report_key(tpacpi_inputdev, keycode, 1);
2279 if (keycode == KEY_UNKNOWN)
2280 input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
2281 scancode);
2282 input_sync(tpacpi_inputdev); 2280 input_sync(tpacpi_inputdev);
2283 2281
2282 input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN, scancode);
2284 input_report_key(tpacpi_inputdev, keycode, 0); 2283 input_report_key(tpacpi_inputdev, keycode, 0);
2285 if (keycode == KEY_UNKNOWN)
2286 input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
2287 scancode);
2288 input_sync(tpacpi_inputdev); 2284 input_sync(tpacpi_inputdev);
2289 2285
2290 mutex_unlock(&tpacpi_inputdev_send_mutex); 2286 mutex_unlock(&tpacpi_inputdev_send_mutex);
diff --git a/drivers/pps/generators/Kconfig b/drivers/pps/generators/Kconfig
index f3a73dd77660..e4c4f3dc0728 100644
--- a/drivers/pps/generators/Kconfig
+++ b/drivers/pps/generators/Kconfig
@@ -6,7 +6,7 @@ comment "PPS generators support"
6 6
7config PPS_GENERATOR_PARPORT 7config PPS_GENERATOR_PARPORT
8 tristate "Parallel port PPS signal generator" 8 tristate "Parallel port PPS signal generator"
9 depends on PARPORT 9 depends on PARPORT && BROKEN
10 help 10 help
11 If you say yes here you get support for a PPS signal generator which 11 If you say yes here you get support for a PPS signal generator which
12 utilizes STROBE pin of a parallel port to send PPS signals. It uses 12 utilizes STROBE pin of a parallel port to send PPS signals. It uses
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
index cba1b43f7519..a4e8eb9fece6 100644
--- a/drivers/pps/kapi.c
+++ b/drivers/pps/kapi.c
@@ -168,7 +168,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
168{ 168{
169 unsigned long flags; 169 unsigned long flags;
170 int captured = 0; 170 int captured = 0;
171 struct pps_ktime ts_real; 171 struct pps_ktime ts_real = { .sec = 0, .nsec = 0, .flags = 0 };
172 172
173 /* check event type */ 173 /* check event type */
174 BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0); 174 BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0);
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index 76b41853a877..1269fbd2deca 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -77,9 +77,9 @@ rio_read_config(struct file *filp, struct kobject *kobj,
77 77
78 /* Several chips lock up trying to read undefined config space */ 78 /* Several chips lock up trying to read undefined config space */
79 if (capable(CAP_SYS_ADMIN)) 79 if (capable(CAP_SYS_ADMIN))
80 size = 0x200000; 80 size = RIO_MAINT_SPACE_SZ;
81 81
82 if (off > size) 82 if (off >= size)
83 return 0; 83 return 0;
84 if (off + count > size) { 84 if (off + count > size) {
85 size -= off; 85 size -= off;
@@ -147,10 +147,10 @@ rio_write_config(struct file *filp, struct kobject *kobj,
147 loff_t init_off = off; 147 loff_t init_off = off;
148 u8 *data = (u8 *) buf; 148 u8 *data = (u8 *) buf;
149 149
150 if (off > 0x200000) 150 if (off >= RIO_MAINT_SPACE_SZ)
151 return 0; 151 return 0;
152 if (off + count > 0x200000) { 152 if (off + count > RIO_MAINT_SPACE_SZ) {
153 size = 0x200000 - off; 153 size = RIO_MAINT_SPACE_SZ - off;
154 count = size; 154 count = size;
155 } 155 }
156 156
@@ -200,7 +200,7 @@ static struct bin_attribute rio_config_attr = {
200 .name = "config", 200 .name = "config",
201 .mode = S_IRUGO | S_IWUSR, 201 .mode = S_IRUGO | S_IWUSR,
202 }, 202 },
203 .size = 0x200000, 203 .size = RIO_MAINT_SPACE_SZ,
204 .read = rio_read_config, 204 .read = rio_read_config,
205 .write = rio_write_config, 205 .write = rio_write_config,
206}; 206};
diff --git a/drivers/regulator/mc13xxx-regulator-core.c b/drivers/regulator/mc13xxx-regulator-core.c
index f53d31b950d4..2bb5de1f2421 100644
--- a/drivers/regulator/mc13xxx-regulator-core.c
+++ b/drivers/regulator/mc13xxx-regulator-core.c
@@ -174,7 +174,7 @@ static int mc13xxx_regulator_get_voltage(struct regulator_dev *rdev)
174 174
175 dev_dbg(rdev_get_dev(rdev), "%s id: %d val: %d\n", __func__, id, val); 175 dev_dbg(rdev_get_dev(rdev), "%s id: %d val: %d\n", __func__, id, val);
176 176
177 BUG_ON(val < 0 || val > mc13xxx_regulators[id].desc.n_voltages); 177 BUG_ON(val > mc13xxx_regulators[id].desc.n_voltages);
178 178
179 return mc13xxx_regulators[id].voltages[val]; 179 return mc13xxx_regulators[id].voltages[val];
180} 180}
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index 8b0d2c4bde91..06df898842c0 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -120,6 +120,7 @@ static unsigned int wm831x_dcdc_get_mode(struct regulator_dev *rdev)
120 return REGULATOR_MODE_IDLE; 120 return REGULATOR_MODE_IDLE;
121 default: 121 default:
122 BUG(); 122 BUG();
123 return -EINVAL;
123 } 124 }
124} 125}
125 126
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index cdd97192dc69..4941cade319f 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -97,6 +97,18 @@ config RTC_INTF_DEV
97 97
98 If unsure, say Y. 98 If unsure, say Y.
99 99
100config RTC_INTF_DEV_UIE_EMUL
101 bool "RTC UIE emulation on dev interface"
102 depends on RTC_INTF_DEV
103 help
104 Provides an emulation for RTC_UIE if the underlying rtc chip
105 driver does not expose RTC_UIE ioctls. Those requests generate
106 once-per-second update interrupts, used for synchronization.
107
108 The emulation code will read the time from the hardware
109 clock several times per second, please enable this option
110 only if you know that you really need it.
111
100config RTC_DRV_TEST 112config RTC_DRV_TEST
101 tristate "Test driver/device" 113 tristate "Test driver/device"
102 help 114 help
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index c404b61386bf..09b4437b3e61 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -117,6 +117,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
117 struct module *owner) 117 struct module *owner)
118{ 118{
119 struct rtc_device *rtc; 119 struct rtc_device *rtc;
120 struct rtc_wkalrm alrm;
120 int id, err; 121 int id, err;
121 122
122 if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) { 123 if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) {
@@ -166,6 +167,12 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
166 rtc->pie_timer.function = rtc_pie_update_irq; 167 rtc->pie_timer.function = rtc_pie_update_irq;
167 rtc->pie_enabled = 0; 168 rtc->pie_enabled = 0;
168 169
170 /* Check to see if there is an ALARM already set in hw */
171 err = __rtc_read_alarm(rtc, &alrm);
172
173 if (!err && !rtc_valid_tm(&alrm.time))
174 rtc_set_alarm(rtc, &alrm);
175
169 strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE); 176 strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
170 dev_set_name(&rtc->dev, "rtc%d", id); 177 dev_set_name(&rtc->dev, "rtc%d", id);
171 178
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index a0c01967244d..8ec6b069a7f5 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -116,6 +116,186 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
116} 116}
117EXPORT_SYMBOL_GPL(rtc_set_mmss); 117EXPORT_SYMBOL_GPL(rtc_set_mmss);
118 118
119static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
120{
121 int err;
122
123 err = mutex_lock_interruptible(&rtc->ops_lock);
124 if (err)
125 return err;
126
127 if (rtc->ops == NULL)
128 err = -ENODEV;
129 else if (!rtc->ops->read_alarm)
130 err = -EINVAL;
131 else {
132 memset(alarm, 0, sizeof(struct rtc_wkalrm));
133 err = rtc->ops->read_alarm(rtc->dev.parent, alarm);
134 }
135
136 mutex_unlock(&rtc->ops_lock);
137 return err;
138}
139
140int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
141{
142 int err;
143 struct rtc_time before, now;
144 int first_time = 1;
145 unsigned long t_now, t_alm;
146 enum { none, day, month, year } missing = none;
147 unsigned days;
148
149 /* The lower level RTC driver may return -1 in some fields,
150 * creating invalid alarm->time values, for reasons like:
151 *
152 * - The hardware may not be capable of filling them in;
153 * many alarms match only on time-of-day fields, not
154 * day/month/year calendar data.
155 *
156 * - Some hardware uses illegal values as "wildcard" match
157 * values, which non-Linux firmware (like a BIOS) may try
158 * to set up as e.g. "alarm 15 minutes after each hour".
159 * Linux uses only oneshot alarms.
160 *
161 * When we see that here, we deal with it by using values from
162 * a current RTC timestamp for any missing (-1) values. The
163 * RTC driver prevents "periodic alarm" modes.
164 *
165 * But this can be racey, because some fields of the RTC timestamp
166 * may have wrapped in the interval since we read the RTC alarm,
167 * which would lead to us inserting inconsistent values in place
168 * of the -1 fields.
169 *
170 * Reading the alarm and timestamp in the reverse sequence
171 * would have the same race condition, and not solve the issue.
172 *
173 * So, we must first read the RTC timestamp,
174 * then read the RTC alarm value,
175 * and then read a second RTC timestamp.
176 *
177 * If any fields of the second timestamp have changed
178 * when compared with the first timestamp, then we know
179 * our timestamp may be inconsistent with that used by
180 * the low-level rtc_read_alarm_internal() function.
181 *
182 * So, when the two timestamps disagree, we just loop and do
183 * the process again to get a fully consistent set of values.
184 *
185 * This could all instead be done in the lower level driver,
186 * but since more than one lower level RTC implementation needs it,
187 * then it's probably best best to do it here instead of there..
188 */
189
190 /* Get the "before" timestamp */
191 err = rtc_read_time(rtc, &before);
192 if (err < 0)
193 return err;
194 do {
195 if (!first_time)
196 memcpy(&before, &now, sizeof(struct rtc_time));
197 first_time = 0;
198
199 /* get the RTC alarm values, which may be incomplete */
200 err = rtc_read_alarm_internal(rtc, alarm);
201 if (err)
202 return err;
203
204 /* full-function RTCs won't have such missing fields */
205 if (rtc_valid_tm(&alarm->time) == 0)
206 return 0;
207
208 /* get the "after" timestamp, to detect wrapped fields */
209 err = rtc_read_time(rtc, &now);
210 if (err < 0)
211 return err;
212
213 /* note that tm_sec is a "don't care" value here: */
214 } while ( before.tm_min != now.tm_min
215 || before.tm_hour != now.tm_hour
216 || before.tm_mon != now.tm_mon
217 || before.tm_year != now.tm_year);
218
219 /* Fill in the missing alarm fields using the timestamp; we
220 * know there's at least one since alarm->time is invalid.
221 */
222 if (alarm->time.tm_sec == -1)
223 alarm->time.tm_sec = now.tm_sec;
224 if (alarm->time.tm_min == -1)
225 alarm->time.tm_min = now.tm_min;
226 if (alarm->time.tm_hour == -1)
227 alarm->time.tm_hour = now.tm_hour;
228
229 /* For simplicity, only support date rollover for now */
230 if (alarm->time.tm_mday == -1) {
231 alarm->time.tm_mday = now.tm_mday;
232 missing = day;
233 }
234 if (alarm->time.tm_mon == -1) {
235 alarm->time.tm_mon = now.tm_mon;
236 if (missing == none)
237 missing = month;
238 }
239 if (alarm->time.tm_year == -1) {
240 alarm->time.tm_year = now.tm_year;
241 if (missing == none)
242 missing = year;
243 }
244
245 /* with luck, no rollover is needed */
246 rtc_tm_to_time(&now, &t_now);
247 rtc_tm_to_time(&alarm->time, &t_alm);
248 if (t_now < t_alm)
249 goto done;
250
251 switch (missing) {
252
253 /* 24 hour rollover ... if it's now 10am Monday, an alarm that
254 * that will trigger at 5am will do so at 5am Tuesday, which
255 * could also be in the next month or year. This is a common
256 * case, especially for PCs.
257 */
258 case day:
259 dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day");
260 t_alm += 24 * 60 * 60;
261 rtc_time_to_tm(t_alm, &alarm->time);
262 break;
263
264 /* Month rollover ... if it's the 31th, an alarm on the 3rd will
265 * be next month. An alarm matching on the 30th, 29th, or 28th
266 * may end up in the month after that! Many newer PCs support
267 * this type of alarm.
268 */
269 case month:
270 dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month");
271 do {
272 if (alarm->time.tm_mon < 11)
273 alarm->time.tm_mon++;
274 else {
275 alarm->time.tm_mon = 0;
276 alarm->time.tm_year++;
277 }
278 days = rtc_month_days(alarm->time.tm_mon,
279 alarm->time.tm_year);
280 } while (days < alarm->time.tm_mday);
281 break;
282
283 /* Year rollover ... easy except for leap years! */
284 case year:
285 dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year");
286 do {
287 alarm->time.tm_year++;
288 } while (rtc_valid_tm(&alarm->time) != 0);
289 break;
290
291 default:
292 dev_warn(&rtc->dev, "alarm rollover not handled\n");
293 }
294
295done:
296 return 0;
297}
298
119int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) 299int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
120{ 300{
121 int err; 301 int err;
@@ -209,9 +389,8 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
209 } 389 }
210 390
211 if (err) 391 if (err)
212 return err; 392 /* nothing */;
213 393 else if (!rtc->ops)
214 if (!rtc->ops)
215 err = -ENODEV; 394 err = -ENODEV;
216 else if (!rtc->ops->alarm_irq_enable) 395 else if (!rtc->ops->alarm_irq_enable)
217 err = -EINVAL; 396 err = -EINVAL;
@@ -229,6 +408,12 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
229 if (err) 408 if (err)
230 return err; 409 return err;
231 410
411#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
412 if (enabled == 0 && rtc->uie_irq_active) {
413 mutex_unlock(&rtc->ops_lock);
414 return rtc_dev_update_irq_enable_emul(rtc, 0);
415 }
416#endif
232 /* make sure we're changing state */ 417 /* make sure we're changing state */
233 if (rtc->uie_rtctimer.enabled == enabled) 418 if (rtc->uie_rtctimer.enabled == enabled)
234 goto out; 419 goto out;
@@ -248,6 +433,16 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
248 433
249out: 434out:
250 mutex_unlock(&rtc->ops_lock); 435 mutex_unlock(&rtc->ops_lock);
436#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
437 /*
438 * Enable emulation if the driver did not provide
439 * the update_irq_enable function pointer or if returned
440 * -EINVAL to signal that it has been configured without
441 * interrupts or that are not available at the moment.
442 */
443 if (err == -EINVAL)
444 err = rtc_dev_update_irq_enable_emul(rtc, enabled);
445#endif
251 return err; 446 return err;
252 447
253} 448}
@@ -263,7 +458,7 @@ EXPORT_SYMBOL_GPL(rtc_update_irq_enable);
263 * 458 *
264 * Triggers the registered irq_task function callback. 459 * Triggers the registered irq_task function callback.
265 */ 460 */
266static void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode) 461void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode)
267{ 462{
268 unsigned long flags; 463 unsigned long flags;
269 464
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 26d1cf5d19ae..518a76ec71ca 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -183,33 +183,6 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
183 return 0; 183 return 0;
184} 184}
185 185
186/*
187 * Handle commands from user-space
188 */
189static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
190 unsigned long arg)
191{
192 int ret = 0;
193
194 pr_debug("%s(): cmd=%08x, arg=%08lx.\n", __func__, cmd, arg);
195
196 /* important: scrub old status before enabling IRQs */
197 switch (cmd) {
198 case RTC_UIE_OFF: /* update off */
199 at91_sys_write(AT91_RTC_IDR, AT91_RTC_SECEV);
200 break;
201 case RTC_UIE_ON: /* update on */
202 at91_sys_write(AT91_RTC_SCCR, AT91_RTC_SECEV);
203 at91_sys_write(AT91_RTC_IER, AT91_RTC_SECEV);
204 break;
205 default:
206 ret = -ENOIOCTLCMD;
207 break;
208 }
209
210 return ret;
211}
212
213static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 186static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
214{ 187{
215 pr_debug("%s(): cmd=%08x\n", __func__, enabled); 188 pr_debug("%s(): cmd=%08x\n", __func__, enabled);
@@ -269,7 +242,6 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id)
269} 242}
270 243
271static const struct rtc_class_ops at91_rtc_ops = { 244static const struct rtc_class_ops at91_rtc_ops = {
272 .ioctl = at91_rtc_ioctl,
273 .read_time = at91_rtc_readtime, 245 .read_time = at91_rtc_readtime,
274 .set_time = at91_rtc_settime, 246 .set_time = at91_rtc_settime,
275 .read_alarm = at91_rtc_readalarm, 247 .read_alarm = at91_rtc_readalarm,
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index c36749e4c926..a3ad957507dc 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -216,33 +216,6 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
216 return 0; 216 return 0;
217} 217}
218 218
219/*
220 * Handle commands from user-space
221 */
222static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
223 unsigned long arg)
224{
225 struct sam9_rtc *rtc = dev_get_drvdata(dev);
226 int ret = 0;
227 u32 mr = rtt_readl(rtc, MR);
228
229 dev_dbg(dev, "ioctl: cmd=%08x, arg=%08lx, mr %08x\n", cmd, arg, mr);
230
231 switch (cmd) {
232 case RTC_UIE_OFF: /* update off */
233 rtt_writel(rtc, MR, mr & ~AT91_RTT_RTTINCIEN);
234 break;
235 case RTC_UIE_ON: /* update on */
236 rtt_writel(rtc, MR, mr | AT91_RTT_RTTINCIEN);
237 break;
238 default:
239 ret = -ENOIOCTLCMD;
240 break;
241 }
242
243 return ret;
244}
245
246static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 219static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
247{ 220{
248 struct sam9_rtc *rtc = dev_get_drvdata(dev); 221 struct sam9_rtc *rtc = dev_get_drvdata(dev);
@@ -303,13 +276,12 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *_rtc)
303} 276}
304 277
305static const struct rtc_class_ops at91_rtc_ops = { 278static const struct rtc_class_ops at91_rtc_ops = {
306 .ioctl = at91_rtc_ioctl,
307 .read_time = at91_rtc_readtime, 279 .read_time = at91_rtc_readtime,
308 .set_time = at91_rtc_settime, 280 .set_time = at91_rtc_settime,
309 .read_alarm = at91_rtc_readalarm, 281 .read_alarm = at91_rtc_readalarm,
310 .set_alarm = at91_rtc_setalarm, 282 .set_alarm = at91_rtc_setalarm,
311 .proc = at91_rtc_proc, 283 .proc = at91_rtc_proc,
312 .alarm_irq_enabled = at91_rtc_alarm_irq_enable, 284 .alarm_irq_enable = at91_rtc_alarm_irq_enable,
313}; 285};
314 286
315/* 287/*
diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index 17971d93354d..ca9cff85ab8a 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -240,32 +240,6 @@ static void bfin_rtc_int_set_alarm(struct bfin_rtc *rtc)
240 */ 240 */
241 bfin_rtc_int_set(rtc->rtc_alarm.tm_yday == -1 ? RTC_ISTAT_ALARM : RTC_ISTAT_ALARM_DAY); 241 bfin_rtc_int_set(rtc->rtc_alarm.tm_yday == -1 ? RTC_ISTAT_ALARM : RTC_ISTAT_ALARM_DAY);
242} 242}
243static int bfin_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
244{
245 struct bfin_rtc *rtc = dev_get_drvdata(dev);
246 int ret = 0;
247
248 dev_dbg_stamp(dev);
249
250 bfin_rtc_sync_pending(dev);
251
252 switch (cmd) {
253 case RTC_UIE_ON:
254 dev_dbg_stamp(dev);
255 bfin_rtc_int_set(RTC_ISTAT_SEC);
256 break;
257 case RTC_UIE_OFF:
258 dev_dbg_stamp(dev);
259 bfin_rtc_int_clear(~RTC_ISTAT_SEC);
260 break;
261
262 default:
263 dev_dbg_stamp(dev);
264 ret = -ENOIOCTLCMD;
265 }
266
267 return ret;
268}
269 243
270static int bfin_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 244static int bfin_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
271{ 245{
@@ -358,7 +332,6 @@ static int bfin_rtc_proc(struct device *dev, struct seq_file *seq)
358} 332}
359 333
360static struct rtc_class_ops bfin_rtc_ops = { 334static struct rtc_class_ops bfin_rtc_ops = {
361 .ioctl = bfin_rtc_ioctl,
362 .read_time = bfin_rtc_read_time, 335 .read_time = bfin_rtc_read_time,
363 .set_time = bfin_rtc_set_time, 336 .set_time = bfin_rtc_set_time,
364 .read_alarm = bfin_rtc_read_alarm, 337 .read_alarm = bfin_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index c7ff8df347e7..911e75cdc125 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -37,6 +37,8 @@
37#include <linux/mod_devicetable.h> 37#include <linux/mod_devicetable.h>
38#include <linux/log2.h> 38#include <linux/log2.h>
39#include <linux/pm.h> 39#include <linux/pm.h>
40#include <linux/of.h>
41#include <linux/of_platform.h>
40 42
41/* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */ 43/* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
42#include <asm-generic/rtc.h> 44#include <asm-generic/rtc.h>
@@ -375,50 +377,6 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
375 return 0; 377 return 0;
376} 378}
377 379
378static int cmos_irq_set_freq(struct device *dev, int freq)
379{
380 struct cmos_rtc *cmos = dev_get_drvdata(dev);
381 int f;
382 unsigned long flags;
383
384 if (!is_valid_irq(cmos->irq))
385 return -ENXIO;
386
387 if (!is_power_of_2(freq))
388 return -EINVAL;
389 /* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */
390 f = ffs(freq);
391 if (f-- > 16)
392 return -EINVAL;
393 f = 16 - f;
394
395 spin_lock_irqsave(&rtc_lock, flags);
396 hpet_set_periodic_freq(freq);
397 CMOS_WRITE(RTC_REF_CLCK_32KHZ | f, RTC_FREQ_SELECT);
398 spin_unlock_irqrestore(&rtc_lock, flags);
399
400 return 0;
401}
402
403static int cmos_irq_set_state(struct device *dev, int enabled)
404{
405 struct cmos_rtc *cmos = dev_get_drvdata(dev);
406 unsigned long flags;
407
408 if (!is_valid_irq(cmos->irq))
409 return -ENXIO;
410
411 spin_lock_irqsave(&rtc_lock, flags);
412
413 if (enabled)
414 cmos_irq_enable(cmos, RTC_PIE);
415 else
416 cmos_irq_disable(cmos, RTC_PIE);
417
418 spin_unlock_irqrestore(&rtc_lock, flags);
419 return 0;
420}
421
422static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled) 380static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
423{ 381{
424 struct cmos_rtc *cmos = dev_get_drvdata(dev); 382 struct cmos_rtc *cmos = dev_get_drvdata(dev);
@@ -438,25 +396,6 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
438 return 0; 396 return 0;
439} 397}
440 398
441static int cmos_update_irq_enable(struct device *dev, unsigned int enabled)
442{
443 struct cmos_rtc *cmos = dev_get_drvdata(dev);
444 unsigned long flags;
445
446 if (!is_valid_irq(cmos->irq))
447 return -EINVAL;
448
449 spin_lock_irqsave(&rtc_lock, flags);
450
451 if (enabled)
452 cmos_irq_enable(cmos, RTC_UIE);
453 else
454 cmos_irq_disable(cmos, RTC_UIE);
455
456 spin_unlock_irqrestore(&rtc_lock, flags);
457 return 0;
458}
459
460#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE) 399#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
461 400
462static int cmos_procfs(struct device *dev, struct seq_file *seq) 401static int cmos_procfs(struct device *dev, struct seq_file *seq)
@@ -501,10 +440,7 @@ static const struct rtc_class_ops cmos_rtc_ops = {
501 .read_alarm = cmos_read_alarm, 440 .read_alarm = cmos_read_alarm,
502 .set_alarm = cmos_set_alarm, 441 .set_alarm = cmos_set_alarm,
503 .proc = cmos_procfs, 442 .proc = cmos_procfs,
504 .irq_set_freq = cmos_irq_set_freq,
505 .irq_set_state = cmos_irq_set_state,
506 .alarm_irq_enable = cmos_alarm_irq_enable, 443 .alarm_irq_enable = cmos_alarm_irq_enable,
507 .update_irq_enable = cmos_update_irq_enable,
508}; 444};
509 445
510/*----------------------------------------------------------------*/ 446/*----------------------------------------------------------------*/
@@ -1123,6 +1059,47 @@ static struct pnp_driver cmos_pnp_driver = {
1123 1059
1124#endif /* CONFIG_PNP */ 1060#endif /* CONFIG_PNP */
1125 1061
1062#ifdef CONFIG_OF
1063static const struct of_device_id of_cmos_match[] = {
1064 {
1065 .compatible = "motorola,mc146818",
1066 },
1067 { },
1068};
1069MODULE_DEVICE_TABLE(of, of_cmos_match);
1070
1071static __init void cmos_of_init(struct platform_device *pdev)
1072{
1073 struct device_node *node = pdev->dev.of_node;
1074 struct rtc_time time;
1075 int ret;
1076 const __be32 *val;
1077
1078 if (!node)
1079 return;
1080
1081 val = of_get_property(node, "ctrl-reg", NULL);
1082 if (val)
1083 CMOS_WRITE(be32_to_cpup(val), RTC_CONTROL);
1084
1085 val = of_get_property(node, "freq-reg", NULL);
1086 if (val)
1087 CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT);
1088
1089 get_rtc_time(&time);
1090 ret = rtc_valid_tm(&time);
1091 if (ret) {
1092 struct rtc_time def_time = {
1093 .tm_year = 1,
1094 .tm_mday = 1,
1095 };
1096 set_rtc_time(&def_time);
1097 }
1098}
1099#else
1100static inline void cmos_of_init(struct platform_device *pdev) {}
1101#define of_cmos_match NULL
1102#endif
1126/*----------------------------------------------------------------*/ 1103/*----------------------------------------------------------------*/
1127 1104
1128/* Platform setup should have set up an RTC device, when PNP is 1105/* Platform setup should have set up an RTC device, when PNP is
@@ -1131,6 +1108,7 @@ static struct pnp_driver cmos_pnp_driver = {
1131 1108
1132static int __init cmos_platform_probe(struct platform_device *pdev) 1109static int __init cmos_platform_probe(struct platform_device *pdev)
1133{ 1110{
1111 cmos_of_init(pdev);
1134 cmos_wake_setup(&pdev->dev); 1112 cmos_wake_setup(&pdev->dev);
1135 return cmos_do_probe(&pdev->dev, 1113 return cmos_do_probe(&pdev->dev,
1136 platform_get_resource(pdev, IORESOURCE_IO, 0), 1114 platform_get_resource(pdev, IORESOURCE_IO, 0),
@@ -1162,6 +1140,7 @@ static struct platform_driver cmos_platform_driver = {
1162#ifdef CONFIG_PM 1140#ifdef CONFIG_PM
1163 .pm = &cmos_pm_ops, 1141 .pm = &cmos_pm_ops,
1164#endif 1142#endif
1143 .of_match_table = of_cmos_match,
1165 } 1144 }
1166}; 1145};
1167 1146
diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c
index 34647fc1ee98..8d46838dff8a 100644
--- a/drivers/rtc/rtc-davinci.c
+++ b/drivers/rtc/rtc-davinci.c
@@ -231,10 +231,6 @@ davinci_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
231 case RTC_WIE_OFF: 231 case RTC_WIE_OFF:
232 rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN; 232 rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN;
233 break; 233 break;
234 case RTC_UIE_OFF:
235 case RTC_UIE_ON:
236 ret = -ENOTTY;
237 break;
238 default: 234 default:
239 ret = -ENOIOCTLCMD; 235 ret = -ENOIOCTLCMD;
240 } 236 }
@@ -473,55 +469,6 @@ static int davinci_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
473 return 0; 469 return 0;
474} 470}
475 471
476static int davinci_rtc_irq_set_state(struct device *dev, int enabled)
477{
478 struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
479 unsigned long flags;
480 u8 rtc_ctrl;
481
482 spin_lock_irqsave(&davinci_rtc_lock, flags);
483
484 rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL);
485
486 if (enabled) {
487 while (rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL)
488 & PRTCSS_RTC_CTRL_WDTBUS)
489 cpu_relax();
490
491 rtc_ctrl |= PRTCSS_RTC_CTRL_TE;
492 rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
493
494 rtcss_write(davinci_rtc, 0x0, PRTCSS_RTC_CLKC_CNT);
495
496 rtc_ctrl |= PRTCSS_RTC_CTRL_TIEN |
497 PRTCSS_RTC_CTRL_TMMD |
498 PRTCSS_RTC_CTRL_TMRFLG;
499 } else
500 rtc_ctrl &= ~PRTCSS_RTC_CTRL_TIEN;
501
502 rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
503
504 spin_unlock_irqrestore(&davinci_rtc_lock, flags);
505
506 return 0;
507}
508
509static int davinci_rtc_irq_set_freq(struct device *dev, int freq)
510{
511 struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
512 unsigned long flags;
513 u16 tmr_counter = (0x8000 >> (ffs(freq) - 1));
514
515 spin_lock_irqsave(&davinci_rtc_lock, flags);
516
517 rtcss_write(davinci_rtc, tmr_counter & 0xFF, PRTCSS_RTC_TMR0);
518 rtcss_write(davinci_rtc, (tmr_counter & 0xFF00) >> 8, PRTCSS_RTC_TMR1);
519
520 spin_unlock_irqrestore(&davinci_rtc_lock, flags);
521
522 return 0;
523}
524
525static struct rtc_class_ops davinci_rtc_ops = { 472static struct rtc_class_ops davinci_rtc_ops = {
526 .ioctl = davinci_rtc_ioctl, 473 .ioctl = davinci_rtc_ioctl,
527 .read_time = davinci_rtc_read_time, 474 .read_time = davinci_rtc_read_time,
@@ -529,8 +476,6 @@ static struct rtc_class_ops davinci_rtc_ops = {
529 .alarm_irq_enable = davinci_rtc_alarm_irq_enable, 476 .alarm_irq_enable = davinci_rtc_alarm_irq_enable,
530 .read_alarm = davinci_rtc_read_alarm, 477 .read_alarm = davinci_rtc_read_alarm,
531 .set_alarm = davinci_rtc_set_alarm, 478 .set_alarm = davinci_rtc_set_alarm,
532 .irq_set_state = davinci_rtc_irq_set_state,
533 .irq_set_freq = davinci_rtc_irq_set_freq,
534}; 479};
535 480
536static int __init davinci_rtc_probe(struct platform_device *pdev) 481static int __init davinci_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 37c3cc1b3dd5..d0e06edb14c5 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -46,6 +46,105 @@ static int rtc_dev_open(struct inode *inode, struct file *file)
46 return err; 46 return err;
47} 47}
48 48
49#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
50/*
51 * Routine to poll RTC seconds field for change as often as possible,
52 * after first RTC_UIE use timer to reduce polling
53 */
54static void rtc_uie_task(struct work_struct *work)
55{
56 struct rtc_device *rtc =
57 container_of(work, struct rtc_device, uie_task);
58 struct rtc_time tm;
59 int num = 0;
60 int err;
61
62 err = rtc_read_time(rtc, &tm);
63
64 spin_lock_irq(&rtc->irq_lock);
65 if (rtc->stop_uie_polling || err) {
66 rtc->uie_task_active = 0;
67 } else if (rtc->oldsecs != tm.tm_sec) {
68 num = (tm.tm_sec + 60 - rtc->oldsecs) % 60;
69 rtc->oldsecs = tm.tm_sec;
70 rtc->uie_timer.expires = jiffies + HZ - (HZ/10);
71 rtc->uie_timer_active = 1;
72 rtc->uie_task_active = 0;
73 add_timer(&rtc->uie_timer);
74 } else if (schedule_work(&rtc->uie_task) == 0) {
75 rtc->uie_task_active = 0;
76 }
77 spin_unlock_irq(&rtc->irq_lock);
78 if (num)
79 rtc_handle_legacy_irq(rtc, num, RTC_UF);
80}
81static void rtc_uie_timer(unsigned long data)
82{
83 struct rtc_device *rtc = (struct rtc_device *)data;
84 unsigned long flags;
85
86 spin_lock_irqsave(&rtc->irq_lock, flags);
87 rtc->uie_timer_active = 0;
88 rtc->uie_task_active = 1;
89 if ((schedule_work(&rtc->uie_task) == 0))
90 rtc->uie_task_active = 0;
91 spin_unlock_irqrestore(&rtc->irq_lock, flags);
92}
93
94static int clear_uie(struct rtc_device *rtc)
95{
96 spin_lock_irq(&rtc->irq_lock);
97 if (rtc->uie_irq_active) {
98 rtc->stop_uie_polling = 1;
99 if (rtc->uie_timer_active) {
100 spin_unlock_irq(&rtc->irq_lock);
101 del_timer_sync(&rtc->uie_timer);
102 spin_lock_irq(&rtc->irq_lock);
103 rtc->uie_timer_active = 0;
104 }
105 if (rtc->uie_task_active) {
106 spin_unlock_irq(&rtc->irq_lock);
107 flush_scheduled_work();
108 spin_lock_irq(&rtc->irq_lock);
109 }
110 rtc->uie_irq_active = 0;
111 }
112 spin_unlock_irq(&rtc->irq_lock);
113 return 0;
114}
115
116static int set_uie(struct rtc_device *rtc)
117{
118 struct rtc_time tm;
119 int err;
120
121 err = rtc_read_time(rtc, &tm);
122 if (err)
123 return err;
124 spin_lock_irq(&rtc->irq_lock);
125 if (!rtc->uie_irq_active) {
126 rtc->uie_irq_active = 1;
127 rtc->stop_uie_polling = 0;
128 rtc->oldsecs = tm.tm_sec;
129 rtc->uie_task_active = 1;
130 if (schedule_work(&rtc->uie_task) == 0)
131 rtc->uie_task_active = 0;
132 }
133 rtc->irq_data = 0;
134 spin_unlock_irq(&rtc->irq_lock);
135 return 0;
136}
137
138int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled)
139{
140 if (enabled)
141 return set_uie(rtc);
142 else
143 return clear_uie(rtc);
144}
145EXPORT_SYMBOL(rtc_dev_update_irq_enable_emul);
146
147#endif /* CONFIG_RTC_INTF_DEV_UIE_EMUL */
49 148
50static ssize_t 149static ssize_t
51rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 150rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
@@ -387,6 +486,11 @@ void rtc_dev_prepare(struct rtc_device *rtc)
387 486
388 rtc->dev.devt = MKDEV(MAJOR(rtc_devt), rtc->id); 487 rtc->dev.devt = MKDEV(MAJOR(rtc_devt), rtc->id);
389 488
489#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
490 INIT_WORK(&rtc->uie_task, rtc_uie_task);
491 setup_timer(&rtc->uie_timer, rtc_uie_timer, (unsigned long)rtc);
492#endif
493
390 cdev_init(&rtc->char_dev, &rtc_dev_fops); 494 cdev_init(&rtc->char_dev, &rtc_dev_fops);
391 rtc->char_dev.owner = rtc->owner; 495 rtc->char_dev.owner = rtc->owner;
392} 496}
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 37268e97de49..3fffd708711f 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -397,29 +397,12 @@ static int ds1511_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
397 return 0; 397 return 0;
398} 398}
399 399
400static int ds1511_rtc_update_irq_enable(struct device *dev,
401 unsigned int enabled)
402{
403 struct platform_device *pdev = to_platform_device(dev);
404 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
405
406 if (pdata->irq <= 0)
407 return -EINVAL;
408 if (enabled)
409 pdata->irqen |= RTC_UF;
410 else
411 pdata->irqen &= ~RTC_UF;
412 ds1511_rtc_update_alarm(pdata);
413 return 0;
414}
415
416static const struct rtc_class_ops ds1511_rtc_ops = { 400static const struct rtc_class_ops ds1511_rtc_ops = {
417 .read_time = ds1511_rtc_read_time, 401 .read_time = ds1511_rtc_read_time,
418 .set_time = ds1511_rtc_set_time, 402 .set_time = ds1511_rtc_set_time,
419 .read_alarm = ds1511_rtc_read_alarm, 403 .read_alarm = ds1511_rtc_read_alarm,
420 .set_alarm = ds1511_rtc_set_alarm, 404 .set_alarm = ds1511_rtc_set_alarm,
421 .alarm_irq_enable = ds1511_rtc_alarm_irq_enable, 405 .alarm_irq_enable = ds1511_rtc_alarm_irq_enable,
422 .update_irq_enable = ds1511_rtc_update_irq_enable,
423}; 406};
424 407
425 static ssize_t 408 static ssize_t
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index ff432e2ca275..fee41b97c9e8 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -227,29 +227,12 @@ static int ds1553_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
227 return 0; 227 return 0;
228} 228}
229 229
230static int ds1553_rtc_update_irq_enable(struct device *dev,
231 unsigned int enabled)
232{
233 struct platform_device *pdev = to_platform_device(dev);
234 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
235
236 if (pdata->irq <= 0)
237 return -EINVAL;
238 if (enabled)
239 pdata->irqen |= RTC_UF;
240 else
241 pdata->irqen &= ~RTC_UF;
242 ds1553_rtc_update_alarm(pdata);
243 return 0;
244}
245
246static const struct rtc_class_ops ds1553_rtc_ops = { 230static const struct rtc_class_ops ds1553_rtc_ops = {
247 .read_time = ds1553_rtc_read_time, 231 .read_time = ds1553_rtc_read_time,
248 .set_time = ds1553_rtc_set_time, 232 .set_time = ds1553_rtc_set_time,
249 .read_alarm = ds1553_rtc_read_alarm, 233 .read_alarm = ds1553_rtc_read_alarm,
250 .set_alarm = ds1553_rtc_set_alarm, 234 .set_alarm = ds1553_rtc_set_alarm,
251 .alarm_irq_enable = ds1553_rtc_alarm_irq_enable, 235 .alarm_irq_enable = ds1553_rtc_alarm_irq_enable,
252 .update_irq_enable = ds1553_rtc_update_irq_enable,
253}; 236};
254 237
255static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj, 238static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index 23a9ee19764c..27b7bf672ac6 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * RTC client/driver for the Maxim/Dallas DS3232 Real-Time Clock over I2C 2 * RTC client/driver for the Maxim/Dallas DS3232 Real-Time Clock over I2C
3 * 3 *
4 * Copyright (C) 2009-2010 Freescale Semiconductor. 4 * Copyright (C) 2009-2011 Freescale Semiconductor.
5 * Author: Jack Lan <jack.lan@freescale.com> 5 * Author: Jack Lan <jack.lan@freescale.com>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify it 7 * This program is free software; you can redistribute it and/or modify it
@@ -141,9 +141,11 @@ static int ds3232_read_time(struct device *dev, struct rtc_time *time)
141 time->tm_hour = bcd2bin(hour); 141 time->tm_hour = bcd2bin(hour);
142 } 142 }
143 143
144 time->tm_wday = bcd2bin(week); 144 /* Day of the week in linux range is 0~6 while 1~7 in RTC chip */
145 time->tm_wday = bcd2bin(week) - 1;
145 time->tm_mday = bcd2bin(day); 146 time->tm_mday = bcd2bin(day);
146 time->tm_mon = bcd2bin(month & 0x7F); 147 /* linux tm_mon range:0~11, while month range is 1~12 in RTC chip */
148 time->tm_mon = bcd2bin(month & 0x7F) - 1;
147 if (century) 149 if (century)
148 add_century = 100; 150 add_century = 100;
149 151
@@ -162,9 +164,11 @@ static int ds3232_set_time(struct device *dev, struct rtc_time *time)
162 buf[0] = bin2bcd(time->tm_sec); 164 buf[0] = bin2bcd(time->tm_sec);
163 buf[1] = bin2bcd(time->tm_min); 165 buf[1] = bin2bcd(time->tm_min);
164 buf[2] = bin2bcd(time->tm_hour); 166 buf[2] = bin2bcd(time->tm_hour);
165 buf[3] = bin2bcd(time->tm_wday); /* Day of the week */ 167 /* Day of the week in linux range is 0~6 while 1~7 in RTC chip */
168 buf[3] = bin2bcd(time->tm_wday + 1);
166 buf[4] = bin2bcd(time->tm_mday); /* Date */ 169 buf[4] = bin2bcd(time->tm_mday); /* Date */
167 buf[5] = bin2bcd(time->tm_mon); 170 /* linux tm_mon range:0~11, while month range is 1~12 in RTC chip */
171 buf[5] = bin2bcd(time->tm_mon + 1);
168 if (time->tm_year >= 100) { 172 if (time->tm_year >= 100) {
169 buf[5] |= 0x80; 173 buf[5] |= 0x80;
170 buf[6] = bin2bcd(time->tm_year - 100); 174 buf[6] = bin2bcd(time->tm_year - 100);
@@ -335,23 +339,6 @@ static int ds3232_alarm_irq_enable(struct device *dev, unsigned int enabled)
335 return 0; 339 return 0;
336} 340}
337 341
338static int ds3232_update_irq_enable(struct device *dev, unsigned int enabled)
339{
340 struct i2c_client *client = to_i2c_client(dev);
341 struct ds3232 *ds3232 = i2c_get_clientdata(client);
342
343 if (client->irq <= 0)
344 return -EINVAL;
345
346 if (enabled)
347 ds3232->rtc->irq_data |= RTC_UF;
348 else
349 ds3232->rtc->irq_data &= ~RTC_UF;
350
351 ds3232_update_alarm(client);
352 return 0;
353}
354
355static irqreturn_t ds3232_irq(int irq, void *dev_id) 342static irqreturn_t ds3232_irq(int irq, void *dev_id)
356{ 343{
357 struct i2c_client *client = dev_id; 344 struct i2c_client *client = dev_id;
@@ -402,7 +389,6 @@ static const struct rtc_class_ops ds3232_rtc_ops = {
402 .read_alarm = ds3232_read_alarm, 389 .read_alarm = ds3232_read_alarm,
403 .set_alarm = ds3232_set_alarm, 390 .set_alarm = ds3232_set_alarm,
404 .alarm_irq_enable = ds3232_alarm_irq_enable, 391 .alarm_irq_enable = ds3232_alarm_irq_enable,
405 .update_irq_enable = ds3232_update_irq_enable,
406}; 392};
407 393
408static int __devinit ds3232_probe(struct i2c_client *client, 394static int __devinit ds3232_probe(struct i2c_client *client,
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index 2e16f72c9056..b6473631d182 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -168,12 +168,6 @@ static int jz4740_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
168 return ret; 168 return ret;
169} 169}
170 170
171static int jz4740_rtc_update_irq_enable(struct device *dev, unsigned int enable)
172{
173 struct jz4740_rtc *rtc = dev_get_drvdata(dev);
174 return jz4740_rtc_ctrl_set_bits(rtc, JZ_RTC_CTRL_1HZ_IRQ, enable);
175}
176
177static int jz4740_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) 171static int jz4740_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
178{ 172{
179 struct jz4740_rtc *rtc = dev_get_drvdata(dev); 173 struct jz4740_rtc *rtc = dev_get_drvdata(dev);
@@ -185,7 +179,6 @@ static struct rtc_class_ops jz4740_rtc_ops = {
185 .set_mmss = jz4740_rtc_set_mmss, 179 .set_mmss = jz4740_rtc_set_mmss,
186 .read_alarm = jz4740_rtc_read_alarm, 180 .read_alarm = jz4740_rtc_read_alarm,
187 .set_alarm = jz4740_rtc_set_alarm, 181 .set_alarm = jz4740_rtc_set_alarm,
188 .update_irq_enable = jz4740_rtc_update_irq_enable,
189 .alarm_irq_enable = jz4740_rtc_alarm_irq_enable, 182 .alarm_irq_enable = jz4740_rtc_alarm_irq_enable,
190}; 183};
191 184
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 5314b153bfba..c42006469559 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -282,12 +282,6 @@ static irqreturn_t mc13xxx_rtc_update_handler(int irq, void *dev)
282 return IRQ_HANDLED; 282 return IRQ_HANDLED;
283} 283}
284 284
285static int mc13xxx_rtc_update_irq_enable(struct device *dev,
286 unsigned int enabled)
287{
288 return mc13xxx_rtc_irq_enable(dev, enabled, MC13XXX_IRQ_1HZ);
289}
290
291static int mc13xxx_rtc_alarm_irq_enable(struct device *dev, 285static int mc13xxx_rtc_alarm_irq_enable(struct device *dev,
292 unsigned int enabled) 286 unsigned int enabled)
293{ 287{
@@ -300,7 +294,6 @@ static const struct rtc_class_ops mc13xxx_rtc_ops = {
300 .read_alarm = mc13xxx_rtc_read_alarm, 294 .read_alarm = mc13xxx_rtc_read_alarm,
301 .set_alarm = mc13xxx_rtc_set_alarm, 295 .set_alarm = mc13xxx_rtc_set_alarm,
302 .alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable, 296 .alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable,
303 .update_irq_enable = mc13xxx_rtc_update_irq_enable,
304}; 297};
305 298
306static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev) 299static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev)
diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c
index dfcdf0901d21..b40c1ff1ebc8 100644
--- a/drivers/rtc/rtc-mpc5121.c
+++ b/drivers/rtc/rtc-mpc5121.c
@@ -240,32 +240,12 @@ static int mpc5121_rtc_alarm_irq_enable(struct device *dev,
240 return 0; 240 return 0;
241} 241}
242 242
243static int mpc5121_rtc_update_irq_enable(struct device *dev,
244 unsigned int enabled)
245{
246 struct mpc5121_rtc_data *rtc = dev_get_drvdata(dev);
247 struct mpc5121_rtc_regs __iomem *regs = rtc->regs;
248 int val;
249
250 val = in_8(&regs->int_enable);
251
252 if (enabled)
253 val = (val & ~0x8) | 0x1;
254 else
255 val &= ~0x1;
256
257 out_8(&regs->int_enable, val);
258
259 return 0;
260}
261
262static const struct rtc_class_ops mpc5121_rtc_ops = { 243static const struct rtc_class_ops mpc5121_rtc_ops = {
263 .read_time = mpc5121_rtc_read_time, 244 .read_time = mpc5121_rtc_read_time,
264 .set_time = mpc5121_rtc_set_time, 245 .set_time = mpc5121_rtc_set_time,
265 .read_alarm = mpc5121_rtc_read_alarm, 246 .read_alarm = mpc5121_rtc_read_alarm,
266 .set_alarm = mpc5121_rtc_set_alarm, 247 .set_alarm = mpc5121_rtc_set_alarm,
267 .alarm_irq_enable = mpc5121_rtc_alarm_irq_enable, 248 .alarm_irq_enable = mpc5121_rtc_alarm_irq_enable,
268 .update_irq_enable = mpc5121_rtc_update_irq_enable,
269}; 249};
270 250
271static int __devinit mpc5121_rtc_probe(struct platform_device *op, 251static int __devinit mpc5121_rtc_probe(struct platform_device *op,
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 1db62db8469d..b86bc328463b 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -62,6 +62,17 @@ static inline int is_intr(u8 rtc_intr)
62 return rtc_intr & RTC_IRQMASK; 62 return rtc_intr & RTC_IRQMASK;
63} 63}
64 64
65static inline unsigned char vrtc_is_updating(void)
66{
67 unsigned char uip;
68 unsigned long flags;
69
70 spin_lock_irqsave(&rtc_lock, flags);
71 uip = (vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP);
72 spin_unlock_irqrestore(&rtc_lock, flags);
73 return uip;
74}
75
65/* 76/*
66 * rtc_time's year contains the increment over 1900, but vRTC's YEAR 77 * rtc_time's year contains the increment over 1900, but vRTC's YEAR
67 * register can't be programmed to value larger than 0x64, so vRTC 78 * register can't be programmed to value larger than 0x64, so vRTC
@@ -76,7 +87,7 @@ static int mrst_read_time(struct device *dev, struct rtc_time *time)
76{ 87{
77 unsigned long flags; 88 unsigned long flags;
78 89
79 if (rtc_is_updating()) 90 if (vrtc_is_updating())
80 mdelay(20); 91 mdelay(20);
81 92
82 spin_lock_irqsave(&rtc_lock, flags); 93 spin_lock_irqsave(&rtc_lock, flags);
@@ -236,25 +247,6 @@ static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t)
236 return 0; 247 return 0;
237} 248}
238 249
239static int mrst_irq_set_state(struct device *dev, int enabled)
240{
241 struct mrst_rtc *mrst = dev_get_drvdata(dev);
242 unsigned long flags;
243
244 if (!mrst->irq)
245 return -ENXIO;
246
247 spin_lock_irqsave(&rtc_lock, flags);
248
249 if (enabled)
250 mrst_irq_enable(mrst, RTC_PIE);
251 else
252 mrst_irq_disable(mrst, RTC_PIE);
253
254 spin_unlock_irqrestore(&rtc_lock, flags);
255 return 0;
256}
257
258/* Currently, the vRTC doesn't support UIE ON/OFF */ 250/* Currently, the vRTC doesn't support UIE ON/OFF */
259static int mrst_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 251static int mrst_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
260{ 252{
@@ -301,7 +293,6 @@ static const struct rtc_class_ops mrst_rtc_ops = {
301 .read_alarm = mrst_read_alarm, 293 .read_alarm = mrst_read_alarm,
302 .set_alarm = mrst_set_alarm, 294 .set_alarm = mrst_set_alarm,
303 .proc = mrst_procfs, 295 .proc = mrst_procfs,
304 .irq_set_state = mrst_irq_set_state,
305 .alarm_irq_enable = mrst_rtc_alarm_irq_enable, 296 .alarm_irq_enable = mrst_rtc_alarm_irq_enable,
306}; 297};
307 298
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 0b06c1e03fd5..826ab64a8fa9 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -274,12 +274,6 @@ static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
274 return 0; 274 return 0;
275} 275}
276 276
277static int mxc_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
278{
279 mxc_rtc_irq_enable(dev, RTC_1HZ_BIT, enabled);
280 return 0;
281}
282
283/* 277/*
284 * This function reads the current RTC time into tm in Gregorian date. 278 * This function reads the current RTC time into tm in Gregorian date.
285 */ 279 */
@@ -368,7 +362,6 @@ static struct rtc_class_ops mxc_rtc_ops = {
368 .read_alarm = mxc_rtc_read_alarm, 362 .read_alarm = mxc_rtc_read_alarm,
369 .set_alarm = mxc_rtc_set_alarm, 363 .set_alarm = mxc_rtc_set_alarm,
370 .alarm_irq_enable = mxc_rtc_alarm_irq_enable, 364 .alarm_irq_enable = mxc_rtc_alarm_irq_enable,
371 .update_irq_enable = mxc_rtc_update_irq_enable,
372}; 365};
373 366
374static int __init mxc_rtc_probe(struct platform_device *pdev) 367static int __init mxc_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index ddb0857e15a4..781068d62f23 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c
@@ -134,20 +134,6 @@ static void nuc900_rtc_bin2bcd(struct device *dev, struct rtc_time *settm,
134 gettm->bcd_hour = bin2bcd(settm->tm_hour) << 16; 134 gettm->bcd_hour = bin2bcd(settm->tm_hour) << 16;
135} 135}
136 136
137static int nuc900_update_irq_enable(struct device *dev, unsigned int enabled)
138{
139 struct nuc900_rtc *rtc = dev_get_drvdata(dev);
140
141 if (enabled)
142 __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)|
143 (TICKINTENB), rtc->rtc_reg + REG_RTC_RIER);
144 else
145 __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)&
146 (~TICKINTENB), rtc->rtc_reg + REG_RTC_RIER);
147
148 return 0;
149}
150
151static int nuc900_alarm_irq_enable(struct device *dev, unsigned int enabled) 137static int nuc900_alarm_irq_enable(struct device *dev, unsigned int enabled)
152{ 138{
153 struct nuc900_rtc *rtc = dev_get_drvdata(dev); 139 struct nuc900_rtc *rtc = dev_get_drvdata(dev);
@@ -234,7 +220,6 @@ static struct rtc_class_ops nuc900_rtc_ops = {
234 .read_alarm = nuc900_rtc_read_alarm, 220 .read_alarm = nuc900_rtc_read_alarm,
235 .set_alarm = nuc900_rtc_set_alarm, 221 .set_alarm = nuc900_rtc_set_alarm,
236 .alarm_irq_enable = nuc900_alarm_irq_enable, 222 .alarm_irq_enable = nuc900_alarm_irq_enable,
237 .update_irq_enable = nuc900_update_irq_enable,
238}; 223};
239 224
240static int __devinit nuc900_rtc_probe(struct platform_device *pdev) 225static int __devinit nuc900_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index b4dbf3a319b3..de0dd7b1f146 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -135,44 +135,6 @@ static irqreturn_t rtc_irq(int irq, void *rtc)
135 return IRQ_HANDLED; 135 return IRQ_HANDLED;
136} 136}
137 137
138#ifdef CONFIG_RTC_INTF_DEV
139
140static int
141omap_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
142{
143 u8 reg;
144
145 switch (cmd) {
146 case RTC_UIE_OFF:
147 case RTC_UIE_ON:
148 break;
149 default:
150 return -ENOIOCTLCMD;
151 }
152
153 local_irq_disable();
154 rtc_wait_not_busy();
155 reg = rtc_read(OMAP_RTC_INTERRUPTS_REG);
156 switch (cmd) {
157 /* UIE = Update Interrupt Enable (1/second) */
158 case RTC_UIE_OFF:
159 reg &= ~OMAP_RTC_INTERRUPTS_IT_TIMER;
160 break;
161 case RTC_UIE_ON:
162 reg |= OMAP_RTC_INTERRUPTS_IT_TIMER;
163 break;
164 }
165 rtc_wait_not_busy();
166 rtc_write(reg, OMAP_RTC_INTERRUPTS_REG);
167 local_irq_enable();
168
169 return 0;
170}
171
172#else
173#define omap_rtc_ioctl NULL
174#endif
175
176static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 138static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
177{ 139{
178 u8 reg; 140 u8 reg;
@@ -313,7 +275,6 @@ static int omap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
313} 275}
314 276
315static struct rtc_class_ops omap_rtc_ops = { 277static struct rtc_class_ops omap_rtc_ops = {
316 .ioctl = omap_rtc_ioctl,
317 .read_time = omap_rtc_read_time, 278 .read_time = omap_rtc_read_time,
318 .set_time = omap_rtc_set_time, 279 .set_time = omap_rtc_set_time,
319 .read_alarm = omap_rtc_read_alarm, 280 .read_alarm = omap_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
index 25c0b3fd44f1..a633abc42896 100644
--- a/drivers/rtc/rtc-pcap.c
+++ b/drivers/rtc/rtc-pcap.c
@@ -131,18 +131,12 @@ static int pcap_rtc_alarm_irq_enable(struct device *dev, unsigned int en)
131 return pcap_rtc_irq_enable(dev, PCAP_IRQ_TODA, en); 131 return pcap_rtc_irq_enable(dev, PCAP_IRQ_TODA, en);
132} 132}
133 133
134static int pcap_rtc_update_irq_enable(struct device *dev, unsigned int en)
135{
136 return pcap_rtc_irq_enable(dev, PCAP_IRQ_1HZ, en);
137}
138
139static const struct rtc_class_ops pcap_rtc_ops = { 134static const struct rtc_class_ops pcap_rtc_ops = {
140 .read_time = pcap_rtc_read_time, 135 .read_time = pcap_rtc_read_time,
141 .read_alarm = pcap_rtc_read_alarm, 136 .read_alarm = pcap_rtc_read_alarm,
142 .set_alarm = pcap_rtc_set_alarm, 137 .set_alarm = pcap_rtc_set_alarm,
143 .set_mmss = pcap_rtc_set_mmss, 138 .set_mmss = pcap_rtc_set_mmss,
144 .alarm_irq_enable = pcap_rtc_alarm_irq_enable, 139 .alarm_irq_enable = pcap_rtc_alarm_irq_enable,
145 .update_irq_enable = pcap_rtc_update_irq_enable,
146}; 140};
147 141
148static int __devinit pcap_rtc_probe(struct platform_device *pdev) 142static int __devinit pcap_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c
index 16edf94ab42f..f90c574f9d05 100644
--- a/drivers/rtc/rtc-pcf50633.c
+++ b/drivers/rtc/rtc-pcf50633.c
@@ -106,25 +106,6 @@ pcf50633_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
106 return 0; 106 return 0;
107} 107}
108 108
109static int
110pcf50633_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
111{
112 struct pcf50633_rtc *rtc = dev_get_drvdata(dev);
113 int err;
114
115 if (enabled)
116 err = pcf50633_irq_unmask(rtc->pcf, PCF50633_IRQ_SECOND);
117 else
118 err = pcf50633_irq_mask(rtc->pcf, PCF50633_IRQ_SECOND);
119
120 if (err < 0)
121 return err;
122
123 rtc->second_enabled = enabled;
124
125 return 0;
126}
127
128static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm) 109static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm)
129{ 110{
130 struct pcf50633_rtc *rtc; 111 struct pcf50633_rtc *rtc;
@@ -262,8 +243,7 @@ static struct rtc_class_ops pcf50633_rtc_ops = {
262 .set_time = pcf50633_rtc_set_time, 243 .set_time = pcf50633_rtc_set_time,
263 .read_alarm = pcf50633_rtc_read_alarm, 244 .read_alarm = pcf50633_rtc_read_alarm,
264 .set_alarm = pcf50633_rtc_set_alarm, 245 .set_alarm = pcf50633_rtc_set_alarm,
265 .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable, 246 .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable,
266 .update_irq_enable = pcf50633_rtc_update_irq_enable,
267}; 247};
268 248
269static void pcf50633_rtc_irq(int irq, void *data) 249static void pcf50633_rtc_irq(int irq, void *data)
diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c
index bbdb2f02798a..d554368c9f57 100644
--- a/drivers/rtc/rtc-pl030.c
+++ b/drivers/rtc/rtc-pl030.c
@@ -35,11 +35,6 @@ static irqreturn_t pl030_interrupt(int irq, void *dev_id)
35 return IRQ_HANDLED; 35 return IRQ_HANDLED;
36} 36}
37 37
38static int pl030_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
39{
40 return -ENOIOCTLCMD;
41}
42
43static int pl030_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) 38static int pl030_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
44{ 39{
45 struct pl030_rtc *rtc = dev_get_drvdata(dev); 40 struct pl030_rtc *rtc = dev_get_drvdata(dev);
@@ -96,7 +91,6 @@ static int pl030_set_time(struct device *dev, struct rtc_time *tm)
96} 91}
97 92
98static const struct rtc_class_ops pl030_ops = { 93static const struct rtc_class_ops pl030_ops = {
99 .ioctl = pl030_ioctl,
100 .read_time = pl030_read_time, 94 .read_time = pl030_read_time,
101 .set_time = pl030_set_time, 95 .set_time = pl030_set_time,
102 .read_alarm = pl030_read_alarm, 96 .read_alarm = pl030_read_alarm,
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index b7a6690e5b35..d829ea63c4fb 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -293,57 +293,6 @@ static int pl031_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
293 return ret; 293 return ret;
294} 294}
295 295
296/* Periodic interrupt is only available in ST variants. */
297static int pl031_irq_set_state(struct device *dev, int enabled)
298{
299 struct pl031_local *ldata = dev_get_drvdata(dev);
300
301 if (enabled == 1) {
302 /* Clear any pending timer interrupt. */
303 writel(RTC_BIT_PI, ldata->base + RTC_ICR);
304
305 writel(readl(ldata->base + RTC_IMSC) | RTC_BIT_PI,
306 ldata->base + RTC_IMSC);
307
308 /* Now start the timer */
309 writel(readl(ldata->base + RTC_TCR) | RTC_TCR_EN,
310 ldata->base + RTC_TCR);
311
312 } else {
313 writel(readl(ldata->base + RTC_IMSC) & (~RTC_BIT_PI),
314 ldata->base + RTC_IMSC);
315
316 /* Also stop the timer */
317 writel(readl(ldata->base + RTC_TCR) & (~RTC_TCR_EN),
318 ldata->base + RTC_TCR);
319 }
320 /* Wait at least 1 RTC32 clock cycle to ensure next access
321 * to RTC_TCR will succeed.
322 */
323 udelay(40);
324
325 return 0;
326}
327
328static int pl031_irq_set_freq(struct device *dev, int freq)
329{
330 struct pl031_local *ldata = dev_get_drvdata(dev);
331
332 /* Cant set timer if it is already enabled */
333 if (readl(ldata->base + RTC_TCR) & RTC_TCR_EN) {
334 dev_err(dev, "can't change frequency while timer enabled\n");
335 return -EINVAL;
336 }
337
338 /* If self start bit in RTC_TCR is set timer will start here,
339 * but we never set that bit. Instead we start the timer when
340 * set_state is called with enabled == 1.
341 */
342 writel(RTC_TIMER_FREQ / freq, ldata->base + RTC_TLR);
343
344 return 0;
345}
346
347static int pl031_remove(struct amba_device *adev) 296static int pl031_remove(struct amba_device *adev)
348{ 297{
349 struct pl031_local *ldata = dev_get_drvdata(&adev->dev); 298 struct pl031_local *ldata = dev_get_drvdata(&adev->dev);
@@ -440,8 +389,6 @@ static struct rtc_class_ops stv1_pl031_ops = {
440 .read_alarm = pl031_read_alarm, 389 .read_alarm = pl031_read_alarm,
441 .set_alarm = pl031_set_alarm, 390 .set_alarm = pl031_set_alarm,
442 .alarm_irq_enable = pl031_alarm_irq_enable, 391 .alarm_irq_enable = pl031_alarm_irq_enable,
443 .irq_set_state = pl031_irq_set_state,
444 .irq_set_freq = pl031_irq_set_freq,
445}; 392};
446 393
447/* And the second ST derivative */ 394/* And the second ST derivative */
@@ -451,8 +398,6 @@ static struct rtc_class_ops stv2_pl031_ops = {
451 .read_alarm = pl031_stv2_read_alarm, 398 .read_alarm = pl031_stv2_read_alarm,
452 .set_alarm = pl031_stv2_set_alarm, 399 .set_alarm = pl031_stv2_set_alarm,
453 .alarm_irq_enable = pl031_alarm_irq_enable, 400 .alarm_irq_enable = pl031_alarm_irq_enable,
454 .irq_set_state = pl031_irq_set_state,
455 .irq_set_freq = pl031_irq_set_freq,
456}; 401};
457 402
458static struct amba_id pl031_ids[] = { 403static struct amba_id pl031_ids[] = {
diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
index 242bbf86c74a..0a59fda5c09d 100644
--- a/drivers/rtc/rtc-proc.c
+++ b/drivers/rtc/rtc-proc.c
@@ -69,6 +69,14 @@ static int rtc_proc_show(struct seq_file *seq, void *offset)
69 alrm.enabled ? "yes" : "no"); 69 alrm.enabled ? "yes" : "no");
70 seq_printf(seq, "alrm_pending\t: %s\n", 70 seq_printf(seq, "alrm_pending\t: %s\n",
71 alrm.pending ? "yes" : "no"); 71 alrm.pending ? "yes" : "no");
72 seq_printf(seq, "update IRQ enabled\t: %s\n",
73 (rtc->uie_rtctimer.enabled) ? "yes" : "no");
74 seq_printf(seq, "periodic IRQ enabled\t: %s\n",
75 (rtc->pie_enabled) ? "yes" : "no");
76 seq_printf(seq, "periodic IRQ frequency\t: %d\n",
77 rtc->irq_freq);
78 seq_printf(seq, "max user IRQ frequency\t: %d\n",
79 rtc->max_user_freq);
72 } 80 }
73 81
74 seq_printf(seq, "24hr\t\t: yes\n"); 82 seq_printf(seq, "24hr\t\t: yes\n");
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index 29e867a1aaa8..fc9f4991574b 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -209,32 +209,6 @@ static void pxa_rtc_release(struct device *dev)
209 free_irq(pxa_rtc->irq_1Hz, dev); 209 free_irq(pxa_rtc->irq_1Hz, dev);
210} 210}
211 211
212static int pxa_periodic_irq_set_freq(struct device *dev, int freq)
213{
214 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
215 int period_ms;
216
217 if (freq < 1 || freq > MAXFREQ_PERIODIC)
218 return -EINVAL;
219
220 period_ms = 1000 / freq;
221 rtc_writel(pxa_rtc, PIAR, period_ms);
222
223 return 0;
224}
225
226static int pxa_periodic_irq_set_state(struct device *dev, int enabled)
227{
228 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
229
230 if (enabled)
231 rtsr_set_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
232 else
233 rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
234
235 return 0;
236}
237
238static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled) 212static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled)
239{ 213{
240 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); 214 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -250,21 +224,6 @@ static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled)
250 return 0; 224 return 0;
251} 225}
252 226
253static int pxa_update_irq_enable(struct device *dev, unsigned int enabled)
254{
255 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
256
257 spin_lock_irq(&pxa_rtc->lock);
258
259 if (enabled)
260 rtsr_set_bits(pxa_rtc, RTSR_HZE);
261 else
262 rtsr_clear_bits(pxa_rtc, RTSR_HZE);
263
264 spin_unlock_irq(&pxa_rtc->lock);
265 return 0;
266}
267
268static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm) 227static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm)
269{ 228{
270 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); 229 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -346,10 +305,7 @@ static const struct rtc_class_ops pxa_rtc_ops = {
346 .read_alarm = pxa_rtc_read_alarm, 305 .read_alarm = pxa_rtc_read_alarm,
347 .set_alarm = pxa_rtc_set_alarm, 306 .set_alarm = pxa_rtc_set_alarm,
348 .alarm_irq_enable = pxa_alarm_irq_enable, 307 .alarm_irq_enable = pxa_alarm_irq_enable,
349 .update_irq_enable = pxa_update_irq_enable,
350 .proc = pxa_rtc_proc, 308 .proc = pxa_rtc_proc,
351 .irq_set_state = pxa_periodic_irq_set_state,
352 .irq_set_freq = pxa_periodic_irq_set_freq,
353}; 309};
354 310
355static int __init pxa_rtc_probe(struct platform_device *pdev) 311static int __init pxa_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 6aaa1550e3b1..85c1b848dd72 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -281,57 +281,6 @@ static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
281 return rs5c372_set_datetime(to_i2c_client(dev), tm); 281 return rs5c372_set_datetime(to_i2c_client(dev), tm);
282} 282}
283 283
284#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
285
286static int
287rs5c_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
288{
289 struct i2c_client *client = to_i2c_client(dev);
290 struct rs5c372 *rs5c = i2c_get_clientdata(client);
291 unsigned char buf;
292 int status, addr;
293
294 buf = rs5c->regs[RS5C_REG_CTRL1];
295 switch (cmd) {
296 case RTC_UIE_OFF:
297 case RTC_UIE_ON:
298 /* some 327a modes use a different IRQ pin for 1Hz irqs */
299 if (rs5c->type == rtc_rs5c372a
300 && (buf & RS5C372A_CTRL1_SL1))
301 return -ENOIOCTLCMD;
302 default:
303 return -ENOIOCTLCMD;
304 }
305
306 status = rs5c_get_regs(rs5c);
307 if (status < 0)
308 return status;
309
310 addr = RS5C_ADDR(RS5C_REG_CTRL1);
311 switch (cmd) {
312 case RTC_UIE_OFF: /* update off */
313 buf &= ~RS5C_CTRL1_CT_MASK;
314 break;
315 case RTC_UIE_ON: /* update on */
316 buf &= ~RS5C_CTRL1_CT_MASK;
317 buf |= RS5C_CTRL1_CT4;
318 break;
319 }
320
321 if (i2c_smbus_write_byte_data(client, addr, buf) < 0) {
322 printk(KERN_WARNING "%s: can't update alarm\n",
323 rs5c->rtc->name);
324 status = -EIO;
325 } else
326 rs5c->regs[RS5C_REG_CTRL1] = buf;
327
328 return status;
329}
330
331#else
332#define rs5c_rtc_ioctl NULL
333#endif
334
335 284
336static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 285static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
337{ 286{
@@ -480,7 +429,6 @@ static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
480 429
481static const struct rtc_class_ops rs5c372_rtc_ops = { 430static const struct rtc_class_ops rs5c372_rtc_ops = {
482 .proc = rs5c372_rtc_proc, 431 .proc = rs5c372_rtc_proc,
483 .ioctl = rs5c_rtc_ioctl,
484 .read_time = rs5c372_rtc_read_time, 432 .read_time = rs5c372_rtc_read_time,
485 .set_time = rs5c372_rtc_set_time, 433 .set_time = rs5c372_rtc_set_time,
486 .read_alarm = rs5c_read_alarm, 434 .read_alarm = rs5c_read_alarm,
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index af32a62e12a8..fde172fb2abe 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -424,37 +424,12 @@ static int rx8025_alarm_irq_enable(struct device *dev, unsigned int enabled)
424 return 0; 424 return 0;
425} 425}
426 426
427static int rx8025_irq_set_state(struct device *dev, int enabled)
428{
429 struct i2c_client *client = to_i2c_client(dev);
430 struct rx8025_data *rx8025 = i2c_get_clientdata(client);
431 int ctrl1;
432 int err;
433
434 if (client->irq <= 0)
435 return -ENXIO;
436
437 ctrl1 = rx8025->ctrl1 & ~RX8025_BIT_CTRL1_CT;
438 if (enabled)
439 ctrl1 |= RX8025_BIT_CTRL1_CT_1HZ;
440 if (ctrl1 != rx8025->ctrl1) {
441 rx8025->ctrl1 = ctrl1;
442 err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1,
443 rx8025->ctrl1);
444 if (err)
445 return err;
446 }
447
448 return 0;
449}
450
451static struct rtc_class_ops rx8025_rtc_ops = { 427static struct rtc_class_ops rx8025_rtc_ops = {
452 .read_time = rx8025_get_time, 428 .read_time = rx8025_get_time,
453 .set_time = rx8025_set_time, 429 .set_time = rx8025_set_time,
454 .read_alarm = rx8025_read_alarm, 430 .read_alarm = rx8025_read_alarm,
455 .set_alarm = rx8025_set_alarm, 431 .set_alarm = rx8025_set_alarm,
456 .alarm_irq_enable = rx8025_alarm_irq_enable, 432 .alarm_irq_enable = rx8025_alarm_irq_enable,
457 .irq_set_state = rx8025_irq_set_state,
458}; 433};
459 434
460/* 435/*
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index cf953ecbfca9..714964913e5e 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -77,47 +77,18 @@ static irqreturn_t s3c_rtc_tickirq(int irq, void *id)
77} 77}
78 78
79/* Update control registers */ 79/* Update control registers */
80static void s3c_rtc_setaie(int to) 80static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
81{ 81{
82 unsigned int tmp; 82 unsigned int tmp;
83 83
84 pr_debug("%s: aie=%d\n", __func__, to); 84 pr_debug("%s: aie=%d\n", __func__, enabled);
85 85
86 tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN; 86 tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
87 87
88 if (to) 88 if (enabled)
89 tmp |= S3C2410_RTCALM_ALMEN; 89 tmp |= S3C2410_RTCALM_ALMEN;
90 90
91 writeb(tmp, s3c_rtc_base + S3C2410_RTCALM); 91 writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
92}
93
94static int s3c_rtc_setpie(struct device *dev, int enabled)
95{
96 unsigned int tmp;
97
98 pr_debug("%s: pie=%d\n", __func__, enabled);
99
100 spin_lock_irq(&s3c_rtc_pie_lock);
101
102 if (s3c_rtc_cpu_type == TYPE_S3C64XX) {
103 tmp = readw(s3c_rtc_base + S3C2410_RTCCON);
104 tmp &= ~S3C64XX_RTCCON_TICEN;
105
106 if (enabled)
107 tmp |= S3C64XX_RTCCON_TICEN;
108
109 writew(tmp, s3c_rtc_base + S3C2410_RTCCON);
110 } else {
111 tmp = readb(s3c_rtc_base + S3C2410_TICNT);
112 tmp &= ~S3C2410_TICNT_ENABLE;
113
114 if (enabled)
115 tmp |= S3C2410_TICNT_ENABLE;
116
117 writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
118 }
119
120 spin_unlock_irq(&s3c_rtc_pie_lock);
121 92
122 return 0; 93 return 0;
123} 94}
@@ -308,7 +279,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
308 279
309 writeb(alrm_en, base + S3C2410_RTCALM); 280 writeb(alrm_en, base + S3C2410_RTCALM);
310 281
311 s3c_rtc_setaie(alrm->enabled); 282 s3c_rtc_setaie(dev, alrm->enabled);
312 283
313 return 0; 284 return 0;
314} 285}
@@ -377,8 +348,6 @@ static const struct rtc_class_ops s3c_rtcops = {
377 .set_time = s3c_rtc_settime, 348 .set_time = s3c_rtc_settime,
378 .read_alarm = s3c_rtc_getalarm, 349 .read_alarm = s3c_rtc_getalarm,
379 .set_alarm = s3c_rtc_setalarm, 350 .set_alarm = s3c_rtc_setalarm,
380 .irq_set_freq = s3c_rtc_setfreq,
381 .irq_set_state = s3c_rtc_setpie,
382 .proc = s3c_rtc_proc, 351 .proc = s3c_rtc_proc,
383 .alarm_irq_enable = s3c_rtc_setaie, 352 .alarm_irq_enable = s3c_rtc_setaie,
384}; 353};
@@ -440,7 +409,7 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
440 rtc_device_unregister(rtc); 409 rtc_device_unregister(rtc);
441 410
442 s3c_rtc_setpie(&dev->dev, 0); 411 s3c_rtc_setpie(&dev->dev, 0);
443 s3c_rtc_setaie(0); 412 s3c_rtc_setaie(&dev->dev, 0);
444 413
445 clk_disable(rtc_clk); 414 clk_disable(rtc_clk);
446 clk_put(rtc_clk); 415 clk_put(rtc_clk);
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 5dfe5ffcb0d3..0b40bb88a884 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -43,7 +43,6 @@
43#define RTC_DEF_TRIM 0 43#define RTC_DEF_TRIM 0
44 44
45static const unsigned long RTC_FREQ = 1024; 45static const unsigned long RTC_FREQ = 1024;
46static unsigned long timer_freq;
47static struct rtc_time rtc_alarm; 46static struct rtc_time rtc_alarm;
48static DEFINE_SPINLOCK(sa1100_rtc_lock); 47static DEFINE_SPINLOCK(sa1100_rtc_lock);
49 48
@@ -156,114 +155,11 @@ static irqreturn_t sa1100_rtc_interrupt(int irq, void *dev_id)
156 return IRQ_HANDLED; 155 return IRQ_HANDLED;
157} 156}
158 157
159static int sa1100_irq_set_freq(struct device *dev, int freq)
160{
161 if (freq < 1 || freq > timer_freq) {
162 return -EINVAL;
163 } else {
164 struct rtc_device *rtc = (struct rtc_device *)dev;
165
166 rtc->irq_freq = freq;
167
168 return 0;
169 }
170}
171
172static int rtc_timer1_count;
173
174static int sa1100_irq_set_state(struct device *dev, int enabled)
175{
176 spin_lock_irq(&sa1100_rtc_lock);
177 if (enabled) {
178 struct rtc_device *rtc = (struct rtc_device *)dev;
179
180 OSMR1 = timer_freq / rtc->irq_freq + OSCR;
181 OIER |= OIER_E1;
182 rtc_timer1_count = 1;
183 } else {
184 OIER &= ~OIER_E1;
185 }
186 spin_unlock_irq(&sa1100_rtc_lock);
187
188 return 0;
189}
190
191static inline int sa1100_timer1_retrigger(struct rtc_device *rtc)
192{
193 unsigned long diff;
194 unsigned long period = timer_freq / rtc->irq_freq;
195
196 spin_lock_irq(&sa1100_rtc_lock);
197
198 do {
199 OSMR1 += period;
200 diff = OSMR1 - OSCR;
201 /* If OSCR > OSMR1, diff is a very large number (unsigned
202 * math). This means we have a lost interrupt. */
203 } while (diff > period);
204 OIER |= OIER_E1;
205
206 spin_unlock_irq(&sa1100_rtc_lock);
207
208 return 0;
209}
210
211static irqreturn_t timer1_interrupt(int irq, void *dev_id)
212{
213 struct platform_device *pdev = to_platform_device(dev_id);
214 struct rtc_device *rtc = platform_get_drvdata(pdev);
215
216 /*
217 * If we match for the first time, rtc_timer1_count will be 1.
218 * Otherwise, we wrapped around (very unlikely but
219 * still possible) so compute the amount of missed periods.
220 * The match reg is updated only when the data is actually retrieved
221 * to avoid unnecessary interrupts.
222 */
223 OSSR = OSSR_M1; /* clear match on timer1 */
224
225 rtc_update_irq(rtc, rtc_timer1_count, RTC_PF | RTC_IRQF);
226
227 if (rtc_timer1_count == 1)
228 rtc_timer1_count =
229 (rtc->irq_freq * ((1 << 30) / (timer_freq >> 2)));
230
231 /* retrigger. */
232 sa1100_timer1_retrigger(rtc);
233
234 return IRQ_HANDLED;
235}
236
237static int sa1100_rtc_read_callback(struct device *dev, int data)
238{
239 if (data & RTC_PF) {
240 struct rtc_device *rtc = (struct rtc_device *)dev;
241
242 /* interpolate missed periods and set match for the next */
243 unsigned long period = timer_freq / rtc->irq_freq;
244 unsigned long oscr = OSCR;
245 unsigned long osmr1 = OSMR1;
246 unsigned long missed = (oscr - osmr1)/period;
247 data += missed << 8;
248 OSSR = OSSR_M1; /* clear match on timer 1 */
249 OSMR1 = osmr1 + (missed + 1)*period;
250 /* Ensure we didn't miss another match in the mean time.
251 * Here we compare (match - OSCR) 8 instead of 0 --
252 * see comment in pxa_timer_interrupt() for explanation.
253 */
254 while ((signed long)((osmr1 = OSMR1) - OSCR) <= 8) {
255 data += 0x100;
256 OSSR = OSSR_M1; /* clear match on timer 1 */
257 OSMR1 = osmr1 + period;
258 }
259 }
260 return data;
261}
262
263static int sa1100_rtc_open(struct device *dev) 158static int sa1100_rtc_open(struct device *dev)
264{ 159{
265 int ret; 160 int ret;
266 struct rtc_device *rtc = (struct rtc_device *)dev; 161 struct platform_device *plat_dev = to_platform_device(dev);
162 struct rtc_device *rtc = platform_get_drvdata(plat_dev);
267 163
268 ret = request_irq(IRQ_RTC1Hz, sa1100_rtc_interrupt, IRQF_DISABLED, 164 ret = request_irq(IRQ_RTC1Hz, sa1100_rtc_interrupt, IRQF_DISABLED,
269 "rtc 1Hz", dev); 165 "rtc 1Hz", dev);
@@ -277,19 +173,11 @@ static int sa1100_rtc_open(struct device *dev)
277 dev_err(dev, "IRQ %d already in use.\n", IRQ_RTCAlrm); 173 dev_err(dev, "IRQ %d already in use.\n", IRQ_RTCAlrm);
278 goto fail_ai; 174 goto fail_ai;
279 } 175 }
280 ret = request_irq(IRQ_OST1, timer1_interrupt, IRQF_DISABLED,
281 "rtc timer", dev);
282 if (ret) {
283 dev_err(dev, "IRQ %d already in use.\n", IRQ_OST1);
284 goto fail_pi;
285 }
286 rtc->max_user_freq = RTC_FREQ; 176 rtc->max_user_freq = RTC_FREQ;
287 sa1100_irq_set_freq(dev, RTC_FREQ); 177 rtc_irq_set_freq(rtc, NULL, RTC_FREQ);
288 178
289 return 0; 179 return 0;
290 180
291 fail_pi:
292 free_irq(IRQ_RTCAlrm, dev);
293 fail_ai: 181 fail_ai:
294 free_irq(IRQ_RTC1Hz, dev); 182 free_irq(IRQ_RTC1Hz, dev);
295 fail_ui: 183 fail_ui:
@@ -304,30 +192,10 @@ static void sa1100_rtc_release(struct device *dev)
304 OSSR = OSSR_M1; 192 OSSR = OSSR_M1;
305 spin_unlock_irq(&sa1100_rtc_lock); 193 spin_unlock_irq(&sa1100_rtc_lock);
306 194
307 free_irq(IRQ_OST1, dev);
308 free_irq(IRQ_RTCAlrm, dev); 195 free_irq(IRQ_RTCAlrm, dev);
309 free_irq(IRQ_RTC1Hz, dev); 196 free_irq(IRQ_RTC1Hz, dev);
310} 197}
311 198
312
313static int sa1100_rtc_ioctl(struct device *dev, unsigned int cmd,
314 unsigned long arg)
315{
316 switch (cmd) {
317 case RTC_UIE_OFF:
318 spin_lock_irq(&sa1100_rtc_lock);
319 RTSR &= ~RTSR_HZE;
320 spin_unlock_irq(&sa1100_rtc_lock);
321 return 0;
322 case RTC_UIE_ON:
323 spin_lock_irq(&sa1100_rtc_lock);
324 RTSR |= RTSR_HZE;
325 spin_unlock_irq(&sa1100_rtc_lock);
326 return 0;
327 }
328 return -ENOIOCTLCMD;
329}
330
331static int sa1100_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 199static int sa1100_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
332{ 200{
333 spin_lock_irq(&sa1100_rtc_lock); 201 spin_lock_irq(&sa1100_rtc_lock);
@@ -386,31 +254,20 @@ static int sa1100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
386 254
387static int sa1100_rtc_proc(struct device *dev, struct seq_file *seq) 255static int sa1100_rtc_proc(struct device *dev, struct seq_file *seq)
388{ 256{
389 struct rtc_device *rtc = (struct rtc_device *)dev; 257 seq_printf(seq, "trim/divider\t\t: 0x%08x\n", (u32) RTTR);
390 258 seq_printf(seq, "RTSR\t\t\t: 0x%08x\n", (u32)RTSR);
391 seq_printf(seq, "trim/divider\t: 0x%08x\n", (u32) RTTR);
392 seq_printf(seq, "update_IRQ\t: %s\n",
393 (RTSR & RTSR_HZE) ? "yes" : "no");
394 seq_printf(seq, "periodic_IRQ\t: %s\n",
395 (OIER & OIER_E1) ? "yes" : "no");
396 seq_printf(seq, "periodic_freq\t: %d\n", rtc->irq_freq);
397 seq_printf(seq, "RTSR\t\t: 0x%08x\n", (u32)RTSR);
398 259
399 return 0; 260 return 0;
400} 261}
401 262
402static const struct rtc_class_ops sa1100_rtc_ops = { 263static const struct rtc_class_ops sa1100_rtc_ops = {
403 .open = sa1100_rtc_open, 264 .open = sa1100_rtc_open,
404 .read_callback = sa1100_rtc_read_callback,
405 .release = sa1100_rtc_release, 265 .release = sa1100_rtc_release,
406 .ioctl = sa1100_rtc_ioctl,
407 .read_time = sa1100_rtc_read_time, 266 .read_time = sa1100_rtc_read_time,
408 .set_time = sa1100_rtc_set_time, 267 .set_time = sa1100_rtc_set_time,
409 .read_alarm = sa1100_rtc_read_alarm, 268 .read_alarm = sa1100_rtc_read_alarm,
410 .set_alarm = sa1100_rtc_set_alarm, 269 .set_alarm = sa1100_rtc_set_alarm,
411 .proc = sa1100_rtc_proc, 270 .proc = sa1100_rtc_proc,
412 .irq_set_freq = sa1100_irq_set_freq,
413 .irq_set_state = sa1100_irq_set_state,
414 .alarm_irq_enable = sa1100_rtc_alarm_irq_enable, 271 .alarm_irq_enable = sa1100_rtc_alarm_irq_enable,
415}; 272};
416 273
@@ -418,8 +275,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
418{ 275{
419 struct rtc_device *rtc; 276 struct rtc_device *rtc;
420 277
421 timer_freq = get_clock_tick_rate();
422
423 /* 278 /*
424 * According to the manual we should be able to let RTTR be zero 279 * According to the manual we should be able to let RTTR be zero
425 * and then a default diviser for a 32.768KHz clock is used. 280 * and then a default diviser for a 32.768KHz clock is used.
@@ -445,11 +300,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
445 300
446 platform_set_drvdata(pdev, rtc); 301 platform_set_drvdata(pdev, rtc);
447 302
448 /* Set the irq_freq */
449 /*TODO: Find out who is messing with this value after we initialize
450 * it here.*/
451 rtc->irq_freq = RTC_FREQ;
452
453 /* Fix for a nasty initialization problem the in SA11xx RTSR register. 303 /* Fix for a nasty initialization problem the in SA11xx RTSR register.
454 * See also the comments in sa1100_rtc_interrupt(). 304 * See also the comments in sa1100_rtc_interrupt().
455 * 305 *
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 93314a9e7fa9..e55dc1ac83ab 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -344,27 +344,6 @@ static inline void sh_rtc_setcie(struct device *dev, unsigned int enable)
344 spin_unlock_irq(&rtc->lock); 344 spin_unlock_irq(&rtc->lock);
345} 345}
346 346
347static int sh_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
348{
349 struct sh_rtc *rtc = dev_get_drvdata(dev);
350 unsigned int ret = 0;
351
352 switch (cmd) {
353 case RTC_UIE_OFF:
354 rtc->periodic_freq &= ~PF_OXS;
355 sh_rtc_setcie(dev, 0);
356 break;
357 case RTC_UIE_ON:
358 rtc->periodic_freq |= PF_OXS;
359 sh_rtc_setcie(dev, 1);
360 break;
361 default:
362 ret = -ENOIOCTLCMD;
363 }
364
365 return ret;
366}
367
368static int sh_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 347static int sh_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
369{ 348{
370 sh_rtc_setaie(dev, enabled); 349 sh_rtc_setaie(dev, enabled);
@@ -598,13 +577,10 @@ static int sh_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
598} 577}
599 578
600static struct rtc_class_ops sh_rtc_ops = { 579static struct rtc_class_ops sh_rtc_ops = {
601 .ioctl = sh_rtc_ioctl,
602 .read_time = sh_rtc_read_time, 580 .read_time = sh_rtc_read_time,
603 .set_time = sh_rtc_set_time, 581 .set_time = sh_rtc_set_time,
604 .read_alarm = sh_rtc_read_alarm, 582 .read_alarm = sh_rtc_read_alarm,
605 .set_alarm = sh_rtc_set_alarm, 583 .set_alarm = sh_rtc_set_alarm,
606 .irq_set_state = sh_rtc_irq_set_state,
607 .irq_set_freq = sh_rtc_irq_set_freq,
608 .proc = sh_rtc_proc, 584 .proc = sh_rtc_proc,
609 .alarm_irq_enable = sh_rtc_alarm_irq_enable, 585 .alarm_irq_enable = sh_rtc_alarm_irq_enable,
610}; 586};
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index 7e7d0c806f2d..572e9534b591 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -115,19 +115,6 @@ static int stmp3xxx_alarm_irq_enable(struct device *dev, unsigned int enabled)
115 return 0; 115 return 0;
116} 116}
117 117
118static int stmp3xxx_update_irq_enable(struct device *dev, unsigned int enabled)
119{
120 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
121
122 if (enabled)
123 stmp3xxx_setl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
124 rtc_data->io + HW_RTC_CTRL);
125 else
126 stmp3xxx_clearl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
127 rtc_data->io + HW_RTC_CTRL);
128 return 0;
129}
130
131static int stmp3xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) 118static int stmp3xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
132{ 119{
133 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); 120 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
@@ -149,8 +136,6 @@ static int stmp3xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
149static struct rtc_class_ops stmp3xxx_rtc_ops = { 136static struct rtc_class_ops stmp3xxx_rtc_ops = {
150 .alarm_irq_enable = 137 .alarm_irq_enable =
151 stmp3xxx_alarm_irq_enable, 138 stmp3xxx_alarm_irq_enable,
152 .update_irq_enable =
153 stmp3xxx_update_irq_enable,
154 .read_time = stmp3xxx_rtc_gettime, 139 .read_time = stmp3xxx_rtc_gettime,
155 .set_mmss = stmp3xxx_rtc_set_mmss, 140 .set_mmss = stmp3xxx_rtc_set_mmss,
156 .read_alarm = stmp3xxx_rtc_read_alarm, 141 .read_alarm = stmp3xxx_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index a82d6fe97076..7e96254bd365 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -78,11 +78,16 @@ static ssize_t test_irq_store(struct device *dev,
78 struct rtc_device *rtc = platform_get_drvdata(plat_dev); 78 struct rtc_device *rtc = platform_get_drvdata(plat_dev);
79 79
80 retval = count; 80 retval = count;
81 if (strncmp(buf, "tick", 4) == 0) 81 if (strncmp(buf, "tick", 4) == 0 && rtc->pie_enabled)
82 rtc_update_irq(rtc, 1, RTC_PF | RTC_IRQF); 82 rtc_update_irq(rtc, 1, RTC_PF | RTC_IRQF);
83 else if (strncmp(buf, "alarm", 5) == 0) 83 else if (strncmp(buf, "alarm", 5) == 0) {
84 rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF); 84 struct rtc_wkalrm alrm;
85 else if (strncmp(buf, "update", 6) == 0) 85 int err = rtc_read_alarm(rtc, &alrm);
86
87 if (!err && alrm.enabled)
88 rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF);
89
90 } else if (strncmp(buf, "update", 6) == 0 && rtc->uie_rtctimer.enabled)
86 rtc_update_irq(rtc, 1, RTC_UF | RTC_IRQF); 91 rtc_update_irq(rtc, 1, RTC_UF | RTC_IRQF);
87 else 92 else
88 retval = -EINVAL; 93 retval = -EINVAL;
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index ed1b86828124..f9a2799c44d6 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -213,18 +213,6 @@ static int twl_rtc_alarm_irq_enable(struct device *dev, unsigned enabled)
213 return ret; 213 return ret;
214} 214}
215 215
216static int twl_rtc_update_irq_enable(struct device *dev, unsigned enabled)
217{
218 int ret;
219
220 if (enabled)
221 ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
222 else
223 ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
224
225 return ret;
226}
227
228/* 216/*
229 * Gets current TWL RTC time and date parameters. 217 * Gets current TWL RTC time and date parameters.
230 * 218 *
@@ -433,7 +421,6 @@ static struct rtc_class_ops twl_rtc_ops = {
433 .read_alarm = twl_rtc_read_alarm, 421 .read_alarm = twl_rtc_read_alarm,
434 .set_alarm = twl_rtc_set_alarm, 422 .set_alarm = twl_rtc_set_alarm,
435 .alarm_irq_enable = twl_rtc_alarm_irq_enable, 423 .alarm_irq_enable = twl_rtc_alarm_irq_enable,
436 .update_irq_enable = twl_rtc_update_irq_enable,
437}; 424};
438 425
439/*----------------------------------------------------------------------*/ 426/*----------------------------------------------------------------------*/
diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 769190ac6d11..c5698cda366a 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -207,36 +207,6 @@ static int vr41xx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
207 return 0; 207 return 0;
208} 208}
209 209
210static int vr41xx_rtc_irq_set_freq(struct device *dev, int freq)
211{
212 u64 count;
213
214 if (!is_power_of_2(freq))
215 return -EINVAL;
216 count = RTC_FREQUENCY;
217 do_div(count, freq);
218
219 spin_lock_irq(&rtc_lock);
220
221 periodic_count = count;
222 rtc1_write(RTCL1LREG, periodic_count);
223 rtc1_write(RTCL1HREG, periodic_count >> 16);
224
225 spin_unlock_irq(&rtc_lock);
226
227 return 0;
228}
229
230static int vr41xx_rtc_irq_set_state(struct device *dev, int enabled)
231{
232 if (enabled)
233 enable_irq(pie_irq);
234 else
235 disable_irq(pie_irq);
236
237 return 0;
238}
239
240static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) 210static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
241{ 211{
242 switch (cmd) { 212 switch (cmd) {
@@ -308,8 +278,6 @@ static const struct rtc_class_ops vr41xx_rtc_ops = {
308 .set_time = vr41xx_rtc_set_time, 278 .set_time = vr41xx_rtc_set_time,
309 .read_alarm = vr41xx_rtc_read_alarm, 279 .read_alarm = vr41xx_rtc_read_alarm,
310 .set_alarm = vr41xx_rtc_set_alarm, 280 .set_alarm = vr41xx_rtc_set_alarm,
311 .irq_set_freq = vr41xx_rtc_irq_set_freq,
312 .irq_set_state = vr41xx_rtc_irq_set_state,
313}; 281};
314 282
315static int __devinit rtc_probe(struct platform_device *pdev) 283static int __devinit rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 82931dc65c0b..bdc909bd56da 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -315,21 +315,6 @@ static int wm831x_rtc_alarm_irq_enable(struct device *dev,
315 return wm831x_rtc_stop_alarm(wm831x_rtc); 315 return wm831x_rtc_stop_alarm(wm831x_rtc);
316} 316}
317 317
318static int wm831x_rtc_update_irq_enable(struct device *dev,
319 unsigned int enabled)
320{
321 struct wm831x_rtc *wm831x_rtc = dev_get_drvdata(dev);
322 int val;
323
324 if (enabled)
325 val = 1 << WM831X_RTC_PINT_FREQ_SHIFT;
326 else
327 val = 0;
328
329 return wm831x_set_bits(wm831x_rtc->wm831x, WM831X_RTC_CONTROL,
330 WM831X_RTC_PINT_FREQ_MASK, val);
331}
332
333static irqreturn_t wm831x_alm_irq(int irq, void *data) 318static irqreturn_t wm831x_alm_irq(int irq, void *data)
334{ 319{
335 struct wm831x_rtc *wm831x_rtc = data; 320 struct wm831x_rtc *wm831x_rtc = data;
@@ -354,7 +339,6 @@ static const struct rtc_class_ops wm831x_rtc_ops = {
354 .read_alarm = wm831x_rtc_readalarm, 339 .read_alarm = wm831x_rtc_readalarm,
355 .set_alarm = wm831x_rtc_setalarm, 340 .set_alarm = wm831x_rtc_setalarm,
356 .alarm_irq_enable = wm831x_rtc_alarm_irq_enable, 341 .alarm_irq_enable = wm831x_rtc_alarm_irq_enable,
357 .update_irq_enable = wm831x_rtc_update_irq_enable,
358}; 342};
359 343
360#ifdef CONFIG_PM 344#ifdef CONFIG_PM
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index 3d0dc76b38af..66421426e404 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -302,26 +302,6 @@ static int wm8350_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
302 return ret; 302 return ret;
303} 303}
304 304
305static int wm8350_rtc_update_irq_enable(struct device *dev,
306 unsigned int enabled)
307{
308 struct wm8350 *wm8350 = dev_get_drvdata(dev);
309
310 /* Suppress duplicate changes since genirq nests enable and
311 * disable calls. */
312 if (enabled == wm8350->rtc.update_enabled)
313 return 0;
314
315 if (enabled)
316 wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC);
317 else
318 wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC);
319
320 wm8350->rtc.update_enabled = enabled;
321
322 return 0;
323}
324
325static irqreturn_t wm8350_rtc_alarm_handler(int irq, void *data) 305static irqreturn_t wm8350_rtc_alarm_handler(int irq, void *data)
326{ 306{
327 struct wm8350 *wm8350 = data; 307 struct wm8350 *wm8350 = data;
@@ -357,7 +337,6 @@ static const struct rtc_class_ops wm8350_rtc_ops = {
357 .read_alarm = wm8350_rtc_readalarm, 337 .read_alarm = wm8350_rtc_readalarm,
358 .set_alarm = wm8350_rtc_setalarm, 338 .set_alarm = wm8350_rtc_setalarm,
359 .alarm_irq_enable = wm8350_rtc_alarm_irq_enable, 339 .alarm_irq_enable = wm8350_rtc_alarm_irq_enable,
360 .update_irq_enable = wm8350_rtc_update_irq_enable,
361}; 340};
362 341
363#ifdef CONFIG_PM 342#ifdef CONFIG_PM
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 318672d05563..a9fe23d5bd0f 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -72,7 +72,7 @@ static struct dasd_discipline dasd_eckd_discipline;
72static struct ccw_device_id dasd_eckd_ids[] = { 72static struct ccw_device_id dasd_eckd_ids[] = {
73 { CCW_DEVICE_DEVTYPE (0x3990, 0, 0x3390, 0), .driver_info = 0x1}, 73 { CCW_DEVICE_DEVTYPE (0x3990, 0, 0x3390, 0), .driver_info = 0x1},
74 { CCW_DEVICE_DEVTYPE (0x2105, 0, 0x3390, 0), .driver_info = 0x2}, 74 { CCW_DEVICE_DEVTYPE (0x2105, 0, 0x3390, 0), .driver_info = 0x2},
75 { CCW_DEVICE_DEVTYPE (0x3880, 0, 0x3390, 0), .driver_info = 0x3}, 75 { CCW_DEVICE_DEVTYPE (0x3880, 0, 0x3380, 0), .driver_info = 0x3},
76 { CCW_DEVICE_DEVTYPE (0x3990, 0, 0x3380, 0), .driver_info = 0x4}, 76 { CCW_DEVICE_DEVTYPE (0x3990, 0, 0x3380, 0), .driver_info = 0x4},
77 { CCW_DEVICE_DEVTYPE (0x2105, 0, 0x3380, 0), .driver_info = 0x5}, 77 { CCW_DEVICE_DEVTYPE (0x2105, 0, 0x3380, 0), .driver_info = 0x5},
78 { CCW_DEVICE_DEVTYPE (0x9343, 0, 0x9345, 0), .driver_info = 0x6}, 78 { CCW_DEVICE_DEVTYPE (0x9343, 0, 0x9345, 0), .driver_info = 0x6},
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index c881a14fa5dd..1f6a4d894e73 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -62,8 +62,8 @@ static int xpram_devs;
62/* 62/*
63 * Parameter parsing functions. 63 * Parameter parsing functions.
64 */ 64 */
65static int __initdata devs = XPRAM_DEVS; 65static int devs = XPRAM_DEVS;
66static char __initdata *sizes[XPRAM_MAX_DEVS]; 66static char *sizes[XPRAM_MAX_DEVS];
67 67
68module_param(devs, int, 0); 68module_param(devs, int, 0);
69module_param_array(sizes, charp, NULL, 0); 69module_param_array(sizes, charp, NULL, 0);
diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 8cd58e412b5e..5ad44daef73b 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -460,7 +460,8 @@ kbd_ioctl(struct kbd_data *kbd, struct file *file,
460 unsigned int cmd, unsigned long arg) 460 unsigned int cmd, unsigned long arg)
461{ 461{
462 void __user *argp; 462 void __user *argp;
463 int ct, perm; 463 unsigned int ct;
464 int perm;
464 465
465 argp = (void __user *)arg; 466 argp = (void __user *)arg;
466 467
diff --git a/drivers/s390/char/tape.h b/drivers/s390/char/tape.h
index 7a242f073632..267b54e8ff5a 100644
--- a/drivers/s390/char/tape.h
+++ b/drivers/s390/char/tape.h
@@ -280,6 +280,14 @@ tape_do_io_free(struct tape_device *device, struct tape_request *request)
280 return rc; 280 return rc;
281} 281}
282 282
283static inline void
284tape_do_io_async_free(struct tape_device *device, struct tape_request *request)
285{
286 request->callback = (void *) tape_free_request;
287 request->callback_data = NULL;
288 tape_do_io_async(device, request);
289}
290
283extern int tape_oper_handler(int irq, int status); 291extern int tape_oper_handler(int irq, int status);
284extern void tape_noper_handler(int irq, int status); 292extern void tape_noper_handler(int irq, int status);
285extern int tape_open(struct tape_device *); 293extern int tape_open(struct tape_device *);
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index c17f35b6136a..c26511171ffe 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -53,23 +53,11 @@ static void tape_34xx_delete_sbid_from(struct tape_device *, int);
53 * Medium sense for 34xx tapes. There is no 'real' medium sense call. 53 * Medium sense for 34xx tapes. There is no 'real' medium sense call.
54 * So we just do a normal sense. 54 * So we just do a normal sense.
55 */ 55 */
56static int 56static void __tape_34xx_medium_sense(struct tape_request *request)
57tape_34xx_medium_sense(struct tape_device *device)
58{ 57{
59 struct tape_request *request; 58 struct tape_device *device = request->device;
60 unsigned char *sense; 59 unsigned char *sense;
61 int rc;
62
63 request = tape_alloc_request(1, 32);
64 if (IS_ERR(request)) {
65 DBF_EXCEPTION(6, "MSEN fail\n");
66 return PTR_ERR(request);
67 }
68
69 request->op = TO_MSEN;
70 tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
71 60
72 rc = tape_do_io_interruptible(device, request);
73 if (request->rc == 0) { 61 if (request->rc == 0) {
74 sense = request->cpdata; 62 sense = request->cpdata;
75 63
@@ -88,15 +76,47 @@ tape_34xx_medium_sense(struct tape_device *device)
88 device->tape_generic_status |= GMT_WR_PROT(~0); 76 device->tape_generic_status |= GMT_WR_PROT(~0);
89 else 77 else
90 device->tape_generic_status &= ~GMT_WR_PROT(~0); 78 device->tape_generic_status &= ~GMT_WR_PROT(~0);
91 } else { 79 } else
92 DBF_EVENT(4, "tape_34xx: medium sense failed with rc=%d\n", 80 DBF_EVENT(4, "tape_34xx: medium sense failed with rc=%d\n",
93 request->rc); 81 request->rc);
94 }
95 tape_free_request(request); 82 tape_free_request(request);
83}
84
85static int tape_34xx_medium_sense(struct tape_device *device)
86{
87 struct tape_request *request;
88 int rc;
89
90 request = tape_alloc_request(1, 32);
91 if (IS_ERR(request)) {
92 DBF_EXCEPTION(6, "MSEN fail\n");
93 return PTR_ERR(request);
94 }
96 95
96 request->op = TO_MSEN;
97 tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
98 rc = tape_do_io_interruptible(device, request);
99 __tape_34xx_medium_sense(request);
97 return rc; 100 return rc;
98} 101}
99 102
103static void tape_34xx_medium_sense_async(struct tape_device *device)
104{
105 struct tape_request *request;
106
107 request = tape_alloc_request(1, 32);
108 if (IS_ERR(request)) {
109 DBF_EXCEPTION(6, "MSEN fail\n");
110 return;
111 }
112
113 request->op = TO_MSEN;
114 tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
115 request->callback = (void *) __tape_34xx_medium_sense;
116 request->callback_data = NULL;
117 tape_do_io_async(device, request);
118}
119
100struct tape_34xx_work { 120struct tape_34xx_work {
101 struct tape_device *device; 121 struct tape_device *device;
102 enum tape_op op; 122 enum tape_op op;
@@ -109,6 +129,9 @@ struct tape_34xx_work {
109 * is inserted but cannot call tape_do_io* from an interrupt context. 129 * is inserted but cannot call tape_do_io* from an interrupt context.
110 * Maybe that's useful for other actions we want to start from the 130 * Maybe that's useful for other actions we want to start from the
111 * interrupt handler. 131 * interrupt handler.
132 * Note: the work handler is called by the system work queue. The tape
133 * commands started by the handler need to be asynchrounous, otherwise
134 * a deadlock can occur e.g. in case of a deferred cc=1 (see __tape_do_irq).
112 */ 135 */
113static void 136static void
114tape_34xx_work_handler(struct work_struct *work) 137tape_34xx_work_handler(struct work_struct *work)
@@ -119,7 +142,7 @@ tape_34xx_work_handler(struct work_struct *work)
119 142
120 switch(p->op) { 143 switch(p->op) {
121 case TO_MSEN: 144 case TO_MSEN:
122 tape_34xx_medium_sense(device); 145 tape_34xx_medium_sense_async(device);
123 break; 146 break;
124 default: 147 default:
125 DBF_EVENT(3, "T34XX: internal error: unknown work\n"); 148 DBF_EVENT(3, "T34XX: internal error: unknown work\n");
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index fbe361fcd2c0..de2e99e0a71b 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -329,17 +329,17 @@ out:
329/* 329/*
330 * Enable encryption 330 * Enable encryption
331 */ 331 */
332static int tape_3592_enable_crypt(struct tape_device *device) 332static struct tape_request *__tape_3592_enable_crypt(struct tape_device *device)
333{ 333{
334 struct tape_request *request; 334 struct tape_request *request;
335 char *data; 335 char *data;
336 336
337 DBF_EVENT(6, "tape_3592_enable_crypt\n"); 337 DBF_EVENT(6, "tape_3592_enable_crypt\n");
338 if (!crypt_supported(device)) 338 if (!crypt_supported(device))
339 return -ENOSYS; 339 return ERR_PTR(-ENOSYS);
340 request = tape_alloc_request(2, 72); 340 request = tape_alloc_request(2, 72);
341 if (IS_ERR(request)) 341 if (IS_ERR(request))
342 return PTR_ERR(request); 342 return request;
343 data = request->cpdata; 343 data = request->cpdata;
344 memset(data,0,72); 344 memset(data,0,72);
345 345
@@ -354,23 +354,42 @@ static int tape_3592_enable_crypt(struct tape_device *device)
354 request->op = TO_CRYPT_ON; 354 request->op = TO_CRYPT_ON;
355 tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data); 355 tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data);
356 tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36); 356 tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36);
357 return request;
358}
359
360static int tape_3592_enable_crypt(struct tape_device *device)
361{
362 struct tape_request *request;
363
364 request = __tape_3592_enable_crypt(device);
365 if (IS_ERR(request))
366 return PTR_ERR(request);
357 return tape_do_io_free(device, request); 367 return tape_do_io_free(device, request);
358} 368}
359 369
370static void tape_3592_enable_crypt_async(struct tape_device *device)
371{
372 struct tape_request *request;
373
374 request = __tape_3592_enable_crypt(device);
375 if (!IS_ERR(request))
376 tape_do_io_async_free(device, request);
377}
378
360/* 379/*
361 * Disable encryption 380 * Disable encryption
362 */ 381 */
363static int tape_3592_disable_crypt(struct tape_device *device) 382static struct tape_request *__tape_3592_disable_crypt(struct tape_device *device)
364{ 383{
365 struct tape_request *request; 384 struct tape_request *request;
366 char *data; 385 char *data;
367 386
368 DBF_EVENT(6, "tape_3592_disable_crypt\n"); 387 DBF_EVENT(6, "tape_3592_disable_crypt\n");
369 if (!crypt_supported(device)) 388 if (!crypt_supported(device))
370 return -ENOSYS; 389 return ERR_PTR(-ENOSYS);
371 request = tape_alloc_request(2, 72); 390 request = tape_alloc_request(2, 72);
372 if (IS_ERR(request)) 391 if (IS_ERR(request))
373 return PTR_ERR(request); 392 return request;
374 data = request->cpdata; 393 data = request->cpdata;
375 memset(data,0,72); 394 memset(data,0,72);
376 395
@@ -383,9 +402,28 @@ static int tape_3592_disable_crypt(struct tape_device *device)
383 tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data); 402 tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data);
384 tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36); 403 tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36);
385 404
405 return request;
406}
407
408static int tape_3592_disable_crypt(struct tape_device *device)
409{
410 struct tape_request *request;
411
412 request = __tape_3592_disable_crypt(device);
413 if (IS_ERR(request))
414 return PTR_ERR(request);
386 return tape_do_io_free(device, request); 415 return tape_do_io_free(device, request);
387} 416}
388 417
418static void tape_3592_disable_crypt_async(struct tape_device *device)
419{
420 struct tape_request *request;
421
422 request = __tape_3592_disable_crypt(device);
423 if (!IS_ERR(request))
424 tape_do_io_async_free(device, request);
425}
426
389/* 427/*
390 * IOCTL: Set encryption status 428 * IOCTL: Set encryption status
391 */ 429 */
@@ -457,8 +495,7 @@ tape_3590_ioctl(struct tape_device *device, unsigned int cmd, unsigned long arg)
457/* 495/*
458 * SENSE Medium: Get Sense data about medium state 496 * SENSE Medium: Get Sense data about medium state
459 */ 497 */
460static int 498static int tape_3590_sense_medium(struct tape_device *device)
461tape_3590_sense_medium(struct tape_device *device)
462{ 499{
463 struct tape_request *request; 500 struct tape_request *request;
464 501
@@ -470,6 +507,18 @@ tape_3590_sense_medium(struct tape_device *device)
470 return tape_do_io_free(device, request); 507 return tape_do_io_free(device, request);
471} 508}
472 509
510static void tape_3590_sense_medium_async(struct tape_device *device)
511{
512 struct tape_request *request;
513
514 request = tape_alloc_request(1, 128);
515 if (IS_ERR(request))
516 return;
517 request->op = TO_MSEN;
518 tape_ccw_end(request->cpaddr, MEDIUM_SENSE, 128, request->cpdata);
519 tape_do_io_async_free(device, request);
520}
521
473/* 522/*
474 * MTTELL: Tell block. Return the number of block relative to current file. 523 * MTTELL: Tell block. Return the number of block relative to current file.
475 */ 524 */
@@ -546,15 +595,14 @@ tape_3590_read_opposite(struct tape_device *device,
546 * 2. The attention msg is written to the "read subsystem data" buffer. 595 * 2. The attention msg is written to the "read subsystem data" buffer.
547 * In this case we probably should print it to the console. 596 * In this case we probably should print it to the console.
548 */ 597 */
549static int 598static void tape_3590_read_attmsg_async(struct tape_device *device)
550tape_3590_read_attmsg(struct tape_device *device)
551{ 599{
552 struct tape_request *request; 600 struct tape_request *request;
553 char *buf; 601 char *buf;
554 602
555 request = tape_alloc_request(3, 4096); 603 request = tape_alloc_request(3, 4096);
556 if (IS_ERR(request)) 604 if (IS_ERR(request))
557 return PTR_ERR(request); 605 return;
558 request->op = TO_READ_ATTMSG; 606 request->op = TO_READ_ATTMSG;
559 buf = request->cpdata; 607 buf = request->cpdata;
560 buf[0] = PREP_RD_SS_DATA; 608 buf[0] = PREP_RD_SS_DATA;
@@ -562,12 +610,15 @@ tape_3590_read_attmsg(struct tape_device *device)
562 tape_ccw_cc(request->cpaddr, PERFORM_SS_FUNC, 12, buf); 610 tape_ccw_cc(request->cpaddr, PERFORM_SS_FUNC, 12, buf);
563 tape_ccw_cc(request->cpaddr + 1, READ_SS_DATA, 4096 - 12, buf + 12); 611 tape_ccw_cc(request->cpaddr + 1, READ_SS_DATA, 4096 - 12, buf + 12);
564 tape_ccw_end(request->cpaddr + 2, NOP, 0, NULL); 612 tape_ccw_end(request->cpaddr + 2, NOP, 0, NULL);
565 return tape_do_io_free(device, request); 613 tape_do_io_async_free(device, request);
566} 614}
567 615
568/* 616/*
569 * These functions are used to schedule follow-up actions from within an 617 * These functions are used to schedule follow-up actions from within an
570 * interrupt context (like unsolicited interrupts). 618 * interrupt context (like unsolicited interrupts).
619 * Note: the work handler is called by the system work queue. The tape
620 * commands started by the handler need to be asynchrounous, otherwise
621 * a deadlock can occur e.g. in case of a deferred cc=1 (see __tape_do_irq).
571 */ 622 */
572struct work_handler_data { 623struct work_handler_data {
573 struct tape_device *device; 624 struct tape_device *device;
@@ -583,16 +634,16 @@ tape_3590_work_handler(struct work_struct *work)
583 634
584 switch (p->op) { 635 switch (p->op) {
585 case TO_MSEN: 636 case TO_MSEN:
586 tape_3590_sense_medium(p->device); 637 tape_3590_sense_medium_async(p->device);
587 break; 638 break;
588 case TO_READ_ATTMSG: 639 case TO_READ_ATTMSG:
589 tape_3590_read_attmsg(p->device); 640 tape_3590_read_attmsg_async(p->device);
590 break; 641 break;
591 case TO_CRYPT_ON: 642 case TO_CRYPT_ON:
592 tape_3592_enable_crypt(p->device); 643 tape_3592_enable_crypt_async(p->device);
593 break; 644 break;
594 case TO_CRYPT_OFF: 645 case TO_CRYPT_OFF:
595 tape_3592_disable_crypt(p->device); 646 tape_3592_disable_crypt_async(p->device);
596 break; 647 break;
597 default: 648 default:
598 DBF_EVENT(3, "T3590: work handler undefined for " 649 DBF_EVENT(3, "T3590: work handler undefined for "
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 2e9a87e8e7d8..ef6de669424b 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -165,7 +165,7 @@ scsi_mod-$(CONFIG_SCSI_NETLINK) += scsi_netlink.o
165scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o 165scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o
166scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o 166scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o
167scsi_mod-y += scsi_trace.o 167scsi_mod-y += scsi_trace.o
168scsi_mod-$(CONFIG_PM_OPS) += scsi_pm.o 168scsi_mod-$(CONFIG_PM) += scsi_pm.o
169 169
170scsi_tgt-y += scsi_tgt_lib.o scsi_tgt_if.o 170scsi_tgt-y += scsi_tgt_lib.o scsi_tgt_if.o
171 171
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 79cefbe31367..638c72b7f94a 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -4277,7 +4277,7 @@ static int __devinit beiscsi_dev_probe(struct pci_dev *pcidev,
4277 4277
4278 snprintf(phba->wq_name, sizeof(phba->wq_name), "beiscsi_q_irq%u", 4278 snprintf(phba->wq_name, sizeof(phba->wq_name), "beiscsi_q_irq%u",
4279 phba->shost->host_no); 4279 phba->shost->host_no);
4280 phba->wq = create_workqueue(phba->wq_name); 4280 phba->wq = alloc_workqueue(phba->wq_name, WQ_MEM_RECLAIM, 1);
4281 if (!phba->wq) { 4281 if (!phba->wq) {
4282 shost_printk(KERN_ERR, phba->shost, "beiscsi_dev_probe-" 4282 shost_printk(KERN_ERR, phba->shost, "beiscsi_dev_probe-"
4283 "Failed to allocate work queue\n"); 4283 "Failed to allocate work queue\n");
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9c5c8be72231..d841e98a8bd5 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6219,11 +6219,10 @@ static struct ata_port_operations ipr_sata_ops = {
6219}; 6219};
6220 6220
6221static struct ata_port_info sata_port_info = { 6221static struct ata_port_info sata_port_info = {
6222 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SATA_RESET | 6222 .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
6223 ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA, 6223 .pio_mask = ATA_PIO4_ONLY,
6224 .pio_mask = 0x10, /* pio4 */ 6224 .mwdma_mask = ATA_MWDMA2,
6225 .mwdma_mask = 0x07, 6225 .udma_mask = ATA_UDMA6,
6226 .udma_mask = 0x7f, /* udma0-6 */
6227 .port_ops = &ipr_sata_ops 6226 .port_ops = &ipr_sata_ops
6228}; 6227};
6229 6228
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index e1a395b438ee..4d3b704ede1c 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -238,37 +238,43 @@ static bool sas_ata_qc_fill_rtf(struct ata_queued_cmd *qc)
238 return true; 238 return true;
239} 239}
240 240
241static void sas_ata_phy_reset(struct ata_port *ap) 241static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class,
242 unsigned long deadline)
242{ 243{
244 struct ata_port *ap = link->ap;
243 struct domain_device *dev = ap->private_data; 245 struct domain_device *dev = ap->private_data;
244 struct sas_internal *i = 246 struct sas_internal *i =
245 to_sas_internal(dev->port->ha->core.shost->transportt); 247 to_sas_internal(dev->port->ha->core.shost->transportt);
246 int res = TMF_RESP_FUNC_FAILED; 248 int res = TMF_RESP_FUNC_FAILED;
249 int ret = 0;
247 250
248 if (i->dft->lldd_I_T_nexus_reset) 251 if (i->dft->lldd_I_T_nexus_reset)
249 res = i->dft->lldd_I_T_nexus_reset(dev); 252 res = i->dft->lldd_I_T_nexus_reset(dev);
250 253
251 if (res != TMF_RESP_FUNC_COMPLETE) 254 if (res != TMF_RESP_FUNC_COMPLETE) {
252 SAS_DPRINTK("%s: Unable to reset I T nexus?\n", __func__); 255 SAS_DPRINTK("%s: Unable to reset I T nexus?\n", __func__);
256 ret = -EAGAIN;
257 }
253 258
254 switch (dev->sata_dev.command_set) { 259 switch (dev->sata_dev.command_set) {
255 case ATA_COMMAND_SET: 260 case ATA_COMMAND_SET:
256 SAS_DPRINTK("%s: Found ATA device.\n", __func__); 261 SAS_DPRINTK("%s: Found ATA device.\n", __func__);
257 ap->link.device[0].class = ATA_DEV_ATA; 262 *class = ATA_DEV_ATA;
258 break; 263 break;
259 case ATAPI_COMMAND_SET: 264 case ATAPI_COMMAND_SET:
260 SAS_DPRINTK("%s: Found ATAPI device.\n", __func__); 265 SAS_DPRINTK("%s: Found ATAPI device.\n", __func__);
261 ap->link.device[0].class = ATA_DEV_ATAPI; 266 *class = ATA_DEV_ATAPI;
262 break; 267 break;
263 default: 268 default:
264 SAS_DPRINTK("%s: Unknown SATA command set: %d.\n", 269 SAS_DPRINTK("%s: Unknown SATA command set: %d.\n",
265 __func__, 270 __func__,
266 dev->sata_dev.command_set); 271 dev->sata_dev.command_set);
267 ap->link.device[0].class = ATA_DEV_UNKNOWN; 272 *class = ATA_DEV_UNKNOWN;
268 break; 273 break;
269 } 274 }
270 275
271 ap->cbl = ATA_CBL_SATA; 276 ap->cbl = ATA_CBL_SATA;
277 return ret;
272} 278}
273 279
274static void sas_ata_post_internal(struct ata_queued_cmd *qc) 280static void sas_ata_post_internal(struct ata_queued_cmd *qc)
@@ -349,7 +355,11 @@ static int sas_ata_scr_read(struct ata_link *link, unsigned int sc_reg_in,
349} 355}
350 356
351static struct ata_port_operations sas_sata_ops = { 357static struct ata_port_operations sas_sata_ops = {
352 .phy_reset = sas_ata_phy_reset, 358 .prereset = ata_std_prereset,
359 .softreset = NULL,
360 .hardreset = sas_ata_hard_reset,
361 .postreset = ata_std_postreset,
362 .error_handler = ata_std_error_handler,
353 .post_internal_cmd = sas_ata_post_internal, 363 .post_internal_cmd = sas_ata_post_internal,
354 .qc_defer = ata_std_qc_defer, 364 .qc_defer = ata_std_qc_defer,
355 .qc_prep = ata_noop_qc_prep, 365 .qc_prep = ata_noop_qc_prep,
@@ -362,10 +372,9 @@ static struct ata_port_operations sas_sata_ops = {
362}; 372};
363 373
364static struct ata_port_info sata_port_info = { 374static struct ata_port_info sata_port_info = {
365 .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SATA_RESET | 375 .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
366 ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ, 376 .pio_mask = ATA_PIO4,
367 .pio_mask = 0x1f, /* PIO0-4 */ 377 .mwdma_mask = ATA_MWDMA2,
368 .mwdma_mask = 0x07, /* MWDMA0-2 */
369 .udma_mask = ATA_UDMA6, 378 .udma_mask = ATA_UDMA6,
370 .port_ops = &sas_sata_ops 379 .port_ops = &sas_sata_ops
371}; 380};
@@ -781,3 +790,68 @@ int sas_discover_sata(struct domain_device *dev)
781 790
782 return res; 791 return res;
783} 792}
793
794void sas_ata_strategy_handler(struct Scsi_Host *shost)
795{
796 struct scsi_device *sdev;
797
798 shost_for_each_device(sdev, shost) {
799 struct domain_device *ddev = sdev_to_domain_dev(sdev);
800 struct ata_port *ap = ddev->sata_dev.ap;
801
802 if (!dev_is_sata(ddev))
803 continue;
804
805 ata_port_printk(ap, KERN_DEBUG, "sas eh calling libata port error handler");
806 ata_scsi_port_error_handler(shost, ap);
807 }
808}
809
810int sas_ata_timed_out(struct scsi_cmnd *cmd, struct sas_task *task,
811 enum blk_eh_timer_return *rtn)
812{
813 struct domain_device *ddev = cmd_to_domain_dev(cmd);
814
815 if (!dev_is_sata(ddev) || task)
816 return 0;
817
818 /* we're a sata device with no task, so this must be a libata
819 * eh timeout. Ideally should hook into libata timeout
820 * handling, but there's no point, it just wants to activate
821 * the eh thread */
822 *rtn = BLK_EH_NOT_HANDLED;
823 return 1;
824}
825
826int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
827 struct list_head *done_q)
828{
829 int rtn = 0;
830 struct scsi_cmnd *cmd, *n;
831 struct ata_port *ap;
832
833 do {
834 LIST_HEAD(sata_q);
835
836 ap = NULL;
837
838 list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
839 struct domain_device *ddev = cmd_to_domain_dev(cmd);
840
841 if (!dev_is_sata(ddev) || TO_SAS_TASK(cmd))
842 continue;
843 if(ap && ap != ddev->sata_dev.ap)
844 continue;
845 ap = ddev->sata_dev.ap;
846 rtn = 1;
847 list_move(&cmd->eh_entry, &sata_q);
848 }
849
850 if (!list_empty(&sata_q)) {
851 ata_port_printk(ap, KERN_DEBUG,"sas eh calling libata cmd error handler\n");
852 ata_scsi_cmd_error_handler(shost, ap, &sata_q);
853 }
854 } while (ap);
855
856 return rtn;
857}
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 9a7aaf5f1311..67758ea8eb7f 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -663,11 +663,16 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
663 * scsi_unjam_host does, but we skip scsi_eh_abort_cmds because any 663 * scsi_unjam_host does, but we skip scsi_eh_abort_cmds because any
664 * command we see here has no sas_task and is thus unknown to the HA. 664 * command we see here has no sas_task and is thus unknown to the HA.
665 */ 665 */
666 if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q)) 666 if (!sas_ata_eh(shost, &eh_work_q, &ha->eh_done_q))
667 scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q); 667 if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q))
668 scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q);
668 669
669out: 670out:
671 /* now link into libata eh --- if we have any ata devices */
672 sas_ata_strategy_handler(shost);
673
670 scsi_eh_flush_done_q(&ha->eh_done_q); 674 scsi_eh_flush_done_q(&ha->eh_done_q);
675
671 SAS_DPRINTK("--- Exit %s\n", __func__); 676 SAS_DPRINTK("--- Exit %s\n", __func__);
672 return; 677 return;
673} 678}
@@ -676,6 +681,11 @@ enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
676{ 681{
677 struct sas_task *task = TO_SAS_TASK(cmd); 682 struct sas_task *task = TO_SAS_TASK(cmd);
678 unsigned long flags; 683 unsigned long flags;
684 enum blk_eh_timer_return rtn;
685
686 if (sas_ata_timed_out(cmd, task, &rtn))
687 return rtn;
688
679 689
680 if (!task) { 690 if (!task) {
681 cmd->request->timeout /= 2; 691 cmd->request->timeout /= 2;
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 44578b56ad0a..d3e58d763b43 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1561,6 +1561,7 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
1561{ 1561{
1562 struct Scsi_Host *host = rport_to_shost(rport); 1562 struct Scsi_Host *host = rport_to_shost(rport);
1563 fc_port_t *fcport = *(fc_port_t **)rport->dd_data; 1563 fc_port_t *fcport = *(fc_port_t **)rport->dd_data;
1564 unsigned long flags;
1564 1565
1565 if (!fcport) 1566 if (!fcport)
1566 return; 1567 return;
@@ -1573,10 +1574,10 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
1573 * Transport has effectively 'deleted' the rport, clear 1574 * Transport has effectively 'deleted' the rport, clear
1574 * all local references. 1575 * all local references.
1575 */ 1576 */
1576 spin_lock_irq(host->host_lock); 1577 spin_lock_irqsave(host->host_lock, flags);
1577 fcport->rport = fcport->drport = NULL; 1578 fcport->rport = fcport->drport = NULL;
1578 *((fc_port_t **)rport->dd_data) = NULL; 1579 *((fc_port_t **)rport->dd_data) = NULL;
1579 spin_unlock_irq(host->host_lock); 1580 spin_unlock_irqrestore(host->host_lock, flags);
1580 1581
1581 if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags)) 1582 if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
1582 return; 1583 return;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index f948e1a73aec..d9479c3fe5f8 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2505,11 +2505,12 @@ qla2x00_rport_del(void *data)
2505{ 2505{
2506 fc_port_t *fcport = data; 2506 fc_port_t *fcport = data;
2507 struct fc_rport *rport; 2507 struct fc_rport *rport;
2508 unsigned long flags;
2508 2509
2509 spin_lock_irq(fcport->vha->host->host_lock); 2510 spin_lock_irqsave(fcport->vha->host->host_lock, flags);
2510 rport = fcport->drport ? fcport->drport: fcport->rport; 2511 rport = fcport->drport ? fcport->drport: fcport->rport;
2511 fcport->drport = NULL; 2512 fcport->drport = NULL;
2512 spin_unlock_irq(fcport->vha->host->host_lock); 2513 spin_unlock_irqrestore(fcport->vha->host->host_lock, flags);
2513 if (rport) 2514 if (rport)
2514 fc_remote_port_delete(rport); 2515 fc_remote_port_delete(rport);
2515} 2516}
@@ -2879,6 +2880,7 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
2879 struct fc_rport_identifiers rport_ids; 2880 struct fc_rport_identifiers rport_ids;
2880 struct fc_rport *rport; 2881 struct fc_rport *rport;
2881 struct qla_hw_data *ha = vha->hw; 2882 struct qla_hw_data *ha = vha->hw;
2883 unsigned long flags;
2882 2884
2883 qla2x00_rport_del(fcport); 2885 qla2x00_rport_del(fcport);
2884 2886
@@ -2893,9 +2895,9 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
2893 "Unable to allocate fc remote port!\n"); 2895 "Unable to allocate fc remote port!\n");
2894 return; 2896 return;
2895 } 2897 }
2896 spin_lock_irq(fcport->vha->host->host_lock); 2898 spin_lock_irqsave(fcport->vha->host->host_lock, flags);
2897 *((fc_port_t **)rport->dd_data) = fcport; 2899 *((fc_port_t **)rport->dd_data) = fcport;
2898 spin_unlock_irq(fcport->vha->host->host_lock); 2900 spin_unlock_irqrestore(fcport->vha->host->host_lock, flags);
2899 2901
2900 rport->supported_classes = fcport->supported_classes; 2902 rport->supported_classes = fcport->supported_classes;
2901 2903
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index c194c23ca1fb..e90f7c16b956 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -349,7 +349,7 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
349 "Can't create request queue\n"); 349 "Can't create request queue\n");
350 goto fail; 350 goto fail;
351 } 351 }
352 ha->wq = create_workqueue("qla2xxx_wq"); 352 ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 1);
353 vha->req = ha->req_q_map[req]; 353 vha->req = ha->req_q_map[req];
354 options |= BIT_1; 354 options |= BIT_1;
355 for (ques = 1; ques < ha->max_rsp_queues; ques++) { 355 for (ques = 1; ques < ha->max_rsp_queues; ques++) {
@@ -562,7 +562,6 @@ qla2xxx_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)
562 } 562 }
563 if (atomic_read(&fcport->state) != FCS_ONLINE) { 563 if (atomic_read(&fcport->state) != FCS_ONLINE) {
564 if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD || 564 if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD ||
565 atomic_read(&fcport->state) == FCS_DEVICE_LOST ||
566 atomic_read(&base_vha->loop_state) == LOOP_DEAD) { 565 atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
567 cmd->result = DID_NO_CONNECT << 16; 566 cmd->result = DID_NO_CONNECT << 16;
568 goto qc24_fail_command; 567 goto qc24_fail_command;
@@ -2513,6 +2512,7 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport,
2513{ 2512{
2514 struct fc_rport *rport; 2513 struct fc_rport *rport;
2515 scsi_qla_host_t *base_vha; 2514 scsi_qla_host_t *base_vha;
2515 unsigned long flags;
2516 2516
2517 if (!fcport->rport) 2517 if (!fcport->rport)
2518 return; 2518 return;
@@ -2520,9 +2520,9 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport,
2520 rport = fcport->rport; 2520 rport = fcport->rport;
2521 if (defer) { 2521 if (defer) {
2522 base_vha = pci_get_drvdata(vha->hw->pdev); 2522 base_vha = pci_get_drvdata(vha->hw->pdev);
2523 spin_lock_irq(vha->host->host_lock); 2523 spin_lock_irqsave(vha->host->host_lock, flags);
2524 fcport->drport = rport; 2524 fcport->drport = rport;
2525 spin_unlock_irq(vha->host->host_lock); 2525 spin_unlock_irqrestore(vha->host->host_lock, flags);
2526 set_bit(FCPORT_UPDATE_NEEDED, &base_vha->dpc_flags); 2526 set_bit(FCPORT_UPDATE_NEEDED, &base_vha->dpc_flags);
2527 qla2xxx_wake_dpc(base_vha); 2527 qla2xxx_wake_dpc(base_vha);
2528 } else 2528 } else
@@ -3282,10 +3282,10 @@ qla2x00_do_dpc(void *data)
3282 3282
3283 set_user_nice(current, -20); 3283 set_user_nice(current, -20);
3284 3284
3285 set_current_state(TASK_INTERRUPTIBLE);
3285 while (!kthread_should_stop()) { 3286 while (!kthread_should_stop()) {
3286 DEBUG3(printk("qla2x00: DPC handler sleeping\n")); 3287 DEBUG3(printk("qla2x00: DPC handler sleeping\n"));
3287 3288
3288 set_current_state(TASK_INTERRUPTIBLE);
3289 schedule(); 3289 schedule();
3290 __set_current_state(TASK_RUNNING); 3290 __set_current_state(TASK_RUNNING);
3291 3291
@@ -3454,7 +3454,9 @@ qla2x00_do_dpc(void *data)
3454 qla2x00_do_dpc_all_vps(base_vha); 3454 qla2x00_do_dpc_all_vps(base_vha);
3455 3455
3456 ha->dpc_active = 0; 3456 ha->dpc_active = 0;
3457 set_current_state(TASK_INTERRUPTIBLE);
3457 } /* End of while(1) */ 3458 } /* End of while(1) */
3459 __set_current_state(TASK_RUNNING);
3458 3460
3459 DEBUG(printk("scsi(%ld): DPC handler exiting\n", base_vha->host_no)); 3461 DEBUG(printk("scsi(%ld): DPC handler exiting\n", base_vha->host_no));
3460 3462
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 7b310934efed..a6b2d72022fc 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1671,7 +1671,7 @@ static int do_device_access(struct scsi_cmnd *scmd,
1671 unsigned long long lba, unsigned int num, int write) 1671 unsigned long long lba, unsigned int num, int write)
1672{ 1672{
1673 int ret; 1673 int ret;
1674 unsigned int block, rest = 0; 1674 unsigned long long block, rest = 0;
1675 int (*func)(struct scsi_cmnd *, unsigned char *, int); 1675 int (*func)(struct scsi_cmnd *, unsigned char *, int);
1676 1676
1677 func = write ? fetch_to_dev_buffer : fill_from_dev_buffer; 1677 func = write ? fetch_to_dev_buffer : fill_from_dev_buffer;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9045c52abd25..fb2bb35c62cb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q)
443 &sdev->request_queue->queue_flags); 443 &sdev->request_queue->queue_flags);
444 if (flagset) 444 if (flagset)
445 queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); 445 queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
446 __blk_run_queue(sdev->request_queue); 446 __blk_run_queue(sdev->request_queue, false);
447 if (flagset) 447 if (flagset)
448 queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); 448 queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
449 spin_unlock(sdev->request_queue->queue_lock); 449 spin_unlock(sdev->request_queue->queue_lock);
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index b4056d14f812..342ee1a9c41d 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -146,7 +146,7 @@ static inline void scsi_netlink_exit(void) {}
146#endif 146#endif
147 147
148/* scsi_pm.c */ 148/* scsi_pm.c */
149#ifdef CONFIG_PM_OPS 149#ifdef CONFIG_PM
150extern const struct dev_pm_ops scsi_bus_pm_ops; 150extern const struct dev_pm_ops scsi_bus_pm_ops;
151#endif 151#endif
152#ifdef CONFIG_PM_RUNTIME 152#ifdef CONFIG_PM_RUNTIME
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 490ce213204e..e44ff64233fd 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -383,7 +383,7 @@ struct bus_type scsi_bus_type = {
383 .name = "scsi", 383 .name = "scsi",
384 .match = scsi_bus_match, 384 .match = scsi_bus_match,
385 .uevent = scsi_bus_uevent, 385 .uevent = scsi_bus_uevent,
386#ifdef CONFIG_PM_OPS 386#ifdef CONFIG_PM
387 .pm = &scsi_bus_pm_ops, 387 .pm = &scsi_bus_pm_ops,
388#endif 388#endif
389}; 389};
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index c399be979921..f67282058ba1 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -629,7 +629,7 @@ static int __init scsi_tgt_init(void)
629 if (!scsi_tgt_cmd_cache) 629 if (!scsi_tgt_cmd_cache)
630 return -ENOMEM; 630 return -ENOMEM;
631 631
632 scsi_tgtd = create_workqueue("scsi_tgtd"); 632 scsi_tgtd = alloc_workqueue("scsi_tgtd", 0, 1);
633 if (!scsi_tgtd) { 633 if (!scsi_tgtd) {
634 err = -ENOMEM; 634 err = -ENOMEM;
635 goto free_kmemcache; 635 goto free_kmemcache;
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 998c01be3234..5c3ccfc6b622 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport)
3829 !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); 3829 !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
3830 if (flagset) 3830 if (flagset)
3831 queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); 3831 queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
3832 __blk_run_queue(rport->rqst_q); 3832 __blk_run_queue(rport->rqst_q, false);
3833 if (flagset) 3833 if (flagset)
3834 queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); 3834 queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
3835 spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); 3835 spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 95928833855b..a429b01d0285 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1557,9 +1557,7 @@ static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
1557 drv_data->ssp = ssp; 1557 drv_data->ssp = ssp;
1558 1558
1559 master->dev.parent = &pdev->dev; 1559 master->dev.parent = &pdev->dev;
1560#ifdef CONFIG_OF
1561 master->dev.of_node = pdev->dev.of_node; 1560 master->dev.of_node = pdev->dev.of_node;
1562#endif
1563 /* the spi->mode bits understood by this driver: */ 1561 /* the spi->mode bits understood by this driver: */
1564 master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; 1562 master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
1565 1563
diff --git a/drivers/spi/pxa2xx_spi_pci.c b/drivers/spi/pxa2xx_spi_pci.c
index 351d8a375b57..378e504f89eb 100644
--- a/drivers/spi/pxa2xx_spi_pci.c
+++ b/drivers/spi/pxa2xx_spi_pci.c
@@ -7,10 +7,9 @@
7#include <linux/of_device.h> 7#include <linux/of_device.h>
8#include <linux/spi/pxa2xx_spi.h> 8#include <linux/spi/pxa2xx_spi.h>
9 9
10struct awesome_struct { 10struct ce4100_info {
11 struct ssp_device ssp; 11 struct ssp_device ssp;
12 struct platform_device spi_pdev; 12 struct platform_device *spi_pdev;
13 struct pxa2xx_spi_master spi_pdata;
14}; 13};
15 14
16static DEFINE_MUTEX(ssp_lock); 15static DEFINE_MUTEX(ssp_lock);
@@ -51,23 +50,15 @@ void pxa_ssp_free(struct ssp_device *ssp)
51} 50}
52EXPORT_SYMBOL_GPL(pxa_ssp_free); 51EXPORT_SYMBOL_GPL(pxa_ssp_free);
53 52
54static void plat_dev_release(struct device *dev)
55{
56 struct awesome_struct *as = container_of(dev,
57 struct awesome_struct, spi_pdev.dev);
58
59 of_device_node_put(&as->spi_pdev.dev);
60}
61
62static int __devinit ce4100_spi_probe(struct pci_dev *dev, 53static int __devinit ce4100_spi_probe(struct pci_dev *dev,
63 const struct pci_device_id *ent) 54 const struct pci_device_id *ent)
64{ 55{
65 int ret; 56 int ret;
66 resource_size_t phys_beg; 57 resource_size_t phys_beg;
67 resource_size_t phys_len; 58 resource_size_t phys_len;
68 struct awesome_struct *spi_info; 59 struct ce4100_info *spi_info;
69 struct platform_device *pdev; 60 struct platform_device *pdev;
70 struct pxa2xx_spi_master *spi_pdata; 61 struct pxa2xx_spi_master spi_pdata;
71 struct ssp_device *ssp; 62 struct ssp_device *ssp;
72 63
73 ret = pci_enable_device(dev); 64 ret = pci_enable_device(dev);
@@ -84,33 +75,28 @@ static int __devinit ce4100_spi_probe(struct pci_dev *dev,
84 return ret; 75 return ret;
85 } 76 }
86 77
78 pdev = platform_device_alloc("pxa2xx-spi", dev->devfn);
87 spi_info = kzalloc(sizeof(*spi_info), GFP_KERNEL); 79 spi_info = kzalloc(sizeof(*spi_info), GFP_KERNEL);
88 if (!spi_info) { 80 if (!pdev || !spi_info ) {
89 ret = -ENOMEM; 81 ret = -ENOMEM;
90 goto err_kz; 82 goto err_nomem;
91 } 83 }
92 ssp = &spi_info->ssp; 84 memset(&spi_pdata, 0, sizeof(spi_pdata));
93 pdev = &spi_info->spi_pdev; 85 spi_pdata.num_chipselect = dev->devfn;
94 spi_pdata = &spi_info->spi_pdata;
95 86
96 pdev->name = "pxa2xx-spi"; 87 ret = platform_device_add_data(pdev, &spi_pdata, sizeof(spi_pdata));
97 pdev->id = dev->devfn; 88 if (ret)
98 pdev->dev.parent = &dev->dev; 89 goto err_nomem;
99 pdev->dev.platform_data = &spi_info->spi_pdata;
100 90
101#ifdef CONFIG_OF 91 pdev->dev.parent = &dev->dev;
102 pdev->dev.of_node = dev->dev.of_node; 92 pdev->dev.of_node = dev->dev.of_node;
103#endif 93 ssp = &spi_info->ssp;
104 pdev->dev.release = plat_dev_release;
105
106 spi_pdata->num_chipselect = dev->devfn;
107
108 ssp->phys_base = pci_resource_start(dev, 0); 94 ssp->phys_base = pci_resource_start(dev, 0);
109 ssp->mmio_base = ioremap(phys_beg, phys_len); 95 ssp->mmio_base = ioremap(phys_beg, phys_len);
110 if (!ssp->mmio_base) { 96 if (!ssp->mmio_base) {
111 dev_err(&pdev->dev, "failed to ioremap() registers\n"); 97 dev_err(&pdev->dev, "failed to ioremap() registers\n");
112 ret = -EIO; 98 ret = -EIO;
113 goto err_remap; 99 goto err_nomem;
114 } 100 }
115 ssp->irq = dev->irq; 101 ssp->irq = dev->irq;
116 ssp->port_id = pdev->id; 102 ssp->port_id = pdev->id;
@@ -122,7 +108,7 @@ static int __devinit ce4100_spi_probe(struct pci_dev *dev,
122 108
123 pci_set_drvdata(dev, spi_info); 109 pci_set_drvdata(dev, spi_info);
124 110
125 ret = platform_device_register(pdev); 111 ret = platform_device_add(pdev);
126 if (ret) 112 if (ret)
127 goto err_dev_add; 113 goto err_dev_add;
128 114
@@ -135,27 +121,21 @@ err_dev_add:
135 mutex_unlock(&ssp_lock); 121 mutex_unlock(&ssp_lock);
136 iounmap(ssp->mmio_base); 122 iounmap(ssp->mmio_base);
137 123
138err_remap: 124err_nomem:
139 kfree(spi_info);
140
141err_kz:
142 release_mem_region(phys_beg, phys_len); 125 release_mem_region(phys_beg, phys_len);
143 126 platform_device_put(pdev);
127 kfree(spi_info);
144 return ret; 128 return ret;
145} 129}
146 130
147static void __devexit ce4100_spi_remove(struct pci_dev *dev) 131static void __devexit ce4100_spi_remove(struct pci_dev *dev)
148{ 132{
149 struct awesome_struct *spi_info; 133 struct ce4100_info *spi_info;
150 struct platform_device *pdev;
151 struct ssp_device *ssp; 134 struct ssp_device *ssp;
152 135
153 spi_info = pci_get_drvdata(dev); 136 spi_info = pci_get_drvdata(dev);
154
155 ssp = &spi_info->ssp; 137 ssp = &spi_info->ssp;
156 pdev = &spi_info->spi_pdev; 138 platform_device_unregister(spi_info->spi_pdev);
157
158 platform_device_unregister(pdev);
159 139
160 iounmap(ssp->mmio_base); 140 iounmap(ssp->mmio_base);
161 release_mem_region(pci_resource_start(dev, 0), 141 release_mem_region(pci_resource_start(dev, 0),
@@ -171,7 +151,6 @@ static void __devexit ce4100_spi_remove(struct pci_dev *dev)
171} 151}
172 152
173static struct pci_device_id ce4100_spi_devices[] __devinitdata = { 153static struct pci_device_id ce4100_spi_devices[] __devinitdata = {
174
175 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2e6a) }, 154 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2e6a) },
176 { }, 155 { },
177}; 156};
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 7adaef62a991..4d2c75df886c 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -351,14 +351,12 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id)
351 return IRQ_HANDLED; 351 return IRQ_HANDLED;
352} 352}
353 353
354#ifdef CONFIG_OF
355static const struct of_device_id xilinx_spi_of_match[] = { 354static const struct of_device_id xilinx_spi_of_match[] = {
356 { .compatible = "xlnx,xps-spi-2.00.a", }, 355 { .compatible = "xlnx,xps-spi-2.00.a", },
357 { .compatible = "xlnx,xps-spi-2.00.b", }, 356 { .compatible = "xlnx,xps-spi-2.00.b", },
358 {} 357 {}
359}; 358};
360MODULE_DEVICE_TABLE(of, xilinx_spi_of_match); 359MODULE_DEVICE_TABLE(of, xilinx_spi_of_match);
361#endif
362 360
363struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem, 361struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
364 u32 irq, s16 bus_num, int num_cs, int little_endian, int bits_per_word) 362 u32 irq, s16 bus_num, int num_cs, int little_endian, int bits_per_word)
@@ -394,9 +392,7 @@ struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
394 392
395 master->bus_num = bus_num; 393 master->bus_num = bus_num;
396 master->num_chipselect = num_cs; 394 master->num_chipselect = num_cs;
397#ifdef CONFIG_OF
398 master->dev.of_node = dev->of_node; 395 master->dev.of_node = dev->of_node;
399#endif
400 396
401 xspi->mem = *mem; 397 xspi->mem = *mem;
402 xspi->irq = irq; 398 xspi->irq = irq;
@@ -539,9 +535,7 @@ static struct platform_driver xilinx_spi_driver = {
539 .driver = { 535 .driver = {
540 .name = XILINX_SPI_NAME, 536 .name = XILINX_SPI_NAME,
541 .owner = THIS_MODULE, 537 .owner = THIS_MODULE,
542#ifdef CONFIG_OF
543 .of_match_table = xilinx_spi_of_match, 538 .of_match_table = xilinx_spi_of_match,
544#endif
545 }, 539 },
546}; 540};
547 541
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 5cfd70819f08..973bb190ef57 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -13,8 +13,7 @@ target_core_mod-y := target_core_configfs.o \
13 target_core_transport.o \ 13 target_core_transport.o \
14 target_core_cdb.o \ 14 target_core_cdb.o \
15 target_core_ua.o \ 15 target_core_ua.o \
16 target_core_rd.o \ 16 target_core_rd.o
17 target_core_mib.o
18 17
19obj-$(CONFIG_TARGET_CORE) += target_core_mod.o 18obj-$(CONFIG_TARGET_CORE) += target_core_mod.o
20 19
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 2764510798b0..caf8dc18ee0a 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -37,7 +37,6 @@
37#include <linux/parser.h> 37#include <linux/parser.h>
38#include <linux/syscalls.h> 38#include <linux/syscalls.h>
39#include <linux/configfs.h> 39#include <linux/configfs.h>
40#include <linux/proc_fs.h>
41 40
42#include <target/target_core_base.h> 41#include <target/target_core_base.h>
43#include <target/target_core_device.h> 42#include <target/target_core_device.h>
@@ -1971,13 +1970,35 @@ static void target_core_dev_release(struct config_item *item)
1971{ 1970{
1972 struct se_subsystem_dev *se_dev = container_of(to_config_group(item), 1971 struct se_subsystem_dev *se_dev = container_of(to_config_group(item),
1973 struct se_subsystem_dev, se_dev_group); 1972 struct se_subsystem_dev, se_dev_group);
1974 struct config_group *dev_cg; 1973 struct se_hba *hba = item_to_hba(&se_dev->se_dev_hba->hba_group.cg_item);
1975 1974 struct se_subsystem_api *t = hba->transport;
1976 if (!(se_dev)) 1975 struct config_group *dev_cg = &se_dev->se_dev_group;
1977 return;
1978 1976
1979 dev_cg = &se_dev->se_dev_group;
1980 kfree(dev_cg->default_groups); 1977 kfree(dev_cg->default_groups);
1978 /*
1979 * This pointer will set when the storage is enabled with:
1980 *`echo 1 > $CONFIGFS/core/$HBA/$DEV/dev_enable`
1981 */
1982 if (se_dev->se_dev_ptr) {
1983 printk(KERN_INFO "Target_Core_ConfigFS: Calling se_free_"
1984 "virtual_device() for se_dev_ptr: %p\n",
1985 se_dev->se_dev_ptr);
1986
1987 se_free_virtual_device(se_dev->se_dev_ptr, hba);
1988 } else {
1989 /*
1990 * Release struct se_subsystem_dev->se_dev_su_ptr..
1991 */
1992 printk(KERN_INFO "Target_Core_ConfigFS: Calling t->free_"
1993 "device() for se_dev_su_ptr: %p\n",
1994 se_dev->se_dev_su_ptr);
1995
1996 t->free_device(se_dev->se_dev_su_ptr);
1997 }
1998
1999 printk(KERN_INFO "Target_Core_ConfigFS: Deallocating se_subsystem"
2000 "_dev_t: %p\n", se_dev);
2001 kfree(se_dev);
1981} 2002}
1982 2003
1983static ssize_t target_core_dev_show(struct config_item *item, 2004static ssize_t target_core_dev_show(struct config_item *item,
@@ -2140,7 +2161,16 @@ static struct configfs_attribute *target_core_alua_lu_gp_attrs[] = {
2140 NULL, 2161 NULL,
2141}; 2162};
2142 2163
2164static void target_core_alua_lu_gp_release(struct config_item *item)
2165{
2166 struct t10_alua_lu_gp *lu_gp = container_of(to_config_group(item),
2167 struct t10_alua_lu_gp, lu_gp_group);
2168
2169 core_alua_free_lu_gp(lu_gp);
2170}
2171
2143static struct configfs_item_operations target_core_alua_lu_gp_ops = { 2172static struct configfs_item_operations target_core_alua_lu_gp_ops = {
2173 .release = target_core_alua_lu_gp_release,
2144 .show_attribute = target_core_alua_lu_gp_attr_show, 2174 .show_attribute = target_core_alua_lu_gp_attr_show,
2145 .store_attribute = target_core_alua_lu_gp_attr_store, 2175 .store_attribute = target_core_alua_lu_gp_attr_store,
2146}; 2176};
@@ -2191,9 +2221,11 @@ static void target_core_alua_drop_lu_gp(
2191 printk(KERN_INFO "Target_Core_ConfigFS: Releasing ALUA Logical Unit" 2221 printk(KERN_INFO "Target_Core_ConfigFS: Releasing ALUA Logical Unit"
2192 " Group: core/alua/lu_gps/%s, ID: %hu\n", 2222 " Group: core/alua/lu_gps/%s, ID: %hu\n",
2193 config_item_name(item), lu_gp->lu_gp_id); 2223 config_item_name(item), lu_gp->lu_gp_id);
2194 2224 /*
2225 * core_alua_free_lu_gp() is called from target_core_alua_lu_gp_ops->release()
2226 * -> target_core_alua_lu_gp_release()
2227 */
2195 config_item_put(item); 2228 config_item_put(item);
2196 core_alua_free_lu_gp(lu_gp);
2197} 2229}
2198 2230
2199static struct configfs_group_operations target_core_alua_lu_gps_group_ops = { 2231static struct configfs_group_operations target_core_alua_lu_gps_group_ops = {
@@ -2549,7 +2581,16 @@ static struct configfs_attribute *target_core_alua_tg_pt_gp_attrs[] = {
2549 NULL, 2581 NULL,
2550}; 2582};
2551 2583
2584static void target_core_alua_tg_pt_gp_release(struct config_item *item)
2585{
2586 struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(to_config_group(item),
2587 struct t10_alua_tg_pt_gp, tg_pt_gp_group);
2588
2589 core_alua_free_tg_pt_gp(tg_pt_gp);
2590}
2591
2552static struct configfs_item_operations target_core_alua_tg_pt_gp_ops = { 2592static struct configfs_item_operations target_core_alua_tg_pt_gp_ops = {
2593 .release = target_core_alua_tg_pt_gp_release,
2553 .show_attribute = target_core_alua_tg_pt_gp_attr_show, 2594 .show_attribute = target_core_alua_tg_pt_gp_attr_show,
2554 .store_attribute = target_core_alua_tg_pt_gp_attr_store, 2595 .store_attribute = target_core_alua_tg_pt_gp_attr_store,
2555}; 2596};
@@ -2602,9 +2643,11 @@ static void target_core_alua_drop_tg_pt_gp(
2602 printk(KERN_INFO "Target_Core_ConfigFS: Releasing ALUA Target Port" 2643 printk(KERN_INFO "Target_Core_ConfigFS: Releasing ALUA Target Port"
2603 " Group: alua/tg_pt_gps/%s, ID: %hu\n", 2644 " Group: alua/tg_pt_gps/%s, ID: %hu\n",
2604 config_item_name(item), tg_pt_gp->tg_pt_gp_id); 2645 config_item_name(item), tg_pt_gp->tg_pt_gp_id);
2605 2646 /*
2647 * core_alua_free_tg_pt_gp() is called from target_core_alua_tg_pt_gp_ops->release()
2648 * -> target_core_alua_tg_pt_gp_release().
2649 */
2606 config_item_put(item); 2650 config_item_put(item);
2607 core_alua_free_tg_pt_gp(tg_pt_gp);
2608} 2651}
2609 2652
2610static struct configfs_group_operations target_core_alua_tg_pt_gps_group_ops = { 2653static struct configfs_group_operations target_core_alua_tg_pt_gps_group_ops = {
@@ -2771,13 +2814,11 @@ static void target_core_drop_subdev(
2771 struct se_subsystem_api *t; 2814 struct se_subsystem_api *t;
2772 struct config_item *df_item; 2815 struct config_item *df_item;
2773 struct config_group *dev_cg, *tg_pt_gp_cg; 2816 struct config_group *dev_cg, *tg_pt_gp_cg;
2774 int i, ret; 2817 int i;
2775 2818
2776 hba = item_to_hba(&se_dev->se_dev_hba->hba_group.cg_item); 2819 hba = item_to_hba(&se_dev->se_dev_hba->hba_group.cg_item);
2777 2820
2778 if (mutex_lock_interruptible(&hba->hba_access_mutex)) 2821 mutex_lock(&hba->hba_access_mutex);
2779 goto out;
2780
2781 t = hba->transport; 2822 t = hba->transport;
2782 2823
2783 spin_lock(&se_global->g_device_lock); 2824 spin_lock(&se_global->g_device_lock);
@@ -2791,7 +2832,10 @@ static void target_core_drop_subdev(
2791 config_item_put(df_item); 2832 config_item_put(df_item);
2792 } 2833 }
2793 kfree(tg_pt_gp_cg->default_groups); 2834 kfree(tg_pt_gp_cg->default_groups);
2794 core_alua_free_tg_pt_gp(T10_ALUA(se_dev)->default_tg_pt_gp); 2835 /*
2836 * core_alua_free_tg_pt_gp() is called from ->default_tg_pt_gp
2837 * directly from target_core_alua_tg_pt_gp_release().
2838 */
2795 T10_ALUA(se_dev)->default_tg_pt_gp = NULL; 2839 T10_ALUA(se_dev)->default_tg_pt_gp = NULL;
2796 2840
2797 dev_cg = &se_dev->se_dev_group; 2841 dev_cg = &se_dev->se_dev_group;
@@ -2800,38 +2844,12 @@ static void target_core_drop_subdev(
2800 dev_cg->default_groups[i] = NULL; 2844 dev_cg->default_groups[i] = NULL;
2801 config_item_put(df_item); 2845 config_item_put(df_item);
2802 } 2846 }
2803
2804 config_item_put(item);
2805 /* 2847 /*
2806 * This pointer will set when the storage is enabled with: 2848 * The releasing of se_dev and associated se_dev->se_dev_ptr is done
2807 * `echo 1 > $CONFIGFS/core/$HBA/$DEV/dev_enable` 2849 * from target_core_dev_item_ops->release() ->target_core_dev_release().
2808 */ 2850 */
2809 if (se_dev->se_dev_ptr) { 2851 config_item_put(item);
2810 printk(KERN_INFO "Target_Core_ConfigFS: Calling se_free_"
2811 "virtual_device() for se_dev_ptr: %p\n",
2812 se_dev->se_dev_ptr);
2813
2814 ret = se_free_virtual_device(se_dev->se_dev_ptr, hba);
2815 if (ret < 0)
2816 goto hba_out;
2817 } else {
2818 /*
2819 * Release struct se_subsystem_dev->se_dev_su_ptr..
2820 */
2821 printk(KERN_INFO "Target_Core_ConfigFS: Calling t->free_"
2822 "device() for se_dev_su_ptr: %p\n",
2823 se_dev->se_dev_su_ptr);
2824
2825 t->free_device(se_dev->se_dev_su_ptr);
2826 }
2827
2828 printk(KERN_INFO "Target_Core_ConfigFS: Deallocating se_subsystem"
2829 "_dev_t: %p\n", se_dev);
2830
2831hba_out:
2832 mutex_unlock(&hba->hba_access_mutex); 2852 mutex_unlock(&hba->hba_access_mutex);
2833out:
2834 kfree(se_dev);
2835} 2853}
2836 2854
2837static struct configfs_group_operations target_core_hba_group_ops = { 2855static struct configfs_group_operations target_core_hba_group_ops = {
@@ -2914,6 +2932,13 @@ SE_HBA_ATTR(hba_mode, S_IRUGO | S_IWUSR);
2914 2932
2915CONFIGFS_EATTR_OPS(target_core_hba, se_hba, hba_group); 2933CONFIGFS_EATTR_OPS(target_core_hba, se_hba, hba_group);
2916 2934
2935static void target_core_hba_release(struct config_item *item)
2936{
2937 struct se_hba *hba = container_of(to_config_group(item),
2938 struct se_hba, hba_group);
2939 core_delete_hba(hba);
2940}
2941
2917static struct configfs_attribute *target_core_hba_attrs[] = { 2942static struct configfs_attribute *target_core_hba_attrs[] = {
2918 &target_core_hba_hba_info.attr, 2943 &target_core_hba_hba_info.attr,
2919 &target_core_hba_hba_mode.attr, 2944 &target_core_hba_hba_mode.attr,
@@ -2921,6 +2946,7 @@ static struct configfs_attribute *target_core_hba_attrs[] = {
2921}; 2946};
2922 2947
2923static struct configfs_item_operations target_core_hba_item_ops = { 2948static struct configfs_item_operations target_core_hba_item_ops = {
2949 .release = target_core_hba_release,
2924 .show_attribute = target_core_hba_attr_show, 2950 .show_attribute = target_core_hba_attr_show,
2925 .store_attribute = target_core_hba_attr_store, 2951 .store_attribute = target_core_hba_attr_store,
2926}; 2952};
@@ -2997,10 +3023,11 @@ static void target_core_call_delhbafromtarget(
2997 struct config_group *group, 3023 struct config_group *group,
2998 struct config_item *item) 3024 struct config_item *item)
2999{ 3025{
3000 struct se_hba *hba = item_to_hba(item); 3026 /*
3001 3027 * core_delete_hba() is called from target_core_hba_item_ops->release()
3028 * -> target_core_hba_release()
3029 */
3002 config_item_put(item); 3030 config_item_put(item);
3003 core_delete_hba(hba);
3004} 3031}
3005 3032
3006static struct configfs_group_operations target_core_group_ops = { 3033static struct configfs_group_operations target_core_group_ops = {
@@ -3022,7 +3049,6 @@ static int target_core_init_configfs(void)
3022 struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL; 3049 struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL;
3023 struct config_group *lu_gp_cg = NULL; 3050 struct config_group *lu_gp_cg = NULL;
3024 struct configfs_subsystem *subsys; 3051 struct configfs_subsystem *subsys;
3025 struct proc_dir_entry *scsi_target_proc = NULL;
3026 struct t10_alua_lu_gp *lu_gp; 3052 struct t10_alua_lu_gp *lu_gp;
3027 int ret; 3053 int ret;
3028 3054
@@ -3128,21 +3154,10 @@ static int target_core_init_configfs(void)
3128 if (core_dev_setup_virtual_lun0() < 0) 3154 if (core_dev_setup_virtual_lun0() < 0)
3129 goto out; 3155 goto out;
3130 3156
3131 scsi_target_proc = proc_mkdir("scsi_target", 0);
3132 if (!(scsi_target_proc)) {
3133 printk(KERN_ERR "proc_mkdir(scsi_target, 0) failed\n");
3134 goto out;
3135 }
3136 ret = init_scsi_target_mib();
3137 if (ret < 0)
3138 goto out;
3139
3140 return 0; 3157 return 0;
3141 3158
3142out: 3159out:
3143 configfs_unregister_subsystem(subsys); 3160 configfs_unregister_subsystem(subsys);
3144 if (scsi_target_proc)
3145 remove_proc_entry("scsi_target", 0);
3146 core_dev_release_virtual_lun0(); 3161 core_dev_release_virtual_lun0();
3147 rd_module_exit(); 3162 rd_module_exit();
3148out_global: 3163out_global:
@@ -3178,8 +3193,7 @@ static void target_core_exit_configfs(void)
3178 config_item_put(item); 3193 config_item_put(item);
3179 } 3194 }
3180 kfree(lu_gp_cg->default_groups); 3195 kfree(lu_gp_cg->default_groups);
3181 core_alua_free_lu_gp(se_global->default_lu_gp); 3196 lu_gp_cg->default_groups = NULL;
3182 se_global->default_lu_gp = NULL;
3183 3197
3184 alua_cg = &se_global->alua_group; 3198 alua_cg = &se_global->alua_group;
3185 for (i = 0; alua_cg->default_groups[i]; i++) { 3199 for (i = 0; alua_cg->default_groups[i]; i++) {
@@ -3188,6 +3202,7 @@ static void target_core_exit_configfs(void)
3188 config_item_put(item); 3202 config_item_put(item);
3189 } 3203 }
3190 kfree(alua_cg->default_groups); 3204 kfree(alua_cg->default_groups);
3205 alua_cg->default_groups = NULL;
3191 3206
3192 hba_cg = &se_global->target_core_hbagroup; 3207 hba_cg = &se_global->target_core_hbagroup;
3193 for (i = 0; hba_cg->default_groups[i]; i++) { 3208 for (i = 0; hba_cg->default_groups[i]; i++) {
@@ -3196,20 +3211,20 @@ static void target_core_exit_configfs(void)
3196 config_item_put(item); 3211 config_item_put(item);
3197 } 3212 }
3198 kfree(hba_cg->default_groups); 3213 kfree(hba_cg->default_groups);
3199 3214 hba_cg->default_groups = NULL;
3200 for (i = 0; subsys->su_group.default_groups[i]; i++) { 3215 /*
3201 item = &subsys->su_group.default_groups[i]->cg_item; 3216 * We expect subsys->su_group.default_groups to be released
3202 subsys->su_group.default_groups[i] = NULL; 3217 * by configfs subsystem provider logic..
3203 config_item_put(item); 3218 */
3204 } 3219 configfs_unregister_subsystem(subsys);
3205 kfree(subsys->su_group.default_groups); 3220 kfree(subsys->su_group.default_groups);
3206 3221
3207 configfs_unregister_subsystem(subsys); 3222 core_alua_free_lu_gp(se_global->default_lu_gp);
3223 se_global->default_lu_gp = NULL;
3224
3208 printk(KERN_INFO "TARGET_CORE[0]: Released ConfigFS Fabric" 3225 printk(KERN_INFO "TARGET_CORE[0]: Released ConfigFS Fabric"
3209 " Infrastructure\n"); 3226 " Infrastructure\n");
3210 3227
3211 remove_scsi_target_mib();
3212 remove_proc_entry("scsi_target", 0);
3213 core_dev_release_virtual_lun0(); 3228 core_dev_release_virtual_lun0();
3214 rd_module_exit(); 3229 rd_module_exit();
3215 release_se_global(); 3230 release_se_global();
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 317ce58d426d..5da051a07fa3 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -373,11 +373,11 @@ int core_update_device_list_for_node(
373 /* 373 /*
374 * deve->se_lun_acl will be NULL for demo-mode created LUNs 374 * deve->se_lun_acl will be NULL for demo-mode created LUNs
375 * that have not been explictly concerted to MappedLUNs -> 375 * that have not been explictly concerted to MappedLUNs ->
376 * struct se_lun_acl. 376 * struct se_lun_acl, but we remove deve->alua_port_list from
377 * port->sep_alua_list. This also means that active UAs and
378 * NodeACL context specific PR metadata for demo-mode
379 * MappedLUN *deve will be released below..
377 */ 380 */
378 if (!(deve->se_lun_acl))
379 return 0;
380
381 spin_lock_bh(&port->sep_alua_lock); 381 spin_lock_bh(&port->sep_alua_lock);
382 list_del(&deve->alua_port_list); 382 list_del(&deve->alua_port_list);
383 spin_unlock_bh(&port->sep_alua_lock); 383 spin_unlock_bh(&port->sep_alua_lock);
@@ -395,12 +395,14 @@ int core_update_device_list_for_node(
395 printk(KERN_ERR "struct se_dev_entry->se_lun_acl" 395 printk(KERN_ERR "struct se_dev_entry->se_lun_acl"
396 " already set for demo mode -> explict" 396 " already set for demo mode -> explict"
397 " LUN ACL transition\n"); 397 " LUN ACL transition\n");
398 spin_unlock_irq(&nacl->device_list_lock);
398 return -1; 399 return -1;
399 } 400 }
400 if (deve->se_lun != lun) { 401 if (deve->se_lun != lun) {
401 printk(KERN_ERR "struct se_dev_entry->se_lun does" 402 printk(KERN_ERR "struct se_dev_entry->se_lun does"
402 " match passed struct se_lun for demo mode" 403 " match passed struct se_lun for demo mode"
403 " -> explict LUN ACL transition\n"); 404 " -> explict LUN ACL transition\n");
405 spin_unlock_irq(&nacl->device_list_lock);
404 return -1; 406 return -1;
405 } 407 }
406 deve->se_lun_acl = lun_acl; 408 deve->se_lun_acl = lun_acl;
@@ -865,9 +867,6 @@ static void se_dev_stop(struct se_device *dev)
865 } 867 }
866 } 868 }
867 spin_unlock(&hba->device_lock); 869 spin_unlock(&hba->device_lock);
868
869 while (atomic_read(&hba->dev_mib_access_count))
870 cpu_relax();
871} 870}
872 871
873int se_dev_check_online(struct se_device *dev) 872int se_dev_check_online(struct se_device *dev)
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 32b148d7e261..b65d1c8e7740 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -214,12 +214,22 @@ TCM_MAPPEDLUN_ATTR(write_protect, S_IRUGO | S_IWUSR);
214 214
215CONFIGFS_EATTR_OPS(target_fabric_mappedlun, se_lun_acl, se_lun_group); 215CONFIGFS_EATTR_OPS(target_fabric_mappedlun, se_lun_acl, se_lun_group);
216 216
217static void target_fabric_mappedlun_release(struct config_item *item)
218{
219 struct se_lun_acl *lacl = container_of(to_config_group(item),
220 struct se_lun_acl, se_lun_group);
221 struct se_portal_group *se_tpg = lacl->se_lun_nacl->se_tpg;
222
223 core_dev_free_initiator_node_lun_acl(se_tpg, lacl);
224}
225
217static struct configfs_attribute *target_fabric_mappedlun_attrs[] = { 226static struct configfs_attribute *target_fabric_mappedlun_attrs[] = {
218 &target_fabric_mappedlun_write_protect.attr, 227 &target_fabric_mappedlun_write_protect.attr,
219 NULL, 228 NULL,
220}; 229};
221 230
222static struct configfs_item_operations target_fabric_mappedlun_item_ops = { 231static struct configfs_item_operations target_fabric_mappedlun_item_ops = {
232 .release = target_fabric_mappedlun_release,
223 .show_attribute = target_fabric_mappedlun_attr_show, 233 .show_attribute = target_fabric_mappedlun_attr_show,
224 .store_attribute = target_fabric_mappedlun_attr_store, 234 .store_attribute = target_fabric_mappedlun_attr_store,
225 .allow_link = target_fabric_mappedlun_link, 235 .allow_link = target_fabric_mappedlun_link,
@@ -337,15 +347,21 @@ static void target_fabric_drop_mappedlun(
337 struct config_group *group, 347 struct config_group *group,
338 struct config_item *item) 348 struct config_item *item)
339{ 349{
340 struct se_lun_acl *lacl = container_of(to_config_group(item),
341 struct se_lun_acl, se_lun_group);
342 struct se_portal_group *se_tpg = lacl->se_lun_nacl->se_tpg;
343
344 config_item_put(item); 350 config_item_put(item);
345 core_dev_free_initiator_node_lun_acl(se_tpg, lacl); 351}
352
353static void target_fabric_nacl_base_release(struct config_item *item)
354{
355 struct se_node_acl *se_nacl = container_of(to_config_group(item),
356 struct se_node_acl, acl_group);
357 struct se_portal_group *se_tpg = se_nacl->se_tpg;
358 struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
359
360 tf->tf_ops.fabric_drop_nodeacl(se_nacl);
346} 361}
347 362
348static struct configfs_item_operations target_fabric_nacl_base_item_ops = { 363static struct configfs_item_operations target_fabric_nacl_base_item_ops = {
364 .release = target_fabric_nacl_base_release,
349 .show_attribute = target_fabric_nacl_base_attr_show, 365 .show_attribute = target_fabric_nacl_base_attr_show,
350 .store_attribute = target_fabric_nacl_base_attr_store, 366 .store_attribute = target_fabric_nacl_base_attr_store,
351}; 367};
@@ -404,9 +420,6 @@ static void target_fabric_drop_nodeacl(
404 struct config_group *group, 420 struct config_group *group,
405 struct config_item *item) 421 struct config_item *item)
406{ 422{
407 struct se_portal_group *se_tpg = container_of(group,
408 struct se_portal_group, tpg_acl_group);
409 struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
410 struct se_node_acl *se_nacl = container_of(to_config_group(item), 423 struct se_node_acl *se_nacl = container_of(to_config_group(item),
411 struct se_node_acl, acl_group); 424 struct se_node_acl, acl_group);
412 struct config_item *df_item; 425 struct config_item *df_item;
@@ -419,9 +432,10 @@ static void target_fabric_drop_nodeacl(
419 nacl_cg->default_groups[i] = NULL; 432 nacl_cg->default_groups[i] = NULL;
420 config_item_put(df_item); 433 config_item_put(df_item);
421 } 434 }
422 435 /*
436 * struct se_node_acl free is done in target_fabric_nacl_base_release()
437 */
423 config_item_put(item); 438 config_item_put(item);
424 tf->tf_ops.fabric_drop_nodeacl(se_nacl);
425} 439}
426 440
427static struct configfs_group_operations target_fabric_nacl_group_ops = { 441static struct configfs_group_operations target_fabric_nacl_group_ops = {
@@ -437,7 +451,18 @@ TF_CIT_SETUP(tpg_nacl, NULL, &target_fabric_nacl_group_ops, NULL);
437 451
438CONFIGFS_EATTR_OPS(target_fabric_np_base, se_tpg_np, tpg_np_group); 452CONFIGFS_EATTR_OPS(target_fabric_np_base, se_tpg_np, tpg_np_group);
439 453
454static void target_fabric_np_base_release(struct config_item *item)
455{
456 struct se_tpg_np *se_tpg_np = container_of(to_config_group(item),
457 struct se_tpg_np, tpg_np_group);
458 struct se_portal_group *se_tpg = se_tpg_np->tpg_np_parent;
459 struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
460
461 tf->tf_ops.fabric_drop_np(se_tpg_np);
462}
463
440static struct configfs_item_operations target_fabric_np_base_item_ops = { 464static struct configfs_item_operations target_fabric_np_base_item_ops = {
465 .release = target_fabric_np_base_release,
441 .show_attribute = target_fabric_np_base_attr_show, 466 .show_attribute = target_fabric_np_base_attr_show,
442 .store_attribute = target_fabric_np_base_attr_store, 467 .store_attribute = target_fabric_np_base_attr_store,
443}; 468};
@@ -466,6 +491,7 @@ static struct config_group *target_fabric_make_np(
466 if (!(se_tpg_np) || IS_ERR(se_tpg_np)) 491 if (!(se_tpg_np) || IS_ERR(se_tpg_np))
467 return ERR_PTR(-EINVAL); 492 return ERR_PTR(-EINVAL);
468 493
494 se_tpg_np->tpg_np_parent = se_tpg;
469 config_group_init_type_name(&se_tpg_np->tpg_np_group, name, 495 config_group_init_type_name(&se_tpg_np->tpg_np_group, name,
470 &TF_CIT_TMPL(tf)->tfc_tpg_np_base_cit); 496 &TF_CIT_TMPL(tf)->tfc_tpg_np_base_cit);
471 497
@@ -476,14 +502,10 @@ static void target_fabric_drop_np(
476 struct config_group *group, 502 struct config_group *group,
477 struct config_item *item) 503 struct config_item *item)
478{ 504{
479 struct se_portal_group *se_tpg = container_of(group, 505 /*
480 struct se_portal_group, tpg_np_group); 506 * struct se_tpg_np is released via target_fabric_np_base_release()
481 struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; 507 */
482 struct se_tpg_np *se_tpg_np = container_of(to_config_group(item),
483 struct se_tpg_np, tpg_np_group);
484
485 config_item_put(item); 508 config_item_put(item);
486 tf->tf_ops.fabric_drop_np(se_tpg_np);
487} 509}
488 510
489static struct configfs_group_operations target_fabric_np_group_ops = { 511static struct configfs_group_operations target_fabric_np_group_ops = {
@@ -814,7 +836,18 @@ TF_CIT_SETUP(tpg_param, &target_fabric_tpg_param_item_ops, NULL, NULL);
814 */ 836 */
815CONFIGFS_EATTR_OPS(target_fabric_tpg, se_portal_group, tpg_group); 837CONFIGFS_EATTR_OPS(target_fabric_tpg, se_portal_group, tpg_group);
816 838
839static void target_fabric_tpg_release(struct config_item *item)
840{
841 struct se_portal_group *se_tpg = container_of(to_config_group(item),
842 struct se_portal_group, tpg_group);
843 struct se_wwn *wwn = se_tpg->se_tpg_wwn;
844 struct target_fabric_configfs *tf = wwn->wwn_tf;
845
846 tf->tf_ops.fabric_drop_tpg(se_tpg);
847}
848
817static struct configfs_item_operations target_fabric_tpg_base_item_ops = { 849static struct configfs_item_operations target_fabric_tpg_base_item_ops = {
850 .release = target_fabric_tpg_release,
818 .show_attribute = target_fabric_tpg_attr_show, 851 .show_attribute = target_fabric_tpg_attr_show,
819 .store_attribute = target_fabric_tpg_attr_store, 852 .store_attribute = target_fabric_tpg_attr_store,
820}; 853};
@@ -872,8 +905,6 @@ static void target_fabric_drop_tpg(
872 struct config_group *group, 905 struct config_group *group,
873 struct config_item *item) 906 struct config_item *item)
874{ 907{
875 struct se_wwn *wwn = container_of(group, struct se_wwn, wwn_group);
876 struct target_fabric_configfs *tf = wwn->wwn_tf;
877 struct se_portal_group *se_tpg = container_of(to_config_group(item), 908 struct se_portal_group *se_tpg = container_of(to_config_group(item),
878 struct se_portal_group, tpg_group); 909 struct se_portal_group, tpg_group);
879 struct config_group *tpg_cg = &se_tpg->tpg_group; 910 struct config_group *tpg_cg = &se_tpg->tpg_group;
@@ -890,15 +921,28 @@ static void target_fabric_drop_tpg(
890 } 921 }
891 922
892 config_item_put(item); 923 config_item_put(item);
893 tf->tf_ops.fabric_drop_tpg(se_tpg);
894} 924}
895 925
926static void target_fabric_release_wwn(struct config_item *item)
927{
928 struct se_wwn *wwn = container_of(to_config_group(item),
929 struct se_wwn, wwn_group);
930 struct target_fabric_configfs *tf = wwn->wwn_tf;
931
932 tf->tf_ops.fabric_drop_wwn(wwn);
933}
934
935static struct configfs_item_operations target_fabric_tpg_item_ops = {
936 .release = target_fabric_release_wwn,
937};
938
896static struct configfs_group_operations target_fabric_tpg_group_ops = { 939static struct configfs_group_operations target_fabric_tpg_group_ops = {
897 .make_group = target_fabric_make_tpg, 940 .make_group = target_fabric_make_tpg,
898 .drop_item = target_fabric_drop_tpg, 941 .drop_item = target_fabric_drop_tpg,
899}; 942};
900 943
901TF_CIT_SETUP(tpg, NULL, &target_fabric_tpg_group_ops, NULL); 944TF_CIT_SETUP(tpg, &target_fabric_tpg_item_ops, &target_fabric_tpg_group_ops,
945 NULL);
902 946
903/* End of tfc_tpg_cit */ 947/* End of tfc_tpg_cit */
904 948
@@ -932,13 +976,7 @@ static void target_fabric_drop_wwn(
932 struct config_group *group, 976 struct config_group *group,
933 struct config_item *item) 977 struct config_item *item)
934{ 978{
935 struct target_fabric_configfs *tf = container_of(group,
936 struct target_fabric_configfs, tf_group);
937 struct se_wwn *wwn = container_of(to_config_group(item),
938 struct se_wwn, wwn_group);
939
940 config_item_put(item); 979 config_item_put(item);
941 tf->tf_ops.fabric_drop_wwn(wwn);
942} 980}
943 981
944static struct configfs_group_operations target_fabric_wwn_group_ops = { 982static struct configfs_group_operations target_fabric_wwn_group_ops = {
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index c6e0d757e76e..67f0c09983c8 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -154,7 +154,7 @@ static struct se_device *iblock_create_virtdevice(
154 154
155 bd = blkdev_get_by_path(ib_dev->ibd_udev_path, 155 bd = blkdev_get_by_path(ib_dev->ibd_udev_path,
156 FMODE_WRITE|FMODE_READ|FMODE_EXCL, ib_dev); 156 FMODE_WRITE|FMODE_READ|FMODE_EXCL, ib_dev);
157 if (!(bd)) 157 if (IS_ERR(bd))
158 goto failed; 158 goto failed;
159 /* 159 /*
160 * Setup the local scope queue_limits from struct request_queue->limits 160 * Setup the local scope queue_limits from struct request_queue->limits
@@ -220,8 +220,10 @@ static void iblock_free_device(void *p)
220{ 220{
221 struct iblock_dev *ib_dev = p; 221 struct iblock_dev *ib_dev = p;
222 222
223 blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); 223 if (ib_dev->ibd_bd != NULL)
224 bioset_free(ib_dev->ibd_bio_set); 224 blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
225 if (ib_dev->ibd_bio_set != NULL)
226 bioset_free(ib_dev->ibd_bio_set);
225 kfree(ib_dev); 227 kfree(ib_dev);
226} 228}
227 229
diff --git a/drivers/target/target_core_mib.c b/drivers/target/target_core_mib.c
deleted file mode 100644
index d5a48aa0d2d1..000000000000
--- a/drivers/target/target_core_mib.c
+++ /dev/null
@@ -1,1078 +0,0 @@
1/*******************************************************************************
2 * Filename: target_core_mib.c
3 *
4 * Copyright (c) 2006-2007 SBE, Inc. All Rights Reserved.
5 * Copyright (c) 2007-2010 Rising Tide Systems
6 * Copyright (c) 2008-2010 Linux-iSCSI.org
7 *
8 * Nicholas A. Bellinger <nab@linux-iscsi.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 *
24 ******************************************************************************/
25
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/delay.h>
30#include <linux/timer.h>
31#include <linux/string.h>
32#include <linux/version.h>
33#include <generated/utsrelease.h>
34#include <linux/utsname.h>
35#include <linux/proc_fs.h>
36#include <linux/seq_file.h>
37#include <linux/blkdev.h>
38#include <scsi/scsi.h>
39#include <scsi/scsi_device.h>
40#include <scsi/scsi_host.h>
41
42#include <target/target_core_base.h>
43#include <target/target_core_transport.h>
44#include <target/target_core_fabric_ops.h>
45#include <target/target_core_configfs.h>
46
47#include "target_core_hba.h"
48#include "target_core_mib.h"
49
50/* SCSI mib table index */
51static struct scsi_index_table scsi_index_table;
52
53#ifndef INITIAL_JIFFIES
54#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
55#endif
56
57/* SCSI Instance Table */
58#define SCSI_INST_SW_INDEX 1
59#define SCSI_TRANSPORT_INDEX 1
60
61#define NONE "None"
62#define ISPRINT(a) ((a >= ' ') && (a <= '~'))
63
64static inline int list_is_first(const struct list_head *list,
65 const struct list_head *head)
66{
67 return list->prev == head;
68}
69
70static void *locate_hba_start(
71 struct seq_file *seq,
72 loff_t *pos)
73{
74 spin_lock(&se_global->g_device_lock);
75 return seq_list_start(&se_global->g_se_dev_list, *pos);
76}
77
78static void *locate_hba_next(
79 struct seq_file *seq,
80 void *v,
81 loff_t *pos)
82{
83 return seq_list_next(v, &se_global->g_se_dev_list, pos);
84}
85
86static void locate_hba_stop(struct seq_file *seq, void *v)
87{
88 spin_unlock(&se_global->g_device_lock);
89}
90
91/****************************************************************************
92 * SCSI MIB Tables
93 ****************************************************************************/
94
95/*
96 * SCSI Instance Table
97 */
98static void *scsi_inst_seq_start(
99 struct seq_file *seq,
100 loff_t *pos)
101{
102 spin_lock(&se_global->hba_lock);
103 return seq_list_start(&se_global->g_hba_list, *pos);
104}
105
106static void *scsi_inst_seq_next(
107 struct seq_file *seq,
108 void *v,
109 loff_t *pos)
110{
111 return seq_list_next(v, &se_global->g_hba_list, pos);
112}
113
114static void scsi_inst_seq_stop(struct seq_file *seq, void *v)
115{
116 spin_unlock(&se_global->hba_lock);
117}
118
119static int scsi_inst_seq_show(struct seq_file *seq, void *v)
120{
121 struct se_hba *hba = list_entry(v, struct se_hba, hba_list);
122
123 if (list_is_first(&hba->hba_list, &se_global->g_hba_list))
124 seq_puts(seq, "inst sw_indx\n");
125
126 seq_printf(seq, "%u %u\n", hba->hba_index, SCSI_INST_SW_INDEX);
127 seq_printf(seq, "plugin: %s version: %s\n",
128 hba->transport->name, TARGET_CORE_VERSION);
129
130 return 0;
131}
132
133static const struct seq_operations scsi_inst_seq_ops = {
134 .start = scsi_inst_seq_start,
135 .next = scsi_inst_seq_next,
136 .stop = scsi_inst_seq_stop,
137 .show = scsi_inst_seq_show
138};
139
140static int scsi_inst_seq_open(struct inode *inode, struct file *file)
141{
142 return seq_open(file, &scsi_inst_seq_ops);
143}
144
145static const struct file_operations scsi_inst_seq_fops = {
146 .owner = THIS_MODULE,
147 .open = scsi_inst_seq_open,
148 .read = seq_read,
149 .llseek = seq_lseek,
150 .release = seq_release,
151};
152
153/*
154 * SCSI Device Table
155 */
156static void *scsi_dev_seq_start(struct seq_file *seq, loff_t *pos)
157{
158 return locate_hba_start(seq, pos);
159}
160
161static void *scsi_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
162{
163 return locate_hba_next(seq, v, pos);
164}
165
166static void scsi_dev_seq_stop(struct seq_file *seq, void *v)
167{
168 locate_hba_stop(seq, v);
169}
170
171static int scsi_dev_seq_show(struct seq_file *seq, void *v)
172{
173 struct se_hba *hba;
174 struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
175 g_se_dev_list);
176 struct se_device *dev = se_dev->se_dev_ptr;
177 char str[28];
178 int k;
179
180 if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
181 seq_puts(seq, "inst indx role ports\n");
182
183 if (!(dev))
184 return 0;
185
186 hba = dev->se_hba;
187 if (!(hba)) {
188 /* Log error ? */
189 return 0;
190 }
191
192 seq_printf(seq, "%u %u %s %u\n", hba->hba_index,
193 dev->dev_index, "Target", dev->dev_port_count);
194
195 memcpy(&str[0], (void *)DEV_T10_WWN(dev), 28);
196
197 /* vendor */
198 for (k = 0; k < 8; k++)
199 str[k] = ISPRINT(DEV_T10_WWN(dev)->vendor[k]) ?
200 DEV_T10_WWN(dev)->vendor[k] : 0x20;
201 str[k] = 0x20;
202
203 /* model */
204 for (k = 0; k < 16; k++)
205 str[k+9] = ISPRINT(DEV_T10_WWN(dev)->model[k]) ?
206 DEV_T10_WWN(dev)->model[k] : 0x20;
207 str[k + 9] = 0;
208
209 seq_printf(seq, "dev_alias: %s\n", str);
210
211 return 0;
212}
213
214static const struct seq_operations scsi_dev_seq_ops = {
215 .start = scsi_dev_seq_start,
216 .next = scsi_dev_seq_next,
217 .stop = scsi_dev_seq_stop,
218 .show = scsi_dev_seq_show
219};
220
221static int scsi_dev_seq_open(struct inode *inode, struct file *file)
222{
223 return seq_open(file, &scsi_dev_seq_ops);
224}
225
226static const struct file_operations scsi_dev_seq_fops = {
227 .owner = THIS_MODULE,
228 .open = scsi_dev_seq_open,
229 .read = seq_read,
230 .llseek = seq_lseek,
231 .release = seq_release,
232};
233
234/*
235 * SCSI Port Table
236 */
237static void *scsi_port_seq_start(struct seq_file *seq, loff_t *pos)
238{
239 return locate_hba_start(seq, pos);
240}
241
242static void *scsi_port_seq_next(struct seq_file *seq, void *v, loff_t *pos)
243{
244 return locate_hba_next(seq, v, pos);
245}
246
247static void scsi_port_seq_stop(struct seq_file *seq, void *v)
248{
249 locate_hba_stop(seq, v);
250}
251
252static int scsi_port_seq_show(struct seq_file *seq, void *v)
253{
254 struct se_hba *hba;
255 struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
256 g_se_dev_list);
257 struct se_device *dev = se_dev->se_dev_ptr;
258 struct se_port *sep, *sep_tmp;
259
260 if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
261 seq_puts(seq, "inst device indx role busy_count\n");
262
263 if (!(dev))
264 return 0;
265
266 hba = dev->se_hba;
267 if (!(hba)) {
268 /* Log error ? */
269 return 0;
270 }
271
272 /* FIXME: scsiPortBusyStatuses count */
273 spin_lock(&dev->se_port_lock);
274 list_for_each_entry_safe(sep, sep_tmp, &dev->dev_sep_list, sep_list) {
275 seq_printf(seq, "%u %u %u %s%u %u\n", hba->hba_index,
276 dev->dev_index, sep->sep_index, "Device",
277 dev->dev_index, 0);
278 }
279 spin_unlock(&dev->se_port_lock);
280
281 return 0;
282}
283
284static const struct seq_operations scsi_port_seq_ops = {
285 .start = scsi_port_seq_start,
286 .next = scsi_port_seq_next,
287 .stop = scsi_port_seq_stop,
288 .show = scsi_port_seq_show
289};
290
291static int scsi_port_seq_open(struct inode *inode, struct file *file)
292{
293 return seq_open(file, &scsi_port_seq_ops);
294}
295
296static const struct file_operations scsi_port_seq_fops = {
297 .owner = THIS_MODULE,
298 .open = scsi_port_seq_open,
299 .read = seq_read,
300 .llseek = seq_lseek,
301 .release = seq_release,
302};
303
304/*
305 * SCSI Transport Table
306 */
307static void *scsi_transport_seq_start(struct seq_file *seq, loff_t *pos)
308{
309 return locate_hba_start(seq, pos);
310}
311
312static void *scsi_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos)
313{
314 return locate_hba_next(seq, v, pos);
315}
316
317static void scsi_transport_seq_stop(struct seq_file *seq, void *v)
318{
319 locate_hba_stop(seq, v);
320}
321
322static int scsi_transport_seq_show(struct seq_file *seq, void *v)
323{
324 struct se_hba *hba;
325 struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
326 g_se_dev_list);
327 struct se_device *dev = se_dev->se_dev_ptr;
328 struct se_port *se, *se_tmp;
329 struct se_portal_group *tpg;
330 struct t10_wwn *wwn;
331 char buf[64];
332
333 if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
334 seq_puts(seq, "inst device indx dev_name\n");
335
336 if (!(dev))
337 return 0;
338
339 hba = dev->se_hba;
340 if (!(hba)) {
341 /* Log error ? */
342 return 0;
343 }
344
345 wwn = DEV_T10_WWN(dev);
346
347 spin_lock(&dev->se_port_lock);
348 list_for_each_entry_safe(se, se_tmp, &dev->dev_sep_list, sep_list) {
349 tpg = se->sep_tpg;
350 sprintf(buf, "scsiTransport%s",
351 TPG_TFO(tpg)->get_fabric_name());
352
353 seq_printf(seq, "%u %s %u %s+%s\n",
354 hba->hba_index, /* scsiTransportIndex */
355 buf, /* scsiTransportType */
356 (TPG_TFO(tpg)->tpg_get_inst_index != NULL) ?
357 TPG_TFO(tpg)->tpg_get_inst_index(tpg) :
358 0,
359 TPG_TFO(tpg)->tpg_get_wwn(tpg),
360 (strlen(wwn->unit_serial)) ?
361 /* scsiTransportDevName */
362 wwn->unit_serial : wwn->vendor);
363 }
364 spin_unlock(&dev->se_port_lock);
365
366 return 0;
367}
368
369static const struct seq_operations scsi_transport_seq_ops = {
370 .start = scsi_transport_seq_start,
371 .next = scsi_transport_seq_next,
372 .stop = scsi_transport_seq_stop,
373 .show = scsi_transport_seq_show
374};
375
376static int scsi_transport_seq_open(struct inode *inode, struct file *file)
377{
378 return seq_open(file, &scsi_transport_seq_ops);
379}
380
381static const struct file_operations scsi_transport_seq_fops = {
382 .owner = THIS_MODULE,
383 .open = scsi_transport_seq_open,
384 .read = seq_read,
385 .llseek = seq_lseek,
386 .release = seq_release,
387};
388
389/*
390 * SCSI Target Device Table
391 */
392static void *scsi_tgt_dev_seq_start(struct seq_file *seq, loff_t *pos)
393{
394 return locate_hba_start(seq, pos);
395}
396
397static void *scsi_tgt_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
398{
399 return locate_hba_next(seq, v, pos);
400}
401
402static void scsi_tgt_dev_seq_stop(struct seq_file *seq, void *v)
403{
404 locate_hba_stop(seq, v);
405}
406
407
408#define LU_COUNT 1 /* for now */
409static int scsi_tgt_dev_seq_show(struct seq_file *seq, void *v)
410{
411 struct se_hba *hba;
412 struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
413 g_se_dev_list);
414 struct se_device *dev = se_dev->se_dev_ptr;
415 int non_accessible_lus = 0;
416 char status[16];
417
418 if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
419 seq_puts(seq, "inst indx num_LUs status non_access_LUs"
420 " resets\n");
421
422 if (!(dev))
423 return 0;
424
425 hba = dev->se_hba;
426 if (!(hba)) {
427 /* Log error ? */
428 return 0;
429 }
430
431 switch (dev->dev_status) {
432 case TRANSPORT_DEVICE_ACTIVATED:
433 strcpy(status, "activated");
434 break;
435 case TRANSPORT_DEVICE_DEACTIVATED:
436 strcpy(status, "deactivated");
437 non_accessible_lus = 1;
438 break;
439 case TRANSPORT_DEVICE_SHUTDOWN:
440 strcpy(status, "shutdown");
441 non_accessible_lus = 1;
442 break;
443 case TRANSPORT_DEVICE_OFFLINE_ACTIVATED:
444 case TRANSPORT_DEVICE_OFFLINE_DEACTIVATED:
445 strcpy(status, "offline");
446 non_accessible_lus = 1;
447 break;
448 default:
449 sprintf(status, "unknown(%d)", dev->dev_status);
450 non_accessible_lus = 1;
451 }
452
453 seq_printf(seq, "%u %u %u %s %u %u\n",
454 hba->hba_index, dev->dev_index, LU_COUNT,
455 status, non_accessible_lus, dev->num_resets);
456
457 return 0;
458}
459
460static const struct seq_operations scsi_tgt_dev_seq_ops = {
461 .start = scsi_tgt_dev_seq_start,
462 .next = scsi_tgt_dev_seq_next,
463 .stop = scsi_tgt_dev_seq_stop,
464 .show = scsi_tgt_dev_seq_show
465};
466
467static int scsi_tgt_dev_seq_open(struct inode *inode, struct file *file)
468{
469 return seq_open(file, &scsi_tgt_dev_seq_ops);
470}
471
472static const struct file_operations scsi_tgt_dev_seq_fops = {
473 .owner = THIS_MODULE,
474 .open = scsi_tgt_dev_seq_open,
475 .read = seq_read,
476 .llseek = seq_lseek,
477 .release = seq_release,
478};
479
480/*
481 * SCSI Target Port Table
482 */
483static void *scsi_tgt_port_seq_start(struct seq_file *seq, loff_t *pos)
484{
485 return locate_hba_start(seq, pos);
486}
487
488static void *scsi_tgt_port_seq_next(struct seq_file *seq, void *v, loff_t *pos)
489{
490 return locate_hba_next(seq, v, pos);
491}
492
493static void scsi_tgt_port_seq_stop(struct seq_file *seq, void *v)
494{
495 locate_hba_stop(seq, v);
496}
497
498static int scsi_tgt_port_seq_show(struct seq_file *seq, void *v)
499{
500 struct se_hba *hba;
501 struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
502 g_se_dev_list);
503 struct se_device *dev = se_dev->se_dev_ptr;
504 struct se_port *sep, *sep_tmp;
505 struct se_portal_group *tpg;
506 u32 rx_mbytes, tx_mbytes;
507 unsigned long long num_cmds;
508 char buf[64];
509
510 if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
511 seq_puts(seq, "inst device indx name port_index in_cmds"
512 " write_mbytes read_mbytes hs_in_cmds\n");
513
514 if (!(dev))
515 return 0;
516
517 hba = dev->se_hba;
518 if (!(hba)) {
519 /* Log error ? */
520 return 0;
521 }
522
523 spin_lock(&dev->se_port_lock);
524 list_for_each_entry_safe(sep, sep_tmp, &dev->dev_sep_list, sep_list) {
525 tpg = sep->sep_tpg;
526 sprintf(buf, "%sPort#",
527 TPG_TFO(tpg)->get_fabric_name());
528
529 seq_printf(seq, "%u %u %u %s%d %s%s%d ",
530 hba->hba_index,
531 dev->dev_index,
532 sep->sep_index,
533 buf, sep->sep_index,
534 TPG_TFO(tpg)->tpg_get_wwn(tpg), "+t+",
535 TPG_TFO(tpg)->tpg_get_tag(tpg));
536
537 spin_lock(&sep->sep_lun->lun_sep_lock);
538 num_cmds = sep->sep_stats.cmd_pdus;
539 rx_mbytes = (sep->sep_stats.rx_data_octets >> 20);
540 tx_mbytes = (sep->sep_stats.tx_data_octets >> 20);
541 spin_unlock(&sep->sep_lun->lun_sep_lock);
542
543 seq_printf(seq, "%llu %u %u %u\n", num_cmds,
544 rx_mbytes, tx_mbytes, 0);
545 }
546 spin_unlock(&dev->se_port_lock);
547
548 return 0;
549}
550
551static const struct seq_operations scsi_tgt_port_seq_ops = {
552 .start = scsi_tgt_port_seq_start,
553 .next = scsi_tgt_port_seq_next,
554 .stop = scsi_tgt_port_seq_stop,
555 .show = scsi_tgt_port_seq_show
556};
557
558static int scsi_tgt_port_seq_open(struct inode *inode, struct file *file)
559{
560 return seq_open(file, &scsi_tgt_port_seq_ops);
561}
562
563static const struct file_operations scsi_tgt_port_seq_fops = {
564 .owner = THIS_MODULE,
565 .open = scsi_tgt_port_seq_open,
566 .read = seq_read,
567 .llseek = seq_lseek,
568 .release = seq_release,
569};
570
571/*
572 * SCSI Authorized Initiator Table:
573 * It contains the SCSI Initiators authorized to be attached to one of the
574 * local Target ports.
575 * Iterates through all active TPGs and extracts the info from the ACLs
576 */
577static void *scsi_auth_intr_seq_start(struct seq_file *seq, loff_t *pos)
578{
579 spin_lock_bh(&se_global->se_tpg_lock);
580 return seq_list_start(&se_global->g_se_tpg_list, *pos);
581}
582
583static void *scsi_auth_intr_seq_next(struct seq_file *seq, void *v,
584 loff_t *pos)
585{
586 return seq_list_next(v, &se_global->g_se_tpg_list, pos);
587}
588
589static void scsi_auth_intr_seq_stop(struct seq_file *seq, void *v)
590{
591 spin_unlock_bh(&se_global->se_tpg_lock);
592}
593
594static int scsi_auth_intr_seq_show(struct seq_file *seq, void *v)
595{
596 struct se_portal_group *se_tpg = list_entry(v, struct se_portal_group,
597 se_tpg_list);
598 struct se_dev_entry *deve;
599 struct se_lun *lun;
600 struct se_node_acl *se_nacl;
601 int j;
602
603 if (list_is_first(&se_tpg->se_tpg_list,
604 &se_global->g_se_tpg_list))
605 seq_puts(seq, "inst dev port indx dev_or_port intr_name "
606 "map_indx att_count num_cmds read_mbytes "
607 "write_mbytes hs_num_cmds creation_time row_status\n");
608
609 if (!(se_tpg))
610 return 0;
611
612 spin_lock(&se_tpg->acl_node_lock);
613 list_for_each_entry(se_nacl, &se_tpg->acl_node_list, acl_list) {
614
615 atomic_inc(&se_nacl->mib_ref_count);
616 smp_mb__after_atomic_inc();
617 spin_unlock(&se_tpg->acl_node_lock);
618
619 spin_lock_irq(&se_nacl->device_list_lock);
620 for (j = 0; j < TRANSPORT_MAX_LUNS_PER_TPG; j++) {
621 deve = &se_nacl->device_list[j];
622 if (!(deve->lun_flags &
623 TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) ||
624 (!deve->se_lun))
625 continue;
626 lun = deve->se_lun;
627 if (!lun->lun_se_dev)
628 continue;
629
630 seq_printf(seq, "%u %u %u %u %u %s %u %u %u %u %u %u"
631 " %u %s\n",
632 /* scsiInstIndex */
633 (TPG_TFO(se_tpg)->tpg_get_inst_index != NULL) ?
634 TPG_TFO(se_tpg)->tpg_get_inst_index(se_tpg) :
635 0,
636 /* scsiDeviceIndex */
637 lun->lun_se_dev->dev_index,
638 /* scsiAuthIntrTgtPortIndex */
639 TPG_TFO(se_tpg)->tpg_get_tag(se_tpg),
640 /* scsiAuthIntrIndex */
641 se_nacl->acl_index,
642 /* scsiAuthIntrDevOrPort */
643 1,
644 /* scsiAuthIntrName */
645 se_nacl->initiatorname[0] ?
646 se_nacl->initiatorname : NONE,
647 /* FIXME: scsiAuthIntrLunMapIndex */
648 0,
649 /* scsiAuthIntrAttachedTimes */
650 deve->attach_count,
651 /* scsiAuthIntrOutCommands */
652 deve->total_cmds,
653 /* scsiAuthIntrReadMegaBytes */
654 (u32)(deve->read_bytes >> 20),
655 /* scsiAuthIntrWrittenMegaBytes */
656 (u32)(deve->write_bytes >> 20),
657 /* FIXME: scsiAuthIntrHSOutCommands */
658 0,
659 /* scsiAuthIntrLastCreation */
660 (u32)(((u32)deve->creation_time -
661 INITIAL_JIFFIES) * 100 / HZ),
662 /* FIXME: scsiAuthIntrRowStatus */
663 "Ready");
664 }
665 spin_unlock_irq(&se_nacl->device_list_lock);
666
667 spin_lock(&se_tpg->acl_node_lock);
668 atomic_dec(&se_nacl->mib_ref_count);
669 smp_mb__after_atomic_dec();
670 }
671 spin_unlock(&se_tpg->acl_node_lock);
672
673 return 0;
674}
675
676static const struct seq_operations scsi_auth_intr_seq_ops = {
677 .start = scsi_auth_intr_seq_start,
678 .next = scsi_auth_intr_seq_next,
679 .stop = scsi_auth_intr_seq_stop,
680 .show = scsi_auth_intr_seq_show
681};
682
683static int scsi_auth_intr_seq_open(struct inode *inode, struct file *file)
684{
685 return seq_open(file, &scsi_auth_intr_seq_ops);
686}
687
688static const struct file_operations scsi_auth_intr_seq_fops = {
689 .owner = THIS_MODULE,
690 .open = scsi_auth_intr_seq_open,
691 .read = seq_read,
692 .llseek = seq_lseek,
693 .release = seq_release,
694};
695
696/*
697 * SCSI Attached Initiator Port Table:
698 * It lists the SCSI Initiators attached to one of the local Target ports.
699 * Iterates through all active TPGs and use active sessions from each TPG
700 * to list the info fo this table.
701 */
702static void *scsi_att_intr_port_seq_start(struct seq_file *seq, loff_t *pos)
703{
704 spin_lock_bh(&se_global->se_tpg_lock);
705 return seq_list_start(&se_global->g_se_tpg_list, *pos);
706}
707
708static void *scsi_att_intr_port_seq_next(struct seq_file *seq, void *v,
709 loff_t *pos)
710{
711 return seq_list_next(v, &se_global->g_se_tpg_list, pos);
712}
713
714static void scsi_att_intr_port_seq_stop(struct seq_file *seq, void *v)
715{
716 spin_unlock_bh(&se_global->se_tpg_lock);
717}
718
719static int scsi_att_intr_port_seq_show(struct seq_file *seq, void *v)
720{
721 struct se_portal_group *se_tpg = list_entry(v, struct se_portal_group,
722 se_tpg_list);
723 struct se_dev_entry *deve;
724 struct se_lun *lun;
725 struct se_node_acl *se_nacl;
726 struct se_session *se_sess;
727 unsigned char buf[64];
728 int j;
729
730 if (list_is_first(&se_tpg->se_tpg_list,
731 &se_global->g_se_tpg_list))
732 seq_puts(seq, "inst dev port indx port_auth_indx port_name"
733 " port_ident\n");
734
735 if (!(se_tpg))
736 return 0;
737
738 spin_lock(&se_tpg->session_lock);
739 list_for_each_entry(se_sess, &se_tpg->tpg_sess_list, sess_list) {
740 if ((TPG_TFO(se_tpg)->sess_logged_in(se_sess)) ||
741 (!se_sess->se_node_acl) ||
742 (!se_sess->se_node_acl->device_list))
743 continue;
744
745 atomic_inc(&se_sess->mib_ref_count);
746 smp_mb__after_atomic_inc();
747 se_nacl = se_sess->se_node_acl;
748 atomic_inc(&se_nacl->mib_ref_count);
749 smp_mb__after_atomic_inc();
750 spin_unlock(&se_tpg->session_lock);
751
752 spin_lock_irq(&se_nacl->device_list_lock);
753 for (j = 0; j < TRANSPORT_MAX_LUNS_PER_TPG; j++) {
754 deve = &se_nacl->device_list[j];
755 if (!(deve->lun_flags &
756 TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) ||
757 (!deve->se_lun))
758 continue;
759
760 lun = deve->se_lun;
761 if (!lun->lun_se_dev)
762 continue;
763
764 memset(buf, 0, 64);
765 if (TPG_TFO(se_tpg)->sess_get_initiator_sid != NULL)
766 TPG_TFO(se_tpg)->sess_get_initiator_sid(
767 se_sess, (unsigned char *)&buf[0], 64);
768
769 seq_printf(seq, "%u %u %u %u %u %s+i+%s\n",
770 /* scsiInstIndex */
771 (TPG_TFO(se_tpg)->tpg_get_inst_index != NULL) ?
772 TPG_TFO(se_tpg)->tpg_get_inst_index(se_tpg) :
773 0,
774 /* scsiDeviceIndex */
775 lun->lun_se_dev->dev_index,
776 /* scsiPortIndex */
777 TPG_TFO(se_tpg)->tpg_get_tag(se_tpg),
778 /* scsiAttIntrPortIndex */
779 (TPG_TFO(se_tpg)->sess_get_index != NULL) ?
780 TPG_TFO(se_tpg)->sess_get_index(se_sess) :
781 0,
782 /* scsiAttIntrPortAuthIntrIdx */
783 se_nacl->acl_index,
784 /* scsiAttIntrPortName */
785 se_nacl->initiatorname[0] ?
786 se_nacl->initiatorname : NONE,
787 /* scsiAttIntrPortIdentifier */
788 buf);
789 }
790 spin_unlock_irq(&se_nacl->device_list_lock);
791
792 spin_lock(&se_tpg->session_lock);
793 atomic_dec(&se_nacl->mib_ref_count);
794 smp_mb__after_atomic_dec();
795 atomic_dec(&se_sess->mib_ref_count);
796 smp_mb__after_atomic_dec();
797 }
798 spin_unlock(&se_tpg->session_lock);
799
800 return 0;
801}
802
803static const struct seq_operations scsi_att_intr_port_seq_ops = {
804 .start = scsi_att_intr_port_seq_start,
805 .next = scsi_att_intr_port_seq_next,
806 .stop = scsi_att_intr_port_seq_stop,
807 .show = scsi_att_intr_port_seq_show
808};
809
810static int scsi_att_intr_port_seq_open(struct inode *inode, struct file *file)
811{
812 return seq_open(file, &scsi_att_intr_port_seq_ops);
813}
814
815static const struct file_operations scsi_att_intr_port_seq_fops = {
816 .owner = THIS_MODULE,
817 .open = scsi_att_intr_port_seq_open,
818 .read = seq_read,
819 .llseek = seq_lseek,
820 .release = seq_release,
821};
822
823/*
824 * SCSI Logical Unit Table
825 */
826static void *scsi_lu_seq_start(struct seq_file *seq, loff_t *pos)
827{
828 return locate_hba_start(seq, pos);
829}
830
831static void *scsi_lu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
832{
833 return locate_hba_next(seq, v, pos);
834}
835
836static void scsi_lu_seq_stop(struct seq_file *seq, void *v)
837{
838 locate_hba_stop(seq, v);
839}
840
841#define SCSI_LU_INDEX 1
842static int scsi_lu_seq_show(struct seq_file *seq, void *v)
843{
844 struct se_hba *hba;
845 struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
846 g_se_dev_list);
847 struct se_device *dev = se_dev->se_dev_ptr;
848 int j;
849 char str[28];
850
851 if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
852 seq_puts(seq, "inst dev indx LUN lu_name vend prod rev"
853 " dev_type status state-bit num_cmds read_mbytes"
854 " write_mbytes resets full_stat hs_num_cmds creation_time\n");
855
856 if (!(dev))
857 return 0;
858
859 hba = dev->se_hba;
860 if (!(hba)) {
861 /* Log error ? */
862 return 0;
863 }
864
865 /* Fix LU state, if we can read it from the device */
866 seq_printf(seq, "%u %u %u %llu %s", hba->hba_index,
867 dev->dev_index, SCSI_LU_INDEX,
868 (unsigned long long)0, /* FIXME: scsiLuDefaultLun */
869 (strlen(DEV_T10_WWN(dev)->unit_serial)) ?
870 /* scsiLuWwnName */
871 (char *)&DEV_T10_WWN(dev)->unit_serial[0] :
872 "None");
873
874 memcpy(&str[0], (void *)DEV_T10_WWN(dev), 28);
875 /* scsiLuVendorId */
876 for (j = 0; j < 8; j++)
877 str[j] = ISPRINT(DEV_T10_WWN(dev)->vendor[j]) ?
878 DEV_T10_WWN(dev)->vendor[j] : 0x20;
879 str[8] = 0;
880 seq_printf(seq, " %s", str);
881
882 /* scsiLuProductId */
883 for (j = 0; j < 16; j++)
884 str[j] = ISPRINT(DEV_T10_WWN(dev)->model[j]) ?
885 DEV_T10_WWN(dev)->model[j] : 0x20;
886 str[16] = 0;
887 seq_printf(seq, " %s", str);
888
889 /* scsiLuRevisionId */
890 for (j = 0; j < 4; j++)
891 str[j] = ISPRINT(DEV_T10_WWN(dev)->revision[j]) ?
892 DEV_T10_WWN(dev)->revision[j] : 0x20;
893 str[4] = 0;
894 seq_printf(seq, " %s", str);
895
896 seq_printf(seq, " %u %s %s %llu %u %u %u %u %u %u\n",
897 /* scsiLuPeripheralType */
898 TRANSPORT(dev)->get_device_type(dev),
899 (dev->dev_status == TRANSPORT_DEVICE_ACTIVATED) ?
900 "available" : "notavailable", /* scsiLuStatus */
901 "exposed", /* scsiLuState */
902 (unsigned long long)dev->num_cmds,
903 /* scsiLuReadMegaBytes */
904 (u32)(dev->read_bytes >> 20),
905 /* scsiLuWrittenMegaBytes */
906 (u32)(dev->write_bytes >> 20),
907 dev->num_resets, /* scsiLuInResets */
908 0, /* scsiLuOutTaskSetFullStatus */
909 0, /* scsiLuHSInCommands */
910 (u32)(((u32)dev->creation_time - INITIAL_JIFFIES) *
911 100 / HZ));
912
913 return 0;
914}
915
916static const struct seq_operations scsi_lu_seq_ops = {
917 .start = scsi_lu_seq_start,
918 .next = scsi_lu_seq_next,
919 .stop = scsi_lu_seq_stop,
920 .show = scsi_lu_seq_show
921};
922
923static int scsi_lu_seq_open(struct inode *inode, struct file *file)
924{
925 return seq_open(file, &scsi_lu_seq_ops);
926}
927
928static const struct file_operations scsi_lu_seq_fops = {
929 .owner = THIS_MODULE,
930 .open = scsi_lu_seq_open,
931 .read = seq_read,
932 .llseek = seq_lseek,
933 .release = seq_release,
934};
935
936/****************************************************************************/
937
938/*
939 * Remove proc fs entries
940 */
941void remove_scsi_target_mib(void)
942{
943 remove_proc_entry("scsi_target/mib/scsi_inst", NULL);
944 remove_proc_entry("scsi_target/mib/scsi_dev", NULL);
945 remove_proc_entry("scsi_target/mib/scsi_port", NULL);
946 remove_proc_entry("scsi_target/mib/scsi_transport", NULL);
947 remove_proc_entry("scsi_target/mib/scsi_tgt_dev", NULL);
948 remove_proc_entry("scsi_target/mib/scsi_tgt_port", NULL);
949 remove_proc_entry("scsi_target/mib/scsi_auth_intr", NULL);
950 remove_proc_entry("scsi_target/mib/scsi_att_intr_port", NULL);
951 remove_proc_entry("scsi_target/mib/scsi_lu", NULL);
952 remove_proc_entry("scsi_target/mib", NULL);
953}
954
955/*
956 * Create proc fs entries for the mib tables
957 */
958int init_scsi_target_mib(void)
959{
960 struct proc_dir_entry *dir_entry;
961 struct proc_dir_entry *scsi_inst_entry;
962 struct proc_dir_entry *scsi_dev_entry;
963 struct proc_dir_entry *scsi_port_entry;
964 struct proc_dir_entry *scsi_transport_entry;
965 struct proc_dir_entry *scsi_tgt_dev_entry;
966 struct proc_dir_entry *scsi_tgt_port_entry;
967 struct proc_dir_entry *scsi_auth_intr_entry;
968 struct proc_dir_entry *scsi_att_intr_port_entry;
969 struct proc_dir_entry *scsi_lu_entry;
970
971 dir_entry = proc_mkdir("scsi_target/mib", NULL);
972 if (!(dir_entry)) {
973 printk(KERN_ERR "proc_mkdir() failed.\n");
974 return -1;
975 }
976
977 scsi_inst_entry =
978 create_proc_entry("scsi_target/mib/scsi_inst", 0, NULL);
979 if (scsi_inst_entry)
980 scsi_inst_entry->proc_fops = &scsi_inst_seq_fops;
981 else
982 goto error;
983
984 scsi_dev_entry =
985 create_proc_entry("scsi_target/mib/scsi_dev", 0, NULL);
986 if (scsi_dev_entry)
987 scsi_dev_entry->proc_fops = &scsi_dev_seq_fops;
988 else
989 goto error;
990
991 scsi_port_entry =
992 create_proc_entry("scsi_target/mib/scsi_port", 0, NULL);
993 if (scsi_port_entry)
994 scsi_port_entry->proc_fops = &scsi_port_seq_fops;
995 else
996 goto error;
997
998 scsi_transport_entry =
999 create_proc_entry("scsi_target/mib/scsi_transport", 0, NULL);
1000 if (scsi_transport_entry)
1001 scsi_transport_entry->proc_fops = &scsi_transport_seq_fops;
1002 else
1003 goto error;
1004
1005 scsi_tgt_dev_entry =
1006 create_proc_entry("scsi_target/mib/scsi_tgt_dev", 0, NULL);
1007 if (scsi_tgt_dev_entry)
1008 scsi_tgt_dev_entry->proc_fops = &scsi_tgt_dev_seq_fops;
1009 else
1010 goto error;
1011
1012 scsi_tgt_port_entry =
1013 create_proc_entry("scsi_target/mib/scsi_tgt_port", 0, NULL);
1014 if (scsi_tgt_port_entry)
1015 scsi_tgt_port_entry->proc_fops = &scsi_tgt_port_seq_fops;
1016 else
1017 goto error;
1018
1019 scsi_auth_intr_entry =
1020 create_proc_entry("scsi_target/mib/scsi_auth_intr", 0, NULL);
1021 if (scsi_auth_intr_entry)
1022 scsi_auth_intr_entry->proc_fops = &scsi_auth_intr_seq_fops;
1023 else
1024 goto error;
1025
1026 scsi_att_intr_port_entry =
1027 create_proc_entry("scsi_target/mib/scsi_att_intr_port", 0, NULL);
1028 if (scsi_att_intr_port_entry)
1029 scsi_att_intr_port_entry->proc_fops =
1030 &scsi_att_intr_port_seq_fops;
1031 else
1032 goto error;
1033
1034 scsi_lu_entry = create_proc_entry("scsi_target/mib/scsi_lu", 0, NULL);
1035 if (scsi_lu_entry)
1036 scsi_lu_entry->proc_fops = &scsi_lu_seq_fops;
1037 else
1038 goto error;
1039
1040 return 0;
1041
1042error:
1043 printk(KERN_ERR "create_proc_entry() failed.\n");
1044 remove_scsi_target_mib();
1045 return -1;
1046}
1047
1048/*
1049 * Initialize the index table for allocating unique row indexes to various mib
1050 * tables
1051 */
1052void init_scsi_index_table(void)
1053{
1054 memset(&scsi_index_table, 0, sizeof(struct scsi_index_table));
1055 spin_lock_init(&scsi_index_table.lock);
1056}
1057
1058/*
1059 * Allocate a new row index for the entry type specified
1060 */
1061u32 scsi_get_new_index(scsi_index_t type)
1062{
1063 u32 new_index;
1064
1065 if ((type < 0) || (type >= SCSI_INDEX_TYPE_MAX)) {
1066 printk(KERN_ERR "Invalid index type %d\n", type);
1067 return -1;
1068 }
1069
1070 spin_lock(&scsi_index_table.lock);
1071 new_index = ++scsi_index_table.scsi_mib_index[type];
1072 if (new_index == 0)
1073 new_index = ++scsi_index_table.scsi_mib_index[type];
1074 spin_unlock(&scsi_index_table.lock);
1075
1076 return new_index;
1077}
1078EXPORT_SYMBOL(scsi_get_new_index);
diff --git a/drivers/target/target_core_mib.h b/drivers/target/target_core_mib.h
deleted file mode 100644
index 277204633850..000000000000
--- a/drivers/target/target_core_mib.h
+++ /dev/null
@@ -1,28 +0,0 @@
1#ifndef TARGET_CORE_MIB_H
2#define TARGET_CORE_MIB_H
3
4typedef enum {
5 SCSI_INST_INDEX,
6 SCSI_DEVICE_INDEX,
7 SCSI_AUTH_INTR_INDEX,
8 SCSI_INDEX_TYPE_MAX
9} scsi_index_t;
10
11struct scsi_index_table {
12 spinlock_t lock;
13 u32 scsi_mib_index[SCSI_INDEX_TYPE_MAX];
14} ____cacheline_aligned;
15
16/* SCSI Port stats */
17struct scsi_port_stats {
18 u64 cmd_pdus;
19 u64 tx_data_octets;
20 u64 rx_data_octets;
21} ____cacheline_aligned;
22
23extern int init_scsi_target_mib(void);
24extern void remove_scsi_target_mib(void);
25extern void init_scsi_index_table(void);
26extern u32 scsi_get_new_index(scsi_index_t);
27
28#endif /*** TARGET_CORE_MIB_H ***/
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 742d24609a9b..f2a08477a68c 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -462,8 +462,8 @@ static struct se_device *pscsi_create_type_disk(
462 */ 462 */
463 bd = blkdev_get_by_path(se_dev->se_dev_udev_path, 463 bd = blkdev_get_by_path(se_dev->se_dev_udev_path,
464 FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv); 464 FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv);
465 if (!(bd)) { 465 if (IS_ERR(bd)) {
466 printk("pSCSI: blkdev_get_by_path() failed\n"); 466 printk(KERN_ERR "pSCSI: blkdev_get_by_path() failed\n");
467 scsi_device_put(sd); 467 scsi_device_put(sd);
468 return NULL; 468 return NULL;
469 } 469 }
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 158cecbec718..4a109835e420 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -282,6 +282,9 @@ int core_tmr_lun_reset(
282 282
283 atomic_set(&task->task_active, 0); 283 atomic_set(&task->task_active, 0);
284 atomic_set(&task->task_stop, 0); 284 atomic_set(&task->task_stop, 0);
285 } else {
286 if (atomic_read(&task->task_execute_queue) != 0)
287 transport_remove_task_from_execute_queue(task, dev);
285 } 288 }
286 __transport_stop_task_timer(task, &flags); 289 __transport_stop_task_timer(task, &flags);
287 290
@@ -301,6 +304,7 @@ int core_tmr_lun_reset(
301 DEBUG_LR("LUN_RESET: got t_transport_active = 1 for" 304 DEBUG_LR("LUN_RESET: got t_transport_active = 1 for"
302 " task: %p, t_fe_count: %d dev: %p\n", task, 305 " task: %p, t_fe_count: %d dev: %p\n", task,
303 fe_count, dev); 306 fe_count, dev);
307 atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
304 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, 308 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock,
305 flags); 309 flags);
306 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); 310 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
@@ -310,6 +314,7 @@ int core_tmr_lun_reset(
310 } 314 }
311 DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p," 315 DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p,"
312 " t_fe_count: %d dev: %p\n", task, fe_count, dev); 316 " t_fe_count: %d dev: %p\n", task, fe_count, dev);
317 atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
313 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags); 318 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags);
314 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); 319 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
315 320
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index abfa81a57115..c26f67467623 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -275,7 +275,6 @@ struct se_node_acl *core_tpg_check_initiator_node_acl(
275 spin_lock_init(&acl->device_list_lock); 275 spin_lock_init(&acl->device_list_lock);
276 spin_lock_init(&acl->nacl_sess_lock); 276 spin_lock_init(&acl->nacl_sess_lock);
277 atomic_set(&acl->acl_pr_ref_count, 0); 277 atomic_set(&acl->acl_pr_ref_count, 0);
278 atomic_set(&acl->mib_ref_count, 0);
279 acl->queue_depth = TPG_TFO(tpg)->tpg_get_default_depth(tpg); 278 acl->queue_depth = TPG_TFO(tpg)->tpg_get_default_depth(tpg);
280 snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); 279 snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname);
281 acl->se_tpg = tpg; 280 acl->se_tpg = tpg;
@@ -318,12 +317,6 @@ void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *nacl)
318 cpu_relax(); 317 cpu_relax();
319} 318}
320 319
321void core_tpg_wait_for_mib_ref(struct se_node_acl *nacl)
322{
323 while (atomic_read(&nacl->mib_ref_count) != 0)
324 cpu_relax();
325}
326
327void core_tpg_clear_object_luns(struct se_portal_group *tpg) 320void core_tpg_clear_object_luns(struct se_portal_group *tpg)
328{ 321{
329 int i, ret; 322 int i, ret;
@@ -480,7 +473,6 @@ int core_tpg_del_initiator_node_acl(
480 spin_unlock_bh(&tpg->session_lock); 473 spin_unlock_bh(&tpg->session_lock);
481 474
482 core_tpg_wait_for_nacl_pr_ref(acl); 475 core_tpg_wait_for_nacl_pr_ref(acl);
483 core_tpg_wait_for_mib_ref(acl);
484 core_clear_initiator_node_from_tpg(acl, tpg); 476 core_clear_initiator_node_from_tpg(acl, tpg);
485 core_free_device_list_for_node(acl, tpg); 477 core_free_device_list_for_node(acl, tpg);
486 478
@@ -701,6 +693,8 @@ EXPORT_SYMBOL(core_tpg_register);
701 693
702int core_tpg_deregister(struct se_portal_group *se_tpg) 694int core_tpg_deregister(struct se_portal_group *se_tpg)
703{ 695{
696 struct se_node_acl *nacl, *nacl_tmp;
697
704 printk(KERN_INFO "TARGET_CORE[%s]: Deallocating %s struct se_portal_group" 698 printk(KERN_INFO "TARGET_CORE[%s]: Deallocating %s struct se_portal_group"
705 " for endpoint: %s Portal Tag %u\n", 699 " for endpoint: %s Portal Tag %u\n",
706 (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) ? 700 (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) ?
@@ -714,6 +708,25 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
714 708
715 while (atomic_read(&se_tpg->tpg_pr_ref_count) != 0) 709 while (atomic_read(&se_tpg->tpg_pr_ref_count) != 0)
716 cpu_relax(); 710 cpu_relax();
711 /*
712 * Release any remaining demo-mode generated se_node_acl that have
713 * not been released because of TFO->tpg_check_demo_mode_cache() == 1
714 * in transport_deregister_session().
715 */
716 spin_lock_bh(&se_tpg->acl_node_lock);
717 list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list,
718 acl_list) {
719 list_del(&nacl->acl_list);
720 se_tpg->num_node_acls--;
721 spin_unlock_bh(&se_tpg->acl_node_lock);
722
723 core_tpg_wait_for_nacl_pr_ref(nacl);
724 core_free_device_list_for_node(nacl, se_tpg);
725 TPG_TFO(se_tpg)->tpg_release_fabric_acl(se_tpg, nacl);
726
727 spin_lock_bh(&se_tpg->acl_node_lock);
728 }
729 spin_unlock_bh(&se_tpg->acl_node_lock);
717 730
718 if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) 731 if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL)
719 core_tpg_release_virtual_lun0(se_tpg); 732 core_tpg_release_virtual_lun0(se_tpg);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 28b6292ff298..4bbf6c147f89 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -379,6 +379,40 @@ void release_se_global(void)
379 se_global = NULL; 379 se_global = NULL;
380} 380}
381 381
382/* SCSI statistics table index */
383static struct scsi_index_table scsi_index_table;
384
385/*
386 * Initialize the index table for allocating unique row indexes to various mib
387 * tables.
388 */
389void init_scsi_index_table(void)
390{
391 memset(&scsi_index_table, 0, sizeof(struct scsi_index_table));
392 spin_lock_init(&scsi_index_table.lock);
393}
394
395/*
396 * Allocate a new row index for the entry type specified
397 */
398u32 scsi_get_new_index(scsi_index_t type)
399{
400 u32 new_index;
401
402 if ((type < 0) || (type >= SCSI_INDEX_TYPE_MAX)) {
403 printk(KERN_ERR "Invalid index type %d\n", type);
404 return -EINVAL;
405 }
406
407 spin_lock(&scsi_index_table.lock);
408 new_index = ++scsi_index_table.scsi_mib_index[type];
409 if (new_index == 0)
410 new_index = ++scsi_index_table.scsi_mib_index[type];
411 spin_unlock(&scsi_index_table.lock);
412
413 return new_index;
414}
415
382void transport_init_queue_obj(struct se_queue_obj *qobj) 416void transport_init_queue_obj(struct se_queue_obj *qobj)
383{ 417{
384 atomic_set(&qobj->queue_cnt, 0); 418 atomic_set(&qobj->queue_cnt, 0);
@@ -437,7 +471,6 @@ struct se_session *transport_init_session(void)
437 } 471 }
438 INIT_LIST_HEAD(&se_sess->sess_list); 472 INIT_LIST_HEAD(&se_sess->sess_list);
439 INIT_LIST_HEAD(&se_sess->sess_acl_list); 473 INIT_LIST_HEAD(&se_sess->sess_acl_list);
440 atomic_set(&se_sess->mib_ref_count, 0);
441 474
442 return se_sess; 475 return se_sess;
443} 476}
@@ -546,12 +579,6 @@ void transport_deregister_session(struct se_session *se_sess)
546 transport_free_session(se_sess); 579 transport_free_session(se_sess);
547 return; 580 return;
548 } 581 }
549 /*
550 * Wait for possible reference in drivers/target/target_core_mib.c:
551 * scsi_att_intr_port_seq_show()
552 */
553 while (atomic_read(&se_sess->mib_ref_count) != 0)
554 cpu_relax();
555 582
556 spin_lock_bh(&se_tpg->session_lock); 583 spin_lock_bh(&se_tpg->session_lock);
557 list_del(&se_sess->sess_list); 584 list_del(&se_sess->sess_list);
@@ -574,7 +601,6 @@ void transport_deregister_session(struct se_session *se_sess)
574 spin_unlock_bh(&se_tpg->acl_node_lock); 601 spin_unlock_bh(&se_tpg->acl_node_lock);
575 602
576 core_tpg_wait_for_nacl_pr_ref(se_nacl); 603 core_tpg_wait_for_nacl_pr_ref(se_nacl);
577 core_tpg_wait_for_mib_ref(se_nacl);
578 core_free_device_list_for_node(se_nacl, se_tpg); 604 core_free_device_list_for_node(se_nacl, se_tpg);
579 TPG_TFO(se_tpg)->tpg_release_fabric_acl(se_tpg, 605 TPG_TFO(se_tpg)->tpg_release_fabric_acl(se_tpg,
580 se_nacl); 606 se_nacl);
@@ -1181,7 +1207,7 @@ transport_get_task_from_execute_queue(struct se_device *dev)
1181 * 1207 *
1182 * 1208 *
1183 */ 1209 */
1184static void transport_remove_task_from_execute_queue( 1210void transport_remove_task_from_execute_queue(
1185 struct se_task *task, 1211 struct se_task *task,
1186 struct se_device *dev) 1212 struct se_device *dev)
1187{ 1213{
@@ -4827,6 +4853,8 @@ static int transport_do_se_mem_map(
4827 4853
4828 return ret; 4854 return ret;
4829 } 4855 }
4856
4857 BUG_ON(list_empty(se_mem_list));
4830 /* 4858 /*
4831 * This is the normal path for all normal non BIDI and BIDI-COMMAND 4859 * This is the normal path for all normal non BIDI and BIDI-COMMAND
4832 * WRITE payloads.. If we need to do BIDI READ passthrough for 4860 * WRITE payloads.. If we need to do BIDI READ passthrough for
@@ -5008,7 +5036,9 @@ transport_map_control_cmd_to_task(struct se_cmd *cmd)
5008 struct se_mem *se_mem = NULL, *se_mem_lout = NULL; 5036 struct se_mem *se_mem = NULL, *se_mem_lout = NULL;
5009 u32 se_mem_cnt = 0, task_offset = 0; 5037 u32 se_mem_cnt = 0, task_offset = 0;
5010 5038
5011 BUG_ON(list_empty(cmd->t_task->t_mem_list)); 5039 if (!list_empty(T_TASK(cmd)->t_mem_list))
5040 se_mem = list_entry(T_TASK(cmd)->t_mem_list->next,
5041 struct se_mem, se_list);
5012 5042
5013 ret = transport_do_se_mem_map(dev, task, 5043 ret = transport_do_se_mem_map(dev, task,
5014 cmd->t_task->t_mem_list, NULL, se_mem, 5044 cmd->t_task->t_mem_list, NULL, se_mem,
@@ -5519,7 +5549,8 @@ static void transport_generic_wait_for_tasks(
5519 5549
5520 atomic_set(&T_TASK(cmd)->transport_lun_stop, 0); 5550 atomic_set(&T_TASK(cmd)->transport_lun_stop, 0);
5521 } 5551 }
5522 if (!atomic_read(&T_TASK(cmd)->t_transport_active)) 5552 if (!atomic_read(&T_TASK(cmd)->t_transport_active) ||
5553 atomic_read(&T_TASK(cmd)->t_transport_aborted))
5523 goto remove; 5554 goto remove;
5524 5555
5525 atomic_set(&T_TASK(cmd)->t_transport_stop, 1); 5556 atomic_set(&T_TASK(cmd)->t_transport_stop, 1);
@@ -5926,6 +5957,9 @@ static void transport_processing_shutdown(struct se_device *dev)
5926 5957
5927 atomic_set(&task->task_active, 0); 5958 atomic_set(&task->task_active, 0);
5928 atomic_set(&task->task_stop, 0); 5959 atomic_set(&task->task_stop, 0);
5960 } else {
5961 if (atomic_read(&task->task_execute_queue) != 0)
5962 transport_remove_task_from_execute_queue(task, dev);
5929 } 5963 }
5930 __transport_stop_task_timer(task, &flags); 5964 __transport_stop_task_timer(task, &flags);
5931 5965
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index f7a5dba3ca23..bf7c687519ef 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -4,7 +4,6 @@
4 4
5menuconfig THERMAL 5menuconfig THERMAL
6 tristate "Generic Thermal sysfs driver" 6 tristate "Generic Thermal sysfs driver"
7 depends on NET
8 help 7 help
9 Generic Thermal Sysfs driver offers a generic mechanism for 8 Generic Thermal Sysfs driver offers a generic mechanism for
10 thermal management. Usually it's made up of one or more thermal 9 thermal management. Usually it's made up of one or more thermal
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 7d0e63c79280..713b7ea4a607 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -62,20 +62,6 @@ static DEFINE_MUTEX(thermal_list_lock);
62 62
63static unsigned int thermal_event_seqnum; 63static unsigned int thermal_event_seqnum;
64 64
65static struct genl_family thermal_event_genl_family = {
66 .id = GENL_ID_GENERATE,
67 .name = THERMAL_GENL_FAMILY_NAME,
68 .version = THERMAL_GENL_VERSION,
69 .maxattr = THERMAL_GENL_ATTR_MAX,
70};
71
72static struct genl_multicast_group thermal_event_mcgrp = {
73 .name = THERMAL_GENL_MCAST_GROUP_NAME,
74};
75
76static int genetlink_init(void);
77static void genetlink_exit(void);
78
79static int get_idr(struct idr *idr, struct mutex *lock, int *id) 65static int get_idr(struct idr *idr, struct mutex *lock, int *id)
80{ 66{
81 int err; 67 int err;
@@ -1225,6 +1211,18 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
1225 1211
1226EXPORT_SYMBOL(thermal_zone_device_unregister); 1212EXPORT_SYMBOL(thermal_zone_device_unregister);
1227 1213
1214#ifdef CONFIG_NET
1215static struct genl_family thermal_event_genl_family = {
1216 .id = GENL_ID_GENERATE,
1217 .name = THERMAL_GENL_FAMILY_NAME,
1218 .version = THERMAL_GENL_VERSION,
1219 .maxattr = THERMAL_GENL_ATTR_MAX,
1220};
1221
1222static struct genl_multicast_group thermal_event_mcgrp = {
1223 .name = THERMAL_GENL_MCAST_GROUP_NAME,
1224};
1225
1228int generate_netlink_event(u32 orig, enum events event) 1226int generate_netlink_event(u32 orig, enum events event)
1229{ 1227{
1230 struct sk_buff *skb; 1228 struct sk_buff *skb;
@@ -1301,6 +1299,15 @@ static int genetlink_init(void)
1301 return result; 1299 return result;
1302} 1300}
1303 1301
1302static void genetlink_exit(void)
1303{
1304 genl_unregister_family(&thermal_event_genl_family);
1305}
1306#else /* !CONFIG_NET */
1307static inline int genetlink_init(void) { return 0; }
1308static inline void genetlink_exit(void) {}
1309#endif /* !CONFIG_NET */
1310
1304static int __init thermal_init(void) 1311static int __init thermal_init(void)
1305{ 1312{
1306 int result = 0; 1313 int result = 0;
@@ -1316,11 +1323,6 @@ static int __init thermal_init(void)
1316 return result; 1323 return result;
1317} 1324}
1318 1325
1319static void genetlink_exit(void)
1320{
1321 genl_unregister_family(&thermal_event_genl_family);
1322}
1323
1324static void __exit thermal_exit(void) 1326static void __exit thermal_exit(void)
1325{ 1327{
1326 class_unregister(&thermal_class); 1328 class_unregister(&thermal_class);
diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index beb1afa27d8d..7b951adac54b 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -601,7 +601,7 @@ static int max3100_startup(struct uart_port *port)
601 s->rts = 0; 601 s->rts = 0;
602 602
603 sprintf(b, "max3100-%d", s->minor); 603 sprintf(b, "max3100-%d", s->minor);
604 s->workqueue = create_freezeable_workqueue(b); 604 s->workqueue = create_freezable_workqueue(b);
605 if (!s->workqueue) { 605 if (!s->workqueue) {
606 dev_warn(&s->spi->dev, "cannot create workqueue\n"); 606 dev_warn(&s->spi->dev, "cannot create workqueue\n");
607 return -EBUSY; 607 return -EBUSY;
diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c
index 910870edf708..750b4f627315 100644
--- a/drivers/tty/serial/max3107.c
+++ b/drivers/tty/serial/max3107.c
@@ -833,7 +833,7 @@ static int max3107_startup(struct uart_port *port)
833 struct max3107_port *s = container_of(port, struct max3107_port, port); 833 struct max3107_port *s = container_of(port, struct max3107_port, port);
834 834
835 /* Initialize work queue */ 835 /* Initialize work queue */
836 s->workqueue = create_freezeable_workqueue("max3107"); 836 s->workqueue = create_freezable_workqueue("max3107");
837 if (!s->workqueue) { 837 if (!s->workqueue) {
838 dev_err(&s->spi->dev, "Workqueue creation failed\n"); 838 dev_err(&s->spi->dev, "Workqueue creation failed\n");
839 return -EBUSY; 839 return -EBUSY;
diff --git a/drivers/tty/serial/serial_cs.c b/drivers/tty/serial/serial_cs.c
index 93760b2ea172..1ef4df9bf7e4 100644
--- a/drivers/tty/serial/serial_cs.c
+++ b/drivers/tty/serial/serial_cs.c
@@ -712,6 +712,7 @@ static struct pcmcia_device_id serial_ids[] = {
712 PCMCIA_PFC_DEVICE_PROD_ID12(1, "Xircom", "CreditCard Ethernet+Modem II", 0x2e3ee845, 0xeca401bf), 712 PCMCIA_PFC_DEVICE_PROD_ID12(1, "Xircom", "CreditCard Ethernet+Modem II", 0x2e3ee845, 0xeca401bf),
713 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0e01), 713 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0e01),
714 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0a05), 714 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0a05),
715 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0b05),
715 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x1101), 716 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x1101),
716 PCMCIA_MFC_DEVICE_MANF_CARD(0, 0x0104, 0x0070), 717 PCMCIA_MFC_DEVICE_MANF_CARD(0, 0x0104, 0x0070),
717 PCMCIA_MFC_DEVICE_MANF_CARD(1, 0x0101, 0x0562), 718 PCMCIA_MFC_DEVICE_MANF_CARD(1, 0x0101, 0x0562),
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index f71e8e307e0f..64a035ba2eab 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -335,7 +335,7 @@ void usb_hcd_pci_shutdown(struct pci_dev *dev)
335} 335}
336EXPORT_SYMBOL_GPL(usb_hcd_pci_shutdown); 336EXPORT_SYMBOL_GPL(usb_hcd_pci_shutdown);
337 337
338#ifdef CONFIG_PM_OPS 338#ifdef CONFIG_PM
339 339
340#ifdef CONFIG_PPC_PMAC 340#ifdef CONFIG_PPC_PMAC
341static void powermac_set_asic(struct pci_dev *pci_dev, int enable) 341static void powermac_set_asic(struct pci_dev *pci_dev, int enable)
@@ -580,4 +580,4 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = {
580}; 580};
581EXPORT_SYMBOL_GPL(usb_hcd_pci_pm_ops); 581EXPORT_SYMBOL_GPL(usb_hcd_pci_pm_ops);
582 582
583#endif /* CONFIG_PM_OPS */ 583#endif /* CONFIG_PM */
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d041c6826e43..19d3435e6140 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1465,6 +1465,7 @@ void usb_set_device_state(struct usb_device *udev,
1465 enum usb_device_state new_state) 1465 enum usb_device_state new_state)
1466{ 1466{
1467 unsigned long flags; 1467 unsigned long flags;
1468 int wakeup = -1;
1468 1469
1469 spin_lock_irqsave(&device_state_lock, flags); 1470 spin_lock_irqsave(&device_state_lock, flags);
1470 if (udev->state == USB_STATE_NOTATTACHED) 1471 if (udev->state == USB_STATE_NOTATTACHED)
@@ -1479,11 +1480,10 @@ void usb_set_device_state(struct usb_device *udev,
1479 || new_state == USB_STATE_SUSPENDED) 1480 || new_state == USB_STATE_SUSPENDED)
1480 ; /* No change to wakeup settings */ 1481 ; /* No change to wakeup settings */
1481 else if (new_state == USB_STATE_CONFIGURED) 1482 else if (new_state == USB_STATE_CONFIGURED)
1482 device_set_wakeup_capable(&udev->dev, 1483 wakeup = udev->actconfig->desc.bmAttributes
1483 (udev->actconfig->desc.bmAttributes 1484 & USB_CONFIG_ATT_WAKEUP;
1484 & USB_CONFIG_ATT_WAKEUP));
1485 else 1485 else
1486 device_set_wakeup_capable(&udev->dev, 0); 1486 wakeup = 0;
1487 } 1487 }
1488 if (udev->state == USB_STATE_SUSPENDED && 1488 if (udev->state == USB_STATE_SUSPENDED &&
1489 new_state != USB_STATE_SUSPENDED) 1489 new_state != USB_STATE_SUSPENDED)
@@ -1495,6 +1495,8 @@ void usb_set_device_state(struct usb_device *udev,
1495 } else 1495 } else
1496 recursively_mark_NOTATTACHED(udev); 1496 recursively_mark_NOTATTACHED(udev);
1497 spin_unlock_irqrestore(&device_state_lock, flags); 1497 spin_unlock_irqrestore(&device_state_lock, flags);
1498 if (wakeup >= 0)
1499 device_set_wakeup_capable(&udev->dev, wakeup);
1498} 1500}
1499EXPORT_SYMBOL_GPL(usb_set_device_state); 1501EXPORT_SYMBOL_GPL(usb_set_device_state);
1500 1502
@@ -2681,17 +2683,13 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
2681 2683
2682 mutex_lock(&usb_address0_mutex); 2684 mutex_lock(&usb_address0_mutex);
2683 2685
2684 if (!udev->config && oldspeed == USB_SPEED_SUPER) { 2686 /* Reset the device; full speed may morph to high speed */
2685 /* Don't reset USB 3.0 devices during an initial setup */ 2687 /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
2686 usb_set_device_state(udev, USB_STATE_DEFAULT); 2688 retval = hub_port_reset(hub, port1, udev, delay);
2687 } else { 2689 if (retval < 0) /* error or disconnect */
2688 /* Reset the device; full speed may morph to high speed */ 2690 goto fail;
2689 /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */ 2691 /* success, speed is known */
2690 retval = hub_port_reset(hub, port1, udev, delay); 2692
2691 if (retval < 0) /* error or disconnect */
2692 goto fail;
2693 /* success, speed is known */
2694 }
2695 retval = -ENODEV; 2693 retval = -ENODEV;
2696 2694
2697 if (oldspeed != USB_SPEED_UNKNOWN && oldspeed != udev->speed) { 2695 if (oldspeed != USB_SPEED_UNKNOWN && oldspeed != udev->speed) {
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 44c595432d6f..81ce6a8e1d94 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -48,6 +48,10 @@ static const struct usb_device_id usb_quirk_list[] = {
48 { USB_DEVICE(0x04b4, 0x0526), .driver_info = 48 { USB_DEVICE(0x04b4, 0x0526), .driver_info =
49 USB_QUIRK_CONFIG_INTF_STRINGS }, 49 USB_QUIRK_CONFIG_INTF_STRINGS },
50 50
51 /* Samsung Android phone modem - ID conflict with SPH-I500 */
52 { USB_DEVICE(0x04e8, 0x6601), .driver_info =
53 USB_QUIRK_CONFIG_INTF_STRINGS },
54
51 /* Roland SC-8820 */ 55 /* Roland SC-8820 */
52 { USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME }, 56 { USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME },
53 57
@@ -68,6 +72,10 @@ static const struct usb_device_id usb_quirk_list[] = {
68 /* M-Systems Flash Disk Pioneers */ 72 /* M-Systems Flash Disk Pioneers */
69 { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, 73 { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME },
70 74
75 /* Keytouch QWERTY Panel keyboard */
76 { USB_DEVICE(0x0926, 0x3333), .driver_info =
77 USB_QUIRK_CONFIG_INTF_STRINGS },
78
71 /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ 79 /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */
72 { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, 80 { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF },
73 81
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 3c6e1a058745..5e1495097ec3 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -346,14 +346,19 @@ static void pn_rx_complete(struct usb_ep *ep, struct usb_request *req)
346 346
347 if (unlikely(!skb)) 347 if (unlikely(!skb))
348 break; 348 break;
349 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, 0,
350 req->actual);
351 page = NULL;
352 349
353 if (req->actual < req->length) { /* Last fragment */ 350 if (skb->len == 0) { /* First fragment */
354 skb->protocol = htons(ETH_P_PHONET); 351 skb->protocol = htons(ETH_P_PHONET);
355 skb_reset_mac_header(skb); 352 skb_reset_mac_header(skb);
356 pskb_pull(skb, 1); 353 /* Can't use pskb_pull() on page in IRQ */
354 memcpy(skb_put(skb, 1), page_address(page), 1);
355 }
356
357 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
358 skb->len == 0, req->actual);
359 page = NULL;
360
361 if (req->actual < req->length) { /* Last fragment */
357 skb->dev = dev; 362 skb->dev = dev;
358 dev->stats.rx_packets++; 363 dev->stats.rx_packets++;
359 dev->stats.rx_bytes += skb->len; 364 dev->stats.rx_bytes += skb->len;
diff --git a/drivers/usb/host/ehci-xilinx-of.c b/drivers/usb/host/ehci-xilinx-of.c
index e8f4f36fdf0b..a6f21b891f68 100644
--- a/drivers/usb/host/ehci-xilinx-of.c
+++ b/drivers/usb/host/ehci-xilinx-of.c
@@ -29,6 +29,7 @@
29 29
30#include <linux/of.h> 30#include <linux/of.h>
31#include <linux/of_platform.h> 31#include <linux/of_platform.h>
32#include <linux/of_address.h>
32 33
33/** 34/**
34 * ehci_xilinx_of_setup - Initialize the device for ehci_reset() 35 * ehci_xilinx_of_setup - Initialize the device for ehci_reset()
diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c
index fcbf4abbf381..0231814a97a5 100644
--- a/drivers/usb/host/xhci-dbg.c
+++ b/drivers/usb/host/xhci-dbg.c
@@ -169,9 +169,10 @@ static void xhci_print_ports(struct xhci_hcd *xhci)
169 } 169 }
170} 170}
171 171
172void xhci_print_ir_set(struct xhci_hcd *xhci, struct xhci_intr_reg *ir_set, int set_num) 172void xhci_print_ir_set(struct xhci_hcd *xhci, int set_num)
173{ 173{
174 void *addr; 174 struct xhci_intr_reg __iomem *ir_set = &xhci->run_regs->ir_set[set_num];
175 void __iomem *addr;
175 u32 temp; 176 u32 temp;
176 u64 temp_64; 177 u64 temp_64;
177 178
@@ -449,7 +450,7 @@ char *xhci_get_slot_state(struct xhci_hcd *xhci,
449 } 450 }
450} 451}
451 452
452void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx) 453static void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx)
453{ 454{
454 /* Fields are 32 bits wide, DMA addresses are in bytes */ 455 /* Fields are 32 bits wide, DMA addresses are in bytes */
455 int field_size = 32 / 8; 456 int field_size = 32 / 8;
@@ -488,7 +489,7 @@ void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx)
488 dbg_rsvd64(xhci, (u64 *)slot_ctx, dma); 489 dbg_rsvd64(xhci, (u64 *)slot_ctx, dma);
489} 490}
490 491
491void xhci_dbg_ep_ctx(struct xhci_hcd *xhci, 492static void xhci_dbg_ep_ctx(struct xhci_hcd *xhci,
492 struct xhci_container_ctx *ctx, 493 struct xhci_container_ctx *ctx,
493 unsigned int last_ep) 494 unsigned int last_ep)
494{ 495{
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 1d0f45f0e7a6..a9534396e85b 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -307,7 +307,7 @@ struct xhci_ep_ctx *xhci_get_ep_ctx(struct xhci_hcd *xhci,
307 307
308/***************** Streams structures manipulation *************************/ 308/***************** Streams structures manipulation *************************/
309 309
310void xhci_free_stream_ctx(struct xhci_hcd *xhci, 310static void xhci_free_stream_ctx(struct xhci_hcd *xhci,
311 unsigned int num_stream_ctxs, 311 unsigned int num_stream_ctxs,
312 struct xhci_stream_ctx *stream_ctx, dma_addr_t dma) 312 struct xhci_stream_ctx *stream_ctx, dma_addr_t dma)
313{ 313{
@@ -335,7 +335,7 @@ void xhci_free_stream_ctx(struct xhci_hcd *xhci,
335 * The stream context array must be a power of 2, and can be as small as 335 * The stream context array must be a power of 2, and can be as small as
336 * 64 bytes or as large as 1MB. 336 * 64 bytes or as large as 1MB.
337 */ 337 */
338struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci, 338static struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci,
339 unsigned int num_stream_ctxs, dma_addr_t *dma, 339 unsigned int num_stream_ctxs, dma_addr_t *dma,
340 gfp_t mem_flags) 340 gfp_t mem_flags)
341{ 341{
@@ -1900,11 +1900,11 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
1900 val &= DBOFF_MASK; 1900 val &= DBOFF_MASK;
1901 xhci_dbg(xhci, "// Doorbell array is located at offset 0x%x" 1901 xhci_dbg(xhci, "// Doorbell array is located at offset 0x%x"
1902 " from cap regs base addr\n", val); 1902 " from cap regs base addr\n", val);
1903 xhci->dba = (void *) xhci->cap_regs + val; 1903 xhci->dba = (void __iomem *) xhci->cap_regs + val;
1904 xhci_dbg_regs(xhci); 1904 xhci_dbg_regs(xhci);
1905 xhci_print_run_regs(xhci); 1905 xhci_print_run_regs(xhci);
1906 /* Set ir_set to interrupt register set 0 */ 1906 /* Set ir_set to interrupt register set 0 */
1907 xhci->ir_set = (void *) xhci->run_regs->ir_set; 1907 xhci->ir_set = &xhci->run_regs->ir_set[0];
1908 1908
1909 /* 1909 /*
1910 * Event ring setup: Allocate a normal ring, but also setup 1910 * Event ring setup: Allocate a normal ring, but also setup
@@ -1961,7 +1961,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
1961 /* Set the event ring dequeue address */ 1961 /* Set the event ring dequeue address */
1962 xhci_set_hc_event_deq(xhci); 1962 xhci_set_hc_event_deq(xhci);
1963 xhci_dbg(xhci, "Wrote ERST address to ir_set 0.\n"); 1963 xhci_dbg(xhci, "Wrote ERST address to ir_set 0.\n");
1964 xhci_print_ir_set(xhci, xhci->ir_set, 0); 1964 xhci_print_ir_set(xhci, 0);
1965 1965
1966 /* 1966 /*
1967 * XXX: Might need to set the Interrupter Moderation Register to 1967 * XXX: Might need to set the Interrupter Moderation Register to
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 3e8211c1ce5a..3289bf4832c9 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -474,8 +474,11 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
474 state->new_deq_seg = find_trb_seg(cur_td->start_seg, 474 state->new_deq_seg = find_trb_seg(cur_td->start_seg,
475 dev->eps[ep_index].stopped_trb, 475 dev->eps[ep_index].stopped_trb,
476 &state->new_cycle_state); 476 &state->new_cycle_state);
477 if (!state->new_deq_seg) 477 if (!state->new_deq_seg) {
478 BUG(); 478 WARN_ON(1);
479 return;
480 }
481
479 /* Dig out the cycle state saved by the xHC during the stop ep cmd */ 482 /* Dig out the cycle state saved by the xHC during the stop ep cmd */
480 xhci_dbg(xhci, "Finding endpoint context\n"); 483 xhci_dbg(xhci, "Finding endpoint context\n");
481 ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index); 484 ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
@@ -486,8 +489,10 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
486 state->new_deq_seg = find_trb_seg(state->new_deq_seg, 489 state->new_deq_seg = find_trb_seg(state->new_deq_seg,
487 state->new_deq_ptr, 490 state->new_deq_ptr,
488 &state->new_cycle_state); 491 &state->new_cycle_state);
489 if (!state->new_deq_seg) 492 if (!state->new_deq_seg) {
490 BUG(); 493 WARN_ON(1);
494 return;
495 }
491 496
492 trb = &state->new_deq_ptr->generic; 497 trb = &state->new_deq_ptr->generic;
493 if ((trb->field[3] & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK) && 498 if ((trb->field[3] & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK) &&
@@ -2363,12 +2368,13 @@ static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
2363 2368
2364 /* Scatter gather list entries may cross 64KB boundaries */ 2369 /* Scatter gather list entries may cross 64KB boundaries */
2365 running_total = TRB_MAX_BUFF_SIZE - 2370 running_total = TRB_MAX_BUFF_SIZE -
2366 (sg_dma_address(sg) & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2371 (sg_dma_address(sg) & (TRB_MAX_BUFF_SIZE - 1));
2372 running_total &= TRB_MAX_BUFF_SIZE - 1;
2367 if (running_total != 0) 2373 if (running_total != 0)
2368 num_trbs++; 2374 num_trbs++;
2369 2375
2370 /* How many more 64KB chunks to transfer, how many more TRBs? */ 2376 /* How many more 64KB chunks to transfer, how many more TRBs? */
2371 while (running_total < sg_dma_len(sg)) { 2377 while (running_total < sg_dma_len(sg) && running_total < temp) {
2372 num_trbs++; 2378 num_trbs++;
2373 running_total += TRB_MAX_BUFF_SIZE; 2379 running_total += TRB_MAX_BUFF_SIZE;
2374 } 2380 }
@@ -2394,11 +2400,11 @@ static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
2394static void check_trb_math(struct urb *urb, int num_trbs, int running_total) 2400static void check_trb_math(struct urb *urb, int num_trbs, int running_total)
2395{ 2401{
2396 if (num_trbs != 0) 2402 if (num_trbs != 0)
2397 dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated number of " 2403 dev_err(&urb->dev->dev, "%s - ep %#x - Miscalculated number of "
2398 "TRBs, %d left\n", __func__, 2404 "TRBs, %d left\n", __func__,
2399 urb->ep->desc.bEndpointAddress, num_trbs); 2405 urb->ep->desc.bEndpointAddress, num_trbs);
2400 if (running_total != urb->transfer_buffer_length) 2406 if (running_total != urb->transfer_buffer_length)
2401 dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated tx length, " 2407 dev_err(&urb->dev->dev, "%s - ep %#x - Miscalculated tx length, "
2402 "queued %#x (%d), asked for %#x (%d)\n", 2408 "queued %#x (%d), asked for %#x (%d)\n",
2403 __func__, 2409 __func__,
2404 urb->ep->desc.bEndpointAddress, 2410 urb->ep->desc.bEndpointAddress,
@@ -2533,8 +2539,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2533 sg = urb->sg; 2539 sg = urb->sg;
2534 addr = (u64) sg_dma_address(sg); 2540 addr = (u64) sg_dma_address(sg);
2535 this_sg_len = sg_dma_len(sg); 2541 this_sg_len = sg_dma_len(sg);
2536 trb_buff_len = TRB_MAX_BUFF_SIZE - 2542 trb_buff_len = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
2537 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
2538 trb_buff_len = min_t(int, trb_buff_len, this_sg_len); 2543 trb_buff_len = min_t(int, trb_buff_len, this_sg_len);
2539 if (trb_buff_len > urb->transfer_buffer_length) 2544 if (trb_buff_len > urb->transfer_buffer_length)
2540 trb_buff_len = urb->transfer_buffer_length; 2545 trb_buff_len = urb->transfer_buffer_length;
@@ -2572,7 +2577,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2572 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1), 2577 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1),
2573 (unsigned int) addr + trb_buff_len); 2578 (unsigned int) addr + trb_buff_len);
2574 if (TRB_MAX_BUFF_SIZE - 2579 if (TRB_MAX_BUFF_SIZE -
2575 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1)) < trb_buff_len) { 2580 (addr & (TRB_MAX_BUFF_SIZE - 1)) < trb_buff_len) {
2576 xhci_warn(xhci, "WARN: sg dma xfer crosses 64KB boundaries!\n"); 2581 xhci_warn(xhci, "WARN: sg dma xfer crosses 64KB boundaries!\n");
2577 xhci_dbg(xhci, "Next boundary at %#x, end dma = %#x\n", 2582 xhci_dbg(xhci, "Next boundary at %#x, end dma = %#x\n",
2578 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1), 2583 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1),
@@ -2616,7 +2621,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2616 } 2621 }
2617 2622
2618 trb_buff_len = TRB_MAX_BUFF_SIZE - 2623 trb_buff_len = TRB_MAX_BUFF_SIZE -
2619 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2624 (addr & (TRB_MAX_BUFF_SIZE - 1));
2620 trb_buff_len = min_t(int, trb_buff_len, this_sg_len); 2625 trb_buff_len = min_t(int, trb_buff_len, this_sg_len);
2621 if (running_total + trb_buff_len > urb->transfer_buffer_length) 2626 if (running_total + trb_buff_len > urb->transfer_buffer_length)
2622 trb_buff_len = 2627 trb_buff_len =
@@ -2656,7 +2661,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2656 num_trbs = 0; 2661 num_trbs = 0;
2657 /* How much data is (potentially) left before the 64KB boundary? */ 2662 /* How much data is (potentially) left before the 64KB boundary? */
2658 running_total = TRB_MAX_BUFF_SIZE - 2663 running_total = TRB_MAX_BUFF_SIZE -
2659 (urb->transfer_dma & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2664 (urb->transfer_dma & (TRB_MAX_BUFF_SIZE - 1));
2665 running_total &= TRB_MAX_BUFF_SIZE - 1;
2660 2666
2661 /* If there's some data on this 64KB chunk, or we have to send a 2667 /* If there's some data on this 64KB chunk, or we have to send a
2662 * zero-length transfer, we need at least one TRB 2668 * zero-length transfer, we need at least one TRB
@@ -2700,8 +2706,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2700 /* How much data is in the first TRB? */ 2706 /* How much data is in the first TRB? */
2701 addr = (u64) urb->transfer_dma; 2707 addr = (u64) urb->transfer_dma;
2702 trb_buff_len = TRB_MAX_BUFF_SIZE - 2708 trb_buff_len = TRB_MAX_BUFF_SIZE -
2703 (urb->transfer_dma & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2709 (urb->transfer_dma & (TRB_MAX_BUFF_SIZE - 1));
2704 if (urb->transfer_buffer_length < trb_buff_len) 2710 if (trb_buff_len > urb->transfer_buffer_length)
2705 trb_buff_len = urb->transfer_buffer_length; 2711 trb_buff_len = urb->transfer_buffer_length;
2706 2712
2707 first_trb = true; 2713 first_trb = true;
@@ -2879,8 +2885,8 @@ static int count_isoc_trbs_needed(struct xhci_hcd *xhci,
2879 addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset); 2885 addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset);
2880 td_len = urb->iso_frame_desc[i].length; 2886 td_len = urb->iso_frame_desc[i].length;
2881 2887
2882 running_total = TRB_MAX_BUFF_SIZE - 2888 running_total = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
2883 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2889 running_total &= TRB_MAX_BUFF_SIZE - 1;
2884 if (running_total != 0) 2890 if (running_total != 0)
2885 num_trbs++; 2891 num_trbs++;
2886 2892
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 34cf4e165877..2083fc2179b2 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -109,7 +109,7 @@ int xhci_halt(struct xhci_hcd *xhci)
109/* 109/*
110 * Set the run bit and wait for the host to be running. 110 * Set the run bit and wait for the host to be running.
111 */ 111 */
112int xhci_start(struct xhci_hcd *xhci) 112static int xhci_start(struct xhci_hcd *xhci)
113{ 113{
114 u32 temp; 114 u32 temp;
115 int ret; 115 int ret;
@@ -329,7 +329,7 @@ int xhci_init(struct usb_hcd *hcd)
329 329
330 330
331#ifdef CONFIG_USB_XHCI_HCD_DEBUGGING 331#ifdef CONFIG_USB_XHCI_HCD_DEBUGGING
332void xhci_event_ring_work(unsigned long arg) 332static void xhci_event_ring_work(unsigned long arg)
333{ 333{
334 unsigned long flags; 334 unsigned long flags;
335 int temp; 335 int temp;
@@ -473,7 +473,7 @@ int xhci_run(struct usb_hcd *hcd)
473 xhci->ir_set, (unsigned int) ER_IRQ_ENABLE(temp)); 473 xhci->ir_set, (unsigned int) ER_IRQ_ENABLE(temp));
474 xhci_writel(xhci, ER_IRQ_ENABLE(temp), 474 xhci_writel(xhci, ER_IRQ_ENABLE(temp),
475 &xhci->ir_set->irq_pending); 475 &xhci->ir_set->irq_pending);
476 xhci_print_ir_set(xhci, xhci->ir_set, 0); 476 xhci_print_ir_set(xhci, 0);
477 477
478 if (NUM_TEST_NOOPS > 0) 478 if (NUM_TEST_NOOPS > 0)
479 doorbell = xhci_setup_one_noop(xhci); 479 doorbell = xhci_setup_one_noop(xhci);
@@ -528,7 +528,7 @@ void xhci_stop(struct usb_hcd *hcd)
528 temp = xhci_readl(xhci, &xhci->ir_set->irq_pending); 528 temp = xhci_readl(xhci, &xhci->ir_set->irq_pending);
529 xhci_writel(xhci, ER_IRQ_DISABLE(temp), 529 xhci_writel(xhci, ER_IRQ_DISABLE(temp),
530 &xhci->ir_set->irq_pending); 530 &xhci->ir_set->irq_pending);
531 xhci_print_ir_set(xhci, xhci->ir_set, 0); 531 xhci_print_ir_set(xhci, 0);
532 532
533 xhci_dbg(xhci, "cleaning up memory\n"); 533 xhci_dbg(xhci, "cleaning up memory\n");
534 xhci_mem_cleanup(xhci); 534 xhci_mem_cleanup(xhci);
@@ -755,7 +755,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
755 temp = xhci_readl(xhci, &xhci->ir_set->irq_pending); 755 temp = xhci_readl(xhci, &xhci->ir_set->irq_pending);
756 xhci_writel(xhci, ER_IRQ_DISABLE(temp), 756 xhci_writel(xhci, ER_IRQ_DISABLE(temp),
757 &xhci->ir_set->irq_pending); 757 &xhci->ir_set->irq_pending);
758 xhci_print_ir_set(xhci, xhci->ir_set, 0); 758 xhci_print_ir_set(xhci, 0);
759 759
760 xhci_dbg(xhci, "cleaning up memory\n"); 760 xhci_dbg(xhci, "cleaning up memory\n");
761 xhci_mem_cleanup(xhci); 761 xhci_mem_cleanup(xhci);
@@ -857,7 +857,7 @@ unsigned int xhci_last_valid_endpoint(u32 added_ctxs)
857/* Returns 1 if the arguments are OK; 857/* Returns 1 if the arguments are OK;
858 * returns 0 this is a root hub; returns -EINVAL for NULL pointers. 858 * returns 0 this is a root hub; returns -EINVAL for NULL pointers.
859 */ 859 */
860int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev, 860static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
861 struct usb_host_endpoint *ep, int check_ep, bool check_virt_dev, 861 struct usb_host_endpoint *ep, int check_ep, bool check_virt_dev,
862 const char *func) { 862 const char *func) {
863 struct xhci_hcd *xhci; 863 struct xhci_hcd *xhci;
@@ -1693,7 +1693,7 @@ static void xhci_setup_input_ctx_for_config_ep(struct xhci_hcd *xhci,
1693 xhci_dbg_ctx(xhci, in_ctx, xhci_last_valid_endpoint(add_flags)); 1693 xhci_dbg_ctx(xhci, in_ctx, xhci_last_valid_endpoint(add_flags));
1694} 1694}
1695 1695
1696void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci, 1696static void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
1697 unsigned int slot_id, unsigned int ep_index, 1697 unsigned int slot_id, unsigned int ep_index,
1698 struct xhci_dequeue_state *deq_state) 1698 struct xhci_dequeue_state *deq_state)
1699{ 1699{
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 7f236fd22015..7f127df6dd55 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1348,7 +1348,7 @@ static inline int xhci_link_trb_quirk(struct xhci_hcd *xhci)
1348} 1348}
1349 1349
1350/* xHCI debugging */ 1350/* xHCI debugging */
1351void xhci_print_ir_set(struct xhci_hcd *xhci, struct xhci_intr_reg *ir_set, int set_num); 1351void xhci_print_ir_set(struct xhci_hcd *xhci, int set_num);
1352void xhci_print_registers(struct xhci_hcd *xhci); 1352void xhci_print_registers(struct xhci_hcd *xhci);
1353void xhci_dbg_regs(struct xhci_hcd *xhci); 1353void xhci_dbg_regs(struct xhci_hcd *xhci);
1354void xhci_print_run_regs(struct xhci_hcd *xhci); 1354void xhci_print_run_regs(struct xhci_hcd *xhci);
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 54a8bd1047d6..c292d5c499e7 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1864,6 +1864,7 @@ allocate_instance(struct device *dev,
1864 INIT_LIST_HEAD(&musb->out_bulk); 1864 INIT_LIST_HEAD(&musb->out_bulk);
1865 1865
1866 hcd->uses_new_polling = 1; 1866 hcd->uses_new_polling = 1;
1867 hcd->has_tt = 1;
1867 1868
1868 musb->vbuserr_retry = VBUSERR_RETRY_COUNT; 1869 musb->vbuserr_retry = VBUSERR_RETRY_COUNT;
1869 musb->a_wait_bcon = OTG_TIME_A_WAIT_BCON; 1870 musb->a_wait_bcon = OTG_TIME_A_WAIT_BCON;
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index d74a8113ae74..e6400be8a0f8 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -488,6 +488,15 @@ struct musb {
488 unsigned set_address:1; 488 unsigned set_address:1;
489 unsigned test_mode:1; 489 unsigned test_mode:1;
490 unsigned softconnect:1; 490 unsigned softconnect:1;
491
492 u8 address;
493 u8 test_mode_nr;
494 u16 ackpend; /* ep0 */
495 enum musb_g_ep0_state ep0_state;
496 struct usb_gadget g; /* the gadget */
497 struct usb_gadget_driver *gadget_driver; /* its driver */
498#endif
499
491 /* 500 /*
492 * FIXME: Remove this flag. 501 * FIXME: Remove this flag.
493 * 502 *
@@ -501,14 +510,6 @@ struct musb {
501 */ 510 */
502 unsigned double_buffer_not_ok:1 __deprecated; 511 unsigned double_buffer_not_ok:1 __deprecated;
503 512
504 u8 address;
505 u8 test_mode_nr;
506 u16 ackpend; /* ep0 */
507 enum musb_g_ep0_state ep0_state;
508 struct usb_gadget g; /* the gadget */
509 struct usb_gadget_driver *gadget_driver; /* its driver */
510#endif
511
512 struct musb_hdrc_config *config; 513 struct musb_hdrc_config *config;
513 514
514#ifdef MUSB_CONFIG_PROC_FS 515#ifdef MUSB_CONFIG_PROC_FS
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index a3f12333fc41..bc8badd16897 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -362,6 +362,7 @@ static int omap2430_musb_init(struct musb *musb)
362 362
363static int omap2430_musb_exit(struct musb *musb) 363static int omap2430_musb_exit(struct musb *musb)
364{ 364{
365 del_timer_sync(&musb_idle_timer);
365 366
366 omap2430_low_level_exit(musb); 367 omap2430_low_level_exit(musb);
367 otg_put_transceiver(musb->xceiv); 368 otg_put_transceiver(musb->xceiv);
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 7481ff8a49e4..0457813eebee 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -301,6 +301,9 @@ static const struct usb_device_id id_table[] = {
301 { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */ 301 { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */
302 .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist 302 .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
303 }, 303 },
304 { USB_DEVICE(0x0f3d, 0x68A3), /* Airprime/Sierra Wireless Direct IP modems */
305 .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
306 },
304 { USB_DEVICE(0x413C, 0x08133) }, /* Dell Computer Corp. Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port */ 307 { USB_DEVICE(0x413C, 0x08133) }, /* Dell Computer Corp. Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port */
305 308
306 { } 309 { }
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index b004b2a485c3..9c014e2ecd68 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -295,12 +295,15 @@ static void usb_wwan_indat_callback(struct urb *urb)
295 __func__, status, endpoint); 295 __func__, status, endpoint);
296 } else { 296 } else {
297 tty = tty_port_tty_get(&port->port); 297 tty = tty_port_tty_get(&port->port);
298 if (urb->actual_length) { 298 if (tty) {
299 tty_insert_flip_string(tty, data, urb->actual_length); 299 if (urb->actual_length) {
300 tty_flip_buffer_push(tty); 300 tty_insert_flip_string(tty, data,
301 } else 301 urb->actual_length);
302 dbg("%s: empty read urb received", __func__); 302 tty_flip_buffer_push(tty);
303 tty_kref_put(tty); 303 } else
304 dbg("%s: empty read urb received", __func__);
305 tty_kref_put(tty);
306 }
304 307
305 /* Resubmit urb so we continue receiving */ 308 /* Resubmit urb so we continue receiving */
306 if (status != -ESHUTDOWN) { 309 if (status != -ESHUTDOWN) {
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 15a5d89b7f39..1c11959a7d58 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -27,6 +27,7 @@
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/usb.h> 28#include <linux/usb.h>
29#include <linux/usb/serial.h> 29#include <linux/usb/serial.h>
30#include <linux/usb/cdc.h>
30#include "visor.h" 31#include "visor.h"
31 32
32/* 33/*
@@ -479,6 +480,17 @@ static int visor_probe(struct usb_serial *serial,
479 480
480 dbg("%s", __func__); 481 dbg("%s", __func__);
481 482
483 /*
484 * some Samsung Android phones in modem mode have the same ID
485 * as SPH-I500, but they are ACM devices, so dont bind to them
486 */
487 if (id->idVendor == SAMSUNG_VENDOR_ID &&
488 id->idProduct == SAMSUNG_SPH_I500_ID &&
489 serial->dev->descriptor.bDeviceClass == USB_CLASS_COMM &&
490 serial->dev->descriptor.bDeviceSubClass ==
491 USB_CDC_SUBCLASS_ACM)
492 return -ENODEV;
493
482 if (serial->dev->actconfig->desc.bConfigurationValue != 1) { 494 if (serial->dev->actconfig->desc.bConfigurationValue != 1) {
483 dev_err(&serial->dev->dev, "active config #%d != 1 ??\n", 495 dev_err(&serial->dev->dev, "active config #%d != 1 ??\n",
484 serial->dev->actconfig->desc.bConfigurationValue); 496 serial->dev->actconfig->desc.bConfigurationValue);
diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c
index 8010aaeb5adb..dd0e84a9bd2f 100644
--- a/drivers/video/backlight/ltv350qv.c
+++ b/drivers/video/backlight/ltv350qv.c
@@ -239,11 +239,15 @@ static int __devinit ltv350qv_probe(struct spi_device *spi)
239 lcd->spi = spi; 239 lcd->spi = spi;
240 lcd->power = FB_BLANK_POWERDOWN; 240 lcd->power = FB_BLANK_POWERDOWN;
241 lcd->buffer = kzalloc(8, GFP_KERNEL); 241 lcd->buffer = kzalloc(8, GFP_KERNEL);
242 if (!lcd->buffer) {
243 ret = -ENOMEM;
244 goto out_free_lcd;
245 }
242 246
243 ld = lcd_device_register("ltv350qv", &spi->dev, lcd, &ltv_ops); 247 ld = lcd_device_register("ltv350qv", &spi->dev, lcd, &ltv_ops);
244 if (IS_ERR(ld)) { 248 if (IS_ERR(ld)) {
245 ret = PTR_ERR(ld); 249 ret = PTR_ERR(ld);
246 goto out_free_lcd; 250 goto out_free_buffer;
247 } 251 }
248 lcd->ld = ld; 252 lcd->ld = ld;
249 253
@@ -257,6 +261,8 @@ static int __devinit ltv350qv_probe(struct spi_device *spi)
257 261
258out_unregister: 262out_unregister:
259 lcd_device_unregister(ld); 263 lcd_device_unregister(ld);
264out_free_buffer:
265 kfree(lcd->buffer);
260out_free_lcd: 266out_free_lcd:
261 kfree(lcd); 267 kfree(lcd);
262 return ret; 268 return ret;
@@ -268,6 +274,7 @@ static int __devexit ltv350qv_remove(struct spi_device *spi)
268 274
269 ltv350qv_power(lcd, FB_BLANK_POWERDOWN); 275 ltv350qv_power(lcd, FB_BLANK_POWERDOWN);
270 lcd_device_unregister(lcd->ld); 276 lcd_device_unregister(lcd->ld);
277 kfree(lcd->buffer);
271 kfree(lcd); 278 kfree(lcd);
272 279
273 return 0; 280 return 0;
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index eca855a55c0d..3de4ba0260a5 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -646,7 +646,7 @@ static int __devexit cpwd_remove(struct platform_device *op)
646 struct cpwd *p = dev_get_drvdata(&op->dev); 646 struct cpwd *p = dev_get_drvdata(&op->dev);
647 int i; 647 int i;
648 648
649 for (i = 0; i < 4; i++) { 649 for (i = 0; i < WD_NUMDEVS; i++) {
650 misc_deregister(&p->devs[i].misc); 650 misc_deregister(&p->devs[i].misc);
651 651
652 if (!p->enabled) { 652 if (!p->enabled) {
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 24b966d5061a..204a5603c4ae 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -710,7 +710,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
710 return 0; 710 return 0;
711} 711}
712 712
713static void __devexit hpwdt_exit_nmi_decoding(void) 713static void hpwdt_exit_nmi_decoding(void)
714{ 714{
715 unregister_die_notifier(&die_notifier); 715 unregister_die_notifier(&die_notifier);
716 if (cru_rom_addr) 716 if (cru_rom_addr)
@@ -726,7 +726,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
726 return 0; 726 return 0;
727} 727}
728 728
729static void __devexit hpwdt_exit_nmi_decoding(void) 729static void hpwdt_exit_nmi_decoding(void)
730{ 730{
731} 731}
732#endif /* CONFIG_HPWDT_NMI_DECODING */ 732#endif /* CONFIG_HPWDT_NMI_DECODING */
diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c
index c7d67e9a7465..79906255eeb6 100644
--- a/drivers/watchdog/sbc_fitpc2_wdt.c
+++ b/drivers/watchdog/sbc_fitpc2_wdt.c
@@ -201,11 +201,14 @@ static struct miscdevice fitpc2_wdt_miscdev = {
201static int __init fitpc2_wdt_init(void) 201static int __init fitpc2_wdt_init(void)
202{ 202{
203 int err; 203 int err;
204 const char *brd_name;
204 205
205 if (!strstr(dmi_get_system_info(DMI_BOARD_NAME), "SBC-FITPC2")) 206 brd_name = dmi_get_system_info(DMI_BOARD_NAME);
207
208 if (!brd_name || !strstr(brd_name, "SBC-FITPC2"))
206 return -ENODEV; 209 return -ENODEV;
207 210
208 pr_info("%s found\n", dmi_get_system_info(DMI_BOARD_NAME)); 211 pr_info("%s found\n", brd_name);
209 212
210 if (!request_region(COMMAND_PORT, 1, WATCHDOG_NAME)) { 213 if (!request_region(COMMAND_PORT, 1, WATCHDOG_NAME)) {
211 pr_err("I/O address 0x%04x already in use\n", COMMAND_PORT); 214 pr_err("I/O address 0x%04x already in use\n", COMMAND_PORT);
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index 0461858e07d0..b61ab1c54293 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -508,7 +508,7 @@ static int __init sch311x_detect(int sio_config_port, unsigned short *addr)
508 sch311x_sio_outb(sio_config_port, 0x07, 0x0a); 508 sch311x_sio_outb(sio_config_port, 0x07, 0x0a);
509 509
510 /* Check if Logical Device Register is currently active */ 510 /* Check if Logical Device Register is currently active */
511 if (sch311x_sio_inb(sio_config_port, 0x30) && 0x01 == 0) 511 if ((sch311x_sio_inb(sio_config_port, 0x30) & 0x01) == 0)
512 printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n"); 512 printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n");
513 513
514 /* Get the base address of the runtime registers */ 514 /* Get the base address of the runtime registers */
diff --git a/drivers/watchdog/w83697ug_wdt.c b/drivers/watchdog/w83697ug_wdt.c
index a6c12dec91a1..df2a64dc9672 100644
--- a/drivers/watchdog/w83697ug_wdt.c
+++ b/drivers/watchdog/w83697ug_wdt.c
@@ -109,7 +109,7 @@ static int w83697ug_select_wd_register(void)
109 outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */ 109 outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */
110 outb_p(0x30, WDT_EFER); /* select CR30 */ 110 outb_p(0x30, WDT_EFER); /* select CR30 */
111 c = inb_p(WDT_EFDR); 111 c = inb_p(WDT_EFDR);
112 outb_p(c || 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */ 112 outb_p(c | 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */
113 113
114 return 0; 114 return 0;
115} 115}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 43f9f02c7db0..718050ace08f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -232,7 +232,7 @@ static int increase_reservation(unsigned long nr_pages)
232 set_phys_to_machine(pfn, frame_list[i]); 232 set_phys_to_machine(pfn, frame_list[i]);
233 233
234 /* Link back into the page tables if not highmem. */ 234 /* Link back into the page tables if not highmem. */
235 if (pfn < max_low_pfn) { 235 if (!xen_hvm_domain() && pfn < max_low_pfn) {
236 int ret; 236 int ret;
237 ret = HYPERVISOR_update_va_mapping( 237 ret = HYPERVISOR_update_va_mapping(
238 (unsigned long)__va(pfn << PAGE_SHIFT), 238 (unsigned long)__va(pfn << PAGE_SHIFT),
@@ -280,7 +280,7 @@ static int decrease_reservation(unsigned long nr_pages)
280 280
281 scrub_page(page); 281 scrub_page(page);
282 282
283 if (!PageHighMem(page)) { 283 if (!xen_hvm_domain() && !PageHighMem(page)) {
284 ret = HYPERVISOR_update_va_mapping( 284 ret = HYPERVISOR_update_va_mapping(
285 (unsigned long)__va(pfn << PAGE_SHIFT), 285 (unsigned long)__va(pfn << PAGE_SHIFT),
286 __pte_ma(0), 0); 286 __pte_ma(0), 0);
@@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages)
296 /* No more mappings: invalidate P2M and add to balloon. */ 296 /* No more mappings: invalidate P2M and add to balloon. */
297 for (i = 0; i < nr_pages; i++) { 297 for (i = 0; i < nr_pages; i++) {
298 pfn = mfn_to_pfn(frame_list[i]); 298 pfn = mfn_to_pfn(frame_list[i]);
299 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 299 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
300 balloon_append(pfn_to_page(pfn)); 300 balloon_append(pfn_to_page(pfn));
301 } 301 }
302 302
@@ -392,15 +392,19 @@ static struct notifier_block xenstore_notifier;
392 392
393static int __init balloon_init(void) 393static int __init balloon_init(void)
394{ 394{
395 unsigned long pfn, extra_pfn_end; 395 unsigned long pfn, nr_pages, extra_pfn_end;
396 struct page *page; 396 struct page *page;
397 397
398 if (!xen_pv_domain()) 398 if (!xen_domain())
399 return -ENODEV; 399 return -ENODEV;
400 400
401 pr_info("xen_balloon: Initialising balloon driver.\n"); 401 pr_info("xen_balloon: Initialising balloon driver.\n");
402 402
403 balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); 403 if (xen_pv_domain())
404 nr_pages = xen_start_info->nr_pages;
405 else
406 nr_pages = max_pfn;
407 balloon_stats.current_pages = min(nr_pages, max_pfn);
404 balloon_stats.target_pages = balloon_stats.current_pages; 408 balloon_stats.target_pages = balloon_stats.current_pages;
405 balloon_stats.balloon_low = 0; 409 balloon_stats.balloon_low = 0;
406 balloon_stats.balloon_high = 0; 410 balloon_stats.balloon_high = 0;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 74681478100a..0ad1699a1b3e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -114,7 +114,7 @@ struct cpu_evtchn_s {
114static __initdata struct cpu_evtchn_s init_evtchn_mask = { 114static __initdata struct cpu_evtchn_s init_evtchn_mask = {
115 .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul, 115 .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
116}; 116};
117static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask; 117static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;
118 118
119static inline unsigned long *cpu_evtchn_mask(int cpu) 119static inline unsigned long *cpu_evtchn_mask(int cpu)
120{ 120{
@@ -277,7 +277,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
277 277
278 BUG_ON(irq == -1); 278 BUG_ON(irq == -1);
279#ifdef CONFIG_SMP 279#ifdef CONFIG_SMP
280 cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); 280 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
281#endif 281#endif
282 282
283 clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); 283 clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
@@ -294,7 +294,7 @@ static void init_evtchn_cpu_bindings(void)
294 294
295 /* By default all event channels notify CPU#0. */ 295 /* By default all event channels notify CPU#0. */
296 for_each_irq_desc(i, desc) { 296 for_each_irq_desc(i, desc) {
297 cpumask_copy(desc->affinity, cpumask_of(0)); 297 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
298 } 298 }
299#endif 299#endif
300 300
@@ -376,81 +376,69 @@ static void unmask_evtchn(int port)
376 put_cpu(); 376 put_cpu();
377} 377}
378 378
379static int get_nr_hw_irqs(void) 379static int xen_allocate_irq_dynamic(void)
380{ 380{
381 int ret = 1; 381 int first = 0;
382 int irq;
382 383
383#ifdef CONFIG_X86_IO_APIC 384#ifdef CONFIG_X86_IO_APIC
384 ret = get_nr_irqs_gsi(); 385 /*
386 * For an HVM guest or domain 0 which see "real" (emulated or
387 * actual repectively) GSIs we allocate dynamic IRQs
388 * e.g. those corresponding to event channels or MSIs
389 * etc. from the range above those "real" GSIs to avoid
390 * collisions.
391 */
392 if (xen_initial_domain() || xen_hvm_domain())
393 first = get_nr_irqs_gsi();
385#endif 394#endif
386 395
387 return ret; 396retry:
388} 397 irq = irq_alloc_desc_from(first, -1);
389 398
390static int find_unbound_pirq(int type) 399 if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
391{ 400 printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
392 int rc, i; 401 first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
393 struct physdev_get_free_pirq op_get_free_pirq; 402 goto retry;
394 op_get_free_pirq.type = type; 403 }
395 404
396 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); 405 if (irq < 0)
397 if (!rc) 406 panic("No available IRQ to bind to: increase nr_irqs!\n");
398 return op_get_free_pirq.pirq;
399 407
400 for (i = 0; i < nr_irqs; i++) { 408 return irq;
401 if (pirq_to_irq[i] < 0)
402 return i;
403 }
404 return -1;
405} 409}
406 410
407static int find_unbound_irq(void) 411static int xen_allocate_irq_gsi(unsigned gsi)
408{ 412{
409 struct irq_data *data; 413 int irq;
410 int irq, res;
411 int bottom = get_nr_hw_irqs();
412 int top = nr_irqs-1;
413
414 if (bottom == nr_irqs)
415 goto no_irqs;
416 414
417 /* This loop starts from the top of IRQ space and goes down. 415 /*
418 * We need this b/c if we have a PCI device in a Xen PV guest 416 * A PV guest has no concept of a GSI (since it has no ACPI
419 * we do not have an IO-APIC (though the backend might have them) 417 * nor access to/knowledge of the physical APICs). Therefore
420 * mapped in. To not have a collision of physical IRQs with the Xen 418 * all IRQs are dynamically allocated from the entire IRQ
421 * event channels start at the top of the IRQ space for virtual IRQs. 419 * space.
422 */ 420 */
423 for (irq = top; irq > bottom; irq--) { 421 if (xen_pv_domain() && !xen_initial_domain())
424 data = irq_get_irq_data(irq); 422 return xen_allocate_irq_dynamic();
425 /* only 15->0 have init'd desc; handle irq > 16 */
426 if (!data)
427 break;
428 if (data->chip == &no_irq_chip)
429 break;
430 if (data->chip != &xen_dynamic_chip)
431 continue;
432 if (irq_info[irq].type == IRQT_UNBOUND)
433 return irq;
434 }
435
436 if (irq == bottom)
437 goto no_irqs;
438 423
439 res = irq_alloc_desc_at(irq, -1); 424 /* Legacy IRQ descriptors are already allocated by the arch. */
425 if (gsi < NR_IRQS_LEGACY)
426 return gsi;
440 427
441 if (WARN_ON(res != irq)) 428 irq = irq_alloc_desc_at(gsi, -1);
442 return -1; 429 if (irq < 0)
430 panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
443 431
444 return irq; 432 return irq;
445
446no_irqs:
447 panic("No available IRQ to bind to: increase nr_irqs!\n");
448} 433}
449 434
450static bool identity_mapped_irq(unsigned irq) 435static void xen_free_irq(unsigned irq)
451{ 436{
452 /* identity map all the hardware irqs */ 437 /* Legacy IRQ descriptors are managed by the arch. */
453 return irq < get_nr_hw_irqs(); 438 if (irq < NR_IRQS_LEGACY)
439 return;
440
441 irq_free_desc(irq);
454} 442}
455 443
456static void pirq_unmask_notify(int irq) 444static void pirq_unmask_notify(int irq)
@@ -486,7 +474,7 @@ static bool probing_irq(int irq)
486 return desc && desc->action == NULL; 474 return desc && desc->action == NULL;
487} 475}
488 476
489static unsigned int startup_pirq(unsigned int irq) 477static unsigned int __startup_pirq(unsigned int irq)
490{ 478{
491 struct evtchn_bind_pirq bind_pirq; 479 struct evtchn_bind_pirq bind_pirq;
492 struct irq_info *info = info_for_irq(irq); 480 struct irq_info *info = info_for_irq(irq);
@@ -524,9 +512,15 @@ out:
524 return 0; 512 return 0;
525} 513}
526 514
527static void shutdown_pirq(unsigned int irq) 515static unsigned int startup_pirq(struct irq_data *data)
516{
517 return __startup_pirq(data->irq);
518}
519
520static void shutdown_pirq(struct irq_data *data)
528{ 521{
529 struct evtchn_close close; 522 struct evtchn_close close;
523 unsigned int irq = data->irq;
530 struct irq_info *info = info_for_irq(irq); 524 struct irq_info *info = info_for_irq(irq);
531 int evtchn = evtchn_from_irq(irq); 525 int evtchn = evtchn_from_irq(irq);
532 526
@@ -546,20 +540,20 @@ static void shutdown_pirq(unsigned int irq)
546 info->evtchn = 0; 540 info->evtchn = 0;
547} 541}
548 542
549static void enable_pirq(unsigned int irq) 543static void enable_pirq(struct irq_data *data)
550{ 544{
551 startup_pirq(irq); 545 startup_pirq(data);
552} 546}
553 547
554static void disable_pirq(unsigned int irq) 548static void disable_pirq(struct irq_data *data)
555{ 549{
556} 550}
557 551
558static void ack_pirq(unsigned int irq) 552static void ack_pirq(struct irq_data *data)
559{ 553{
560 int evtchn = evtchn_from_irq(irq); 554 int evtchn = evtchn_from_irq(data->irq);
561 555
562 move_native_irq(irq); 556 move_native_irq(data->irq);
563 557
564 if (VALID_EVTCHN(evtchn)) { 558 if (VALID_EVTCHN(evtchn)) {
565 mask_evtchn(evtchn); 559 mask_evtchn(evtchn);
@@ -567,23 +561,6 @@ static void ack_pirq(unsigned int irq)
567 } 561 }
568} 562}
569 563
570static void end_pirq(unsigned int irq)
571{
572 int evtchn = evtchn_from_irq(irq);
573 struct irq_desc *desc = irq_to_desc(irq);
574
575 if (WARN_ON(!desc))
576 return;
577
578 if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
579 (IRQ_DISABLED|IRQ_PENDING)) {
580 shutdown_pirq(irq);
581 } else if (VALID_EVTCHN(evtchn)) {
582 unmask_evtchn(evtchn);
583 pirq_unmask_notify(irq);
584 }
585}
586
587static int find_irq_by_gsi(unsigned gsi) 564static int find_irq_by_gsi(unsigned gsi)
588{ 565{
589 int irq; 566 int irq;
@@ -638,14 +615,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
638 goto out; /* XXX need refcount? */ 615 goto out; /* XXX need refcount? */
639 } 616 }
640 617
641 /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore 618 irq = xen_allocate_irq_gsi(gsi);
642 * we are using the !xen_initial_domain() to drop in the function.*/
643 if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
644 xen_pv_domain())) {
645 irq = gsi;
646 irq_alloc_desc_at(irq, -1);
647 } else
648 irq = find_unbound_irq();
649 619
650 set_irq_chip_and_handler_name(irq, &xen_pirq_chip, 620 set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
651 handle_level_irq, name); 621 handle_level_irq, name);
@@ -658,7 +628,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
658 * this in the priv domain. */ 628 * this in the priv domain. */
659 if (xen_initial_domain() && 629 if (xen_initial_domain() &&
660 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { 630 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
661 irq_free_desc(irq); 631 xen_free_irq(irq);
662 irq = -ENOSPC; 632 irq = -ENOSPC;
663 goto out; 633 goto out;
664 } 634 }
@@ -674,87 +644,46 @@ out:
674} 644}
675 645
676#ifdef CONFIG_PCI_MSI 646#ifdef CONFIG_PCI_MSI
677#include <linux/msi.h> 647int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
678#include "../pci/msi.h"
679
680void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
681{ 648{
682 spin_lock(&irq_mapping_update_lock); 649 int rc;
683 650 struct physdev_get_free_pirq op_get_free_pirq;
684 if (alloc & XEN_ALLOC_IRQ) {
685 *irq = find_unbound_irq();
686 if (*irq == -1)
687 goto out;
688 }
689
690 if (alloc & XEN_ALLOC_PIRQ) {
691 *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
692 if (*pirq == -1)
693 goto out;
694 }
695 651
696 set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, 652 op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
697 handle_level_irq, name); 653 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
698 654
699 irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0); 655 WARN_ONCE(rc == -ENOSYS,
700 pirq_to_irq[*pirq] = *irq; 656 "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
701 657
702out: 658 return rc ? -1 : op_get_free_pirq.pirq;
703 spin_unlock(&irq_mapping_update_lock);
704} 659}
705 660
706int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) 661int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
662 int pirq, int vector, const char *name)
707{ 663{
708 int irq = -1; 664 int irq, ret;
709 struct physdev_map_pirq map_irq;
710 int rc;
711 int pos;
712 u32 table_offset, bir;
713
714 memset(&map_irq, 0, sizeof(map_irq));
715 map_irq.domid = DOMID_SELF;
716 map_irq.type = MAP_PIRQ_TYPE_MSI;
717 map_irq.index = -1;
718 map_irq.pirq = -1;
719 map_irq.bus = dev->bus->number;
720 map_irq.devfn = dev->devfn;
721
722 if (type == PCI_CAP_ID_MSIX) {
723 pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
724
725 pci_read_config_dword(dev, msix_table_offset_reg(pos),
726 &table_offset);
727 bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
728
729 map_irq.table_base = pci_resource_start(dev, bir);
730 map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
731 }
732 665
733 spin_lock(&irq_mapping_update_lock); 666 spin_lock(&irq_mapping_update_lock);
734 667
735 irq = find_unbound_irq(); 668 irq = xen_allocate_irq_dynamic();
736
737 if (irq == -1) 669 if (irq == -1)
738 goto out; 670 goto out;
739 671
740 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
741 if (rc) {
742 printk(KERN_WARNING "xen map irq failed %d\n", rc);
743
744 irq_free_desc(irq);
745
746 irq = -1;
747 goto out;
748 }
749 irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
750
751 set_irq_chip_and_handler_name(irq, &xen_pirq_chip, 672 set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
752 handle_level_irq, 673 handle_level_irq, name);
753 (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
754 674
675 irq_info[irq] = mk_pirq_info(0, pirq, 0, vector);
676 pirq_to_irq[pirq] = irq;
677 ret = irq_set_msi_desc(irq, msidesc);
678 if (ret < 0)
679 goto error_irq;
755out: 680out:
756 spin_unlock(&irq_mapping_update_lock); 681 spin_unlock(&irq_mapping_update_lock);
757 return irq; 682 return irq;
683error_irq:
684 spin_unlock(&irq_mapping_update_lock);
685 xen_free_irq(irq);
686 return -1;
758} 687}
759#endif 688#endif
760 689
@@ -779,11 +708,12 @@ int xen_destroy_irq(int irq)
779 printk(KERN_WARNING "unmap irq failed %d\n", rc); 708 printk(KERN_WARNING "unmap irq failed %d\n", rc);
780 goto out; 709 goto out;
781 } 710 }
782 pirq_to_irq[info->u.pirq.pirq] = -1;
783 } 711 }
712 pirq_to_irq[info->u.pirq.pirq] = -1;
713
784 irq_info[irq] = mk_unbound_info(); 714 irq_info[irq] = mk_unbound_info();
785 715
786 irq_free_desc(irq); 716 xen_free_irq(irq);
787 717
788out: 718out:
789 spin_unlock(&irq_mapping_update_lock); 719 spin_unlock(&irq_mapping_update_lock);
@@ -814,7 +744,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
814 irq = evtchn_to_irq[evtchn]; 744 irq = evtchn_to_irq[evtchn];
815 745
816 if (irq == -1) { 746 if (irq == -1) {
817 irq = find_unbound_irq(); 747 irq = xen_allocate_irq_dynamic();
818 748
819 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, 749 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
820 handle_fasteoi_irq, "event"); 750 handle_fasteoi_irq, "event");
@@ -839,7 +769,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
839 irq = per_cpu(ipi_to_irq, cpu)[ipi]; 769 irq = per_cpu(ipi_to_irq, cpu)[ipi];
840 770
841 if (irq == -1) { 771 if (irq == -1) {
842 irq = find_unbound_irq(); 772 irq = xen_allocate_irq_dynamic();
843 if (irq < 0) 773 if (irq < 0)
844 goto out; 774 goto out;
845 775
@@ -875,7 +805,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
875 irq = per_cpu(virq_to_irq, cpu)[virq]; 805 irq = per_cpu(virq_to_irq, cpu)[virq];
876 806
877 if (irq == -1) { 807 if (irq == -1) {
878 irq = find_unbound_irq(); 808 irq = xen_allocate_irq_dynamic();
879 809
880 set_irq_chip_and_handler_name(irq, &xen_percpu_chip, 810 set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
881 handle_percpu_irq, "virq"); 811 handle_percpu_irq, "virq");
@@ -934,7 +864,7 @@ static void unbind_from_irq(unsigned int irq)
934 if (irq_info[irq].type != IRQT_UNBOUND) { 864 if (irq_info[irq].type != IRQT_UNBOUND) {
935 irq_info[irq] = mk_unbound_info(); 865 irq_info[irq] = mk_unbound_info();
936 866
937 irq_free_desc(irq); 867 xen_free_irq(irq);
938 } 868 }
939 869
940 spin_unlock(&irq_mapping_update_lock); 870 spin_unlock(&irq_mapping_update_lock);
@@ -990,7 +920,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
990 if (irq < 0) 920 if (irq < 0)
991 return irq; 921 return irq;
992 922
993 irqflags |= IRQF_NO_SUSPEND; 923 irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
994 retval = request_irq(irq, handler, irqflags, devname, dev_id); 924 retval = request_irq(irq, handler, irqflags, devname, dev_id);
995 if (retval != 0) { 925 if (retval != 0) {
996 unbind_from_irq(irq); 926 unbind_from_irq(irq);
@@ -1234,11 +1164,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1234 return 0; 1164 return 0;
1235} 1165}
1236 1166
1237static int set_affinity_irq(unsigned irq, const struct cpumask *dest) 1167static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1168 bool force)
1238{ 1169{
1239 unsigned tcpu = cpumask_first(dest); 1170 unsigned tcpu = cpumask_first(dest);
1240 1171
1241 return rebind_irq_to_cpu(irq, tcpu); 1172 return rebind_irq_to_cpu(data->irq, tcpu);
1242} 1173}
1243 1174
1244int resend_irq_on_evtchn(unsigned int irq) 1175int resend_irq_on_evtchn(unsigned int irq)
@@ -1257,35 +1188,35 @@ int resend_irq_on_evtchn(unsigned int irq)
1257 return 1; 1188 return 1;
1258} 1189}
1259 1190
1260static void enable_dynirq(unsigned int irq) 1191static void enable_dynirq(struct irq_data *data)
1261{ 1192{
1262 int evtchn = evtchn_from_irq(irq); 1193 int evtchn = evtchn_from_irq(data->irq);
1263 1194
1264 if (VALID_EVTCHN(evtchn)) 1195 if (VALID_EVTCHN(evtchn))
1265 unmask_evtchn(evtchn); 1196 unmask_evtchn(evtchn);
1266} 1197}
1267 1198
1268static void disable_dynirq(unsigned int irq) 1199static void disable_dynirq(struct irq_data *data)
1269{ 1200{
1270 int evtchn = evtchn_from_irq(irq); 1201 int evtchn = evtchn_from_irq(data->irq);
1271 1202
1272 if (VALID_EVTCHN(evtchn)) 1203 if (VALID_EVTCHN(evtchn))
1273 mask_evtchn(evtchn); 1204 mask_evtchn(evtchn);
1274} 1205}
1275 1206
1276static void ack_dynirq(unsigned int irq) 1207static void ack_dynirq(struct irq_data *data)
1277{ 1208{
1278 int evtchn = evtchn_from_irq(irq); 1209 int evtchn = evtchn_from_irq(data->irq);
1279 1210
1280 move_masked_irq(irq); 1211 move_masked_irq(data->irq);
1281 1212
1282 if (VALID_EVTCHN(evtchn)) 1213 if (VALID_EVTCHN(evtchn))
1283 unmask_evtchn(evtchn); 1214 unmask_evtchn(evtchn);
1284} 1215}
1285 1216
1286static int retrigger_dynirq(unsigned int irq) 1217static int retrigger_dynirq(struct irq_data *data)
1287{ 1218{
1288 int evtchn = evtchn_from_irq(irq); 1219 int evtchn = evtchn_from_irq(data->irq);
1289 struct shared_info *sh = HYPERVISOR_shared_info; 1220 struct shared_info *sh = HYPERVISOR_shared_info;
1290 int ret = 0; 1221 int ret = 0;
1291 1222
@@ -1334,7 +1265,7 @@ static void restore_cpu_pirqs(void)
1334 1265
1335 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); 1266 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1336 1267
1337 startup_pirq(irq); 1268 __startup_pirq(irq);
1338 } 1269 }
1339} 1270}
1340 1271
@@ -1445,7 +1376,6 @@ void xen_poll_irq(int irq)
1445void xen_irq_resume(void) 1376void xen_irq_resume(void)
1446{ 1377{
1447 unsigned int cpu, irq, evtchn; 1378 unsigned int cpu, irq, evtchn;
1448 struct irq_desc *desc;
1449 1379
1450 init_evtchn_cpu_bindings(); 1380 init_evtchn_cpu_bindings();
1451 1381
@@ -1465,66 +1395,48 @@ void xen_irq_resume(void)
1465 restore_cpu_ipis(cpu); 1395 restore_cpu_ipis(cpu);
1466 } 1396 }
1467 1397
1468 /*
1469 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
1470 * are not handled by the IRQ core.
1471 */
1472 for_each_irq_desc(irq, desc) {
1473 if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
1474 continue;
1475 if (desc->status & IRQ_DISABLED)
1476 continue;
1477
1478 evtchn = evtchn_from_irq(irq);
1479 if (evtchn == -1)
1480 continue;
1481
1482 unmask_evtchn(evtchn);
1483 }
1484
1485 restore_cpu_pirqs(); 1398 restore_cpu_pirqs();
1486} 1399}
1487 1400
1488static struct irq_chip xen_dynamic_chip __read_mostly = { 1401static struct irq_chip xen_dynamic_chip __read_mostly = {
1489 .name = "xen-dyn", 1402 .name = "xen-dyn",
1490 1403
1491 .disable = disable_dynirq, 1404 .irq_disable = disable_dynirq,
1492 .mask = disable_dynirq, 1405 .irq_mask = disable_dynirq,
1493 .unmask = enable_dynirq, 1406 .irq_unmask = enable_dynirq,
1494 1407
1495 .eoi = ack_dynirq, 1408 .irq_eoi = ack_dynirq,
1496 .set_affinity = set_affinity_irq, 1409 .irq_set_affinity = set_affinity_irq,
1497 .retrigger = retrigger_dynirq, 1410 .irq_retrigger = retrigger_dynirq,
1498}; 1411};
1499 1412
1500static struct irq_chip xen_pirq_chip __read_mostly = { 1413static struct irq_chip xen_pirq_chip __read_mostly = {
1501 .name = "xen-pirq", 1414 .name = "xen-pirq",
1502 1415
1503 .startup = startup_pirq, 1416 .irq_startup = startup_pirq,
1504 .shutdown = shutdown_pirq, 1417 .irq_shutdown = shutdown_pirq,
1505 1418
1506 .enable = enable_pirq, 1419 .irq_enable = enable_pirq,
1507 .unmask = enable_pirq, 1420 .irq_unmask = enable_pirq,
1508 1421
1509 .disable = disable_pirq, 1422 .irq_disable = disable_pirq,
1510 .mask = disable_pirq, 1423 .irq_mask = disable_pirq,
1511 1424
1512 .ack = ack_pirq, 1425 .irq_ack = ack_pirq,
1513 .end = end_pirq,
1514 1426
1515 .set_affinity = set_affinity_irq, 1427 .irq_set_affinity = set_affinity_irq,
1516 1428
1517 .retrigger = retrigger_dynirq, 1429 .irq_retrigger = retrigger_dynirq,
1518}; 1430};
1519 1431
1520static struct irq_chip xen_percpu_chip __read_mostly = { 1432static struct irq_chip xen_percpu_chip __read_mostly = {
1521 .name = "xen-percpu", 1433 .name = "xen-percpu",
1522 1434
1523 .disable = disable_dynirq, 1435 .irq_disable = disable_dynirq,
1524 .mask = disable_dynirq, 1436 .irq_mask = disable_dynirq,
1525 .unmask = enable_dynirq, 1437 .irq_unmask = enable_dynirq,
1526 1438
1527 .ack = ack_dynirq, 1439 .irq_ack = ack_dynirq,
1528}; 1440};
1529 1441
1530int xen_set_callback_via(uint64_t via) 1442int xen_set_callback_via(uint64_t via)
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index db8c4c4ac880..ebb292859b59 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -34,32 +34,38 @@ enum shutdown_state {
34/* Ignore multiple shutdown requests. */ 34/* Ignore multiple shutdown requests. */
35static enum shutdown_state shutting_down = SHUTDOWN_INVALID; 35static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
36 36
37#ifdef CONFIG_PM_SLEEP 37struct suspend_info {
38static int xen_hvm_suspend(void *data) 38 int cancelled;
39{ 39 unsigned long arg; /* extra hypercall argument */
40 struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; 40 void (*pre)(void);
41 int *cancelled = data; 41 void (*post)(int cancelled);
42 42};
43 BUG_ON(!irqs_disabled());
44
45 *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
46 43
47 xen_hvm_post_suspend(*cancelled); 44static void xen_hvm_post_suspend(int cancelled)
45{
46 xen_arch_hvm_post_suspend(cancelled);
48 gnttab_resume(); 47 gnttab_resume();
48}
49 49
50 if (!*cancelled) { 50static void xen_pre_suspend(void)
51 xen_irq_resume(); 51{
52 xen_console_resume(); 52 xen_mm_pin_all();
53 xen_timer_resume(); 53 gnttab_suspend();
54 } 54 xen_arch_pre_suspend();
55}
55 56
56 return 0; 57static void xen_post_suspend(int cancelled)
58{
59 xen_arch_post_suspend(cancelled);
60 gnttab_resume();
61 xen_mm_unpin_all();
57} 62}
58 63
64#ifdef CONFIG_PM_SLEEP
59static int xen_suspend(void *data) 65static int xen_suspend(void *data)
60{ 66{
67 struct suspend_info *si = data;
61 int err; 68 int err;
62 int *cancelled = data;
63 69
64 BUG_ON(!irqs_disabled()); 70 BUG_ON(!irqs_disabled());
65 71
@@ -70,22 +76,20 @@ static int xen_suspend(void *data)
70 return err; 76 return err;
71 } 77 }
72 78
73 xen_mm_pin_all(); 79 if (si->pre)
74 gnttab_suspend(); 80 si->pre();
75 xen_pre_suspend();
76 81
77 /* 82 /*
78 * This hypercall returns 1 if suspend was cancelled 83 * This hypercall returns 1 if suspend was cancelled
79 * or the domain was merely checkpointed, and 0 if it 84 * or the domain was merely checkpointed, and 0 if it
80 * is resuming in a new domain. 85 * is resuming in a new domain.
81 */ 86 */
82 *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); 87 si->cancelled = HYPERVISOR_suspend(si->arg);
83 88
84 xen_post_suspend(*cancelled); 89 if (si->post)
85 gnttab_resume(); 90 si->post(si->cancelled);
86 xen_mm_unpin_all();
87 91
88 if (!*cancelled) { 92 if (!si->cancelled) {
89 xen_irq_resume(); 93 xen_irq_resume();
90 xen_console_resume(); 94 xen_console_resume();
91 xen_timer_resume(); 95 xen_timer_resume();
@@ -99,7 +103,7 @@ static int xen_suspend(void *data)
99static void do_suspend(void) 103static void do_suspend(void)
100{ 104{
101 int err; 105 int err;
102 int cancelled = 1; 106 struct suspend_info si;
103 107
104 shutting_down = SHUTDOWN_SUSPEND; 108 shutting_down = SHUTDOWN_SUSPEND;
105 109
@@ -129,20 +133,29 @@ static void do_suspend(void)
129 goto out_resume; 133 goto out_resume;
130 } 134 }
131 135
132 if (xen_hvm_domain()) 136 si.cancelled = 1;
133 err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0)); 137
134 else 138 if (xen_hvm_domain()) {
135 err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); 139 si.arg = 0UL;
140 si.pre = NULL;
141 si.post = &xen_hvm_post_suspend;
142 } else {
143 si.arg = virt_to_mfn(xen_start_info);
144 si.pre = &xen_pre_suspend;
145 si.post = &xen_post_suspend;
146 }
147
148 err = stop_machine(xen_suspend, &si, cpumask_of(0));
136 149
137 dpm_resume_noirq(PMSG_RESUME); 150 dpm_resume_noirq(PMSG_RESUME);
138 151
139 if (err) { 152 if (err) {
140 printk(KERN_ERR "failed to start xen_suspend: %d\n", err); 153 printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
141 cancelled = 1; 154 si.cancelled = 1;
142 } 155 }
143 156
144out_resume: 157out_resume:
145 if (!cancelled) { 158 if (!si.cancelled) {
146 xen_arch_resume(); 159 xen_arch_resume();
147 xs_resume(); 160 xs_resume();
148 } else 161 } else
@@ -162,12 +175,39 @@ out:
162} 175}
163#endif /* CONFIG_PM_SLEEP */ 176#endif /* CONFIG_PM_SLEEP */
164 177
178struct shutdown_handler {
179 const char *command;
180 void (*cb)(void);
181};
182
183static void do_poweroff(void)
184{
185 shutting_down = SHUTDOWN_POWEROFF;
186 orderly_poweroff(false);
187}
188
189static void do_reboot(void)
190{
191 shutting_down = SHUTDOWN_POWEROFF; /* ? */
192 ctrl_alt_del();
193}
194
165static void shutdown_handler(struct xenbus_watch *watch, 195static void shutdown_handler(struct xenbus_watch *watch,
166 const char **vec, unsigned int len) 196 const char **vec, unsigned int len)
167{ 197{
168 char *str; 198 char *str;
169 struct xenbus_transaction xbt; 199 struct xenbus_transaction xbt;
170 int err; 200 int err;
201 static struct shutdown_handler handlers[] = {
202 { "poweroff", do_poweroff },
203 { "halt", do_poweroff },
204 { "reboot", do_reboot },
205#ifdef CONFIG_PM_SLEEP
206 { "suspend", do_suspend },
207#endif
208 {NULL, NULL},
209 };
210 static struct shutdown_handler *handler;
171 211
172 if (shutting_down != SHUTDOWN_INVALID) 212 if (shutting_down != SHUTDOWN_INVALID)
173 return; 213 return;
@@ -184,7 +224,14 @@ static void shutdown_handler(struct xenbus_watch *watch,
184 return; 224 return;
185 } 225 }
186 226
187 xenbus_write(xbt, "control", "shutdown", ""); 227 for (handler = &handlers[0]; handler->command; handler++) {
228 if (strcmp(str, handler->command) == 0)
229 break;
230 }
231
232 /* Only acknowledge commands which we are prepared to handle. */
233 if (handler->cb)
234 xenbus_write(xbt, "control", "shutdown", "");
188 235
189 err = xenbus_transaction_end(xbt, 0); 236 err = xenbus_transaction_end(xbt, 0);
190 if (err == -EAGAIN) { 237 if (err == -EAGAIN) {
@@ -192,17 +239,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
192 goto again; 239 goto again;
193 } 240 }
194 241
195 if (strcmp(str, "poweroff") == 0 || 242 if (handler->cb) {
196 strcmp(str, "halt") == 0) { 243 handler->cb();
197 shutting_down = SHUTDOWN_POWEROFF;
198 orderly_poweroff(false);
199 } else if (strcmp(str, "reboot") == 0) {
200 shutting_down = SHUTDOWN_POWEROFF; /* ? */
201 ctrl_alt_del();
202#ifdef CONFIG_PM_SLEEP
203 } else if (strcmp(str, "suspend") == 0) {
204 do_suspend();
205#endif
206 } else { 244 } else {
207 printk(KERN_INFO "Ignoring shutdown request: %s\n", str); 245 printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
208 shutting_down = SHUTDOWN_INVALID; 246 shutting_down = SHUTDOWN_INVALID;
@@ -281,27 +319,18 @@ static int shutdown_event(struct notifier_block *notifier,
281 return NOTIFY_DONE; 319 return NOTIFY_DONE;
282} 320}
283 321
284static int __init __setup_shutdown_event(void)
285{
286 /* Delay initialization in the PV on HVM case */
287 if (xen_hvm_domain())
288 return 0;
289
290 if (!xen_pv_domain())
291 return -ENODEV;
292
293 return xen_setup_shutdown_event();
294}
295
296int xen_setup_shutdown_event(void) 322int xen_setup_shutdown_event(void)
297{ 323{
298 static struct notifier_block xenstore_notifier = { 324 static struct notifier_block xenstore_notifier = {
299 .notifier_call = shutdown_event 325 .notifier_call = shutdown_event
300 }; 326 };
327
328 if (!xen_domain())
329 return -ENODEV;
301 register_xenstore_notifier(&xenstore_notifier); 330 register_xenstore_notifier(&xenstore_notifier);
302 331
303 return 0; 332 return 0;
304} 333}
305EXPORT_SYMBOL_GPL(xen_setup_shutdown_event); 334EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
306 335
307subsys_initcall(__setup_shutdown_event); 336subsys_initcall(xen_setup_shutdown_event);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index afbe041f42c5..319dd0a94d51 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -156,9 +156,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
156 if (ret) 156 if (ret)
157 goto out; 157 goto out;
158 xenbus_probe(NULL); 158 xenbus_probe(NULL);
159 ret = xen_setup_shutdown_event();
160 if (ret)
161 goto out;
162 return 0; 159 return 0;
163 160
164out: 161out:
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 02a2cf616318..515455296378 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -21,8 +21,8 @@
21#include <linux/posix_acl_xattr.h> 21#include <linux/posix_acl_xattr.h>
22#include "xattr.h" 22#include "xattr.h"
23#include "acl.h" 23#include "acl.h"
24#include "v9fs_vfs.h"
25#include "v9fs.h" 24#include "v9fs.h"
25#include "v9fs_vfs.h"
26 26
27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) 27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{ 28{
@@ -59,7 +59,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
59 struct v9fs_session_info *v9ses; 59 struct v9fs_session_info *v9ses;
60 60
61 v9ses = v9fs_inode2v9ses(inode); 61 v9ses = v9fs_inode2v9ses(inode);
62 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 62 if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
63 ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
63 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL); 64 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
64 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); 65 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
65 return 0; 66 return 0;
@@ -71,11 +72,15 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
71 if (!IS_ERR(dacl) && !IS_ERR(pacl)) { 72 if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
72 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); 73 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
73 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl); 74 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
74 posix_acl_release(dacl);
75 posix_acl_release(pacl);
76 } else 75 } else
77 retval = -EIO; 76 retval = -EIO;
78 77
78 if (!IS_ERR(dacl))
79 posix_acl_release(dacl);
80
81 if (!IS_ERR(pacl))
82 posix_acl_release(pacl);
83
79 return retval; 84 return retval;
80} 85}
81 86
@@ -100,9 +105,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
100 return -ECHILD; 105 return -ECHILD;
101 106
102 v9ses = v9fs_inode2v9ses(inode); 107 v9ses = v9fs_inode2v9ses(inode);
103 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 108 if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
109 ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
104 /* 110 /*
105 * On access = client mode get the acl 111 * On access = client and acl = on mode get the acl
106 * values from the server 112 * values from the server
107 */ 113 */
108 return 0; 114 return 0;
@@ -128,6 +134,10 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
128 struct inode *inode = dentry->d_inode; 134 struct inode *inode = dentry->d_inode;
129 135
130 set_cached_acl(inode, type, acl); 136 set_cached_acl(inode, type, acl);
137
138 if (!acl)
139 return 0;
140
131 /* Set a setxattr request to server */ 141 /* Set a setxattr request to server */
132 size = posix_acl_xattr_size(acl->a_count); 142 size = posix_acl_xattr_size(acl->a_count);
133 buffer = kmalloc(size, GFP_KERNEL); 143 buffer = kmalloc(size, GFP_KERNEL);
@@ -177,10 +187,8 @@ int v9fs_acl_chmod(struct dentry *dentry)
177int v9fs_set_create_acl(struct dentry *dentry, 187int v9fs_set_create_acl(struct dentry *dentry,
178 struct posix_acl *dpacl, struct posix_acl *pacl) 188 struct posix_acl *dpacl, struct posix_acl *pacl)
179{ 189{
180 if (dpacl) 190 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
181 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl); 191 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
182 if (pacl)
183 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
184 posix_acl_release(dpacl); 192 posix_acl_release(dpacl);
185 posix_acl_release(pacl); 193 posix_acl_release(pacl);
186 return 0; 194 return 0;
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 0dbe0d139ac2..5b335c5086a1 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -33,67 +33,11 @@
33 33
34#define CACHETAG_LEN 11 34#define CACHETAG_LEN 11
35 35
36struct kmem_cache *vcookie_cache;
37
38struct fscache_netfs v9fs_cache_netfs = { 36struct fscache_netfs v9fs_cache_netfs = {
39 .name = "9p", 37 .name = "9p",
40 .version = 0, 38 .version = 0,
41}; 39};
42 40
43static void init_once(void *foo)
44{
45 struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
46 vcookie->fscache = NULL;
47 vcookie->qid = NULL;
48 inode_init_once(&vcookie->inode);
49}
50
51/**
52 * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
53 * vcookie to inode mapping
54 *
55 * Returns 0 on success.
56 */
57
58static int v9fs_init_vcookiecache(void)
59{
60 vcookie_cache = kmem_cache_create("vcookie_cache",
61 sizeof(struct v9fs_cookie),
62 0, (SLAB_RECLAIM_ACCOUNT|
63 SLAB_MEM_SPREAD),
64 init_once);
65 if (!vcookie_cache)
66 return -ENOMEM;
67
68 return 0;
69}
70
71/**
72 * v9fs_destroy_vcookiecache - destroy the cache of vcookies
73 *
74 */
75
76static void v9fs_destroy_vcookiecache(void)
77{
78 kmem_cache_destroy(vcookie_cache);
79}
80
81int __v9fs_cache_register(void)
82{
83 int ret;
84 ret = v9fs_init_vcookiecache();
85 if (ret < 0)
86 return ret;
87
88 return fscache_register_netfs(&v9fs_cache_netfs);
89}
90
91void __v9fs_cache_unregister(void)
92{
93 v9fs_destroy_vcookiecache();
94 fscache_unregister_netfs(&v9fs_cache_netfs);
95}
96
97/** 41/**
98 * v9fs_random_cachetag - Generate a random tag to be associated 42 * v9fs_random_cachetag - Generate a random tag to be associated
99 * with a new cache session. 43 * with a new cache session.
@@ -133,9 +77,9 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
133} 77}
134 78
135const struct fscache_cookie_def v9fs_cache_session_index_def = { 79const struct fscache_cookie_def v9fs_cache_session_index_def = {
136 .name = "9P.session", 80 .name = "9P.session",
137 .type = FSCACHE_COOKIE_TYPE_INDEX, 81 .type = FSCACHE_COOKIE_TYPE_INDEX,
138 .get_key = v9fs_cache_session_get_key, 82 .get_key = v9fs_cache_session_get_key,
139}; 83};
140 84
141void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) 85void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
@@ -163,33 +107,33 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
163static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, 107static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
164 void *buffer, uint16_t bufmax) 108 void *buffer, uint16_t bufmax)
165{ 109{
166 const struct v9fs_cookie *vcookie = cookie_netfs_data; 110 const struct v9fs_inode *v9inode = cookie_netfs_data;
167 memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path)); 111 memcpy(buffer, &v9inode->fscache_key->path,
168 112 sizeof(v9inode->fscache_key->path));
169 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode, 113 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
170 vcookie->qid->path); 114 v9inode->fscache_key->path);
171 return sizeof(vcookie->qid->path); 115 return sizeof(v9inode->fscache_key->path);
172} 116}
173 117
174static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, 118static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
175 uint64_t *size) 119 uint64_t *size)
176{ 120{
177 const struct v9fs_cookie *vcookie = cookie_netfs_data; 121 const struct v9fs_inode *v9inode = cookie_netfs_data;
178 *size = i_size_read(&vcookie->inode); 122 *size = i_size_read(&v9inode->vfs_inode);
179 123
180 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode, 124 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode,
181 *size); 125 *size);
182} 126}
183 127
184static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, 128static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
185 void *buffer, uint16_t buflen) 129 void *buffer, uint16_t buflen)
186{ 130{
187 const struct v9fs_cookie *vcookie = cookie_netfs_data; 131 const struct v9fs_inode *v9inode = cookie_netfs_data;
188 memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version)); 132 memcpy(buffer, &v9inode->fscache_key->version,
189 133 sizeof(v9inode->fscache_key->version));
190 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode, 134 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
191 vcookie->qid->version); 135 v9inode->fscache_key->version);
192 return sizeof(vcookie->qid->version); 136 return sizeof(v9inode->fscache_key->version);
193} 137}
194 138
195static enum 139static enum
@@ -197,13 +141,13 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
197 const void *buffer, 141 const void *buffer,
198 uint16_t buflen) 142 uint16_t buflen)
199{ 143{
200 const struct v9fs_cookie *vcookie = cookie_netfs_data; 144 const struct v9fs_inode *v9inode = cookie_netfs_data;
201 145
202 if (buflen != sizeof(vcookie->qid->version)) 146 if (buflen != sizeof(v9inode->fscache_key->version))
203 return FSCACHE_CHECKAUX_OBSOLETE; 147 return FSCACHE_CHECKAUX_OBSOLETE;
204 148
205 if (memcmp(buffer, &vcookie->qid->version, 149 if (memcmp(buffer, &v9inode->fscache_key->version,
206 sizeof(vcookie->qid->version))) 150 sizeof(v9inode->fscache_key->version)))
207 return FSCACHE_CHECKAUX_OBSOLETE; 151 return FSCACHE_CHECKAUX_OBSOLETE;
208 152
209 return FSCACHE_CHECKAUX_OKAY; 153 return FSCACHE_CHECKAUX_OKAY;
@@ -211,7 +155,7 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
211 155
212static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) 156static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
213{ 157{
214 struct v9fs_cookie *vcookie = cookie_netfs_data; 158 struct v9fs_inode *v9inode = cookie_netfs_data;
215 struct pagevec pvec; 159 struct pagevec pvec;
216 pgoff_t first; 160 pgoff_t first;
217 int loop, nr_pages; 161 int loop, nr_pages;
@@ -220,7 +164,7 @@ static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
220 first = 0; 164 first = 0;
221 165
222 for (;;) { 166 for (;;) {
223 nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping, 167 nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping,
224 first, 168 first,
225 PAGEVEC_SIZE - pagevec_count(&pvec)); 169 PAGEVEC_SIZE - pagevec_count(&pvec));
226 if (!nr_pages) 170 if (!nr_pages)
@@ -249,115 +193,114 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = {
249 193
250void v9fs_cache_inode_get_cookie(struct inode *inode) 194void v9fs_cache_inode_get_cookie(struct inode *inode)
251{ 195{
252 struct v9fs_cookie *vcookie; 196 struct v9fs_inode *v9inode;
253 struct v9fs_session_info *v9ses; 197 struct v9fs_session_info *v9ses;
254 198
255 if (!S_ISREG(inode->i_mode)) 199 if (!S_ISREG(inode->i_mode))
256 return; 200 return;
257 201
258 vcookie = v9fs_inode2cookie(inode); 202 v9inode = V9FS_I(inode);
259 if (vcookie->fscache) 203 if (v9inode->fscache)
260 return; 204 return;
261 205
262 v9ses = v9fs_inode2v9ses(inode); 206 v9ses = v9fs_inode2v9ses(inode);
263 vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, 207 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
264 &v9fs_cache_inode_index_def, 208 &v9fs_cache_inode_index_def,
265 vcookie); 209 v9inode);
266 210
267 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, 211 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
268 vcookie->fscache); 212 v9inode->fscache);
269} 213}
270 214
271void v9fs_cache_inode_put_cookie(struct inode *inode) 215void v9fs_cache_inode_put_cookie(struct inode *inode)
272{ 216{
273 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 217 struct v9fs_inode *v9inode = V9FS_I(inode);
274 218
275 if (!vcookie->fscache) 219 if (!v9inode->fscache)
276 return; 220 return;
277 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, 221 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
278 vcookie->fscache); 222 v9inode->fscache);
279 223
280 fscache_relinquish_cookie(vcookie->fscache, 0); 224 fscache_relinquish_cookie(v9inode->fscache, 0);
281 vcookie->fscache = NULL; 225 v9inode->fscache = NULL;
282} 226}
283 227
284void v9fs_cache_inode_flush_cookie(struct inode *inode) 228void v9fs_cache_inode_flush_cookie(struct inode *inode)
285{ 229{
286 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 230 struct v9fs_inode *v9inode = V9FS_I(inode);
287 231
288 if (!vcookie->fscache) 232 if (!v9inode->fscache)
289 return; 233 return;
290 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, 234 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
291 vcookie->fscache); 235 v9inode->fscache);
292 236
293 fscache_relinquish_cookie(vcookie->fscache, 1); 237 fscache_relinquish_cookie(v9inode->fscache, 1);
294 vcookie->fscache = NULL; 238 v9inode->fscache = NULL;
295} 239}
296 240
297void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) 241void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
298{ 242{
299 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 243 struct v9fs_inode *v9inode = V9FS_I(inode);
300 struct p9_fid *fid; 244 struct p9_fid *fid;
301 245
302 if (!vcookie->fscache) 246 if (!v9inode->fscache)
303 return; 247 return;
304 248
305 spin_lock(&vcookie->lock); 249 spin_lock(&v9inode->fscache_lock);
306 fid = filp->private_data; 250 fid = filp->private_data;
307 if ((filp->f_flags & O_ACCMODE) != O_RDONLY) 251 if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
308 v9fs_cache_inode_flush_cookie(inode); 252 v9fs_cache_inode_flush_cookie(inode);
309 else 253 else
310 v9fs_cache_inode_get_cookie(inode); 254 v9fs_cache_inode_get_cookie(inode);
311 255
312 spin_unlock(&vcookie->lock); 256 spin_unlock(&v9inode->fscache_lock);
313} 257}
314 258
315void v9fs_cache_inode_reset_cookie(struct inode *inode) 259void v9fs_cache_inode_reset_cookie(struct inode *inode)
316{ 260{
317 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 261 struct v9fs_inode *v9inode = V9FS_I(inode);
318 struct v9fs_session_info *v9ses; 262 struct v9fs_session_info *v9ses;
319 struct fscache_cookie *old; 263 struct fscache_cookie *old;
320 264
321 if (!vcookie->fscache) 265 if (!v9inode->fscache)
322 return; 266 return;
323 267
324 old = vcookie->fscache; 268 old = v9inode->fscache;
325 269
326 spin_lock(&vcookie->lock); 270 spin_lock(&v9inode->fscache_lock);
327 fscache_relinquish_cookie(vcookie->fscache, 1); 271 fscache_relinquish_cookie(v9inode->fscache, 1);
328 272
329 v9ses = v9fs_inode2v9ses(inode); 273 v9ses = v9fs_inode2v9ses(inode);
330 vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, 274 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
331 &v9fs_cache_inode_index_def, 275 &v9fs_cache_inode_index_def,
332 vcookie); 276 v9inode);
333
334 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", 277 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
335 inode, old, vcookie->fscache); 278 inode, old, v9inode->fscache);
336 279
337 spin_unlock(&vcookie->lock); 280 spin_unlock(&v9inode->fscache_lock);
338} 281}
339 282
340int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) 283int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
341{ 284{
342 struct inode *inode = page->mapping->host; 285 struct inode *inode = page->mapping->host;
343 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 286 struct v9fs_inode *v9inode = V9FS_I(inode);
344 287
345 BUG_ON(!vcookie->fscache); 288 BUG_ON(!v9inode->fscache);
346 289
347 return fscache_maybe_release_page(vcookie->fscache, page, gfp); 290 return fscache_maybe_release_page(v9inode->fscache, page, gfp);
348} 291}
349 292
350void __v9fs_fscache_invalidate_page(struct page *page) 293void __v9fs_fscache_invalidate_page(struct page *page)
351{ 294{
352 struct inode *inode = page->mapping->host; 295 struct inode *inode = page->mapping->host;
353 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 296 struct v9fs_inode *v9inode = V9FS_I(inode);
354 297
355 BUG_ON(!vcookie->fscache); 298 BUG_ON(!v9inode->fscache);
356 299
357 if (PageFsCache(page)) { 300 if (PageFsCache(page)) {
358 fscache_wait_on_page_write(vcookie->fscache, page); 301 fscache_wait_on_page_write(v9inode->fscache, page);
359 BUG_ON(!PageLocked(page)); 302 BUG_ON(!PageLocked(page));
360 fscache_uncache_page(vcookie->fscache, page); 303 fscache_uncache_page(v9inode->fscache, page);
361 } 304 }
362} 305}
363 306
@@ -380,13 +323,13 @@ static void v9fs_vfs_readpage_complete(struct page *page, void *data,
380int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) 323int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
381{ 324{
382 int ret; 325 int ret;
383 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 326 const struct v9fs_inode *v9inode = V9FS_I(inode);
384 327
385 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 328 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
386 if (!vcookie->fscache) 329 if (!v9inode->fscache)
387 return -ENOBUFS; 330 return -ENOBUFS;
388 331
389 ret = fscache_read_or_alloc_page(vcookie->fscache, 332 ret = fscache_read_or_alloc_page(v9inode->fscache,
390 page, 333 page,
391 v9fs_vfs_readpage_complete, 334 v9fs_vfs_readpage_complete,
392 NULL, 335 NULL,
@@ -418,13 +361,13 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
418 unsigned *nr_pages) 361 unsigned *nr_pages)
419{ 362{
420 int ret; 363 int ret;
421 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 364 const struct v9fs_inode *v9inode = V9FS_I(inode);
422 365
423 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); 366 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
424 if (!vcookie->fscache) 367 if (!v9inode->fscache)
425 return -ENOBUFS; 368 return -ENOBUFS;
426 369
427 ret = fscache_read_or_alloc_pages(vcookie->fscache, 370 ret = fscache_read_or_alloc_pages(v9inode->fscache,
428 mapping, pages, nr_pages, 371 mapping, pages, nr_pages,
429 v9fs_vfs_readpage_complete, 372 v9fs_vfs_readpage_complete,
430 NULL, 373 NULL,
@@ -453,11 +396,22 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
453void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) 396void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
454{ 397{
455 int ret; 398 int ret;
456 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 399 const struct v9fs_inode *v9inode = V9FS_I(inode);
457 400
458 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 401 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
459 ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL); 402 ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
460 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); 403 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
461 if (ret != 0) 404 if (ret != 0)
462 v9fs_uncache_page(inode, page); 405 v9fs_uncache_page(inode, page);
463} 406}
407
408/*
409 * wait for a page to complete writing to the cache
410 */
411void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
412{
413 const struct v9fs_inode *v9inode = V9FS_I(inode);
414 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
415 if (PageFsCache(page))
416 fscache_wait_on_page_write(v9inode->fscache, page);
417}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index a94192bfaee8..049507a5b01c 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -25,20 +25,6 @@
25#include <linux/fscache.h> 25#include <linux/fscache.h>
26#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27 27
28extern struct kmem_cache *vcookie_cache;
29
30struct v9fs_cookie {
31 spinlock_t lock;
32 struct inode inode;
33 struct fscache_cookie *fscache;
34 struct p9_qid *qid;
35};
36
37static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
38{
39 return container_of(inode, struct v9fs_cookie, inode);
40}
41
42extern struct fscache_netfs v9fs_cache_netfs; 28extern struct fscache_netfs v9fs_cache_netfs;
43extern const struct fscache_cookie_def v9fs_cache_session_index_def; 29extern const struct fscache_cookie_def v9fs_cache_session_index_def;
44extern const struct fscache_cookie_def v9fs_cache_inode_index_def; 30extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
@@ -64,23 +50,8 @@ extern int __v9fs_readpages_from_fscache(struct inode *inode,
64 struct list_head *pages, 50 struct list_head *pages,
65 unsigned *nr_pages); 51 unsigned *nr_pages);
66extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); 52extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
67 53extern void __v9fs_fscache_wait_on_page_write(struct inode *inode,
68 54 struct page *page);
69/**
70 * v9fs_cache_register - Register v9fs file system with the cache
71 */
72static inline int v9fs_cache_register(void)
73{
74 return __v9fs_cache_register();
75}
76
77/**
78 * v9fs_cache_unregister - Unregister v9fs from the cache
79 */
80static inline void v9fs_cache_unregister(void)
81{
82 __v9fs_cache_unregister();
83}
84 55
85static inline int v9fs_fscache_release_page(struct page *page, 56static inline int v9fs_fscache_release_page(struct page *page,
86 gfp_t gfp) 57 gfp_t gfp)
@@ -117,28 +88,27 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
117 88
118static inline void v9fs_uncache_page(struct inode *inode, struct page *page) 89static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
119{ 90{
120 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 91 struct v9fs_inode *v9inode = V9FS_I(inode);
121 fscache_uncache_page(vcookie->fscache, page); 92 fscache_uncache_page(v9inode->fscache, page);
122 BUG_ON(PageFsCache(page)); 93 BUG_ON(PageFsCache(page));
123} 94}
124 95
125static inline void v9fs_vcookie_set_qid(struct inode *inode, 96static inline void v9fs_fscache_set_key(struct inode *inode,
126 struct p9_qid *qid) 97 struct p9_qid *qid)
127{ 98{
128 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 99 struct v9fs_inode *v9inode = V9FS_I(inode);
129 spin_lock(&vcookie->lock); 100 spin_lock(&v9inode->fscache_lock);
130 vcookie->qid = qid; 101 v9inode->fscache_key = qid;
131 spin_unlock(&vcookie->lock); 102 spin_unlock(&v9inode->fscache_lock);
132} 103}
133 104
134#else /* CONFIG_9P_FSCACHE */ 105static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
135 106 struct page *page)
136static inline int v9fs_cache_register(void)
137{ 107{
138 return 1; 108 return __v9fs_fscache_wait_on_page_write(inode, page);
139} 109}
140 110
141static inline void v9fs_cache_unregister(void) {} 111#else /* CONFIG_9P_FSCACHE */
142 112
143static inline int v9fs_fscache_release_page(struct page *page, 113static inline int v9fs_fscache_release_page(struct page *page,
144 gfp_t gfp) { 114 gfp_t gfp) {
@@ -168,9 +138,11 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
168static inline void v9fs_uncache_page(struct inode *inode, struct page *page) 138static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
169{} 139{}
170 140
171static inline void v9fs_vcookie_set_qid(struct inode *inode, 141static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
172 struct p9_qid *qid) 142 struct page *page)
173{} 143{
144 return;
145}
174 146
175#endif /* CONFIG_9P_FSCACHE */ 147#endif /* CONFIG_9P_FSCACHE */
176#endif /* _9P_CACHE_H */ 148#endif /* _9P_CACHE_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b00223c99d70..cd63e002d826 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -125,46 +125,17 @@ err_out:
125 return -ENOMEM; 125 return -ENOMEM;
126} 126}
127 127
128/** 128static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
129 * v9fs_fid_lookup - lookup for a fid, try to walk if not found 129 uid_t uid, int any)
130 * @dentry: dentry to look for fid in
131 *
132 * Look for a fid in the specified dentry for the current user.
133 * If no fid is found, try to create one walking from a fid from the parent
134 * dentry (if it has one), or the root dentry. If the user haven't accessed
135 * the fs yet, attach now and walk from the root.
136 */
137
138struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
139{ 130{
140 int i, n, l, clone, any, access;
141 u32 uid;
142 struct p9_fid *fid, *old_fid = NULL;
143 struct dentry *ds; 131 struct dentry *ds;
144 struct v9fs_session_info *v9ses;
145 char **wnames, *uname; 132 char **wnames, *uname;
133 int i, n, l, clone, access;
134 struct v9fs_session_info *v9ses;
135 struct p9_fid *fid, *old_fid = NULL;
146 136
147 v9ses = v9fs_inode2v9ses(dentry->d_inode); 137 v9ses = v9fs_inode2v9ses(dentry->d_inode);
148 access = v9ses->flags & V9FS_ACCESS_MASK; 138 access = v9ses->flags & V9FS_ACCESS_MASK;
149 switch (access) {
150 case V9FS_ACCESS_SINGLE:
151 case V9FS_ACCESS_USER:
152 case V9FS_ACCESS_CLIENT:
153 uid = current_fsuid();
154 any = 0;
155 break;
156
157 case V9FS_ACCESS_ANY:
158 uid = v9ses->uid;
159 any = 1;
160 break;
161
162 default:
163 uid = ~0;
164 any = 0;
165 break;
166 }
167
168 fid = v9fs_fid_find(dentry, uid, any); 139 fid = v9fs_fid_find(dentry, uid, any);
169 if (fid) 140 if (fid)
170 return fid; 141 return fid;
@@ -250,6 +221,45 @@ err_out:
250 return fid; 221 return fid;
251} 222}
252 223
224/**
225 * v9fs_fid_lookup - lookup for a fid, try to walk if not found
226 * @dentry: dentry to look for fid in
227 *
228 * Look for a fid in the specified dentry for the current user.
229 * If no fid is found, try to create one walking from a fid from the parent
230 * dentry (if it has one), or the root dentry. If the user haven't accessed
231 * the fs yet, attach now and walk from the root.
232 */
233
234struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
235{
236 uid_t uid;
237 int any, access;
238 struct v9fs_session_info *v9ses;
239
240 v9ses = v9fs_inode2v9ses(dentry->d_inode);
241 access = v9ses->flags & V9FS_ACCESS_MASK;
242 switch (access) {
243 case V9FS_ACCESS_SINGLE:
244 case V9FS_ACCESS_USER:
245 case V9FS_ACCESS_CLIENT:
246 uid = current_fsuid();
247 any = 0;
248 break;
249
250 case V9FS_ACCESS_ANY:
251 uid = v9ses->uid;
252 any = 1;
253 break;
254
255 default:
256 uid = ~0;
257 any = 0;
258 break;
259 }
260 return v9fs_fid_lookup_with_uid(dentry, uid, any);
261}
262
253struct p9_fid *v9fs_fid_clone(struct dentry *dentry) 263struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
254{ 264{
255 struct p9_fid *fid, *ret; 265 struct p9_fid *fid, *ret;
@@ -261,3 +271,39 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
261 ret = p9_client_walk(fid, 0, NULL, 1); 271 ret = p9_client_walk(fid, 0, NULL, 1);
262 return ret; 272 return ret;
263} 273}
274
275static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
276{
277 struct p9_fid *fid, *ret;
278
279 fid = v9fs_fid_lookup_with_uid(dentry, uid, 0);
280 if (IS_ERR(fid))
281 return fid;
282
283 ret = p9_client_walk(fid, 0, NULL, 1);
284 return ret;
285}
286
287struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
288{
289 int err;
290 struct p9_fid *fid;
291
292 fid = v9fs_fid_clone_with_uid(dentry, 0);
293 if (IS_ERR(fid))
294 goto error_out;
295 /*
296 * writeback fid will only be used to write back the
297 * dirty pages. We always request for the open fid in read-write
298 * mode so that a partial page write which result in page
299 * read can work.
300 */
301 err = p9_client_open(fid, O_RDWR);
302 if (err < 0) {
303 p9_client_clunk(fid);
304 fid = ERR_PTR(err);
305 goto error_out;
306 }
307error_out:
308 return fid;
309}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index c3bbd6af996d..bb0b6e7f58fc 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -19,7 +19,8 @@
19 * Boston, MA 02111-1301 USA 19 * Boston, MA 02111-1301 USA
20 * 20 *
21 */ 21 */
22 22#ifndef FS_9P_FID_H
23#define FS_9P_FID_H
23#include <linux/list.h> 24#include <linux/list.h>
24 25
25/** 26/**
@@ -45,3 +46,5 @@ struct v9fs_dentry {
45struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); 46struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
46struct p9_fid *v9fs_fid_clone(struct dentry *dentry); 47struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
47int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); 48int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
49struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
50#endif
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f77cd33ba83..c82b017f51f3 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,6 +39,7 @@
39 39
40static DEFINE_SPINLOCK(v9fs_sessionlist_lock); 40static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
41static LIST_HEAD(v9fs_sessionlist); 41static LIST_HEAD(v9fs_sessionlist);
42struct kmem_cache *v9fs_inode_cache;
42 43
43/* 44/*
44 * Option Parsing (code inspired by NFS code) 45 * Option Parsing (code inspired by NFS code)
@@ -55,7 +56,7 @@ enum {
55 /* Cache options */ 56 /* Cache options */
56 Opt_cache_loose, Opt_fscache, 57 Opt_cache_loose, Opt_fscache,
57 /* Access options */ 58 /* Access options */
58 Opt_access, 59 Opt_access, Opt_posixacl,
59 /* Error token */ 60 /* Error token */
60 Opt_err 61 Opt_err
61}; 62};
@@ -73,6 +74,7 @@ static const match_table_t tokens = {
73 {Opt_fscache, "fscache"}, 74 {Opt_fscache, "fscache"},
74 {Opt_cachetag, "cachetag=%s"}, 75 {Opt_cachetag, "cachetag=%s"},
75 {Opt_access, "access=%s"}, 76 {Opt_access, "access=%s"},
77 {Opt_posixacl, "posixacl"},
76 {Opt_err, NULL} 78 {Opt_err, NULL}
77}; 79};
78 80
@@ -194,15 +196,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
194 else if (strcmp(s, "any") == 0) 196 else if (strcmp(s, "any") == 0)
195 v9ses->flags |= V9FS_ACCESS_ANY; 197 v9ses->flags |= V9FS_ACCESS_ANY;
196 else if (strcmp(s, "client") == 0) { 198 else if (strcmp(s, "client") == 0) {
197#ifdef CONFIG_9P_FS_POSIX_ACL
198 v9ses->flags |= V9FS_ACCESS_CLIENT; 199 v9ses->flags |= V9FS_ACCESS_CLIENT;
199#else
200 P9_DPRINTK(P9_DEBUG_ERROR,
201 "access=client option not supported\n");
202 kfree(s);
203 ret = -EINVAL;
204 goto free_and_return;
205#endif
206 } else { 200 } else {
207 v9ses->flags |= V9FS_ACCESS_SINGLE; 201 v9ses->flags |= V9FS_ACCESS_SINGLE;
208 v9ses->uid = simple_strtoul(s, &e, 10); 202 v9ses->uid = simple_strtoul(s, &e, 10);
@@ -212,6 +206,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
212 kfree(s); 206 kfree(s);
213 break; 207 break;
214 208
209 case Opt_posixacl:
210#ifdef CONFIG_9P_FS_POSIX_ACL
211 v9ses->flags |= V9FS_POSIX_ACL;
212#else
213 P9_DPRINTK(P9_DEBUG_ERROR,
214 "Not defined CONFIG_9P_FS_POSIX_ACL. "
215 "Ignoring posixacl option\n");
216#endif
217 break;
218
215 default: 219 default:
216 continue; 220 continue;
217 } 221 }
@@ -260,19 +264,12 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
260 list_add(&v9ses->slist, &v9fs_sessionlist); 264 list_add(&v9ses->slist, &v9fs_sessionlist);
261 spin_unlock(&v9fs_sessionlist_lock); 265 spin_unlock(&v9fs_sessionlist_lock);
262 266
263 v9ses->flags = V9FS_ACCESS_USER;
264 strcpy(v9ses->uname, V9FS_DEFUSER); 267 strcpy(v9ses->uname, V9FS_DEFUSER);
265 strcpy(v9ses->aname, V9FS_DEFANAME); 268 strcpy(v9ses->aname, V9FS_DEFANAME);
266 v9ses->uid = ~0; 269 v9ses->uid = ~0;
267 v9ses->dfltuid = V9FS_DEFUID; 270 v9ses->dfltuid = V9FS_DEFUID;
268 v9ses->dfltgid = V9FS_DEFGID; 271 v9ses->dfltgid = V9FS_DEFGID;
269 272
270 rc = v9fs_parse_options(v9ses, data);
271 if (rc < 0) {
272 retval = rc;
273 goto error;
274 }
275
276 v9ses->clnt = p9_client_create(dev_name, data); 273 v9ses->clnt = p9_client_create(dev_name, data);
277 if (IS_ERR(v9ses->clnt)) { 274 if (IS_ERR(v9ses->clnt)) {
278 retval = PTR_ERR(v9ses->clnt); 275 retval = PTR_ERR(v9ses->clnt);
@@ -281,10 +278,20 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
281 goto error; 278 goto error;
282 } 279 }
283 280
284 if (p9_is_proto_dotl(v9ses->clnt)) 281 v9ses->flags = V9FS_ACCESS_USER;
282
283 if (p9_is_proto_dotl(v9ses->clnt)) {
284 v9ses->flags = V9FS_ACCESS_CLIENT;
285 v9ses->flags |= V9FS_PROTO_2000L; 285 v9ses->flags |= V9FS_PROTO_2000L;
286 else if (p9_is_proto_dotu(v9ses->clnt)) 286 } else if (p9_is_proto_dotu(v9ses->clnt)) {
287 v9ses->flags |= V9FS_PROTO_2000U; 287 v9ses->flags |= V9FS_PROTO_2000U;
288 }
289
290 rc = v9fs_parse_options(v9ses, data);
291 if (rc < 0) {
292 retval = rc;
293 goto error;
294 }
288 295
289 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; 296 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
290 297
@@ -306,6 +313,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
306 v9ses->flags |= V9FS_ACCESS_ANY; 313 v9ses->flags |= V9FS_ACCESS_ANY;
307 v9ses->uid = ~0; 314 v9ses->uid = ~0;
308 } 315 }
316 if (!v9fs_proto_dotl(v9ses) ||
317 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
318 /*
319 * We support ACL checks on clinet only if the protocol is
320 * 9P2000.L and access is V9FS_ACCESS_CLIENT.
321 */
322 v9ses->flags &= ~V9FS_ACL_MASK;
323 }
309 324
310 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, 325 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0,
311 v9ses->aname); 326 v9ses->aname);
@@ -467,6 +482,63 @@ static void v9fs_sysfs_cleanup(void)
467 kobject_put(v9fs_kobj); 482 kobject_put(v9fs_kobj);
468} 483}
469 484
485static void v9fs_inode_init_once(void *foo)
486{
487 struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
488#ifdef CONFIG_9P_FSCACHE
489 v9inode->fscache = NULL;
490 v9inode->fscache_key = NULL;
491#endif
492 inode_init_once(&v9inode->vfs_inode);
493}
494
495/**
496 * v9fs_init_inode_cache - initialize a cache for 9P
497 * Returns 0 on success.
498 */
499static int v9fs_init_inode_cache(void)
500{
501 v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
502 sizeof(struct v9fs_inode),
503 0, (SLAB_RECLAIM_ACCOUNT|
504 SLAB_MEM_SPREAD),
505 v9fs_inode_init_once);
506 if (!v9fs_inode_cache)
507 return -ENOMEM;
508
509 return 0;
510}
511
512/**
513 * v9fs_destroy_inode_cache - destroy the cache of 9P inode
514 *
515 */
516static void v9fs_destroy_inode_cache(void)
517{
518 kmem_cache_destroy(v9fs_inode_cache);
519}
520
521static int v9fs_cache_register(void)
522{
523 int ret;
524 ret = v9fs_init_inode_cache();
525 if (ret < 0)
526 return ret;
527#ifdef CONFIG_9P_FSCACHE
528 return fscache_register_netfs(&v9fs_cache_netfs);
529#else
530 return ret;
531#endif
532}
533
534static void v9fs_cache_unregister(void)
535{
536 v9fs_destroy_inode_cache();
537#ifdef CONFIG_9P_FSCACHE
538 fscache_unregister_netfs(&v9fs_cache_netfs);
539#endif
540}
541
470/** 542/**
471 * init_v9fs - Initialize module 543 * init_v9fs - Initialize module
472 * 544 *
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c4b5d8864f0d..bd8496db135b 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,9 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#ifndef FS_9P_V9FS_H
24#define FS_9P_V9FS_H
25
23#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
24 27
25/** 28/**
@@ -28,8 +31,10 @@
28 * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions 31 * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions
29 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy 32 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
30 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) 33 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
34 * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client.
31 * @V9FS_ACCESS_ANY: use a single attach for all users 35 * @V9FS_ACCESS_ANY: use a single attach for all users
32 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options 36 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options
37 * @V9FS_POSIX_ACL: POSIX ACLs are enforced
33 * 38 *
34 * Session flags reflect options selected by users at mount time 39 * Session flags reflect options selected by users at mount time
35 */ 40 */
@@ -37,13 +42,15 @@
37 V9FS_ACCESS_USER | \ 42 V9FS_ACCESS_USER | \
38 V9FS_ACCESS_CLIENT) 43 V9FS_ACCESS_CLIENT)
39#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY 44#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
45#define V9FS_ACL_MASK V9FS_POSIX_ACL
40 46
41enum p9_session_flags { 47enum p9_session_flags {
42 V9FS_PROTO_2000U = 0x01, 48 V9FS_PROTO_2000U = 0x01,
43 V9FS_PROTO_2000L = 0x02, 49 V9FS_PROTO_2000L = 0x02,
44 V9FS_ACCESS_SINGLE = 0x04, 50 V9FS_ACCESS_SINGLE = 0x04,
45 V9FS_ACCESS_USER = 0x08, 51 V9FS_ACCESS_USER = 0x08,
46 V9FS_ACCESS_CLIENT = 0x10 52 V9FS_ACCESS_CLIENT = 0x10,
53 V9FS_POSIX_ACL = 0x20
47}; 54};
48 55
49/* possible values of ->cache */ 56/* possible values of ->cache */
@@ -109,8 +116,28 @@ struct v9fs_session_info {
109 struct list_head slist; /* list of sessions registered with v9fs */ 116 struct list_head slist; /* list of sessions registered with v9fs */
110 struct backing_dev_info bdi; 117 struct backing_dev_info bdi;
111 struct rw_semaphore rename_sem; 118 struct rw_semaphore rename_sem;
119 struct p9_fid *root_fid; /* Used for file system sync */
120};
121
122/* cache_validity flags */
123#define V9FS_INO_INVALID_ATTR 0x01
124
125struct v9fs_inode {
126#ifdef CONFIG_9P_FSCACHE
127 spinlock_t fscache_lock;
128 struct fscache_cookie *fscache;
129 struct p9_qid *fscache_key;
130#endif
131 unsigned int cache_validity;
132 struct p9_fid *writeback_fid;
133 struct inode vfs_inode;
112}; 134};
113 135
136static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
137{
138 return container_of(inode, struct v9fs_inode, vfs_inode);
139}
140
114struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 141struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
115 char *); 142 char *);
116extern void v9fs_session_close(struct v9fs_session_info *v9ses); 143extern void v9fs_session_close(struct v9fs_session_info *v9ses);
@@ -124,16 +151,15 @@ extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
124 struct inode *new_dir, struct dentry *new_dentry); 151 struct inode *new_dir, struct dentry *new_dentry);
125extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, 152extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
126 void *p); 153 void *p);
127extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses, 154extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
128 struct p9_fid *fid, 155 struct p9_fid *fid,
129 struct super_block *sb); 156 struct super_block *sb);
130
131extern const struct inode_operations v9fs_dir_inode_operations_dotl; 157extern const struct inode_operations v9fs_dir_inode_operations_dotl;
132extern const struct inode_operations v9fs_file_inode_operations_dotl; 158extern const struct inode_operations v9fs_file_inode_operations_dotl;
133extern const struct inode_operations v9fs_symlink_inode_operations_dotl; 159extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
134extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses, 160extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
135 struct p9_fid *fid, 161 struct p9_fid *fid,
136 struct super_block *sb); 162 struct super_block *sb);
137 163
138/* other default globals */ 164/* other default globals */
139#define V9FS_PORT 564 165#define V9FS_PORT 564
@@ -158,7 +184,7 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
158} 184}
159 185
160/** 186/**
161 * v9fs_inode_from_fid - Helper routine to populate an inode by 187 * v9fs_get_inode_from_fid - Helper routine to populate an inode by
162 * issuing a attribute request 188 * issuing a attribute request
163 * @v9ses: session information 189 * @v9ses: session information
164 * @fid: fid to issue attribute request for 190 * @fid: fid to issue attribute request for
@@ -166,11 +192,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
166 * 192 *
167 */ 193 */
168static inline struct inode * 194static inline struct inode *
169v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, 195v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
170 struct super_block *sb) 196 struct super_block *sb)
171{ 197{
172 if (v9fs_proto_dotl(v9ses)) 198 if (v9fs_proto_dotl(v9ses))
173 return v9fs_inode_dotl(v9ses, fid, sb); 199 return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
174 else 200 else
175 return v9fs_inode(v9ses, fid, sb); 201 return v9fs_inode_from_fid(v9ses, fid, sb);
176} 202}
203#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b789f8e597ec..4014160903a9 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -20,6 +20,8 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#ifndef FS_9P_V9FS_VFS_H
24#define FS_9P_V9FS_VFS_H
23 25
24/* plan9 semantics are that created files are implicitly opened. 26/* plan9 semantics are that created files are implicitly opened.
25 * But linux semantics are that you call create, then open. 27 * But linux semantics are that you call create, then open.
@@ -36,6 +38,7 @@
36 * unlink calls remove, which is an implicit clunk. So we have to track 38 * unlink calls remove, which is an implicit clunk. So we have to track
37 * that kind of thing so that we don't try to clunk a dead fid. 39 * that kind of thing so that we don't try to clunk a dead fid.
38 */ 40 */
41#define P9_LOCK_TIMEOUT (30*HZ)
39 42
40extern struct file_system_type v9fs_fs_type; 43extern struct file_system_type v9fs_fs_type;
41extern const struct address_space_operations v9fs_addr_operations; 44extern const struct address_space_operations v9fs_addr_operations;
@@ -45,13 +48,15 @@ extern const struct file_operations v9fs_dir_operations;
45extern const struct file_operations v9fs_dir_operations_dotl; 48extern const struct file_operations v9fs_dir_operations_dotl;
46extern const struct dentry_operations v9fs_dentry_operations; 49extern const struct dentry_operations v9fs_dentry_operations;
47extern const struct dentry_operations v9fs_cached_dentry_operations; 50extern const struct dentry_operations v9fs_cached_dentry_operations;
51extern const struct file_operations v9fs_cached_file_operations;
52extern const struct file_operations v9fs_cached_file_operations_dotl;
53extern struct kmem_cache *v9fs_inode_cache;
48 54
49#ifdef CONFIG_9P_FSCACHE
50struct inode *v9fs_alloc_inode(struct super_block *sb); 55struct inode *v9fs_alloc_inode(struct super_block *sb);
51void v9fs_destroy_inode(struct inode *inode); 56void v9fs_destroy_inode(struct inode *inode);
52#endif
53
54struct inode *v9fs_get_inode(struct super_block *sb, int mode); 57struct inode *v9fs_get_inode(struct super_block *sb, int mode);
58int v9fs_init_inode(struct v9fs_session_info *v9ses,
59 struct inode *inode, int mode);
55void v9fs_evict_inode(struct inode *inode); 60void v9fs_evict_inode(struct inode *inode);
56ino_t v9fs_qid2ino(struct p9_qid *qid); 61ino_t v9fs_qid2ino(struct p9_qid *qid);
57void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 62void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -62,8 +67,19 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
62int v9fs_uflags2omode(int uflags, int extended); 67int v9fs_uflags2omode(int uflags, int extended);
63 68
64ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); 69ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
70ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
65void v9fs_blank_wstat(struct p9_wstat *wstat); 71void v9fs_blank_wstat(struct p9_wstat *wstat);
66int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); 72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
67int v9fs_file_fsync_dotl(struct file *filp, int datasync); 73int v9fs_file_fsync_dotl(struct file *filp, int datasync);
68 74ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
69#define P9_LOCK_TIMEOUT (30*HZ) 75 const char __user *, size_t, loff_t *, int);
76int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
77int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
78static inline void v9fs_invalidate_inode_attr(struct inode *inode)
79{
80 struct v9fs_inode *v9inode;
81 v9inode = V9FS_I(inode);
82 v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
83 return;
84}
85#endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b7f2a8e3863e..2524e4cbb8ea 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -39,16 +39,16 @@
39#include "v9fs.h" 39#include "v9fs.h"
40#include "v9fs_vfs.h" 40#include "v9fs_vfs.h"
41#include "cache.h" 41#include "cache.h"
42#include "fid.h"
42 43
43/** 44/**
44 * v9fs_vfs_readpage - read an entire page in from 9P 45 * v9fs_fid_readpage - read an entire page in from 9P
45 * 46 *
46 * @filp: file being read 47 * @fid: fid being read
47 * @page: structure to page 48 * @page: structure to page
48 * 49 *
49 */ 50 */
50 51static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
51static int v9fs_vfs_readpage(struct file *filp, struct page *page)
52{ 52{
53 int retval; 53 int retval;
54 loff_t offset; 54 loff_t offset;
@@ -67,7 +67,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
67 buffer = kmap(page); 67 buffer = kmap(page);
68 offset = page_offset(page); 68 offset = page_offset(page);
69 69
70 retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); 70 retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
71 if (retval < 0) { 71 if (retval < 0) {
72 v9fs_uncache_page(inode, page); 72 v9fs_uncache_page(inode, page);
73 goto done; 73 goto done;
@@ -87,6 +87,19 @@ done:
87} 87}
88 88
89/** 89/**
90 * v9fs_vfs_readpage - read an entire page in from 9P
91 *
92 * @filp: file being read
93 * @page: structure to page
94 *
95 */
96
97static int v9fs_vfs_readpage(struct file *filp, struct page *page)
98{
99 return v9fs_fid_readpage(filp->private_data, page);
100}
101
102/**
90 * v9fs_vfs_readpages - read a set of pages from 9P 103 * v9fs_vfs_readpages - read a set of pages from 9P
91 * 104 *
92 * @filp: file being read 105 * @filp: file being read
@@ -124,7 +137,6 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
124{ 137{
125 if (PagePrivate(page)) 138 if (PagePrivate(page))
126 return 0; 139 return 0;
127
128 return v9fs_fscache_release_page(page, gfp); 140 return v9fs_fscache_release_page(page, gfp);
129} 141}
130 142
@@ -137,20 +149,89 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
137 149
138static void v9fs_invalidate_page(struct page *page, unsigned long offset) 150static void v9fs_invalidate_page(struct page *page, unsigned long offset)
139{ 151{
152 /*
153 * If called with zero offset, we should release
154 * the private state assocated with the page
155 */
140 if (offset == 0) 156 if (offset == 0)
141 v9fs_fscache_invalidate_page(page); 157 v9fs_fscache_invalidate_page(page);
142} 158}
143 159
160static int v9fs_vfs_writepage_locked(struct page *page)
161{
162 char *buffer;
163 int retval, len;
164 loff_t offset, size;
165 mm_segment_t old_fs;
166 struct v9fs_inode *v9inode;
167 struct inode *inode = page->mapping->host;
168
169 v9inode = V9FS_I(inode);
170 size = i_size_read(inode);
171 if (page->index == size >> PAGE_CACHE_SHIFT)
172 len = size & ~PAGE_CACHE_MASK;
173 else
174 len = PAGE_CACHE_SIZE;
175
176 set_page_writeback(page);
177
178 buffer = kmap(page);
179 offset = page_offset(page);
180
181 old_fs = get_fs();
182 set_fs(get_ds());
183 /* We should have writeback_fid always set */
184 BUG_ON(!v9inode->writeback_fid);
185
186 retval = v9fs_file_write_internal(inode,
187 v9inode->writeback_fid,
188 (__force const char __user *)buffer,
189 len, &offset, 0);
190 if (retval > 0)
191 retval = 0;
192
193 set_fs(old_fs);
194 kunmap(page);
195 end_page_writeback(page);
196 return retval;
197}
198
199static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
200{
201 int retval;
202
203 retval = v9fs_vfs_writepage_locked(page);
204 if (retval < 0) {
205 if (retval == -EAGAIN) {
206 redirty_page_for_writepage(wbc, page);
207 retval = 0;
208 } else {
209 SetPageError(page);
210 mapping_set_error(page->mapping, retval);
211 }
212 } else
213 retval = 0;
214
215 unlock_page(page);
216 return retval;
217}
218
144/** 219/**
145 * v9fs_launder_page - Writeback a dirty page 220 * v9fs_launder_page - Writeback a dirty page
146 * Since the writes go directly to the server, we simply return a 0
147 * here to indicate success.
148 *
149 * Returns 0 on success. 221 * Returns 0 on success.
150 */ 222 */
151 223
152static int v9fs_launder_page(struct page *page) 224static int v9fs_launder_page(struct page *page)
153{ 225{
226 int retval;
227 struct inode *inode = page->mapping->host;
228
229 v9fs_fscache_wait_on_page_write(inode, page);
230 if (clear_page_dirty_for_io(page)) {
231 retval = v9fs_vfs_writepage_locked(page);
232 if (retval)
233 return retval;
234 }
154 return 0; 235 return 0;
155} 236}
156 237
@@ -173,9 +254,15 @@ static int v9fs_launder_page(struct page *page)
173 * with an error. 254 * with an error.
174 * 255 *
175 */ 256 */
176ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 257static ssize_t
177 loff_t pos, unsigned long nr_segs) 258v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
259 loff_t pos, unsigned long nr_segs)
178{ 260{
261 /*
262 * FIXME
263 * Now that we do caching with cache mode enabled, We need
264 * to support direct IO
265 */
179 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " 266 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
180 "off/no(%lld/%lu) EINVAL\n", 267 "off/no(%lld/%lu) EINVAL\n",
181 iocb->ki_filp->f_path.dentry->d_name.name, 268 iocb->ki_filp->f_path.dentry->d_name.name,
@@ -183,11 +270,84 @@ ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
183 270
184 return -EINVAL; 271 return -EINVAL;
185} 272}
273
274static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
275 loff_t pos, unsigned len, unsigned flags,
276 struct page **pagep, void **fsdata)
277{
278 int retval = 0;
279 struct page *page;
280 struct v9fs_inode *v9inode;
281 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
282 struct inode *inode = mapping->host;
283
284 v9inode = V9FS_I(inode);
285start:
286 page = grab_cache_page_write_begin(mapping, index, flags);
287 if (!page) {
288 retval = -ENOMEM;
289 goto out;
290 }
291 BUG_ON(!v9inode->writeback_fid);
292 if (PageUptodate(page))
293 goto out;
294
295 if (len == PAGE_CACHE_SIZE)
296 goto out;
297
298 retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
299 page_cache_release(page);
300 if (!retval)
301 goto start;
302out:
303 *pagep = page;
304 return retval;
305}
306
307static int v9fs_write_end(struct file *filp, struct address_space *mapping,
308 loff_t pos, unsigned len, unsigned copied,
309 struct page *page, void *fsdata)
310{
311 loff_t last_pos = pos + copied;
312 struct inode *inode = page->mapping->host;
313
314 if (unlikely(copied < len)) {
315 /*
316 * zero out the rest of the area
317 */
318 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
319
320 zero_user(page, from + copied, len - copied);
321 flush_dcache_page(page);
322 }
323
324 if (!PageUptodate(page))
325 SetPageUptodate(page);
326 /*
327 * No need to use i_size_read() here, the i_size
328 * cannot change under us because we hold the i_mutex.
329 */
330 if (last_pos > inode->i_size) {
331 inode_add_bytes(inode, last_pos - inode->i_size);
332 i_size_write(inode, last_pos);
333 }
334 set_page_dirty(page);
335 unlock_page(page);
336 page_cache_release(page);
337
338 return copied;
339}
340
341
186const struct address_space_operations v9fs_addr_operations = { 342const struct address_space_operations v9fs_addr_operations = {
187 .readpage = v9fs_vfs_readpage, 343 .readpage = v9fs_vfs_readpage,
188 .readpages = v9fs_vfs_readpages, 344 .readpages = v9fs_vfs_readpages,
189 .releasepage = v9fs_release_page, 345 .set_page_dirty = __set_page_dirty_nobuffers,
190 .invalidatepage = v9fs_invalidate_page, 346 .writepage = v9fs_vfs_writepage,
191 .launder_page = v9fs_launder_page, 347 .write_begin = v9fs_write_begin,
192 .direct_IO = v9fs_direct_IO, 348 .write_end = v9fs_write_end,
349 .releasepage = v9fs_release_page,
350 .invalidatepage = v9fs_invalidate_page,
351 .launder_page = v9fs_launder_page,
352 .direct_IO = v9fs_direct_IO,
193}; 353};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 233b7d4ffe5e..b6a3b9f7fe4d 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -63,20 +63,15 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
63 * v9fs_cached_dentry_delete - called when dentry refcount equals 0 63 * v9fs_cached_dentry_delete - called when dentry refcount equals 0
64 * @dentry: dentry in question 64 * @dentry: dentry in question
65 * 65 *
66 * Only return 1 if our inode is invalid. Only non-synthetic files
67 * (ones without mtime == 0) should be calling this function.
68 *
69 */ 66 */
70
71static int v9fs_cached_dentry_delete(const struct dentry *dentry) 67static int v9fs_cached_dentry_delete(const struct dentry *dentry)
72{ 68{
73 struct inode *inode = dentry->d_inode; 69 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n",
74 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 70 dentry->d_name.name, dentry);
75 dentry);
76 71
77 if(!inode) 72 /* Don't cache negative dentries */
73 if (!dentry->d_inode)
78 return 1; 74 return 1;
79
80 return 0; 75 return 0;
81} 76}
82 77
@@ -105,7 +100,41 @@ static void v9fs_dentry_release(struct dentry *dentry)
105 } 100 }
106} 101}
107 102
103static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
104{
105 struct p9_fid *fid;
106 struct inode *inode;
107 struct v9fs_inode *v9inode;
108
109 if (nd->flags & LOOKUP_RCU)
110 return -ECHILD;
111
112 inode = dentry->d_inode;
113 if (!inode)
114 goto out_valid;
115
116 v9inode = V9FS_I(inode);
117 if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
118 int retval;
119 struct v9fs_session_info *v9ses;
120 fid = v9fs_fid_lookup(dentry);
121 if (IS_ERR(fid))
122 return PTR_ERR(fid);
123
124 v9ses = v9fs_inode2v9ses(inode);
125 if (v9fs_proto_dotl(v9ses))
126 retval = v9fs_refresh_inode_dotl(fid, inode);
127 else
128 retval = v9fs_refresh_inode(fid, inode);
129 if (retval <= 0)
130 return retval;
131 }
132out_valid:
133 return 1;
134}
135
108const struct dentry_operations v9fs_cached_dentry_operations = { 136const struct dentry_operations v9fs_cached_dentry_operations = {
137 .d_revalidate = v9fs_lookup_revalidate,
109 .d_delete = v9fs_cached_dentry_delete, 138 .d_delete = v9fs_cached_dentry_delete,
110 .d_release = v9fs_dentry_release, 139 .d_release = v9fs_dentry_release,
111}; 140};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b84ebe8cefed..9c2bdda5cd9d 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -295,7 +295,6 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
295 P9_DPRINTK(P9_DEBUG_VFS, 295 P9_DPRINTK(P9_DEBUG_VFS,
296 "v9fs_dir_release: inode: %p filp: %p fid: %d\n", 296 "v9fs_dir_release: inode: %p filp: %p fid: %d\n",
297 inode, filp, fid ? fid->fid : -1); 297 inode, filp, fid ? fid->fid : -1);
298 filemap_write_and_wait(inode->i_mapping);
299 if (fid) 298 if (fid)
300 p9_client_clunk(fid); 299 p9_client_clunk(fid);
301 return 0; 300 return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 240c30674396..78bcb97c3425 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -44,8 +44,7 @@
44#include "fid.h" 44#include "fid.h"
45#include "cache.h" 45#include "cache.h"
46 46
47static const struct file_operations v9fs_cached_file_operations; 47static const struct vm_operations_struct v9fs_file_vm_ops;
48static const struct file_operations v9fs_cached_file_operations_dotl;
49 48
50/** 49/**
51 * v9fs_file_open - open a file (or directory) 50 * v9fs_file_open - open a file (or directory)
@@ -57,11 +56,13 @@ static const struct file_operations v9fs_cached_file_operations_dotl;
57int v9fs_file_open(struct inode *inode, struct file *file) 56int v9fs_file_open(struct inode *inode, struct file *file)
58{ 57{
59 int err; 58 int err;
59 struct v9fs_inode *v9inode;
60 struct v9fs_session_info *v9ses; 60 struct v9fs_session_info *v9ses;
61 struct p9_fid *fid; 61 struct p9_fid *fid;
62 int omode; 62 int omode;
63 63
64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); 64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
65 v9inode = V9FS_I(inode);
65 v9ses = v9fs_inode2v9ses(inode); 66 v9ses = v9fs_inode2v9ses(inode);
66 if (v9fs_proto_dotl(v9ses)) 67 if (v9fs_proto_dotl(v9ses))
67 omode = file->f_flags; 68 omode = file->f_flags;
@@ -89,20 +90,30 @@ int v9fs_file_open(struct inode *inode, struct file *file)
89 } 90 }
90 91
91 file->private_data = fid; 92 file->private_data = fid;
92 if ((fid->qid.version) && (v9ses->cache)) { 93 if (v9ses->cache && !v9inode->writeback_fid) {
93 P9_DPRINTK(P9_DEBUG_VFS, "cached"); 94 /*
94 /* enable cached file options */ 95 * clone a fid and add it to writeback_fid
95 if(file->f_op == &v9fs_file_operations) 96 * we do it during open time instead of
96 file->f_op = &v9fs_cached_file_operations; 97 * page dirty time via write_begin/page_mkwrite
97 else if (file->f_op == &v9fs_file_operations_dotl) 98 * because we want write after unlink usecase
98 file->f_op = &v9fs_cached_file_operations_dotl; 99 * to work.
99 100 */
101 fid = v9fs_writeback_fid(file->f_path.dentry);
102 if (IS_ERR(fid)) {
103 err = PTR_ERR(fid);
104 goto out_error;
105 }
106 v9inode->writeback_fid = (void *) fid;
107 }
100#ifdef CONFIG_9P_FSCACHE 108#ifdef CONFIG_9P_FSCACHE
109 if (v9ses->cache)
101 v9fs_cache_inode_set_cookie(inode, file); 110 v9fs_cache_inode_set_cookie(inode, file);
102#endif 111#endif
103 }
104
105 return 0; 112 return 0;
113out_error:
114 p9_client_clunk(file->private_data);
115 file->private_data = NULL;
116 return err;
106} 117}
107 118
108/** 119/**
@@ -335,25 +346,22 @@ out_err:
335} 346}
336 347
337/** 348/**
338 * v9fs_file_readn - read from a file 349 * v9fs_fid_readn - read from a fid
339 * @filp: file pointer to read 350 * @fid: fid to read
340 * @data: data buffer to read data into 351 * @data: data buffer to read data into
341 * @udata: user data buffer to read data into 352 * @udata: user data buffer to read data into
342 * @count: size of buffer 353 * @count: size of buffer
343 * @offset: offset at which to read data 354 * @offset: offset at which to read data
344 * 355 *
345 */ 356 */
346
347ssize_t 357ssize_t
348v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, 358v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
349 u64 offset) 359 u64 offset)
350{ 360{
351 int n, total, size; 361 int n, total, size;
352 struct p9_fid *fid = filp->private_data;
353 362
354 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, 363 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
355 (long long unsigned) offset, count); 364 (long long unsigned) offset, count);
356
357 n = 0; 365 n = 0;
358 total = 0; 366 total = 0;
359 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; 367 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -379,6 +387,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
379} 387}
380 388
381/** 389/**
390 * v9fs_file_readn - read from a file
391 * @filp: file pointer to read
392 * @data: data buffer to read data into
393 * @udata: user data buffer to read data into
394 * @count: size of buffer
395 * @offset: offset at which to read data
396 *
397 */
398ssize_t
399v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
400 u64 offset)
401{
402 return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
403}
404
405/**
382 * v9fs_file_read - read from a file 406 * v9fs_file_read - read from a file
383 * @filp: file pointer to read 407 * @filp: file pointer to read
384 * @udata: user data buffer to read data into 408 * @udata: user data buffer to read data into
@@ -410,45 +434,22 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
410 return ret; 434 return ret;
411} 435}
412 436
413/** 437ssize_t
414 * v9fs_file_write - write to a file 438v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
415 * @filp: file pointer to write 439 const char __user *data, size_t count,
416 * @data: data buffer to write data from 440 loff_t *offset, int invalidate)
417 * @count: size of buffer
418 * @offset: offset at which to write data
419 *
420 */
421
422static ssize_t
423v9fs_file_write(struct file *filp, const char __user * data,
424 size_t count, loff_t * offset)
425{ 441{
426 ssize_t retval;
427 size_t total = 0;
428 int n; 442 int n;
429 struct p9_fid *fid; 443 loff_t i_size;
444 size_t total = 0;
430 struct p9_client *clnt; 445 struct p9_client *clnt;
431 struct inode *inode = filp->f_path.dentry->d_inode;
432 loff_t origin = *offset; 446 loff_t origin = *offset;
433 unsigned long pg_start, pg_end; 447 unsigned long pg_start, pg_end;
434 448
435 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, 449 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
436 (int)count, (int)*offset); 450 (int)count, (int)*offset);
437 451
438 fid = filp->private_data;
439 clnt = fid->clnt; 452 clnt = fid->clnt;
440
441 retval = generic_write_checks(filp, &origin, &count, 0);
442 if (retval)
443 goto out;
444
445 retval = -EINVAL;
446 if ((ssize_t) count < 0)
447 goto out;
448 retval = 0;
449 if (!count)
450 goto out;
451
452 do { 453 do {
453 n = p9_client_write(fid, NULL, data+total, origin+total, count); 454 n = p9_client_write(fid, NULL, data+total, origin+total, count);
454 if (n <= 0) 455 if (n <= 0)
@@ -457,25 +458,60 @@ v9fs_file_write(struct file *filp, const char __user * data,
457 total += n; 458 total += n;
458 } while (count > 0); 459 } while (count > 0);
459 460
460 if (total > 0) { 461 if (invalidate && (total > 0)) {
461 pg_start = origin >> PAGE_CACHE_SHIFT; 462 pg_start = origin >> PAGE_CACHE_SHIFT;
462 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; 463 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
463 if (inode->i_mapping && inode->i_mapping->nrpages) 464 if (inode->i_mapping && inode->i_mapping->nrpages)
464 invalidate_inode_pages2_range(inode->i_mapping, 465 invalidate_inode_pages2_range(inode->i_mapping,
465 pg_start, pg_end); 466 pg_start, pg_end);
466 *offset += total; 467 *offset += total;
467 i_size_write(inode, i_size_read(inode) + total); 468 i_size = i_size_read(inode);
468 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 469 if (*offset > i_size) {
470 inode_add_bytes(inode, *offset - i_size);
471 i_size_write(inode, *offset);
472 }
469 } 473 }
470
471 if (n < 0) 474 if (n < 0)
472 retval = n; 475 return n;
473 else 476
474 retval = total; 477 return total;
478}
479
480/**
481 * v9fs_file_write - write to a file
482 * @filp: file pointer to write
483 * @data: data buffer to write data from
484 * @count: size of buffer
485 * @offset: offset at which to write data
486 *
487 */
488static ssize_t
489v9fs_file_write(struct file *filp, const char __user * data,
490 size_t count, loff_t *offset)
491{
492 ssize_t retval = 0;
493 loff_t origin = *offset;
494
495
496 retval = generic_write_checks(filp, &origin, &count, 0);
497 if (retval)
498 goto out;
499
500 retval = -EINVAL;
501 if ((ssize_t) count < 0)
502 goto out;
503 retval = 0;
504 if (!count)
505 goto out;
506
507 return v9fs_file_write_internal(filp->f_path.dentry->d_inode,
508 filp->private_data,
509 data, count, offset, 1);
475out: 510out:
476 return retval; 511 return retval;
477} 512}
478 513
514
479static int v9fs_file_fsync(struct file *filp, int datasync) 515static int v9fs_file_fsync(struct file *filp, int datasync)
480{ 516{
481 struct p9_fid *fid; 517 struct p9_fid *fid;
@@ -505,28 +541,182 @@ int v9fs_file_fsync_dotl(struct file *filp, int datasync)
505 return retval; 541 return retval;
506} 542}
507 543
508static const struct file_operations v9fs_cached_file_operations = { 544static int
545v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
546{
547 int retval;
548
549 retval = generic_file_mmap(file, vma);
550 if (!retval)
551 vma->vm_ops = &v9fs_file_vm_ops;
552
553 return retval;
554}
555
556static int
557v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
558{
559 struct v9fs_inode *v9inode;
560 struct page *page = vmf->page;
561 struct file *filp = vma->vm_file;
562 struct inode *inode = filp->f_path.dentry->d_inode;
563
564
565 P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n",
566 page, (unsigned long)filp->private_data);
567
568 v9inode = V9FS_I(inode);
569 /* make sure the cache has finished storing the page */
570 v9fs_fscache_wait_on_page_write(inode, page);
571 BUG_ON(!v9inode->writeback_fid);
572 lock_page(page);
573 if (page->mapping != inode->i_mapping)
574 goto out_unlock;
575
576 return VM_FAULT_LOCKED;
577out_unlock:
578 unlock_page(page);
579 return VM_FAULT_NOPAGE;
580}
581
582static ssize_t
583v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
584 loff_t *offsetp)
585{
586 loff_t size, offset;
587 struct inode *inode;
588 struct address_space *mapping;
589
590 offset = *offsetp;
591 mapping = filp->f_mapping;
592 inode = mapping->host;
593 if (!count)
594 return 0;
595 size = i_size_read(inode);
596 if (offset < size)
597 filemap_write_and_wait_range(mapping, offset,
598 offset + count - 1);
599
600 return v9fs_file_read(filp, udata, count, offsetp);
601}
602
603/**
604 * v9fs_cached_file_read - read from a file
605 * @filp: file pointer to read
606 * @udata: user data buffer to read data into
607 * @count: size of buffer
608 * @offset: offset at which to read data
609 *
610 */
611static ssize_t
612v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
613 loff_t *offset)
614{
615 if (filp->f_flags & O_DIRECT)
616 return v9fs_direct_read(filp, data, count, offset);
617 return do_sync_read(filp, data, count, offset);
618}
619
620static ssize_t
621v9fs_direct_write(struct file *filp, const char __user * data,
622 size_t count, loff_t *offsetp)
623{
624 loff_t offset;
625 ssize_t retval;
626 struct inode *inode;
627 struct address_space *mapping;
628
629 offset = *offsetp;
630 mapping = filp->f_mapping;
631 inode = mapping->host;
632 if (!count)
633 return 0;
634
635 mutex_lock(&inode->i_mutex);
636 retval = filemap_write_and_wait_range(mapping, offset,
637 offset + count - 1);
638 if (retval)
639 goto err_out;
640 /*
641 * After a write we want buffered reads to be sure to go to disk to get
642 * the new data. We invalidate clean cached page from the region we're
643 * about to write. We do this *before* the write so that if we fail
644 * here we fall back to buffered write
645 */
646 if (mapping->nrpages) {
647 pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
648 pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
649
650 retval = invalidate_inode_pages2_range(mapping,
651 pg_start, pg_end);
652 /*
653 * If a page can not be invalidated, fall back
654 * to buffered write.
655 */
656 if (retval) {
657 if (retval == -EBUSY)
658 goto buff_write;
659 goto err_out;
660 }
661 }
662 retval = v9fs_file_write(filp, data, count, offsetp);
663err_out:
664 mutex_unlock(&inode->i_mutex);
665 return retval;
666
667buff_write:
668 mutex_unlock(&inode->i_mutex);
669 return do_sync_write(filp, data, count, offsetp);
670}
671
672/**
673 * v9fs_cached_file_write - write to a file
674 * @filp: file pointer to write
675 * @data: data buffer to write data from
676 * @count: size of buffer
677 * @offset: offset at which to write data
678 *
679 */
680static ssize_t
681v9fs_cached_file_write(struct file *filp, const char __user * data,
682 size_t count, loff_t *offset)
683{
684
685 if (filp->f_flags & O_DIRECT)
686 return v9fs_direct_write(filp, data, count, offset);
687 return do_sync_write(filp, data, count, offset);
688}
689
690static const struct vm_operations_struct v9fs_file_vm_ops = {
691 .fault = filemap_fault,
692 .page_mkwrite = v9fs_vm_page_mkwrite,
693};
694
695
696const struct file_operations v9fs_cached_file_operations = {
509 .llseek = generic_file_llseek, 697 .llseek = generic_file_llseek,
510 .read = do_sync_read, 698 .read = v9fs_cached_file_read,
699 .write = v9fs_cached_file_write,
511 .aio_read = generic_file_aio_read, 700 .aio_read = generic_file_aio_read,
512 .write = v9fs_file_write, 701 .aio_write = generic_file_aio_write,
513 .open = v9fs_file_open, 702 .open = v9fs_file_open,
514 .release = v9fs_dir_release, 703 .release = v9fs_dir_release,
515 .lock = v9fs_file_lock, 704 .lock = v9fs_file_lock,
516 .mmap = generic_file_readonly_mmap, 705 .mmap = v9fs_file_mmap,
517 .fsync = v9fs_file_fsync, 706 .fsync = v9fs_file_fsync,
518}; 707};
519 708
520static const struct file_operations v9fs_cached_file_operations_dotl = { 709const struct file_operations v9fs_cached_file_operations_dotl = {
521 .llseek = generic_file_llseek, 710 .llseek = generic_file_llseek,
522 .read = do_sync_read, 711 .read = v9fs_cached_file_read,
712 .write = v9fs_cached_file_write,
523 .aio_read = generic_file_aio_read, 713 .aio_read = generic_file_aio_read,
524 .write = v9fs_file_write, 714 .aio_write = generic_file_aio_write,
525 .open = v9fs_file_open, 715 .open = v9fs_file_open,
526 .release = v9fs_dir_release, 716 .release = v9fs_dir_release,
527 .lock = v9fs_file_lock_dotl, 717 .lock = v9fs_file_lock_dotl,
528 .flock = v9fs_file_flock_dotl, 718 .flock = v9fs_file_flock_dotl,
529 .mmap = generic_file_readonly_mmap, 719 .mmap = v9fs_file_mmap,
530 .fsync = v9fs_file_fsync_dotl, 720 .fsync = v9fs_file_fsync_dotl,
531}; 721};
532 722
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b76a40bdf4c2..8a2c232f708a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -203,26 +203,25 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
203 wstat->extension = NULL; 203 wstat->extension = NULL;
204} 204}
205 205
206#ifdef CONFIG_9P_FSCACHE
207/** 206/**
208 * v9fs_alloc_inode - helper function to allocate an inode 207 * v9fs_alloc_inode - helper function to allocate an inode
209 * This callback is executed before setting up the inode so that we
210 * can associate a vcookie with each inode.
211 * 208 *
212 */ 209 */
213
214struct inode *v9fs_alloc_inode(struct super_block *sb) 210struct inode *v9fs_alloc_inode(struct super_block *sb)
215{ 211{
216 struct v9fs_cookie *vcookie; 212 struct v9fs_inode *v9inode;
217 vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache, 213 v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache,
218 GFP_KERNEL); 214 GFP_KERNEL);
219 if (!vcookie) 215 if (!v9inode)
220 return NULL; 216 return NULL;
221 217#ifdef CONFIG_9P_FSCACHE
222 vcookie->fscache = NULL; 218 v9inode->fscache = NULL;
223 vcookie->qid = NULL; 219 v9inode->fscache_key = NULL;
224 spin_lock_init(&vcookie->lock); 220 spin_lock_init(&v9inode->fscache_lock);
225 return &vcookie->inode; 221#endif
222 v9inode->writeback_fid = NULL;
223 v9inode->cache_validity = 0;
224 return &v9inode->vfs_inode;
226} 225}
227 226
228/** 227/**
@@ -234,35 +233,18 @@ static void v9fs_i_callback(struct rcu_head *head)
234{ 233{
235 struct inode *inode = container_of(head, struct inode, i_rcu); 234 struct inode *inode = container_of(head, struct inode, i_rcu);
236 INIT_LIST_HEAD(&inode->i_dentry); 235 INIT_LIST_HEAD(&inode->i_dentry);
237 kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); 236 kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
238} 237}
239 238
240void v9fs_destroy_inode(struct inode *inode) 239void v9fs_destroy_inode(struct inode *inode)
241{ 240{
242 call_rcu(&inode->i_rcu, v9fs_i_callback); 241 call_rcu(&inode->i_rcu, v9fs_i_callback);
243} 242}
244#endif
245 243
246/** 244int v9fs_init_inode(struct v9fs_session_info *v9ses,
247 * v9fs_get_inode - helper function to setup an inode 245 struct inode *inode, int mode)
248 * @sb: superblock
249 * @mode: mode to setup inode with
250 *
251 */
252
253struct inode *v9fs_get_inode(struct super_block *sb, int mode)
254{ 246{
255 int err; 247 int err = 0;
256 struct inode *inode;
257 struct v9fs_session_info *v9ses = sb->s_fs_info;
258
259 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
260
261 inode = new_inode(sb);
262 if (!inode) {
263 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
264 return ERR_PTR(-ENOMEM);
265 }
266 248
267 inode_init_owner(inode, NULL, mode); 249 inode_init_owner(inode, NULL, mode);
268 inode->i_blocks = 0; 250 inode->i_blocks = 0;
@@ -292,14 +274,20 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
292 case S_IFREG: 274 case S_IFREG:
293 if (v9fs_proto_dotl(v9ses)) { 275 if (v9fs_proto_dotl(v9ses)) {
294 inode->i_op = &v9fs_file_inode_operations_dotl; 276 inode->i_op = &v9fs_file_inode_operations_dotl;
295 inode->i_fop = &v9fs_file_operations_dotl; 277 if (v9ses->cache)
278 inode->i_fop =
279 &v9fs_cached_file_operations_dotl;
280 else
281 inode->i_fop = &v9fs_file_operations_dotl;
296 } else { 282 } else {
297 inode->i_op = &v9fs_file_inode_operations; 283 inode->i_op = &v9fs_file_inode_operations;
298 inode->i_fop = &v9fs_file_operations; 284 if (v9ses->cache)
285 inode->i_fop = &v9fs_cached_file_operations;
286 else
287 inode->i_fop = &v9fs_file_operations;
299 } 288 }
300 289
301 break; 290 break;
302
303 case S_IFLNK: 291 case S_IFLNK:
304 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { 292 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
305 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " 293 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
@@ -335,12 +323,37 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
335 err = -EINVAL; 323 err = -EINVAL;
336 goto error; 324 goto error;
337 } 325 }
326error:
327 return err;
338 328
339 return inode; 329}
340 330
341error: 331/**
342 iput(inode); 332 * v9fs_get_inode - helper function to setup an inode
343 return ERR_PTR(err); 333 * @sb: superblock
334 * @mode: mode to setup inode with
335 *
336 */
337
338struct inode *v9fs_get_inode(struct super_block *sb, int mode)
339{
340 int err;
341 struct inode *inode;
342 struct v9fs_session_info *v9ses = sb->s_fs_info;
343
344 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
345
346 inode = new_inode(sb);
347 if (!inode) {
348 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
349 return ERR_PTR(-ENOMEM);
350 }
351 err = v9fs_init_inode(v9ses, inode, mode);
352 if (err) {
353 iput(inode);
354 return ERR_PTR(err);
355 }
356 return inode;
344} 357}
345 358
346/* 359/*
@@ -403,6 +416,8 @@ error:
403 */ 416 */
404void v9fs_evict_inode(struct inode *inode) 417void v9fs_evict_inode(struct inode *inode)
405{ 418{
419 struct v9fs_inode *v9inode = V9FS_I(inode);
420
406 truncate_inode_pages(inode->i_mapping, 0); 421 truncate_inode_pages(inode->i_mapping, 0);
407 end_writeback(inode); 422 end_writeback(inode);
408 filemap_fdatawrite(inode->i_mapping); 423 filemap_fdatawrite(inode->i_mapping);
@@ -410,41 +425,67 @@ void v9fs_evict_inode(struct inode *inode)
410#ifdef CONFIG_9P_FSCACHE 425#ifdef CONFIG_9P_FSCACHE
411 v9fs_cache_inode_put_cookie(inode); 426 v9fs_cache_inode_put_cookie(inode);
412#endif 427#endif
428 /* clunk the fid stashed in writeback_fid */
429 if (v9inode->writeback_fid) {
430 p9_client_clunk(v9inode->writeback_fid);
431 v9inode->writeback_fid = NULL;
432 }
413} 433}
414 434
415struct inode * 435static struct inode *v9fs_qid_iget(struct super_block *sb,
416v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid, 436 struct p9_qid *qid,
417 struct super_block *sb) 437 struct p9_wstat *st)
418{ 438{
419 int err, umode; 439 int retval, umode;
420 struct inode *ret = NULL; 440 unsigned long i_ino;
421 struct p9_wstat *st; 441 struct inode *inode;
422 442 struct v9fs_session_info *v9ses = sb->s_fs_info;
423 st = p9_client_stat(fid);
424 if (IS_ERR(st))
425 return ERR_CAST(st);
426 443
444 i_ino = v9fs_qid2ino(qid);
445 inode = iget_locked(sb, i_ino);
446 if (!inode)
447 return ERR_PTR(-ENOMEM);
448 if (!(inode->i_state & I_NEW))
449 return inode;
450 /*
451 * initialize the inode with the stat info
452 * FIXME!! we may need support for stale inodes
453 * later.
454 */
427 umode = p9mode2unixmode(v9ses, st->mode); 455 umode = p9mode2unixmode(v9ses, st->mode);
428 ret = v9fs_get_inode(sb, umode); 456 retval = v9fs_init_inode(v9ses, inode, umode);
429 if (IS_ERR(ret)) { 457 if (retval)
430 err = PTR_ERR(ret);
431 goto error; 458 goto error;
432 }
433
434 v9fs_stat2inode(st, ret, sb);
435 ret->i_ino = v9fs_qid2ino(&st->qid);
436 459
460 v9fs_stat2inode(st, inode, sb);
437#ifdef CONFIG_9P_FSCACHE 461#ifdef CONFIG_9P_FSCACHE
438 v9fs_vcookie_set_qid(ret, &st->qid); 462 v9fs_fscache_set_key(inode, &st->qid);
439 v9fs_cache_inode_get_cookie(ret); 463 v9fs_cache_inode_get_cookie(inode);
440#endif 464#endif
441 p9stat_free(st); 465 unlock_new_inode(inode);
442 kfree(st); 466 return inode;
443 return ret;
444error: 467error:
468 unlock_new_inode(inode);
469 iput(inode);
470 return ERR_PTR(retval);
471
472}
473
474struct inode *
475v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
476 struct super_block *sb)
477{
478 struct p9_wstat *st;
479 struct inode *inode = NULL;
480
481 st = p9_client_stat(fid);
482 if (IS_ERR(st))
483 return ERR_CAST(st);
484
485 inode = v9fs_qid_iget(sb, &st->qid, st);
445 p9stat_free(st); 486 p9stat_free(st);
446 kfree(st); 487 kfree(st);
447 return ERR_PTR(err); 488 return inode;
448} 489}
449 490
450/** 491/**
@@ -458,8 +499,8 @@ error:
458static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) 499static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
459{ 500{
460 int retval; 501 int retval;
461 struct inode *file_inode;
462 struct p9_fid *v9fid; 502 struct p9_fid *v9fid;
503 struct inode *file_inode;
463 504
464 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, 505 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
465 rmdir); 506 rmdir);
@@ -470,8 +511,20 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
470 return PTR_ERR(v9fid); 511 return PTR_ERR(v9fid);
471 512
472 retval = p9_client_remove(v9fid); 513 retval = p9_client_remove(v9fid);
473 if (!retval) 514 if (!retval) {
474 drop_nlink(file_inode); 515 /*
516 * directories on unlink should have zero
517 * link count
518 */
519 if (rmdir) {
520 clear_nlink(file_inode);
521 drop_nlink(dir);
522 } else
523 drop_nlink(file_inode);
524
525 v9fs_invalidate_inode_attr(file_inode);
526 v9fs_invalidate_inode_attr(dir);
527 }
475 return retval; 528 return retval;
476} 529}
477 530
@@ -531,7 +584,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
531 } 584 }
532 585
533 /* instantiate inode and assign the unopened fid to the dentry */ 586 /* instantiate inode and assign the unopened fid to the dentry */
534 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 587 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
535 if (IS_ERR(inode)) { 588 if (IS_ERR(inode)) {
536 err = PTR_ERR(inode); 589 err = PTR_ERR(inode);
537 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 590 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -570,9 +623,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
570 int err; 623 int err;
571 u32 perm; 624 u32 perm;
572 int flags; 625 int flags;
573 struct v9fs_session_info *v9ses;
574 struct p9_fid *fid;
575 struct file *filp; 626 struct file *filp;
627 struct v9fs_inode *v9inode;
628 struct v9fs_session_info *v9ses;
629 struct p9_fid *fid, *inode_fid;
576 630
577 err = 0; 631 err = 0;
578 fid = NULL; 632 fid = NULL;
@@ -592,8 +646,25 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
592 goto error; 646 goto error;
593 } 647 }
594 648
649 v9fs_invalidate_inode_attr(dir);
595 /* if we are opening a file, assign the open fid to the file */ 650 /* if we are opening a file, assign the open fid to the file */
596 if (nd && nd->flags & LOOKUP_OPEN) { 651 if (nd && nd->flags & LOOKUP_OPEN) {
652 v9inode = V9FS_I(dentry->d_inode);
653 if (v9ses->cache && !v9inode->writeback_fid) {
654 /*
655 * clone a fid and add it to writeback_fid
656 * we do it during open time instead of
657 * page dirty time via write_begin/page_mkwrite
658 * because we want write after unlink usecase
659 * to work.
660 */
661 inode_fid = v9fs_writeback_fid(dentry);
662 if (IS_ERR(inode_fid)) {
663 err = PTR_ERR(inode_fid);
664 goto error;
665 }
666 v9inode->writeback_fid = (void *) inode_fid;
667 }
597 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 668 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
598 if (IS_ERR(filp)) { 669 if (IS_ERR(filp)) {
599 err = PTR_ERR(filp); 670 err = PTR_ERR(filp);
@@ -601,6 +672,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
601 } 672 }
602 673
603 filp->private_data = fid; 674 filp->private_data = fid;
675#ifdef CONFIG_9P_FSCACHE
676 if (v9ses->cache)
677 v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
678#endif
604 } else 679 } else
605 p9_client_clunk(fid); 680 p9_client_clunk(fid);
606 681
@@ -625,8 +700,8 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
625{ 700{
626 int err; 701 int err;
627 u32 perm; 702 u32 perm;
628 struct v9fs_session_info *v9ses;
629 struct p9_fid *fid; 703 struct p9_fid *fid;
704 struct v9fs_session_info *v9ses;
630 705
631 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 706 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
632 err = 0; 707 err = 0;
@@ -636,6 +711,9 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
636 if (IS_ERR(fid)) { 711 if (IS_ERR(fid)) {
637 err = PTR_ERR(fid); 712 err = PTR_ERR(fid);
638 fid = NULL; 713 fid = NULL;
714 } else {
715 inc_nlink(dir);
716 v9fs_invalidate_inode_attr(dir);
639 } 717 }
640 718
641 if (fid) 719 if (fid)
@@ -687,7 +765,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
687 return ERR_PTR(result); 765 return ERR_PTR(result);
688 } 766 }
689 767
690 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 768 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
691 if (IS_ERR(inode)) { 769 if (IS_ERR(inode)) {
692 result = PTR_ERR(inode); 770 result = PTR_ERR(inode);
693 inode = NULL; 771 inode = NULL;
@@ -747,17 +825,19 @@ int
747v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 825v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
748 struct inode *new_dir, struct dentry *new_dentry) 826 struct inode *new_dir, struct dentry *new_dentry)
749{ 827{
828 int retval;
750 struct inode *old_inode; 829 struct inode *old_inode;
830 struct inode *new_inode;
751 struct v9fs_session_info *v9ses; 831 struct v9fs_session_info *v9ses;
752 struct p9_fid *oldfid; 832 struct p9_fid *oldfid;
753 struct p9_fid *olddirfid; 833 struct p9_fid *olddirfid;
754 struct p9_fid *newdirfid; 834 struct p9_fid *newdirfid;
755 struct p9_wstat wstat; 835 struct p9_wstat wstat;
756 int retval;
757 836
758 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 837 P9_DPRINTK(P9_DEBUG_VFS, "\n");
759 retval = 0; 838 retval = 0;
760 old_inode = old_dentry->d_inode; 839 old_inode = old_dentry->d_inode;
840 new_inode = new_dentry->d_inode;
761 v9ses = v9fs_inode2v9ses(old_inode); 841 v9ses = v9fs_inode2v9ses(old_inode);
762 oldfid = v9fs_fid_lookup(old_dentry); 842 oldfid = v9fs_fid_lookup(old_dentry);
763 if (IS_ERR(oldfid)) 843 if (IS_ERR(oldfid))
@@ -798,9 +878,30 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
798 retval = p9_client_wstat(oldfid, &wstat); 878 retval = p9_client_wstat(oldfid, &wstat);
799 879
800clunk_newdir: 880clunk_newdir:
801 if (!retval) 881 if (!retval) {
882 if (new_inode) {
883 if (S_ISDIR(new_inode->i_mode))
884 clear_nlink(new_inode);
885 else
886 drop_nlink(new_inode);
887 /*
888 * Work around vfs rename rehash bug with
889 * FS_RENAME_DOES_D_MOVE
890 */
891 v9fs_invalidate_inode_attr(new_inode);
892 }
893 if (S_ISDIR(old_inode->i_mode)) {
894 if (!new_inode)
895 inc_nlink(new_dir);
896 drop_nlink(old_dir);
897 }
898 v9fs_invalidate_inode_attr(old_inode);
899 v9fs_invalidate_inode_attr(old_dir);
900 v9fs_invalidate_inode_attr(new_dir);
901
802 /* successful rename */ 902 /* successful rename */
803 d_move(old_dentry, new_dentry); 903 d_move(old_dentry, new_dentry);
904 }
804 up_write(&v9ses->rename_sem); 905 up_write(&v9ses->rename_sem);
805 p9_client_clunk(newdirfid); 906 p9_client_clunk(newdirfid);
806 907
@@ -831,9 +932,10 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
831 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 932 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
832 err = -EPERM; 933 err = -EPERM;
833 v9ses = v9fs_inode2v9ses(dentry->d_inode); 934 v9ses = v9fs_inode2v9ses(dentry->d_inode);
834 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 935 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
835 return simple_getattr(mnt, dentry, stat); 936 generic_fillattr(dentry->d_inode, stat);
836 937 return 0;
938 }
837 fid = v9fs_fid_lookup(dentry); 939 fid = v9fs_fid_lookup(dentry);
838 if (IS_ERR(fid)) 940 if (IS_ERR(fid))
839 return PTR_ERR(fid); 941 return PTR_ERR(fid);
@@ -891,17 +993,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
891 if (iattr->ia_valid & ATTR_GID) 993 if (iattr->ia_valid & ATTR_GID)
892 wstat.n_gid = iattr->ia_gid; 994 wstat.n_gid = iattr->ia_gid;
893 } 995 }
894
895 retval = p9_client_wstat(fid, &wstat);
896 if (retval < 0)
897 return retval;
898
899 if ((iattr->ia_valid & ATTR_SIZE) && 996 if ((iattr->ia_valid & ATTR_SIZE) &&
900 iattr->ia_size != i_size_read(dentry->d_inode)) { 997 iattr->ia_size != i_size_read(dentry->d_inode)) {
901 retval = vmtruncate(dentry->d_inode, iattr->ia_size); 998 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
902 if (retval) 999 if (retval)
903 return retval; 1000 return retval;
904 } 1001 }
1002 /* Write all dirty data */
1003 if (S_ISREG(dentry->d_inode->i_mode))
1004 filemap_write_and_wait(dentry->d_inode->i_mapping);
1005
1006 retval = p9_client_wstat(fid, &wstat);
1007 if (retval < 0)
1008 return retval;
1009 v9fs_invalidate_inode_attr(dentry->d_inode);
905 1010
906 setattr_copy(dentry->d_inode, iattr); 1011 setattr_copy(dentry->d_inode, iattr);
907 mark_inode_dirty(dentry->d_inode); 1012 mark_inode_dirty(dentry->d_inode);
@@ -924,6 +1029,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
924 char tag_name[14]; 1029 char tag_name[14];
925 unsigned int i_nlink; 1030 unsigned int i_nlink;
926 struct v9fs_session_info *v9ses = sb->s_fs_info; 1031 struct v9fs_session_info *v9ses = sb->s_fs_info;
1032 struct v9fs_inode *v9inode = V9FS_I(inode);
927 1033
928 inode->i_nlink = 1; 1034 inode->i_nlink = 1;
929 1035
@@ -983,6 +1089,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
983 1089
984 /* not real number of blocks, but 512 byte ones ... */ 1090 /* not real number of blocks, but 512 byte ones ... */
985 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 1091 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
1092 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
986} 1093}
987 1094
988/** 1095/**
@@ -1115,8 +1222,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1115 int mode, const char *extension) 1222 int mode, const char *extension)
1116{ 1223{
1117 u32 perm; 1224 u32 perm;
1118 struct v9fs_session_info *v9ses;
1119 struct p9_fid *fid; 1225 struct p9_fid *fid;
1226 struct v9fs_session_info *v9ses;
1120 1227
1121 v9ses = v9fs_inode2v9ses(dir); 1228 v9ses = v9fs_inode2v9ses(dir);
1122 if (!v9fs_proto_dotu(v9ses)) { 1229 if (!v9fs_proto_dotu(v9ses)) {
@@ -1130,6 +1237,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1130 if (IS_ERR(fid)) 1237 if (IS_ERR(fid))
1131 return PTR_ERR(fid); 1238 return PTR_ERR(fid);
1132 1239
1240 v9fs_invalidate_inode_attr(dir);
1133 p9_client_clunk(fid); 1241 p9_client_clunk(fid);
1134 return 0; 1242 return 0;
1135} 1243}
@@ -1166,8 +1274,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1166 struct dentry *dentry) 1274 struct dentry *dentry)
1167{ 1275{
1168 int retval; 1276 int retval;
1169 struct p9_fid *oldfid;
1170 char *name; 1277 char *name;
1278 struct p9_fid *oldfid;
1171 1279
1172 P9_DPRINTK(P9_DEBUG_VFS, 1280 P9_DPRINTK(P9_DEBUG_VFS,
1173 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1281 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
@@ -1186,7 +1294,10 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1186 sprintf(name, "%d\n", oldfid->fid); 1294 sprintf(name, "%d\n", oldfid->fid);
1187 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); 1295 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
1188 __putname(name); 1296 __putname(name);
1189 1297 if (!retval) {
1298 v9fs_refresh_inode(oldfid, old_dentry->d_inode);
1299 v9fs_invalidate_inode_attr(dir);
1300 }
1190clunk_fid: 1301clunk_fid:
1191 p9_client_clunk(oldfid); 1302 p9_client_clunk(oldfid);
1192 return retval; 1303 return retval;
@@ -1237,6 +1348,32 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1237 return retval; 1348 return retval;
1238} 1349}
1239 1350
1351int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
1352{
1353 loff_t i_size;
1354 struct p9_wstat *st;
1355 struct v9fs_session_info *v9ses;
1356
1357 v9ses = v9fs_inode2v9ses(inode);
1358 st = p9_client_stat(fid);
1359 if (IS_ERR(st))
1360 return PTR_ERR(st);
1361
1362 spin_lock(&inode->i_lock);
1363 /*
1364 * We don't want to refresh inode->i_size,
1365 * because we may have cached data
1366 */
1367 i_size = inode->i_size;
1368 v9fs_stat2inode(st, inode, inode->i_sb);
1369 if (v9ses->cache)
1370 inode->i_size = i_size;
1371 spin_unlock(&inode->i_lock);
1372 p9stat_free(st);
1373 kfree(st);
1374 return 0;
1375}
1376
1240static const struct inode_operations v9fs_dir_inode_operations_dotu = { 1377static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1241 .create = v9fs_vfs_create, 1378 .create = v9fs_vfs_create,
1242 .lookup = v9fs_vfs_lookup, 1379 .lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index fe3ffa9aace4..67c138e94feb 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,40 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
86 return dentry; 86 return dentry;
87} 87}
88 88
89static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
90 struct p9_qid *qid,
91 struct p9_fid *fid,
92 struct p9_stat_dotl *st)
93{
94 int retval;
95 unsigned long i_ino;
96 struct inode *inode;
97 struct v9fs_session_info *v9ses = sb->s_fs_info;
98
99 i_ino = v9fs_qid2ino(qid);
100 inode = iget_locked(sb, i_ino);
101 if (!inode)
102 return ERR_PTR(-ENOMEM);
103 if (!(inode->i_state & I_NEW))
104 return inode;
105 /*
106 * initialize the inode with the stat info
107 * FIXME!! we may need support for stale inodes
108 * later.
109 */
110 retval = v9fs_init_inode(v9ses, inode, st->st_mode);
111 if (retval)
112 goto error;
113
114 v9fs_stat2inode_dotl(st, inode);
115#ifdef CONFIG_9P_FSCACHE
116 v9fs_fscache_set_key(inode, &st->qid);
117 v9fs_cache_inode_get_cookie(inode);
118#endif
119 retval = v9fs_get_acl(inode, fid);
120 if (retval)
121 goto error;
122
123 unlock_new_inode(inode);
124 return inode;
125error:
126 unlock_new_inode(inode);
127 iput(inode);
128 return ERR_PTR(retval);
129
130}
131
89struct inode * 132struct inode *
90v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, 133v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
91 struct super_block *sb) 134 struct super_block *sb)
92{ 135{
93 struct inode *ret = NULL;
94 int err;
95 struct p9_stat_dotl *st; 136 struct p9_stat_dotl *st;
137 struct inode *inode = NULL;
96 138
97 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 139 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
98 if (IS_ERR(st)) 140 if (IS_ERR(st))
99 return ERR_CAST(st); 141 return ERR_CAST(st);
100 142
101 ret = v9fs_get_inode(sb, st->st_mode); 143 inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
102 if (IS_ERR(ret)) {
103 err = PTR_ERR(ret);
104 goto error;
105 }
106
107 v9fs_stat2inode_dotl(st, ret);
108 ret->i_ino = v9fs_qid2ino(&st->qid);
109#ifdef CONFIG_9P_FSCACHE
110 v9fs_vcookie_set_qid(ret, &st->qid);
111 v9fs_cache_inode_get_cookie(ret);
112#endif
113 err = v9fs_get_acl(ret, fid);
114 if (err) {
115 iput(ret);
116 goto error;
117 }
118 kfree(st);
119 return ret;
120error:
121 kfree(st); 144 kfree(st);
122 return ERR_PTR(err); 145 return inode;
123} 146}
124 147
125/** 148/**
@@ -136,16 +159,17 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
136 struct nameidata *nd) 159 struct nameidata *nd)
137{ 160{
138 int err = 0; 161 int err = 0;
139 char *name = NULL;
140 gid_t gid; 162 gid_t gid;
141 int flags; 163 int flags;
142 mode_t mode; 164 mode_t mode;
143 struct v9fs_session_info *v9ses; 165 char *name = NULL;
144 struct p9_fid *fid = NULL;
145 struct p9_fid *dfid, *ofid;
146 struct file *filp; 166 struct file *filp;
147 struct p9_qid qid; 167 struct p9_qid qid;
148 struct inode *inode; 168 struct inode *inode;
169 struct p9_fid *fid = NULL;
170 struct v9fs_inode *v9inode;
171 struct p9_fid *dfid, *ofid, *inode_fid;
172 struct v9fs_session_info *v9ses;
149 struct posix_acl *pacl = NULL, *dacl = NULL; 173 struct posix_acl *pacl = NULL, *dacl = NULL;
150 174
151 v9ses = v9fs_inode2v9ses(dir); 175 v9ses = v9fs_inode2v9ses(dir);
@@ -196,6 +220,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
196 err); 220 err);
197 goto error; 221 goto error;
198 } 222 }
223 v9fs_invalidate_inode_attr(dir);
199 224
200 /* instantiate inode and assign the unopened fid to the dentry */ 225 /* instantiate inode and assign the unopened fid to the dentry */
201 fid = p9_client_walk(dfid, 1, &name, 1); 226 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -205,7 +230,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
205 fid = NULL; 230 fid = NULL;
206 goto error; 231 goto error;
207 } 232 }
208 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 233 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
209 if (IS_ERR(inode)) { 234 if (IS_ERR(inode)) {
210 err = PTR_ERR(inode); 235 err = PTR_ERR(inode);
211 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 236 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -219,6 +244,22 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
219 /* Now set the ACL based on the default value */ 244 /* Now set the ACL based on the default value */
220 v9fs_set_create_acl(dentry, dacl, pacl); 245 v9fs_set_create_acl(dentry, dacl, pacl);
221 246
247 v9inode = V9FS_I(inode);
248 if (v9ses->cache && !v9inode->writeback_fid) {
249 /*
250 * clone a fid and add it to writeback_fid
251 * we do it during open time instead of
252 * page dirty time via write_begin/page_mkwrite
253 * because we want write after unlink usecase
254 * to work.
255 */
256 inode_fid = v9fs_writeback_fid(dentry);
257 if (IS_ERR(inode_fid)) {
258 err = PTR_ERR(inode_fid);
259 goto error;
260 }
261 v9inode->writeback_fid = (void *) inode_fid;
262 }
222 /* Since we are opening a file, assign the open fid to the file */ 263 /* Since we are opening a file, assign the open fid to the file */
223 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 264 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
224 if (IS_ERR(filp)) { 265 if (IS_ERR(filp)) {
@@ -226,6 +267,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
226 return PTR_ERR(filp); 267 return PTR_ERR(filp);
227 } 268 }
228 filp->private_data = ofid; 269 filp->private_data = ofid;
270#ifdef CONFIG_9P_FSCACHE
271 if (v9ses->cache)
272 v9fs_cache_inode_set_cookie(inode, filp);
273#endif
229 return 0; 274 return 0;
230 275
231error: 276error:
@@ -300,7 +345,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
300 goto error; 345 goto error;
301 } 346 }
302 347
303 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 348 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
304 if (IS_ERR(inode)) { 349 if (IS_ERR(inode)) {
305 err = PTR_ERR(inode); 350 err = PTR_ERR(inode);
306 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 351 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -327,7 +372,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
327 } 372 }
328 /* Now set the ACL based on the default value */ 373 /* Now set the ACL based on the default value */
329 v9fs_set_create_acl(dentry, dacl, pacl); 374 v9fs_set_create_acl(dentry, dacl, pacl);
330 375 inc_nlink(dir);
376 v9fs_invalidate_inode_attr(dir);
331error: 377error:
332 if (fid) 378 if (fid)
333 p9_client_clunk(fid); 379 p9_client_clunk(fid);
@@ -346,9 +392,10 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
346 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 392 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
347 err = -EPERM; 393 err = -EPERM;
348 v9ses = v9fs_inode2v9ses(dentry->d_inode); 394 v9ses = v9fs_inode2v9ses(dentry->d_inode);
349 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 395 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
350 return simple_getattr(mnt, dentry, stat); 396 generic_fillattr(dentry->d_inode, stat);
351 397 return 0;
398 }
352 fid = v9fs_fid_lookup(dentry); 399 fid = v9fs_fid_lookup(dentry);
353 if (IS_ERR(fid)) 400 if (IS_ERR(fid))
354 return PTR_ERR(fid); 401 return PTR_ERR(fid);
@@ -406,16 +453,20 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
406 if (IS_ERR(fid)) 453 if (IS_ERR(fid))
407 return PTR_ERR(fid); 454 return PTR_ERR(fid);
408 455
409 retval = p9_client_setattr(fid, &p9attr);
410 if (retval < 0)
411 return retval;
412
413 if ((iattr->ia_valid & ATTR_SIZE) && 456 if ((iattr->ia_valid & ATTR_SIZE) &&
414 iattr->ia_size != i_size_read(dentry->d_inode)) { 457 iattr->ia_size != i_size_read(dentry->d_inode)) {
415 retval = vmtruncate(dentry->d_inode, iattr->ia_size); 458 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
416 if (retval) 459 if (retval)
417 return retval; 460 return retval;
418 } 461 }
462 /* Write all dirty data */
463 if (S_ISREG(dentry->d_inode->i_mode))
464 filemap_write_and_wait(dentry->d_inode->i_mapping);
465
466 retval = p9_client_setattr(fid, &p9attr);
467 if (retval < 0)
468 return retval;
469 v9fs_invalidate_inode_attr(dentry->d_inode);
419 470
420 setattr_copy(dentry->d_inode, iattr); 471 setattr_copy(dentry->d_inode, iattr);
421 mark_inode_dirty(dentry->d_inode); 472 mark_inode_dirty(dentry->d_inode);
@@ -439,6 +490,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
439void 490void
440v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) 491v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
441{ 492{
493 struct v9fs_inode *v9inode = V9FS_I(inode);
442 494
443 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { 495 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
444 inode->i_atime.tv_sec = stat->st_atime_sec; 496 inode->i_atime.tv_sec = stat->st_atime_sec;
@@ -497,20 +549,21 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
497 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION 549 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
498 * because the inode structure does not have fields for them. 550 * because the inode structure does not have fields for them.
499 */ 551 */
552 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
500} 553}
501 554
502static int 555static int
503v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, 556v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
504 const char *symname) 557 const char *symname)
505{ 558{
506 struct v9fs_session_info *v9ses;
507 struct p9_fid *dfid;
508 struct p9_fid *fid = NULL;
509 struct inode *inode;
510 struct p9_qid qid;
511 char *name;
512 int err; 559 int err;
513 gid_t gid; 560 gid_t gid;
561 char *name;
562 struct p9_qid qid;
563 struct inode *inode;
564 struct p9_fid *dfid;
565 struct p9_fid *fid = NULL;
566 struct v9fs_session_info *v9ses;
514 567
515 name = (char *) dentry->d_name.name; 568 name = (char *) dentry->d_name.name;
516 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", 569 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
@@ -534,6 +587,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
534 goto error; 587 goto error;
535 } 588 }
536 589
590 v9fs_invalidate_inode_attr(dir);
537 if (v9ses->cache) { 591 if (v9ses->cache) {
538 /* Now walk from the parent so we can get an unopened fid. */ 592 /* Now walk from the parent so we can get an unopened fid. */
539 fid = p9_client_walk(dfid, 1, &name, 1); 593 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -546,7 +600,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
546 } 600 }
547 601
548 /* instantiate inode and assign the unopened fid to dentry */ 602 /* instantiate inode and assign the unopened fid to dentry */
549 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 603 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
550 if (IS_ERR(inode)) { 604 if (IS_ERR(inode)) {
551 err = PTR_ERR(inode); 605 err = PTR_ERR(inode);
552 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 606 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -588,10 +642,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
588 struct dentry *dentry) 642 struct dentry *dentry)
589{ 643{
590 int err; 644 int err;
591 struct p9_fid *dfid, *oldfid;
592 char *name; 645 char *name;
593 struct v9fs_session_info *v9ses;
594 struct dentry *dir_dentry; 646 struct dentry *dir_dentry;
647 struct p9_fid *dfid, *oldfid;
648 struct v9fs_session_info *v9ses;
595 649
596 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", 650 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
597 dir->i_ino, old_dentry->d_name.name, 651 dir->i_ino, old_dentry->d_name.name,
@@ -616,29 +670,17 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
616 return err; 670 return err;
617 } 671 }
618 672
673 v9fs_invalidate_inode_attr(dir);
619 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 674 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
620 /* Get the latest stat info from server. */ 675 /* Get the latest stat info from server. */
621 struct p9_fid *fid; 676 struct p9_fid *fid;
622 struct p9_stat_dotl *st;
623
624 fid = v9fs_fid_lookup(old_dentry); 677 fid = v9fs_fid_lookup(old_dentry);
625 if (IS_ERR(fid)) 678 if (IS_ERR(fid))
626 return PTR_ERR(fid); 679 return PTR_ERR(fid);
627 680
628 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 681 v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
629 if (IS_ERR(st))
630 return PTR_ERR(st);
631
632 v9fs_stat2inode_dotl(st, old_dentry->d_inode);
633
634 kfree(st);
635 } else {
636 /* Caching disabled. No need to get upto date stat info.
637 * This dentry will be released immediately. So, just hold the
638 * inode
639 */
640 ihold(old_dentry->d_inode);
641 } 682 }
683 ihold(old_dentry->d_inode);
642 d_instantiate(dentry, old_dentry->d_inode); 684 d_instantiate(dentry, old_dentry->d_inode);
643 685
644 return err; 686 return err;
@@ -657,12 +699,12 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
657 dev_t rdev) 699 dev_t rdev)
658{ 700{
659 int err; 701 int err;
702 gid_t gid;
660 char *name; 703 char *name;
661 mode_t mode; 704 mode_t mode;
662 struct v9fs_session_info *v9ses; 705 struct v9fs_session_info *v9ses;
663 struct p9_fid *fid = NULL, *dfid = NULL; 706 struct p9_fid *fid = NULL, *dfid = NULL;
664 struct inode *inode; 707 struct inode *inode;
665 gid_t gid;
666 struct p9_qid qid; 708 struct p9_qid qid;
667 struct dentry *dir_dentry; 709 struct dentry *dir_dentry;
668 struct posix_acl *dacl = NULL, *pacl = NULL; 710 struct posix_acl *dacl = NULL, *pacl = NULL;
@@ -699,6 +741,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
699 if (err < 0) 741 if (err < 0)
700 goto error; 742 goto error;
701 743
744 v9fs_invalidate_inode_attr(dir);
702 /* instantiate inode and assign the unopened fid to the dentry */ 745 /* instantiate inode and assign the unopened fid to the dentry */
703 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 746 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
704 fid = p9_client_walk(dfid, 1, &name, 1); 747 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -710,7 +753,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
710 goto error; 753 goto error;
711 } 754 }
712 755
713 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 756 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
714 if (IS_ERR(inode)) { 757 if (IS_ERR(inode)) {
715 err = PTR_ERR(inode); 758 err = PTR_ERR(inode);
716 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 759 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -782,6 +825,31 @@ ndset:
782 return NULL; 825 return NULL;
783} 826}
784 827
828int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
829{
830 loff_t i_size;
831 struct p9_stat_dotl *st;
832 struct v9fs_session_info *v9ses;
833
834 v9ses = v9fs_inode2v9ses(inode);
835 st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
836 if (IS_ERR(st))
837 return PTR_ERR(st);
838
839 spin_lock(&inode->i_lock);
840 /*
841 * We don't want to refresh inode->i_size,
842 * because we may have cached data
843 */
844 i_size = inode->i_size;
845 v9fs_stat2inode_dotl(st, inode);
846 if (v9ses->cache)
847 inode->i_size = i_size;
848 spin_unlock(&inode->i_lock);
849 kfree(st);
850 return 0;
851}
852
785const struct inode_operations v9fs_dir_inode_operations_dotl = { 853const struct inode_operations v9fs_dir_inode_operations_dotl = {
786 .create = v9fs_vfs_create_dotl, 854 .create = v9fs_vfs_create_dotl,
787 .lookup = v9fs_vfs_lookup, 855 .lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index dbaabe3b8131..09fd08d1606f 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -86,12 +86,15 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
86 } else 86 } else
87 sb->s_op = &v9fs_super_ops; 87 sb->s_op = &v9fs_super_ops;
88 sb->s_bdi = &v9ses->bdi; 88 sb->s_bdi = &v9ses->bdi;
89 if (v9ses->cache)
90 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
89 91
90 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 92 sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
91 MS_NOATIME; 93 if (!v9ses->cache)
94 sb->s_flags |= MS_SYNCHRONOUS;
92 95
93#ifdef CONFIG_9P_FS_POSIX_ACL 96#ifdef CONFIG_9P_FS_POSIX_ACL
94 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT) 97 if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
95 sb->s_flags |= MS_POSIXACL; 98 sb->s_flags |= MS_POSIXACL;
96#endif 99#endif
97 100
@@ -151,7 +154,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
151 retval = PTR_ERR(inode); 154 retval = PTR_ERR(inode);
152 goto release_sb; 155 goto release_sb;
153 } 156 }
154
155 root = d_alloc_root(inode); 157 root = d_alloc_root(inode);
156 if (!root) { 158 if (!root) {
157 iput(inode); 159 iput(inode);
@@ -166,7 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
166 retval = PTR_ERR(st); 168 retval = PTR_ERR(st);
167 goto release_sb; 169 goto release_sb;
168 } 170 }
169 171 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
170 v9fs_stat2inode_dotl(st, root->d_inode); 172 v9fs_stat2inode_dotl(st, root->d_inode);
171 kfree(st); 173 kfree(st);
172 } else { 174 } else {
@@ -183,10 +185,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
183 p9stat_free(st); 185 p9stat_free(st);
184 kfree(st); 186 kfree(st);
185 } 187 }
188 v9fs_fid_add(root, fid);
186 retval = v9fs_get_acl(inode, fid); 189 retval = v9fs_get_acl(inode, fid);
187 if (retval) 190 if (retval)
188 goto release_sb; 191 goto release_sb;
189 v9fs_fid_add(root, fid); 192 /*
193 * Add the root fid to session info. This is used
194 * for file system sync. We want a cloned fid here
195 * so that we can do a sync_filesystem after a
196 * shrink_dcache_for_umount
197 */
198 v9ses->root_fid = v9fs_fid_clone(root);
199 if (IS_ERR(v9ses->root_fid)) {
200 retval = PTR_ERR(v9ses->root_fid);
201 goto release_sb;
202 }
190 203
191 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 204 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
192 return dget(sb->s_root); 205 return dget(sb->s_root);
@@ -197,15 +210,11 @@ close_session:
197 v9fs_session_close(v9ses); 210 v9fs_session_close(v9ses);
198 kfree(v9ses); 211 kfree(v9ses);
199 return ERR_PTR(retval); 212 return ERR_PTR(retval);
200
201release_sb: 213release_sb:
202 /* 214 /*
203 * we will do the session_close and root dentry release 215 * we will do the session_close and root dentry
204 * in the below call. But we need to clunk fid, because we haven't 216 * release in the below call.
205 * attached the fid to dentry so it won't get clunked
206 * automatically.
207 */ 217 */
208 p9_client_clunk(fid);
209 deactivate_locked_super(sb); 218 deactivate_locked_super(sb);
210 return ERR_PTR(retval); 219 return ERR_PTR(retval);
211} 220}
@@ -223,7 +232,7 @@ static void v9fs_kill_super(struct super_block *s)
223 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); 232 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
224 233
225 kill_anon_super(s); 234 kill_anon_super(s);
226 235 p9_client_clunk(v9ses->root_fid);
227 v9fs_session_cancel(v9ses); 236 v9fs_session_cancel(v9ses);
228 v9fs_session_close(v9ses); 237 v9fs_session_close(v9ses);
229 kfree(v9ses); 238 kfree(v9ses);
@@ -276,11 +285,31 @@ done:
276 return res; 285 return res;
277} 286}
278 287
288static int v9fs_sync_fs(struct super_block *sb, int wait)
289{
290 struct v9fs_session_info *v9ses = sb->s_fs_info;
291
292 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
293 return p9_client_sync_fs(v9ses->root_fid);
294}
295
296static int v9fs_drop_inode(struct inode *inode)
297{
298 struct v9fs_session_info *v9ses;
299 v9ses = v9fs_inode2v9ses(inode);
300 if (v9ses->cache)
301 return generic_drop_inode(inode);
302 /*
303 * in case of non cached mode always drop the
304 * the inode because we want the inode attribute
305 * to always match that on the server.
306 */
307 return 1;
308}
309
279static const struct super_operations v9fs_super_ops = { 310static const struct super_operations v9fs_super_ops = {
280#ifdef CONFIG_9P_FSCACHE
281 .alloc_inode = v9fs_alloc_inode, 311 .alloc_inode = v9fs_alloc_inode,
282 .destroy_inode = v9fs_destroy_inode, 312 .destroy_inode = v9fs_destroy_inode,
283#endif
284 .statfs = simple_statfs, 313 .statfs = simple_statfs,
285 .evict_inode = v9fs_evict_inode, 314 .evict_inode = v9fs_evict_inode,
286 .show_options = generic_show_options, 315 .show_options = generic_show_options,
@@ -288,11 +317,11 @@ static const struct super_operations v9fs_super_ops = {
288}; 317};
289 318
290static const struct super_operations v9fs_super_ops_dotl = { 319static const struct super_operations v9fs_super_ops_dotl = {
291#ifdef CONFIG_9P_FSCACHE
292 .alloc_inode = v9fs_alloc_inode, 320 .alloc_inode = v9fs_alloc_inode,
293 .destroy_inode = v9fs_destroy_inode, 321 .destroy_inode = v9fs_destroy_inode,
294#endif 322 .sync_fs = v9fs_sync_fs,
295 .statfs = v9fs_statfs, 323 .statfs = v9fs_statfs,
324 .drop_inode = v9fs_drop_inode,
296 .evict_inode = v9fs_evict_inode, 325 .evict_inode = v9fs_evict_inode,
297 .show_options = generic_show_options, 326 .show_options = generic_show_options,
298 .umount_begin = v9fs_umount_begin, 327 .umount_begin = v9fs_umount_begin,
@@ -303,5 +332,5 @@ struct file_system_type v9fs_fs_type = {
303 .mount = v9fs_mount, 332 .mount = v9fs_mount,
304 .kill_sb = v9fs_kill_super, 333 .kill_sb = v9fs_kill_super,
305 .owner = THIS_MODULE, 334 .owner = THIS_MODULE,
306 .fs_flags = FS_RENAME_DOES_D_MOVE, 335 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT,
307}; 336};
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57edc..7cb53aafac1e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
47 def_bool n 47 def_bool n
48 48
49config EXPORTFS 49config EXPORTFS
50 tristate 50 bool
51 51
52config FILE_LOCKING 52config FILE_LOCKING
53 bool "Enable POSIX file locking API" if EXPERT 53 bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c8..ba01202844c5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
48obj-$(CONFIG_NFS_COMMON) += nfs_common/ 48obj-$(CONFIG_NFS_COMMON) += nfs_common/
49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
50 50
51obj-$(CONFIG_FHANDLE) += fhandle.o
52
51obj-y += quota/ 53obj-y += quota/
52 54
53obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
140 candidate->first = candidate->last = index; 140 candidate->first = candidate->last = index;
141 candidate->offset_first = from; 141 candidate->offset_first = from;
142 candidate->to_last = to; 142 candidate->to_last = to;
143 INIT_LIST_HEAD(&candidate->link);
143 candidate->usage = 1; 144 candidate->usage = 1;
144 candidate->state = AFS_WBACK_PENDING; 145 candidate->state = AFS_WBACK_PENDING;
145 init_waitqueue_head(&candidate->waitq); 146 init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a9..7f54f43b8f7c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -85,7 +85,7 @@ static int __init aio_setup(void)
85 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 85 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
87 87
88 aio_wq = create_workqueue("aio"); 88 aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); 89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
90 BUG_ON(!aio_wq || !abe_pool); 90 BUG_ON(!aio_wq || !abe_pool);
91 91
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
239 call_rcu(&ctx->rcu_head, ctx_rcu_free); 239 call_rcu(&ctx->rcu_head, ctx_rcu_free);
240} 240}
241 241
242#define get_ioctx(kioctx) do { \ 242static inline void get_ioctx(struct kioctx *kioctx)
243 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 243{
244 atomic_inc(&(kioctx)->users); \ 244 BUG_ON(atomic_read(&kioctx->users) <= 0);
245} while (0) 245 atomic_inc(&kioctx->users);
246#define put_ioctx(kioctx) do { \ 246}
247 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 247
248 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ 248static inline int try_get_ioctx(struct kioctx *kioctx)
249 __put_ioctx(kioctx); \ 249{
250} while (0) 250 return atomic_inc_not_zero(&kioctx->users);
251}
252
253static inline void put_ioctx(struct kioctx *kioctx)
254{
255 BUG_ON(atomic_read(&kioctx->users) <= 0);
256 if (unlikely(atomic_dec_and_test(&kioctx->users)))
257 __put_ioctx(kioctx);
258}
251 259
252/* ioctx_alloc 260/* ioctx_alloc
253 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 261 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -569,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
569 spin_lock(&fput_lock); 577 spin_lock(&fput_lock);
570 list_add(&req->ki_list, &fput_head); 578 list_add(&req->ki_list, &fput_head);
571 spin_unlock(&fput_lock); 579 spin_unlock(&fput_lock);
572 queue_work(aio_wq, &fput_work); 580 schedule_work(&fput_work);
573 } else { 581 } else {
574 req->ki_filp = NULL; 582 req->ki_filp = NULL;
575 really_put_req(ctx, req); 583 really_put_req(ctx, req);
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
601 rcu_read_lock(); 609 rcu_read_lock();
602 610
603 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 611 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
604 if (ctx->user_id == ctx_id && !ctx->dead) { 612 /*
605 get_ioctx(ctx); 613 * RCU protects us against accessing freed memory but
614 * we have to be careful not to get a reference when the
615 * reference count already dropped to 0 (ctx->dead test
616 * is unreliable because of races).
617 */
618 if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
606 ret = ctx; 619 ret = ctx;
607 break; 620 break;
608 } 621 }
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1629 goto out_put_req; 1642 goto out_put_req;
1630 1643
1631 spin_lock_irq(&ctx->ctx_lock); 1644 spin_lock_irq(&ctx->ctx_lock);
1645 /*
1646 * We could have raced with io_destroy() and are currently holding a
1647 * reference to ctx which should be destroyed. We cannot submit IO
1648 * since ctx gets freed as soon as io_submit() puts its reference. The
1649 * check here is reliable: io_destroy() sets ctx->dead before waiting
1650 * for outstanding IO and the barrier between these two is realized by
1651 * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
1652 * increment ctx->reqs_active before checking for ctx->dead and the
1653 * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
1654 * don't see ctx->dead set here, io_destroy() waits for our IO to
1655 * finish.
1656 */
1657 if (ctx->dead) {
1658 spin_unlock_irq(&ctx->ctx_lock);
1659 ret = -EINVAL;
1660 goto out_put_req;
1661 }
1632 aio_run_iocb(req); 1662 aio_run_iocb(req);
1633 if (!list_empty(&ctx->run_list)) { 1663 if (!list_empty(&ctx->run_list)) {
1634 /* drain the run list */ 1664 /* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb4cb9c..889287019599 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
874 if (ret) 874 if (ret)
875 goto out_del; 875 goto out_del;
876 /*
877 * bdev could be deleted beneath us which would implicitly destroy
878 * the holder directory. Hold on to it.
879 */
880 kobject_get(bdev->bd_part->holder_dir);
876 881
877 list_add(&holder->list, &bdev->bd_holder_disks); 882 list_add(&holder->list, &bdev->bd_holder_disks);
878 goto out_unlock; 883 goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
909 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 914 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
910 del_symlink(bdev->bd_part->holder_dir, 915 del_symlink(bdev->bd_part->holder_dir,
911 &disk_to_dev(disk)->kobj); 916 &disk_to_dev(disk)->kobj);
917 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list); 918 list_del_init(&holder->list);
913 kfree(holder); 919 kfree(holder);
914 } 920 }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
922 * flush_disk - invalidates all buffer-cache entries on a disk 928 * flush_disk - invalidates all buffer-cache entries on a disk
923 * 929 *
924 * @bdev: struct block device to be flushed 930 * @bdev: struct block device to be flushed
931 * @kill_dirty: flag to guide handling of dirty inodes
925 * 932 *
926 * Invalidates all buffer-cache entries on a disk. It should be called 933 * Invalidates all buffer-cache entries on a disk. It should be called
927 * when a disk has been changed -- either by a media change or online 934 * when a disk has been changed -- either by a media change or online
928 * resize. 935 * resize.
929 */ 936 */
930static void flush_disk(struct block_device *bdev) 937static void flush_disk(struct block_device *bdev, bool kill_dirty)
931{ 938{
932 if (__invalidate_device(bdev)) { 939 if (__invalidate_device(bdev, kill_dirty)) {
933 char name[BDEVNAME_SIZE] = ""; 940 char name[BDEVNAME_SIZE] = "";
934 941
935 if (bdev->bd_disk) 942 if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
966 "%s: detected capacity change from %lld to %lld\n", 973 "%s: detected capacity change from %lld to %lld\n",
967 name, bdev_size, disk_size); 974 name, bdev_size, disk_size);
968 i_size_write(bdev->bd_inode, disk_size); 975 i_size_write(bdev->bd_inode, disk_size);
969 flush_disk(bdev); 976 flush_disk(bdev, false);
970 } 977 }
971} 978}
972EXPORT_SYMBOL(check_disk_size_change); 979EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
1019 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1026 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1020 return 0; 1027 return 0;
1021 1028
1022 flush_disk(bdev); 1029 flush_disk(bdev, true);
1023 if (bdops->revalidate_disk) 1030 if (bdops->revalidate_disk)
1024 bdops->revalidate_disk(bdev->bd_disk); 1031 bdops->revalidate_disk(bdev->bd_disk);
1025 return 1; 1032 return 1;
@@ -1215,12 +1222,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1215 1222
1216 res = __blkdev_get(bdev, mode, 0); 1223 res = __blkdev_get(bdev, mode, 0);
1217 1224
1218 /* __blkdev_get() may alter read only status, check it afterwards */
1219 if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1220 __blkdev_put(bdev, mode, 0);
1221 res = -EACCES;
1222 }
1223
1224 if (whole) { 1225 if (whole) {
1225 /* finish claiming */ 1226 /* finish claiming */
1226 mutex_lock(&bdev->bd_mutex); 1227 mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1299,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1298 if (err) 1299 if (err)
1299 return ERR_PTR(err); 1300 return ERR_PTR(err);
1300 1301
1302 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1303 blkdev_put(bdev, mode);
1304 return ERR_PTR(-EACCES);
1305 }
1306
1301 return bdev; 1307 return bdev;
1302} 1308}
1303EXPORT_SYMBOL(blkdev_get_by_path); 1309EXPORT_SYMBOL(blkdev_get_by_path);
@@ -1601,7 +1607,7 @@ fail:
1601} 1607}
1602EXPORT_SYMBOL(lookup_bdev); 1608EXPORT_SYMBOL(lookup_bdev);
1603 1609
1604int __invalidate_device(struct block_device *bdev) 1610int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1605{ 1611{
1606 struct super_block *sb = get_super(bdev); 1612 struct super_block *sb = get_super(bdev);
1607 int res = 0; 1613 int res = 0;
@@ -1614,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
1614 * hold). 1620 * hold).
1615 */ 1621 */
1616 shrink_dcache_sb(sb); 1622 shrink_dcache_sb(sb);
1617 res = invalidate_inodes(sb); 1623 res = invalidate_inodes(sb, kill_dirty);
1618 drop_super(sb); 1624 drop_super(sb);
1619 } 1625 }
1620 invalidate_bdev(bdev); 1626 invalidate_bdev(bdev);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
729 u64 disk_total; /* total bytes on disk, takes mirrors into 729 u64 disk_total; /* total bytes on disk, takes mirrors into
730 account */ 730 account */
731 731
732 /*
733 * we bump reservation progress every time we decrement
734 * bytes_reserved. This way people waiting for reservations
735 * know something good has happened and they can check
736 * for progress. The number here isn't to be trusted, it
737 * just shows reclaim activity
738 */
739 unsigned long reservation_progress;
740
732 int full; /* indicates that we cannot allocate any more 741 int full; /* indicates that we cannot allocate any more
733 chunks for this space */ 742 chunks for this space */
734 int force_alloc; /* set if we need to force a chunk alloc for 743 int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1263#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1264#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1265#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1266#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1257 1267
1258#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1268#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1259#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1269#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end); 2228 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2229int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes); 2230 u64 num_bytes);
2231int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root, u64 type);
2221 2233
2222/* ctree.c */ 2234/* ctree.c */
2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2235int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b2..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
21 int len = *max_len; 21 int len = *max_len;
22 int type; 22 int type;
23 23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 24 if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 25 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
26 return 255; 26 return 255;
27 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
28 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 return 255;
30 }
27 31
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc01439..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3342 u64 max_reclaim; 3342 u64 max_reclaim;
3343 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left; 3344 long time_left;
3345 int pause = 1;
3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3345 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0; 3346 int loops = 0;
3347 unsigned long progress;
3348 3348
3349 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3350 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
3351 3351
3352 smp_mb(); 3352 smp_mb();
3353 reserved = space_info->bytes_reserved; 3353 reserved = space_info->bytes_reserved;
3354 progress = space_info->reservation_progress;
3354 3355
3355 if (reserved == 0) 3356 if (reserved == 0)
3356 return 0; 3357 return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3366 3367
3367 spin_lock(&space_info->lock); 3368 spin_lock(&space_info->lock);
3368 if (reserved > space_info->bytes_reserved) { 3369 if (reserved > space_info->bytes_reserved)
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3374 reserved = space_info->bytes_reserved; 3371 reserved = space_info->bytes_reserved;
3375 spin_unlock(&space_info->lock); 3372 spin_unlock(&space_info->lock);
3376 3373
3374 loops++;
3375
3377 if (reserved == 0 || reclaimed >= max_reclaim) 3376 if (reserved == 0 || reclaimed >= max_reclaim)
3378 break; 3377 break;
3379 3378
3380 if (trans && trans->transaction->blocked) 3379 if (trans && trans->transaction->blocked)
3381 return -EAGAIN; 3380 return -EAGAIN;
3382 3381
3383 __set_current_state(TASK_INTERRUPTIBLE); 3382 time_left = schedule_timeout_interruptible(1);
3384 time_left = schedule_timeout(pause);
3385 3383
3386 /* We were interrupted, exit */ 3384 /* We were interrupted, exit */
3387 if (time_left) 3385 if (time_left)
3388 break; 3386 break;
3389 3387
3390 pause <<= 1; 3388 /* we've kicked the IO a few times, if anything has been freed,
3391 if (pause > HZ / 10) 3389 * exit. There is no sense in looping here for a long time
3392 pause = HZ / 10; 3390 * when we really need to commit the transaction, or there are
3391 * just too many writers without enough free space
3392 */
3393
3394 if (loops > 3) {
3395 smp_mb();
3396 if (progress != space_info->reservation_progress)
3397 break;
3398 }
3393 3399
3394 } 3400 }
3395 return reclaimed >= to_reclaim; 3401 return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3612 if (num_bytes) { 3618 if (num_bytes) {
3613 spin_lock(&space_info->lock); 3619 spin_lock(&space_info->lock);
3614 space_info->bytes_reserved -= num_bytes; 3620 space_info->bytes_reserved -= num_bytes;
3621 space_info->reservation_progress++;
3615 spin_unlock(&space_info->lock); 3622 spin_unlock(&space_info->lock);
3616 } 3623 }
3617 } 3624 }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3844 if (block_rsv->reserved >= block_rsv->size) { 3851 if (block_rsv->reserved >= block_rsv->size) {
3845 num_bytes = block_rsv->reserved - block_rsv->size; 3852 num_bytes = block_rsv->reserved - block_rsv->size;
3846 sinfo->bytes_reserved -= num_bytes; 3853 sinfo->bytes_reserved -= num_bytes;
3854 sinfo->reservation_progress++;
3847 block_rsv->reserved = block_rsv->size; 3855 block_rsv->reserved = block_rsv->size;
3848 block_rsv->full = 1; 3856 block_rsv->full = 1;
3849 } 3857 }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4005 to_reserve = 0; 4013 to_reserve = 0;
4006 } 4014 }
4007 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4015 spin_unlock(&BTRFS_I(inode)->accounting_lock);
4008
4009 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4016 to_reserve += calc_csum_metadata_size(inode, num_bytes);
4010 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4017 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4011 if (ret) 4018 if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133 btrfs_set_block_group_used(&cache->item, old_val); 4140 btrfs_set_block_group_used(&cache->item, old_val);
4134 cache->reserved -= num_bytes; 4141 cache->reserved -= num_bytes;
4135 cache->space_info->bytes_reserved -= num_bytes; 4142 cache->space_info->bytes_reserved -= num_bytes;
4143 cache->space_info->reservation_progress++;
4136 cache->space_info->bytes_used += num_bytes; 4144 cache->space_info->bytes_used += num_bytes;
4137 cache->space_info->disk_used += num_bytes * factor; 4145 cache->space_info->disk_used += num_bytes * factor;
4138 spin_unlock(&cache->lock); 4146 spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
4184 if (reserved) { 4192 if (reserved) {
4185 cache->reserved -= num_bytes; 4193 cache->reserved -= num_bytes;
4186 cache->space_info->bytes_reserved -= num_bytes; 4194 cache->space_info->bytes_reserved -= num_bytes;
4195 cache->space_info->reservation_progress++;
4187 } 4196 }
4188 spin_unlock(&cache->lock); 4197 spin_unlock(&cache->lock);
4189 spin_unlock(&cache->space_info->lock); 4198 spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
4234 space_info->bytes_readonly += num_bytes; 4243 space_info->bytes_readonly += num_bytes;
4235 cache->reserved -= num_bytes; 4244 cache->reserved -= num_bytes;
4236 space_info->bytes_reserved -= num_bytes; 4245 space_info->bytes_reserved -= num_bytes;
4246 space_info->reservation_progress++;
4237 } 4247 }
4238 spin_unlock(&cache->lock); 4248 spin_unlock(&cache->lock);
4239 spin_unlock(&space_info->lock); 4249 spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4712 if (ret) { 4722 if (ret) {
4713 spin_lock(&cache->space_info->lock); 4723 spin_lock(&cache->space_info->lock);
4714 cache->space_info->bytes_reserved -= buf->len; 4724 cache->space_info->bytes_reserved -= buf->len;
4725 cache->space_info->reservation_progress++;
4715 spin_unlock(&cache->space_info->lock); 4726 spin_unlock(&cache->space_info->lock);
4716 } 4727 }
4717 goto out; 4728 goto out;
@@ -5376,7 +5387,7 @@ again:
5376 num_bytes, data, 1); 5387 num_bytes, data, 1);
5377 goto again; 5388 goto again;
5378 } 5389 }
5379 if (ret == -ENOSPC) { 5390 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5380 struct btrfs_space_info *sinfo; 5391 struct btrfs_space_info *sinfo;
5381 5392
5382 sinfo = __find_space_info(root->fs_info, data); 5393 sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8076,13 @@ out:
8065 return ret; 8076 return ret;
8066} 8077}
8067 8078
8079int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8080 struct btrfs_root *root, u64 type)
8081{
8082 u64 alloc_flags = get_alloc_profile(root, type);
8083 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
8084}
8085
8068/* 8086/*
8069 * helper to account the unused space of all the readonly block group in the 8087 * helper to account the unused space of all the readonly block group in the
8070 * list. takes mirrors into account. 8088 * list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 92ac5192c518..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -2912,6 +2918,46 @@ out:
2912 return sector; 2918 return sector;
2913} 2919}
2914 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2915int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2916 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2917{ 2963{
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2921 u32 flags = 0; 2967 u32 flags = 0;
2922 u32 found_type; 2968 u32 found_type;
2923 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2924 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2925 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2926 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2927 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2928 struct btrfs_path *path; 2976 struct btrfs_path *path;
2929 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2930 int end = 0; 2978 int end = 0;
2931 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2932 unsigned long emflags; 2982 unsigned long emflags;
2933 int hole = 0;
2934 2983
2935 if (len == 0) 2984 if (len == 0)
2936 return -EINVAL; 2985 return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2940 return -ENOMEM; 2989 return -ENOMEM;
2941 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2942 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2943 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2944 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2945 if (ret < 0) { 2998 if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2953 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2954 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2955 3008
2956 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2957 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2958 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2959 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2960 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so its much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2961 } 3023 }
2962 last = found_key.offset;
2963 btrfs_free_path(path); 3024 btrfs_free_path(path);
2964 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2965 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2966 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2967 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2968 if (!em) 3041 if (!em)
2969 goto out; 3042 goto out;
2970 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2973,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2973 } 3046 }
2974 3047
2975 while (!end) { 3048 while (!end) {
2976 hole = 0; 3049 u64 offset_in_extent;
2977 off = em->start + em->len;
2978 if (off >= max)
2979 end = 1;
2980 3050
2981 if (em->block_start == EXTENT_MAP_HOLE) { 3051 /* break if the extent we found is outside the range */
2982 hole = 1; 3052 if (em->start >= max || extent_map_end(em) < off)
2983 goto next; 3053 break;
2984 }
2985 3054
2986 em_start = em->start; 3055 /*
2987 em_len = em->len; 3056 * get_extent may return an extent that starts before our
3057 * requested range. We have to make sure the ranges
3058 * we return to fiemap always move forward and don't
3059 * overlap, so adjust the offsets here
3060 */
3061 em_start = max(em->start, off);
2988 3062
3063 /*
3064 * record the offset from the start of the extent
3065 * for adjusting the disk offset below
3066 */
3067 offset_in_extent = em_start - em->start;
3068 em_end = extent_map_end(em);
3069 em_len = em_end - em_start;
3070 emflags = em->flags;
2989 disko = 0; 3071 disko = 0;
2990 flags = 0; 3072 flags = 0;
2991 3073
3074 /*
3075 * bump off for our next call to get_extent
3076 */
3077 off = extent_map_end(em);
3078 if (off >= max)
3079 end = 1;
3080
2992 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 3081 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
2993 end = 1; 3082 end = 1;
2994 flags |= FIEMAP_EXTENT_LAST; 3083 flags |= FIEMAP_EXTENT_LAST;
@@ -2999,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2999 flags |= (FIEMAP_EXTENT_DELALLOC | 3088 flags |= (FIEMAP_EXTENT_DELALLOC |
3000 FIEMAP_EXTENT_UNKNOWN); 3089 FIEMAP_EXTENT_UNKNOWN);
3001 } else { 3090 } else {
3002 disko = em->block_start; 3091 disko = em->block_start + offset_in_extent;
3003 } 3092 }
3004 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3093 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3005 flags |= FIEMAP_EXTENT_ENCODED; 3094 flags |= FIEMAP_EXTENT_ENCODED;
3006 3095
3007next:
3008 emflags = em->flags;
3009 free_extent_map(em); 3096 free_extent_map(em);
3010 em = NULL; 3097 em = NULL;
3011 if (!end) { 3098 if ((em_start >= last) || em_len == (u64)-1 ||
3012 em = get_extent(inode, NULL, 0, off, max - off, 0); 3099 (last == (u64)-1 && isize <= em_end)) {
3013 if (!em)
3014 goto out;
3015 if (IS_ERR(em)) {
3016 ret = PTR_ERR(em);
3017 goto out;
3018 }
3019 emflags = em->flags;
3020 }
3021
3022 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3023 flags |= FIEMAP_EXTENT_LAST; 3100 flags |= FIEMAP_EXTENT_LAST;
3024 end = 1; 3101 end = 1;
3025 } 3102 }
3026 3103
3027 if (em_start == last) { 3104 /* now scan forward to see if this is really the last extent. */
3105 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3106 get_extent);
3107 if (IS_ERR(em)) {
3108 ret = PTR_ERR(em);
3109 goto out;
3110 }
3111 if (!em) {
3028 flags |= FIEMAP_EXTENT_LAST; 3112 flags |= FIEMAP_EXTENT_LAST;
3029 end = 1; 3113 end = 1;
3030 } 3114 }
3031 3115 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3032 if (!hole) { 3116 em_len, flags);
3033 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3117 if (ret)
3034 em_len, flags); 3118 goto out_free;
3035 if (ret)
3036 goto out_free;
3037 }
3038 } 3119 }
3039out_free: 3120out_free:
3040 free_extent_map(em); 3121 free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d5940..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
70 70
71 /* Flush processor's dcache for this page */ 71 /* Flush processor's dcache for this page */
72 flush_dcache_page(page); 72 flush_dcache_page(page);
73
74 /*
75 * if we get a partial write, we can end up with
76 * partially up to date pages. These add
77 * a lot of complexity, so make sure they don't
78 * happen by forcing this copy to be retried.
79 *
80 * The rest of the btrfs_file_write code will fall
81 * back to page at a time copies after we return 0.
82 */
83 if (!PageUptodate(page) && copied < count)
84 copied = 0;
85
73 iov_iter_advance(i, copied); 86 iov_iter_advance(i, copied);
74 write_bytes -= copied; 87 write_bytes -= copied;
75 total_copied += copied; 88 total_copied += copied;
@@ -763,6 +776,27 @@ out:
763} 776}
764 777
765/* 778/*
779 * on error we return an unlocked page and the error value
780 * on success we return a locked page and 0
781 */
782static int prepare_uptodate_page(struct page *page, u64 pos)
783{
784 int ret = 0;
785
786 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
787 ret = btrfs_readpage(NULL, page);
788 if (ret)
789 return ret;
790 lock_page(page);
791 if (!PageUptodate(page)) {
792 unlock_page(page);
793 return -EIO;
794 }
795 }
796 return 0;
797}
798
799/*
766 * this gets pages into the page cache and locks them down, it also properly 800 * this gets pages into the page cache and locks them down, it also properly
767 * waits for data=ordered extents to finish before allowing the pages to be 801 * waits for data=ordered extents to finish before allowing the pages to be
768 * modified. 802 * modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
777 unsigned long index = pos >> PAGE_CACHE_SHIFT; 811 unsigned long index = pos >> PAGE_CACHE_SHIFT;
778 struct inode *inode = fdentry(file)->d_inode; 812 struct inode *inode = fdentry(file)->d_inode;
779 int err = 0; 813 int err = 0;
814 int faili = 0;
780 u64 start_pos; 815 u64 start_pos;
781 u64 last_pos; 816 u64 last_pos;
782 817
@@ -794,15 +829,24 @@ again:
794 for (i = 0; i < num_pages; i++) { 829 for (i = 0; i < num_pages; i++) {
795 pages[i] = grab_cache_page(inode->i_mapping, index + i); 830 pages[i] = grab_cache_page(inode->i_mapping, index + i);
796 if (!pages[i]) { 831 if (!pages[i]) {
797 int c; 832 faili = i - 1;
798 for (c = i - 1; c >= 0; c--) { 833 err = -ENOMEM;
799 unlock_page(pages[c]); 834 goto fail;
800 page_cache_release(pages[c]); 835 }
801 } 836
802 return -ENOMEM; 837 if (i == 0)
838 err = prepare_uptodate_page(pages[i], pos);
839 if (i == num_pages - 1)
840 err = prepare_uptodate_page(pages[i],
841 pos + write_bytes);
842 if (err) {
843 page_cache_release(pages[i]);
844 faili = i - 1;
845 goto fail;
803 } 846 }
804 wait_on_page_writeback(pages[i]); 847 wait_on_page_writeback(pages[i]);
805 } 848 }
849 err = 0;
806 if (start_pos < inode->i_size) { 850 if (start_pos < inode->i_size) {
807 struct btrfs_ordered_extent *ordered; 851 struct btrfs_ordered_extent *ordered;
808 lock_extent_bits(&BTRFS_I(inode)->io_tree, 852 lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
842 WARN_ON(!PageLocked(pages[i])); 886 WARN_ON(!PageLocked(pages[i]));
843 } 887 }
844 return 0; 888 return 0;
889fail:
890 while (faili >= 0) {
891 unlock_page(pages[faili]);
892 page_cache_release(pages[faili]);
893 faili--;
894 }
895 return err;
896
845} 897}
846 898
847static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 899static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
851 struct file *file = iocb->ki_filp; 903 struct file *file = iocb->ki_filp;
852 struct inode *inode = fdentry(file)->d_inode; 904 struct inode *inode = fdentry(file)->d_inode;
853 struct btrfs_root *root = BTRFS_I(inode)->root; 905 struct btrfs_root *root = BTRFS_I(inode)->root;
854 struct page *pinned[2];
855 struct page **pages = NULL; 906 struct page **pages = NULL;
856 struct iov_iter i; 907 struct iov_iter i;
857 loff_t *ppos = &iocb->ki_pos; 908 loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
872 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
873 (file->f_flags & O_DIRECT)); 924 (file->f_flags & O_DIRECT));
874 925
875 pinned[0] = NULL;
876 pinned[1] = NULL;
877
878 start_pos = pos; 926 start_pos = pos;
879 927
880 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
962 first_index = pos >> PAGE_CACHE_SHIFT; 1010 first_index = pos >> PAGE_CACHE_SHIFT;
963 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
964 1012
965 /*
966 * there are lots of better ways to do this, but this code
967 * makes sure the first and last page in the file range are
968 * up to date and ready for cow
969 */
970 if ((pos & (PAGE_CACHE_SIZE - 1))) {
971 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
972 if (!PageUptodate(pinned[0])) {
973 ret = btrfs_readpage(NULL, pinned[0]);
974 BUG_ON(ret);
975 wait_on_page_locked(pinned[0]);
976 } else {
977 unlock_page(pinned[0]);
978 }
979 }
980 if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
981 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
982 if (!PageUptodate(pinned[1])) {
983 ret = btrfs_readpage(NULL, pinned[1]);
984 BUG_ON(ret);
985 wait_on_page_locked(pinned[1]);
986 } else {
987 unlock_page(pinned[1]);
988 }
989 }
990
991 while (iov_iter_count(&i) > 0) { 1013 while (iov_iter_count(&i) > 0) {
992 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
993 size_t write_bytes = min(iov_iter_count(&i), 1015 size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1024 1046
1025 copied = btrfs_copy_from_user(pos, num_pages, 1047 copied = btrfs_copy_from_user(pos, num_pages,
1026 write_bytes, pages, &i); 1048 write_bytes, pages, &i);
1027 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> 1049
1028 PAGE_CACHE_SHIFT; 1050 /*
1051 * if we have trouble faulting in the pages, fall
1052 * back to one page at a time
1053 */
1054 if (copied < write_bytes)
1055 nrptrs = 1;
1056
1057 if (copied == 0)
1058 dirty_pages = 0;
1059 else
1060 dirty_pages = (copied + offset +
1061 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT;
1029 1063
1030 if (num_pages > dirty_pages) { 1064 if (num_pages > dirty_pages) {
1031 if (copied > 0) 1065 if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
1069 err = ret; 1103 err = ret;
1070 1104
1071 kfree(pages); 1105 kfree(pages);
1072 if (pinned[0])
1073 page_cache_release(pinned[0]);
1074 if (pinned[1])
1075 page_cache_release(pinned[1]);
1076 *ppos = pos; 1106 *ppos = pos;
1077 1107
1078 /* 1108 /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb9bd7832b6d..512c3d1da083 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -90,13 +90,14 @@ static noinline int cow_file_range(struct inode *inode,
90 unsigned long *nr_written, int unlock); 90 unsigned long *nr_written, int unlock);
91 91
92static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 92static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
93 struct inode *inode, struct inode *dir) 93 struct inode *inode, struct inode *dir,
94 const struct qstr *qstr)
94{ 95{
95 int err; 96 int err;
96 97
97 err = btrfs_init_acl(trans, inode, dir); 98 err = btrfs_init_acl(trans, inode, dir);
98 if (!err) 99 if (!err)
99 err = btrfs_xattr_security_init(trans, inode, dir); 100 err = btrfs_xattr_security_init(trans, inode, dir, qstr);
100 return err; 101 return err;
101} 102}
102 103
@@ -1913,7 +1914,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1913 1914
1914 private = 0; 1915 private = 0;
1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1916 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1916 (u64)-1, 1, EXTENT_DIRTY)) { 1917 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1918 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1918 start, &private_failure); 1919 start, &private_failure);
1919 if (ret == 0) { 1920 if (ret == 0) {
@@ -4704,7 +4705,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4704 if (IS_ERR(inode)) 4705 if (IS_ERR(inode))
4705 goto out_unlock; 4706 goto out_unlock;
4706 4707
4707 err = btrfs_init_inode_security(trans, inode, dir); 4708 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4708 if (err) { 4709 if (err) {
4709 drop_inode = 1; 4710 drop_inode = 1;
4710 goto out_unlock; 4711 goto out_unlock;
@@ -4765,7 +4766,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4765 if (IS_ERR(inode)) 4766 if (IS_ERR(inode))
4766 goto out_unlock; 4767 goto out_unlock;
4767 4768
4768 err = btrfs_init_inode_security(trans, inode, dir); 4769 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4769 if (err) { 4770 if (err) {
4770 drop_inode = 1; 4771 drop_inode = 1;
4771 goto out_unlock; 4772 goto out_unlock;
@@ -4806,9 +4807,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 int err; 4807 int err;
4807 int drop_inode = 0; 4808 int drop_inode = 0;
4808 4809
4809 if (inode->i_nlink == 0)
4810 return -ENOENT;
4811
4812 /* do not allow sys_link's with other subvols of the same device */ 4810 /* do not allow sys_link's with other subvols of the same device */
4813 if (root->objectid != BTRFS_I(inode)->root->objectid) 4811 if (root->objectid != BTRFS_I(inode)->root->objectid)
4814 return -EPERM; 4812 return -EPERM;
@@ -4821,10 +4819,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4821 goto fail; 4819 goto fail;
4822 4820
4823 /* 4821 /*
4824 * 1 item for inode ref 4822 * 2 items for inode and inode ref
4825 * 2 items for dir items 4823 * 2 items for dir items
4824 * 1 item for parent inode
4826 */ 4825 */
4827 trans = btrfs_start_transaction(root, 3); 4826 trans = btrfs_start_transaction(root, 5);
4828 if (IS_ERR(trans)) { 4827 if (IS_ERR(trans)) {
4829 err = PTR_ERR(trans); 4828 err = PTR_ERR(trans);
4830 goto fail; 4829 goto fail;
@@ -4893,7 +4892,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4893 4892
4894 drop_on_err = 1; 4893 drop_on_err = 1;
4895 4894
4896 err = btrfs_init_inode_security(trans, inode, dir); 4895 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4897 if (err) 4896 if (err)
4898 goto out_fail; 4897 goto out_fail;
4899 4898
@@ -5280,6 +5279,128 @@ out:
5280 return em; 5279 return em;
5281} 5280}
5282 5281
5282struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
5283 size_t pg_offset, u64 start, u64 len,
5284 int create)
5285{
5286 struct extent_map *em;
5287 struct extent_map *hole_em = NULL;
5288 u64 range_start = start;
5289 u64 end;
5290 u64 found;
5291 u64 found_end;
5292 int err = 0;
5293
5294 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
5295 if (IS_ERR(em))
5296 return em;
5297 if (em) {
5298 /*
5299 * if our em maps to a hole, there might
5300 * actually be delalloc bytes behind it
5301 */
5302 if (em->block_start != EXTENT_MAP_HOLE)
5303 return em;
5304 else
5305 hole_em = em;
5306 }
5307
5308 /* check to see if we've wrapped (len == -1 or similar) */
5309 end = start + len;
5310 if (end < start)
5311 end = (u64)-1;
5312 else
5313 end -= 1;
5314
5315 em = NULL;
5316
5317 /* ok, we didn't find anything, lets look for delalloc */
5318 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
5319 end, len, EXTENT_DELALLOC, 1);
5320 found_end = range_start + found;
5321 if (found_end < range_start)
5322 found_end = (u64)-1;
5323
5324 /*
5325 * we didn't find anything useful, return
5326 * the original results from get_extent()
5327 */
5328 if (range_start > end || found_end <= start) {
5329 em = hole_em;
5330 hole_em = NULL;
5331 goto out;
5332 }
5333
5334 /* adjust the range_start to make sure it doesn't
5335 * go backwards from the start they passed in
5336 */
5337 range_start = max(start,range_start);
5338 found = found_end - range_start;
5339
5340 if (found > 0) {
5341 u64 hole_start = start;
5342 u64 hole_len = len;
5343
5344 em = alloc_extent_map(GFP_NOFS);
5345 if (!em) {
5346 err = -ENOMEM;
5347 goto out;
5348 }
5349 /*
5350 * when btrfs_get_extent can't find anything it
5351 * returns one huge hole
5352 *
5353 * make sure what it found really fits our range, and
5354 * adjust to make sure it is based on the start from
5355 * the caller
5356 */
5357 if (hole_em) {
5358 u64 calc_end = extent_map_end(hole_em);
5359
5360 if (calc_end <= start || (hole_em->start > end)) {
5361 free_extent_map(hole_em);
5362 hole_em = NULL;
5363 } else {
5364 hole_start = max(hole_em->start, start);
5365 hole_len = calc_end - hole_start;
5366 }
5367 }
5368 em->bdev = NULL;
5369 if (hole_em && range_start > hole_start) {
5370 /* our hole starts before our delalloc, so we
5371 * have to return just the parts of the hole
5372 * that go until the delalloc starts
5373 */
5374 em->len = min(hole_len,
5375 range_start - hole_start);
5376 em->start = hole_start;
5377 em->orig_start = hole_start;
5378 /*
5379 * don't adjust block start at all,
5380 * it is fixed at EXTENT_MAP_HOLE
5381 */
5382 em->block_start = hole_em->block_start;
5383 em->block_len = hole_len;
5384 } else {
5385 em->start = range_start;
5386 em->len = found;
5387 em->orig_start = range_start;
5388 em->block_start = EXTENT_MAP_DELALLOC;
5389 em->block_len = found;
5390 }
5391 } else if (hole_em) {
5392 return hole_em;
5393 }
5394out:
5395
5396 free_extent_map(hole_em);
5397 if (err) {
5398 free_extent_map(em);
5399 return ERR_PTR(err);
5400 }
5401 return em;
5402}
5403
5283static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5404static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5284 u64 start, u64 len) 5405 u64 start, u64 len)
5285{ 5406{
@@ -5934,6 +6055,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5934 if (!skip_sum) { 6055 if (!skip_sum) {
5935 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); 6056 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
5936 if (!dip->csums) { 6057 if (!dip->csums) {
6058 kfree(dip);
5937 ret = -ENOMEM; 6059 ret = -ENOMEM;
5938 goto free_ordered; 6060 goto free_ordered;
5939 } 6061 }
@@ -6102,7 +6224,7 @@ out:
6102static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6224static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
6103 __u64 start, __u64 len) 6225 __u64 start, __u64 len)
6104{ 6226{
6105 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 6227 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
6106} 6228}
6107 6229
6108int btrfs_readpage(struct file *file, struct page *page) 6230int btrfs_readpage(struct file *file, struct page *page)
@@ -6982,7 +7104,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
6982 if (IS_ERR(inode)) 7104 if (IS_ERR(inode))
6983 goto out_unlock; 7105 goto out_unlock;
6984 7106
6985 err = btrfs_init_inode_security(trans, inode, dir); 7107 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6986 if (err) { 7108 if (err) {
6987 drop_inode = 1; 7109 drop_inode = 1;
6988 goto out_unlock; 7110 goto out_unlock;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6aaa5e..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1071 if (copy_from_user(&flags, arg, sizeof(flags))) 1071 if (copy_from_user(&flags, arg, sizeof(flags)))
1072 return -EFAULT; 1072 return -EFAULT;
1073 1073
1074 if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) 1074 if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
1075 return -EINVAL; 1075 return -EINVAL;
1076 1076
1077 if (flags & ~BTRFS_SUBVOL_RDONLY) 1077 if (flags & ~BTRFS_SUBVOL_RDONLY)
1078 return -EOPNOTSUPP; 1078 return -EOPNOTSUPP;
1079 1079
1080 if (!is_owner_or_cap(inode))
1081 return -EACCES;
1082
1080 down_write(&root->fs_info->subvol_sem); 1083 down_write(&root->fs_info->subvol_sem);
1081 1084
1082 /* nothing to do */ 1085 /* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1097 goto out_reset; 1100 goto out_reset;
1098 } 1101 }
1099 1102
1100 ret = btrfs_update_root(trans, root, 1103 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1101 &root->root_key, &root->root_item); 1104 &root->root_key, &root->root_item);
1102 1105
1103 btrfs_commit_transaction(trans, root); 1106 btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
280 unsigned long tot_out; 280 unsigned long tot_out;
281 unsigned long tot_len; 281 unsigned long tot_len;
282 char *buf; 282 char *buf;
283 bool may_late_unmap, need_unmap;
283 284
284 data_in = kmap(pages_in[0]); 285 data_in = kmap(pages_in[0]);
285 tot_len = read_compress_length(data_in); 286 tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
300 301
301 tot_in += in_len; 302 tot_in += in_len;
302 working_bytes = in_len; 303 working_bytes = in_len;
304 may_late_unmap = need_unmap = false;
303 305
304 /* fast path: avoid using the working buffer */ 306 /* fast path: avoid using the working buffer */
305 if (in_page_bytes_left >= in_len) { 307 if (in_page_bytes_left >= in_len) {
306 buf = data_in + in_offset; 308 buf = data_in + in_offset;
307 bytes = in_len; 309 bytes = in_len;
310 may_late_unmap = true;
308 goto cont; 311 goto cont;
309 } 312 }
310 313
@@ -329,14 +332,17 @@ cont:
329 if (working_bytes == 0 && tot_in >= tot_len) 332 if (working_bytes == 0 && tot_in >= tot_len)
330 break; 333 break;
331 334
332 kunmap(pages_in[page_in_index]); 335 if (page_in_index + 1 >= total_pages_in) {
333 page_in_index++;
334 if (page_in_index >= total_pages_in) {
335 ret = -1; 336 ret = -1;
336 data_in = NULL;
337 goto done; 337 goto done;
338 } 338 }
339 data_in = kmap(pages_in[page_in_index]); 339
340 if (may_late_unmap)
341 need_unmap = true;
342 else
343 kunmap(pages_in[page_in_index]);
344
345 data_in = kmap(pages_in[++page_in_index]);
340 346
341 in_page_bytes_left = PAGE_CACHE_SIZE; 347 in_page_bytes_left = PAGE_CACHE_SIZE;
342 in_offset = 0; 348 in_offset = 0;
@@ -346,6 +352,8 @@ cont:
346 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); 352 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
347 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, 353 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
348 &out_len); 354 &out_len);
355 if (need_unmap)
356 kunmap(pages_in[page_in_index - 1]);
349 if (ret != LZO_E_OK) { 357 if (ret != LZO_E_OK) {
350 printk(KERN_WARNING "btrfs decompress failed\n"); 358 printk(KERN_WARNING "btrfs decompress failed\n");
351 ret = -1; 359 ret = -1;
@@ -363,8 +371,7 @@ cont:
363 break; 371 break;
364 } 372 }
365done: 373done:
366 if (data_in) 374 kunmap(pages_in[page_in_index]);
367 kunmap(pages_in[page_in_index]);
368 return ret; 375 return ret;
369} 376}
370 377
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4ed9447..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3654 u32 item_size; 3654 u32 item_size;
3655 int ret; 3655 int ret;
3656 int err = 0; 3656 int err = 0;
3657 int progress = 0;
3657 3658
3658 path = btrfs_alloc_path(); 3659 path = btrfs_alloc_path();
3659 if (!path) 3660 if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3666 } 3667 }
3667 3668
3668 while (1) { 3669 while (1) {
3670 progress++;
3669 trans = btrfs_start_transaction(rc->extent_root, 0); 3671 trans = btrfs_start_transaction(rc->extent_root, 0);
3670 BUG_ON(IS_ERR(trans)); 3672 BUG_ON(IS_ERR(trans));
3671 3673restart:
3672 if (update_backref_cache(trans, &rc->backref_cache)) { 3674 if (update_backref_cache(trans, &rc->backref_cache)) {
3673 btrfs_end_transaction(trans, rc->extent_root); 3675 btrfs_end_transaction(trans, rc->extent_root);
3674 continue; 3676 continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3781 } 3783 }
3782 } 3784 }
3783 } 3785 }
3786 if (trans && progress && err == -ENOSPC) {
3787 ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3788 rc->block_group->flags);
3789 if (ret == 0) {
3790 err = 0;
3791 progress = 0;
3792 goto restart;
3793 }
3794 }
3784 3795
3785 btrfs_release_path(rc->extent_root, path); 3796 btrfs_release_path(rc->extent_root, path);
3786 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3797 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d28..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
159 Opt_enospc_debug, Opt_err,
159}; 160};
160 161
161static match_table_t tokens = { 162static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
184 {Opt_space_cache, "space_cache"}, 185 {Opt_space_cache, "space_cache"},
185 {Opt_clear_cache, "clear_cache"}, 186 {Opt_clear_cache, "clear_cache"},
186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 187 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
188 {Opt_enospc_debug, "enospc_debug"},
187 {Opt_err, NULL}, 189 {Opt_err, NULL},
188}; 190};
189 191
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
358 case Opt_user_subvol_rm_allowed: 360 case Opt_user_subvol_rm_allowed:
359 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 361 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
360 break; 362 break;
363 case Opt_enospc_debug:
364 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
365 break;
361 case Opt_err: 366 case Opt_err:
362 printk(KERN_INFO "btrfs: unrecognized mount option " 367 printk(KERN_INFO "btrfs: unrecognized mount option "
363 "'%s'\n", p); 368 "'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index af7dbca15276..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1338 1338
1339 ret = btrfs_shrink_device(device, 0); 1339 ret = btrfs_shrink_device(device, 0);
1340 if (ret) 1340 if (ret)
1341 goto error_brelse; 1341 goto error_undo;
1342 1342
1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1344 if (ret) 1344 if (ret)
1345 goto error_brelse; 1345 goto error_undo;
1346 1346
1347 device->in_fs_metadata = 0; 1347 device->in_fs_metadata = 0;
1348 1348
@@ -1416,6 +1416,13 @@ out:
1416 mutex_unlock(&root->fs_info->volume_mutex); 1416 mutex_unlock(&root->fs_info->volume_mutex);
1417 mutex_unlock(&uuid_mutex); 1417 mutex_unlock(&uuid_mutex);
1418 return ret; 1418 return ret;
1419error_undo:
1420 if (device->writeable) {
1421 list_add(&device->dev_alloc_list,
1422 &root->fs_info->fs_devices->alloc_list);
1423 root->fs_info->fs_devices->rw_devices++;
1424 }
1425 goto error_brelse;
1419} 1426}
1420 1427
1421/* 1428/*
@@ -1633,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1633 device->dev_root = root->fs_info->dev_root; 1640 device->dev_root = root->fs_info->dev_root;
1634 device->bdev = bdev; 1641 device->bdev = bdev;
1635 device->in_fs_metadata = 1; 1642 device->in_fs_metadata = 1;
1636 device->mode = 0; 1643 device->mode = FMODE_EXCL;
1637 set_blocksize(device->bdev, 4096); 1644 set_blocksize(device->bdev, 4096);
1638 1645
1639 if (seeding_dev) { 1646 if (seeding_dev) {
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5776531dc2b..d779cefcfd7d 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -370,7 +370,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
370} 370}
371 371
372int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, 372int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
373 struct inode *inode, struct inode *dir) 373 struct inode *inode, struct inode *dir,
374 const struct qstr *qstr)
374{ 375{
375 int err; 376 int err;
376 size_t len; 377 size_t len;
@@ -378,7 +379,8 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
378 char *suffix; 379 char *suffix;
379 char *name; 380 char *name;
380 381
381 err = security_inode_init_security(inode, dir, &suffix, &value, &len); 382 err = security_inode_init_security(inode, dir, qstr, &suffix, &value,
383 &len);
382 if (err) { 384 if (err) {
383 if (err == -EOPNOTSUPP) 385 if (err == -EOPNOTSUPP)
384 return 0; 386 return 0;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 7a43fd640bbb..b3cc8039134b 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -37,6 +37,7 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name,
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir); 40 struct inode *inode, struct inode *dir,
41 const struct qstr *qstr);
41 42
42#endif /* __XATTR__ */ 43#endif /* __XATTR__ */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 42c7fafc8bfe..a0358c2189cb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -275,6 +275,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
275 bool preemptive) 275 bool preemptive)
276{ 276{
277 struct dentry *grave, *trap; 277 struct dentry *grave, *trap;
278 struct path path, path_to_graveyard;
278 char nbuffer[8 + 8 + 1]; 279 char nbuffer[8 + 8 + 1];
279 int ret; 280 int ret;
280 281
@@ -287,10 +288,18 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
287 /* non-directories can just be unlinked */ 288 /* non-directories can just be unlinked */
288 if (!S_ISDIR(rep->d_inode->i_mode)) { 289 if (!S_ISDIR(rep->d_inode->i_mode)) {
289 _debug("unlink stale object"); 290 _debug("unlink stale object");
290 ret = vfs_unlink(dir->d_inode, rep);
291 291
292 if (preemptive) 292 path.mnt = cache->mnt;
293 cachefiles_mark_object_buried(cache, rep); 293 path.dentry = dir;
294 ret = security_path_unlink(&path, rep);
295 if (ret < 0) {
296 cachefiles_io_error(cache, "Unlink security error");
297 } else {
298 ret = vfs_unlink(dir->d_inode, rep);
299
300 if (preemptive)
301 cachefiles_mark_object_buried(cache, rep);
302 }
294 303
295 mutex_unlock(&dir->d_inode->i_mutex); 304 mutex_unlock(&dir->d_inode->i_mutex);
296 305
@@ -379,12 +388,23 @@ try_again:
379 } 388 }
380 389
381 /* attempt the rename */ 390 /* attempt the rename */
382 ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave); 391 path.mnt = cache->mnt;
383 if (ret != 0 && ret != -ENOMEM) 392 path.dentry = dir;
384 cachefiles_io_error(cache, "Rename failed with error %d", ret); 393 path_to_graveyard.mnt = cache->mnt;
394 path_to_graveyard.dentry = cache->graveyard;
395 ret = security_path_rename(&path, rep, &path_to_graveyard, grave);
396 if (ret < 0) {
397 cachefiles_io_error(cache, "Rename security error %d", ret);
398 } else {
399 ret = vfs_rename(dir->d_inode, rep,
400 cache->graveyard->d_inode, grave);
401 if (ret != 0 && ret != -ENOMEM)
402 cachefiles_io_error(cache,
403 "Rename failed with error %d", ret);
385 404
386 if (preemptive) 405 if (preemptive)
387 cachefiles_mark_object_buried(cache, rep); 406 cachefiles_mark_object_buried(cache, rep);
407 }
388 408
389 unlock_rename(cache->graveyard, dir); 409 unlock_rename(cache->graveyard, dir);
390 dput(grave); 410 dput(grave);
@@ -448,6 +468,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
448{ 468{
449 struct cachefiles_cache *cache; 469 struct cachefiles_cache *cache;
450 struct dentry *dir, *next = NULL; 470 struct dentry *dir, *next = NULL;
471 struct path path;
451 unsigned long start; 472 unsigned long start;
452 const char *name; 473 const char *name;
453 int ret, nlen; 474 int ret, nlen;
@@ -458,6 +479,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
458 479
459 cache = container_of(parent->fscache.cache, 480 cache = container_of(parent->fscache.cache,
460 struct cachefiles_cache, cache); 481 struct cachefiles_cache, cache);
482 path.mnt = cache->mnt;
461 483
462 ASSERT(parent->dentry); 484 ASSERT(parent->dentry);
463 ASSERT(parent->dentry->d_inode); 485 ASSERT(parent->dentry->d_inode);
@@ -511,6 +533,10 @@ lookup_again:
511 if (ret < 0) 533 if (ret < 0)
512 goto create_error; 534 goto create_error;
513 535
536 path.dentry = dir;
537 ret = security_path_mkdir(&path, next, 0);
538 if (ret < 0)
539 goto create_error;
514 start = jiffies; 540 start = jiffies;
515 ret = vfs_mkdir(dir->d_inode, next, 0); 541 ret = vfs_mkdir(dir->d_inode, next, 0);
516 cachefiles_hist(cachefiles_mkdir_histogram, start); 542 cachefiles_hist(cachefiles_mkdir_histogram, start);
@@ -536,6 +562,10 @@ lookup_again:
536 if (ret < 0) 562 if (ret < 0)
537 goto create_error; 563 goto create_error;
538 564
565 path.dentry = dir;
566 ret = security_path_mknod(&path, next, S_IFREG, 0);
567 if (ret < 0)
568 goto create_error;
539 start = jiffies; 569 start = jiffies;
540 ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); 570 ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
541 cachefiles_hist(cachefiles_create_histogram, start); 571 cachefiles_hist(cachefiles_create_histogram, start);
@@ -692,6 +722,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
692{ 722{
693 struct dentry *subdir; 723 struct dentry *subdir;
694 unsigned long start; 724 unsigned long start;
725 struct path path;
695 int ret; 726 int ret;
696 727
697 _enter(",,%s", dirname); 728 _enter(",,%s", dirname);
@@ -719,6 +750,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
719 750
720 _debug("attempt mkdir"); 751 _debug("attempt mkdir");
721 752
753 path.mnt = cache->mnt;
754 path.dentry = dir;
755 ret = security_path_mkdir(&path, subdir, 0700);
756 if (ret < 0)
757 goto mkdir_error;
722 ret = vfs_mkdir(dir->d_inode, subdir, 0700); 758 ret = vfs_mkdir(dir->d_inode, subdir, 0700);
723 if (ret < 0) 759 if (ret < 0)
724 goto mkdir_error; 760 goto mkdir_error;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bc68de8edd7..ebafa65a29b6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -409,7 +409,7 @@ more:
409 spin_lock(&inode->i_lock); 409 spin_lock(&inode->i_lock);
410 if (ci->i_release_count == fi->dir_release_count) { 410 if (ci->i_release_count == fi->dir_release_count) {
411 dout(" marking %p complete\n", inode); 411 dout(" marking %p complete\n", inode);
412 ci->i_ceph_flags |= CEPH_I_COMPLETE; 412 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
413 ci->i_max_offset = filp->f_pos; 413 ci->i_max_offset = filp->f_pos;
414 } 414 }
415 spin_unlock(&inode->i_lock); 415 spin_unlock(&inode->i_lock);
@@ -496,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
496 496
497 /* .snap dir? */ 497 /* .snap dir? */
498 if (err == -ENOENT && 498 if (err == -ENOENT &&
499 ceph_snap(parent) == CEPH_NOSNAP &&
499 strcmp(dentry->d_name.name, 500 strcmp(dentry->d_name.name,
500 fsc->mount_options->snapdir_name) == 0) { 501 fsc->mount_options->snapdir_name) == 0) {
501 struct inode *inode = ceph_get_snapdir(parent); 502 struct inode *inode = ceph_get_snapdir(parent);
@@ -992,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
992{ 993{
993 struct inode *dir; 994 struct inode *dir;
994 995
995 if (nd->flags & LOOKUP_RCU) 996 if (nd && nd->flags & LOOKUP_RCU)
996 return -ECHILD; 997 return -ECHILD;
997 998
998 dir = dentry->d_parent->d_inode; 999 dir = dentry->d_parent->d_inode;
@@ -1029,28 +1030,8 @@ out_touch:
1029static void ceph_dentry_release(struct dentry *dentry) 1030static void ceph_dentry_release(struct dentry *dentry)
1030{ 1031{
1031 struct ceph_dentry_info *di = ceph_dentry(dentry); 1032 struct ceph_dentry_info *di = ceph_dentry(dentry);
1032 struct inode *parent_inode = NULL;
1033 u64 snapid = CEPH_NOSNAP;
1034 1033
1035 if (!IS_ROOT(dentry)) { 1034 dout("dentry_release %p\n", dentry);
1036 parent_inode = dentry->d_parent->d_inode;
1037 if (parent_inode)
1038 snapid = ceph_snap(parent_inode);
1039 }
1040 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1041 if (parent_inode && snapid != CEPH_SNAPDIR) {
1042 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1043
1044 spin_lock(&parent_inode->i_lock);
1045 if (ci->i_shared_gen == di->lease_shared_gen ||
1046 snapid <= CEPH_MAXSNAP) {
1047 dout(" clearing %p complete (d_release)\n",
1048 parent_inode);
1049 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1050 ci->i_release_count++;
1051 }
1052 spin_unlock(&parent_inode->i_lock);
1053 }
1054 if (di) { 1035 if (di) {
1055 ceph_dentry_lru_del(dentry); 1036 ceph_dentry_lru_del(dentry);
1056 if (di->lease_session) 1037 if (di->lease_session)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa479..193bfa5e9cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode,
707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 707 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
709 dout(" marking %p complete (empty)\n", inode); 709 dout(" marking %p complete (empty)\n", inode);
710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 710 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
711 ci->i_max_offset = 2; 711 ci->i_max_offset = 2;
712 } 712 }
713 break; 713 break;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 39c243acd062..f40b9139e437 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
584 if (lastinode) 584 if (lastinode)
585 iput(lastinode); 585 iput(lastinode);
586 586
587 dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); 587 list_for_each_entry(child, &realm->children, child_item) {
588 list_for_each_entry(child, &realm->children, child_item) 588 dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
589 queue_realm_cap_snaps(child); 589 realm, realm->ino, child, child->ino);
590 list_del_init(&child->dirty_item);
591 list_add(&child->dirty_item, &realm->dirty_item);
592 }
590 593
594 list_del_init(&realm->dirty_item);
591 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); 595 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
592} 596}
593 597
@@ -683,7 +687,9 @@ more:
683 * queue cap snaps _after_ we've built the new snap contexts, 687 * queue cap snaps _after_ we've built the new snap contexts,
684 * so that i_head_snapc can be set appropriately. 688 * so that i_head_snapc can be set appropriately.
685 */ 689 */
686 list_for_each_entry(realm, &dirty_realms, dirty_item) { 690 while (!list_empty(&dirty_realms)) {
691 realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
692 dirty_item);
687 queue_realm_cap_snaps(realm); 693 queue_realm_cap_snaps(realm);
688 } 694 }
689 695
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 4a3330235d55..a9371b6578c0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -127,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
127extern const struct export_operations cifs_export_ops; 127extern const struct export_operations cifs_export_ops;
128#endif /* EXPERIMENTAL */ 128#endif /* EXPERIMENTAL */
129 129
130#define CIFS_VERSION "1.70" 130#define CIFS_VERSION "1.71"
131#endif /* _CIFSFS_H */ 131#endif /* _CIFSFS_H */
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 8d9189f64477..79f641eeda30 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
170{ 170{
171 int rc, alen, slen; 171 int rc, alen, slen;
172 const char *pct; 172 const char *pct;
173 char *endp, scope_id[13]; 173 char scope_id[13];
174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst; 174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; 175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
176 176
@@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
197 memcpy(scope_id, pct + 1, slen); 197 memcpy(scope_id, pct + 1, slen);
198 scope_id[slen] = '\0'; 198 scope_id[slen] = '\0';
199 199
200 s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); 200 rc = strict_strtoul(scope_id, 0,
201 if (endp != scope_id + slen) 201 (unsigned long *)&s6->sin6_scope_id);
202 return 0; 202 rc = (rc == 0) ? 1 : 0;
203 } 203 }
204 204
205 return rc; 205 return rc;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 1adc9625a344..16765703131b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -656,13 +656,13 @@ ssetup_ntlmssp_authenticate:
656 656
657 if (type == LANMAN) { 657 if (type == LANMAN) {
658#ifdef CONFIG_CIFS_WEAK_PW_HASH 658#ifdef CONFIG_CIFS_WEAK_PW_HASH
659 char lnm_session_key[CIFS_SESS_KEY_SIZE]; 659 char lnm_session_key[CIFS_AUTH_RESP_SIZE];
660 660
661 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; 661 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
662 662
663 /* no capabilities flags in old lanman negotiation */ 663 /* no capabilities flags in old lanman negotiation */
664 664
665 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 665 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
666 666
667 /* Calculate hash with password and copy into bcc_ptr. 667 /* Calculate hash with password and copy into bcc_ptr.
668 * Encryption Key (stored as in cryptkey) gets used if the 668 * Encryption Key (stored as in cryptkey) gets used if the
@@ -675,8 +675,8 @@ ssetup_ntlmssp_authenticate:
675 true : false, lnm_session_key); 675 true : false, lnm_session_key);
676 676
677 ses->flags |= CIFS_SES_LANMAN; 677 ses->flags |= CIFS_SES_LANMAN;
678 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE); 678 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
679 bcc_ptr += CIFS_SESS_KEY_SIZE; 679 bcc_ptr += CIFS_AUTH_RESP_SIZE;
680 680
681 /* can not sign if LANMAN negotiated so no need 681 /* can not sign if LANMAN negotiated so no need
682 to calculate signing key? but what if server 682 to calculate signing key? but what if server
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6cc..c6d31a3bab88 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
262 */ 262 */
263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) 263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
264{ 264{
265 struct path path; 265 struct kstatfs tmp;
266 int error; 266 int error = user_statfs(pathname, &tmp);
267 267 if (!error)
268 error = user_path(pathname, &path); 268 error = put_compat_statfs(buf, &tmp);
269 if (!error) {
270 struct kstatfs tmp;
271 error = vfs_statfs(&path, &tmp);
272 if (!error)
273 error = put_compat_statfs(buf, &tmp);
274 path_put(&path);
275 }
276 return error; 269 return error;
277} 270}
278 271
279asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) 272asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
280{ 273{
281 struct file * file;
282 struct kstatfs tmp; 274 struct kstatfs tmp;
283 int error; 275 int error = fd_statfs(fd, &tmp);
284
285 error = -EBADF;
286 file = fget(fd);
287 if (!file)
288 goto out;
289 error = vfs_statfs(&file->f_path, &tmp);
290 if (!error) 276 if (!error)
291 error = put_compat_statfs(buf, &tmp); 277 error = put_compat_statfs(buf, &tmp);
292 fput(file);
293out:
294 return error; 278 return error;
295} 279}
296 280
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
329 313
330asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) 314asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
331{ 315{
332 struct path path; 316 struct kstatfs tmp;
333 int error; 317 int error;
334 318
335 if (sz != sizeof(*buf)) 319 if (sz != sizeof(*buf))
336 return -EINVAL; 320 return -EINVAL;
337 321
338 error = user_path(pathname, &path); 322 error = user_statfs(pathname, &tmp);
339 if (!error) { 323 if (!error)
340 struct kstatfs tmp; 324 error = put_compat_statfs64(buf, &tmp);
341 error = vfs_statfs(&path, &tmp);
342 if (!error)
343 error = put_compat_statfs64(buf, &tmp);
344 path_put(&path);
345 }
346 return error; 325 return error;
347} 326}
348 327
349asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 328asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
350{ 329{
351 struct file * file;
352 struct kstatfs tmp; 330 struct kstatfs tmp;
353 int error; 331 int error;
354 332
355 if (sz != sizeof(*buf)) 333 if (sz != sizeof(*buf))
356 return -EINVAL; 334 return -EINVAL;
357 335
358 error = -EBADF; 336 error = fd_statfs(fd, &tmp);
359 file = fget(fd);
360 if (!file)
361 goto out;
362 error = vfs_statfs(&file->f_path, &tmp);
363 if (!error) 337 if (!error)
364 error = put_compat_statfs64(buf, &tmp); 338 error = put_compat_statfs64(buf, &tmp);
365 fput(file);
366out:
367 return error; 339 return error;
368} 340}
369 341
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1228 file = fget_light(fd, &fput_needed); 1200 file = fget_light(fd, &fput_needed);
1229 if (!file) 1201 if (!file)
1230 return -EBADF; 1202 return -EBADF;
1231 ret = compat_readv(file, vec, vlen, &pos); 1203 ret = -ESPIPE;
1204 if (file->f_mode & FMODE_PREAD)
1205 ret = compat_readv(file, vec, vlen, &pos);
1232 fput_light(file, fput_needed); 1206 fput_light(file, fput_needed);
1233 return ret; 1207 return ret;
1234} 1208}
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1285 file = fget_light(fd, &fput_needed); 1259 file = fget_light(fd, &fput_needed);
1286 if (!file) 1260 if (!file)
1287 return -EBADF; 1261 return -EBADF;
1288 ret = compat_writev(file, vec, vlen, &pos); 1262 ret = -ESPIPE;
1263 if (file->f_mode & FMODE_PWRITE)
1264 ret = compat_writev(file, vec, vlen, &pos);
1289 fput_light(file, fput_needed); 1265 fput_light(file, fput_needed);
1290 return ret; 1266 return ret;
1291} 1267}
@@ -2308,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
2308} 2284}
2309 2285
2310#endif /* CONFIG_TIMERFD */ 2286#endif /* CONFIG_TIMERFD */
2287
2288#ifdef CONFIG_FHANDLE
2289/*
2290 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
2291 * doesn't set the O_LARGEFILE flag.
2292 */
2293asmlinkage long
2294compat_sys_open_by_handle_at(int mountdirfd,
2295 struct file_handle __user *handle, int flags)
2296{
2297 return do_handle_open(mountdirfd, handle, flags);
2298}
2299#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae97..a39fe47c466f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
296 __releases(parent->d_lock) 296 __releases(parent->d_lock)
297 __releases(dentry->d_inode->i_lock) 297 __releases(dentry->d_inode->i_lock)
298{ 298{
299 dentry->d_parent = NULL;
300 list_del(&dentry->d_u.d_child); 299 list_del(&dentry->d_u.d_child);
300 /*
301 * Inform try_to_ascend() that we are no longer attached to the
302 * dentry tree
303 */
304 dentry->d_flags |= DCACHE_DISCONNECTED;
301 if (parent) 305 if (parent)
302 spin_unlock(&parent->d_lock); 306 spin_unlock(&parent->d_lock);
303 dentry_iput(dentry); 307 dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
1012} 1016}
1013 1017
1014/* 1018/*
1019 * This tries to ascend one level of parenthood, but
1020 * we can race with renaming, so we need to re-check
1021 * the parenthood after dropping the lock and check
1022 * that the sequence number still matches.
1023 */
1024static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1025{
1026 struct dentry *new = old->d_parent;
1027
1028 rcu_read_lock();
1029 spin_unlock(&old->d_lock);
1030 spin_lock(&new->d_lock);
1031
1032 /*
1033 * might go back up the wrong parent if we have had a rename
1034 * or deletion
1035 */
1036 if (new != old->d_parent ||
1037 (old->d_flags & DCACHE_DISCONNECTED) ||
1038 (!locked && read_seqretry(&rename_lock, seq))) {
1039 spin_unlock(&new->d_lock);
1040 new = NULL;
1041 }
1042 rcu_read_unlock();
1043 return new;
1044}
1045
1046
1047/*
1015 * Search for at least 1 mount point in the dentry's subdirs. 1048 * Search for at least 1 mount point in the dentry's subdirs.
1016 * We descend to the next level whenever the d_subdirs 1049 * We descend to the next level whenever the d_subdirs
1017 * list is non-empty and continue searching. 1050 * list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
1066 * All done at this level ... ascend and resume the search. 1099 * All done at this level ... ascend and resume the search.
1067 */ 1100 */
1068 if (this_parent != parent) { 1101 if (this_parent != parent) {
1069 struct dentry *tmp; 1102 struct dentry *child = this_parent;
1070 struct dentry *child; 1103 this_parent = try_to_ascend(this_parent, locked, seq);
1071 1104 if (!this_parent)
1072 tmp = this_parent->d_parent;
1073 rcu_read_lock();
1074 spin_unlock(&this_parent->d_lock);
1075 child = this_parent;
1076 this_parent = tmp;
1077 spin_lock(&this_parent->d_lock);
1078 /* might go back up the wrong parent if we have had a rename
1079 * or deletion */
1080 if (this_parent != child->d_parent ||
1081 (!locked && read_seqretry(&rename_lock, seq))) {
1082 spin_unlock(&this_parent->d_lock);
1083 rcu_read_unlock();
1084 goto rename_retry; 1105 goto rename_retry;
1085 }
1086 rcu_read_unlock();
1087 next = child->d_u.d_child.next; 1106 next = child->d_u.d_child.next;
1088 goto resume; 1107 goto resume;
1089 } 1108 }
@@ -1181,24 +1200,10 @@ resume:
1181 * All done at this level ... ascend and resume the search. 1200 * All done at this level ... ascend and resume the search.
1182 */ 1201 */
1183 if (this_parent != parent) { 1202 if (this_parent != parent) {
1184 struct dentry *tmp; 1203 struct dentry *child = this_parent;
1185 struct dentry *child; 1204 this_parent = try_to_ascend(this_parent, locked, seq);
1186 1205 if (!this_parent)
1187 tmp = this_parent->d_parent;
1188 rcu_read_lock();
1189 spin_unlock(&this_parent->d_lock);
1190 child = this_parent;
1191 this_parent = tmp;
1192 spin_lock(&this_parent->d_lock);
1193 /* might go back up the wrong parent if we have had a rename
1194 * or deletion */
1195 if (this_parent != child->d_parent ||
1196 (!locked && read_seqretry(&rename_lock, seq))) {
1197 spin_unlock(&this_parent->d_lock);
1198 rcu_read_unlock();
1199 goto rename_retry; 1206 goto rename_retry;
1200 }
1201 rcu_read_unlock();
1202 next = child->d_u.d_child.next; 1207 next = child->d_u.d_child.next;
1203 goto resume; 1208 goto resume;
1204 } 1209 }
@@ -1523,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1523} 1528}
1524EXPORT_SYMBOL(d_alloc_root); 1529EXPORT_SYMBOL(d_alloc_root);
1525 1530
1531static struct dentry * __d_find_any_alias(struct inode *inode)
1532{
1533 struct dentry *alias;
1534
1535 if (list_empty(&inode->i_dentry))
1536 return NULL;
1537 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1538 __dget(alias);
1539 return alias;
1540}
1541
1542static struct dentry * d_find_any_alias(struct inode *inode)
1543{
1544 struct dentry *de;
1545
1546 spin_lock(&inode->i_lock);
1547 de = __d_find_any_alias(inode);
1548 spin_unlock(&inode->i_lock);
1549 return de;
1550}
1551
1552
1526/** 1553/**
1527 * d_obtain_alias - find or allocate a dentry for a given inode 1554 * d_obtain_alias - find or allocate a dentry for a given inode
1528 * @inode: inode to allocate the dentry for 1555 * @inode: inode to allocate the dentry for
@@ -1552,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1552 if (IS_ERR(inode)) 1579 if (IS_ERR(inode))
1553 return ERR_CAST(inode); 1580 return ERR_CAST(inode);
1554 1581
1555 res = d_find_alias(inode); 1582 res = d_find_any_alias(inode);
1556 if (res) 1583 if (res)
1557 goto out_iput; 1584 goto out_iput;
1558 1585
@@ -1565,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1565 1592
1566 1593
1567 spin_lock(&inode->i_lock); 1594 spin_lock(&inode->i_lock);
1568 res = __d_find_alias(inode, 0); 1595 res = __d_find_any_alias(inode);
1569 if (res) { 1596 if (res) {
1570 spin_unlock(&inode->i_lock); 1597 spin_unlock(&inode->i_lock);
1571 dput(tmp); 1598 dput(tmp);
@@ -2920,28 +2947,14 @@ resume:
2920 spin_unlock(&dentry->d_lock); 2947 spin_unlock(&dentry->d_lock);
2921 } 2948 }
2922 if (this_parent != root) { 2949 if (this_parent != root) {
2923 struct dentry *tmp; 2950 struct dentry *child = this_parent;
2924 struct dentry *child;
2925
2926 tmp = this_parent->d_parent;
2927 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { 2951 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2928 this_parent->d_flags |= DCACHE_GENOCIDE; 2952 this_parent->d_flags |= DCACHE_GENOCIDE;
2929 this_parent->d_count--; 2953 this_parent->d_count--;
2930 } 2954 }
2931 rcu_read_lock(); 2955 this_parent = try_to_ascend(this_parent, locked, seq);
2932 spin_unlock(&this_parent->d_lock); 2956 if (!this_parent)
2933 child = this_parent;
2934 this_parent = tmp;
2935 spin_lock(&this_parent->d_lock);
2936 /* might go back up the wrong parent if we have had a rename
2937 * or deletion */
2938 if (this_parent != child->d_parent ||
2939 (!locked && read_seqretry(&rename_lock, seq))) {
2940 spin_unlock(&this_parent->d_lock);
2941 rcu_read_unlock();
2942 goto rename_retry; 2957 goto rename_retry;
2943 }
2944 rcu_read_unlock();
2945 next = child->d_u.d_child.next; 2958 next = child->d_u.d_child.next;
2946 goto resume; 2959 goto resume;
2947 } 2960 }
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 6fc4f319b550..534c1d46e69e 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
46{ 46{
47 struct dentry *lower_dentry; 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt; 48 struct vfsmount *lower_mnt;
49 struct dentry *dentry_save; 49 struct dentry *dentry_save = NULL;
50 struct vfsmount *vfsmount_save; 50 struct vfsmount *vfsmount_save = NULL;
51 int rc = 1; 51 int rc = 1;
52 52
53 if (nd->flags & LOOKUP_RCU) 53 if (nd && nd->flags & LOOKUP_RCU)
54 return -ECHILD; 54 return -ECHILD;
55 55
56 lower_dentry = ecryptfs_dentry_to_lower(dentry); 56 lower_dentry = ecryptfs_dentry_to_lower(dentry);
57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
59 goto out; 59 goto out;
60 dentry_save = nd->path.dentry; 60 if (nd) {
61 vfsmount_save = nd->path.mnt; 61 dentry_save = nd->path.dentry;
62 nd->path.dentry = lower_dentry; 62 vfsmount_save = nd->path.mnt;
63 nd->path.mnt = lower_mnt; 63 nd->path.dentry = lower_dentry;
64 nd->path.mnt = lower_mnt;
65 }
64 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); 66 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
65 nd->path.dentry = dentry_save; 67 if (nd) {
66 nd->path.mnt = vfsmount_save; 68 nd->path.dentry = dentry_save;
69 nd->path.mnt = vfsmount_save;
70 }
67 if (dentry->d_inode) { 71 if (dentry->d_inode) {
68 struct inode *lower_inode = 72 struct inode *lower_inode =
69 ecryptfs_inode_to_lower(dentry->d_inode); 73 ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index dbc84ed96336..e00753496e3e 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
632 u32 flags); 632 u32 flags);
633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
634 struct dentry *lower_dentry, 634 struct dentry *lower_dentry,
635 struct inode *ecryptfs_dir_inode, 635 struct inode *ecryptfs_dir_inode);
636 struct nameidata *ecryptfs_nd);
637int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, 636int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
638 size_t *decrypted_name_size, 637 size_t *decrypted_name_size,
639 struct dentry *ecryptfs_dentry, 638 struct dentry *ecryptfs_dentry,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 81e10e6a9443..7d1050e254f9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
317 317
318const struct file_operations ecryptfs_dir_fops = { 318const struct file_operations ecryptfs_dir_fops = {
319 .readdir = ecryptfs_readdir, 319 .readdir = ecryptfs_readdir,
320 .read = generic_read_dir,
320 .unlocked_ioctl = ecryptfs_unlocked_ioctl, 321 .unlocked_ioctl = ecryptfs_unlocked_ioctl,
321#ifdef CONFIG_COMPAT 322#ifdef CONFIG_COMPAT
322 .compat_ioctl = ecryptfs_compat_ioctl, 323 .compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index bd33f87a1907..b592938a84bc 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
74 unsigned int flags_save; 74 unsigned int flags_save;
75 int rc; 75 int rc;
76 76
77 dentry_save = nd->path.dentry; 77 if (nd) {
78 vfsmount_save = nd->path.mnt; 78 dentry_save = nd->path.dentry;
79 flags_save = nd->flags; 79 vfsmount_save = nd->path.mnt;
80 nd->path.dentry = lower_dentry; 80 flags_save = nd->flags;
81 nd->path.mnt = lower_mnt; 81 nd->path.dentry = lower_dentry;
82 nd->flags &= ~LOOKUP_OPEN; 82 nd->path.mnt = lower_mnt;
83 nd->flags &= ~LOOKUP_OPEN;
84 }
83 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); 85 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
84 nd->path.dentry = dentry_save; 86 if (nd) {
85 nd->path.mnt = vfsmount_save; 87 nd->path.dentry = dentry_save;
86 nd->flags = flags_save; 88 nd->path.mnt = vfsmount_save;
89 nd->flags = flags_save;
90 }
87 return rc; 91 return rc;
88} 92}
89 93
@@ -241,8 +245,7 @@ out:
241 */ 245 */
242int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 246int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
243 struct dentry *lower_dentry, 247 struct dentry *lower_dentry,
244 struct inode *ecryptfs_dir_inode, 248 struct inode *ecryptfs_dir_inode)
245 struct nameidata *ecryptfs_nd)
246{ 249{
247 struct dentry *lower_dir_dentry; 250 struct dentry *lower_dir_dentry;
248 struct vfsmount *lower_mnt; 251 struct vfsmount *lower_mnt;
@@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
290 goto out; 293 goto out;
291 if (special_file(lower_inode->i_mode)) 294 if (special_file(lower_inode->i_mode))
292 goto out; 295 goto out;
293 if (!ecryptfs_nd)
294 goto out;
295 /* Released in this function */ 296 /* Released in this function */
296 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); 297 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
297 if (!page_virt) { 298 if (!page_virt) {
@@ -349,75 +350,6 @@ out:
349} 350}
350 351
351/** 352/**
352 * ecryptfs_new_lower_dentry
353 * @name: The name of the new dentry.
354 * @lower_dir_dentry: Parent directory of the new dentry.
355 * @nd: nameidata from last lookup.
356 *
357 * Create a new dentry or get it from lower parent dir.
358 */
359static struct dentry *
360ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
361 struct nameidata *nd)
362{
363 struct dentry *new_dentry;
364 struct dentry *tmp;
365 struct inode *lower_dir_inode;
366
367 lower_dir_inode = lower_dir_dentry->d_inode;
368
369 tmp = d_alloc(lower_dir_dentry, name);
370 if (!tmp)
371 return ERR_PTR(-ENOMEM);
372
373 mutex_lock(&lower_dir_inode->i_mutex);
374 new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
375 mutex_unlock(&lower_dir_inode->i_mutex);
376
377 if (!new_dentry)
378 new_dentry = tmp;
379 else
380 dput(tmp);
381
382 return new_dentry;
383}
384
385
386/**
387 * ecryptfs_lookup_one_lower
388 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
389 * @lower_dir_dentry: lower parent directory
390 * @name: lower file name
391 *
392 * Get the lower dentry from vfs. If lower dentry does not exist yet,
393 * create it.
394 */
395static struct dentry *
396ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
397 struct dentry *lower_dir_dentry, struct qstr *name)
398{
399 struct nameidata nd;
400 struct vfsmount *lower_mnt;
401 int err;
402
403 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
404 ecryptfs_dentry->d_parent));
405 err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
406 mntput(lower_mnt);
407
408 if (!err) {
409 /* we dont need the mount */
410 mntput(nd.path.mnt);
411 return nd.path.dentry;
412 }
413 if (err != -ENOENT)
414 return ERR_PTR(err);
415
416 /* create a new lower dentry */
417 return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
418}
419
420/**
421 * ecryptfs_lookup 353 * ecryptfs_lookup
422 * @ecryptfs_dir_inode: The eCryptfs directory inode 354 * @ecryptfs_dir_inode: The eCryptfs directory inode
423 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up 355 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -434,7 +366,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
434 size_t encrypted_and_encoded_name_size; 366 size_t encrypted_and_encoded_name_size;
435 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; 367 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
436 struct dentry *lower_dir_dentry, *lower_dentry; 368 struct dentry *lower_dir_dentry, *lower_dentry;
437 struct qstr lower_name;
438 int rc = 0; 369 int rc = 0;
439 370
440 if ((ecryptfs_dentry->d_name.len == 1 371 if ((ecryptfs_dentry->d_name.len == 1
@@ -444,20 +375,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
444 goto out_d_drop; 375 goto out_d_drop;
445 } 376 }
446 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 377 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
447 lower_name.name = ecryptfs_dentry->d_name.name; 378 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
448 lower_name.len = ecryptfs_dentry->d_name.len; 379 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
449 lower_name.hash = ecryptfs_dentry->d_name.hash; 380 lower_dir_dentry,
450 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 381 ecryptfs_dentry->d_name.len);
451 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 382 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
452 lower_dir_dentry->d_inode, &lower_name);
453 if (rc < 0)
454 goto out_d_drop;
455 }
456 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
457 lower_dir_dentry, &lower_name);
458 if (IS_ERR(lower_dentry)) { 383 if (IS_ERR(lower_dentry)) {
459 rc = PTR_ERR(lower_dentry); 384 rc = PTR_ERR(lower_dentry);
460 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 385 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
461 "[%d] on lower_dentry = [%s]\n", __func__, rc, 386 "[%d] on lower_dentry = [%s]\n", __func__, rc,
462 encrypted_and_encoded_name); 387 encrypted_and_encoded_name);
463 goto out_d_drop; 388 goto out_d_drop;
@@ -479,28 +404,21 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
479 "filename; rc = [%d]\n", __func__, rc); 404 "filename; rc = [%d]\n", __func__, rc);
480 goto out_d_drop; 405 goto out_d_drop;
481 } 406 }
482 lower_name.name = encrypted_and_encoded_name; 407 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
483 lower_name.len = encrypted_and_encoded_name_size; 408 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
484 lower_name.hash = full_name_hash(lower_name.name, lower_name.len); 409 lower_dir_dentry,
485 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 410 encrypted_and_encoded_name_size);
486 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 411 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
487 lower_dir_dentry->d_inode, &lower_name);
488 if (rc < 0)
489 goto out_d_drop;
490 }
491 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
492 lower_dir_dentry, &lower_name);
493 if (IS_ERR(lower_dentry)) { 412 if (IS_ERR(lower_dentry)) {
494 rc = PTR_ERR(lower_dentry); 413 rc = PTR_ERR(lower_dentry);
495 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 414 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
496 "[%d] on lower_dentry = [%s]\n", __func__, rc, 415 "[%d] on lower_dentry = [%s]\n", __func__, rc,
497 encrypted_and_encoded_name); 416 encrypted_and_encoded_name);
498 goto out_d_drop; 417 goto out_d_drop;
499 } 418 }
500lookup_and_interpose: 419lookup_and_interpose:
501 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, 420 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
502 ecryptfs_dir_inode, 421 ecryptfs_dir_inode);
503 ecryptfs_nd);
504 goto out; 422 goto out;
505out_d_drop: 423out_d_drop:
506 d_drop(ecryptfs_dentry); 424 d_drop(ecryptfs_dentry);
@@ -1092,6 +1010,8 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1092 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry), 1010 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
1093 ecryptfs_dentry_to_lower(dentry), &lower_stat); 1011 ecryptfs_dentry_to_lower(dentry), &lower_stat);
1094 if (!rc) { 1012 if (!rc) {
1013 fsstack_copy_attr_all(dentry->d_inode,
1014 ecryptfs_inode_to_lower(dentry->d_inode));
1095 generic_fillattr(dentry->d_inode, stat); 1015 generic_fillattr(dentry->d_inode, stat);
1096 stat->blocks = lower_stat.blocks; 1016 stat->blocks = lower_stat.blocks;
1097 } 1017 }
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e0194b3e14d6..d9a591773919 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get);
99 * @ctx: [in] Pointer to eventfd context. 99 * @ctx: [in] Pointer to eventfd context.
100 * 100 *
101 * The eventfd context reference must have been previously acquired either 101 * The eventfd context reference must have been previously acquired either
102 * with eventfd_ctx_get() or eventfd_ctx_fdget()). 102 * with eventfd_ctx_get() or eventfd_ctx_fdget().
103 */ 103 */
104void eventfd_ctx_put(struct eventfd_ctx *ctx) 104void eventfd_ctx_put(struct eventfd_ctx *ctx)
105{ 105{
@@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
146 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. 146 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
147 * @ctx: [in] Pointer to eventfd context. 147 * @ctx: [in] Pointer to eventfd context.
148 * @wait: [in] Wait queue to be removed. 148 * @wait: [in] Wait queue to be removed.
149 * @cnt: [out] Pointer to the 64bit conter value. 149 * @cnt: [out] Pointer to the 64-bit counter value.
150 * 150 *
151 * Returns zero if successful, or the following error codes: 151 * Returns %0 if successful, or the following error codes:
152 * 152 *
153 * -EAGAIN : The operation would have blocked. 153 * -EAGAIN : The operation would have blocked.
154 * 154 *
@@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
175 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. 175 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
176 * @ctx: [in] Pointer to eventfd context. 176 * @ctx: [in] Pointer to eventfd context.
177 * @no_wait: [in] Different from zero if the operation should not block. 177 * @no_wait: [in] Different from zero if the operation should not block.
178 * @cnt: [out] Pointer to the 64bit conter value. 178 * @cnt: [out] Pointer to the 64-bit counter value.
179 * 179 *
180 * Returns zero if successful, or the following error codes: 180 * Returns %0 if successful, or the following error codes:
181 * 181 *
182 * -EAGAIN : The operation would have blocked but @no_wait was nonzero. 182 * -EAGAIN : The operation would have blocked but @no_wait was non-zero.
183 * -ERESTARTSYS : A signal interrupted the wait operation. 183 * -ERESTARTSYS : A signal interrupted the wait operation.
184 * 184 *
185 * If @no_wait is zero, the function might sleep until the eventfd internal 185 * If @no_wait is zero, the function might sleep until the eventfd internal
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada4541..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
63 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
64 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
66 * It is also acquired when inserting an epoll fd onto another epoll
67 * fd. We do this so that we walk the epoll tree and ensure that this
68 * insertion does not create a cycle of epoll file descriptors, which
69 * could lead to deadlock. We need a global mutex to prevent two
70 * simultaneous inserts (A into B and B into A) from racing and
71 * constructing a cycle without either insert observing that it is
72 * going to.
66 * It is possible to drop the "ep->mtx" and to use the global 73 * It is possible to drop the "ep->mtx" and to use the global
67 * mutex "epmutex" (together with "ep->lock") to have it working, 74 * mutex "epmutex" (together with "ep->lock") to have it working,
68 * but having "ep->mtx" will make the interface more scalable. 75 * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
224 */ 231 */
225static DEFINE_MUTEX(epmutex); 232static DEFINE_MUTEX(epmutex);
226 233
234/* Used to check for epoll file descriptor inclusion loops */
235static struct nested_calls poll_loop_ncalls;
236
227/* Used for safe wake up implementation */ 237/* Used for safe wake up implementation */
228static struct nested_calls poll_safewake_ncalls; 238static struct nested_calls poll_safewake_ncalls;
229 239
@@ -1198,6 +1208,62 @@ retry:
1198 return res; 1208 return res;
1199} 1209}
1200 1210
1211/**
1212 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
1213 * API, to verify that adding an epoll file inside another
1214 * epoll structure, does not violate the constraints, in
1215 * terms of closed loops, or too deep chains (which can
1216 * result in excessive stack usage).
1217 *
1218 * @priv: Pointer to the epoll file to be currently checked.
1219 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
1220 * data structure pointer.
1221 * @call_nests: Current dept of the @ep_call_nested() call stack.
1222 *
1223 * Returns: Returns zero if adding the epoll @file inside current epoll
1224 * structure @ep does not violate the constraints, or -1 otherwise.
1225 */
1226static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1227{
1228 int error = 0;
1229 struct file *file = priv;
1230 struct eventpoll *ep = file->private_data;
1231 struct rb_node *rbp;
1232 struct epitem *epi;
1233
1234 mutex_lock(&ep->mtx);
1235 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1236 epi = rb_entry(rbp, struct epitem, rbn);
1237 if (unlikely(is_file_epoll(epi->ffd.file))) {
1238 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1239 ep_loop_check_proc, epi->ffd.file,
1240 epi->ffd.file->private_data, current);
1241 if (error != 0)
1242 break;
1243 }
1244 }
1245 mutex_unlock(&ep->mtx);
1246
1247 return error;
1248}
1249
1250/**
1251 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
1252 * another epoll file (represented by @ep) does not create
1253 * closed loops or too deep chains.
1254 *
1255 * @ep: Pointer to the epoll private data structure.
1256 * @file: Pointer to the epoll file to be checked.
1257 *
1258 * Returns: Returns zero if adding the epoll @file inside current epoll
1259 * structure @ep does not violate the constraints, or -1 otherwise.
1260 */
1261static int ep_loop_check(struct eventpoll *ep, struct file *file)
1262{
1263 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1264 ep_loop_check_proc, file, ep, current);
1265}
1266
1201/* 1267/*
1202 * Open an eventpoll file descriptor. 1268 * Open an eventpoll file descriptor.
1203 */ 1269 */
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1246 struct epoll_event __user *, event) 1312 struct epoll_event __user *, event)
1247{ 1313{
1248 int error; 1314 int error;
1315 int did_lock_epmutex = 0;
1249 struct file *file, *tfile; 1316 struct file *file, *tfile;
1250 struct eventpoll *ep; 1317 struct eventpoll *ep;
1251 struct epitem *epi; 1318 struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1287 */ 1354 */
1288 ep = file->private_data; 1355 ep = file->private_data;
1289 1356
1357 /*
1358 * When we insert an epoll file descriptor, inside another epoll file
1359 * descriptor, there is the change of creating closed loops, which are
1360 * better be handled here, than in more critical paths.
1361 *
1362 * We hold epmutex across the loop check and the insert in this case, in
1363 * order to prevent two separate inserts from racing and each doing the
1364 * insert "at the same time" such that ep_loop_check passes on both
1365 * before either one does the insert, thereby creating a cycle.
1366 */
1367 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
1368 mutex_lock(&epmutex);
1369 did_lock_epmutex = 1;
1370 error = -ELOOP;
1371 if (ep_loop_check(ep, tfile) != 0)
1372 goto error_tgt_fput;
1373 }
1374
1375
1290 mutex_lock(&ep->mtx); 1376 mutex_lock(&ep->mtx);
1291 1377
1292 /* 1378 /*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1322 mutex_unlock(&ep->mtx); 1408 mutex_unlock(&ep->mtx);
1323 1409
1324error_tgt_fput: 1410error_tgt_fput:
1411 if (unlikely(did_lock_epmutex))
1412 mutex_unlock(&epmutex);
1413
1325 fput(tfile); 1414 fput(tfile);
1326error_fput: 1415error_fput:
1327 fput(file); 1416 fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
1441 EP_ITEM_COST; 1530 EP_ITEM_COST;
1442 BUG_ON(max_user_watches < 0); 1531 BUG_ON(max_user_watches < 0);
1443 1532
1533 /*
1534 * Initialize the structure used to perform epoll file descriptor
1535 * inclusion loops checks.
1536 */
1537 ep_nested_calls_init(&poll_loop_ncalls);
1538
1444 /* Initialize the structure used to perform safe poll wait head wake ups */ 1539 /* Initialize the structure used to perform safe poll wait head wake ups */
1445 ep_nested_calls_init(&poll_safewake_ncalls); 1540 ep_nested_calls_init(&poll_safewake_ncalls);
1446 1541
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6ab..ba99e1abb1aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
115 struct file *file; 115 struct file *file;
116 char *tmp = getname(library); 116 char *tmp = getname(library);
117 int error = PTR_ERR(tmp); 117 int error = PTR_ERR(tmp);
118 static const struct open_flags uselib_flags = {
119 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
120 .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
121 .intent = LOOKUP_OPEN
122 };
118 123
119 if (IS_ERR(tmp)) 124 if (IS_ERR(tmp))
120 goto out; 125 goto out;
121 126
122 file = do_filp_open(AT_FDCWD, tmp, 127 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 128 putname(tmp);
126 error = PTR_ERR(file); 129 error = PTR_ERR(file);
127 if (IS_ERR(file)) 130 if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
721{ 724{
722 struct file *file; 725 struct file *file;
723 int err; 726 int err;
727 static const struct open_flags open_exec_flags = {
728 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
729 .acc_mode = MAY_EXEC | MAY_OPEN,
730 .intent = LOOKUP_OPEN
731 };
724 732
725 file = do_filp_open(AT_FDCWD, name, 733 file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 734 if (IS_ERR(file))
729 goto out; 735 goto out;
730 736
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page); 272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
273 if (!new_de) 273 if (!new_de)
274 goto out_dir; 274 goto out_dir;
275 inode_inc_link_count(old_inode);
276 err = exofs_set_link(new_dir, new_de, new_page, old_inode); 275 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
277 new_inode->i_ctime = CURRENT_TIME; 276 new_inode->i_ctime = CURRENT_TIME;
278 if (dir_de) 277 if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
286 if (new_dir->i_nlink >= EXOFS_LINK_MAX) 285 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
287 goto out_dir; 286 goto out_dir;
288 } 287 }
289 inode_inc_link_count(old_inode);
290 err = exofs_add_link(new_dentry, old_inode); 288 err = exofs_add_link(new_dentry, old_inode);
291 if (err) { 289 if (err)
292 inode_dec_link_count(old_inode);
293 goto out_dir; 290 goto out_dir;
294 }
295 if (dir_de) 291 if (dir_de)
296 inode_inc_link_count(new_dir); 292 inode_inc_link_count(new_dir);
297 } 293 }
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
299 old_inode->i_ctime = CURRENT_TIME; 295 old_inode->i_ctime = CURRENT_TIME;
300 296
301 exofs_delete_entry(old_de, old_page); 297 exofs_delete_entry(old_de, old_page);
302 inode_dec_link_count(old_inode); 298 mark_inode_dirty(old_inode);
303 299
304 if (dir_de) { 300 if (dir_de) {
305 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir); 301 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd5..b05acb796135 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
320 struct inode * inode = dentry->d_inode; 320 struct inode * inode = dentry->d_inode;
321 int len = *max_len; 321 int len = *max_len;
322 int type = FILEID_INO32_GEN; 322 int type = FILEID_INO32_GEN;
323 323
324 if (len < 2 || (connectable && len < 4)) 324 if (connectable && (len < 4)) {
325 *max_len = 4;
326 return 255;
327 } else if (len < 2) {
328 *max_len = 2;
325 return 255; 329 return 255;
330 }
326 331
327 len = 2; 332 len = 2;
328 fid->i32.ino = inode->i_ino; 333 fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
369 /* 374 /*
370 * Try to get any dentry for the given file handle from the filesystem. 375 * Try to get any dentry for the given file handle from the filesystem.
371 */ 376 */
377 if (!nop || !nop->fh_to_dentry)
378 return ERR_PTR(-ESTALE);
372 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 379 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
373 if (!result) 380 if (!result)
374 result = ERR_PTR(-ESTALE); 381 result = ERR_PTR(-ESTALE);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 6346a2acf326..1b48c3370872 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -110,7 +110,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int); 110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
111 111
112/* ialloc.c */ 112/* ialloc.c */
113extern struct inode * ext2_new_inode (struct inode *, int); 113extern struct inode * ext2_new_inode (struct inode *, int, const struct qstr *);
114extern void ext2_free_inode (struct inode *); 114extern void ext2_free_inode (struct inode *);
115extern unsigned long ext2_count_free_inodes (struct super_block *); 115extern unsigned long ext2_count_free_inodes (struct super_block *);
116extern void ext2_check_inodes_bitmap (struct super_block *); 116extern void ext2_check_inodes_bitmap (struct super_block *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ad70479aabff..ee9ed31948e1 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -429,7 +429,8 @@ found:
429 return group; 429 return group;
430} 430}
431 431
432struct inode *ext2_new_inode(struct inode *dir, int mode) 432struct inode *ext2_new_inode(struct inode *dir, int mode,
433 const struct qstr *qstr)
433{ 434{
434 struct super_block *sb; 435 struct super_block *sb;
435 struct buffer_head *bitmap_bh = NULL; 436 struct buffer_head *bitmap_bh = NULL;
@@ -585,7 +586,7 @@ got:
585 if (err) 586 if (err)
586 goto fail_free_drop; 587 goto fail_free_drop;
587 588
588 err = ext2_init_security(inode,dir); 589 err = ext2_init_security(inode, dir, qstr);
589 if (err) 590 if (err)
590 goto fail_free_drop; 591 goto fail_free_drop;
591 592
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..ed5c5d496ee9 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,7 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
104 104
105 dquot_initialize(dir); 105 dquot_initialize(dir);
106 106
107 inode = ext2_new_inode(dir, mode); 107 inode = ext2_new_inode(dir, mode, &dentry->d_name);
108 if (IS_ERR(inode)) 108 if (IS_ERR(inode))
109 return PTR_ERR(inode); 109 return PTR_ERR(inode);
110 110
@@ -133,7 +133,7 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_
133 133
134 dquot_initialize(dir); 134 dquot_initialize(dir);
135 135
136 inode = ext2_new_inode (dir, mode); 136 inode = ext2_new_inode (dir, mode, &dentry->d_name);
137 err = PTR_ERR(inode); 137 err = PTR_ERR(inode);
138 if (!IS_ERR(inode)) { 138 if (!IS_ERR(inode)) {
139 init_special_inode(inode, inode->i_mode, rdev); 139 init_special_inode(inode, inode->i_mode, rdev);
@@ -159,7 +159,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
159 159
160 dquot_initialize(dir); 160 dquot_initialize(dir);
161 161
162 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); 162 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO, &dentry->d_name);
163 err = PTR_ERR(inode); 163 err = PTR_ERR(inode);
164 if (IS_ERR(inode)) 164 if (IS_ERR(inode))
165 goto out; 165 goto out;
@@ -230,7 +230,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
230 230
231 inode_inc_link_count(dir); 231 inode_inc_link_count(dir);
232 232
233 inode = ext2_new_inode (dir, S_IFDIR | mode); 233 inode = ext2_new_inode(dir, S_IFDIR | mode, &dentry->d_name);
234 err = PTR_ERR(inode); 234 err = PTR_ERR(inode);
235 if (IS_ERR(inode)) 235 if (IS_ERR(inode))
236 goto out_dir; 236 goto out_dir;
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); 344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
345 if (!new_de) 345 if (!new_de)
346 goto out_dir; 346 goto out_dir;
347 inode_inc_link_count(old_inode);
348 ext2_set_link(new_dir, new_de, new_page, old_inode, 1); 347 ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
349 new_inode->i_ctime = CURRENT_TIME_SEC; 348 new_inode->i_ctime = CURRENT_TIME_SEC;
350 if (dir_de) 349 if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
356 if (new_dir->i_nlink >= EXT2_LINK_MAX) 355 if (new_dir->i_nlink >= EXT2_LINK_MAX)
357 goto out_dir; 356 goto out_dir;
358 } 357 }
359 inode_inc_link_count(old_inode);
360 err = ext2_add_link(new_dentry, old_inode); 358 err = ext2_add_link(new_dentry, old_inode);
361 if (err) { 359 if (err)
362 inode_dec_link_count(old_inode);
363 goto out_dir; 360 goto out_dir;
364 }
365 if (dir_de) 361 if (dir_de)
366 inode_inc_link_count(new_dir); 362 inode_inc_link_count(new_dir);
367 } 363 }
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
369 /* 365 /*
370 * Like most other Unix systems, set the ctime for inodes on a 366 * Like most other Unix systems, set the ctime for inodes on a
371 * rename. 367 * rename.
372 * inode_dec_link_count() will mark the inode dirty.
373 */ 368 */
374 old_inode->i_ctime = CURRENT_TIME_SEC; 369 old_inode->i_ctime = CURRENT_TIME_SEC;
370 mark_inode_dirty(old_inode);
375 371
376 ext2_delete_entry (old_de, old_page); 372 ext2_delete_entry (old_de, old_page);
377 inode_dec_link_count(old_inode);
378 373
379 if (dir_de) { 374 if (dir_de) {
380 if (old_dir != new_dir) 375 if (old_dir != new_dir)
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index a1a1c2184616..5e41cccff762 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,9 +116,11 @@ exit_ext2_xattr(void)
116# endif /* CONFIG_EXT2_FS_XATTR */ 116# endif /* CONFIG_EXT2_FS_XATTR */
117 117
118#ifdef CONFIG_EXT2_FS_SECURITY 118#ifdef CONFIG_EXT2_FS_SECURITY
119extern int ext2_init_security(struct inode *inode, struct inode *dir); 119extern int ext2_init_security(struct inode *inode, struct inode *dir,
120 const struct qstr *qstr);
120#else 121#else
121static inline int ext2_init_security(struct inode *inode, struct inode *dir) 122static inline int ext2_init_security(struct inode *inode, struct inode *dir,
123 const struct qstr *qstr)
122{ 124{
123 return 0; 125 return 0;
124} 126}
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 3004e15d5da5..5d979b4347b0 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -47,14 +47,15 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
47} 47}
48 48
49int 49int
50ext2_init_security(struct inode *inode, struct inode *dir) 50ext2_init_security(struct inode *inode, struct inode *dir,
51 const struct qstr *qstr)
51{ 52{
52 int err; 53 int err;
53 size_t len; 54 size_t len;
54 void *value; 55 void *value;
55 char *name; 56 char *name;
56 57
57 err = security_inode_init_security(inode, dir, &name, &value, &len); 58 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
58 if (err) { 59 if (err) {
59 if (err == -EOPNOTSUPP) 60 if (err == -EOPNOTSUPP)
60 return 0; 61 return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9724aef22460..bfc2dc43681d 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -404,7 +404,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
404 * For other inodes, search forward from the parent directory's block 404 * For other inodes, search forward from the parent directory's block
405 * group to find a free inode. 405 * group to find a free inode.
406 */ 406 */
407struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) 407struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
408 const struct qstr *qstr, int mode)
408{ 409{
409 struct super_block *sb; 410 struct super_block *sb;
410 struct buffer_head *bitmap_bh = NULL; 411 struct buffer_head *bitmap_bh = NULL;
@@ -589,7 +590,7 @@ got:
589 if (err) 590 if (err)
590 goto fail_free_drop; 591 goto fail_free_drop;
591 592
592 err = ext3_init_security(handle,inode, dir); 593 err = ext3_init_security(handle, inode, dir, qstr);
593 if (err) 594 if (err)
594 goto fail_free_drop; 595 goto fail_free_drop;
595 596
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810ec..0521a007ae6d 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1710,7 +1710,7 @@ retry:
1710 if (IS_DIRSYNC(dir)) 1710 if (IS_DIRSYNC(dir))
1711 handle->h_sync = 1; 1711 handle->h_sync = 1;
1712 1712
1713 inode = ext3_new_inode (handle, dir, mode); 1713 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
1714 err = PTR_ERR(inode); 1714 err = PTR_ERR(inode);
1715 if (!IS_ERR(inode)) { 1715 if (!IS_ERR(inode)) {
1716 inode->i_op = &ext3_file_inode_operations; 1716 inode->i_op = &ext3_file_inode_operations;
@@ -1746,7 +1746,7 @@ retry:
1746 if (IS_DIRSYNC(dir)) 1746 if (IS_DIRSYNC(dir))
1747 handle->h_sync = 1; 1747 handle->h_sync = 1;
1748 1748
1749 inode = ext3_new_inode (handle, dir, mode); 1749 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
1750 err = PTR_ERR(inode); 1750 err = PTR_ERR(inode);
1751 if (!IS_ERR(inode)) { 1751 if (!IS_ERR(inode)) {
1752 init_special_inode(inode, inode->i_mode, rdev); 1752 init_special_inode(inode, inode->i_mode, rdev);
@@ -1784,7 +1784,7 @@ retry:
1784 if (IS_DIRSYNC(dir)) 1784 if (IS_DIRSYNC(dir))
1785 handle->h_sync = 1; 1785 handle->h_sync = 1;
1786 1786
1787 inode = ext3_new_inode (handle, dir, S_IFDIR | mode); 1787 inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode);
1788 err = PTR_ERR(inode); 1788 err = PTR_ERR(inode);
1789 if (IS_ERR(inode)) 1789 if (IS_ERR(inode))
1790 goto out_stop; 1790 goto out_stop;
@@ -2206,7 +2206,7 @@ retry:
2206 if (IS_DIRSYNC(dir)) 2206 if (IS_DIRSYNC(dir))
2207 handle->h_sync = 1; 2207 handle->h_sync = 1;
2208 2208
2209 inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); 2209 inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFLNK|S_IRWXUGO);
2210 err = PTR_ERR(inode); 2210 err = PTR_ERR(inode);
2211 if (IS_ERR(inode)) 2211 if (IS_ERR(inode))
2212 goto out_stop; 2212 goto out_stop;
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
2253 2253
2254 dquot_initialize(dir); 2254 dquot_initialize(dir);
2255 2255
2256 /*
2257 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2258 * otherwise has the potential to corrupt the orphan inode list.
2259 */
2260 if (inode->i_nlink == 0)
2261 return -ENOENT;
2262
2263retry: 2256retry:
2264 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2265 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f2473..9cc19a1dea8e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1936 sb->s_qcop = &ext3_qctl_operations; 1936 sb->s_qcop = &ext3_qctl_operations;
1937 sb->dq_op = &ext3_quota_operations; 1937 sb->dq_op = &ext3_quota_operations;
1938#endif 1938#endif
1939 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1939 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1940 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1940 mutex_init(&sbi->s_orphan_lock); 1941 mutex_init(&sbi->s_orphan_lock);
1941 mutex_init(&sbi->s_resize_lock); 1942 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 377fe7201169..2be4f69bfa64 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -128,10 +128,10 @@ exit_ext3_xattr(void)
128 128
129#ifdef CONFIG_EXT3_FS_SECURITY 129#ifdef CONFIG_EXT3_FS_SECURITY
130extern int ext3_init_security(handle_t *handle, struct inode *inode, 130extern int ext3_init_security(handle_t *handle, struct inode *inode,
131 struct inode *dir); 131 struct inode *dir, const struct qstr *qstr);
132#else 132#else
133static inline int ext3_init_security(handle_t *handle, struct inode *inode, 133static inline int ext3_init_security(handle_t *handle, struct inode *inode,
134 struct inode *dir) 134 struct inode *dir, const struct qstr *qstr)
135{ 135{
136 return 0; 136 return 0;
137} 137}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 03a99bfc59f9..b8d9f83aa5c5 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -49,14 +49,15 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
49} 49}
50 50
51int 51int
52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) 52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
53 const struct qstr *qstr)
53{ 54{
54 int err; 55 int err;
55 size_t len; 56 size_t len;
56 void *value; 57 void *value;
57 char *name; 58 char *name;
58 59
59 err = security_inode_init_security(inode, dir, &name, &value, &len); 60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
60 if (err) { 61 if (err) {
61 if (err == -EOPNOTSUPP) 62 if (err == -EOPNOTSUPP)
62 return 0; 63 return 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index eb9097aec6f0..78b79e1bd7ed 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1042,7 +1042,7 @@ got:
1042 if (err) 1042 if (err)
1043 goto fail_free_drop; 1043 goto fail_free_drop;
1044 1044
1045 err = ext4_init_security(handle, inode, dir); 1045 err = ext4_init_security(handle, inode, dir, qstr);
1046 if (err) 1046 if (err)
1047 goto fail_free_drop; 1047 goto fail_free_drop;
1048 1048
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c5..e781b7ea5630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
2304 2304
2305 dquot_initialize(dir); 2305 dquot_initialize(dir);
2306 2306
2307 /*
2308 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2309 * otherwise has the potential to corrupt the orphan inode list.
2310 */
2311 if (inode->i_nlink == 0)
2312 return -ENOENT;
2313
2314retry: 2307retry:
2315 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2316 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b2..203f9e4a70be 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3415 sb->s_qcop = &ext4_qctl_operations; 3415 sb->s_qcop = &ext4_qctl_operations;
3416 sb->dq_op = &ext4_quota_operations; 3416 sb->dq_op = &ext4_quota_operations;
3417#endif 3417#endif
3418 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3419
3418 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3419 mutex_init(&sbi->s_orphan_lock); 3421 mutex_init(&sbi->s_orphan_lock);
3420 mutex_init(&sbi->s_resize_lock); 3422 mutex_init(&sbi->s_resize_lock);
@@ -3509,7 +3511,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3509 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3511 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
3510 3512
3511no_journal: 3513no_journal:
3512 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 3514 /*
3515 * The maximum number of concurrent works can be high and
3516 * concurrency isn't really necessary. Limit it to 1.
3517 */
3518 EXT4_SB(sb)->dio_unwritten_wq =
3519 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
3513 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3520 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3514 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3521 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3515 goto failed_mount_wq; 3522 goto failed_mount_wq;
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 1ef16520b950..25b7387ff183 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -145,10 +145,10 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
145 145
146#ifdef CONFIG_EXT4_FS_SECURITY 146#ifdef CONFIG_EXT4_FS_SECURITY
147extern int ext4_init_security(handle_t *handle, struct inode *inode, 147extern int ext4_init_security(handle_t *handle, struct inode *inode,
148 struct inode *dir); 148 struct inode *dir, const struct qstr *qstr);
149#else 149#else
150static inline int ext4_init_security(handle_t *handle, struct inode *inode, 150static inline int ext4_init_security(handle_t *handle, struct inode *inode,
151 struct inode *dir) 151 struct inode *dir, const struct qstr *qstr)
152{ 152{
153 return 0; 153 return 0;
154} 154}
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 9b21268e121c..007c3bfbf094 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -49,14 +49,15 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
49} 49}
50 50
51int 51int
52ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) 52ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
53 const struct qstr *qstr)
53{ 54{
54 int err; 55 int err;
55 size_t len; 56 size_t len;
56 void *value; 57 void *value;
57 char *name; 58 char *name;
58 59
59 err = security_inode_init_security(inode, dir, &name, &value, &len); 60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
60 if (err) { 61 if (err) {
61 if (err == -EOPNOTSUPP) 62 if (err == -EOPNOTSUPP)
62 return 0; 63 return 0;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd1..0e277ec4b612 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
757 struct inode *inode = de->d_inode; 757 struct inode *inode = de->d_inode;
758 u32 ipos_h, ipos_m, ipos_l; 758 u32 ipos_h, ipos_m, ipos_l;
759 759
760 if (len < 5) 760 if (len < 5) {
761 *lenp = 5;
761 return 255; /* no room */ 762 return 255; /* no room */
763 }
762 764
763 ipos_h = MSDOS_I(inode)->i_pos >> 8; 765 ipos_h = MSDOS_I(inode)->i_pos >> 8;
764 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd9..adae3fb7451a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU) 46 if (nd && nd->flags & LOOKUP_RCU)
47 return -ECHILD; 47 return -ECHILD;
48 48
49 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
54 54
55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
56{ 56{
57 if (nd->flags & LOOKUP_RCU) 57 if (nd && nd->flags & LOOKUP_RCU)
58 return -ECHILD; 58 return -ECHILD;
59 59
60 /* 60 /*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..6c82e5bac039 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 000000000000..bf93ad2bee07
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
1#include <linux/syscalls.h>
2#include <linux/slab.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/mount.h>
6#include <linux/namei.h>
7#include <linux/exportfs.h>
8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h>
10#include <asm/uaccess.h>
11#include "internal.h"
12
13static long do_sys_name_to_handle(struct path *path,
14 struct file_handle __user *ufh,
15 int __user *mnt_id)
16{
17 long retval;
18 struct file_handle f_handle;
19 int handle_dwords, handle_bytes;
20 struct file_handle *handle = NULL;
21
22 /*
23 * We need t make sure wether the file system
24 * support decoding of the file handle
25 */
26 if (!path->mnt->mnt_sb->s_export_op ||
27 !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
28 return -EOPNOTSUPP;
29
30 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
31 return -EFAULT;
32
33 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
34 return -EINVAL;
35
36 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
37 GFP_KERNEL);
38 if (!handle)
39 return -ENOMEM;
40
41 /* convert handle size to multiple of sizeof(u32) */
42 handle_dwords = f_handle.handle_bytes >> 2;
43
44 /* we ask for a non connected handle */
45 retval = exportfs_encode_fh(path->dentry,
46 (struct fid *)handle->f_handle,
47 &handle_dwords, 0);
48 handle->handle_type = retval;
49 /* convert handle size to bytes */
50 handle_bytes = handle_dwords * sizeof(u32);
51 handle->handle_bytes = handle_bytes;
52 if ((handle->handle_bytes > f_handle.handle_bytes) ||
53 (retval == 255) || (retval == -ENOSPC)) {
54 /* As per old exportfs_encode_fh documentation
55 * we could return ENOSPC to indicate overflow
56 * But file system returned 255 always. So handle
57 * both the values
58 */
59 /*
60 * set the handle size to zero so we copy only
61 * non variable part of the file_handle
62 */
63 handle_bytes = 0;
64 retval = -EOVERFLOW;
65 } else
66 retval = 0;
67 /* copy the mount id */
68 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
69 copy_to_user(ufh, handle,
70 sizeof(struct file_handle) + handle_bytes))
71 retval = -EFAULT;
72 kfree(handle);
73 return retval;
74}
75
76/**
77 * sys_name_to_handle_at: convert name to handle
78 * @dfd: directory relative to which name is interpreted if not absolute
79 * @name: name that should be converted to handle.
80 * @handle: resulting file handle
81 * @mnt_id: mount id of the file system containing the file
82 * @flag: flag value to indicate whether to follow symlink or not
83 *
84 * @handle->handle_size indicate the space available to store the
85 * variable part of the file handle in bytes. If there is not
86 * enough space, the field is updated to return the minimum
87 * value required.
88 */
89SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
90 struct file_handle __user *, handle, int __user *, mnt_id,
91 int, flag)
92{
93 struct path path;
94 int lookup_flags;
95 int err;
96
97 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
98 return -EINVAL;
99
100 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
101 if (flag & AT_EMPTY_PATH)
102 lookup_flags |= LOOKUP_EMPTY;
103 err = user_path_at(dfd, name, lookup_flags, &path);
104 if (!err) {
105 err = do_sys_name_to_handle(&path, handle, mnt_id);
106 path_put(&path);
107 }
108 return err;
109}
110
111static struct vfsmount *get_vfsmount_from_fd(int fd)
112{
113 struct path path;
114
115 if (fd == AT_FDCWD) {
116 struct fs_struct *fs = current->fs;
117 spin_lock(&fs->lock);
118 path = fs->pwd;
119 mntget(path.mnt);
120 spin_unlock(&fs->lock);
121 } else {
122 int fput_needed;
123 struct file *file = fget_light(fd, &fput_needed);
124 if (!file)
125 return ERR_PTR(-EBADF);
126 path = file->f_path;
127 mntget(path.mnt);
128 fput_light(file, fput_needed);
129 }
130 return path.mnt;
131}
132
133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
134{
135 return 1;
136}
137
138static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
139 struct path *path)
140{
141 int retval = 0;
142 int handle_dwords;
143
144 path->mnt = get_vfsmount_from_fd(mountdirfd);
145 if (IS_ERR(path->mnt)) {
146 retval = PTR_ERR(path->mnt);
147 goto out_err;
148 }
149 /* change the handle size to multiple of sizeof(u32) */
150 handle_dwords = handle->handle_bytes >> 2;
151 path->dentry = exportfs_decode_fh(path->mnt,
152 (struct fid *)handle->f_handle,
153 handle_dwords, handle->handle_type,
154 vfs_dentry_acceptable, NULL);
155 if (IS_ERR(path->dentry)) {
156 retval = PTR_ERR(path->dentry);
157 goto out_mnt;
158 }
159 return 0;
160out_mnt:
161 mntput(path->mnt);
162out_err:
163 return retval;
164}
165
166static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
167 struct path *path)
168{
169 int retval = 0;
170 struct file_handle f_handle;
171 struct file_handle *handle = NULL;
172
173 /*
174 * With handle we don't look at the execute bit on the
175 * the directory. Ideally we would like CAP_DAC_SEARCH.
176 * But we don't have that
177 */
178 if (!capable(CAP_DAC_READ_SEARCH)) {
179 retval = -EPERM;
180 goto out_err;
181 }
182 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
183 retval = -EFAULT;
184 goto out_err;
185 }
186 if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
187 (f_handle.handle_bytes == 0)) {
188 retval = -EINVAL;
189 goto out_err;
190 }
191 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
192 GFP_KERNEL);
193 if (!handle) {
194 retval = -ENOMEM;
195 goto out_err;
196 }
197 /* copy the full handle */
198 if (copy_from_user(handle, ufh,
199 sizeof(struct file_handle) +
200 f_handle.handle_bytes)) {
201 retval = -EFAULT;
202 goto out_handle;
203 }
204
205 retval = do_handle_to_path(mountdirfd, handle, path);
206
207out_handle:
208 kfree(handle);
209out_err:
210 return retval;
211}
212
213long do_handle_open(int mountdirfd,
214 struct file_handle __user *ufh, int open_flag)
215{
216 long retval = 0;
217 struct path path;
218 struct file *file;
219 int fd;
220
221 retval = handle_to_path(mountdirfd, ufh, &path);
222 if (retval)
223 return retval;
224
225 fd = get_unused_fd_flags(open_flag);
226 if (fd < 0) {
227 path_put(&path);
228 return fd;
229 }
230 file = file_open_root(path.dentry, path.mnt, "", open_flag);
231 if (IS_ERR(file)) {
232 put_unused_fd(fd);
233 retval = PTR_ERR(file);
234 } else {
235 retval = fd;
236 fsnotify_open(file);
237 fd_install(fd, file);
238 }
239 path_put(&path);
240 return retval;
241}
242
243/**
244 * sys_open_by_handle_at: Open the file handle
245 * @mountdirfd: directory file descriptor
246 * @handle: file handle to be opened
247 * @flag: open flags.
248 *
249 * @mountdirfd indicate the directory file descriptor
250 * of the mount point. file handle is decoded relative
251 * to the vfsmount pointed by the @mountdirfd. @flags
252 * value is same as the open(2) flags.
253 */
254SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
255 struct file_handle __user *, handle,
256 int, flags)
257{
258 long ret;
259
260 if (force_o_largefile())
261 flags |= O_LARGEFILE;
262
263 ret = do_handle_open(mountdirfd, handle, flags);
264 return ret;
265}
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..bfab973c6c5b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -190,7 +190,8 @@ struct file *alloc_file(struct path *path, fmode_t mode,
190 file_take_write(file); 190 file_take_write(file);
191 WARN_ON(mnt_clone_write(path->mnt)); 191 WARN_ON(mnt_clone_write(path->mnt));
192 } 192 }
193 ima_counts_get(file); 193 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
194 i_readcount_inc(path->dentry->d_inode);
194 return file; 195 return file;
195} 196}
196EXPORT_SYMBOL(alloc_file); 197EXPORT_SYMBOL(alloc_file);
@@ -251,6 +252,8 @@ static void __fput(struct file *file)
251 fops_put(file->f_op); 252 fops_put(file->f_op);
252 put_pid(file->f_owner.pid); 253 put_pid(file->f_owner.pid);
253 file_sb_list_del(file); 254 file_sb_list_del(file);
255 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
256 i_readcount_dec(inode);
254 if (file->f_mode & FMODE_WRITE) 257 if (file->f_mode & FMODE_WRITE)
255 drop_file_write_access(file); 258 drop_file_write_access(file);
256 file->f_path.dentry = NULL; 259 file->f_path.dentry = NULL;
@@ -276,11 +279,10 @@ struct file *fget(unsigned int fd)
276 rcu_read_lock(); 279 rcu_read_lock();
277 file = fcheck_files(files, fd); 280 file = fcheck_files(files, fd);
278 if (file) { 281 if (file) {
279 if (!atomic_long_inc_not_zero(&file->f_count)) { 282 /* File object ref couldn't be taken */
280 /* File object ref couldn't be taken */ 283 if (file->f_mode & FMODE_PATH ||
281 rcu_read_unlock(); 284 !atomic_long_inc_not_zero(&file->f_count))
282 return NULL; 285 file = NULL;
283 }
284 } 286 }
285 rcu_read_unlock(); 287 rcu_read_unlock();
286 288
@@ -289,6 +291,25 @@ struct file *fget(unsigned int fd)
289 291
290EXPORT_SYMBOL(fget); 292EXPORT_SYMBOL(fget);
291 293
294struct file *fget_raw(unsigned int fd)
295{
296 struct file *file;
297 struct files_struct *files = current->files;
298
299 rcu_read_lock();
300 file = fcheck_files(files, fd);
301 if (file) {
302 /* File object ref couldn't be taken */
303 if (!atomic_long_inc_not_zero(&file->f_count))
304 file = NULL;
305 }
306 rcu_read_unlock();
307
308 return file;
309}
310
311EXPORT_SYMBOL(fget_raw);
312
292/* 313/*
293 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 314 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
294 * 315 *
@@ -313,6 +334,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
313 *fput_needed = 0; 334 *fput_needed = 0;
314 if (atomic_read(&files->count) == 1) { 335 if (atomic_read(&files->count) == 1) {
315 file = fcheck_files(files, fd); 336 file = fcheck_files(files, fd);
337 if (file && (file->f_mode & FMODE_PATH))
338 file = NULL;
339 } else {
340 rcu_read_lock();
341 file = fcheck_files(files, fd);
342 if (file) {
343 if (!(file->f_mode & FMODE_PATH) &&
344 atomic_long_inc_not_zero(&file->f_count))
345 *fput_needed = 1;
346 else
347 /* Didn't get the reference, someone's freed */
348 file = NULL;
349 }
350 rcu_read_unlock();
351 }
352
353 return file;
354}
355
356struct file *fget_raw_light(unsigned int fd, int *fput_needed)
357{
358 struct file *file;
359 struct files_struct *files = current->files;
360
361 *fput_needed = 0;
362 if (atomic_read(&files->count) == 1) {
363 file = fcheck_files(files, fd);
316 } else { 364 } else {
317 rcu_read_lock(); 365 rcu_read_lock();
318 file = fcheck_files(files, fd); 366 file = fcheck_files(files, fd);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..8bd0ef9286c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU) 161 if (nd && nd->flags & LOOKUP_RCU)
162 return -ECHILD; 162 return -ECHILD;
163 163
164 inode = entry->d_inode; 164 inode = entry->d_inode;
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1283 if (err) 1283 if (err)
1284 return err; 1284 return err;
1285 1285
1286 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1286 if (attr->ia_valid & ATTR_OPEN) {
1287 return 0; 1287 if (fc->atomic_o_trunc)
1288 return 0;
1289 file = NULL;
1290 }
1288 1291
1289 if (attr->ia_valid & ATTR_SIZE) 1292 if (attr->ia_valid & ATTR_SIZE)
1290 is_truncate = true; 1293 is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
86 return ff; 86 return ff;
87} 87}
88 88
89static void fuse_release_async(struct work_struct *work)
90{
91 struct fuse_req *req;
92 struct fuse_conn *fc;
93 struct path path;
94
95 req = container_of(work, struct fuse_req, misc.release.work);
96 path = req->misc.release.path;
97 fc = get_fuse_conn(path.dentry->d_inode);
98
99 fuse_put_request(fc, req);
100 path_put(&path);
101}
102
89static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 103static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
90{ 104{
91 path_put(&req->misc.release.path); 105 if (fc->destroy_req) {
106 /*
107 * If this is a fuseblk mount, then it's possible that
108 * releasing the path will result in releasing the
109 * super block and sending the DESTROY request. If
110 * the server is single threaded, this would hang.
111 * For this reason do the path_put() in a separate
112 * thread.
113 */
114 atomic_inc(&req->count);
115 INIT_WORK(&req->misc.release.work, fuse_release_async);
116 schedule_work(&req->misc.release.work);
117 } else {
118 path_put(&req->misc.release.path);
119 }
92} 120}
93 121
94static void fuse_file_put(struct fuse_file *ff) 122static void fuse_file_put(struct fuse_file *ff, bool sync)
95{ 123{
96 if (atomic_dec_and_test(&ff->count)) { 124 if (atomic_dec_and_test(&ff->count)) {
97 struct fuse_req *req = ff->reserved_req; 125 struct fuse_req *req = ff->reserved_req;
98 126
99 req->end = fuse_release_end; 127 if (sync) {
100 fuse_request_send_background(ff->fc, req); 128 fuse_request_send(ff->fc, req);
129 path_put(&req->misc.release.path);
130 fuse_put_request(ff->fc, req);
131 } else {
132 req->end = fuse_release_end;
133 fuse_request_send_background(ff->fc, req);
134 }
101 kfree(ff); 135 kfree(ff);
102 } 136 }
103} 137}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
219 * Normally this will send the RELEASE request, however if 253 * Normally this will send the RELEASE request, however if
220 * some asynchronous READ or WRITE requests are outstanding, 254 * some asynchronous READ or WRITE requests are outstanding,
221 * the sending will be delayed. 255 * the sending will be delayed.
256 *
257 * Make the release synchronous if this is a fuseblk mount,
258 * synchronous RELEASE is allowed (and desirable) in this case
259 * because the server can be trusted not to screw up.
222 */ 260 */
223 fuse_file_put(ff); 261 fuse_file_put(ff, ff->fc->destroy_req != NULL);
224} 262}
225 263
226static int fuse_open(struct inode *inode, struct file *file) 264static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
558 page_cache_release(page); 596 page_cache_release(page);
559 } 597 }
560 if (req->ff) 598 if (req->ff)
561 fuse_file_put(req->ff); 599 fuse_file_put(req->ff, false);
562} 600}
563 601
564static void fuse_send_readpages(struct fuse_req *req, struct file *file) 602static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1137static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1175static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1138{ 1176{
1139 __free_page(req->pages[0]); 1177 __free_page(req->pages[0]);
1140 fuse_file_put(req->ff); 1178 fuse_file_put(req->ff, false);
1141} 1179}
1142 1180
1143static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1181static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22#include <linux/rbtree.h> 22#include <linux/rbtree.h>
23#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/workqueue.h>
24 25
25/** Max number of pages that can be used in a single read request */ 26/** Max number of pages that can be used in a single read request */
26#define FUSE_MAX_PAGES_PER_REQ 32 27#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
262 /** Data for asynchronous requests */ 263 /** Data for asynchronous requests */
263 union { 264 union {
264 struct { 265 struct {
265 struct fuse_release_in in; 266 union {
267 struct fuse_release_in in;
268 struct work_struct work;
269 };
266 struct path path; 270 struct path path;
267 } release; 271 } release;
268 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd1..051b1a084528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
637 u64 nodeid; 637 u64 nodeid;
638 u32 generation; 638 u32 generation;
639 639
640 if (*max_len < len) 640 if (*max_len < len) {
641 *max_len = len;
641 return 255; 642 return 255;
643 }
642 644
643 nodeid = get_fuse_inode(inode)->nodeid; 645 nodeid = get_fuse_inode(inode)->nodeid;
644 generation = inode->i_generation; 646 generation = inode->i_generation;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7118f1a780a9..cbc07155b1a0 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU) 83 if (flags & IPERM_FLAG_RCU) {
84 return -ECHILD; 84 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
85 return -ECHILD;
86 return -EAGAIN;
87 }
85 88
86 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); 89 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
87 if (IS_ERR(acl)) 90 if (IS_ERR(acl))
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4f36f8832b9b..aad77e4f61b5 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -695,6 +695,7 @@ out:
695 if (error == 0) 695 if (error == 0)
696 return 0; 696 return 0;
697 697
698 unlock_page(page);
698 page_cache_release(page); 699 page_cache_release(page);
699 700
700 gfs2_trans_end(sdp); 701 gfs2_trans_end(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c4039d5eef1..ef3dc4b9fae2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -21,6 +21,7 @@
21#include "meta_io.h" 21#include "meta_io.h"
22#include "quota.h" 22#include "quota.h"
23#include "rgrp.h" 23#include "rgrp.h"
24#include "super.h"
24#include "trans.h" 25#include "trans.h"
25#include "dir.h" 26#include "dir.h"
26#include "util.h" 27#include "util.h"
@@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
757 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 758 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
758 struct gfs2_rgrp_list rlist; 759 struct gfs2_rgrp_list rlist;
759 u64 bn, bstart; 760 u64 bn, bstart;
760 u32 blen; 761 u32 blen, btotal;
761 __be64 *p; 762 __be64 *p;
762 unsigned int rg_blocks = 0; 763 unsigned int rg_blocks = 0;
763 int metadata; 764 int metadata;
@@ -839,6 +840,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
839 840
840 bstart = 0; 841 bstart = 0;
841 blen = 0; 842 blen = 0;
843 btotal = 0;
842 844
843 for (p = top; p < bottom; p++) { 845 for (p = top; p < bottom; p++) {
844 if (!*p) 846 if (!*p)
@@ -851,9 +853,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
851 else { 853 else {
852 if (bstart) { 854 if (bstart) {
853 if (metadata) 855 if (metadata)
854 gfs2_free_meta(ip, bstart, blen); 856 __gfs2_free_meta(ip, bstart, blen);
855 else 857 else
856 gfs2_free_data(ip, bstart, blen); 858 __gfs2_free_data(ip, bstart, blen);
859
860 btotal += blen;
857 } 861 }
858 862
859 bstart = bn; 863 bstart = bn;
@@ -865,11 +869,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
865 } 869 }
866 if (bstart) { 870 if (bstart) {
867 if (metadata) 871 if (metadata)
868 gfs2_free_meta(ip, bstart, blen); 872 __gfs2_free_meta(ip, bstart, blen);
869 else 873 else
870 gfs2_free_data(ip, bstart, blen); 874 __gfs2_free_data(ip, bstart, blen);
875
876 btotal += blen;
871 } 877 }
872 878
879 gfs2_statfs_change(sdp, 0, +btotal, 0);
880 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
881 ip->i_inode.i_gid);
882
873 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 883 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
874 884
875 gfs2_dinode_out(ip, dibh->b_data); 885 gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b873..0da8da2c991d 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
44 int error; 44 int error;
45 int had_lock = 0; 45 int had_lock = 0;
46 46
47 if (nd->flags & LOOKUP_RCU) 47 if (nd && nd->flags & LOOKUP_RCU)
48 return -ECHILD; 48 return -ECHILD;
49 49
50 parent = dget_parent(dentry); 50 parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f9..b5a5e60df0d5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
36 struct super_block *sb = inode->i_sb; 36 struct super_block *sb = inode->i_sb;
37 struct gfs2_inode *ip = GFS2_I(inode); 37 struct gfs2_inode *ip = GFS2_I(inode);
38 38
39 if (*len < GFS2_SMALL_FH_SIZE || 39 if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
40 (connectable && *len < GFS2_LARGE_FH_SIZE)) 40 *len = GFS2_LARGE_FH_SIZE;
41 return 255; 41 return 255;
42 } else if (*len < GFS2_SMALL_FH_SIZE) {
43 *len = GFS2_SMALL_FH_SIZE;
44 return 255;
45 }
42 46
43 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
44 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7cfdcb913363..4074b952b059 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
448{ 448{
449 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 449 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
450 450
451 if (!(file->f_flags & O_NOATIME)) { 451 if (!(file->f_flags & O_NOATIME) &&
452 !IS_NOATIME(&ip->i_inode)) {
452 struct gfs2_holder i_gh; 453 struct gfs2_holder i_gh;
453 int error; 454 int error;
454 455
455 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 456 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
456 error = gfs2_glock_nq(&i_gh); 457 error = gfs2_glock_nq(&i_gh);
457 file_accessed(file); 458 if (error == 0) {
458 if (error == 0) 459 file_accessed(file);
459 gfs2_glock_dq_uninit(&i_gh); 460 gfs2_glock_dq(&i_gh);
461 }
462 gfs2_holder_uninit(&i_gh);
463 if (error)
464 return error;
460 } 465 }
461 vma->vm_ops = &gfs2_vm_ops; 466 vma->vm_ops = &gfs2_vm_ops;
462 vma->vm_flags |= VM_CAN_NONLINEAR; 467 vma->vm_flags |= VM_CAN_NONLINEAR;
@@ -617,8 +622,7 @@ static void empty_write_end(struct page *page, unsigned from,
617{ 622{
618 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 623 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
619 624
620 page_zero_new_buffers(page, from, to); 625 zero_user(page, from, to-from);
621 flush_dcache_page(page);
622 mark_page_accessed(page); 626 mark_page_accessed(page);
623 627
624 if (!gfs2_is_writeback(ip)) 628 if (!gfs2_is_writeback(ip))
@@ -627,36 +631,43 @@ static void empty_write_end(struct page *page, unsigned from,
627 block_commit_write(page, from, to); 631 block_commit_write(page, from, to);
628} 632}
629 633
630static int write_empty_blocks(struct page *page, unsigned from, unsigned to) 634static int needs_empty_write(sector_t block, struct inode *inode)
631{ 635{
632 unsigned start, end, next;
633 struct buffer_head *bh, *head;
634 int error; 636 int error;
637 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
635 638
636 if (!page_has_buffers(page)) { 639 bh_map.b_size = 1 << inode->i_blkbits;
637 error = __block_write_begin(page, from, to - from, gfs2_block_map); 640 error = gfs2_block_map(inode, block, &bh_map, 0);
638 if (unlikely(error)) 641 if (unlikely(error))
639 return error; 642 return error;
643 return !buffer_mapped(&bh_map);
644}
640 645
641 empty_write_end(page, from, to); 646static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
642 return 0; 647{
643 } 648 struct inode *inode = page->mapping->host;
649 unsigned start, end, next, blksize;
650 sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
651 int ret;
644 652
645 bh = head = page_buffers(page); 653 blksize = 1 << inode->i_blkbits;
646 next = end = 0; 654 next = end = 0;
647 while (next < from) { 655 while (next < from) {
648 next += bh->b_size; 656 next += blksize;
649 bh = bh->b_this_page; 657 block++;
650 } 658 }
651 start = next; 659 start = next;
652 do { 660 do {
653 next += bh->b_size; 661 next += blksize;
654 if (buffer_mapped(bh)) { 662 ret = needs_empty_write(block, inode);
663 if (unlikely(ret < 0))
664 return ret;
665 if (ret == 0) {
655 if (end) { 666 if (end) {
656 error = __block_write_begin(page, start, end - start, 667 ret = __block_write_begin(page, start, end - start,
657 gfs2_block_map); 668 gfs2_block_map);
658 if (unlikely(error)) 669 if (unlikely(ret))
659 return error; 670 return ret;
660 empty_write_end(page, start, end); 671 empty_write_end(page, start, end);
661 end = 0; 672 end = 0;
662 } 673 }
@@ -664,13 +675,13 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
664 } 675 }
665 else 676 else
666 end = next; 677 end = next;
667 bh = bh->b_this_page; 678 block++;
668 } while (next < to); 679 } while (next < to);
669 680
670 if (end) { 681 if (end) {
671 error = __block_write_begin(page, start, end - start, gfs2_block_map); 682 ret = __block_write_begin(page, start, end - start, gfs2_block_map);
672 if (unlikely(error)) 683 if (unlikely(ret))
673 return error; 684 return ret;
674 empty_write_end(page, start, end); 685 empty_write_end(page, start, end);
675 } 686 }
676 687
@@ -976,8 +987,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
976 987
977 mutex_lock(&fp->f_fl_mutex); 988 mutex_lock(&fp->f_fl_mutex);
978 flock_lock_file_wait(file, fl); 989 flock_lock_file_wait(file, fl);
979 if (fl_gh->gh_gl) 990 if (fl_gh->gh_gl) {
980 gfs2_glock_dq_uninit(fl_gh); 991 gfs2_glock_dq_wait(fl_gh);
992 gfs2_holder_uninit(fl_gh);
993 }
981 mutex_unlock(&fp->f_fl_mutex); 994 mutex_unlock(&fp->f_fl_mutex);
982} 995}
983 996
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb152e6..e2431313491f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -26,6 +26,9 @@
26#include <linux/freezer.h> 26#include <linux/freezer.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/jiffies.h> 28#include <linux/jiffies.h>
29#include <linux/rcupdate.h>
30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h>
29 32
30#include "gfs2.h" 33#include "gfs2.h"
31#include "incore.h" 34#include "incore.h"
@@ -41,10 +44,6 @@
41#define CREATE_TRACE_POINTS 44#define CREATE_TRACE_POINTS
42#include "trace_gfs2.h" 45#include "trace_gfs2.h"
43 46
44struct gfs2_gl_hash_bucket {
45 struct hlist_head hb_list;
46};
47
48struct gfs2_glock_iter { 47struct gfs2_glock_iter {
49 int hash; /* hash bucket index */ 48 int hash; /* hash bucket index */
50 struct gfs2_sbd *sdp; /* incore superblock */ 49 struct gfs2_sbd *sdp; /* incore superblock */
@@ -54,7 +53,6 @@ struct gfs2_glock_iter {
54 53
55typedef void (*glock_examiner) (struct gfs2_glock * gl); 54typedef void (*glock_examiner) (struct gfs2_glock * gl);
56 55
57static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
58static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); 56static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
59#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) 57#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
60static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 58static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock);
70#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 68#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
71#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) 69#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
72 70
73static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; 71static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
74static struct dentry *gfs2_root; 72static struct dentry *gfs2_root;
75 73
76/*
77 * Despite what you might think, the numbers below are not arbitrary :-)
78 * They are taken from the ipv4 routing hash code, which is well tested
79 * and thus should be nearly optimal. Later on we might tweek the numbers
80 * but for now this should be fine.
81 *
82 * The reason for putting the locks in a separate array from the list heads
83 * is that we can have fewer locks than list heads and save memory. We use
84 * the same hash function for both, but with a different hash mask.
85 */
86#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
87 defined(CONFIG_PROVE_LOCKING)
88
89#ifdef CONFIG_LOCKDEP
90# define GL_HASH_LOCK_SZ 256
91#else
92# if NR_CPUS >= 32
93# define GL_HASH_LOCK_SZ 4096
94# elif NR_CPUS >= 16
95# define GL_HASH_LOCK_SZ 2048
96# elif NR_CPUS >= 8
97# define GL_HASH_LOCK_SZ 1024
98# elif NR_CPUS >= 4
99# define GL_HASH_LOCK_SZ 512
100# else
101# define GL_HASH_LOCK_SZ 256
102# endif
103#endif
104
105/* We never want more locks than chains */
106#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
107# undef GL_HASH_LOCK_SZ
108# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
109#endif
110
111static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
112
113static inline rwlock_t *gl_lock_addr(unsigned int x)
114{
115 return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
116}
117#else /* not SMP, so no spinlocks required */
118static inline rwlock_t *gl_lock_addr(unsigned int x)
119{
120 return NULL;
121}
122#endif
123
124/** 74/**
125 * gl_hash() - Turn glock number into hash bucket number 75 * gl_hash() - Turn glock number into hash bucket number
126 * @lock: The glock number 76 * @lock: The glock number
@@ -141,25 +91,35 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
141 return h; 91 return h;
142} 92}
143 93
144/** 94static inline void spin_lock_bucket(unsigned int hash)
145 * glock_free() - Perform a few checks and then release struct gfs2_glock 95{
146 * @gl: The glock to release 96 struct hlist_bl_head *bl = &gl_hash_table[hash];
147 * 97 bit_spin_lock(0, (unsigned long *)bl);
148 * Also calls lock module to release its internal structure for this glock. 98}
149 *
150 */
151 99
152static void glock_free(struct gfs2_glock *gl) 100static inline void spin_unlock_bucket(unsigned int hash)
101{
102 struct hlist_bl_head *bl = &gl_hash_table[hash];
103 __bit_spin_unlock(0, (unsigned long *)bl);
104}
105
106static void gfs2_glock_dealloc(struct rcu_head *rcu)
107{
108 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
109
110 if (gl->gl_ops->go_flags & GLOF_ASPACE)
111 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
112 else
113 kmem_cache_free(gfs2_glock_cachep, gl);
114}
115
116void gfs2_glock_free(struct gfs2_glock *gl)
153{ 117{
154 struct gfs2_sbd *sdp = gl->gl_sbd; 118 struct gfs2_sbd *sdp = gl->gl_sbd;
155 struct address_space *mapping = gfs2_glock2aspace(gl);
156 struct kmem_cache *cachep = gfs2_glock_cachep;
157 119
158 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 120 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
159 trace_gfs2_glock_put(gl); 121 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
160 if (mapping) 122 wake_up(&sdp->sd_glock_wait);
161 cachep = gfs2_glock_aspace_cachep;
162 sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
163} 123}
164 124
165/** 125/**
@@ -185,34 +145,49 @@ static int demote_ok(const struct gfs2_glock *gl)
185{ 145{
186 const struct gfs2_glock_operations *glops = gl->gl_ops; 146 const struct gfs2_glock_operations *glops = gl->gl_ops;
187 147
148 /* assert_spin_locked(&gl->gl_spin); */
149
188 if (gl->gl_state == LM_ST_UNLOCKED) 150 if (gl->gl_state == LM_ST_UNLOCKED)
189 return 0; 151 return 0;
190 if (!list_empty(&gl->gl_holders)) 152 if (test_bit(GLF_LFLUSH, &gl->gl_flags))
153 return 0;
154 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
155 !list_empty(&gl->gl_holders))
191 return 0; 156 return 0;
192 if (glops->go_demote_ok) 157 if (glops->go_demote_ok)
193 return glops->go_demote_ok(gl); 158 return glops->go_demote_ok(gl);
194 return 1; 159 return 1;
195} 160}
196 161
162
197/** 163/**
198 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 164 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
199 * @gl: the glock 165 * @gl: the glock
200 * 166 *
167 * If the glock is demotable, then we add it (or move it) to the end
168 * of the glock LRU list.
201 */ 169 */
202 170
203static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 171static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
204{ 172{
205 int may_reclaim; 173 if (demote_ok(gl)) {
206 may_reclaim = (demote_ok(gl) && 174 spin_lock(&lru_lock);
207 (atomic_read(&gl->gl_ref) == 1 || 175
208 (gl->gl_name.ln_type == LM_TYPE_INODE && 176 if (!list_empty(&gl->gl_lru))
209 atomic_read(&gl->gl_ref) <= 2))); 177 list_del_init(&gl->gl_lru);
210 spin_lock(&lru_lock); 178 else
211 if (list_empty(&gl->gl_lru) && may_reclaim) { 179 atomic_inc(&lru_count);
180
212 list_add_tail(&gl->gl_lru, &lru_list); 181 list_add_tail(&gl->gl_lru, &lru_list);
213 atomic_inc(&lru_count); 182 spin_unlock(&lru_lock);
214 } 183 }
215 spin_unlock(&lru_lock); 184}
185
186void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
187{
188 spin_lock(&gl->gl_spin);
189 __gfs2_glock_schedule_for_reclaim(gl);
190 spin_unlock(&gl->gl_spin);
216} 191}
217 192
218/** 193/**
@@ -227,7 +202,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
227{ 202{
228 if (atomic_dec_and_test(&gl->gl_ref)) 203 if (atomic_dec_and_test(&gl->gl_ref))
229 GLOCK_BUG_ON(gl, 1); 204 GLOCK_BUG_ON(gl, 1);
230 gfs2_glock_schedule_for_reclaim(gl);
231} 205}
232 206
233/** 207/**
@@ -236,30 +210,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
236 * 210 *
237 */ 211 */
238 212
239int gfs2_glock_put(struct gfs2_glock *gl) 213void gfs2_glock_put(struct gfs2_glock *gl)
240{ 214{
241 int rv = 0; 215 struct gfs2_sbd *sdp = gl->gl_sbd;
216 struct address_space *mapping = gfs2_glock2aspace(gl);
242 217
243 write_lock(gl_lock_addr(gl->gl_hash)); 218 if (atomic_dec_and_test(&gl->gl_ref)) {
244 if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { 219 spin_lock_bucket(gl->gl_hash);
245 hlist_del(&gl->gl_list); 220 hlist_bl_del_rcu(&gl->gl_list);
221 spin_unlock_bucket(gl->gl_hash);
222 spin_lock(&lru_lock);
246 if (!list_empty(&gl->gl_lru)) { 223 if (!list_empty(&gl->gl_lru)) {
247 list_del_init(&gl->gl_lru); 224 list_del_init(&gl->gl_lru);
248 atomic_dec(&lru_count); 225 atomic_dec(&lru_count);
249 } 226 }
250 spin_unlock(&lru_lock); 227 spin_unlock(&lru_lock);
251 write_unlock(gl_lock_addr(gl->gl_hash));
252 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 228 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
253 glock_free(gl); 229 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
254 rv = 1; 230 trace_gfs2_glock_put(gl);
255 goto out; 231 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
256 } 232 }
257 spin_lock(&gl->gl_spin);
258 gfs2_glock_schedule_for_reclaim(gl);
259 spin_unlock(&gl->gl_spin);
260 write_unlock(gl_lock_addr(gl->gl_hash));
261out:
262 return rv;
263} 233}
264 234
265/** 235/**
@@ -275,17 +245,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
275 const struct lm_lockname *name) 245 const struct lm_lockname *name)
276{ 246{
277 struct gfs2_glock *gl; 247 struct gfs2_glock *gl;
278 struct hlist_node *h; 248 struct hlist_bl_node *h;
279 249
280 hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { 250 hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
281 if (!lm_name_equal(&gl->gl_name, name)) 251 if (!lm_name_equal(&gl->gl_name, name))
282 continue; 252 continue;
283 if (gl->gl_sbd != sdp) 253 if (gl->gl_sbd != sdp)
284 continue; 254 continue;
285 255 if (atomic_inc_not_zero(&gl->gl_ref))
286 atomic_inc(&gl->gl_ref); 256 return gl;
287
288 return gl;
289 } 257 }
290 258
291 return NULL; 259 return NULL;
@@ -743,10 +711,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
743 struct gfs2_glock *gl, *tmp; 711 struct gfs2_glock *gl, *tmp;
744 unsigned int hash = gl_hash(sdp, &name); 712 unsigned int hash = gl_hash(sdp, &name);
745 struct address_space *mapping; 713 struct address_space *mapping;
714 struct kmem_cache *cachep;
746 715
747 read_lock(gl_lock_addr(hash)); 716 rcu_read_lock();
748 gl = search_bucket(hash, sdp, &name); 717 gl = search_bucket(hash, sdp, &name);
749 read_unlock(gl_lock_addr(hash)); 718 rcu_read_unlock();
750 719
751 *glp = gl; 720 *glp = gl;
752 if (gl) 721 if (gl)
@@ -755,9 +724,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
755 return -ENOENT; 724 return -ENOENT;
756 725
757 if (glops->go_flags & GLOF_ASPACE) 726 if (glops->go_flags & GLOF_ASPACE)
758 gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL); 727 cachep = gfs2_glock_aspace_cachep;
759 else 728 else
760 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); 729 cachep = gfs2_glock_cachep;
730 gl = kmem_cache_alloc(cachep, GFP_KERNEL);
761 if (!gl) 731 if (!gl)
762 return -ENOMEM; 732 return -ENOMEM;
763 733
@@ -790,15 +760,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
790 mapping->writeback_index = 0; 760 mapping->writeback_index = 0;
791 } 761 }
792 762
793 write_lock(gl_lock_addr(hash)); 763 spin_lock_bucket(hash);
794 tmp = search_bucket(hash, sdp, &name); 764 tmp = search_bucket(hash, sdp, &name);
795 if (tmp) { 765 if (tmp) {
796 write_unlock(gl_lock_addr(hash)); 766 spin_unlock_bucket(hash);
797 glock_free(gl); 767 kmem_cache_free(cachep, gl);
768 atomic_dec(&sdp->sd_glock_disposal);
798 gl = tmp; 769 gl = tmp;
799 } else { 770 } else {
800 hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); 771 hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
801 write_unlock(gl_lock_addr(hash)); 772 spin_unlock_bucket(hash);
802 } 773 }
803 774
804 *glp = gl; 775 *glp = gl;
@@ -1007,13 +978,13 @@ fail:
1007 insert_pt = &gh2->gh_list; 978 insert_pt = &gh2->gh_list;
1008 } 979 }
1009 set_bit(GLF_QUEUED, &gl->gl_flags); 980 set_bit(GLF_QUEUED, &gl->gl_flags);
981 trace_gfs2_glock_queue(gh, 1);
1010 if (likely(insert_pt == NULL)) { 982 if (likely(insert_pt == NULL)) {
1011 list_add_tail(&gh->gh_list, &gl->gl_holders); 983 list_add_tail(&gh->gh_list, &gl->gl_holders);
1012 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 984 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
1013 goto do_cancel; 985 goto do_cancel;
1014 return; 986 return;
1015 } 987 }
1016 trace_gfs2_glock_queue(gh, 1);
1017 list_add_tail(&gh->gh_list, insert_pt); 988 list_add_tail(&gh->gh_list, insert_pt);
1018do_cancel: 989do_cancel:
1019 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); 990 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1113,6 +1084,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1113 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1084 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1114 fast_path = 1; 1085 fast_path = 1;
1115 } 1086 }
1087 __gfs2_glock_schedule_for_reclaim(gl);
1116 trace_gfs2_glock_queue(gh, 0); 1088 trace_gfs2_glock_queue(gh, 0);
1117 spin_unlock(&gl->gl_spin); 1089 spin_unlock(&gl->gl_spin);
1118 if (likely(fast_path)) 1090 if (likely(fast_path))
@@ -1276,10 +1248,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1276 1248
1277void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1249void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1278{ 1250{
1279 unsigned int x; 1251 while (num_gh--)
1280 1252 gfs2_glock_dq(&ghs[num_gh]);
1281 for (x = 0; x < num_gh; x++)
1282 gfs2_glock_dq(&ghs[x]);
1283} 1253}
1284 1254
1285/** 1255/**
@@ -1291,10 +1261,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1291 1261
1292void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) 1262void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1293{ 1263{
1294 unsigned int x; 1264 while (num_gh--)
1295 1265 gfs2_glock_dq_uninit(&ghs[num_gh]);
1296 for (x = 0; x < num_gh; x++)
1297 gfs2_glock_dq_uninit(&ghs[x]);
1298} 1266}
1299 1267
1300void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) 1268void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
@@ -1440,42 +1408,30 @@ static struct shrinker glock_shrinker = {
1440 * @sdp: the filesystem 1408 * @sdp: the filesystem
1441 * @bucket: the bucket 1409 * @bucket: the bucket
1442 * 1410 *
1443 * Returns: 1 if the bucket has entries
1444 */ 1411 */
1445 1412
1446static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, 1413static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
1447 unsigned int hash) 1414 unsigned int hash)
1448{ 1415{
1449 struct gfs2_glock *gl, *prev = NULL; 1416 struct gfs2_glock *gl;
1450 int has_entries = 0; 1417 struct hlist_bl_head *head = &gl_hash_table[hash];
1451 struct hlist_head *head = &gl_hash_table[hash].hb_list; 1418 struct hlist_bl_node *pos;
1452 1419
1453 read_lock(gl_lock_addr(hash)); 1420 rcu_read_lock();
1454 /* Can't use hlist_for_each_entry - don't want prefetch here */ 1421 hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
1455 if (hlist_empty(head)) 1422 if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
1456 goto out;
1457 gl = list_entry(head->first, struct gfs2_glock, gl_list);
1458 while(1) {
1459 if (!sdp || gl->gl_sbd == sdp) {
1460 gfs2_glock_hold(gl);
1461 read_unlock(gl_lock_addr(hash));
1462 if (prev)
1463 gfs2_glock_put(prev);
1464 prev = gl;
1465 examiner(gl); 1423 examiner(gl);
1466 has_entries = 1;
1467 read_lock(gl_lock_addr(hash));
1468 }
1469 if (gl->gl_list.next == NULL)
1470 break;
1471 gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
1472 } 1424 }
1473out: 1425 rcu_read_unlock();
1474 read_unlock(gl_lock_addr(hash));
1475 if (prev)
1476 gfs2_glock_put(prev);
1477 cond_resched(); 1426 cond_resched();
1478 return has_entries; 1427}
1428
1429static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1430{
1431 unsigned x;
1432
1433 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1434 examine_bucket(examiner, sdp, x);
1479} 1435}
1480 1436
1481 1437
@@ -1529,10 +1485,21 @@ static void clear_glock(struct gfs2_glock *gl)
1529 1485
1530void gfs2_glock_thaw(struct gfs2_sbd *sdp) 1486void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1531{ 1487{
1532 unsigned x; 1488 glock_hash_walk(thaw_glock, sdp);
1489}
1533 1490
1534 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) 1491static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1535 examine_bucket(thaw_glock, sdp, x); 1492{
1493 int ret;
1494 spin_lock(&gl->gl_spin);
1495 ret = __dump_glock(seq, gl);
1496 spin_unlock(&gl->gl_spin);
1497 return ret;
1498}
1499
1500static void dump_glock_func(struct gfs2_glock *gl)
1501{
1502 dump_glock(NULL, gl);
1536} 1503}
1537 1504
1538/** 1505/**
@@ -1545,13 +1512,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1545 1512
1546void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1513void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1547{ 1514{
1548 unsigned int x; 1515 glock_hash_walk(clear_glock, sdp);
1549
1550 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1551 examine_bucket(clear_glock, sdp, x);
1552 flush_workqueue(glock_workqueue); 1516 flush_workqueue(glock_workqueue);
1553 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); 1517 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
1554 gfs2_dump_lockstate(sdp); 1518 glock_hash_walk(dump_glock_func, sdp);
1555} 1519}
1556 1520
1557void gfs2_glock_finish_truncate(struct gfs2_inode *ip) 1521void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
@@ -1717,73 +1681,22 @@ out:
1717 return error; 1681 return error;
1718} 1682}
1719 1683
1720static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1721{
1722 int ret;
1723 spin_lock(&gl->gl_spin);
1724 ret = __dump_glock(seq, gl);
1725 spin_unlock(&gl->gl_spin);
1726 return ret;
1727}
1728 1684
1729/**
1730 * gfs2_dump_lockstate - print out the current lockstate
1731 * @sdp: the filesystem
1732 * @ub: the buffer to copy the information into
1733 *
1734 * If @ub is NULL, dump the lockstate to the console.
1735 *
1736 */
1737
1738static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
1739{
1740 struct gfs2_glock *gl;
1741 struct hlist_node *h;
1742 unsigned int x;
1743 int error = 0;
1744
1745 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
1746
1747 read_lock(gl_lock_addr(x));
1748
1749 hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
1750 if (gl->gl_sbd != sdp)
1751 continue;
1752
1753 error = dump_glock(NULL, gl);
1754 if (error)
1755 break;
1756 }
1757
1758 read_unlock(gl_lock_addr(x));
1759
1760 if (error)
1761 break;
1762 }
1763
1764
1765 return error;
1766}
1767 1685
1768 1686
1769int __init gfs2_glock_init(void) 1687int __init gfs2_glock_init(void)
1770{ 1688{
1771 unsigned i; 1689 unsigned i;
1772 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { 1690 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
1773 INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); 1691 INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
1774 }
1775#ifdef GL_HASH_LOCK_SZ
1776 for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
1777 rwlock_init(&gl_hash_locks[i]);
1778 } 1692 }
1779#endif
1780 1693
1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1694 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1782 WQ_HIGHPRI | WQ_FREEZEABLE, 0); 1695 WQ_HIGHPRI | WQ_FREEZABLE, 0);
1783 if (IS_ERR(glock_workqueue)) 1696 if (IS_ERR(glock_workqueue))
1784 return PTR_ERR(glock_workqueue); 1697 return PTR_ERR(glock_workqueue);
1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", 1698 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1786 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 1699 WQ_MEM_RECLAIM | WQ_FREEZABLE,
1787 0); 1700 0);
1788 if (IS_ERR(gfs2_delete_workqueue)) { 1701 if (IS_ERR(gfs2_delete_workqueue)) {
1789 destroy_workqueue(glock_workqueue); 1702 destroy_workqueue(glock_workqueue);
@@ -1802,62 +1715,54 @@ void gfs2_glock_exit(void)
1802 destroy_workqueue(gfs2_delete_workqueue); 1715 destroy_workqueue(gfs2_delete_workqueue);
1803} 1716}
1804 1717
1718static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
1719{
1720 return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
1721 struct gfs2_glock, gl_list);
1722}
1723
1724static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
1725{
1726 return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
1727 struct gfs2_glock, gl_list);
1728}
1729
1805static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) 1730static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
1806{ 1731{
1807 struct gfs2_glock *gl; 1732 struct gfs2_glock *gl;
1808 1733
1809restart: 1734 do {
1810 read_lock(gl_lock_addr(gi->hash)); 1735 gl = gi->gl;
1811 gl = gi->gl; 1736 if (gl) {
1812 if (gl) { 1737 gi->gl = glock_hash_next(gl);
1813 gi->gl = hlist_entry(gl->gl_list.next, 1738 } else {
1814 struct gfs2_glock, gl_list); 1739 gi->gl = glock_hash_chain(gi->hash);
1815 } else { 1740 }
1816 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, 1741 while (gi->gl == NULL) {
1817 struct gfs2_glock, gl_list); 1742 gi->hash++;
1818 } 1743 if (gi->hash >= GFS2_GL_HASH_SIZE) {
1819 if (gi->gl) 1744 rcu_read_unlock();
1820 gfs2_glock_hold(gi->gl); 1745 return 1;
1821 read_unlock(gl_lock_addr(gi->hash)); 1746 }
1822 if (gl) 1747 gi->gl = glock_hash_chain(gi->hash);
1823 gfs2_glock_put(gl); 1748 }
1824 while (gi->gl == NULL) { 1749 /* Skip entries for other sb and dead entries */
1825 gi->hash++; 1750 } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
1826 if (gi->hash >= GFS2_GL_HASH_SIZE)
1827 return 1;
1828 read_lock(gl_lock_addr(gi->hash));
1829 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
1830 struct gfs2_glock, gl_list);
1831 if (gi->gl)
1832 gfs2_glock_hold(gi->gl);
1833 read_unlock(gl_lock_addr(gi->hash));
1834 }
1835
1836 if (gi->sdp != gi->gl->gl_sbd)
1837 goto restart;
1838 1751
1839 return 0; 1752 return 0;
1840} 1753}
1841 1754
1842static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
1843{
1844 if (gi->gl)
1845 gfs2_glock_put(gi->gl);
1846 gi->gl = NULL;
1847}
1848
1849static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 1755static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1850{ 1756{
1851 struct gfs2_glock_iter *gi = seq->private; 1757 struct gfs2_glock_iter *gi = seq->private;
1852 loff_t n = *pos; 1758 loff_t n = *pos;
1853 1759
1854 gi->hash = 0; 1760 gi->hash = 0;
1761 rcu_read_lock();
1855 1762
1856 do { 1763 do {
1857 if (gfs2_glock_iter_next(gi)) { 1764 if (gfs2_glock_iter_next(gi))
1858 gfs2_glock_iter_free(gi);
1859 return NULL; 1765 return NULL;
1860 }
1861 } while (n--); 1766 } while (n--);
1862 1767
1863 return gi->gl; 1768 return gi->gl;
@@ -1870,10 +1775,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1870 1775
1871 (*pos)++; 1776 (*pos)++;
1872 1777
1873 if (gfs2_glock_iter_next(gi)) { 1778 if (gfs2_glock_iter_next(gi))
1874 gfs2_glock_iter_free(gi);
1875 return NULL; 1779 return NULL;
1876 }
1877 1780
1878 return gi->gl; 1781 return gi->gl;
1879} 1782}
@@ -1881,7 +1784,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1881static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 1784static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
1882{ 1785{
1883 struct gfs2_glock_iter *gi = seq->private; 1786 struct gfs2_glock_iter *gi = seq->private;
1884 gfs2_glock_iter_free(gi); 1787
1788 if (gi->gl)
1789 rcu_read_unlock();
1790 gi->gl = NULL;
1885} 1791}
1886 1792
1887static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 1793static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 691851ceb615..aea160690e94 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -118,7 +118,7 @@ struct lm_lockops {
118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); 118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
119 void (*lm_unmount) (struct gfs2_sbd *sdp); 119 void (*lm_unmount) (struct gfs2_sbd *sdp);
120 void (*lm_withdraw) (struct gfs2_sbd *sdp); 120 void (*lm_withdraw) (struct gfs2_sbd *sdp);
121 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); 121 void (*lm_put_lock) (struct gfs2_glock *gl);
122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, 122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
123 unsigned int flags); 123 unsigned int flags);
124 void (*lm_cancel) (struct gfs2_glock *gl); 124 void (*lm_cancel) (struct gfs2_glock *gl);
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
174 int create, struct gfs2_glock **glp); 174 int create, struct gfs2_glock **glp);
175void gfs2_glock_hold(struct gfs2_glock *gl); 175void gfs2_glock_hold(struct gfs2_glock *gl);
176void gfs2_glock_put_nolock(struct gfs2_glock *gl); 176void gfs2_glock_put_nolock(struct gfs2_glock *gl);
177int gfs2_glock_put(struct gfs2_glock *gl); 177void gfs2_glock_put(struct gfs2_glock *gl);
178void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 178void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
179 struct gfs2_holder *gh); 179 struct gfs2_holder *gh);
180void gfs2_holder_reinit(unsigned int state, unsigned flags, 180void gfs2_holder_reinit(unsigned int state, unsigned flags,
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
223 return error; 223 return error;
224} 224}
225 225
226/* Lock Value Block functions */ 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228int gfs2_lvb_hold(struct gfs2_glock *gl); 228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229void gfs2_lvb_unhold(struct gfs2_glock *gl); 229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230 230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
233void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 233extern void gfs2_glock_free(struct gfs2_glock *gl);
234void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 234
235void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 235extern int __init gfs2_glock_init(void);
236void gfs2_glock_thaw(struct gfs2_sbd *sdp); 236extern void gfs2_glock_exit(void);
237 237
238int __init gfs2_glock_init(void); 238extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
239void gfs2_glock_exit(void); 239extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
240 240extern int gfs2_register_debugfs(void);
241int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); 241extern void gfs2_unregister_debugfs(void);
242void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
243int gfs2_register_debugfs(void);
244void gfs2_unregister_debugfs(void);
245 242
246extern const struct lm_lockops gfs2_dlm_ops; 243extern const struct lm_lockops gfs2_dlm_ops;
247 244
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 263561bf1a50..3754e3cbf02b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -56,20 +56,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
56 BUG_ON(current->journal_info); 56 BUG_ON(current->journal_info);
57 current->journal_info = &tr; 57 current->journal_info = &tr;
58 58
59 gfs2_log_lock(sdp); 59 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 60 while (!list_empty(head)) {
61 bd = list_entry(head->next, struct gfs2_bufdata, 61 bd = list_entry(head->next, struct gfs2_bufdata,
62 bd_ail_gl_list); 62 bd_ail_gl_list);
63 bh = bd->bd_bh; 63 bh = bd->bd_bh;
64 gfs2_remove_from_ail(bd); 64 gfs2_remove_from_ail(bd);
65 spin_unlock(&sdp->sd_ail_lock);
66
65 bd->bd_bh = NULL; 67 bd->bd_bh = NULL;
66 bh->b_private = NULL; 68 bh->b_private = NULL;
67 bd->bd_blkno = bh->b_blocknr; 69 bd->bd_blkno = bh->b_blocknr;
70 gfs2_log_lock(sdp);
68 gfs2_assert_withdraw(sdp, !buffer_busy(bh)); 71 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
69 gfs2_trans_add_revoke(sdp, bd); 72 gfs2_trans_add_revoke(sdp, bd);
73 gfs2_log_unlock(sdp);
74
75 spin_lock(&sdp->sd_ail_lock);
70 } 76 }
71 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
72 gfs2_log_unlock(sdp); 78 spin_unlock(&sdp->sd_ail_lock);
73 79
74 gfs2_trans_end(sdp); 80 gfs2_trans_end(sdp);
75 gfs2_log_flush(sdp, NULL); 81 gfs2_log_flush(sdp, NULL);
@@ -206,8 +212,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
206static int inode_go_demote_ok(const struct gfs2_glock *gl) 212static int inode_go_demote_ok(const struct gfs2_glock *gl)
207{ 213{
208 struct gfs2_sbd *sdp = gl->gl_sbd; 214 struct gfs2_sbd *sdp = gl->gl_sbd;
215 struct gfs2_holder *gh;
216
209 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) 217 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
210 return 0; 218 return 0;
219
220 if (!list_empty(&gl->gl_holders)) {
221 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
222 if (gh->gh_list.next != &gl->gl_holders)
223 return 0;
224 }
225
211 return 1; 226 return 1;
212} 227}
213 228
@@ -272,19 +287,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
272} 287}
273 288
274/** 289/**
275 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
276 * @gl: the glock
277 *
278 * Returns: 1 if it's ok
279 */
280
281static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
282{
283 const struct address_space *mapping = (const struct address_space *)(gl + 1);
284 return !mapping->nrpages;
285}
286
287/**
288 * rgrp_go_lock - operation done after an rgrp lock is locked by 290 * rgrp_go_lock - operation done after an rgrp lock is locked by
289 * a first holder on this node. 291 * a first holder on this node.
290 * @gl: the glock 292 * @gl: the glock
@@ -410,7 +412,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
410const struct gfs2_glock_operations gfs2_rgrp_glops = { 412const struct gfs2_glock_operations gfs2_rgrp_glops = {
411 .go_xmote_th = rgrp_go_sync, 413 .go_xmote_th = rgrp_go_sync,
412 .go_inval = rgrp_go_inval, 414 .go_inval = rgrp_go_inval,
413 .go_demote_ok = rgrp_go_demote_ok,
414 .go_lock = rgrp_go_lock, 415 .go_lock = rgrp_go_lock,
415 .go_unlock = rgrp_go_unlock, 416 .go_unlock = rgrp_go_unlock,
416 .go_dump = gfs2_rgrp_dump, 417 .go_dump = gfs2_rgrp_dump,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a79790c06275..870a89d6d4dc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -15,6 +15,8 @@
15#include <linux/workqueue.h> 15#include <linux/workqueue.h>
16#include <linux/dlm.h> 16#include <linux/dlm.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/rcupdate.h>
19#include <linux/rculist_bl.h>
18 20
19#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
20#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
@@ -201,7 +203,7 @@ enum {
201}; 203};
202 204
203struct gfs2_glock { 205struct gfs2_glock {
204 struct hlist_node gl_list; 206 struct hlist_bl_node gl_list;
205 unsigned long gl_flags; /* GLF_... */ 207 unsigned long gl_flags; /* GLF_... */
206 struct lm_lockname gl_name; 208 struct lm_lockname gl_name;
207 atomic_t gl_ref; 209 atomic_t gl_ref;
@@ -234,6 +236,7 @@ struct gfs2_glock {
234 atomic_t gl_ail_count; 236 atomic_t gl_ail_count;
235 struct delayed_work gl_work; 237 struct delayed_work gl_work;
236 struct work_struct gl_delete; 238 struct work_struct gl_delete;
239 struct rcu_head gl_rcu;
237}; 240};
238 241
239#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ 242#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
@@ -314,6 +317,7 @@ enum {
314 QDF_USER = 0, 317 QDF_USER = 0,
315 QDF_CHANGE = 1, 318 QDF_CHANGE = 1,
316 QDF_LOCKED = 2, 319 QDF_LOCKED = 2,
320 QDF_REFRESH = 3,
317}; 321};
318 322
319struct gfs2_quota_data { 323struct gfs2_quota_data {
@@ -647,6 +651,7 @@ struct gfs2_sbd {
647 unsigned int sd_log_flush_head; 651 unsigned int sd_log_flush_head;
648 u64 sd_log_flush_wrapped; 652 u64 sd_log_flush_wrapped;
649 653
654 spinlock_t sd_ail_lock;
650 struct list_head sd_ail1_list; 655 struct list_head sd_ail1_list;
651 struct list_head sd_ail2_list; 656 struct list_head sd_ail2_list;
652 u64 sd_ail_sync_gen; 657 u64 sd_ail_sync_gen;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7aa7d4f8984a..97d54a28776a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -763,14 +763,15 @@ fail:
763 return error; 763 return error;
764} 764}
765 765
766static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) 766static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
767 const struct qstr *qstr)
767{ 768{
768 int err; 769 int err;
769 size_t len; 770 size_t len;
770 void *value; 771 void *value;
771 char *name; 772 char *name;
772 773
773 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 774 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
774 &name, &value, &len); 775 &name, &value, &len);
775 776
776 if (err) { 777 if (err) {
@@ -854,7 +855,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
854 if (error) 855 if (error)
855 goto fail_gunlock2; 856 goto fail_gunlock2;
856 857
857 error = gfs2_security_init(dip, GFS2_I(inode)); 858 error = gfs2_security_init(dip, GFS2_I(inode), name);
858 if (error) 859 if (error)
859 goto fail_gunlock2; 860 goto fail_gunlock2;
860 861
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6e493aee28f8..98c80d8c2a62 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg)
22{ 22{
23 struct gfs2_glock *gl = arg; 23 struct gfs2_glock *gl = arg;
24 unsigned ret = gl->gl_state; 24 unsigned ret = gl->gl_state;
25 struct gfs2_sbd *sdp = gl->gl_sbd;
26 25
27 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 26 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
28 27
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg)
31 30
32 switch (gl->gl_lksb.sb_status) { 31 switch (gl->gl_lksb.sb_status) {
33 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ 32 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
34 if (gl->gl_ops->go_flags & GLOF_ASPACE) 33 gfs2_glock_free(gl);
35 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
36 else
37 kmem_cache_free(gfs2_glock_cachep, gl);
38 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
39 wake_up(&sdp->sd_glock_wait);
40 return; 34 return;
41 case -DLM_ECANCEL: /* Cancel while getting lock */ 35 case -DLM_ECANCEL: /* Cancel while getting lock */
42 ret |= LM_OUT_CANCELED; 36 ret |= LM_OUT_CANCELED;
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
164 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 158 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
165} 159}
166 160
167static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) 161static void gdlm_put_lock(struct gfs2_glock *gl)
168{ 162{
169 struct gfs2_sbd *sdp = gl->gl_sbd; 163 struct gfs2_sbd *sdp = gl->gl_sbd;
170 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 164 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
171 int error; 165 int error;
172 166
173 if (gl->gl_lksb.sb_lkid == 0) { 167 if (gl->gl_lksb.sb_lkid == 0) {
174 kmem_cache_free(cachep, gl); 168 gfs2_glock_free(gl);
175 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
176 wake_up(&sdp->sd_glock_wait);
177 return; 169 return;
178 } 170 }
179 171
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index eb01f3575e10..e7ed31f858dd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -67,7 +67,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
67 * @mapping: The associated mapping (maybe NULL) 67 * @mapping: The associated mapping (maybe NULL)
68 * @bd: The gfs2_bufdata to remove 68 * @bd: The gfs2_bufdata to remove
69 * 69 *
70 * The log lock _must_ be held when calling this function 70 * The ail lock _must_ be held when calling this function
71 * 71 *
72 */ 72 */
73 73
@@ -88,8 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
88 */ 88 */
89 89
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
91__releases(&sdp->sd_log_lock) 91__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_log_lock) 92__acquires(&sdp->sd_ail_lock)
93{ 93{
94 struct gfs2_bufdata *bd, *s; 94 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 95 struct buffer_head *bh;
@@ -117,7 +117,7 @@ __acquires(&sdp->sd_log_lock)
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); 117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 118
119 get_bh(bh); 119 get_bh(bh);
120 gfs2_log_unlock(sdp); 120 spin_unlock(&sdp->sd_ail_lock);
121 lock_buffer(bh); 121 lock_buffer(bh);
122 if (test_clear_buffer_dirty(bh)) { 122 if (test_clear_buffer_dirty(bh)) {
123 bh->b_end_io = end_buffer_write_sync; 123 bh->b_end_io = end_buffer_write_sync;
@@ -126,7 +126,7 @@ __acquires(&sdp->sd_log_lock)
126 unlock_buffer(bh); 126 unlock_buffer(bh);
127 brelse(bh); 127 brelse(bh);
128 } 128 }
129 gfs2_log_lock(sdp); 129 spin_lock(&sdp->sd_ail_lock);
130 130
131 retry = 1; 131 retry = 1;
132 break; 132 break;
@@ -175,10 +175,10 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
175 struct gfs2_ail *ai; 175 struct gfs2_ail *ai;
176 int done = 0; 176 int done = 0;
177 177
178 gfs2_log_lock(sdp); 178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list; 179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) { 180 if (list_empty(head)) {
181 gfs2_log_unlock(sdp); 181 spin_unlock(&sdp->sd_ail_lock);
182 return; 182 return;
183 } 183 }
184 sync_gen = sdp->sd_ail_sync_gen++; 184 sync_gen = sdp->sd_ail_sync_gen++;
@@ -189,13 +189,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
189 if (ai->ai_sync_gen >= sync_gen) 189 if (ai->ai_sync_gen >= sync_gen)
190 continue; 190 continue;
191 ai->ai_sync_gen = sync_gen; 191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */ 192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0; 193 done = 0;
194 break; 194 break;
195 } 195 }
196 } 196 }
197 197
198 gfs2_log_unlock(sdp); 198 spin_unlock(&sdp->sd_ail_lock);
199} 199}
200 200
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
@@ -203,7 +203,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
203 struct gfs2_ail *ai, *s; 203 struct gfs2_ail *ai, *s;
204 int ret; 204 int ret;
205 205
206 gfs2_log_lock(sdp); 206 spin_lock(&sdp->sd_ail_lock);
207 207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 209 if (gfs2_ail1_empty_one(sdp, ai, flags))
@@ -214,7 +214,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
214 214
215 ret = list_empty(&sdp->sd_ail1_list); 215 ret = list_empty(&sdp->sd_ail1_list);
216 216
217 gfs2_log_unlock(sdp); 217 spin_unlock(&sdp->sd_ail_lock);
218 218
219 return ret; 219 return ret;
220} 220}
@@ -247,7 +247,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
247 int wrap = (new_tail < old_tail); 247 int wrap = (new_tail < old_tail);
248 int a, b, rm; 248 int a, b, rm;
249 249
250 gfs2_log_lock(sdp); 250 spin_lock(&sdp->sd_ail_lock);
251 251
252 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { 252 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
253 a = (old_tail <= ai->ai_first); 253 a = (old_tail <= ai->ai_first);
@@ -263,7 +263,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
263 kfree(ai); 263 kfree(ai);
264 } 264 }
265 265
266 gfs2_log_unlock(sdp); 266 spin_unlock(&sdp->sd_ail_lock);
267} 267}
268 268
269/** 269/**
@@ -421,7 +421,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
421 struct gfs2_ail *ai; 421 struct gfs2_ail *ai;
422 unsigned int tail; 422 unsigned int tail;
423 423
424 gfs2_log_lock(sdp); 424 spin_lock(&sdp->sd_ail_lock);
425 425
426 if (list_empty(&sdp->sd_ail1_list)) { 426 if (list_empty(&sdp->sd_ail1_list)) {
427 tail = sdp->sd_log_head; 427 tail = sdp->sd_log_head;
@@ -430,7 +430,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
430 tail = ai->ai_first; 430 tail = ai->ai_first;
431 } 431 }
432 432
433 gfs2_log_unlock(sdp); 433 spin_unlock(&sdp->sd_ail_lock);
434 434
435 return tail; 435 return tail;
436} 436}
@@ -743,10 +743,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
743 sdp->sd_log_commited_databuf = 0; 743 sdp->sd_log_commited_databuf = 0;
744 sdp->sd_log_commited_revoke = 0; 744 sdp->sd_log_commited_revoke = 0;
745 745
746 spin_lock(&sdp->sd_ail_lock);
746 if (!list_empty(&ai->ai_ail1_list)) { 747 if (!list_empty(&ai->ai_ail1_list)) {
747 list_add(&ai->ai_list, &sdp->sd_ail1_list); 748 list_add(&ai->ai_list, &sdp->sd_ail1_list);
748 ai = NULL; 749 ai = NULL;
749 } 750 }
751 spin_unlock(&sdp->sd_ail_lock);
750 gfs2_log_unlock(sdp); 752 gfs2_log_unlock(sdp);
751 trace_gfs2_log_flush(sdp, 0); 753 trace_gfs2_log_flush(sdp, 0);
752 up_write(&sdp->sd_log_flush_lock); 754 up_write(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f822058d..e919abf25ecd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
51 /* If this buffer is in the AIL and it has already been written 51 /* If this buffer is in the AIL and it has already been written
52 * to in-place disk block, remove it from the AIL. 52 * to in-place disk block, remove it from the AIL.
53 */ 53 */
54 spin_lock(&sdp->sd_ail_lock);
54 if (bd->bd_ail) 55 if (bd->bd_ail)
55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 56 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
57 spin_unlock(&sdp->sd_ail_lock);
56 get_bh(bh); 58 get_bh(bh);
57 atomic_inc(&sdp->sd_log_pinned); 59 atomic_inc(&sdp->sd_log_pinned);
58 trace_gfs2_pin(bd, 1); 60 trace_gfs2_pin(bd, 1);
@@ -80,7 +82,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
80 mark_buffer_dirty(bh); 82 mark_buffer_dirty(bh);
81 clear_buffer_pinned(bh); 83 clear_buffer_pinned(bh);
82 84
83 gfs2_log_lock(sdp); 85 spin_lock(&sdp->sd_ail_lock);
84 if (bd->bd_ail) { 86 if (bd->bd_ail) {
85 list_del(&bd->bd_ail_st_list); 87 list_del(&bd->bd_ail_st_list);
86 brelse(bh); 88 brelse(bh);
@@ -91,9 +93,11 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
91 } 93 }
92 bd->bd_ail = ai; 94 bd->bd_ail = ai;
93 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
94 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 96 spin_unlock(&sdp->sd_ail_lock);
97
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
95 trace_gfs2_pin(bd, 0); 100 trace_gfs2_pin(bd, 0);
96 gfs2_log_unlock(sdp);
97 unlock_buffer(bh); 101 unlock_buffer(bh);
98 atomic_dec(&sdp->sd_log_pinned); 102 atomic_dec(&sdp->sd_log_pinned);
99} 103}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab6e17e..888a5f5a1a58 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,6 +14,8 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/rcupdate.h>
18#include <linux/rculist_bl.h>
17#include <asm/atomic.h> 19#include <asm/atomic.h>
18 20
19#include "gfs2.h" 21#include "gfs2.h"
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo)
45{ 47{
46 struct gfs2_glock *gl = foo; 48 struct gfs2_glock *gl = foo;
47 49
48 INIT_HLIST_NODE(&gl->gl_list); 50 INIT_HLIST_BL_NODE(&gl->gl_list);
49 spin_lock_init(&gl->gl_spin); 51 spin_lock_init(&gl->gl_spin);
50 INIT_LIST_HEAD(&gl->gl_holders); 52 INIT_LIST_HEAD(&gl->gl_holders);
51 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
@@ -59,14 +61,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
59 struct address_space *mapping = (struct address_space *)(gl + 1); 61 struct address_space *mapping = (struct address_space *)(gl + 1);
60 62
61 gfs2_init_glock_once(gl); 63 gfs2_init_glock_once(gl);
62 memset(mapping, 0, sizeof(*mapping)); 64 address_space_init_once(mapping);
63 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
64 spin_lock_init(&mapping->tree_lock);
65 spin_lock_init(&mapping->i_mmap_lock);
66 INIT_LIST_HEAD(&mapping->private_list);
67 spin_lock_init(&mapping->private_lock);
68 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
69 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
70} 65}
71 66
72/** 67/**
@@ -144,7 +139,7 @@ static int __init init_gfs2_fs(void)
144 139
145 error = -ENOMEM; 140 error = -ENOMEM;
146 gfs_recovery_wq = alloc_workqueue("gfs_recovery", 141 gfs_recovery_wq = alloc_workqueue("gfs_recovery",
147 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); 142 WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
148 if (!gfs_recovery_wq) 143 if (!gfs_recovery_wq)
149 goto fail_wq; 144 goto fail_wq;
150 145
@@ -198,6 +193,8 @@ static void __exit exit_gfs2_fs(void)
198 unregister_filesystem(&gfs2meta_fs_type); 193 unregister_filesystem(&gfs2meta_fs_type);
199 destroy_workqueue(gfs_recovery_wq); 194 destroy_workqueue(gfs_recovery_wq);
200 195
196 rcu_barrier();
197
201 kmem_cache_destroy(gfs2_quotad_cachep); 198 kmem_cache_destroy(gfs2_quotad_cachep);
202 kmem_cache_destroy(gfs2_rgrpd_cachep); 199 kmem_cache_destroy(gfs2_rgrpd_cachep);
203 kmem_cache_destroy(gfs2_bufdata_cachep); 200 kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939739c7b3f9..01d97f486553 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -326,6 +326,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
326 brelse(bh); 326 brelse(bh);
327 } 327 }
328 if (bd) { 328 if (bd) {
329 spin_lock(&sdp->sd_ail_lock);
329 if (bd->bd_ail) { 330 if (bd->bd_ail) {
330 gfs2_remove_from_ail(bd); 331 gfs2_remove_from_ail(bd);
331 bh->b_private = NULL; 332 bh->b_private = NULL;
@@ -333,6 +334,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
333 bd->bd_blkno = bh->b_blocknr; 334 bd->bd_blkno = bh->b_blocknr;
334 gfs2_trans_add_revoke(sdp, bd); 335 gfs2_trans_add_revoke(sdp, bd);
335 } 336 }
337 spin_unlock(&sdp->sd_ail_lock);
336 } 338 }
337 clear_buffer_dirty(bh); 339 clear_buffer_dirty(bh);
338 clear_buffer_uptodate(bh); 340 clear_buffer_uptodate(bh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 777927ce6f79..42ef24355afb 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
99 99
100 init_waitqueue_head(&sdp->sd_log_waitq); 100 init_waitqueue_head(&sdp->sd_log_waitq);
101 init_waitqueue_head(&sdp->sd_logd_waitq); 101 init_waitqueue_head(&sdp->sd_logd_waitq);
102 spin_lock_init(&sdp->sd_ail_lock);
102 INIT_LIST_HEAD(&sdp->sd_ail1_list); 103 INIT_LIST_HEAD(&sdp->sd_ail1_list);
103 INIT_LIST_HEAD(&sdp->sd_ail2_list); 104 INIT_LIST_HEAD(&sdp->sd_ail2_list);
104 105
@@ -928,17 +929,9 @@ static const match_table_t nolock_tokens = {
928 { Opt_err, NULL }, 929 { Opt_err, NULL },
929}; 930};
930 931
931static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
932{
933 struct gfs2_sbd *sdp = gl->gl_sbd;
934 kmem_cache_free(cachep, gl);
935 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
936 wake_up(&sdp->sd_glock_wait);
937}
938
939static const struct lm_lockops nolock_ops = { 932static const struct lm_lockops nolock_ops = {
940 .lm_proto_name = "lock_nolock", 933 .lm_proto_name = "lock_nolock",
941 .lm_put_lock = nolock_put_lock, 934 .lm_put_lock = gfs2_glock_free,
942 .lm_tokens = &nolock_tokens, 935 .lm_tokens = &nolock_tokens,
943}; 936};
944 937
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d8b26ac2e20b..09e436a50723 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1026 1026
1027/** 1027/**
1028 * gfs2_permission - 1028 * gfs2_permission -
1029 * @inode: 1029 * @inode: The inode
1030 * @mask: 1030 * @mask: The mask to be tested
1031 * @nd: passed from Linux VFS, ignored by us 1031 * @flags: Indicates whether this is an RCU path walk or not
1032 * 1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the 1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only 1034 * inode locked, so we look to see if the glock is already locked and only
@@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1044 int error; 1044 int error;
1045 int unlock = 0; 1045 int unlock = 0;
1046 1046
1047 if (flags & IPERM_FLAG_RCU)
1048 return -ECHILD;
1049 1047
1050 ip = GFS2_I(inode); 1048 ip = GFS2_I(inode);
1051 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error) 1053 if (error)
1054 return error; 1054 return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a689901963de..e23d9864c418 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -834,6 +834,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
834 goto out_end_trans; 834 goto out_end_trans;
835 835
836 do_qc(qd, -qd->qd_change_sync); 836 do_qc(qd, -qd->qd_change_sync);
837 set_bit(QDF_REFRESH, &qd->qd_flags);
837 } 838 }
838 839
839 error = 0; 840 error = 0;
@@ -929,6 +930,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
929{ 930{
930 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 931 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
931 struct gfs2_alloc *al = ip->i_alloc; 932 struct gfs2_alloc *al = ip->i_alloc;
933 struct gfs2_quota_data *qd;
932 unsigned int x; 934 unsigned int x;
933 int error = 0; 935 int error = 0;
934 936
@@ -942,7 +944,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
942 sort_qd, NULL); 944 sort_qd, NULL);
943 945
944 for (x = 0; x < al->al_qd_num; x++) { 946 for (x = 0; x < al->al_qd_num; x++) {
945 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]); 947 int force = NO_FORCE;
948 qd = al->al_qd[x];
949 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
950 force = FORCE;
951 error = do_glock(qd, force, &al->al_qd_ghs[x]);
946 if (error) 952 if (error)
947 break; 953 break;
948 } 954 }
@@ -1587,6 +1593,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1587 1593
1588 offset = qd2offset(qd); 1594 offset = qd2offset(qd);
1589 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); 1595 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
1596 if (gfs2_is_stuffed(ip))
1597 alloc_required = 1;
1590 if (alloc_required) { 1598 if (alloc_required) {
1591 al = gfs2_alloc_get(ip); 1599 al = gfs2_alloc_get(ip);
1592 if (al == NULL) 1600 if (al == NULL)
@@ -1600,7 +1608,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1600 blocks += gfs2_rg_blocks(al); 1608 blocks += gfs2_rg_blocks(al);
1601 } 1609 }
1602 1610
1603 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); 1611 /* Some quotas span block boundaries and can update two blocks,
1612 adding an extra block to the transaction to handle such quotas */
1613 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0);
1604 if (error) 1614 if (error)
1605 goto out_release; 1615 goto out_release;
1606 1616
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7293ea27020c..cf930cd9664a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1602,7 +1602,7 @@ rgrp_error:
1602 * 1602 *
1603 */ 1603 */
1604 1604
1605void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) 1605void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1606{ 1606{
1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1608 struct gfs2_rgrpd *rgd; 1608 struct gfs2_rgrpd *rgd;
@@ -1617,7 +1617,21 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1618 1618
1619 gfs2_trans_add_rg(rgd); 1619 gfs2_trans_add_rg(rgd);
1620}
1620 1621
1622/**
1623 * gfs2_free_data - free a contiguous run of data block(s)
1624 * @ip: the inode these blocks are being freed from
1625 * @bstart: first block of a run of contiguous blocks
1626 * @blen: the length of the block run
1627 *
1628 */
1629
1630void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1631{
1632 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1633
1634 __gfs2_free_data(ip, bstart, blen);
1621 gfs2_statfs_change(sdp, 0, +blen, 0); 1635 gfs2_statfs_change(sdp, 0, +blen, 0);
1622 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1636 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1623} 1637}
@@ -1630,7 +1644,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1630 * 1644 *
1631 */ 1645 */
1632 1646
1633void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 1647void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1634{ 1648{
1635 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1649 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1636 struct gfs2_rgrpd *rgd; 1650 struct gfs2_rgrpd *rgd;
@@ -1645,10 +1659,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1645 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1659 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1646 1660
1647 gfs2_trans_add_rg(rgd); 1661 gfs2_trans_add_rg(rgd);
1662 gfs2_meta_wipe(ip, bstart, blen);
1663}
1648 1664
1665/**
1666 * gfs2_free_meta - free a contiguous run of data block(s)
1667 * @ip: the inode these blocks are being freed from
1668 * @bstart: first block of a run of contiguous blocks
1669 * @blen: the length of the block run
1670 *
1671 */
1672
1673void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1674{
1675 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1676
1677 __gfs2_free_meta(ip, bstart, blen);
1649 gfs2_statfs_change(sdp, 0, +blen, 0); 1678 gfs2_statfs_change(sdp, 0, +blen, 0);
1650 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1679 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1651 gfs2_meta_wipe(ip, bstart, blen);
1652} 1680}
1653 1681
1654void gfs2_unlink_di(struct inode *inode) 1682void gfs2_unlink_di(struct inode *inode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 50c2bb04369c..a80e3034ac47 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
54 54
55extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
55extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 56extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
56extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 58extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 59extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
58extern void gfs2_unlink_di(struct inode *inode); 60extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
238} 238}
239 239
240/* 240/*
241 * hfs_unlink() 241 * hfs_remove()
242 * 242 *
243 * This is the unlink() entry in the inode_operations structure for 243 * This serves as both unlink() and rmdir() in the inode_operations
244 * regular HFS directories. The purpose is to delete an existing 244 * structure for regular HFS directories. The purpose is to delete
245 * file, given the inode for the parent directory and the name 245 * an existing child, given the inode for the parent directory and
246 * (and its length) of the existing file. 246 * the name (and its length) of the existing directory.
247 */
248static int hfs_unlink(struct inode *dir, struct dentry *dentry)
249{
250 struct inode *inode;
251 int res;
252
253 inode = dentry->d_inode;
254 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
255 if (res)
256 return res;
257
258 drop_nlink(inode);
259 hfs_delete_inode(inode);
260 inode->i_ctime = CURRENT_TIME_SEC;
261 mark_inode_dirty(inode);
262
263 return res;
264}
265
266/*
267 * hfs_rmdir()
268 * 247 *
269 * This is the rmdir() entry in the inode_operations structure for 248 * HFS does not have hardlinks, so both rmdir and unlink set the
270 * regular HFS directories. The purpose is to delete an existing 249 * link count to 0. The only difference is the emptiness check.
271 * directory, given the inode for the parent directory and the name
272 * (and its length) of the existing directory.
273 */ 250 */
274static int hfs_rmdir(struct inode *dir, struct dentry *dentry) 251static int hfs_remove(struct inode *dir, struct dentry *dentry)
275{ 252{
276 struct inode *inode; 253 struct inode *inode = dentry->d_inode;
277 int res; 254 int res;
278 255
279 inode = dentry->d_inode; 256 if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
280 if (inode->i_size != 2)
281 return -ENOTEMPTY; 257 return -ENOTEMPTY;
282 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); 258 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
283 if (res) 259 if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 283
308 /* Unlink destination if it already exists */ 284 /* Unlink destination if it already exists */
309 if (new_dentry->d_inode) { 285 if (new_dentry->d_inode) {
310 res = hfs_unlink(new_dir, new_dentry); 286 res = hfs_remove(new_dir, new_dentry);
311 if (res) 287 if (res)
312 return res; 288 return res;
313 } 289 }
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
332const struct inode_operations hfs_dir_inode_operations = { 308const struct inode_operations hfs_dir_inode_operations = {
333 .create = hfs_create, 309 .create = hfs_create,
334 .lookup = hfs_lookup, 310 .lookup = hfs_lookup,
335 .unlink = hfs_unlink, 311 .unlink = hfs_remove,
336 .mkdir = hfs_mkdir, 312 .mkdir = hfs_mkdir,
337 .rmdir = hfs_rmdir, 313 .rmdir = hfs_remove,
338 .rename = hfs_rename, 314 .rename = hfs_rename,
339 .setattr = hfs_inode_setattr, 315 .setattr = hfs_inode_setattr,
340}; 316};
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..9910c039f026 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -84,16 +84,13 @@ static struct hlist_head *inode_hashtable __read_mostly;
84DEFINE_SPINLOCK(inode_lock); 84DEFINE_SPINLOCK(inode_lock);
85 85
86/* 86/*
87 * iprune_sem provides exclusion between the kswapd or try_to_free_pages 87 * iprune_sem provides exclusion between the icache shrinking and the
88 * icache shrinking path, and the umount path. Without this exclusion, 88 * umount path.
89 * by the time prune_icache calls iput for the inode whose pages it has
90 * been invalidating, or by the time it calls clear_inode & destroy_inode
91 * from its final dispose_list, the struct super_block they refer to
92 * (for inode->i_sb->s_op) may already have been freed and reused.
93 * 89 *
94 * We make this an rwsem because the fastpath is icache shrinking. In 90 * We don't actually need it to protect anything in the umount path,
95 * some cases a filesystem may be doing a significant amount of work in 91 * but only need to cycle through it to make sure any inode that
96 * its inode reclaim code, so this should improve parallelism. 92 * prune_icache took off the LRU list has been fully torn down by the
93 * time we are past evict_inodes.
97 */ 94 */
98static DECLARE_RWSEM(iprune_sem); 95static DECLARE_RWSEM(iprune_sem);
99 96
@@ -295,6 +292,20 @@ static void destroy_inode(struct inode *inode)
295 call_rcu(&inode->i_rcu, i_callback); 292 call_rcu(&inode->i_rcu, i_callback);
296} 293}
297 294
295void address_space_init_once(struct address_space *mapping)
296{
297 memset(mapping, 0, sizeof(*mapping));
298 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
299 spin_lock_init(&mapping->tree_lock);
300 spin_lock_init(&mapping->i_mmap_lock);
301 INIT_LIST_HEAD(&mapping->private_list);
302 spin_lock_init(&mapping->private_lock);
303 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
304 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
305 mutex_init(&mapping->unmap_mutex);
306}
307EXPORT_SYMBOL(address_space_init_once);
308
298/* 309/*
299 * These are initializations that only need to be done 310 * These are initializations that only need to be done
300 * once, because the fields are idempotent across use 311 * once, because the fields are idempotent across use
@@ -308,13 +319,7 @@ void inode_init_once(struct inode *inode)
308 INIT_LIST_HEAD(&inode->i_devices); 319 INIT_LIST_HEAD(&inode->i_devices);
309 INIT_LIST_HEAD(&inode->i_wb_list); 320 INIT_LIST_HEAD(&inode->i_wb_list);
310 INIT_LIST_HEAD(&inode->i_lru); 321 INIT_LIST_HEAD(&inode->i_lru);
311 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 322 address_space_init_once(&inode->i_data);
312 spin_lock_init(&inode->i_data.tree_lock);
313 spin_lock_init(&inode->i_data.i_mmap_lock);
314 INIT_LIST_HEAD(&inode->i_data.private_list);
315 spin_lock_init(&inode->i_data.private_lock);
316 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
317 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
318 i_size_ordered_init(inode); 323 i_size_ordered_init(inode);
319#ifdef CONFIG_FSNOTIFY 324#ifdef CONFIG_FSNOTIFY
320 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 325 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -508,17 +513,12 @@ void evict_inodes(struct super_block *sb)
508 struct inode *inode, *next; 513 struct inode *inode, *next;
509 LIST_HEAD(dispose); 514 LIST_HEAD(dispose);
510 515
511 down_write(&iprune_sem);
512
513 spin_lock(&inode_lock); 516 spin_lock(&inode_lock);
514 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 517 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
515 if (atomic_read(&inode->i_count)) 518 if (atomic_read(&inode->i_count))
516 continue; 519 continue;
517 520 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
518 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
519 WARN_ON(1);
520 continue; 521 continue;
521 }
522 522
523 inode->i_state |= I_FREEING; 523 inode->i_state |= I_FREEING;
524 524
@@ -534,28 +534,40 @@ void evict_inodes(struct super_block *sb)
534 spin_unlock(&inode_lock); 534 spin_unlock(&inode_lock);
535 535
536 dispose_list(&dispose); 536 dispose_list(&dispose);
537
538 /*
539 * Cycle through iprune_sem to make sure any inode that prune_icache
540 * moved off the list before we took the lock has been fully torn
541 * down.
542 */
543 down_write(&iprune_sem);
537 up_write(&iprune_sem); 544 up_write(&iprune_sem);
538} 545}
539 546
540/** 547/**
541 * invalidate_inodes - attempt to free all inodes on a superblock 548 * invalidate_inodes - attempt to free all inodes on a superblock
542 * @sb: superblock to operate on 549 * @sb: superblock to operate on
550 * @kill_dirty: flag to guide handling of dirty inodes
543 * 551 *
544 * Attempts to free all inodes for a given superblock. If there were any 552 * Attempts to free all inodes for a given superblock. If there were any
545 * busy inodes return a non-zero value, else zero. 553 * busy inodes return a non-zero value, else zero.
554 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
555 * them as busy.
546 */ 556 */
547int invalidate_inodes(struct super_block *sb) 557int invalidate_inodes(struct super_block *sb, bool kill_dirty)
548{ 558{
549 int busy = 0; 559 int busy = 0;
550 struct inode *inode, *next; 560 struct inode *inode, *next;
551 LIST_HEAD(dispose); 561 LIST_HEAD(dispose);
552 562
553 down_write(&iprune_sem);
554
555 spin_lock(&inode_lock); 563 spin_lock(&inode_lock);
556 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 564 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 565 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
558 continue; 566 continue;
567 if (inode->i_state & I_DIRTY && !kill_dirty) {
568 busy = 1;
569 continue;
570 }
559 if (atomic_read(&inode->i_count)) { 571 if (atomic_read(&inode->i_count)) {
560 busy = 1; 572 busy = 1;
561 continue; 573 continue;
@@ -575,7 +587,6 @@ int invalidate_inodes(struct super_block *sb)
575 spin_unlock(&inode_lock); 587 spin_unlock(&inode_lock);
576 588
577 dispose_list(&dispose); 589 dispose_list(&dispose);
578 up_write(&iprune_sem);
579 590
580 return busy; 591 return busy;
581} 592}
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..f3d15de44b15 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,10 +106,23 @@ extern void put_super(struct super_block *sb);
106struct nameidata; 106struct nameidata;
107extern struct file *nameidata_to_filp(struct nameidata *); 107extern struct file *nameidata_to_filp(struct nameidata *);
108extern void release_open_intent(struct nameidata *); 108extern void release_open_intent(struct nameidata *);
109struct open_flags {
110 int open_flag;
111 int mode;
112 int acc_mode;
113 int intent;
114};
115extern struct file *do_filp_open(int dfd, const char *pathname,
116 const struct open_flags *op, int lookup_flags);
117extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
118 const char *, const struct open_flags *, int lookup_flags);
119
120extern long do_handle_open(int mountdirfd,
121 struct file_handle __user *ufh, int open_flag);
109 122
110/* 123/*
111 * inode.c 124 * inode.c
112 */ 125 */
113extern int get_nr_dirty_inodes(void); 126extern int get_nr_dirty_inodes(void);
114extern void evict_inodes(struct super_block *); 127extern void evict_inodes(struct super_block *);
115extern int invalidate_inodes(struct super_block *); 128extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb38474..dd4687ff30d0 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
124 * offset of the inode and the upper 16 bits of fh32[1] to 124 * offset of the inode and the upper 16 bits of fh32[1] to
125 * hold the offset of the parent. 125 * hold the offset of the parent.
126 */ 126 */
127 127 if (connectable && (len < 5)) {
128 if (len < 3 || (connectable && len < 5)) 128 *max_len = 5;
129 return 255;
130 } else if (len < 3) {
131 *max_len = 3;
129 return 255; 132 return 255;
133 }
130 134
131 len = 3; 135 len = 3;
132 fh32[0] = ei->i_iget5_block; 136 fh32[0] = ei->i_iget5_block;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 92978658ed18..82faddd1f321 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -215,8 +215,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
215 no chance of AB-BA deadlock involving its f->sem). */ 215 no chance of AB-BA deadlock involving its f->sem). */
216 mutex_unlock(&f->sem); 216 mutex_unlock(&f->sem);
217 217
218 ret = jffs2_do_create(c, dir_f, f, ri, 218 ret = jffs2_do_create(c, dir_f, f, ri, &dentry->d_name);
219 dentry->d_name.name, dentry->d_name.len);
220 if (ret) 219 if (ret)
221 goto fail; 220 goto fail;
222 221
@@ -386,7 +385,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
386 385
387 jffs2_complete_reservation(c); 386 jffs2_complete_reservation(c);
388 387
389 ret = jffs2_init_security(inode, dir_i); 388 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
390 if (ret) 389 if (ret)
391 goto fail; 390 goto fail;
392 391
@@ -530,7 +529,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
530 529
531 jffs2_complete_reservation(c); 530 jffs2_complete_reservation(c);
532 531
533 ret = jffs2_init_security(inode, dir_i); 532 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
534 if (ret) 533 if (ret)
535 goto fail; 534 goto fail;
536 535
@@ -703,7 +702,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
703 702
704 jffs2_complete_reservation(c); 703 jffs2_complete_reservation(c);
705 704
706 ret = jffs2_init_security(inode, dir_i); 705 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
707 if (ret) 706 if (ret)
708 goto fail; 707 goto fail;
709 708
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 5a53d9bdb2b5..e4619b00f7c5 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -401,7 +401,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
401 struct jffs2_raw_inode *ri, unsigned char *buf, 401 struct jffs2_raw_inode *ri, unsigned char *buf,
402 uint32_t offset, uint32_t writelen, uint32_t *retlen); 402 uint32_t offset, uint32_t writelen, uint32_t *retlen);
403int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, 403int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f,
404 struct jffs2_raw_inode *ri, const char *name, int namelen); 404 struct jffs2_raw_inode *ri, const struct qstr *qstr);
405int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name, 405int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name,
406 int namelen, struct jffs2_inode_info *dead_f, uint32_t time); 406 int namelen, struct jffs2_inode_info *dead_f, uint32_t time);
407int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino, 407int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino,
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 239f51216a68..cfeb7164b085 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -23,14 +23,15 @@
23#include "nodelist.h" 23#include "nodelist.h"
24 24
25/* ---- Initial Security Label Attachment -------------- */ 25/* ---- Initial Security Label Attachment -------------- */
26int jffs2_init_security(struct inode *inode, struct inode *dir) 26int jffs2_init_security(struct inode *inode, struct inode *dir,
27 const struct qstr *qstr)
27{ 28{
28 int rc; 29 int rc;
29 size_t len; 30 size_t len;
30 void *value; 31 void *value;
31 char *name; 32 char *name;
32 33
33 rc = security_inode_init_security(inode, dir, &name, &value, &len); 34 rc = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
34 if (rc) { 35 if (rc) {
35 if (rc == -EOPNOTSUPP) 36 if (rc == -EOPNOTSUPP)
36 return 0; 37 return 0;
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index c819eb0e982d..30d175b6d290 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -424,7 +424,9 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
424 return ret; 424 return ret;
425} 425}
426 426
427int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const char *name, int namelen) 427int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
428 struct jffs2_inode_info *f, struct jffs2_raw_inode *ri,
429 const struct qstr *qstr)
428{ 430{
429 struct jffs2_raw_dirent *rd; 431 struct jffs2_raw_dirent *rd;
430 struct jffs2_full_dnode *fn; 432 struct jffs2_full_dnode *fn;
@@ -466,15 +468,15 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
466 mutex_unlock(&f->sem); 468 mutex_unlock(&f->sem);
467 jffs2_complete_reservation(c); 469 jffs2_complete_reservation(c);
468 470
469 ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode); 471 ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode, qstr);
470 if (ret) 472 if (ret)
471 return ret; 473 return ret;
472 ret = jffs2_init_acl_post(&f->vfs_inode); 474 ret = jffs2_init_acl_post(&f->vfs_inode);
473 if (ret) 475 if (ret)
474 return ret; 476 return ret;
475 477
476 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, 478 ret = jffs2_reserve_space(c, sizeof(*rd)+qstr->len, &alloclen,
477 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 479 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(qstr->len));
478 480
479 if (ret) { 481 if (ret) {
480 /* Eep. */ 482 /* Eep. */
@@ -493,19 +495,19 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
493 495
494 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 496 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
495 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT); 497 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
496 rd->totlen = cpu_to_je32(sizeof(*rd) + namelen); 498 rd->totlen = cpu_to_je32(sizeof(*rd) + qstr->len);
497 rd->hdr_crc = cpu_to_je32(crc32(0, rd, sizeof(struct jffs2_unknown_node)-4)); 499 rd->hdr_crc = cpu_to_je32(crc32(0, rd, sizeof(struct jffs2_unknown_node)-4));
498 500
499 rd->pino = cpu_to_je32(dir_f->inocache->ino); 501 rd->pino = cpu_to_je32(dir_f->inocache->ino);
500 rd->version = cpu_to_je32(++dir_f->highest_version); 502 rd->version = cpu_to_je32(++dir_f->highest_version);
501 rd->ino = ri->ino; 503 rd->ino = ri->ino;
502 rd->mctime = ri->ctime; 504 rd->mctime = ri->ctime;
503 rd->nsize = namelen; 505 rd->nsize = qstr->len;
504 rd->type = DT_REG; 506 rd->type = DT_REG;
505 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 507 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
506 rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); 508 rd->name_crc = cpu_to_je32(crc32(0, qstr->name, qstr->len));
507 509
508 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL); 510 fd = jffs2_write_dirent(c, dir_f, rd, qstr->name, qstr->len, ALLOC_NORMAL);
509 511
510 jffs2_free_raw_dirent(rd); 512 jffs2_free_raw_dirent(rd);
511 513
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index cf4f5759b42b..7be4beb306f3 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -121,10 +121,11 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
121#endif /* CONFIG_JFFS2_FS_XATTR */ 121#endif /* CONFIG_JFFS2_FS_XATTR */
122 122
123#ifdef CONFIG_JFFS2_FS_SECURITY 123#ifdef CONFIG_JFFS2_FS_SECURITY
124extern int jffs2_init_security(struct inode *inode, struct inode *dir); 124extern int jffs2_init_security(struct inode *inode, struct inode *dir,
125 const struct qstr *qstr);
125extern const struct xattr_handler jffs2_security_xattr_handler; 126extern const struct xattr_handler jffs2_security_xattr_handler;
126#else 127#else
127#define jffs2_init_security(inode,dir) (0) 128#define jffs2_init_security(inode,dir,qstr) (0)
128#endif /* CONFIG_JFFS2_FS_SECURITY */ 129#endif /* CONFIG_JFFS2_FS_SECURITY */
129 130
130#endif /* _JFFS2_FS_XATTR_H_ */ 131#endif /* _JFFS2_FS_XATTR_H_ */
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index 88b6cc535bf2..e9e100fd7c09 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -62,10 +62,11 @@ extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
62extern int jfs_removexattr(struct dentry *, const char *); 62extern int jfs_removexattr(struct dentry *, const char *);
63 63
64#ifdef CONFIG_JFS_SECURITY 64#ifdef CONFIG_JFS_SECURITY
65extern int jfs_init_security(tid_t, struct inode *, struct inode *); 65extern int jfs_init_security(tid_t, struct inode *, struct inode *,
66 const struct qstr *);
66#else 67#else
67static inline int jfs_init_security(tid_t tid, struct inode *inode, 68static inline int jfs_init_security(tid_t tid, struct inode *inode,
68 struct inode *dir) 69 struct inode *dir, const struct qstr *qstr)
69{ 70{
70 return 0; 71 return 0;
71} 72}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb6..eaaf2b511e89 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -115,7 +115,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
115 if (rc) 115 if (rc)
116 goto out3; 116 goto out3;
117 117
118 rc = jfs_init_security(tid, ip, dip); 118 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
119 if (rc) { 119 if (rc) {
120 txAbort(tid, 0); 120 txAbort(tid, 0);
121 goto out3; 121 goto out3;
@@ -253,7 +253,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
253 if (rc) 253 if (rc)
254 goto out3; 254 goto out3;
255 255
256 rc = jfs_init_security(tid, ip, dip); 256 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
257 if (rc) { 257 if (rc) {
258 txAbort(tid, 0); 258 txAbort(tid, 0);
259 goto out3; 259 goto out3;
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
809 if (ip->i_nlink == JFS_LINK_MAX) 809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK; 810 return -EMLINK;
811 811
812 if (ip->i_nlink == 0)
813 return -ENOENT;
814
815 dquot_initialize(dir); 812 dquot_initialize(dir);
816 813
817 tid = txBegin(ip->i_sb, 0); 814 tid = txBegin(ip->i_sb, 0);
@@ -932,7 +929,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
932 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); 929 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
933 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); 930 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
934 931
935 rc = jfs_init_security(tid, ip, dip); 932 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
936 if (rc) 933 if (rc)
937 goto out3; 934 goto out3;
938 935
@@ -1395,7 +1392,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1395 if (rc) 1392 if (rc)
1396 goto out3; 1393 goto out3;
1397 1394
1398 rc = jfs_init_security(tid, ip, dir); 1395 rc = jfs_init_security(tid, ip, dir, &dentry->d_name);
1399 if (rc) { 1396 if (rc) {
1400 txAbort(tid, 0); 1397 txAbort(tid, 0);
1401 goto out3; 1398 goto out3;
@@ -1600,7 +1597,7 @@ out:
1600 1597
1601static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1598static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1602{ 1599{
1603 if (nd->flags & LOOKUP_RCU) 1600 if (nd && nd->flags & LOOKUP_RCU)
1604 return -ECHILD; 1601 return -ECHILD;
1605 /* 1602 /*
1606 * This is not negative dentry. Always valid. 1603 * This is not negative dentry. Always valid.
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 2d7f165d0f1d..3fa4c32272df 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -1091,7 +1091,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
1091} 1091}
1092 1092
1093#ifdef CONFIG_JFS_SECURITY 1093#ifdef CONFIG_JFS_SECURITY
1094int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir) 1094int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir,
1095 const struct qstr *qstr)
1095{ 1096{
1096 int rc; 1097 int rc;
1097 size_t len; 1098 size_t len;
@@ -1099,7 +1100,8 @@ int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
1099 char *suffix; 1100 char *suffix;
1100 char *name; 1101 char *name;
1101 1102
1102 rc = security_inode_init_security(inode, dir, &suffix, &value, &len); 1103 rc = security_inode_init_security(inode, dir, qstr, &suffix, &value,
1104 &len);
1103 if (rc) { 1105 if (rc) {
1104 if (rc == -EOPNOTSUPP) 1106 if (rc == -EOPNOTSUPP)
1105 return 0; 1107 return 0;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
213 new_de = minix_find_entry(new_dentry, &new_page); 213 new_de = minix_find_entry(new_dentry, &new_page);
214 if (!new_de) 214 if (!new_de)
215 goto out_dir; 215 goto out_dir;
216 inode_inc_link_count(old_inode);
217 minix_set_link(new_de, new_page, old_inode); 216 minix_set_link(new_de, new_page, old_inode);
218 new_inode->i_ctime = CURRENT_TIME_SEC; 217 new_inode->i_ctime = CURRENT_TIME_SEC;
219 if (dir_de) 218 if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
225 if (new_dir->i_nlink >= info->s_link_max) 224 if (new_dir->i_nlink >= info->s_link_max)
226 goto out_dir; 225 goto out_dir;
227 } 226 }
228 inode_inc_link_count(old_inode);
229 err = minix_add_link(new_dentry, old_inode); 227 err = minix_add_link(new_dentry, old_inode);
230 if (err) { 228 if (err)
231 inode_dec_link_count(old_inode);
232 goto out_dir; 229 goto out_dir;
233 }
234 if (dir_de) 230 if (dir_de)
235 inode_inc_link_count(new_dir); 231 inode_inc_link_count(new_dir);
236 } 232 }
237 233
238 minix_delete_entry(old_de, old_page); 234 minix_delete_entry(old_de, old_page);
239 inode_dec_link_count(old_inode); 235 mark_inode_dirty(old_inode);
240 236
241 if (dir_de) { 237 if (dir_de) {
242 minix_set_link(dir_de, dir_page, new_dir); 238 minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 9e701e28a329..b912b7abe747 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 408{
402 struct fs_struct *fs = current->fs; 409 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 410 struct dentry *dentry = nd->path.dentry;
411 int want_root = 0;
404 412
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 413 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 414 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
415 want_root = 1;
407 spin_lock(&fs->lock); 416 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 417 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 418 nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 423 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 424 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 425 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 426 if (want_root) {
418 path_get(&nd->root); 427 path_get(&nd->root);
419 spin_unlock(&fs->lock); 428 spin_unlock(&fs->lock);
420 } 429 }
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 436err:
428 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
429err_root: 438err_root:
430 if (nd->root.mnt) 439 if (want_root)
431 spin_unlock(&fs->lock); 440 spin_unlock(&fs->lock);
432 return -ECHILD; 441 return -ECHILD;
433} 442}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 463{
455 struct fs_struct *fs = current->fs; 464 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 465 struct dentry *parent = nd->path.dentry;
466 int want_root = 0;
457 467
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 468 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 469 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
470 want_root = 1;
460 spin_lock(&fs->lock); 471 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 472 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 473 nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 487 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 488 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 489 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 490 if (want_root) {
480 path_get(&nd->root); 491 path_get(&nd->root);
481 spin_unlock(&fs->lock); 492 spin_unlock(&fs->lock);
482 } 493 }
@@ -490,7 +501,7 @@ err:
490 spin_unlock(&dentry->d_lock); 501 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 502 spin_unlock(&parent->d_lock);
492err_root: 503err_root:
493 if (nd->root.mnt) 504 if (want_root)
494 spin_unlock(&fs->lock); 505 spin_unlock(&fs->lock);
495 return -ECHILD; 506 return -ECHILD;
496} 507}
@@ -498,8 +509,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 509/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 510static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 511{
501 if (nd->flags & LOOKUP_RCU) 512 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 513 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
514 nd->flags &= ~LOOKUP_RCU;
515 if (!(nd->flags & LOOKUP_ROOT))
516 nd->root.mnt = NULL;
517 rcu_read_unlock();
518 br_read_unlock(vfsmount_lock);
519 return -ECHILD;
520 }
521 }
503 return 0; 522 return 0;
504} 523}
505 524
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 537
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 538 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 539 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 540 if (!(nd->flags & LOOKUP_ROOT))
541 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 542 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 543 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 544 goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
539 return -ECHILD; 559 return -ECHILD;
540} 560}
541 561
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 562/**
551 * release_open_intent - free up open intent resources 563 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 564 * @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 602 return dentry;
591} 603}
592 604
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 605/*
628 * force_reval_path - force revalidation of a dentry 606 * handle_reval_path - force revalidation of a dentry
629 * 607 *
630 * In some situations the path walking code will trust dentries without 608 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 609 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 617 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 618 * to the path if necessary.
641 */ 619 */
642static int 620static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 621{
622 struct dentry *dentry = nd->path.dentry;
645 int status; 623 int status;
646 struct dentry *dentry = path->dentry;
647 624
648 /* 625 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to 626 return 0;
650 * become stale. 627
651 */ 628 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
652 if (!need_reval_dot(dentry)) 629 return 0;
630
631 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
653 return 0; 632 return 0;
654 633
634 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 635 status = d_revalidate(dentry, nd);
656 if (status > 0) 636 if (status > 0)
657 return 0; 637 return 0;
658 638
659 if (!status) { 639 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 640 status = -ESTALE;
662 } 641
663 return status; 642 return status;
664} 643}
665 644
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 707 path_put(&nd->path);
729 nd->path = nd->root; 708 nd->path = nd->root;
730 path_get(&nd->root); 709 path_get(&nd->root);
710 nd->flags |= LOOKUP_JUMPED;
731 } 711 }
732 nd->inode = nd->path.dentry->d_inode; 712 nd->inode = nd->path.dentry->d_inode;
733 713
@@ -757,19 +737,42 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 737 nd->path.dentry = path->dentry;
758} 738}
759 739
740static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
741{
742 struct inode *inode = link->dentry->d_inode;
743 if (!IS_ERR(cookie) && inode->i_op->put_link)
744 inode->i_op->put_link(link->dentry, nd, cookie);
745 path_put(link);
746}
747
760static __always_inline int 748static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 749follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 750{
763 int error; 751 int error;
764 struct dentry *dentry = link->dentry; 752 struct dentry *dentry = link->dentry;
765 753
766 BUG_ON(nd->flags & LOOKUP_RCU); 754 BUG_ON(nd->flags & LOOKUP_RCU);
767 755
756 if (link->mnt == nd->path.mnt)
757 mntget(link->mnt);
758
759 if (unlikely(current->total_link_count >= 40)) {
760 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
761 path_put(&nd->path);
762 return -ELOOP;
763 }
764 cond_resched();
765 current->total_link_count++;
766
768 touch_atime(link->mnt, dentry); 767 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 768 nd_set_link(nd, NULL);
770 769
771 if (link->mnt == nd->path.mnt) 770 error = security_inode_follow_link(link->dentry, nd);
772 mntget(link->mnt); 771 if (error) {
772 *p = ERR_PTR(error); /* no ->put_link(), please */
773 path_put(&nd->path);
774 return error;
775 }
773 776
774 nd->last_type = LAST_BIND; 777 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 778 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -780,55 +783,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 783 if (s)
781 error = __vfs_follow_link(nd, s); 784 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 785 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 786 nd->flags |= LOOKUP_JUMPED;
784 if (error) 787 nd->inode = nd->path.dentry->d_inode;
788 if (nd->inode->i_op->follow_link) {
789 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 790 path_put(&nd->path);
791 error = -ELOOP;
792 }
786 } 793 }
787 } 794 }
788 return error; 795 return error;
789} 796}
790 797
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806
807 if (current->link_count >= MAX_NESTED_LINKS)
808 goto loop;
809 if (current->total_link_count >= 40)
810 goto loop;
811 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
812 cond_resched();
813 err = security_inode_follow_link(path->dentry, nd);
814 if (err)
815 goto loop;
816 current->link_count++;
817 current->total_link_count++;
818 nd->depth++;
819 err = __do_follow_link(path, nd, &cookie);
820 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
821 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
822 path_put(path);
823 current->link_count--;
824 nd->depth--;
825 return err;
826loop:
827 path_put_conditional(path, nd);
828 path_put(&nd->path);
829 return err;
830}
831
832static int follow_up_rcu(struct path *path) 798static int follow_up_rcu(struct path *path)
833{ 799{
834 struct vfsmount *parent; 800 struct vfsmount *parent;
@@ -1067,7 +1033,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1067 1033
1068 seq = read_seqcount_begin(&parent->d_seq); 1034 seq = read_seqcount_begin(&parent->d_seq);
1069 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1035 if (read_seqcount_retry(&old->d_seq, nd->seq))
1070 return -ECHILD; 1036 goto failed;
1071 inode = parent->d_inode; 1037 inode = parent->d_inode;
1072 nd->path.dentry = parent; 1038 nd->path.dentry = parent;
1073 nd->seq = seq; 1039 nd->seq = seq;
@@ -1080,8 +1046,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1080 } 1046 }
1081 __follow_mount_rcu(nd, &nd->path, &inode, true); 1047 __follow_mount_rcu(nd, &nd->path, &inode, true);
1082 nd->inode = inode; 1048 nd->inode = inode;
1083
1084 return 0; 1049 return 0;
1050
1051failed:
1052 nd->flags &= ~LOOKUP_RCU;
1053 if (!(nd->flags & LOOKUP_ROOT))
1054 nd->root.mnt = NULL;
1055 rcu_read_unlock();
1056 br_read_unlock(vfsmount_lock);
1057 return -ECHILD;
1085} 1058}
1086 1059
1087/* 1060/*
@@ -1215,68 +1188,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1215{ 1188{
1216 struct vfsmount *mnt = nd->path.mnt; 1189 struct vfsmount *mnt = nd->path.mnt;
1217 struct dentry *dentry, *parent = nd->path.dentry; 1190 struct dentry *dentry, *parent = nd->path.dentry;
1218 struct inode *dir; 1191 int need_reval = 1;
1192 int status = 1;
1219 int err; 1193 int err;
1220 1194
1221 /* 1195 /*
1222 * See if the low-level filesystem might want
1223 * to use its own hash..
1224 */
1225 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1226 err = parent->d_op->d_hash(parent, nd->inode, name);
1227 if (err < 0)
1228 return err;
1229 }
1230
1231 /*
1232 * Rename seqlock is not required here because in the off chance 1196 * Rename seqlock is not required here because in the off chance
1233 * of a false negative due to a concurrent rename, we're going to 1197 * of a false negative due to a concurrent rename, we're going to
1234 * do the non-racy lookup, below. 1198 * do the non-racy lookup, below.
1235 */ 1199 */
1236 if (nd->flags & LOOKUP_RCU) { 1200 if (nd->flags & LOOKUP_RCU) {
1237 unsigned seq; 1201 unsigned seq;
1238
1239 *inode = nd->inode; 1202 *inode = nd->inode;
1240 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1203 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1241 if (!dentry) { 1204 if (!dentry)
1242 if (nameidata_drop_rcu(nd)) 1205 goto unlazy;
1243 return -ECHILD; 1206
1244 goto need_lookup;
1245 }
1246 /* Memory barrier in read_seqcount_begin of child is enough */ 1207 /* Memory barrier in read_seqcount_begin of child is enough */
1247 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1208 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1248 return -ECHILD; 1209 return -ECHILD;
1249
1250 nd->seq = seq; 1210 nd->seq = seq;
1211
1251 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1212 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1252 dentry = do_revalidate_rcu(dentry, nd); 1213 status = d_revalidate(dentry, nd);
1253 if (!dentry) 1214 if (unlikely(status <= 0)) {
1254 goto need_lookup; 1215 if (status != -ECHILD)
1255 if (IS_ERR(dentry)) 1216 need_reval = 0;
1256 goto fail; 1217 goto unlazy;
1257 if (!(nd->flags & LOOKUP_RCU)) 1218 }
1258 goto done;
1259 } 1219 }
1260 path->mnt = mnt; 1220 path->mnt = mnt;
1261 path->dentry = dentry; 1221 path->dentry = dentry;
1262 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1222 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1263 return 0; 1223 return 0;
1264 if (nameidata_drop_rcu(nd)) 1224unlazy:
1265 return -ECHILD; 1225 if (dentry) {
1266 /* fallthru */ 1226 if (nameidata_dentry_drop_rcu(nd, dentry))
1227 return -ECHILD;
1228 } else {
1229 if (nameidata_drop_rcu(nd))
1230 return -ECHILD;
1231 }
1232 } else {
1233 dentry = __d_lookup(parent, name);
1267 } 1234 }
1268 dentry = __d_lookup(parent, name); 1235
1269 if (!dentry) 1236retry:
1270 goto need_lookup; 1237 if (unlikely(!dentry)) {
1271found: 1238 struct inode *dir = parent->d_inode;
1272 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1239 BUG_ON(nd->inode != dir);
1273 dentry = do_revalidate(dentry, nd); 1240
1274 if (!dentry) 1241 mutex_lock(&dir->i_mutex);
1275 goto need_lookup; 1242 dentry = d_lookup(parent, name);
1276 if (IS_ERR(dentry)) 1243 if (likely(!dentry)) {
1277 goto fail; 1244 dentry = d_alloc_and_lookup(parent, name, nd);
1245 if (IS_ERR(dentry)) {
1246 mutex_unlock(&dir->i_mutex);
1247 return PTR_ERR(dentry);
1248 }
1249 /* known good */
1250 need_reval = 0;
1251 status = 1;
1252 }
1253 mutex_unlock(&dir->i_mutex);
1278 } 1254 }
1279done: 1255 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1256 status = d_revalidate(dentry, nd);
1257 if (unlikely(status <= 0)) {
1258 if (status < 0) {
1259 dput(dentry);
1260 return status;
1261 }
1262 if (!d_invalidate(dentry)) {
1263 dput(dentry);
1264 dentry = NULL;
1265 need_reval = 1;
1266 goto retry;
1267 }
1268 }
1269
1280 path->mnt = mnt; 1270 path->mnt = mnt;
1281 path->dentry = dentry; 1271 path->dentry = dentry;
1282 err = follow_managed(path, nd->flags); 1272 err = follow_managed(path, nd->flags);
@@ -1286,39 +1276,113 @@ done:
1286 } 1276 }
1287 *inode = path->dentry->d_inode; 1277 *inode = path->dentry->d_inode;
1288 return 0; 1278 return 0;
1279}
1280
1281static inline int may_lookup(struct nameidata *nd)
1282{
1283 if (nd->flags & LOOKUP_RCU) {
1284 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1285 if (err != -ECHILD)
1286 return err;
1287 if (nameidata_drop_rcu(nd))
1288 return -ECHILD;
1289 }
1290 return exec_permission(nd->inode, 0);
1291}
1289 1292
1290need_lookup: 1293static inline int handle_dots(struct nameidata *nd, int type)
1291 dir = parent->d_inode; 1294{
1292 BUG_ON(nd->inode != dir); 1295 if (type == LAST_DOTDOT) {
1296 if (nd->flags & LOOKUP_RCU) {
1297 if (follow_dotdot_rcu(nd))
1298 return -ECHILD;
1299 } else
1300 follow_dotdot(nd);
1301 }
1302 return 0;
1303}
1293 1304
1294 mutex_lock(&dir->i_mutex); 1305static void terminate_walk(struct nameidata *nd)
1295 /* 1306{
1296 * First re-do the cached lookup just in case it was created 1307 if (!(nd->flags & LOOKUP_RCU)) {
1297 * while we waited for the directory semaphore, or the first 1308 path_put(&nd->path);
1298 * lookup failed due to an unrelated rename. 1309 } else {
1299 * 1310 nd->flags &= ~LOOKUP_RCU;
1300 * This could use version numbering or similar to avoid unnecessary 1311 if (!(nd->flags & LOOKUP_ROOT))
1301 * cache lookups, but then we'd have to do the first lookup in the 1312 nd->root.mnt = NULL;
1302 * non-racy way. However in the common case here, everything should 1313 rcu_read_unlock();
1303 * be hot in cache, so would it be a big win? 1314 br_read_unlock(vfsmount_lock);
1304 */
1305 dentry = d_lookup(parent, name);
1306 if (likely(!dentry)) {
1307 dentry = d_alloc_and_lookup(parent, name, nd);
1308 mutex_unlock(&dir->i_mutex);
1309 if (IS_ERR(dentry))
1310 goto fail;
1311 goto done;
1312 } 1315 }
1316}
1317
1318static inline int walk_component(struct nameidata *nd, struct path *path,
1319 struct qstr *name, int type, int follow)
1320{
1321 struct inode *inode;
1322 int err;
1313 /* 1323 /*
1314 * Uhhuh! Nasty case: the cache was re-populated while 1324 * "." and ".." are special - ".." especially so because it has
1315 * we waited on the semaphore. Need to revalidate. 1325 * to be able to know about the current root directory and
1326 * parent relationships.
1316 */ 1327 */
1317 mutex_unlock(&dir->i_mutex); 1328 if (unlikely(type != LAST_NORM))
1318 goto found; 1329 return handle_dots(nd, type);
1330 err = do_lookup(nd, name, path, &inode);
1331 if (unlikely(err)) {
1332 terminate_walk(nd);
1333 return err;
1334 }
1335 if (!inode) {
1336 path_to_nameidata(path, nd);
1337 terminate_walk(nd);
1338 return -ENOENT;
1339 }
1340 if (unlikely(inode->i_op->follow_link) && follow) {
1341 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1342 return -ECHILD;
1343 BUG_ON(inode != path->dentry->d_inode);
1344 return 1;
1345 }
1346 path_to_nameidata(path, nd);
1347 nd->inode = inode;
1348 return 0;
1349}
1319 1350
1320fail: 1351/*
1321 return PTR_ERR(dentry); 1352 * This limits recursive symlink follows to 8, while
1353 * limiting consecutive symlinks to 40.
1354 *
1355 * Without that kind of total limit, nasty chains of consecutive
1356 * symlinks can cause almost arbitrarily long lookups.
1357 */
1358static inline int nested_symlink(struct path *path, struct nameidata *nd)
1359{
1360 int res;
1361
1362 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1363 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1364 path_put_conditional(path, nd);
1365 path_put(&nd->path);
1366 return -ELOOP;
1367 }
1368
1369 nd->depth++;
1370 current->link_count++;
1371
1372 do {
1373 struct path link = *path;
1374 void *cookie;
1375
1376 res = follow_link(&link, nd, &cookie);
1377 if (!res)
1378 res = walk_component(nd, path, &nd->last,
1379 nd->last_type, LOOKUP_FOLLOW);
1380 put_link(nd, &link, cookie);
1381 } while (res > 0);
1382
1383 current->link_count--;
1384 nd->depth--;
1385 return res;
1322} 1386}
1323 1387
1324/* 1388/*
@@ -1338,30 +1402,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1338 while (*name=='/') 1402 while (*name=='/')
1339 name++; 1403 name++;
1340 if (!*name) 1404 if (!*name)
1341 goto return_reval; 1405 return 0;
1342
1343 if (nd->depth)
1344 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1345 1406
1346 /* At this point we know we have a real path component. */ 1407 /* At this point we know we have a real path component. */
1347 for(;;) { 1408 for(;;) {
1348 struct inode *inode;
1349 unsigned long hash; 1409 unsigned long hash;
1350 struct qstr this; 1410 struct qstr this;
1351 unsigned int c; 1411 unsigned int c;
1412 int type;
1352 1413
1353 nd->flags |= LOOKUP_CONTINUE; 1414 nd->flags |= LOOKUP_CONTINUE;
1354 if (nd->flags & LOOKUP_RCU) { 1415
1355 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1416 err = may_lookup(nd);
1356 if (err == -ECHILD) {
1357 if (nameidata_drop_rcu(nd))
1358 return -ECHILD;
1359 goto exec_again;
1360 }
1361 } else {
1362exec_again:
1363 err = exec_permission(nd->inode, 0);
1364 }
1365 if (err) 1417 if (err)
1366 break; 1418 break;
1367 1419
@@ -1377,53 +1429,43 @@ exec_again:
1377 this.len = name - (const char *) this.name; 1429 this.len = name - (const char *) this.name;
1378 this.hash = end_name_hash(hash); 1430 this.hash = end_name_hash(hash);
1379 1431
1432 type = LAST_NORM;
1433 if (this.name[0] == '.') switch (this.len) {
1434 case 2:
1435 if (this.name[1] == '.') {
1436 type = LAST_DOTDOT;
1437 nd->flags |= LOOKUP_JUMPED;
1438 }
1439 break;
1440 case 1:
1441 type = LAST_DOT;
1442 }
1443 if (likely(type == LAST_NORM)) {
1444 struct dentry *parent = nd->path.dentry;
1445 nd->flags &= ~LOOKUP_JUMPED;
1446 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1447 err = parent->d_op->d_hash(parent, nd->inode,
1448 &this);
1449 if (err < 0)
1450 break;
1451 }
1452 }
1453
1380 /* remove trailing slashes? */ 1454 /* remove trailing slashes? */
1381 if (!c) 1455 if (!c)
1382 goto last_component; 1456 goto last_component;
1383 while (*++name == '/'); 1457 while (*++name == '/');
1384 if (!*name) 1458 if (!*name)
1385 goto last_with_slashes; 1459 goto last_component;
1386 1460
1387 /* 1461 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1388 * "." and ".." are special - ".." especially so because it has 1462 if (err < 0)
1389 * to be able to know about the current root directory and 1463 return err;
1390 * parent relationships.
1391 */
1392 if (this.name[0] == '.') switch (this.len) {
1393 default:
1394 break;
1395 case 2:
1396 if (this.name[1] != '.')
1397 break;
1398 if (nd->flags & LOOKUP_RCU) {
1399 if (follow_dotdot_rcu(nd))
1400 return -ECHILD;
1401 } else
1402 follow_dotdot(nd);
1403 /* fallthrough */
1404 case 1:
1405 continue;
1406 }
1407 /* This does the actual lookups.. */
1408 err = do_lookup(nd, &this, &next, &inode);
1409 if (err)
1410 break;
1411 err = -ENOENT;
1412 if (!inode)
1413 goto out_dput;
1414 1464
1415 if (inode->i_op->follow_link) { 1465 if (err) {
1416 BUG_ON(inode != next.dentry->d_inode); 1466 err = nested_symlink(&next, nd);
1417 err = do_follow_link(&next, nd);
1418 if (err) 1467 if (err)
1419 goto return_err; 1468 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1469 }
1428 err = -ENOTDIR; 1470 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1471 if (!nd->inode->i_op->lookup)
@@ -1431,210 +1473,109 @@ exec_again:
1431 continue; 1473 continue;
1432 /* here ends the main loop */ 1474 /* here ends the main loop */
1433 1475
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1476last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1477 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1478 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 BUG_ON(inode != next.dentry->d_inode);
1462 err = do_follow_link(&next, nd);
1463 if (err)
1464 goto return_err;
1465 nd->inode = nd->path.dentry->d_inode;
1466 } else {
1467 path_to_nameidata(&next, nd);
1468 nd->inode = inode;
1469 }
1470 err = -ENOENT;
1471 if (!nd->inode)
1472 break;
1473 if (lookup_flags & LOOKUP_DIRECTORY) {
1474 err = -ENOTDIR;
1475 if (!nd->inode->i_op->lookup)
1476 break;
1477 }
1478 goto return_base;
1479lookup_parent:
1480 nd->last = this; 1479 nd->last = this;
1481 nd->last_type = LAST_NORM; 1480 nd->last_type = type;
1482 if (this.name[0] != '.')
1483 goto return_base;
1484 if (this.len == 1)
1485 nd->last_type = LAST_DOT;
1486 else if (this.len == 2 && this.name[1] == '.')
1487 nd->last_type = LAST_DOTDOT;
1488 else
1489 goto return_base;
1490return_reval:
1491 /*
1492 * We bypassed the ordinary revalidation routines.
1493 * We may need to check the cached dentry for staleness.
1494 */
1495 if (need_reval_dot(nd->path.dentry)) {
1496 if (nameidata_drop_rcu_last_maybe(nd))
1497 return -ECHILD;
1498 /* Note: we do not d_invalidate() */
1499 err = d_revalidate(nd->path.dentry, nd);
1500 if (!err)
1501 err = -ESTALE;
1502 if (err < 0)
1503 break;
1504 return 0;
1505 }
1506return_base:
1507 if (nameidata_drop_rcu_last_maybe(nd))
1508 return -ECHILD;
1509 return 0; 1481 return 0;
1510out_dput:
1511 if (!(nd->flags & LOOKUP_RCU))
1512 path_put_conditional(&next, nd);
1513 break;
1514 } 1482 }
1515 if (!(nd->flags & LOOKUP_RCU)) 1483 terminate_walk(nd);
1516 path_put(&nd->path);
1517return_err:
1518 return err; 1484 return err;
1519} 1485}
1520 1486
1521static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1487static int path_init(int dfd, const char *name, unsigned int flags,
1522{ 1488 struct nameidata *nd, struct file **fp)
1523 current->total_link_count = 0;
1524
1525 return link_path_walk(name, nd);
1526}
1527
1528static inline int path_walk_simple(const char *name, struct nameidata *nd)
1529{
1530 current->total_link_count = 0;
1531
1532 return link_path_walk(name, nd);
1533}
1534
1535static int path_walk(const char *name, struct nameidata *nd)
1536{
1537 struct path save = nd->path;
1538 int result;
1539
1540 current->total_link_count = 0;
1541
1542 /* make sure the stuff we saved doesn't go away */
1543 path_get(&save);
1544
1545 result = link_path_walk(name, nd);
1546 if (result == -ESTALE) {
1547 /* nd->path had been dropped */
1548 current->total_link_count = 0;
1549 nd->path = save;
1550 path_get(&nd->path);
1551 nd->flags |= LOOKUP_REVAL;
1552 result = link_path_walk(name, nd);
1553 }
1554
1555 path_put(&save);
1556
1557 return result;
1558}
1559
1560static void path_finish_rcu(struct nameidata *nd)
1561{
1562 if (nd->flags & LOOKUP_RCU) {
1563 /* RCU dangling. Cancel it. */
1564 nd->flags &= ~LOOKUP_RCU;
1565 nd->root.mnt = NULL;
1566 rcu_read_unlock();
1567 br_read_unlock(vfsmount_lock);
1568 }
1569 if (nd->file)
1570 fput(nd->file);
1571}
1572
1573static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1574{ 1489{
1575 int retval = 0; 1490 int retval = 0;
1576 int fput_needed; 1491 int fput_needed;
1577 struct file *file; 1492 struct file *file;
1578 1493
1579 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1494 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1580 nd->flags = flags | LOOKUP_RCU; 1495 nd->flags = flags | LOOKUP_JUMPED;
1581 nd->depth = 0; 1496 nd->depth = 0;
1497 if (flags & LOOKUP_ROOT) {
1498 struct inode *inode = nd->root.dentry->d_inode;
1499 if (*name) {
1500 if (!inode->i_op->lookup)
1501 return -ENOTDIR;
1502 retval = inode_permission(inode, MAY_EXEC);
1503 if (retval)
1504 return retval;
1505 }
1506 nd->path = nd->root;
1507 nd->inode = inode;
1508 if (flags & LOOKUP_RCU) {
1509 br_read_lock(vfsmount_lock);
1510 rcu_read_lock();
1511 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1512 } else {
1513 path_get(&nd->path);
1514 }
1515 return 0;
1516 }
1517
1582 nd->root.mnt = NULL; 1518 nd->root.mnt = NULL;
1583 nd->file = NULL;
1584 1519
1585 if (*name=='/') { 1520 if (*name=='/') {
1586 struct fs_struct *fs = current->fs; 1521 if (flags & LOOKUP_RCU) {
1587 unsigned seq; 1522 br_read_lock(vfsmount_lock);
1588 1523 rcu_read_lock();
1589 br_read_lock(vfsmount_lock); 1524 set_root_rcu(nd);
1590 rcu_read_lock(); 1525 } else {
1591 1526 set_root(nd);
1592 do { 1527 path_get(&nd->root);
1593 seq = read_seqcount_begin(&fs->seq); 1528 }
1594 nd->root = fs->root; 1529 nd->path = nd->root;
1595 nd->path = nd->root;
1596 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1597 } while (read_seqcount_retry(&fs->seq, seq));
1598
1599 } else if (dfd == AT_FDCWD) { 1530 } else if (dfd == AT_FDCWD) {
1600 struct fs_struct *fs = current->fs; 1531 if (flags & LOOKUP_RCU) {
1601 unsigned seq; 1532 struct fs_struct *fs = current->fs;
1602 1533 unsigned seq;
1603 br_read_lock(vfsmount_lock);
1604 rcu_read_lock();
1605 1534
1606 do { 1535 br_read_lock(vfsmount_lock);
1607 seq = read_seqcount_begin(&fs->seq); 1536 rcu_read_lock();
1608 nd->path = fs->pwd;
1609 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1610 } while (read_seqcount_retry(&fs->seq, seq));
1611 1537
1538 do {
1539 seq = read_seqcount_begin(&fs->seq);
1540 nd->path = fs->pwd;
1541 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1542 } while (read_seqcount_retry(&fs->seq, seq));
1543 } else {
1544 get_fs_pwd(current->fs, &nd->path);
1545 }
1612 } else { 1546 } else {
1613 struct dentry *dentry; 1547 struct dentry *dentry;
1614 1548
1615 file = fget_light(dfd, &fput_needed); 1549 file = fget_raw_light(dfd, &fput_needed);
1616 retval = -EBADF; 1550 retval = -EBADF;
1617 if (!file) 1551 if (!file)
1618 goto out_fail; 1552 goto out_fail;
1619 1553
1620 dentry = file->f_path.dentry; 1554 dentry = file->f_path.dentry;
1621 1555
1622 retval = -ENOTDIR; 1556 if (*name) {
1623 if (!S_ISDIR(dentry->d_inode->i_mode)) 1557 retval = -ENOTDIR;
1624 goto fput_fail; 1558 if (!S_ISDIR(dentry->d_inode->i_mode))
1559 goto fput_fail;
1625 1560
1626 retval = file_permission(file, MAY_EXEC); 1561 retval = file_permission(file, MAY_EXEC);
1627 if (retval) 1562 if (retval)
1628 goto fput_fail; 1563 goto fput_fail;
1564 }
1629 1565
1630 nd->path = file->f_path; 1566 nd->path = file->f_path;
1631 if (fput_needed) 1567 if (flags & LOOKUP_RCU) {
1632 nd->file = file; 1568 if (fput_needed)
1633 1569 *fp = file;
1634 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1570 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1635 br_read_lock(vfsmount_lock); 1571 br_read_lock(vfsmount_lock);
1636 rcu_read_lock(); 1572 rcu_read_lock();
1573 } else {
1574 path_get(&file->f_path);
1575 fput_light(file, fput_needed);
1576 }
1637 } 1577 }
1578
1638 nd->inode = nd->path.dentry->d_inode; 1579 nd->inode = nd->path.dentry->d_inode;
1639 return 0; 1580 return 0;
1640 1581
@@ -1644,60 +1585,23 @@ out_fail:
1644 return retval; 1585 return retval;
1645} 1586}
1646 1587
1647static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1588static inline int lookup_last(struct nameidata *nd, struct path *path)
1648{ 1589{
1649 int retval = 0; 1590 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1650 int fput_needed; 1591 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1651 struct file *file;
1652
1653 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1654 nd->flags = flags;
1655 nd->depth = 0;
1656 nd->root.mnt = NULL;
1657
1658 if (*name=='/') {
1659 set_root(nd);
1660 nd->path = nd->root;
1661 path_get(&nd->root);
1662 } else if (dfd == AT_FDCWD) {
1663 get_fs_pwd(current->fs, &nd->path);
1664 } else {
1665 struct dentry *dentry;
1666
1667 file = fget_light(dfd, &fput_needed);
1668 retval = -EBADF;
1669 if (!file)
1670 goto out_fail;
1671
1672 dentry = file->f_path.dentry;
1673
1674 retval = -ENOTDIR;
1675 if (!S_ISDIR(dentry->d_inode->i_mode))
1676 goto fput_fail;
1677 1592
1678 retval = file_permission(file, MAY_EXEC); 1593 nd->flags &= ~LOOKUP_PARENT;
1679 if (retval) 1594 return walk_component(nd, path, &nd->last, nd->last_type,
1680 goto fput_fail; 1595 nd->flags & LOOKUP_FOLLOW);
1681
1682 nd->path = file->f_path;
1683 path_get(&file->f_path);
1684
1685 fput_light(file, fput_needed);
1686 }
1687 nd->inode = nd->path.dentry->d_inode;
1688 return 0;
1689
1690fput_fail:
1691 fput_light(file, fput_needed);
1692out_fail:
1693 return retval;
1694} 1596}
1695 1597
1696/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1598/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1697static int do_path_lookup(int dfd, const char *name, 1599static int path_lookupat(int dfd, const char *name,
1698 unsigned int flags, struct nameidata *nd) 1600 unsigned int flags, struct nameidata *nd)
1699{ 1601{
1700 int retval; 1602 struct file *base = NULL;
1603 struct path path;
1604 int err;
1701 1605
1702 /* 1606 /*
1703 * Path walking is largely split up into 2 different synchronisation 1607 * Path walking is largely split up into 2 different synchronisation
@@ -1713,44 +1617,75 @@ static int do_path_lookup(int dfd, const char *name,
1713 * be handled by restarting a traditional ref-walk (which will always 1617 * be handled by restarting a traditional ref-walk (which will always
1714 * be able to complete). 1618 * be able to complete).
1715 */ 1619 */
1716 retval = path_init_rcu(dfd, name, flags, nd); 1620 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1717 if (unlikely(retval)) 1621
1718 return retval; 1622 if (unlikely(err))
1719 retval = path_walk_rcu(name, nd); 1623 return err;
1720 path_finish_rcu(nd); 1624
1721 if (nd->root.mnt) { 1625 current->total_link_count = 0;
1722 path_put(&nd->root); 1626 err = link_path_walk(name, nd);
1723 nd->root.mnt = NULL; 1627
1628 if (!err && !(flags & LOOKUP_PARENT)) {
1629 err = lookup_last(nd, &path);
1630 while (err > 0) {
1631 void *cookie;
1632 struct path link = path;
1633 nd->flags |= LOOKUP_PARENT;
1634 err = follow_link(&link, nd, &cookie);
1635 if (!err)
1636 err = lookup_last(nd, &path);
1637 put_link(nd, &link, cookie);
1638 }
1724 } 1639 }
1725 1640
1726 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1641 if (nd->flags & LOOKUP_RCU) {
1727 /* slower, locked walk */ 1642 /* went all way through without dropping RCU */
1728 if (retval == -ESTALE) 1643 BUG_ON(err);
1729 flags |= LOOKUP_REVAL; 1644 if (nameidata_drop_rcu_last(nd))
1730 retval = path_init(dfd, name, flags, nd); 1645 err = -ECHILD;
1731 if (unlikely(retval)) 1646 }
1732 return retval; 1647
1733 retval = path_walk(name, nd); 1648 if (!err)
1734 if (nd->root.mnt) { 1649 err = handle_reval_path(nd);
1735 path_put(&nd->root); 1650
1736 nd->root.mnt = NULL; 1651 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1652 if (!nd->inode->i_op->lookup) {
1653 path_put(&nd->path);
1654 return -ENOTDIR;
1737 } 1655 }
1738 } 1656 }
1739 1657
1658 if (base)
1659 fput(base);
1660
1661 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1662 path_put(&nd->root);
1663 nd->root.mnt = NULL;
1664 }
1665 return err;
1666}
1667
1668static int do_path_lookup(int dfd, const char *name,
1669 unsigned int flags, struct nameidata *nd)
1670{
1671 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1672 if (unlikely(retval == -ECHILD))
1673 retval = path_lookupat(dfd, name, flags, nd);
1674 if (unlikely(retval == -ESTALE))
1675 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1676
1740 if (likely(!retval)) { 1677 if (likely(!retval)) {
1741 if (unlikely(!audit_dummy_context())) { 1678 if (unlikely(!audit_dummy_context())) {
1742 if (nd->path.dentry && nd->inode) 1679 if (nd->path.dentry && nd->inode)
1743 audit_inode(name, nd->path.dentry); 1680 audit_inode(name, nd->path.dentry);
1744 } 1681 }
1745 } 1682 }
1746
1747 return retval; 1683 return retval;
1748} 1684}
1749 1685
1750int path_lookup(const char *name, unsigned int flags, 1686int kern_path_parent(const char *name, struct nameidata *nd)
1751 struct nameidata *nd)
1752{ 1687{
1753 return do_path_lookup(AT_FDCWD, name, flags, nd); 1688 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1754} 1689}
1755 1690
1756int kern_path(const char *name, unsigned int flags, struct path *path) 1691int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1774,29 +1709,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1774 const char *name, unsigned int flags, 1709 const char *name, unsigned int flags,
1775 struct nameidata *nd) 1710 struct nameidata *nd)
1776{ 1711{
1777 int retval; 1712 nd->root.dentry = dentry;
1778 1713 nd->root.mnt = mnt;
1779 /* same as do_path_lookup */ 1714 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1780 nd->last_type = LAST_ROOT; 1715 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1781 nd->flags = flags;
1782 nd->depth = 0;
1783
1784 nd->path.dentry = dentry;
1785 nd->path.mnt = mnt;
1786 path_get(&nd->path);
1787 nd->root = nd->path;
1788 path_get(&nd->root);
1789 nd->inode = nd->path.dentry->d_inode;
1790
1791 retval = path_walk(name, nd);
1792 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1793 nd->inode))
1794 audit_inode(name, nd->path.dentry);
1795
1796 path_put(&nd->root);
1797 nd->root.mnt = NULL;
1798
1799 return retval;
1800} 1716}
1801 1717
1802static struct dentry *__lookup_hash(struct qstr *name, 1718static struct dentry *__lookup_hash(struct qstr *name,
@@ -1811,17 +1727,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1811 return ERR_PTR(err); 1727 return ERR_PTR(err);
1812 1728
1813 /* 1729 /*
1814 * See if the low-level filesystem might want
1815 * to use its own hash..
1816 */
1817 if (base->d_flags & DCACHE_OP_HASH) {
1818 err = base->d_op->d_hash(base, inode, name);
1819 dentry = ERR_PTR(err);
1820 if (err < 0)
1821 goto out;
1822 }
1823
1824 /*
1825 * Don't bother with __d_lookup: callers are for creat as 1730 * Don't bother with __d_lookup: callers are for creat as
1826 * well as unlink, so a lot of the time it would cost 1731 * well as unlink, so a lot of the time it would cost
1827 * a double lookup. 1732 * a double lookup.
@@ -1833,7 +1738,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1833 1738
1834 if (!dentry) 1739 if (!dentry)
1835 dentry = d_alloc_and_lookup(base, name, nd); 1740 dentry = d_alloc_and_lookup(base, name, nd);
1836out: 1741
1837 return dentry; 1742 return dentry;
1838} 1743}
1839 1744
@@ -1847,28 +1752,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1847 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1752 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1848} 1753}
1849 1754
1850static int __lookup_one_len(const char *name, struct qstr *this,
1851 struct dentry *base, int len)
1852{
1853 unsigned long hash;
1854 unsigned int c;
1855
1856 this->name = name;
1857 this->len = len;
1858 if (!len)
1859 return -EACCES;
1860
1861 hash = init_name_hash();
1862 while (len--) {
1863 c = *(const unsigned char *)name++;
1864 if (c == '/' || c == '\0')
1865 return -EACCES;
1866 hash = partial_name_hash(c, hash);
1867 }
1868 this->hash = end_name_hash(hash);
1869 return 0;
1870}
1871
1872/** 1755/**
1873 * lookup_one_len - filesystem helper to lookup single pathname component 1756 * lookup_one_len - filesystem helper to lookup single pathname component
1874 * @name: pathname component to lookup 1757 * @name: pathname component to lookup
@@ -1882,14 +1765,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1882 */ 1765 */
1883struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1766struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1884{ 1767{
1885 int err;
1886 struct qstr this; 1768 struct qstr this;
1769 unsigned long hash;
1770 unsigned int c;
1887 1771
1888 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1772 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1889 1773
1890 err = __lookup_one_len(name, &this, base, len); 1774 this.name = name;
1891 if (err) 1775 this.len = len;
1892 return ERR_PTR(err); 1776 if (!len)
1777 return ERR_PTR(-EACCES);
1778
1779 hash = init_name_hash();
1780 while (len--) {
1781 c = *(const unsigned char *)name++;
1782 if (c == '/' || c == '\0')
1783 return ERR_PTR(-EACCES);
1784 hash = partial_name_hash(c, hash);
1785 }
1786 this.hash = end_name_hash(hash);
1787 /*
1788 * See if the low-level filesystem might want
1789 * to use its own hash..
1790 */
1791 if (base->d_flags & DCACHE_OP_HASH) {
1792 int err = base->d_op->d_hash(base, base->d_inode, &this);
1793 if (err < 0)
1794 return ERR_PTR(err);
1795 }
1893 1796
1894 return __lookup_hash(&this, base, NULL); 1797 return __lookup_hash(&this, base, NULL);
1895} 1798}
@@ -1898,7 +1801,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1898 struct path *path) 1801 struct path *path)
1899{ 1802{
1900 struct nameidata nd; 1803 struct nameidata nd;
1901 char *tmp = getname(name); 1804 char *tmp = getname_flags(name, flags);
1902 int err = PTR_ERR(tmp); 1805 int err = PTR_ERR(tmp);
1903 if (!IS_ERR(tmp)) { 1806 if (!IS_ERR(tmp)) {
1904 1807
@@ -2078,12 +1981,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2078 return error; 1981 return error;
2079} 1982}
2080 1983
2081int may_open(struct path *path, int acc_mode, int flag) 1984static int may_open(struct path *path, int acc_mode, int flag)
2082{ 1985{
2083 struct dentry *dentry = path->dentry; 1986 struct dentry *dentry = path->dentry;
2084 struct inode *inode = dentry->d_inode; 1987 struct inode *inode = dentry->d_inode;
2085 int error; 1988 int error;
2086 1989
1990 /* O_PATH? */
1991 if (!acc_mode)
1992 return 0;
1993
2087 if (!inode) 1994 if (!inode)
2088 return -ENOENT; 1995 return -ENOENT;
2089 1996
@@ -2152,34 +2059,6 @@ static int handle_truncate(struct file *filp)
2152} 2059}
2153 2060
2154/* 2061/*
2155 * Be careful about ever adding any more callers of this
2156 * function. Its flags must be in the namei format, not
2157 * what get passed to sys_open().
2158 */
2159static int __open_namei_create(struct nameidata *nd, struct path *path,
2160 int open_flag, int mode)
2161{
2162 int error;
2163 struct dentry *dir = nd->path.dentry;
2164
2165 if (!IS_POSIXACL(dir->d_inode))
2166 mode &= ~current_umask();
2167 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2168 if (error)
2169 goto out_unlock;
2170 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2171out_unlock:
2172 mutex_unlock(&dir->d_inode->i_mutex);
2173 dput(nd->path.dentry);
2174 nd->path.dentry = path->dentry;
2175
2176 if (error)
2177 return error;
2178 /* Don't check for write permission, don't truncate */
2179 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2180}
2181
2182/*
2183 * Note that while the flag value (low two bits) for sys_open means: 2062 * Note that while the flag value (low two bits) for sys_open means:
2184 * 00 - read-only 2063 * 00 - read-only
2185 * 01 - write-only 2064 * 01 - write-only
@@ -2203,126 +2082,115 @@ static inline int open_to_namei_flags(int flag)
2203 return flag; 2082 return flag;
2204} 2083}
2205 2084
2206static int open_will_truncate(int flag, struct inode *inode)
2207{
2208 /*
2209 * We'll never write to the fs underlying
2210 * a device file.
2211 */
2212 if (special_file(inode->i_mode))
2213 return 0;
2214 return (flag & O_TRUNC);
2215}
2216
2217static struct file *finish_open(struct nameidata *nd,
2218 int open_flag, int acc_mode)
2219{
2220 struct file *filp;
2221 int will_truncate;
2222 int error;
2223
2224 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2225 if (will_truncate) {
2226 error = mnt_want_write(nd->path.mnt);
2227 if (error)
2228 goto exit;
2229 }
2230 error = may_open(&nd->path, acc_mode, open_flag);
2231 if (error) {
2232 if (will_truncate)
2233 mnt_drop_write(nd->path.mnt);
2234 goto exit;
2235 }
2236 filp = nameidata_to_filp(nd);
2237 if (!IS_ERR(filp)) {
2238 error = ima_file_check(filp, acc_mode);
2239 if (error) {
2240 fput(filp);
2241 filp = ERR_PTR(error);
2242 }
2243 }
2244 if (!IS_ERR(filp)) {
2245 if (will_truncate) {
2246 error = handle_truncate(filp);
2247 if (error) {
2248 fput(filp);
2249 filp = ERR_PTR(error);
2250 }
2251 }
2252 }
2253 /*
2254 * It is now safe to drop the mnt write
2255 * because the filp has had a write taken
2256 * on its behalf.
2257 */
2258 if (will_truncate)
2259 mnt_drop_write(nd->path.mnt);
2260 path_put(&nd->path);
2261 return filp;
2262
2263exit:
2264 path_put(&nd->path);
2265 return ERR_PTR(error);
2266}
2267
2268/* 2085/*
2269 * Handle O_CREAT case for do_filp_open 2086 * Handle the last step of open()
2270 */ 2087 */
2271static struct file *do_last(struct nameidata *nd, struct path *path, 2088static struct file *do_last(struct nameidata *nd, struct path *path,
2272 int open_flag, int acc_mode, 2089 const struct open_flags *op, const char *pathname)
2273 int mode, const char *pathname)
2274{ 2090{
2275 struct dentry *dir = nd->path.dentry; 2091 struct dentry *dir = nd->path.dentry;
2092 struct dentry *dentry;
2093 int open_flag = op->open_flag;
2094 int will_truncate = open_flag & O_TRUNC;
2095 int want_write = 0;
2096 int acc_mode = op->acc_mode;
2276 struct file *filp; 2097 struct file *filp;
2277 int error = -EISDIR; 2098 int error;
2099
2100 nd->flags &= ~LOOKUP_PARENT;
2101 nd->flags |= op->intent;
2278 2102
2279 switch (nd->last_type) { 2103 switch (nd->last_type) {
2280 case LAST_DOTDOT: 2104 case LAST_DOTDOT:
2281 follow_dotdot(nd);
2282 dir = nd->path.dentry;
2283 case LAST_DOT: 2105 case LAST_DOT:
2284 if (need_reval_dot(dir)) { 2106 error = handle_dots(nd, nd->last_type);
2285 int status = d_revalidate(nd->path.dentry, nd); 2107 if (error)
2286 if (!status) 2108 return ERR_PTR(error);
2287 status = -ESTALE;
2288 if (status < 0) {
2289 error = status;
2290 goto exit;
2291 }
2292 }
2293 /* fallthrough */ 2109 /* fallthrough */
2294 case LAST_ROOT: 2110 case LAST_ROOT:
2295 goto exit; 2111 if (nd->flags & LOOKUP_RCU) {
2112 if (nameidata_drop_rcu_last(nd))
2113 return ERR_PTR(-ECHILD);
2114 }
2115 error = handle_reval_path(nd);
2116 if (error)
2117 goto exit;
2118 audit_inode(pathname, nd->path.dentry);
2119 if (open_flag & O_CREAT) {
2120 error = -EISDIR;
2121 goto exit;
2122 }
2123 goto ok;
2296 case LAST_BIND: 2124 case LAST_BIND:
2125 /* can't be RCU mode here */
2126 error = handle_reval_path(nd);
2127 if (error)
2128 goto exit;
2297 audit_inode(pathname, dir); 2129 audit_inode(pathname, dir);
2298 goto ok; 2130 goto ok;
2299 } 2131 }
2300 2132
2133 if (!(open_flag & O_CREAT)) {
2134 int symlink_ok = 0;
2135 if (nd->last.name[nd->last.len])
2136 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2137 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2138 symlink_ok = 1;
2139 /* we _can_ be in RCU mode here */
2140 error = walk_component(nd, path, &nd->last, LAST_NORM,
2141 !symlink_ok);
2142 if (error < 0)
2143 return ERR_PTR(error);
2144 if (error) /* symlink */
2145 return NULL;
2146 /* sayonara */
2147 if (nd->flags & LOOKUP_RCU) {
2148 if (nameidata_drop_rcu_last(nd))
2149 return ERR_PTR(-ECHILD);
2150 }
2151
2152 error = -ENOTDIR;
2153 if (nd->flags & LOOKUP_DIRECTORY) {
2154 if (!nd->inode->i_op->lookup)
2155 goto exit;
2156 }
2157 audit_inode(pathname, nd->path.dentry);
2158 goto ok;
2159 }
2160
2161 /* create side of things */
2162
2163 if (nd->flags & LOOKUP_RCU) {
2164 if (nameidata_drop_rcu_last(nd))
2165 return ERR_PTR(-ECHILD);
2166 }
2167
2168 audit_inode(pathname, dir);
2169 error = -EISDIR;
2301 /* trailing slashes? */ 2170 /* trailing slashes? */
2302 if (nd->last.name[nd->last.len]) 2171 if (nd->last.name[nd->last.len])
2303 goto exit; 2172 goto exit;
2304 2173
2305 mutex_lock(&dir->d_inode->i_mutex); 2174 mutex_lock(&dir->d_inode->i_mutex);
2306 2175
2307 path->dentry = lookup_hash(nd); 2176 dentry = lookup_hash(nd);
2308 path->mnt = nd->path.mnt; 2177 error = PTR_ERR(dentry);
2309 2178 if (IS_ERR(dentry)) {
2310 error = PTR_ERR(path->dentry);
2311 if (IS_ERR(path->dentry)) {
2312 mutex_unlock(&dir->d_inode->i_mutex); 2179 mutex_unlock(&dir->d_inode->i_mutex);
2313 goto exit; 2180 goto exit;
2314 } 2181 }
2315 2182
2316 if (IS_ERR(nd->intent.open.file)) { 2183 path->dentry = dentry;
2317 error = PTR_ERR(nd->intent.open.file); 2184 path->mnt = nd->path.mnt;
2318 goto exit_mutex_unlock;
2319 }
2320 2185
2321 /* Negative dentry, just create the file */ 2186 /* Negative dentry, just create the file */
2322 if (!path->dentry->d_inode) { 2187 if (!dentry->d_inode) {
2188 int mode = op->mode;
2189 if (!IS_POSIXACL(dir->d_inode))
2190 mode &= ~current_umask();
2323 /* 2191 /*
2324 * This write is needed to ensure that a 2192 * This write is needed to ensure that a
2325 * ro->rw transition does not occur between 2193 * rw->ro transition does not occur between
2326 * the time when the file is created and when 2194 * the time when the file is created and when
2327 * a permanent write count is taken through 2195 * a permanent write count is taken through
2328 * the 'struct file' in nameidata_to_filp(). 2196 * the 'struct file' in nameidata_to_filp().
@@ -2330,22 +2198,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2330 error = mnt_want_write(nd->path.mnt); 2198 error = mnt_want_write(nd->path.mnt);
2331 if (error) 2199 if (error)
2332 goto exit_mutex_unlock; 2200 goto exit_mutex_unlock;
2333 error = __open_namei_create(nd, path, open_flag, mode); 2201 want_write = 1;
2334 if (error) { 2202 /* Don't check for write permission, don't truncate */
2335 mnt_drop_write(nd->path.mnt); 2203 open_flag &= ~O_TRUNC;
2336 goto exit; 2204 will_truncate = 0;
2337 } 2205 acc_mode = MAY_OPEN;
2338 filp = nameidata_to_filp(nd); 2206 error = security_path_mknod(&nd->path, dentry, mode, 0);
2339 mnt_drop_write(nd->path.mnt); 2207 if (error)
2340 path_put(&nd->path); 2208 goto exit_mutex_unlock;
2341 if (!IS_ERR(filp)) { 2209 error = vfs_create(dir->d_inode, dentry, mode, nd);
2342 error = ima_file_check(filp, acc_mode); 2210 if (error)
2343 if (error) { 2211 goto exit_mutex_unlock;
2344 fput(filp); 2212 mutex_unlock(&dir->d_inode->i_mutex);
2345 filp = ERR_PTR(error); 2213 dput(nd->path.dentry);
2346 } 2214 nd->path.dentry = dentry;
2347 } 2215 goto common;
2348 return filp;
2349 } 2216 }
2350 2217
2351 /* 2218 /*
@@ -2375,7 +2242,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2375 if (S_ISDIR(nd->inode->i_mode)) 2242 if (S_ISDIR(nd->inode->i_mode))
2376 goto exit; 2243 goto exit;
2377ok: 2244ok:
2378 filp = finish_open(nd, open_flag, acc_mode); 2245 if (!S_ISREG(nd->inode->i_mode))
2246 will_truncate = 0;
2247
2248 if (will_truncate) {
2249 error = mnt_want_write(nd->path.mnt);
2250 if (error)
2251 goto exit;
2252 want_write = 1;
2253 }
2254common:
2255 error = may_open(&nd->path, acc_mode, open_flag);
2256 if (error)
2257 goto exit;
2258 filp = nameidata_to_filp(nd);
2259 if (!IS_ERR(filp)) {
2260 error = ima_file_check(filp, op->acc_mode);
2261 if (error) {
2262 fput(filp);
2263 filp = ERR_PTR(error);
2264 }
2265 }
2266 if (!IS_ERR(filp)) {
2267 if (will_truncate) {
2268 error = handle_truncate(filp);
2269 if (error) {
2270 fput(filp);
2271 filp = ERR_PTR(error);
2272 }
2273 }
2274 }
2275out:
2276 if (want_write)
2277 mnt_drop_write(nd->path.mnt);
2278 path_put(&nd->path);
2379 return filp; 2279 return filp;
2380 2280
2381exit_mutex_unlock: 2281exit_mutex_unlock:
@@ -2383,197 +2283,103 @@ exit_mutex_unlock:
2383exit_dput: 2283exit_dput:
2384 path_put_conditional(path, nd); 2284 path_put_conditional(path, nd);
2385exit: 2285exit:
2386 path_put(&nd->path); 2286 filp = ERR_PTR(error);
2387 return ERR_PTR(error); 2287 goto out;
2388} 2288}
2389 2289
2390/* 2290static struct file *path_openat(int dfd, const char *pathname,
2391 * Note that the low bits of the passed in "open_flag" 2291 struct nameidata *nd, const struct open_flags *op, int flags)
2392 * are not the same as in the local variable "flag". See
2393 * open_to_namei_flags() for more details.
2394 */
2395struct file *do_filp_open(int dfd, const char *pathname,
2396 int open_flag, int mode, int acc_mode)
2397{ 2292{
2293 struct file *base = NULL;
2398 struct file *filp; 2294 struct file *filp;
2399 struct nameidata nd;
2400 int error;
2401 struct path path; 2295 struct path path;
2402 int count = 0; 2296 int error;
2403 int flag = open_to_namei_flags(open_flag);
2404 int flags;
2405
2406 if (!(open_flag & O_CREAT))
2407 mode = 0;
2408
2409 /* Must never be set by userspace */
2410 open_flag &= ~FMODE_NONOTIFY;
2411
2412 /*
2413 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2414 * check for O_DSYNC if the need any syncing at all we enforce it's
2415 * always set instead of having to deal with possibly weird behaviour
2416 * for malicious applications setting only __O_SYNC.
2417 */
2418 if (open_flag & __O_SYNC)
2419 open_flag |= O_DSYNC;
2420
2421 if (!acc_mode)
2422 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2423
2424 /* O_TRUNC implies we need access checks for write permissions */
2425 if (open_flag & O_TRUNC)
2426 acc_mode |= MAY_WRITE;
2427
2428 /* Allow the LSM permission hook to distinguish append
2429 access from general write access. */
2430 if (open_flag & O_APPEND)
2431 acc_mode |= MAY_APPEND;
2432
2433 flags = LOOKUP_OPEN;
2434 if (open_flag & O_CREAT) {
2435 flags |= LOOKUP_CREATE;
2436 if (open_flag & O_EXCL)
2437 flags |= LOOKUP_EXCL;
2438 }
2439 if (open_flag & O_DIRECTORY)
2440 flags |= LOOKUP_DIRECTORY;
2441 if (!(open_flag & O_NOFOLLOW))
2442 flags |= LOOKUP_FOLLOW;
2443 2297
2444 filp = get_empty_filp(); 2298 filp = get_empty_filp();
2445 if (!filp) 2299 if (!filp)
2446 return ERR_PTR(-ENFILE); 2300 return ERR_PTR(-ENFILE);
2447 2301
2448 filp->f_flags = open_flag; 2302 filp->f_flags = op->open_flag;
2449 nd.intent.open.file = filp; 2303 nd->intent.open.file = filp;
2450 nd.intent.open.flags = flag; 2304 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2451 nd.intent.open.create_mode = mode; 2305 nd->intent.open.create_mode = op->mode;
2452
2453 if (open_flag & O_CREAT)
2454 goto creat;
2455 2306
2456 /* !O_CREAT, simple open */ 2307 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2457 error = do_path_lookup(dfd, pathname, flags, &nd);
2458 if (unlikely(error)) 2308 if (unlikely(error))
2459 goto out_filp; 2309 goto out_filp;
2460 error = -ELOOP;
2461 if (!(nd.flags & LOOKUP_FOLLOW)) {
2462 if (nd.inode->i_op->follow_link)
2463 goto out_path;
2464 }
2465 error = -ENOTDIR;
2466 if (nd.flags & LOOKUP_DIRECTORY) {
2467 if (!nd.inode->i_op->lookup)
2468 goto out_path;
2469 }
2470 audit_inode(pathname, nd.path.dentry);
2471 filp = finish_open(&nd, open_flag, acc_mode);
2472 release_open_intent(&nd);
2473 return filp;
2474 2310
2475creat: 2311 current->total_link_count = 0;
2476 /* OK, have to create the file. Find the parent. */ 2312 error = link_path_walk(pathname, nd);
2477 error = path_init_rcu(dfd, pathname,
2478 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2479 if (error)
2480 goto out_filp;
2481 error = path_walk_rcu(pathname, &nd);
2482 path_finish_rcu(&nd);
2483 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2484 /* slower, locked walk */
2485 if (error == -ESTALE) {
2486reval:
2487 flags |= LOOKUP_REVAL;
2488 }
2489 error = path_init(dfd, pathname,
2490 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2491 if (error)
2492 goto out_filp;
2493
2494 error = path_walk_simple(pathname, &nd);
2495 }
2496 if (unlikely(error)) 2313 if (unlikely(error))
2497 goto out_filp; 2314 goto out_filp;
2498 if (unlikely(!audit_dummy_context()))
2499 audit_inode(pathname, nd.path.dentry);
2500 2315
2501 /* 2316 filp = do_last(nd, &path, op, pathname);
2502 * We have the parent and last component.
2503 */
2504 nd.flags = flags;
2505 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2506 while (unlikely(!filp)) { /* trailing symlink */ 2317 while (unlikely(!filp)) { /* trailing symlink */
2507 struct path link = path; 2318 struct path link = path;
2508 struct inode *linki = link.dentry->d_inode;
2509 void *cookie; 2319 void *cookie;
2510 error = -ELOOP; 2320 if (!(nd->flags & LOOKUP_FOLLOW)) {
2511 if (!(nd.flags & LOOKUP_FOLLOW)) 2321 path_put_conditional(&path, nd);
2512 goto exit_dput; 2322 path_put(&nd->path);
2513 if (count++ == 32) 2323 filp = ERR_PTR(-ELOOP);
2514 goto exit_dput; 2324 break;
2515 /*
2516 * This is subtle. Instead of calling do_follow_link() we do
2517 * the thing by hands. The reason is that this way we have zero
2518 * link_count and path_walk() (called from ->follow_link)
2519 * honoring LOOKUP_PARENT. After that we have the parent and
2520 * last component, i.e. we are in the same situation as after
2521 * the first path_walk(). Well, almost - if the last component
2522 * is normal we get its copy stored in nd->last.name and we will
2523 * have to putname() it when we are done. Procfs-like symlinks
2524 * just set LAST_BIND.
2525 */
2526 nd.flags |= LOOKUP_PARENT;
2527 error = security_inode_follow_link(link.dentry, &nd);
2528 if (error)
2529 goto exit_dput;
2530 error = __do_follow_link(&link, &nd, &cookie);
2531 if (unlikely(error)) {
2532 if (!IS_ERR(cookie) && linki->i_op->put_link)
2533 linki->i_op->put_link(link.dentry, &nd, cookie);
2534 /* nd.path had been dropped */
2535 nd.path = link;
2536 goto out_path;
2537 } 2325 }
2538 nd.flags &= ~LOOKUP_PARENT; 2326 nd->flags |= LOOKUP_PARENT;
2539 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2327 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2540 if (linki->i_op->put_link) 2328 error = follow_link(&link, nd, &cookie);
2541 linki->i_op->put_link(link.dentry, &nd, cookie); 2329 if (unlikely(error))
2542 path_put(&link); 2330 filp = ERR_PTR(error);
2331 else
2332 filp = do_last(nd, &path, op, pathname);
2333 put_link(nd, &link, cookie);
2543 } 2334 }
2544out: 2335out:
2545 if (nd.root.mnt) 2336 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2546 path_put(&nd.root); 2337 path_put(&nd->root);
2547 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2338 if (base)
2548 goto reval; 2339 fput(base);
2549 release_open_intent(&nd); 2340 release_open_intent(nd);
2550 return filp; 2341 return filp;
2551 2342
2552exit_dput:
2553 path_put_conditional(&path, &nd);
2554out_path:
2555 path_put(&nd.path);
2556out_filp: 2343out_filp:
2557 filp = ERR_PTR(error); 2344 filp = ERR_PTR(error);
2558 goto out; 2345 goto out;
2559} 2346}
2560 2347
2561/** 2348struct file *do_filp_open(int dfd, const char *pathname,
2562 * filp_open - open file and return file pointer 2349 const struct open_flags *op, int flags)
2563 * 2350{
2564 * @filename: path to open 2351 struct nameidata nd;
2565 * @flags: open flags as per the open(2) second argument 2352 struct file *filp;
2566 * @mode: mode for the new file if O_CREAT is set, else ignored 2353
2567 * 2354 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2568 * This is the helper to open a file from kernelspace if you really 2355 if (unlikely(filp == ERR_PTR(-ECHILD)))
2569 * have to. But in generally you should not do this, so please move 2356 filp = path_openat(dfd, pathname, &nd, op, flags);
2570 * along, nothing to see here.. 2357 if (unlikely(filp == ERR_PTR(-ESTALE)))
2571 */ 2358 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2572struct file *filp_open(const char *filename, int flags, int mode) 2359 return filp;
2360}
2361
2362struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2363 const char *name, const struct open_flags *op, int flags)
2573{ 2364{
2574 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2365 struct nameidata nd;
2366 struct file *file;
2367
2368 nd.root.mnt = mnt;
2369 nd.root.dentry = dentry;
2370
2371 flags |= LOOKUP_ROOT;
2372
2373 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2374 return ERR_PTR(-ELOOP);
2375
2376 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2377 if (unlikely(file == ERR_PTR(-ECHILD)))
2378 file = path_openat(-1, name, &nd, op, flags);
2379 if (unlikely(file == ERR_PTR(-ESTALE)))
2380 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2381 return file;
2575} 2382}
2576EXPORT_SYMBOL(filp_open);
2577 2383
2578/** 2384/**
2579 * lookup_create - lookup a dentry, creating it if it doesn't exist 2385 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3112,7 +2918,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3112 return error; 2918 return error;
3113 2919
3114 mutex_lock(&inode->i_mutex); 2920 mutex_lock(&inode->i_mutex);
3115 error = dir->i_op->link(old_dentry, dir, new_dentry); 2921 /* Make sure we don't allow creating hardlink to an unlinked file */
2922 if (inode->i_nlink == 0)
2923 error = -ENOENT;
2924 else
2925 error = dir->i_op->link(old_dentry, dir, new_dentry);
3116 mutex_unlock(&inode->i_mutex); 2926 mutex_unlock(&inode->i_mutex);
3117 if (!error) 2927 if (!error)
3118 fsnotify_link(dir, inode, new_dentry); 2928 fsnotify_link(dir, inode, new_dentry);
@@ -3134,15 +2944,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3134 struct dentry *new_dentry; 2944 struct dentry *new_dentry;
3135 struct nameidata nd; 2945 struct nameidata nd;
3136 struct path old_path; 2946 struct path old_path;
2947 int how = 0;
3137 int error; 2948 int error;
3138 char *to; 2949 char *to;
3139 2950
3140 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2951 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3141 return -EINVAL; 2952 return -EINVAL;
2953 /*
2954 * To use null names we require CAP_DAC_READ_SEARCH
2955 * This ensures that not everyone will be able to create
2956 * handlink using the passed filedescriptor.
2957 */
2958 if (flags & AT_EMPTY_PATH) {
2959 if (!capable(CAP_DAC_READ_SEARCH))
2960 return -ENOENT;
2961 how = LOOKUP_EMPTY;
2962 }
2963
2964 if (flags & AT_SYMLINK_FOLLOW)
2965 how |= LOOKUP_FOLLOW;
3142 2966
3143 error = user_path_at(olddfd, oldname, 2967 error = user_path_at(olddfd, oldname, how, &old_path);
3144 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3145 &old_path);
3146 if (error) 2968 if (error)
3147 return error; 2969 return error;
3148 2970
@@ -3579,7 +3401,7 @@ EXPORT_SYMBOL(page_readlink);
3579EXPORT_SYMBOL(__page_symlink); 3401EXPORT_SYMBOL(__page_symlink);
3580EXPORT_SYMBOL(page_symlink); 3402EXPORT_SYMBOL(page_symlink);
3581EXPORT_SYMBOL(page_symlink_inode_operations); 3403EXPORT_SYMBOL(page_symlink_inode_operations);
3582EXPORT_SYMBOL(path_lookup); 3404EXPORT_SYMBOL(kern_path_parent);
3583EXPORT_SYMBOL(kern_path); 3405EXPORT_SYMBOL(kern_path);
3584EXPORT_SYMBOL(vfs_path_lookup); 3406EXPORT_SYMBOL(vfs_path_lookup);
3585EXPORT_SYMBOL(inode_permission); 3407EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..e96e03782def 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
1002 .show = show_vfsmnt 1002 .show = show_vfsmnt
1003}; 1003};
1004 1004
1005static int uuid_is_nil(u8 *uuid)
1006{
1007 int i;
1008 u8 *cp = (u8 *)uuid;
1009
1010 for (i = 0; i < 16; i++) {
1011 if (*cp++)
1012 return 0;
1013 }
1014 return 1;
1015}
1016
1005static int show_mountinfo(struct seq_file *m, void *v) 1017static int show_mountinfo(struct seq_file *m, void *v)
1006{ 1018{
1007 struct proc_mounts *p = m->private; 1019 struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
1040 if (IS_MNT_UNBINDABLE(mnt)) 1052 if (IS_MNT_UNBINDABLE(mnt))
1041 seq_puts(m, " unbindable"); 1053 seq_puts(m, " unbindable");
1042 1054
1055 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1056 /* print the uuid */
1057 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1058
1043 /* Filesystem specific data */ 1059 /* Filesystem specific data */
1044 seq_puts(m, " - "); 1060 seq_puts(m, " - ");
1045 show_type(m, sb); 1061 show_type(m, sb);
@@ -1244,7 +1260,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1244 */ 1260 */
1245 br_write_lock(vfsmount_lock); 1261 br_write_lock(vfsmount_lock);
1246 if (mnt_get_count(mnt) != 2) { 1262 if (mnt_get_count(mnt) != 2) {
1247 br_write_lock(vfsmount_lock); 1263 br_write_unlock(vfsmount_lock);
1248 return -EBUSY; 1264 return -EBUSY;
1249 } 1265 }
1250 br_write_unlock(vfsmount_lock); 1266 br_write_unlock(vfsmount_lock);
@@ -1767,6 +1783,10 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1767 if (path->dentry != path->mnt->mnt_root) 1783 if (path->dentry != path->mnt->mnt_root)
1768 return -EINVAL; 1784 return -EINVAL;
1769 1785
1786 err = security_sb_remount(sb, data);
1787 if (err)
1788 return err;
1789
1770 down_write(&sb->s_umount); 1790 down_write(&sb->s_umount);
1771 if (flags & MS_BIND) 1791 if (flags & MS_BIND)
1772 err = change_mount_flags(path->mnt, flags); 1792 err = change_mount_flags(path->mnt, flags);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..01768e5e2c9b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/inet.h> 37#include <linux/inet.h>
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
89 */ 90 */
90u64 nfs_compat_user_ino64(u64 fileid) 91u64 nfs_compat_user_ino64(u64 fileid)
91{ 92{
92 int ino; 93#ifdef CONFIG_COMPAT
94 compat_ulong_t ino;
95#else
96 unsigned long ino;
97#endif
93 98
94 if (enable_ino64) 99 if (enable_ino64)
95 return fileid; 100 return fileid;
@@ -1513,7 +1518,7 @@ static int nfsiod_start(void)
1513{ 1518{
1514 struct workqueue_struct *wq; 1519 struct workqueue_struct *wq;
1515 dprintk("RPC: creating workqueue nfsiod\n"); 1520 dprintk("RPC: creating workqueue nfsiod\n");
1516 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); 1521 wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
1517 if (wq == NULL) 1522 if (wq == NULL)
1518 return -ENOMEM; 1523 return -ENOMEM;
1519 nfsiod_workqueue = wq; 1524 nfsiod_workqueue = wq;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..1be36cf65bfc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
298#if defined(CONFIG_NFS_V4_1) 298#if defined(CONFIG_NFS_V4_1)
299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
301extern void nfs4_schedule_session_recovery(struct nfs4_session *);
302#else
303static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
304{
305}
301#endif /* CONFIG_NFS_V4_1 */ 306#endif /* CONFIG_NFS_V4_1 */
302 307
303extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 308extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
307extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 312extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
308extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 313extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
309extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 314extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
310extern void nfs4_schedule_state_recovery(struct nfs_client *); 315extern void nfs4_schedule_lease_recovery(struct nfs_client *);
311extern void nfs4_schedule_state_manager(struct nfs_client *); 316extern void nfs4_schedule_state_manager(struct nfs_client *);
312extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 317extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
313extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
314extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 318extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
315extern void nfs41_handle_recall_slot(struct nfs_client *clp); 319extern void nfs41_handle_recall_slot(struct nfs_client *clp);
316extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 320extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..b73c34375f60 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
219 goto out_err; 219 goto out_err;
220 } 220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL); 221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 if (!buf) {
223 dprintk("%s: Not enough memory\n", __func__);
224 goto out_err;
225 }
222 buf[rlen] = '\0'; 226 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen); 227 memcpy(buf, r_addr, rlen);
224 228
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..0a07e353a961 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -256,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
256 case -NFS4ERR_OPENMODE: 256 case -NFS4ERR_OPENMODE:
257 if (state == NULL) 257 if (state == NULL)
258 break; 258 break;
259 nfs4_state_mark_reclaim_nograce(clp, state); 259 nfs4_schedule_stateid_recovery(server, state);
260 goto do_state_recovery; 260 goto wait_on_recovery;
261 case -NFS4ERR_STALE_STATEID: 261 case -NFS4ERR_STALE_STATEID:
262 case -NFS4ERR_STALE_CLIENTID: 262 case -NFS4ERR_STALE_CLIENTID:
263 case -NFS4ERR_EXPIRED: 263 case -NFS4ERR_EXPIRED:
264 goto do_state_recovery; 264 nfs4_schedule_lease_recovery(clp);
265 goto wait_on_recovery;
265#if defined(CONFIG_NFS_V4_1) 266#if defined(CONFIG_NFS_V4_1)
266 case -NFS4ERR_BADSESSION: 267 case -NFS4ERR_BADSESSION:
267 case -NFS4ERR_BADSLOT: 268 case -NFS4ERR_BADSLOT:
@@ -272,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
272 case -NFS4ERR_SEQ_MISORDERED: 273 case -NFS4ERR_SEQ_MISORDERED:
273 dprintk("%s ERROR: %d Reset session\n", __func__, 274 dprintk("%s ERROR: %d Reset session\n", __func__,
274 errorcode); 275 errorcode);
275 nfs4_schedule_state_recovery(clp); 276 nfs4_schedule_session_recovery(clp->cl_session);
276 exception->retry = 1; 277 exception->retry = 1;
277 break; 278 break;
278#endif /* defined(CONFIG_NFS_V4_1) */ 279#endif /* defined(CONFIG_NFS_V4_1) */
@@ -295,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
295 } 296 }
296 /* We failed to handle the error */ 297 /* We failed to handle the error */
297 return nfs4_map_errors(ret); 298 return nfs4_map_errors(ret);
298do_state_recovery: 299wait_on_recovery:
299 nfs4_schedule_state_recovery(clp);
300 ret = nfs4_wait_clnt_recover(clp); 300 ret = nfs4_wait_clnt_recover(clp);
301 if (ret == 0) 301 if (ret == 0)
302 exception->retry = 1; 302 exception->retry = 1;
@@ -435,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
435 clp = res->sr_session->clp; 435 clp = res->sr_session->clp;
436 do_renew_lease(clp, timestamp); 436 do_renew_lease(clp, timestamp);
437 /* Check sequence flags */ 437 /* Check sequence flags */
438 if (atomic_read(&clp->cl_count) > 1) 438 if (res->sr_status_flags != 0)
439 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 439 nfs4_schedule_lease_recovery(clp);
440 break; 440 break;
441 case -NFS4ERR_DELAY: 441 case -NFS4ERR_DELAY:
442 /* The server detected a resend of the RPC call and 442 /* The server detected a resend of the RPC call and
@@ -1255,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1255 case -NFS4ERR_BAD_HIGH_SLOT: 1255 case -NFS4ERR_BAD_HIGH_SLOT:
1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1257 case -NFS4ERR_DEADSESSION: 1257 case -NFS4ERR_DEADSESSION:
1258 nfs4_schedule_state_recovery( 1258 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
1259 server->nfs_client);
1260 goto out; 1259 goto out;
1261 case -NFS4ERR_STALE_CLIENTID: 1260 case -NFS4ERR_STALE_CLIENTID:
1262 case -NFS4ERR_STALE_STATEID: 1261 case -NFS4ERR_STALE_STATEID:
1263 case -NFS4ERR_EXPIRED: 1262 case -NFS4ERR_EXPIRED:
1264 /* Don't recall a delegation if it was lost */ 1263 /* Don't recall a delegation if it was lost */
1265 nfs4_schedule_state_recovery(server->nfs_client); 1264 nfs4_schedule_lease_recovery(server->nfs_client);
1266 goto out; 1265 goto out;
1267 case -ERESTARTSYS: 1266 case -ERESTARTSYS:
1268 /* 1267 /*
@@ -1271,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1271 */ 1270 */
1272 case -NFS4ERR_ADMIN_REVOKED: 1271 case -NFS4ERR_ADMIN_REVOKED:
1273 case -NFS4ERR_BAD_STATEID: 1272 case -NFS4ERR_BAD_STATEID:
1274 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1273 nfs4_schedule_stateid_recovery(server, state);
1275 case -EKEYEXPIRED: 1274 case -EKEYEXPIRED:
1276 /* 1275 /*
1277 * User RPCSEC_GSS context has expired. 1276 * User RPCSEC_GSS context has expired.
@@ -1587,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
1587 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1586 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1588 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1587 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1589 break; 1588 break;
1590 nfs4_schedule_state_recovery(clp); 1589 nfs4_schedule_state_manager(clp);
1591 ret = -EIO; 1590 ret = -EIO;
1592 } 1591 }
1593 return ret; 1592 return ret;
@@ -3178,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3178 if (task->tk_status < 0) { 3177 if (task->tk_status < 0) {
3179 /* Unless we're shutting down, schedule state recovery! */ 3178 /* Unless we're shutting down, schedule state recovery! */
3180 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3179 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
3181 nfs4_schedule_state_recovery(clp); 3180 nfs4_schedule_lease_recovery(clp);
3182 return; 3181 return;
3183 } 3182 }
3184 do_renew_lease(clp, timestamp); 3183 do_renew_lease(clp, timestamp);
@@ -3252,6 +3251,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3252 } 3251 }
3253} 3252}
3254 3253
3254static int buf_to_pages_noslab(const void *buf, size_t buflen,
3255 struct page **pages, unsigned int *pgbase)
3256{
3257 struct page *newpage, **spages;
3258 int rc = 0;
3259 size_t len;
3260 spages = pages;
3261
3262 do {
3263 len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
3264 newpage = alloc_page(GFP_KERNEL);
3265
3266 if (newpage == NULL)
3267 goto unwind;
3268 memcpy(page_address(newpage), buf, len);
3269 buf += len;
3270 buflen -= len;
3271 *pages++ = newpage;
3272 rc++;
3273 } while (buflen != 0);
3274
3275 return rc;
3276
3277unwind:
3278 for(; rc > 0; rc--)
3279 __free_page(spages[rc-1]);
3280 return -ENOMEM;
3281}
3282
3255struct nfs4_cached_acl { 3283struct nfs4_cached_acl {
3256 int cached; 3284 int cached;
3257 size_t len; 3285 size_t len;
@@ -3420,13 +3448,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3420 .rpc_argp = &arg, 3448 .rpc_argp = &arg,
3421 .rpc_resp = &res, 3449 .rpc_resp = &res,
3422 }; 3450 };
3423 int ret; 3451 int ret, i;
3424 3452
3425 if (!nfs4_server_supports_acls(server)) 3453 if (!nfs4_server_supports_acls(server))
3426 return -EOPNOTSUPP; 3454 return -EOPNOTSUPP;
3455 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3456 if (i < 0)
3457 return i;
3427 nfs_inode_return_delegation(inode); 3458 nfs_inode_return_delegation(inode);
3428 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3429 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3459 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3460
3461 /*
3462 * Free each page after tx, so the only ref left is
3463 * held by the network stack
3464 */
3465 for (; i > 0; i--)
3466 put_page(pages[i-1]);
3467
3430 /* 3468 /*
3431 * Acl update can result in inode attribute update. 3469 * Acl update can result in inode attribute update.
3432 * so mark the attribute cache invalid. 3470 * so mark the attribute cache invalid.
@@ -3464,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3464 case -NFS4ERR_OPENMODE: 3502 case -NFS4ERR_OPENMODE:
3465 if (state == NULL) 3503 if (state == NULL)
3466 break; 3504 break;
3467 nfs4_state_mark_reclaim_nograce(clp, state); 3505 nfs4_schedule_stateid_recovery(server, state);
3468 goto do_state_recovery; 3506 goto wait_on_recovery;
3469 case -NFS4ERR_STALE_STATEID: 3507 case -NFS4ERR_STALE_STATEID:
3470 case -NFS4ERR_STALE_CLIENTID: 3508 case -NFS4ERR_STALE_CLIENTID:
3471 case -NFS4ERR_EXPIRED: 3509 case -NFS4ERR_EXPIRED:
3472 goto do_state_recovery; 3510 nfs4_schedule_lease_recovery(clp);
3511 goto wait_on_recovery;
3473#if defined(CONFIG_NFS_V4_1) 3512#if defined(CONFIG_NFS_V4_1)
3474 case -NFS4ERR_BADSESSION: 3513 case -NFS4ERR_BADSESSION:
3475 case -NFS4ERR_BADSLOT: 3514 case -NFS4ERR_BADSLOT:
@@ -3480,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3480 case -NFS4ERR_SEQ_MISORDERED: 3519 case -NFS4ERR_SEQ_MISORDERED:
3481 dprintk("%s ERROR %d, Reset session\n", __func__, 3520 dprintk("%s ERROR %d, Reset session\n", __func__,
3482 task->tk_status); 3521 task->tk_status);
3483 nfs4_schedule_state_recovery(clp); 3522 nfs4_schedule_session_recovery(clp->cl_session);
3484 task->tk_status = 0; 3523 task->tk_status = 0;
3485 return -EAGAIN; 3524 return -EAGAIN;
3486#endif /* CONFIG_NFS_V4_1 */ 3525#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3497 } 3536 }
3498 task->tk_status = nfs4_map_errors(task->tk_status); 3537 task->tk_status = nfs4_map_errors(task->tk_status);
3499 return 0; 3538 return 0;
3500do_state_recovery: 3539wait_on_recovery:
3501 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3540 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3502 nfs4_schedule_state_recovery(clp);
3503 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 3541 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3504 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3542 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3505 task->tk_status = 0; 3543 task->tk_status = 0;
@@ -4110,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
4110 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 4148 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
4111 data->arg.lock_seqid); 4149 data->arg.lock_seqid);
4112 if (!IS_ERR(task)) 4150 if (!IS_ERR(task))
4113 rpc_put_task(task); 4151 rpc_put_task_async(task);
4114 dprintk("%s: cancelling lock!\n", __func__); 4152 dprintk("%s: cancelling lock!\n", __func__);
4115 } else 4153 } else
4116 nfs_free_seqid(data->arg.lock_seqid); 4154 nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
4134 4172
4135static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) 4173static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4136{ 4174{
4137 struct nfs_client *clp = server->nfs_client;
4138 struct nfs4_state *state = lsp->ls_state;
4139
4140 switch (error) { 4175 switch (error) {
4141 case -NFS4ERR_ADMIN_REVOKED: 4176 case -NFS4ERR_ADMIN_REVOKED:
4142 case -NFS4ERR_BAD_STATEID: 4177 case -NFS4ERR_BAD_STATEID:
4143 case -NFS4ERR_EXPIRED: 4178 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4144 if (new_lock_owner != 0 || 4179 if (new_lock_owner != 0 ||
4145 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4180 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4146 nfs4_state_mark_reclaim_nograce(clp, state); 4181 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4147 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4148 break; 4182 break;
4149 case -NFS4ERR_STALE_STATEID: 4183 case -NFS4ERR_STALE_STATEID:
4150 if (new_lock_owner != 0 ||
4151 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4152 nfs4_state_mark_reclaim_reboot(clp, state);
4153 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4184 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4185 case -NFS4ERR_EXPIRED:
4186 nfs4_schedule_lease_recovery(server->nfs_client);
4154 }; 4187 };
4155} 4188}
4156 4189
@@ -4366,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4366 case -NFS4ERR_EXPIRED: 4399 case -NFS4ERR_EXPIRED:
4367 case -NFS4ERR_STALE_CLIENTID: 4400 case -NFS4ERR_STALE_CLIENTID:
4368 case -NFS4ERR_STALE_STATEID: 4401 case -NFS4ERR_STALE_STATEID:
4402 nfs4_schedule_lease_recovery(server->nfs_client);
4403 goto out;
4369 case -NFS4ERR_BADSESSION: 4404 case -NFS4ERR_BADSESSION:
4370 case -NFS4ERR_BADSLOT: 4405 case -NFS4ERR_BADSLOT:
4371 case -NFS4ERR_BAD_HIGH_SLOT: 4406 case -NFS4ERR_BAD_HIGH_SLOT:
4372 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4407 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4373 case -NFS4ERR_DEADSESSION: 4408 case -NFS4ERR_DEADSESSION:
4374 nfs4_schedule_state_recovery(server->nfs_client); 4409 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
4375 goto out; 4410 goto out;
4376 case -ERESTARTSYS: 4411 case -ERESTARTSYS:
4377 /* 4412 /*
@@ -4381,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4381 case -NFS4ERR_ADMIN_REVOKED: 4416 case -NFS4ERR_ADMIN_REVOKED:
4382 case -NFS4ERR_BAD_STATEID: 4417 case -NFS4ERR_BAD_STATEID:
4383 case -NFS4ERR_OPENMODE: 4418 case -NFS4ERR_OPENMODE:
4384 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4419 nfs4_schedule_stateid_recovery(server, state);
4385 err = 0; 4420 err = 0;
4386 goto out; 4421 goto out;
4387 case -EKEYEXPIRED: 4422 case -EKEYEXPIRED:
@@ -4988,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
4988 int status; 5023 int status;
4989 unsigned *ptr; 5024 unsigned *ptr;
4990 struct nfs4_session *session = clp->cl_session; 5025 struct nfs4_session *session = clp->cl_session;
5026 long timeout = 0;
5027 int err;
4991 5028
4992 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5029 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
4993 5030
4994 status = _nfs4_proc_create_session(clp); 5031 do {
5032 status = _nfs4_proc_create_session(clp);
5033 if (status == -NFS4ERR_DELAY) {
5034 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5035 if (err)
5036 status = err;
5037 }
5038 } while (status == -NFS4ERR_DELAY);
5039
4995 if (status) 5040 if (status)
4996 goto out; 5041 goto out;
4997 5042
@@ -5100,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5100 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5145 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5101 return -EAGAIN; 5146 return -EAGAIN;
5102 default: 5147 default:
5103 nfs4_schedule_state_recovery(clp); 5148 nfs4_schedule_lease_recovery(clp);
5104 } 5149 }
5105 return 0; 5150 return 0;
5106} 5151}
@@ -5187,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
5187 if (IS_ERR(task)) 5232 if (IS_ERR(task))
5188 ret = PTR_ERR(task); 5233 ret = PTR_ERR(task);
5189 else 5234 else
5190 rpc_put_task(task); 5235 rpc_put_task_async(task);
5191 dprintk("<-- %s status=%d\n", __func__, ret); 5236 dprintk("<-- %s status=%d\n", __func__, ret);
5192 return ret; 5237 return ret;
5193} 5238}
@@ -5203,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5203 goto out; 5248 goto out;
5204 } 5249 }
5205 ret = rpc_wait_for_completion_task(task); 5250 ret = rpc_wait_for_completion_task(task);
5206 if (!ret) 5251 if (!ret) {
5252 struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
5253
5254 if (task->tk_status == 0)
5255 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
5207 ret = task->tk_status; 5256 ret = task->tk_status;
5257 }
5208 rpc_put_task(task); 5258 rpc_put_task(task);
5209out: 5259out:
5210 dprintk("<-- %s status=%d\n", __func__, ret); 5260 dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5241 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5291 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5242 return -EAGAIN; 5292 return -EAGAIN;
5243 default: 5293 default:
5244 nfs4_schedule_state_recovery(clp); 5294 nfs4_schedule_lease_recovery(clp);
5245 } 5295 }
5246 return 0; 5296 return 0;
5247} 5297}
@@ -5309,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5309 status = PTR_ERR(task); 5359 status = PTR_ERR(task);
5310 goto out; 5360 goto out;
5311 } 5361 }
5362 status = nfs4_wait_for_completion_rpc_task(task);
5363 if (status == 0)
5364 status = task->tk_status;
5312 rpc_put_task(task); 5365 rpc_put_task(task);
5313 return 0; 5366 return 0;
5314out: 5367out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..0592288f9f06 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
1007} 1007}
1008 1008
1009/* 1009/*
1010 * Schedule a state recovery attempt 1010 * Schedule a lease recovery attempt
1011 */ 1011 */
1012void nfs4_schedule_state_recovery(struct nfs_client *clp) 1012void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1013{ 1013{
1014 if (!clp) 1014 if (!clp)
1015 return; 1015 return;
@@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
1018 nfs4_schedule_state_manager(clp); 1018 nfs4_schedule_state_manager(clp);
1019} 1019}
1020 1020
1021int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1021static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1022{ 1022{
1023 1023
1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
1032 return 1; 1032 return 1;
1033} 1033}
1034 1034
1035int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1035static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1036{ 1036{
1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
1041 return 1; 1041 return 1;
1042} 1042}
1043 1043
1044void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1045{
1046 struct nfs_client *clp = server->nfs_client;
1047
1048 nfs4_state_mark_reclaim_nograce(clp, state);
1049 nfs4_schedule_state_manager(clp);
1050}
1051
1044static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1052static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1045{ 1053{
1046 struct inode *inode = state->inode; 1054 struct inode *inode = state->inode;
@@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1436} 1444}
1437 1445
1438#ifdef CONFIG_NFS_V4_1 1446#ifdef CONFIG_NFS_V4_1
1447void nfs4_schedule_session_recovery(struct nfs4_session *session)
1448{
1449 nfs4_schedule_lease_recovery(session->clp);
1450}
1451
1439void nfs41_handle_recall_slot(struct nfs_client *clp) 1452void nfs41_handle_recall_slot(struct nfs_client *clp)
1440{ 1453{
1441 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1454 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1442 nfs4_schedule_state_recovery(clp); 1455 nfs4_schedule_state_manager(clp);
1443} 1456}
1444 1457
1445static void nfs4_reset_all_state(struct nfs_client *clp) 1458static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
1447 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1460 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1448 clp->cl_boot_time = CURRENT_TIME; 1461 clp->cl_boot_time = CURRENT_TIME;
1449 nfs4_state_start_reclaim_nograce(clp); 1462 nfs4_state_start_reclaim_nograce(clp);
1450 nfs4_schedule_state_recovery(clp); 1463 nfs4_schedule_state_manager(clp);
1451 } 1464 }
1452} 1465}
1453 1466
@@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1455{ 1468{
1456 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1469 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1457 nfs4_state_start_reclaim_reboot(clp); 1470 nfs4_state_start_reclaim_reboot(clp);
1458 nfs4_schedule_state_recovery(clp); 1471 nfs4_schedule_state_manager(clp);
1459 } 1472 }
1460} 1473}
1461 1474
@@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1475{ 1488{
1476 nfs_expire_all_delegations(clp); 1489 nfs_expire_all_delegations(clp);
1477 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1490 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1478 nfs4_schedule_state_recovery(clp); 1491 nfs4_schedule_state_manager(clp);
1479} 1492}
1480 1493
1481void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1494void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..94d50e86a124 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1660 1660
1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1662 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1662 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1663 p = xdr_encode_hyper(p, clp->cl_ex_clid); 1663 p = xdr_encode_hyper(p, clp->cl_clientid);
1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1665 *p++ = cpu_to_be32(args->flags); /*flags */ 1665 *p++ = cpu_to_be32(args->flags); /*flags */
1666 1666
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4694 p = xdr_inline_decode(xdr, 8); 4694 p = xdr_inline_decode(xdr, 8);
4695 if (unlikely(!p)) 4695 if (unlikely(!p))
4696 goto out_overflow; 4696 goto out_overflow;
4697 xdr_decode_hyper(p, &clp->cl_ex_clid); 4697 xdr_decode_hyper(p, &clp->cl_clientid);
4698 p = xdr_inline_decode(xdr, 12); 4698 p = xdr_inline_decode(xdr, 12);
4699 if (unlikely(!p)) 4699 if (unlikely(!p))
4700 goto out_overflow; 4700 goto out_overflow;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
86/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
87#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
88 88
89/* Default NFSROOT mount options. */
90#define NFS_DEF_OPTIONS "udp"
91
89/* Parameters passed from the kernel command line */ 92/* Parameters passed from the kernel command line */
90static char nfs_root_parms[256] __initdata = ""; 93static char nfs_root_parms[256] __initdata = "";
91 94
92/* Text-based mount options passed to super.c */ 95/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = ""; 96static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
94 97
95/* Address of NFS server */ 98/* Address of NFS server */
96static __be32 servaddr __initdata = htonl(INADDR_NONE); 99static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
160} 163}
161 164
162static int __init root_nfs_cat(char *dest, const char *src, 165static int __init root_nfs_cat(char *dest, const char *src,
163 const size_t destlen) 166 const size_t destlen)
164{ 167{
168 size_t len = strlen(dest);
169
170 if (len && dest[len - 1] != ',')
171 if (strlcat(dest, ",", destlen) > destlen)
172 return -1;
173
165 if (strlcat(dest, src, destlen) > destlen) 174 if (strlcat(dest, src, destlen) > destlen)
166 return -1; 175 return -1;
167 return 0; 176 return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
194 if (root_nfs_cat(nfs_root_options, incoming, 203 if (root_nfs_cat(nfs_root_options, incoming,
195 sizeof(nfs_root_options))) 204 sizeof(nfs_root_options)))
196 return -1; 205 return -1;
197
198 /*
199 * Possibly prepare for more options to be appended
200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
206
207 return 0; 206 return 0;
208} 207}
209 208
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
217 */ 216 */
218static int __init root_nfs_data(char *cmdline) 217static int __init root_nfs_data(char *cmdline)
219{ 218{
220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; 219 char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
221 int len, retval = -1; 220 int len, retval = -1;
222 char *tmp = NULL; 221 char *tmp = NULL;
223 const size_t tmplen = sizeof(nfs_export_path); 222 const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
244 * Append mandatory options for nfsroot so they override 243 * Append mandatory options for nfsroot so they override
245 * what has come before 244 * what has come before
246 */ 245 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", 246 snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
248 &servaddr); 247 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option, 248 if (root_nfs_cat(nfs_root_options, mand_options,
250 sizeof(nfs_root_options))) 249 sizeof(nfs_root_options)))
251 goto out_optionstoolong; 250 goto out_optionstoolong;
252 251
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..6481d537d69d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
180 task_setup_data.rpc_client = NFS_CLIENT(dir); 180 task_setup_data.rpc_client = NFS_CLIENT(dir);
181 task = rpc_run_task(&task_setup_data); 181 task = rpc_run_task(&task_setup_data);
182 if (!IS_ERR(task)) 182 if (!IS_ERR(task))
183 rpc_put_task(task); 183 rpc_put_task_async(task);
184 return 1; 184 return 1;
185} 185}
186 186
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..42b92d7a9cc4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1292 task = rpc_run_task(&task_setup_data); 1292 task = rpc_run_task(&task_setup_data);
1293 if (IS_ERR(task)) 1293 if (IS_ERR(task))
1294 return PTR_ERR(task); 1294 return PTR_ERR(task);
1295 if (how & FLUSH_SYNC)
1296 rpc_wait_for_completion_task(task);
1295 rpc_put_task(task); 1297 rpc_put_task(task);
1296 return 0; 1298 return 0;
1297} 1299}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242ddd..124e8fcb0dd6 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
22 22
23static struct file *do_open(char *name, int flags) 23static struct file *do_open(char *name, int flags)
24{ 24{
25 struct nameidata nd;
26 struct vfsmount *mnt; 25 struct vfsmount *mnt;
27 int error; 26 struct file *file;
28 27
29 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
30 if (IS_ERR(mnt)) 29 if (IS_ERR(mnt))
31 return (struct file *)mnt; 30 return (struct file *)mnt;
32 31
33 error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 file = file_open_root(mnt->mnt_root, mnt, name, flags);
34 mntput(mnt); /* drop do_kern_mount reference */
35 if (error)
36 return ERR_PTR(error);
37
38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 else
41 error = may_open(&nd.path, MAY_WRITE, flags);
42 33
43 if (!error) 34 mntput(mnt); /* drop do_kern_mount reference */
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 35 return file;
45 current_cred());
46
47 path_put(&nd.path);
48 return ERR_PTR(error);
49} 36}
50 37
51static struct { 38static struct {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cde36cb0f348..02eb4edf0ece 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
432 * If the server returns different values for sessionID, slotID or 432 * If the server returns different values for sessionID, slotID or
433 * sequence number, the server is looney tunes. 433 * sequence number, the server is looney tunes.
434 */ 434 */
435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4); 435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
436 if (unlikely(p == NULL)) 436 if (unlikely(p == NULL))
437 goto out_overflow; 437 goto out_overflow;
438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 54b60bfceb8d..7b566ec14e18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2445,15 +2445,16 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2445static struct nfs4_delegation * 2445static struct nfs4_delegation *
2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid) 2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2447{ 2447{
2448 struct nfs4_delegation *dp = NULL; 2448 struct nfs4_delegation *dp;
2449 2449
2450 spin_lock(&recall_lock); 2450 spin_lock(&recall_lock);
2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { 2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) 2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
2453 break; 2453 spin_unlock(&recall_lock);
2454 } 2454 return dp;
2455 }
2455 spin_unlock(&recall_lock); 2456 spin_unlock(&recall_lock);
2456 return dp; 2457 return NULL;
2457} 2458}
2458 2459
2459int share_access_to_flags(u32 share_access) 2460int share_access_to_flags(u32 share_access)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b9cdc9..615f0a9f0600 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
317 READ_BUF(dummy32); 317 READ_BUF(dummy32);
318 len += (XDR_QUADLEN(dummy32) << 2); 318 len += (XDR_QUADLEN(dummy32) << 2);
319 READMEM(buf, dummy32); 319 READMEM(buf, dummy32);
320 if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) 320 if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
321 goto out_nfserr; 321 return status;
322 iattr->ia_valid |= ATTR_UID; 322 iattr->ia_valid |= ATTR_UID;
323 } 323 }
324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { 324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
328 READ_BUF(dummy32); 328 READ_BUF(dummy32);
329 len += (XDR_QUADLEN(dummy32) << 2); 329 len += (XDR_QUADLEN(dummy32) << 2);
330 READMEM(buf, dummy32); 330 READMEM(buf, dummy32);
331 if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) 331 if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
332 goto out_nfserr; 332 return status;
333 iattr->ia_valid |= ATTR_GID; 333 iattr->ia_valid |= ATTR_GID;
334 } 334 }
335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { 335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1142 1142
1143 u32 dummy; 1143 u32 dummy;
1144 char *machine_name; 1144 char *machine_name;
1145 int i; 1145 int i, j;
1146 int nr_secflavs; 1146 int nr_secflavs;
1147 1147
1148 READ_BUF(16); 1148 READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1215 READ_BUF(4); 1215 READ_BUF(4);
1216 READ32(dummy); 1216 READ32(dummy);
1217 READ_BUF(dummy * 4); 1217 READ_BUF(dummy * 4);
1218 for (i = 0; i < dummy; ++i) 1218 for (j = 0; j < dummy; ++j)
1219 READ32(dummy); 1219 READ32(dummy);
1220 break; 1220 break;
1221 case RPC_AUTH_GSS: 1221 case RPC_AUTH_GSS:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f5286..85f7baa15f5d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
35#include "btnode.h" 35#include "btnode.h"
36 36
37 37
38void nilfs_btnode_cache_init_once(struct address_space *btnc)
39{
40 nilfs_mapping_init_once(btnc);
41}
42
43static const struct address_space_operations def_btnode_aops = { 38static const struct address_space_operations def_btnode_aops = {
44 .sync_page = block_sync_page, 39 .sync_page = block_sync_page,
45}; 40};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 41void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f60..a0babd2bff6a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
454 struct backing_dev_info *bdi = inode->i_sb->s_bdi; 454 struct backing_dev_info *bdi = inode->i_sb->s_bdi;
455 455
456 INIT_LIST_HEAD(&shadow->frozen_buffers); 456 INIT_LIST_HEAD(&shadow->frozen_buffers);
457 nilfs_mapping_init_once(&shadow->frozen_data); 457 address_space_init_once(&shadow->frozen_data);
458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); 458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
459 nilfs_mapping_init_once(&shadow->frozen_btnodes); 459 address_space_init_once(&shadow->frozen_btnodes);
460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); 460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
461 mi->mi_shadow = shadow; 461 mi->mi_shadow = shadow;
462 return 0; 462 return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..161791d26458 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); 397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
398 if (!new_de) 398 if (!new_de)
399 goto out_dir; 399 goto out_dir;
400 inc_nlink(old_inode);
401 nilfs_set_link(new_dir, new_de, new_page, old_inode); 400 nilfs_set_link(new_dir, new_de, new_page, old_inode);
402 nilfs_mark_inode_dirty(new_dir); 401 nilfs_mark_inode_dirty(new_dir);
403 new_inode->i_ctime = CURRENT_TIME; 402 new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
411 if (new_dir->i_nlink >= NILFS_LINK_MAX) 410 if (new_dir->i_nlink >= NILFS_LINK_MAX)
412 goto out_dir; 411 goto out_dir;
413 } 412 }
414 inc_nlink(old_inode);
415 err = nilfs_add_link(new_dentry, old_inode); 413 err = nilfs_add_link(new_dentry, old_inode);
416 if (err) { 414 if (err)
417 drop_nlink(old_inode);
418 nilfs_mark_inode_dirty(old_inode);
419 goto out_dir; 415 goto out_dir;
420 }
421 if (dir_de) { 416 if (dir_de) {
422 inc_nlink(new_dir); 417 inc_nlink(new_dir);
423 nilfs_mark_inode_dirty(new_dir); 418 nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
431 old_inode->i_ctime = CURRENT_TIME; 426 old_inode->i_ctime = CURRENT_TIME;
432 427
433 nilfs_delete_entry(old_de, old_page); 428 nilfs_delete_entry(old_de, old_page);
434 drop_nlink(old_inode);
435 429
436 if (dir_de) { 430 if (dir_de) {
437 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfef..a585b35fd6bc 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping)
496{
497 memset(mapping, 0, sizeof(*mapping));
498 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
499 spin_lock_init(&mapping->tree_lock);
500 INIT_LIST_HEAD(&mapping->private_list);
501 spin_lock_init(&mapping->private_lock);
502
503 spin_lock_init(&mapping->i_mmap_lock);
504 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
505 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
506}
507
508void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
509 struct backing_dev_info *bdi, 496 struct backing_dev_info *bdi,
510 const struct address_space_operations *aops) 497 const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd891..2a00953ebd5f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init_once(struct address_space *mapping);
65void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi, 65 struct backing_dev_info *bdi,
67 const struct address_space_operations *aops); 66 const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..2de9f636792a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
430 nilfs_segctor_map_segsum_entry( 430 nilfs_segctor_map_segsum_entry(
431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); 431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
432 432
433 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 433 if (NILFS_I(inode)->i_root &&
434 !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
434 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 435 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
435 /* skip finfo */ 436 /* skip finfo */
436} 437}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e1..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
1279#ifdef CONFIG_NILFS_XATTR 1279#ifdef CONFIG_NILFS_XATTR
1280 init_rwsem(&ii->xattr_sem); 1280 init_rwsem(&ii->xattr_sem);
1281#endif 1281#endif
1282 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 1282 address_space_init_once(&ii->i_btnode_cache);
1283 ii->i_bmap = &ii->i_bmap_data; 1283 ii->i_bmap = &ii->i_bmap_data;
1284 inode_init_once(&ii->vfs_inode); 1284 inode_init_once(&ii->vfs_inode);
1285} 1285}
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834f..7eb90403fc8a 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb; 57 struct ocfs2_super *osb;
58 58
59 if (nd->flags & LOOKUP_RCU) 59 if (nd && nd->flags & LOOKUP_RCU)
60 return -ECHILD; 60 return -ECHILD;
61 61
62 inode = dentry->d_inode; 62 inode = dentry->d_inode;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4fd..254652a9b542 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
197 dentry->d_name.len, dentry->d_name.name, 197 dentry->d_name.len, dentry->d_name.name,
198 fh, len, connectable); 198 fh, len, connectable);
199 199
200 if (len < 3 || (connectable && len < 6)) { 200 if (connectable && (len < 6)) {
201 mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 201 *max_len = 6;
202 type = 255;
203 goto bail;
204 } else if (len < 3) {
205 *max_len = 3;
202 type = 255; 206 type = 255;
203 goto bail; 207 goto bail;
204 } 208 }
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
405 ocfs2_quota_trans_credits(sb); 405 ocfs2_quota_trans_credits(sb);
406} 406}
407 407
408/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 408/* data block for new dir/symlink, allocation of directory block, dx_root
409 * bitmap block for the new bit) dx_root update for free list */ 409 * update for free list */
410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) 410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
411 411
412static inline int ocfs2_add_dir_index_credits(struct super_block *sb) 412static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
413{ 413{
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 849fb4a2e814..d6c25d76b537 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -293,7 +293,7 @@ static int ocfs2_mknod(struct inode *dir,
293 } 293 }
294 294
295 /* get security xattr */ 295 /* get security xattr */
296 status = ocfs2_init_security_get(inode, dir, &si); 296 status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
297 if (status) { 297 if (status) {
298 if (status == -EOPNOTSUPP) 298 if (status == -EOPNOTSUPP)
299 si.enable = 0; 299 si.enable = 0;
@@ -1665,7 +1665,7 @@ static int ocfs2_symlink(struct inode *dir,
1665 } 1665 }
1666 1666
1667 /* get security xattr */ 1667 /* get security xattr */
1668 status = ocfs2_init_security_get(inode, dir, &si); 1668 status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
1669 if (status) { 1669 if (status) {
1670 if (status == -EOPNOTSUPP) 1670 if (status == -EOPNOTSUPP)
1671 si.enable = 0; 1671 si.enable = 0;
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 196fcb52d95d..d5ab56cbe5c5 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -114,7 +114,4 @@ int ocfs2_local_write_dquot(struct dquot *dquot);
114extern const struct dquot_operations ocfs2_quota_operations; 114extern const struct dquot_operations ocfs2_quota_operations;
115extern struct quota_format_type ocfs2_quota_format; 115extern struct quota_format_type ocfs2_quota_format;
116 116
117int ocfs2_quota_setup(void);
118void ocfs2_quota_shutdown(void);
119
120#endif /* _OCFS2_QUOTA_H */ 117#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4607923eb24c..a73f64166481 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -63,8 +63,6 @@
63 * write to gf 63 * write to gf
64 */ 64 */
65 65
66static struct workqueue_struct *ocfs2_quota_wq = NULL;
67
68static void qsync_work_fn(struct work_struct *work); 66static void qsync_work_fn(struct work_struct *work);
69 67
70static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) 68static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
@@ -400,8 +398,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
400 OCFS2_QBLK_RESERVED_SPACE; 398 OCFS2_QBLK_RESERVED_SPACE;
401 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 399 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
402 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 400 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
403 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 401 schedule_delayed_work(&oinfo->dqi_sync_work,
404 msecs_to_jiffies(oinfo->dqi_syncms)); 402 msecs_to_jiffies(oinfo->dqi_syncms));
405 403
406out_err: 404out_err:
407 mlog_exit(status); 405 mlog_exit(status);
@@ -635,8 +633,8 @@ static void qsync_work_fn(struct work_struct *work)
635 struct super_block *sb = oinfo->dqi_gqinode->i_sb; 633 struct super_block *sb = oinfo->dqi_gqinode->i_sb;
636 634
637 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 635 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
638 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 636 schedule_delayed_work(&oinfo->dqi_sync_work,
639 msecs_to_jiffies(oinfo->dqi_syncms)); 637 msecs_to_jiffies(oinfo->dqi_syncms));
640} 638}
641 639
642/* 640/*
@@ -923,20 +921,3 @@ const struct dquot_operations ocfs2_quota_operations = {
923 .alloc_dquot = ocfs2_alloc_dquot, 921 .alloc_dquot = ocfs2_alloc_dquot,
924 .destroy_dquot = ocfs2_destroy_dquot, 922 .destroy_dquot = ocfs2_destroy_dquot,
925}; 923};
926
927int ocfs2_quota_setup(void)
928{
929 ocfs2_quota_wq = create_workqueue("o2quot");
930 if (!ocfs2_quota_wq)
931 return -ENOMEM;
932 return 0;
933}
934
935void ocfs2_quota_shutdown(void)
936{
937 if (ocfs2_quota_wq) {
938 flush_workqueue(ocfs2_quota_wq);
939 destroy_workqueue(ocfs2_quota_wq);
940 ocfs2_quota_wq = NULL;
941 }
942}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..c384d634872a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3228 u32 num_clusters, unsigned int e_flags) 3228 u32 num_clusters, unsigned int e_flags)
3229{ 3229{
3230 int ret, delete, index, credits = 0; 3230 int ret, delete, index, credits = 0;
3231 u32 new_bit, new_len; 3231 u32 new_bit, new_len, orig_num_clusters;
3232 unsigned int set_len; 3232 unsigned int set_len;
3233 struct ocfs2_super *osb = OCFS2_SB(sb); 3233 struct ocfs2_super *osb = OCFS2_SB(sb);
3234 handle_t *handle; 3234 handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3261 goto out; 3261 goto out;
3262 } 3262 }
3263 3263
3264 orig_num_clusters = num_clusters;
3265
3264 while (num_clusters) { 3266 while (num_clusters) {
3265 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, 3267 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3266 p_cluster, num_clusters, 3268 p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3348 * in write-back mode. 3350 * in write-back mode.
3349 */ 3351 */
3350 if (context->get_clusters == ocfs2_di_get_clusters) { 3352 if (context->get_clusters == ocfs2_di_get_clusters) {
3351 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); 3353 ret = ocfs2_cow_sync_writeback(sb, context, cpos,
3354 orig_num_clusters);
3352 if (ret) 3355 if (ret)
3353 mlog_errno(ret); 3356 mlog_errno(ret);
3354 } 3357 }
@@ -4325,7 +4328,8 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4325 4328
4326 /* If the security isn't preserved, we need to re-initialize them. */ 4329 /* If the security isn't preserved, we need to re-initialize them. */
4327 if (!preserve) { 4330 if (!preserve) {
4328 error = ocfs2_init_security_and_acl(dir, new_orphan_inode); 4331 error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
4332 &new_dentry->d_name);
4329 if (error) 4333 if (error)
4330 mlog_errno(error); 4334 mlog_errno(error);
4331 } 4335 }
@@ -4376,7 +4380,7 @@ static int ocfs2_user_path_parent(const char __user *path,
4376 if (IS_ERR(s)) 4380 if (IS_ERR(s))
4377 return PTR_ERR(s); 4381 return PTR_ERR(s);
4378 4382
4379 error = path_lookup(s, LOOKUP_PARENT, nd); 4383 error = kern_path_parent(s, nd);
4380 if (error) 4384 if (error)
4381 putname(s); 4385 putname(s);
4382 else 4386 else
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..236ed1bdca2c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1316 struct mount_options *mopt, 1316 struct mount_options *mopt,
1317 int is_remount) 1317 int is_remount)
1318{ 1318{
1319 int status; 1319 int status, user_stack = 0;
1320 char *p; 1320 char *p;
1321 u32 tmp; 1321 u32 tmp;
1322 1322
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
1459 memcpy(mopt->cluster_stack, args[0].from, 1459 memcpy(mopt->cluster_stack, args[0].from,
1460 OCFS2_STACK_LABEL_LEN); 1460 OCFS2_STACK_LABEL_LEN);
1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
1462 /*
1463 * Open code the memcmp here as we don't have
1464 * an osb to pass to
1465 * ocfs2_userspace_stack().
1466 */
1467 if (memcmp(mopt->cluster_stack,
1468 OCFS2_CLASSIC_CLUSTER_STACK,
1469 OCFS2_STACK_LABEL_LEN))
1470 user_stack = 1;
1462 break; 1471 break;
1463 case Opt_inode64: 1472 case Opt_inode64:
1464 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1473 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
1514 } 1523 }
1515 } 1524 }
1516 1525
1517 /* Ensure only one heartbeat mode */ 1526 if (user_stack == 0) {
1518 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 1527 /* Ensure only one heartbeat mode */
1519 OCFS2_MOUNT_HB_NONE); 1528 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
1520 if (hweight32(tmp) != 1) { 1529 OCFS2_MOUNT_HB_GLOBAL |
1521 mlog(ML_ERROR, "Invalid heartbeat mount options\n"); 1530 OCFS2_MOUNT_HB_NONE);
1522 status = 0; 1531 if (hweight32(tmp) != 1) {
1523 goto bail; 1532 mlog(ML_ERROR, "Invalid heartbeat mount options\n");
1533 status = 0;
1534 goto bail;
1535 }
1524 } 1536 }
1525 1537
1526 status = 1; 1538 status = 1;
@@ -1645,16 +1657,11 @@ static int __init ocfs2_init(void)
1645 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 1657 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
1646 } 1658 }
1647 1659
1648 status = ocfs2_quota_setup();
1649 if (status)
1650 goto leave;
1651
1652 ocfs2_set_locking_protocol(); 1660 ocfs2_set_locking_protocol();
1653 1661
1654 status = register_quota_format(&ocfs2_quota_format); 1662 status = register_quota_format(&ocfs2_quota_format);
1655leave: 1663leave:
1656 if (status < 0) { 1664 if (status < 0) {
1657 ocfs2_quota_shutdown();
1658 ocfs2_free_mem_caches(); 1665 ocfs2_free_mem_caches();
1659 exit_ocfs2_uptodate_cache(); 1666 exit_ocfs2_uptodate_cache();
1660 } 1667 }
@@ -1671,8 +1678,6 @@ static void __exit ocfs2_exit(void)
1671{ 1678{
1672 mlog_entry_void(); 1679 mlog_entry_void();
1673 1680
1674 ocfs2_quota_shutdown();
1675
1676 if (ocfs2_wq) { 1681 if (ocfs2_wq) {
1677 flush_workqueue(ocfs2_wq); 1682 flush_workqueue(ocfs2_wq);
1678 destroy_workqueue(ocfs2_wq); 1683 destroy_workqueue(ocfs2_wq);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 67cd43914641..6bb602486c6b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7185,7 +7185,8 @@ out:
7185 * must not hold any lock expect i_mutex. 7185 * must not hold any lock expect i_mutex.
7186 */ 7186 */
7187int ocfs2_init_security_and_acl(struct inode *dir, 7187int ocfs2_init_security_and_acl(struct inode *dir,
7188 struct inode *inode) 7188 struct inode *inode,
7189 const struct qstr *qstr)
7189{ 7190{
7190 int ret = 0; 7191 int ret = 0;
7191 struct buffer_head *dir_bh = NULL; 7192 struct buffer_head *dir_bh = NULL;
@@ -7193,7 +7194,7 @@ int ocfs2_init_security_and_acl(struct inode *dir,
7193 .enable = 1, 7194 .enable = 1,
7194 }; 7195 };
7195 7196
7196 ret = ocfs2_init_security_get(inode, dir, &si); 7197 ret = ocfs2_init_security_get(inode, dir, qstr, &si);
7197 if (!ret) { 7198 if (!ret) {
7198 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7199 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7199 si.name, si.value, si.value_len, 7200 si.name, si.value, si.value_len,
@@ -7261,13 +7262,14 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7261 7262
7262int ocfs2_init_security_get(struct inode *inode, 7263int ocfs2_init_security_get(struct inode *inode,
7263 struct inode *dir, 7264 struct inode *dir,
7265 const struct qstr *qstr,
7264 struct ocfs2_security_xattr_info *si) 7266 struct ocfs2_security_xattr_info *si)
7265{ 7267{
7266 /* check whether ocfs2 support feature xattr */ 7268 /* check whether ocfs2 support feature xattr */
7267 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7269 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7268 return -EOPNOTSUPP; 7270 return -EOPNOTSUPP;
7269 return security_inode_init_security(inode, dir, &si->name, &si->value, 7271 return security_inode_init_security(inode, dir, qstr, &si->name,
7270 &si->value_len); 7272 &si->value, &si->value_len);
7271} 7273}
7272 7274
7273int ocfs2_init_security_set(handle_t *handle, 7275int ocfs2_init_security_set(handle_t *handle,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index aa64bb37a65b..d63cfb72316b 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -57,6 +57,7 @@ int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
57 struct ocfs2_dinode *di); 57 struct ocfs2_dinode *di);
58int ocfs2_xattr_remove(struct inode *, struct buffer_head *); 58int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
59int ocfs2_init_security_get(struct inode *, struct inode *, 59int ocfs2_init_security_get(struct inode *, struct inode *,
60 const struct qstr *,
60 struct ocfs2_security_xattr_info *); 61 struct ocfs2_security_xattr_info *);
61int ocfs2_init_security_set(handle_t *, struct inode *, 62int ocfs2_init_security_set(handle_t *, struct inode *,
62 struct buffer_head *, 63 struct buffer_head *,
@@ -94,5 +95,6 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
94 struct buffer_head *new_bh, 95 struct buffer_head *new_bh,
95 bool preserve_security); 96 bool preserve_security);
96int ocfs2_init_security_and_acl(struct inode *dir, 97int ocfs2_init_security_and_acl(struct inode *dir,
97 struct inode *inode); 98 struct inode *inode,
99 const struct qstr *qstr);
98#endif /* OCFS2_XATTR_H */ 100#endif /* OCFS2_XATTR_H */
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b5..f83ca80cc59a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
233 233
234 if (!(file->f_mode & FMODE_WRITE)) 234 if (!(file->f_mode & FMODE_WRITE))
235 return -EBADF; 235 return -EBADF;
236
237 /* It's not possible punch hole on append only file */
238 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
239 return -EPERM;
240
241 if (IS_IMMUTABLE(inode))
242 return -EPERM;
243
236 /* 244 /*
237 * Revalidate the write permissions, in case security policy has 245 * Revalidate the write permissions, in case security policy has
238 * changed since the files were opened. 246 * changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
565{ 573{
566 struct path path; 574 struct path path;
567 int error = -EINVAL; 575 int error = -EINVAL;
568 int follow; 576 int lookup_flags;
569 577
570 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
571 goto out; 579 goto out;
572 580
573 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 581 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
574 error = user_path_at(dfd, filename, follow, &path); 582 if (flag & AT_EMPTY_PATH)
583 lookup_flags |= LOOKUP_EMPTY;
584 error = user_path_at(dfd, filename, lookup_flags, &path);
575 if (error) 585 if (error)
576 goto out; 586 goto out;
577 error = mnt_want_write(path.mnt); 587 error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
661 int (*open)(struct inode *, struct file *), 671 int (*open)(struct inode *, struct file *),
662 const struct cred *cred) 672 const struct cred *cred)
663{ 673{
674 static const struct file_operations empty_fops = {};
664 struct inode *inode; 675 struct inode *inode;
665 int error; 676 int error;
666 677
667 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
668 FMODE_PREAD | FMODE_PWRITE; 679 FMODE_PREAD | FMODE_PWRITE;
680
681 if (unlikely(f->f_flags & O_PATH))
682 f->f_mode = FMODE_PATH;
683
669 inode = dentry->d_inode; 684 inode = dentry->d_inode;
670 if (f->f_mode & FMODE_WRITE) { 685 if (f->f_mode & FMODE_WRITE) {
671 error = __get_file_write_access(inode, mnt); 686 error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
679 f->f_path.dentry = dentry; 694 f->f_path.dentry = dentry;
680 f->f_path.mnt = mnt; 695 f->f_path.mnt = mnt;
681 f->f_pos = 0; 696 f->f_pos = 0;
682 f->f_op = fops_get(inode->i_fop);
683 file_sb_list_add(f, inode->i_sb); 697 file_sb_list_add(f, inode->i_sb);
684 698
699 if (unlikely(f->f_mode & FMODE_PATH)) {
700 f->f_op = &empty_fops;
701 return f;
702 }
703
704 f->f_op = fops_get(inode->i_fop);
705
685 error = security_dentry_open(f, cred); 706 error = security_dentry_open(f, cred);
686 if (error) 707 if (error)
687 goto cleanup_all; 708 goto cleanup_all;
@@ -693,7 +714,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
693 if (error) 714 if (error)
694 goto cleanup_all; 715 goto cleanup_all;
695 } 716 }
696 ima_counts_get(f); 717 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
718 i_readcount_inc(inode);
697 719
698 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 720 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
699 721
@@ -882,15 +904,110 @@ void fd_install(unsigned int fd, struct file *file)
882 904
883EXPORT_SYMBOL(fd_install); 905EXPORT_SYMBOL(fd_install);
884 906
907static inline int build_open_flags(int flags, int mode, struct open_flags *op)
908{
909 int lookup_flags = 0;
910 int acc_mode;
911
912 if (!(flags & O_CREAT))
913 mode = 0;
914 op->mode = mode;
915
916 /* Must never be set by userspace */
917 flags &= ~FMODE_NONOTIFY;
918
919 /*
920 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
921 * check for O_DSYNC if the need any syncing at all we enforce it's
922 * always set instead of having to deal with possibly weird behaviour
923 * for malicious applications setting only __O_SYNC.
924 */
925 if (flags & __O_SYNC)
926 flags |= O_DSYNC;
927
928 /*
929 * If we have O_PATH in the open flag. Then we
930 * cannot have anything other than the below set of flags
931 */
932 if (flags & O_PATH) {
933 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
934 acc_mode = 0;
935 } else {
936 acc_mode = MAY_OPEN | ACC_MODE(flags);
937 }
938
939 op->open_flag = flags;
940
941 /* O_TRUNC implies we need access checks for write permissions */
942 if (flags & O_TRUNC)
943 acc_mode |= MAY_WRITE;
944
945 /* Allow the LSM permission hook to distinguish append
946 access from general write access. */
947 if (flags & O_APPEND)
948 acc_mode |= MAY_APPEND;
949
950 op->acc_mode = acc_mode;
951
952 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
953
954 if (flags & O_CREAT) {
955 op->intent |= LOOKUP_CREATE;
956 if (flags & O_EXCL)
957 op->intent |= LOOKUP_EXCL;
958 }
959
960 if (flags & O_DIRECTORY)
961 lookup_flags |= LOOKUP_DIRECTORY;
962 if (!(flags & O_NOFOLLOW))
963 lookup_flags |= LOOKUP_FOLLOW;
964 return lookup_flags;
965}
966
967/**
968 * filp_open - open file and return file pointer
969 *
970 * @filename: path to open
971 * @flags: open flags as per the open(2) second argument
972 * @mode: mode for the new file if O_CREAT is set, else ignored
973 *
974 * This is the helper to open a file from kernelspace if you really
975 * have to. But in generally you should not do this, so please move
976 * along, nothing to see here..
977 */
978struct file *filp_open(const char *filename, int flags, int mode)
979{
980 struct open_flags op;
981 int lookup = build_open_flags(flags, mode, &op);
982 return do_filp_open(AT_FDCWD, filename, &op, lookup);
983}
984EXPORT_SYMBOL(filp_open);
985
986struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
987 const char *filename, int flags)
988{
989 struct open_flags op;
990 int lookup = build_open_flags(flags, 0, &op);
991 if (flags & O_CREAT)
992 return ERR_PTR(-EINVAL);
993 if (!filename && (flags & O_DIRECTORY))
994 if (!dentry->d_inode->i_op->lookup)
995 return ERR_PTR(-ENOTDIR);
996 return do_file_open_root(dentry, mnt, filename, &op, lookup);
997}
998EXPORT_SYMBOL(file_open_root);
999
885long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 1000long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
886{ 1001{
1002 struct open_flags op;
1003 int lookup = build_open_flags(flags, mode, &op);
887 char *tmp = getname(filename); 1004 char *tmp = getname(filename);
888 int fd = PTR_ERR(tmp); 1005 int fd = PTR_ERR(tmp);
889 1006
890 if (!IS_ERR(tmp)) { 1007 if (!IS_ERR(tmp)) {
891 fd = get_unused_fd_flags(flags); 1008 fd = get_unused_fd_flags(flags);
892 if (fd >= 0) { 1009 if (fd >= 0) {
893 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 1010 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
894 if (IS_ERR(f)) { 1011 if (IS_ERR(f)) {
895 put_unused_fd(fd); 1012 put_unused_fd(fd);
896 fd = PTR_ERR(f); 1013 fd = PTR_ERR(f);
@@ -960,8 +1077,10 @@ int filp_close(struct file *filp, fl_owner_t id)
960 if (filp->f_op && filp->f_op->flush) 1077 if (filp->f_op && filp->f_op->flush)
961 retval = filp->f_op->flush(filp, id); 1078 retval = filp->f_op->flush(filp, id);
962 1079
963 dnotify_flush(filp, id); 1080 if (likely(!(filp->f_mode & FMODE_PATH))) {
964 locks_remove_posix(filp, id); 1081 dnotify_flush(filp, id);
1082 locks_remove_posix(filp, id);
1083 }
965 fput(filp); 1084 fput(filp);
966 return retval; 1085 return retval;
967} 1086}
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
251 } 251 }
252 252
253 vm->vblk_size = get_unaligned_be32(data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 if (vm->vblk_size == 0) {
255 ldm_error ("Illegal VBLK size");
256 return false;
257 }
258
254 vm->vblk_offset = get_unaligned_be32(data + 0x0C); 259 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04); 260 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 261
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a216ee79..11f688bd76c5 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
29 29
30int mac_partition(struct parsed_partitions *state) 30int mac_partition(struct parsed_partitions *state)
31{ 31{
32 int slot = 1;
33 Sector sect; 32 Sector sect;
34 unsigned char *data; 33 unsigned char *data;
35 int blk, blocks_in_map; 34 int slot, blocks_in_map;
36 unsigned secsize; 35 unsigned secsize;
37#ifdef CONFIG_PPC_PMAC 36#ifdef CONFIG_PPC_PMAC
38 int found_root = 0; 37 int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
59 put_dev_sector(sect); 58 put_dev_sector(sect);
60 return 0; /* not a MacOS disk */ 59 return 0; /* not a MacOS disk */
61 } 60 }
62 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
63 blocks_in_map = be32_to_cpu(part->map_count); 61 blocks_in_map = be32_to_cpu(part->map_count);
64 for (blk = 1; blk <= blocks_in_map; ++blk) { 62 if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
65 int pos = blk * secsize; 63 put_dev_sector(sect);
64 return 0;
65 }
66 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
67 for (slot = 1; slot <= blocks_in_map; ++slot) {
68 int pos = slot * secsize;
66 put_dev_sector(sect); 69 put_dev_sector(sect);
67 data = read_part_sector(state, pos/512, &sect); 70 data = read_part_sector(state, pos/512, &sect);
68 if (!data) 71 if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
113 } 116 }
114 117
115 if (goodness > found_root_goodness) { 118 if (goodness > found_root_goodness) {
116 found_root = blk; 119 found_root = slot;
117 found_root_goodness = goodness; 120 found_root_goodness = goodness;
118 } 121 }
119 } 122 }
120#endif /* CONFIG_PPC_PMAC */ 123#endif /* CONFIG_PPC_PMAC */
121
122 ++slot;
123 } 124 }
124#ifdef CONFIG_PPC_PMAC 125#ifdef CONFIG_PPC_PMAC
125 if (found_root_goodness) 126 if (found_root_goodness)
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca17..764b86a01965 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
10#include "check.h" 10#include "check.h"
11#include "osf.h" 11#include "osf.h"
12 12
13#define MAX_OSF_PARTITIONS 18
14
13int osf_partition(struct parsed_partitions *state) 15int osf_partition(struct parsed_partitions *state)
14{ 16{
15 int i; 17 int i;
16 int slot = 1; 18 int slot = 1;
19 unsigned int npartitions;
17 Sector sect; 20 Sector sect;
18 unsigned char *data; 21 unsigned char *data;
19 struct disklabel { 22 struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
45 u8 p_fstype; 48 u8 p_fstype;
46 u8 p_frag; 49 u8 p_frag;
47 __le16 p_cpg; 50 __le16 p_cpg;
48 } d_partitions[8]; 51 } d_partitions[MAX_OSF_PARTITIONS];
49 } * label; 52 } * label;
50 struct d_partition * partition; 53 struct d_partition * partition;
51 54
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
63 put_dev_sector(sect); 66 put_dev_sector(sect);
64 return 0; 67 return 0;
65 } 68 }
66 for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) { 69 npartitions = le16_to_cpu(label->d_npartitions);
70 if (npartitions > MAX_OSF_PARTITIONS) {
71 put_dev_sector(sect);
72 return 0;
73 }
74 for (i = 0 ; i < npartitions; i++, partition++) {
67 if (slot == state->limit) 75 if (slot == state->limit)
68 break; 76 break;
69 if (le32_to_cpu(partition->p_size)) 77 if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b201..d49c4b5d2c3e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
2620 &proc_self_inode_operations, NULL, {}), 2620 &proc_self_inode_operations, NULL, {}),
2621}; 2621};
2622 2622
2623/*
2624 * Exceptional case: normally we are not allowed to unhash a busy
2625 * directory. In this case, however, we can do it - no aliasing problems
2626 * due to the way we treat inodes.
2627 */
2628static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2629{
2630 struct inode *inode;
2631 struct task_struct *task;
2632
2633 if (nd->flags & LOOKUP_RCU)
2634 return -ECHILD;
2635
2636 inode = dentry->d_inode;
2637 task = get_proc_task(inode);
2638 if (task) {
2639 put_task_struct(task);
2640 return 1;
2641 }
2642 d_drop(dentry);
2643 return 0;
2644}
2645
2646static const struct dentry_operations proc_base_dentry_operations =
2647{
2648 .d_revalidate = proc_base_revalidate,
2649 .d_delete = pid_delete_dentry,
2650};
2651
2652static struct dentry *proc_base_instantiate(struct inode *dir, 2623static struct dentry *proc_base_instantiate(struct inode *dir,
2653 struct dentry *dentry, struct task_struct *task, const void *ptr) 2624 struct dentry *dentry, struct task_struct *task, const void *ptr)
2654{ 2625{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2685 if (p->fop) 2656 if (p->fop)
2686 inode->i_fop = p->fop; 2657 inode->i_fop = p->fop;
2687 ei->op = p->op; 2658 ei->op = p->op;
2688 d_set_d_op(dentry, &proc_base_dentry_operations);
2689 d_add(dentry, inode); 2659 d_add(dentry, inode);
2690 error = NULL; 2660 error = NULL;
2691out: 2661out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68a..d6a7ca1fdac5 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
27static void proc_evict_inode(struct inode *inode) 27static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head;
30 31
31 truncate_inode_pages(&inode->i_data, 0); 32 truncate_inode_pages(&inode->i_data, 0);
32 end_writeback(inode); 33 end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
38 de = PROC_I(inode)->pde; 39 de = PROC_I(inode)->pde;
39 if (de) 40 if (de)
40 pde_put(de); 41 pde_put(de);
41 if (PROC_I(inode)->sysctl) 42 head = PROC_I(inode)->sysctl;
42 sysctl_head_put(PROC_I(inode)->sysctl); 43 if (head) {
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head);
46 }
43} 47}
44 48
45struct vfsmount *proc_mnt; 49struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
233 return; 233 return;
234 root = of_find_node_by_path("/"); 234 root = of_find_node_by_path("/");
235 if (root == NULL) { 235 if (root == NULL) {
236 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 236 pr_debug("/proc/device-tree: can't find root\n");
237 return; 237 return;
238 } 238 }
239 proc_device_tree_add_node(root, proc_device_tree); 239 proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34ef..f50133c11c24 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -32,7 +32,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
32 ei->sysctl_entry = table; 32 ei->sysctl_entry = table;
33 33
34 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 34 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
35 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
36 inode->i_mode = table->mode; 35 inode->i_mode = table->mode;
37 if (!table->child) { 36 if (!table->child) {
38 inode->i_mode |= S_IFREG; 37 inode->i_mode |= S_IFREG;
@@ -408,15 +407,18 @@ static int proc_sys_compare(const struct dentry *parent,
408 const struct dentry *dentry, const struct inode *inode, 407 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name) 408 unsigned int len, const char *str, const struct qstr *name)
410{ 409{
410 struct ctl_table_header *head;
411 /* Although proc doesn't have negative dentries, rcu-walk means 411 /* Although proc doesn't have negative dentries, rcu-walk means
412 * that inode here can be NULL */ 412 * that inode here can be NULL */
413 /* AV: can it, indeed? */
413 if (!inode) 414 if (!inode)
414 return 0; 415 return 1;
415 if (name->len != len) 416 if (name->len != len)
416 return 1; 417 return 1;
417 if (memcmp(name->name, str, len)) 418 if (memcmp(name->name, str, len))
418 return 1; 419 return 1;
419 return !sysctl_is_seen(PROC_I(inode)->sysctl); 420 head = rcu_dereference(PROC_I(inode)->sysctl);
421 return !head || !sysctl_is_seen(head);
420} 422}
421 423
422static const struct dentry_operations proc_sys_dentry_operations = { 424static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e2..1bba24bad820 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1593 struct inode *inode = dentry->d_inode; 1593 struct inode *inode = dentry->d_inode;
1594 int maxlen = *lenp; 1594 int maxlen = *lenp;
1595 1595
1596 if (maxlen < 3) 1596 if (need_parent && (maxlen < 5)) {
1597 *lenp = 5;
1597 return 255; 1598 return 255;
1599 } else if (maxlen < 3) {
1600 *lenp = 3;
1601 return 255;
1602 }
1598 1603
1599 data[0] = inode->i_ino; 1604 data[0] = inode->i_ino;
1600 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3eea859e6990..c77514bd5776 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2876,7 +2876,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2876 reiserfs_mounted_fs_count++; 2876 reiserfs_mounted_fs_count++;
2877 if (reiserfs_mounted_fs_count <= 1) { 2877 if (reiserfs_mounted_fs_count <= 1) {
2878 reiserfs_write_unlock(sb); 2878 reiserfs_write_unlock(sb);
2879 commit_wq = create_workqueue("reiserfs"); 2879 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
2880 reiserfs_write_lock(sb); 2880 reiserfs_write_lock(sb);
2881 } 2881 }
2882 2882
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..118662690cdf 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -593,7 +593,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
593 new_inode_init(inode, dir, mode); 593 new_inode_init(inode, dir, mode);
594 594
595 jbegin_count += reiserfs_cache_default_acl(dir); 595 jbegin_count += reiserfs_cache_default_acl(dir);
596 retval = reiserfs_security_init(dir, inode, &security); 596 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
597 if (retval < 0) { 597 if (retval < 0) {
598 drop_new_inode(inode); 598 drop_new_inode(inode);
599 return retval; 599 return retval;
@@ -667,7 +667,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
667 new_inode_init(inode, dir, mode); 667 new_inode_init(inode, dir, mode);
668 668
669 jbegin_count += reiserfs_cache_default_acl(dir); 669 jbegin_count += reiserfs_cache_default_acl(dir);
670 retval = reiserfs_security_init(dir, inode, &security); 670 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
671 if (retval < 0) { 671 if (retval < 0) {
672 drop_new_inode(inode); 672 drop_new_inode(inode);
673 return retval; 673 return retval;
@@ -747,7 +747,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
747 new_inode_init(inode, dir, mode); 747 new_inode_init(inode, dir, mode);
748 748
749 jbegin_count += reiserfs_cache_default_acl(dir); 749 jbegin_count += reiserfs_cache_default_acl(dir);
750 retval = reiserfs_security_init(dir, inode, &security); 750 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
751 if (retval < 0) { 751 if (retval < 0) {
752 drop_new_inode(inode); 752 drop_new_inode(inode);
753 return retval; 753 return retval;
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, 771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
772 dentry, inode, &security); 772 dentry, inode, &security);
773 if (retval) { 773 if (retval) {
774 dir->i_nlink--; 774 DEC_DIR_INODE_NLINK(dir)
775 goto out_failed; 775 goto out_failed;
776 } 776 }
777 777
@@ -1032,7 +1032,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
1032 } 1032 }
1033 new_inode_init(inode, parent_dir, mode); 1033 new_inode_init(inode, parent_dir, mode);
1034 1034
1035 retval = reiserfs_security_init(parent_dir, inode, &security); 1035 retval = reiserfs_security_init(parent_dir, inode, &dentry->d_name,
1036 &security);
1036 if (retval < 0) { 1037 if (retval < 0) {
1037 drop_new_inode(inode); 1038 drop_new_inode(inode);
1038 return retval; 1039 return retval;
@@ -1122,10 +1123,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1122 reiserfs_write_unlock(dir->i_sb); 1123 reiserfs_write_unlock(dir->i_sb);
1123 return -EMLINK; 1124 return -EMLINK;
1124 } 1125 }
1125 if (inode->i_nlink == 0) {
1126 reiserfs_write_unlock(dir->i_sb);
1127 return -ENOENT;
1128 }
1129 1126
1130 /* inc before scheduling so reiserfs_unlink knows we are here */ 1127 /* inc before scheduling so reiserfs_unlink knows we are here */
1131 inc_nlink(inode); 1128 inc_nlink(inode);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e933644..5c11ca82b782 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
978 978
979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
980{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
983 return -EPERM; 981 return -EPERM;
984} 982}
985 983
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 237c6928d3c6..ef66c18a9332 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -54,6 +54,7 @@ static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
54 * of blocks needed for the transaction. If successful, reiserfs_security 54 * of blocks needed for the transaction. If successful, reiserfs_security
55 * must be released using reiserfs_security_free when the caller is done. */ 55 * must be released using reiserfs_security_free when the caller is done. */
56int reiserfs_security_init(struct inode *dir, struct inode *inode, 56int reiserfs_security_init(struct inode *dir, struct inode *inode,
57 const struct qstr *qstr,
57 struct reiserfs_security_handle *sec) 58 struct reiserfs_security_handle *sec)
58{ 59{
59 int blocks = 0; 60 int blocks = 0;
@@ -65,7 +66,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
65 if (IS_PRIVATE(dir)) 66 if (IS_PRIVATE(dir))
66 return 0; 67 return 0;
67 68
68 error = security_inode_init_security(inode, dir, &sec->name, 69 error = security_inode_init_security(inode, dir, qstr, &sec->name,
69 &sec->value, &sec->length); 70 &sec->value, &sec->length);
70 if (error) { 71 if (error) {
71 if (error == -EOPNOTSUPP) 72 if (error == -EOPNOTSUPP)
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b703..961039121cb8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
75 int error = -EINVAL; 75 int error = -EINVAL;
76 int lookup_flags = 0; 76 int lookup_flags = 0;
77 77
78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
79 AT_EMPTY_PATH)) != 0)
79 goto out; 80 goto out;
80 81
81 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
82 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
83 if (flag & AT_NO_AUTOMOUNT) 84 if (flag & AT_NO_AUTOMOUNT)
84 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY;
85 88
86 error = user_path_at(dfd, filename, lookup_flags, &path); 89 error = user_path_at(dfd, filename, lookup_flags, &path);
87 if (error) 90 if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
297 if (bufsiz <= 0) 300 if (bufsiz <= 0)
298 return -EINVAL; 301 return -EINVAL;
299 302
300 error = user_path_at(dfd, pathname, 0, &path); 303 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
301 if (!error) { 304 if (!error) {
302 struct inode *inode = path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
303 306
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996b..8244924dec55 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
73} 73}
74EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
75 75
76static int do_statfs_native(struct path *path, struct statfs *buf) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct kstatfs st; 78 struct path path;
79 int retval; 79 int error = user_path(pathname, &path);
80 if (!error) {
81 error = vfs_statfs(&path, st);
82 path_put(&path);
83 }
84 return error;
85}
80 86
81 retval = vfs_statfs(path, &st); 87int fd_statfs(int fd, struct kstatfs *st)
82 if (retval) 88{
83 return retval; 89 struct file *file = fget(fd);
90 int error = -EBADF;
91 if (file) {
92 error = vfs_statfs(&file->f_path, st);
93 fput(file);
94 }
95 return error;
96}
84 97
85 if (sizeof(*buf) == sizeof(st)) 98static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
86 memcpy(buf, &st, sizeof(st)); 99{
100 struct statfs buf;
101
102 if (sizeof(buf) == sizeof(*st))
103 memcpy(&buf, st, sizeof(*st));
87 else { 104 else {
88 if (sizeof buf->f_blocks == 4) { 105 if (sizeof buf.f_blocks == 4) {
89 if ((st.f_blocks | st.f_bfree | st.f_bavail | 106 if ((st->f_blocks | st->f_bfree | st->f_bavail |
90 st.f_bsize | st.f_frsize) & 107 st->f_bsize | st->f_frsize) &
91 0xffffffff00000000ULL) 108 0xffffffff00000000ULL)
92 return -EOVERFLOW; 109 return -EOVERFLOW;
93 /* 110 /*
94 * f_files and f_ffree may be -1; it's okay to stuff 111 * f_files and f_ffree may be -1; it's okay to stuff
95 * that into 32 bits 112 * that into 32 bits
96 */ 113 */
97 if (st.f_files != -1 && 114 if (st->f_files != -1 &&
98 (st.f_files & 0xffffffff00000000ULL)) 115 (st->f_files & 0xffffffff00000000ULL))
99 return -EOVERFLOW; 116 return -EOVERFLOW;
100 if (st.f_ffree != -1 && 117 if (st->f_ffree != -1 &&
101 (st.f_ffree & 0xffffffff00000000ULL)) 118 (st->f_ffree & 0xffffffff00000000ULL))
102 return -EOVERFLOW; 119 return -EOVERFLOW;
103 } 120 }
104 121
105 buf->f_type = st.f_type; 122 buf.f_type = st->f_type;
106 buf->f_bsize = st.f_bsize; 123 buf.f_bsize = st->f_bsize;
107 buf->f_blocks = st.f_blocks; 124 buf.f_blocks = st->f_blocks;
108 buf->f_bfree = st.f_bfree; 125 buf.f_bfree = st->f_bfree;
109 buf->f_bavail = st.f_bavail; 126 buf.f_bavail = st->f_bavail;
110 buf->f_files = st.f_files; 127 buf.f_files = st->f_files;
111 buf->f_ffree = st.f_ffree; 128 buf.f_ffree = st->f_ffree;
112 buf->f_fsid = st.f_fsid; 129 buf.f_fsid = st->f_fsid;
113 buf->f_namelen = st.f_namelen; 130 buf.f_namelen = st->f_namelen;
114 buf->f_frsize = st.f_frsize; 131 buf.f_frsize = st->f_frsize;
115 buf->f_flags = st.f_flags; 132 buf.f_flags = st->f_flags;
116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 133 memset(buf.f_spare, 0, sizeof(buf.f_spare));
117 } 134 }
135 if (copy_to_user(p, &buf, sizeof(buf)))
136 return -EFAULT;
118 return 0; 137 return 0;
119} 138}
120 139
121static int do_statfs64(struct path *path, struct statfs64 *buf) 140static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
122{ 141{
123 struct kstatfs st; 142 struct statfs64 buf;
124 int retval; 143 if (sizeof(buf) == sizeof(*st))
125 144 memcpy(&buf, st, sizeof(*st));
126 retval = vfs_statfs(path, &st);
127 if (retval)
128 return retval;
129
130 if (sizeof(*buf) == sizeof(st))
131 memcpy(buf, &st, sizeof(st));
132 else { 145 else {
133 buf->f_type = st.f_type; 146 buf.f_type = st->f_type;
134 buf->f_bsize = st.f_bsize; 147 buf.f_bsize = st->f_bsize;
135 buf->f_blocks = st.f_blocks; 148 buf.f_blocks = st->f_blocks;
136 buf->f_bfree = st.f_bfree; 149 buf.f_bfree = st->f_bfree;
137 buf->f_bavail = st.f_bavail; 150 buf.f_bavail = st->f_bavail;
138 buf->f_files = st.f_files; 151 buf.f_files = st->f_files;
139 buf->f_ffree = st.f_ffree; 152 buf.f_ffree = st->f_ffree;
140 buf->f_fsid = st.f_fsid; 153 buf.f_fsid = st->f_fsid;
141 buf->f_namelen = st.f_namelen; 154 buf.f_namelen = st->f_namelen;
142 buf->f_frsize = st.f_frsize; 155 buf.f_frsize = st->f_frsize;
143 buf->f_flags = st.f_flags; 156 buf.f_flags = st->f_flags;
144 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 157 memset(buf.f_spare, 0, sizeof(buf.f_spare));
145 } 158 }
159 if (copy_to_user(p, &buf, sizeof(buf)))
160 return -EFAULT;
146 return 0; 161 return 0;
147} 162}
148 163
149SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 164SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
150{ 165{
151 struct path path; 166 struct kstatfs st;
152 int error; 167 int error = user_statfs(pathname, &st);
153 168 if (!error)
154 error = user_path(pathname, &path); 169 error = do_statfs_native(&st, buf);
155 if (!error) {
156 struct statfs tmp;
157 error = do_statfs_native(&path, &tmp);
158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
159 error = -EFAULT;
160 path_put(&path);
161 }
162 return error; 170 return error;
163} 171}
164 172
165SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 173SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
166{ 174{
167 struct path path; 175 struct kstatfs st;
168 long error; 176 int error;
169
170 if (sz != sizeof(*buf)) 177 if (sz != sizeof(*buf))
171 return -EINVAL; 178 return -EINVAL;
172 error = user_path(pathname, &path); 179 error = user_statfs(pathname, &st);
173 if (!error) { 180 if (!error)
174 struct statfs64 tmp; 181 error = do_statfs64(&st, buf);
175 error = do_statfs64(&path, &tmp);
176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
177 error = -EFAULT;
178 path_put(&path);
179 }
180 return error; 182 return error;
181} 183}
182 184
183SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 185SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
184{ 186{
185 struct file *file; 187 struct kstatfs st;
186 struct statfs tmp; 188 int error = fd_statfs(fd, &st);
187 int error; 189 if (!error)
188 190 error = do_statfs_native(&st, buf);
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = do_statfs_native(&file->f_path, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error; 191 return error;
199} 192}
200 193
201SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 194SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
202{ 195{
203 struct file *file; 196 struct kstatfs st;
204 struct statfs64 tmp;
205 int error; 197 int error;
206 198
207 if (sz != sizeof(*buf)) 199 if (sz != sizeof(*buf))
208 return -EINVAL; 200 return -EINVAL;
209 201
210 error = -EBADF; 202 error = fd_statfs(fd, &st);
211 file = fget(fd); 203 if (!error)
212 if (!file) 204 error = do_statfs64(&st, buf);
213 goto out;
214 error = do_statfs64(&file->f_path, &tmp);
215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
216 error = -EFAULT;
217 fput(file);
218out:
219 return error; 205 return error;
220} 206}
221 207
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
245 new_de = sysv_find_entry(new_dentry, &new_page); 245 new_de = sysv_find_entry(new_dentry, &new_page);
246 if (!new_de) 246 if (!new_de)
247 goto out_dir; 247 goto out_dir;
248 inode_inc_link_count(old_inode);
249 sysv_set_link(new_de, new_page, old_inode); 248 sysv_set_link(new_de, new_page, old_inode);
250 new_inode->i_ctime = CURRENT_TIME_SEC; 249 new_inode->i_ctime = CURRENT_TIME_SEC;
251 if (dir_de) 250 if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
257 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) 256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
258 goto out_dir; 257 goto out_dir;
259 } 258 }
260 inode_inc_link_count(old_inode);
261 err = sysv_add_link(new_dentry, old_inode); 259 err = sysv_add_link(new_dentry, old_inode);
262 if (err) { 260 if (err)
263 inode_dec_link_count(old_inode);
264 goto out_dir; 261 goto out_dir;
265 }
266 if (dir_de) 262 if (dir_de)
267 inode_inc_link_count(new_dir); 263 inode_inc_link_count(new_dir);
268 } 264 }
269 265
270 sysv_delete_entry(old_de, old_page); 266 sysv_delete_entry(old_de, old_page);
271 inode_dec_link_count(old_inode); 267 mark_inode_dirty(old_inode);
272 268
273 if (dir_de) { 269 if (dir_de) {
274 sysv_set_link(dir_de, dir_page, new_dir); 270 sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7f..7217d67a80a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..f1dce848ef96 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
35static inline int udf_match(int len1, const unsigned char *name1, int len2, 37static inline int udf_match(int len1, const unsigned char *name1, int len2,
36 const unsigned char *name2) 38 const unsigned char *name2)
37{ 39{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
650 struct udf_inode_info *iinfo; 652 struct udf_inode_info *iinfo;
651 653
652 err = -EMLINK; 654 err = -EMLINK;
653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 655 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out; 656 goto out;
655 657
656 err = -EIO; 658 err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1034 struct fileIdentDesc cfi, *fi; 1036 struct fileIdentDesc cfi, *fi;
1035 int err; 1037 int err;
1036 1038
1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1039 if (inode->i_nlink >= UDF_MAX_LINKS)
1038 return -EMLINK; 1040 return -EMLINK;
1039 }
1040 1041
1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1042 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1042 if (!fi) { 1043 if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1131 goto end_rename; 1132 goto end_rename;
1132 1133
1133 retval = -EMLINK; 1134 retval = -EMLINK;
1134 if (!new_inode && 1135 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1135 new_dir->i_nlink >=
1136 (256 << sizeof(new_dir->i_nlink)) - 1)
1137 goto end_rename; 1136 goto end_rename;
1138 } 1137 }
1139 if (!nfi) { 1138 if (!nfi) {
@@ -1287,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1287 struct fid *fid = (struct fid *)fh; 1286 struct fid *fid = (struct fid *)fh;
1288 int type = FILEID_UDF_WITHOUT_PARENT; 1287 int type = FILEID_UDF_WITHOUT_PARENT;
1289 1288
1290 if (len < 3 || (connectable && len < 5)) 1289 if (connectable && (len < 5)) {
1290 *lenp = 5;
1291 return 255; 1291 return 255;
1292 } else if (len < 3) {
1293 *lenp = 3;
1294 return 255;
1295 }
1292 1296
1293 *lenp = 3; 1297 *lenp = 3;
1294 fid->udf.block = location.logicalBlockNum; 1298 fid->udf.block = location.logicalBlockNum;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..d6f681535eb8 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); 306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
307 if (!new_de) 307 if (!new_de)
308 goto out_dir; 308 goto out_dir;
309 inode_inc_link_count(old_inode);
310 ufs_set_link(new_dir, new_de, new_page, old_inode); 309 ufs_set_link(new_dir, new_de, new_page, old_inode);
311 new_inode->i_ctime = CURRENT_TIME_SEC; 310 new_inode->i_ctime = CURRENT_TIME_SEC;
312 if (dir_de) 311 if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
318 if (new_dir->i_nlink >= UFS_LINK_MAX) 317 if (new_dir->i_nlink >= UFS_LINK_MAX)
319 goto out_dir; 318 goto out_dir;
320 } 319 }
321 inode_inc_link_count(old_inode);
322 err = ufs_add_link(new_dentry, old_inode); 320 err = ufs_add_link(new_dentry, old_inode);
323 if (err) { 321 if (err)
324 inode_dec_link_count(old_inode);
325 goto out_dir; 322 goto out_dir;
326 }
327 if (dir_de) 323 if (dir_de)
328 inode_inc_link_count(new_dir); 324 inode_inc_link_count(new_dir);
329 } 325 }
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
331 /* 327 /*
332 * Like most other Unix systems, set the ctime for inodes on a 328 * Like most other Unix systems, set the ctime for inodes on a
333 * rename. 329 * rename.
334 * inode_dec_link_count() will mark the inode dirty.
335 */ 330 */
336 old_inode->i_ctime = CURRENT_TIME_SEC; 331 old_inode->i_ctime = CURRENT_TIME_SEC;
337 332
338 ufs_delete_entry(old_dir, old_de, old_page); 333 ufs_delete_entry(old_dir, old_de, old_page);
339 inode_dec_link_count(old_inode); 334 mark_inode_dirty(old_inode);
340 335
341 if (dir_de) { 336 if (dir_de) {
342 ufs_set_link(old_inode, dir_de, dir_page, new_dir); 337 ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ac1c7e8378dd..f83a4c830a65 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -2022,11 +2022,12 @@ xfs_buf_init(void)
2022 if (!xfslogd_workqueue) 2022 if (!xfslogd_workqueue)
2023 goto out_free_buf_zone; 2023 goto out_free_buf_zone;
2024 2024
2025 xfsdatad_workqueue = create_workqueue("xfsdatad"); 2025 xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
2026 if (!xfsdatad_workqueue) 2026 if (!xfsdatad_workqueue)
2027 goto out_destroy_xfslogd_workqueue; 2027 goto out_destroy_xfslogd_workqueue;
2028 2028
2029 xfsconvertd_workqueue = create_workqueue("xfsconvertd"); 2029 xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
2030 WQ_MEM_RECLAIM, 1);
2030 if (!xfsconvertd_workqueue) 2031 if (!xfsconvertd_workqueue)
2031 goto out_destroy_xfsdatad_workqueue; 2032 goto out_destroy_xfsdatad_workqueue;
2032 2033
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
152 152
153 if (!capable(CAP_SYS_ADMIN)) 153 if (!capable(CAP_SYS_ADMIN))
154 return -XFS_ERROR(EPERM); 154 return -XFS_ERROR(EPERM);
155 if (!blk_queue_discard(q))
156 return -XFS_ERROR(EOPNOTSUPP);
155 if (copy_from_user(&range, urange, sizeof(range))) 157 if (copy_from_user(&range, urange, sizeof(range)))
156 return -XFS_ERROR(EFAULT); 158 return -XFS_ERROR(EFAULT);
157 159
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fdd..f4f878fc0083 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
89 * seven combinations work. The real answer is "don't use v2". 89 * seven combinations work. The real answer is "don't use v2".
90 */ 90 */
91 len = xfs_fileid_length(fileid_type); 91 len = xfs_fileid_length(fileid_type);
92 if (*max_len < len) 92 if (*max_len < len) {
93 *max_len = len;
93 return 255; 94 return 255;
95 }
94 *max_len = len; 96 *max_len = len;
95 97
96 switch (fileid_type) { 98 switch (fileid_type) {
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8e..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
695 xfs_mount_t *mp, 695 xfs_mount_t *mp,
696 void __user *arg) 696 void __user *arg)
697{ 697{
698 xfs_fsop_geom_v1_t fsgeo; 698 xfs_fsop_geom_t fsgeo;
699 int error; 699 int error;
700 700
701 error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); 701 error = xfs_fs_geometry(mp, &fsgeo, 3);
702 if (error) 702 if (error)
703 return -error; 703 return -error;
704 704
705 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) 705 /*
706 * Caller should have passed an argument of type
707 * xfs_fsop_geom_v1_t. This is a proper subset of the
708 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
709 */
710 if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
706 return -XFS_ERROR(EFAULT); 711 return -XFS_ERROR(EFAULT);
707 return 0; 712 return 0;
708} 713}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index bd5727852fd6..9ff7fc603d2f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -102,7 +102,8 @@ xfs_mark_inode_dirty(
102STATIC int 102STATIC int
103xfs_init_security( 103xfs_init_security(
104 struct inode *inode, 104 struct inode *inode,
105 struct inode *dir) 105 struct inode *dir,
106 const struct qstr *qstr)
106{ 107{
107 struct xfs_inode *ip = XFS_I(inode); 108 struct xfs_inode *ip = XFS_I(inode);
108 size_t length; 109 size_t length;
@@ -110,7 +111,7 @@ xfs_init_security(
110 unsigned char *name; 111 unsigned char *name;
111 int error; 112 int error;
112 113
113 error = security_inode_init_security(inode, dir, (char **)&name, 114 error = security_inode_init_security(inode, dir, qstr, (char **)&name,
114 &value, &length); 115 &value, &length);
115 if (error) { 116 if (error) {
116 if (error == -EOPNOTSUPP) 117 if (error == -EOPNOTSUPP)
@@ -194,7 +195,7 @@ xfs_vn_mknod(
194 195
195 inode = VFS_I(ip); 196 inode = VFS_I(ip);
196 197
197 error = xfs_init_security(inode, dir); 198 error = xfs_init_security(inode, dir, &dentry->d_name);
198 if (unlikely(error)) 199 if (unlikely(error))
199 goto out_cleanup_inode; 200 goto out_cleanup_inode;
200 201
@@ -367,7 +368,7 @@ xfs_vn_symlink(
367 368
368 inode = VFS_I(cip); 369 inode = VFS_I(cip);
369 370
370 error = xfs_init_security(inode, dir); 371 error = xfs_init_security(inode, dir, &dentry->d_name);
371 if (unlikely(error)) 372 if (unlikely(error))
372 goto out_cleanup_inode; 373 goto out_cleanup_inode;
373 374
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
53 xfs_fsop_geom_t *geo, 53 xfs_fsop_geom_t *geo,
54 int new_version) 54 int new_version)
55{ 55{
56
57 memset(geo, 0, sizeof(*geo));
58
56 geo->blocksize = mp->m_sb.sb_blocksize; 59 geo->blocksize = mp->m_sb.sb_blocksize;
57 geo->rtextsize = mp->m_sb.sb_rextsize; 60 geo->rtextsize = mp->m_sb.sb_rextsize;
58 geo->agblocks = mp->m_sb.sb_agblocks; 61 geo->agblocks = mp->m_sb.sb_agblocks;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index edfa178bafb6..4aff56395732 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void)
309 if (!xfs_mru_elem_zone) 309 if (!xfs_mru_elem_zone)
310 goto out; 310 goto out;
311 311
312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); 312 xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
313 if (!xfs_mru_reap_wq) 313 if (!xfs_mru_reap_wq)
314 goto out_destroy_mru_elem_zone; 314 goto out_destroy_mru_elem_zone;
315 315
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 78ca429929f7..ff103ba96b78 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -381,7 +381,7 @@ struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
381int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); 381int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
382int acpi_disable_wakeup_device_power(struct acpi_device *dev); 382int acpi_disable_wakeup_device_power(struct acpi_device *dev);
383 383
384#ifdef CONFIG_PM_OPS 384#ifdef CONFIG_PM
385int acpi_pm_device_sleep_state(struct device *, int *); 385int acpi_pm_device_sleep_state(struct device *, int *);
386#else 386#else
387static inline int acpi_pm_device_sleep_state(struct device *d, int *p) 387static inline int acpi_pm_device_sleep_state(struct device *d, int *p)
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 2bcc5c7c22a6..61e03dd7939e 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -30,6 +30,9 @@ typedef u64 cputime64_t;
30#define cputime64_to_jiffies64(__ct) (__ct) 30#define cputime64_to_jiffies64(__ct) (__ct)
31#define jiffies64_to_cputime64(__jif) (__jif) 31#define jiffies64_to_cputime64(__jif) (__jif)
32#define cputime_to_cputime64(__ct) ((u64) __ct) 32#define cputime_to_cputime64(__ct) ((u64) __ct)
33#define cputime64_gt(__a, __b) ((__a) > (__b))
34
35#define nsecs_to_cputime64(__ct) nsecs_to_jiffies64(__ct)
33 36
34 37
35/* 38/*
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 0fc16e3f0bfc..84793c7025e2 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -80,6 +80,10 @@
80#define O_SYNC (__O_SYNC|O_DSYNC) 80#define O_SYNC (__O_SYNC|O_DSYNC)
81#endif 81#endif
82 82
83#ifndef O_PATH
84#define O_PATH 010000000
85#endif
86
83#ifndef O_NDELAY 87#ifndef O_NDELAY
84#define O_NDELAY O_NONBLOCK 88#define O_NDELAY O_NONBLOCK
85#endif 89#endif
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 3c2344f48136..01f227e14254 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -6,7 +6,7 @@
6#include <asm/errno.h> 6#include <asm/errno.h>
7 7
8static inline int 8static inline int
9futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 9futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
10{ 10{
11 int op = (encoded_op >> 28) & 7; 11 int op = (encoded_op >> 28) & 7;
12 int cmp = (encoded_op >> 24) & 15; 12 int cmp = (encoded_op >> 24) & 15;
@@ -16,7 +16,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
16 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 16 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
17 oparg = 1 << oparg; 17 oparg = 1 << oparg;
18 18
19 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 19 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
20 return -EFAULT; 20 return -EFAULT;
21 21
22 pagefault_disable(); 22 pagefault_disable();
@@ -48,7 +48,8 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
48} 48}
49 49
50static inline int 50static inline int
51futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 51futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
52 u32 oldval, u32 newval)
52{ 53{
53 return -ENOSYS; 54 return -ENOSYS;
54} 55}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 31b6188df221..b4bfe338ea0e 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -4,6 +4,8 @@
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5#ifdef CONFIG_MMU 5#ifdef CONFIG_MMU
6 6
7#include <linux/mm_types.h>
8
7#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS 9#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
8extern int ptep_set_access_flags(struct vm_area_struct *vma, 10extern int ptep_set_access_flags(struct vm_area_struct *vma,
9 unsigned long address, pte_t *ptep, 11 unsigned long address, pte_t *ptep,
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index b3bfabc258f3..c1a1216e29ce 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[];
11extern char _end[]; 11extern char _end[];
12extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; 12extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
13extern char __kprobes_text_start[], __kprobes_text_end[]; 13extern char __kprobes_text_start[], __kprobes_text_end[];
14extern char __entry_text_start[], __entry_text_end[];
14extern char __initdata_begin[], __initdata_end[]; 15extern char __initdata_begin[], __initdata_end[];
15extern char __start_rodata[], __end_rodata[]; 16extern char __start_rodata[], __end_rodata[];
16 17
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index b969770196c2..57af0338d270 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -646,9 +646,13 @@ __SYSCALL(__NR_prlimit64, sys_prlimit64)
646__SYSCALL(__NR_fanotify_init, sys_fanotify_init) 646__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
647#define __NR_fanotify_mark 263 647#define __NR_fanotify_mark 263
648__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 648__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
649#define __NR_name_to_handle_at 264
650__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
651#define __NR_open_by_handle_at 265
652__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
649 653
650#undef __NR_syscalls 654#undef __NR_syscalls
651#define __NR_syscalls 264 655#define __NR_syscalls 266
652 656
653/* 657/*
654 * All syscalls below here should go away really, 658 * All syscalls below here should go away really,
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fe77e3395b40..32c45e5fe0ab 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -15,7 +15,7 @@
15 * HEAD_TEXT_SECTION 15 * HEAD_TEXT_SECTION
16 * INIT_TEXT_SECTION(PAGE_SIZE) 16 * INIT_TEXT_SECTION(PAGE_SIZE)
17 * INIT_DATA_SECTION(...) 17 * INIT_DATA_SECTION(...)
18 * PERCPU(PAGE_SIZE) 18 * PERCPU(CACHELINE_SIZE, PAGE_SIZE)
19 * __init_end = .; 19 * __init_end = .;
20 * 20 *
21 * _stext = .; 21 * _stext = .;
@@ -424,6 +424,12 @@
424 *(.kprobes.text) \ 424 *(.kprobes.text) \
425 VMLINUX_SYMBOL(__kprobes_text_end) = .; 425 VMLINUX_SYMBOL(__kprobes_text_end) = .;
426 426
427#define ENTRY_TEXT \
428 ALIGN_FUNCTION(); \
429 VMLINUX_SYMBOL(__entry_text_start) = .; \
430 *(.entry.text) \
431 VMLINUX_SYMBOL(__entry_text_end) = .;
432
427#ifdef CONFIG_FUNCTION_GRAPH_TRACER 433#ifdef CONFIG_FUNCTION_GRAPH_TRACER
428#define IRQENTRY_TEXT \ 434#define IRQENTRY_TEXT \
429 ALIGN_FUNCTION(); \ 435 ALIGN_FUNCTION(); \
@@ -683,13 +689,18 @@
683 689
684/** 690/**
685 * PERCPU_VADDR - define output section for percpu area 691 * PERCPU_VADDR - define output section for percpu area
692 * @cacheline: cacheline size
686 * @vaddr: explicit base address (optional) 693 * @vaddr: explicit base address (optional)
687 * @phdr: destination PHDR (optional) 694 * @phdr: destination PHDR (optional)
688 * 695 *
689 * Macro which expands to output section for percpu area. If @vaddr 696 * Macro which expands to output section for percpu area.
690 * is not blank, it specifies explicit base address and all percpu 697 *
691 * symbols will be offset from the given address. If blank, @vaddr 698 * @cacheline is used to align subsections to avoid false cacheline
692 * always equals @laddr + LOAD_OFFSET. 699 * sharing between subsections for different purposes.
700 *
701 * If @vaddr is not blank, it specifies explicit base address and all
702 * percpu symbols will be offset from the given address. If blank,
703 * @vaddr always equals @laddr + LOAD_OFFSET.
693 * 704 *
694 * @phdr defines the output PHDR to use if not blank. Be warned that 705 * @phdr defines the output PHDR to use if not blank. Be warned that
695 * output PHDR is sticky. If @phdr is specified, the next output 706 * output PHDR is sticky. If @phdr is specified, the next output
@@ -700,7 +711,7 @@
700 * If there is no need to put the percpu section at a predetermined 711 * If there is no need to put the percpu section at a predetermined
701 * address, use PERCPU(). 712 * address, use PERCPU().
702 */ 713 */
703#define PERCPU_VADDR(vaddr, phdr) \ 714#define PERCPU_VADDR(cacheline, vaddr, phdr) \
704 VMLINUX_SYMBOL(__per_cpu_load) = .; \ 715 VMLINUX_SYMBOL(__per_cpu_load) = .; \
705 .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ 716 .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
706 - LOAD_OFFSET) { \ 717 - LOAD_OFFSET) { \
@@ -708,7 +719,9 @@
708 *(.data..percpu..first) \ 719 *(.data..percpu..first) \
709 . = ALIGN(PAGE_SIZE); \ 720 . = ALIGN(PAGE_SIZE); \
710 *(.data..percpu..page_aligned) \ 721 *(.data..percpu..page_aligned) \
722 . = ALIGN(cacheline); \
711 *(.data..percpu..readmostly) \ 723 *(.data..percpu..readmostly) \
724 . = ALIGN(cacheline); \
712 *(.data..percpu) \ 725 *(.data..percpu) \
713 *(.data..percpu..shared_aligned) \ 726 *(.data..percpu..shared_aligned) \
714 VMLINUX_SYMBOL(__per_cpu_end) = .; \ 727 VMLINUX_SYMBOL(__per_cpu_end) = .; \
@@ -717,18 +730,18 @@
717 730
718/** 731/**
719 * PERCPU - define output section for percpu area, simple version 732 * PERCPU - define output section for percpu area, simple version
733 * @cacheline: cacheline size
720 * @align: required alignment 734 * @align: required alignment
721 * 735 *
722 * Align to @align and outputs output section for percpu area. This 736 * Align to @align and outputs output section for percpu area. This macro
723 * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and 737 * doesn't manipulate @vaddr or @phdr and __per_cpu_load and
724 * __per_cpu_start will be identical. 738 * __per_cpu_start will be identical.
725 * 739 *
726 * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except 740 * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,)
727 * that __per_cpu_load is defined as a relative symbol against 741 * except that __per_cpu_load is defined as a relative symbol against
728 * .data..percpu which is required for relocatable x86_32 742 * .data..percpu which is required for relocatable x86_32 configuration.
729 * configuration.
730 */ 743 */
731#define PERCPU(align) \ 744#define PERCPU(cacheline, align) \
732 . = ALIGN(align); \ 745 . = ALIGN(align); \
733 .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ 746 .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \
734 VMLINUX_SYMBOL(__per_cpu_load) = .; \ 747 VMLINUX_SYMBOL(__per_cpu_load) = .; \
@@ -736,7 +749,9 @@
736 *(.data..percpu..first) \ 749 *(.data..percpu..first) \
737 . = ALIGN(PAGE_SIZE); \ 750 . = ALIGN(PAGE_SIZE); \
738 *(.data..percpu..page_aligned) \ 751 *(.data..percpu..page_aligned) \
752 . = ALIGN(cacheline); \
739 *(.data..percpu..readmostly) \ 753 *(.data..percpu..readmostly) \
754 . = ALIGN(cacheline); \
740 *(.data..percpu) \ 755 *(.data..percpu) \
741 *(.data..percpu..shared_aligned) \ 756 *(.data..percpu..shared_aligned) \
742 VMLINUX_SYMBOL(__per_cpu_end) = .; \ 757 VMLINUX_SYMBOL(__per_cpu_end) = .; \
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index fe29aadb129d..348843b80150 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1101,7 +1101,7 @@ struct drm_device {
1101 struct platform_device *platformdev; /**< Platform device struture */ 1101 struct platform_device *platformdev; /**< Platform device struture */
1102 1102
1103 struct drm_sg_mem *sg; /**< Scatter gather memory */ 1103 struct drm_sg_mem *sg; /**< Scatter gather memory */
1104 int num_crtcs; /**< Number of CRTCs on this device */ 1104 unsigned int num_crtcs; /**< Number of CRTCs on this device */
1105 void *dev_private; /**< device private data */ 1105 void *dev_private; /**< device private data */
1106 void *mm_private; 1106 void *mm_private;
1107 struct address_space *dev_mapping; 1107 struct address_space *dev_mapping;
diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h
index 5cb86c307f5d..fc4875433817 100644
--- a/include/keys/rxrpc-type.h
+++ b/include/keys/rxrpc-type.h
@@ -99,7 +99,6 @@ struct rxrpc_key_token {
99 * structure of raw payloads passed to add_key() or instantiate key 99 * structure of raw payloads passed to add_key() or instantiate key
100 */ 100 */
101struct rxrpc_key_data_v1 { 101struct rxrpc_key_data_v1 {
102 u32 kif_version; /* 1 */
103 u16 security_index; 102 u16 security_index;
104 u16 ticket_length; 103 u16 ticket_length;
105 u32 expiry; /* time_t */ 104 u32 expiry; /* time_t */
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 0c4929fa34d3..32df2b6ef0e0 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -89,6 +89,7 @@ enum {
89 ATA_ID_SPG = 98, 89 ATA_ID_SPG = 98,
90 ATA_ID_LBA_CAPACITY_2 = 100, 90 ATA_ID_LBA_CAPACITY_2 = 100,
91 ATA_ID_SECTOR_SIZE = 106, 91 ATA_ID_SECTOR_SIZE = 106,
92 ATA_ID_WWN = 108,
92 ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */ 93 ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */
93 ATA_ID_LAST_LUN = 126, 94 ATA_ID_LAST_LUN = 126,
94 ATA_ID_DLF = 128, 95 ATA_ID_DLF = 128,
@@ -103,6 +104,7 @@ enum {
103 ATA_ID_SERNO_LEN = 20, 104 ATA_ID_SERNO_LEN = 20,
104 ATA_ID_FW_REV_LEN = 8, 105 ATA_ID_FW_REV_LEN = 8,
105 ATA_ID_PROD_LEN = 40, 106 ATA_ID_PROD_LEN = 40,
107 ATA_ID_WWN_LEN = 8,
106 108
107 ATA_PCI_CTL_OFS = 2, 109 ATA_PCI_CTL_OFS = 2,
108 110
@@ -598,42 +600,42 @@ static inline bool ata_id_has_dipm(const u16 *id)
598} 600}
599 601
600 602
601static inline int ata_id_has_fua(const u16 *id) 603static inline bool ata_id_has_fua(const u16 *id)
602{ 604{
603 if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000) 605 if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000)
604 return 0; 606 return false;
605 return id[ATA_ID_CFSSE] & (1 << 6); 607 return id[ATA_ID_CFSSE] & (1 << 6);
606} 608}
607 609
608static inline int ata_id_has_flush(const u16 *id) 610static inline bool ata_id_has_flush(const u16 *id)
609{ 611{
610 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) 612 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
611 return 0; 613 return false;
612 return id[ATA_ID_COMMAND_SET_2] & (1 << 12); 614 return id[ATA_ID_COMMAND_SET_2] & (1 << 12);
613} 615}
614 616
615static inline int ata_id_flush_enabled(const u16 *id) 617static inline bool ata_id_flush_enabled(const u16 *id)
616{ 618{
617 if (ata_id_has_flush(id) == 0) 619 if (ata_id_has_flush(id) == 0)
618 return 0; 620 return false;
619 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) 621 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
620 return 0; 622 return false;
621 return id[ATA_ID_CFS_ENABLE_2] & (1 << 12); 623 return id[ATA_ID_CFS_ENABLE_2] & (1 << 12);
622} 624}
623 625
624static inline int ata_id_has_flush_ext(const u16 *id) 626static inline bool ata_id_has_flush_ext(const u16 *id)
625{ 627{
626 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) 628 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
627 return 0; 629 return false;
628 return id[ATA_ID_COMMAND_SET_2] & (1 << 13); 630 return id[ATA_ID_COMMAND_SET_2] & (1 << 13);
629} 631}
630 632
631static inline int ata_id_flush_ext_enabled(const u16 *id) 633static inline bool ata_id_flush_ext_enabled(const u16 *id)
632{ 634{
633 if (ata_id_has_flush_ext(id) == 0) 635 if (ata_id_has_flush_ext(id) == 0)
634 return 0; 636 return false;
635 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) 637 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
636 return 0; 638 return false;
637 /* 639 /*
638 * some Maxtor disks have bit 13 defined incorrectly 640 * some Maxtor disks have bit 13 defined incorrectly
639 * so check bit 10 too 641 * so check bit 10 too
@@ -686,64 +688,64 @@ static inline u16 ata_id_logical_sector_offset(const u16 *id,
686 return 0; 688 return 0;
687} 689}
688 690
689static inline int ata_id_has_lba48(const u16 *id) 691static inline bool ata_id_has_lba48(const u16 *id)
690{ 692{
691 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) 693 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
692 return 0; 694 return false;
693 if (!ata_id_u64(id, ATA_ID_LBA_CAPACITY_2)) 695 if (!ata_id_u64(id, ATA_ID_LBA_CAPACITY_2))
694 return 0; 696 return false;
695 return id[ATA_ID_COMMAND_SET_2] & (1 << 10); 697 return id[ATA_ID_COMMAND_SET_2] & (1 << 10);
696} 698}
697 699
698static inline int ata_id_lba48_enabled(const u16 *id) 700static inline bool ata_id_lba48_enabled(const u16 *id)
699{ 701{
700 if (ata_id_has_lba48(id) == 0) 702 if (ata_id_has_lba48(id) == 0)
701 return 0; 703 return false;
702 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) 704 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
703 return 0; 705 return false;
704 return id[ATA_ID_CFS_ENABLE_2] & (1 << 10); 706 return id[ATA_ID_CFS_ENABLE_2] & (1 << 10);
705} 707}
706 708
707static inline int ata_id_hpa_enabled(const u16 *id) 709static inline bool ata_id_hpa_enabled(const u16 *id)
708{ 710{
709 /* Yes children, word 83 valid bits cover word 82 data */ 711 /* Yes children, word 83 valid bits cover word 82 data */
710 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) 712 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
711 return 0; 713 return false;
712 /* And 87 covers 85-87 */ 714 /* And 87 covers 85-87 */
713 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) 715 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
714 return 0; 716 return false;
715 /* Check command sets enabled as well as supported */ 717 /* Check command sets enabled as well as supported */
716 if ((id[ATA_ID_CFS_ENABLE_1] & (1 << 10)) == 0) 718 if ((id[ATA_ID_CFS_ENABLE_1] & (1 << 10)) == 0)
717 return 0; 719 return false;
718 return id[ATA_ID_COMMAND_SET_1] & (1 << 10); 720 return id[ATA_ID_COMMAND_SET_1] & (1 << 10);
719} 721}
720 722
721static inline int ata_id_has_wcache(const u16 *id) 723static inline bool ata_id_has_wcache(const u16 *id)
722{ 724{
723 /* Yes children, word 83 valid bits cover word 82 data */ 725 /* Yes children, word 83 valid bits cover word 82 data */
724 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) 726 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
725 return 0; 727 return false;
726 return id[ATA_ID_COMMAND_SET_1] & (1 << 5); 728 return id[ATA_ID_COMMAND_SET_1] & (1 << 5);
727} 729}
728 730
729static inline int ata_id_has_pm(const u16 *id) 731static inline bool ata_id_has_pm(const u16 *id)
730{ 732{
731 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) 733 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
732 return 0; 734 return false;
733 return id[ATA_ID_COMMAND_SET_1] & (1 << 3); 735 return id[ATA_ID_COMMAND_SET_1] & (1 << 3);
734} 736}
735 737
736static inline int ata_id_rahead_enabled(const u16 *id) 738static inline bool ata_id_rahead_enabled(const u16 *id)
737{ 739{
738 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) 740 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
739 return 0; 741 return false;
740 return id[ATA_ID_CFS_ENABLE_1] & (1 << 6); 742 return id[ATA_ID_CFS_ENABLE_1] & (1 << 6);
741} 743}
742 744
743static inline int ata_id_wcache_enabled(const u16 *id) 745static inline bool ata_id_wcache_enabled(const u16 *id)
744{ 746{
745 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) 747 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
746 return 0; 748 return false;
747 return id[ATA_ID_CFS_ENABLE_1] & (1 << 5); 749 return id[ATA_ID_CFS_ENABLE_1] & (1 << 5);
748} 750}
749 751
@@ -773,7 +775,7 @@ static inline unsigned int ata_id_major_version(const u16 *id)
773 return mver; 775 return mver;
774} 776}
775 777
776static inline int ata_id_is_sata(const u16 *id) 778static inline bool ata_id_is_sata(const u16 *id)
777{ 779{
778 /* 780 /*
779 * See if word 93 is 0 AND drive is at least ATA-5 compatible 781 * See if word 93 is 0 AND drive is at least ATA-5 compatible
@@ -782,37 +784,40 @@ static inline int ata_id_is_sata(const u16 *id)
782 * 0x0000 and 0xffff along with the earlier ATA revisions... 784 * 0x0000 and 0xffff along with the earlier ATA revisions...
783 */ 785 */
784 if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020) 786 if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020)
785 return 1; 787 return true;
786 return 0; 788 return false;
787} 789}
788 790
789static inline int ata_id_has_tpm(const u16 *id) 791static inline bool ata_id_has_tpm(const u16 *id)
790{ 792{
791 /* The TPM bits are only valid on ATA8 */ 793 /* The TPM bits are only valid on ATA8 */
792 if (ata_id_major_version(id) < 8) 794 if (ata_id_major_version(id) < 8)
793 return 0; 795 return false;
794 if ((id[48] & 0xC000) != 0x4000) 796 if ((id[48] & 0xC000) != 0x4000)
795 return 0; 797 return false;
796 return id[48] & (1 << 0); 798 return id[48] & (1 << 0);
797} 799}
798 800
799static inline int ata_id_has_dword_io(const u16 *id) 801static inline bool ata_id_has_dword_io(const u16 *id)
800{ 802{
801 /* ATA 8 reuses this flag for "trusted" computing */ 803 /* ATA 8 reuses this flag for "trusted" computing */
802 if (ata_id_major_version(id) > 7) 804 if (ata_id_major_version(id) > 7)
803 return 0; 805 return false;
804 if (id[ATA_ID_DWORD_IO] & (1 << 0)) 806 return id[ATA_ID_DWORD_IO] & (1 << 0);
805 return 1;
806 return 0;
807} 807}
808 808
809static inline int ata_id_has_unload(const u16 *id) 809static inline bool ata_id_has_unload(const u16 *id)
810{ 810{
811 if (ata_id_major_version(id) >= 7 && 811 if (ata_id_major_version(id) >= 7 &&
812 (id[ATA_ID_CFSSE] & 0xC000) == 0x4000 && 812 (id[ATA_ID_CFSSE] & 0xC000) == 0x4000 &&
813 id[ATA_ID_CFSSE] & (1 << 13)) 813 id[ATA_ID_CFSSE] & (1 << 13))
814 return 1; 814 return true;
815 return 0; 815 return false;
816}
817
818static inline bool ata_id_has_wwn(const u16 *id)
819{
820 return (id[ATA_ID_CSF_DEFAULT] & 0xC100) == 0x4100;
816} 821}
817 822
818static inline int ata_id_form_factor(const u16 *id) 823static inline int ata_id_form_factor(const u16 *id)
@@ -843,25 +848,25 @@ static inline int ata_id_rotation_rate(const u16 *id)
843 return val; 848 return val;
844} 849}
845 850
846static inline int ata_id_has_trim(const u16 *id) 851static inline bool ata_id_has_trim(const u16 *id)
847{ 852{
848 if (ata_id_major_version(id) >= 7 && 853 if (ata_id_major_version(id) >= 7 &&
849 (id[ATA_ID_DATA_SET_MGMT] & 1)) 854 (id[ATA_ID_DATA_SET_MGMT] & 1))
850 return 1; 855 return true;
851 return 0; 856 return false;
852} 857}
853 858
854static inline int ata_id_has_zero_after_trim(const u16 *id) 859static inline bool ata_id_has_zero_after_trim(const u16 *id)
855{ 860{
856 /* DSM supported, deterministic read, and read zero after trim set */ 861 /* DSM supported, deterministic read, and read zero after trim set */
857 if (ata_id_has_trim(id) && 862 if (ata_id_has_trim(id) &&
858 (id[ATA_ID_ADDITIONAL_SUPP] & 0x4020) == 0x4020) 863 (id[ATA_ID_ADDITIONAL_SUPP] & 0x4020) == 0x4020)
859 return 1; 864 return true;
860 865
861 return 0; 866 return false;
862} 867}
863 868
864static inline int ata_id_current_chs_valid(const u16 *id) 869static inline bool ata_id_current_chs_valid(const u16 *id)
865{ 870{
866 /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command 871 /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command
867 has not been issued to the device then the values of 872 has not been issued to the device then the values of
@@ -873,11 +878,11 @@ static inline int ata_id_current_chs_valid(const u16 *id)
873 id[ATA_ID_CUR_SECTORS]; /* sectors in current translation */ 878 id[ATA_ID_CUR_SECTORS]; /* sectors in current translation */
874} 879}
875 880
876static inline int ata_id_is_cfa(const u16 *id) 881static inline bool ata_id_is_cfa(const u16 *id)
877{ 882{
878 if ((id[ATA_ID_CONFIG] == 0x848A) || /* Traditional CF */ 883 if ((id[ATA_ID_CONFIG] == 0x848A) || /* Traditional CF */
879 (id[ATA_ID_CONFIG] == 0x844A)) /* Delkin Devices CF */ 884 (id[ATA_ID_CONFIG] == 0x844A)) /* Delkin Devices CF */
880 return 1; 885 return true;
881 /* 886 /*
882 * CF specs don't require specific value in the word 0 anymore and yet 887 * CF specs don't require specific value in the word 0 anymore and yet
883 * they forbid to report the ATA version in the word 80 and require the 888 * they forbid to report the ATA version in the word 80 and require the
@@ -886,44 +891,40 @@ static inline int ata_id_is_cfa(const u16 *id)
886 * and while those that don't indicate CFA feature support need some 891 * and while those that don't indicate CFA feature support need some
887 * sort of quirk list, it seems impractical for the ones that do... 892 * sort of quirk list, it seems impractical for the ones that do...
888 */ 893 */
889 if ((id[ATA_ID_COMMAND_SET_2] & 0xC004) == 0x4004) 894 return (id[ATA_ID_COMMAND_SET_2] & 0xC004) == 0x4004;
890 return 1;
891 return 0;
892} 895}
893 896
894static inline int ata_id_is_ssd(const u16 *id) 897static inline bool ata_id_is_ssd(const u16 *id)
895{ 898{
896 return id[ATA_ID_ROT_SPEED] == 0x01; 899 return id[ATA_ID_ROT_SPEED] == 0x01;
897} 900}
898 901
899static inline int ata_id_pio_need_iordy(const u16 *id, const u8 pio) 902static inline bool ata_id_pio_need_iordy(const u16 *id, const u8 pio)
900{ 903{
901 /* CF spec. r4.1 Table 22 says no IORDY on PIO5 and PIO6. */ 904 /* CF spec. r4.1 Table 22 says no IORDY on PIO5 and PIO6. */
902 if (pio > 4 && ata_id_is_cfa(id)) 905 if (pio > 4 && ata_id_is_cfa(id))
903 return 0; 906 return false;
904 /* For PIO3 and higher it is mandatory. */ 907 /* For PIO3 and higher it is mandatory. */
905 if (pio > 2) 908 if (pio > 2)
906 return 1; 909 return true;
907 /* Turn it on when possible. */ 910 /* Turn it on when possible. */
908 if (ata_id_has_iordy(id)) 911 return ata_id_has_iordy(id);
909 return 1;
910 return 0;
911} 912}
912 913
913static inline int ata_drive_40wire(const u16 *dev_id) 914static inline bool ata_drive_40wire(const u16 *dev_id)
914{ 915{
915 if (ata_id_is_sata(dev_id)) 916 if (ata_id_is_sata(dev_id))
916 return 0; /* SATA */ 917 return false; /* SATA */
917 if ((dev_id[ATA_ID_HW_CONFIG] & 0xE000) == 0x6000) 918 if ((dev_id[ATA_ID_HW_CONFIG] & 0xE000) == 0x6000)
918 return 0; /* 80 wire */ 919 return false; /* 80 wire */
919 return 1; 920 return true;
920} 921}
921 922
922static inline int ata_drive_40wire_relaxed(const u16 *dev_id) 923static inline bool ata_drive_40wire_relaxed(const u16 *dev_id)
923{ 924{
924 if ((dev_id[ATA_ID_HW_CONFIG] & 0x2000) == 0x2000) 925 if ((dev_id[ATA_ID_HW_CONFIG] & 0x2000) == 0x2000)
925 return 0; /* 80 wire */ 926 return false; /* 80 wire */
926 return 1; 927 return true;
927} 928}
928 929
929static inline int atapi_cdb_len(const u16 *dev_id) 930static inline int atapi_cdb_len(const u16 *dev_id)
@@ -936,12 +937,12 @@ static inline int atapi_cdb_len(const u16 *dev_id)
936 } 937 }
937} 938}
938 939
939static inline int atapi_command_packet_set(const u16 *dev_id) 940static inline bool atapi_command_packet_set(const u16 *dev_id)
940{ 941{
941 return (dev_id[ATA_ID_CONFIG] >> 8) & 0x1f; 942 return (dev_id[ATA_ID_CONFIG] >> 8) & 0x1f;
942} 943}
943 944
944static inline int atapi_id_dmadir(const u16 *dev_id) 945static inline bool atapi_id_dmadir(const u16 *dev_id)
945{ 946{
946 return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000); 947 return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000);
947} 948}
@@ -954,13 +955,13 @@ static inline int atapi_id_dmadir(const u16 *dev_id)
954 * 955 *
955 * It is called only once for each device. 956 * It is called only once for each device.
956 */ 957 */
957static inline int ata_id_is_lba_capacity_ok(u16 *id) 958static inline bool ata_id_is_lba_capacity_ok(u16 *id)
958{ 959{
959 unsigned long lba_sects, chs_sects, head, tail; 960 unsigned long lba_sects, chs_sects, head, tail;
960 961
961 /* No non-LBA info .. so valid! */ 962 /* No non-LBA info .. so valid! */
962 if (id[ATA_ID_CYLS] == 0) 963 if (id[ATA_ID_CYLS] == 0)
963 return 1; 964 return true;
964 965
965 lba_sects = ata_id_u32(id, ATA_ID_LBA_CAPACITY); 966 lba_sects = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
966 967
@@ -975,13 +976,13 @@ static inline int ata_id_is_lba_capacity_ok(u16 *id)
975 id[ATA_ID_SECTORS] == 63 && 976 id[ATA_ID_SECTORS] == 63 &&
976 (id[ATA_ID_HEADS] == 15 || id[ATA_ID_HEADS] == 16) && 977 (id[ATA_ID_HEADS] == 15 || id[ATA_ID_HEADS] == 16) &&
977 (lba_sects >= 16383 * 63 * id[ATA_ID_HEADS])) 978 (lba_sects >= 16383 * 63 * id[ATA_ID_HEADS]))
978 return 1; 979 return true;
979 980
980 chs_sects = id[ATA_ID_CYLS] * id[ATA_ID_HEADS] * id[ATA_ID_SECTORS]; 981 chs_sects = id[ATA_ID_CYLS] * id[ATA_ID_HEADS] * id[ATA_ID_SECTORS];
981 982
982 /* perform a rough sanity check on lba_sects: within 10% is OK */ 983 /* perform a rough sanity check on lba_sects: within 10% is OK */
983 if (lba_sects - chs_sects < chs_sects/10) 984 if (lba_sects - chs_sects < chs_sects/10)
984 return 1; 985 return true;
985 986
986 /* some drives have the word order reversed */ 987 /* some drives have the word order reversed */
987 head = (lba_sects >> 16) & 0xffff; 988 head = (lba_sects >> 16) & 0xffff;
@@ -990,10 +991,10 @@ static inline int ata_id_is_lba_capacity_ok(u16 *id)
990 991
991 if (lba_sects - chs_sects < chs_sects/10) { 992 if (lba_sects - chs_sects < chs_sects/10) {
992 *(__le32 *)&id[ATA_ID_LBA_CAPACITY] = __cpu_to_le32(lba_sects); 993 *(__le32 *)&id[ATA_ID_LBA_CAPACITY] = __cpu_to_le32(lba_sects);
993 return 1; /* LBA capacity is (now) good */ 994 return true; /* LBA capacity is (now) good */
994 } 995 }
995 996
996 return 0; /* LBA capacity value may be bad */ 997 return false; /* LBA capacity value may be bad */
997} 998}
998 999
999static inline void ata_id_to_hd_driveid(u16 *id) 1000static inline void ata_id_to_hd_driveid(u16 *id)
@@ -1051,19 +1052,19 @@ static inline int is_multi_taskfile(struct ata_taskfile *tf)
1051 (tf->command == ATA_CMD_WRITE_MULTI_FUA_EXT); 1052 (tf->command == ATA_CMD_WRITE_MULTI_FUA_EXT);
1052} 1053}
1053 1054
1054static inline int ata_ok(u8 status) 1055static inline bool ata_ok(u8 status)
1055{ 1056{
1056 return ((status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_ERR)) 1057 return ((status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_ERR))
1057 == ATA_DRDY); 1058 == ATA_DRDY);
1058} 1059}
1059 1060
1060static inline int lba_28_ok(u64 block, u32 n_block) 1061static inline bool lba_28_ok(u64 block, u32 n_block)
1061{ 1062{
1062 /* check the ending block number: must be LESS THAN 0x0fffffff */ 1063 /* check the ending block number: must be LESS THAN 0x0fffffff */
1063 return ((block + n_block) < ((1 << 28) - 1)) && (n_block <= 256); 1064 return ((block + n_block) < ((1 << 28) - 1)) && (n_block <= 256);
1064} 1065}
1065 1066
1066static inline int lba_48_ok(u64 block, u32 n_block) 1067static inline bool lba_48_ok(u64 block, u32 n_block)
1067{ 1068{
1068 /* check the ending block number */ 1069 /* check the ending block number */
1069 return ((block + n_block - 1) < ((u64)1 << 48)) && (n_block <= 65536); 1070 return ((block + n_block - 1) < ((u64)1 << 48)) && (n_block <= 65536);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4d18ff34670a..d5063e1b5555 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -699,7 +699,7 @@ extern void blk_start_queue(struct request_queue *q);
699extern void blk_stop_queue(struct request_queue *q); 699extern void blk_stop_queue(struct request_queue *q);
700extern void blk_sync_queue(struct request_queue *q); 700extern void blk_sync_queue(struct request_queue *q);
701extern void __blk_stop_queue(struct request_queue *q); 701extern void __blk_stop_queue(struct request_queue *q);
702extern void __blk_run_queue(struct request_queue *); 702extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
703extern void blk_run_queue(struct request_queue *); 703extern void blk_run_queue(struct request_queue *);
704extern int blk_rq_map_user(struct request_queue *, struct request *, 704extern int blk_rq_map_user(struct request_queue *, struct request *,
705 struct rq_map_data *, void __user *, unsigned long, 705 struct rq_map_data *, void __user *, unsigned long,
@@ -1088,7 +1088,6 @@ static inline void put_dev_sector(Sector p)
1088 1088
1089struct work_struct; 1089struct work_struct;
1090int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); 1090int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
1091int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
1092 1091
1093#ifdef CONFIG_BLK_CGROUP 1092#ifdef CONFIG_BLK_CGROUP
1094/* 1093/*
@@ -1136,7 +1135,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
1136extern int blk_throtl_init(struct request_queue *q); 1135extern int blk_throtl_init(struct request_queue *q);
1137extern void blk_throtl_exit(struct request_queue *q); 1136extern void blk_throtl_exit(struct request_queue *q);
1138extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); 1137extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
1139extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay);
1140extern void throtl_shutdown_timer_wq(struct request_queue *q); 1138extern void throtl_shutdown_timer_wq(struct request_queue *q);
1141#else /* CONFIG_BLK_DEV_THROTTLING */ 1139#else /* CONFIG_BLK_DEV_THROTTLING */
1142static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) 1140static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
@@ -1146,7 +1144,6 @@ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
1146 1144
1147static inline int blk_throtl_init(struct request_queue *q) { return 0; } 1145static inline int blk_throtl_init(struct request_queue *q) { return 0; }
1148static inline int blk_throtl_exit(struct request_queue *q) { return 0; } 1146static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
1149static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {}
1150static inline void throtl_shutdown_timer_wq(struct request_queue *q) {} 1147static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
1151#endif /* CONFIG_BLK_DEV_THROTTLING */ 1148#endif /* CONFIG_BLK_DEV_THROTTLING */
1152 1149
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 3395cf7130f5..b22fb0d3db0f 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -245,7 +245,6 @@ static inline int blk_cmd_buf_len(struct request *rq)
245 245
246extern void blk_dump_cmd(char *buf, struct request *rq); 246extern void blk_dump_cmd(char *buf, struct request *rq);
247extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); 247extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
248extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
249 248
250#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ 249#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
251 250
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c3011beac30d..31d91a64838b 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -123,6 +123,7 @@ struct ceph_msg_pos {
123#define SOCK_CLOSED 11 /* socket state changed to closed */ 123#define SOCK_CLOSED 11 /* socket state changed to closed */
124#define OPENING 13 /* open connection w/ (possibly new) peer */ 124#define OPENING 13 /* open connection w/ (possibly new) peer */
125#define DEAD 14 /* dead, about to kfree */ 125#define DEAD 14 /* dead, about to kfree */
126#define BACKOFF 15
126 127
127/* 128/*
128 * A single connection with another host. 129 * A single connection with another host.
@@ -160,7 +161,6 @@ struct ceph_connection {
160 struct list_head out_queue; 161 struct list_head out_queue;
161 struct list_head out_sent; /* sending or sent but unacked */ 162 struct list_head out_sent; /* sending or sent but unacked */
162 u64 out_seq; /* last message queued for send */ 163 u64 out_seq; /* last message queued for send */
163 bool out_keepalive_pending;
164 164
165 u64 in_seq, in_seq_acked; /* last message received, acked */ 165 u64 in_seq, in_seq_acked; /* last message received, acked */
166 166
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ce104e33cd22..e654fa239916 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -474,7 +474,8 @@ struct cgroup_subsys {
474 struct cgroup *old_cgrp, struct task_struct *tsk, 474 struct cgroup *old_cgrp, struct task_struct *tsk,
475 bool threadgroup); 475 bool threadgroup);
476 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); 476 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
477 void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); 477 void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
478 struct cgroup *old_cgrp, struct task_struct *task);
478 int (*populate)(struct cgroup_subsys *ss, 479 int (*populate)(struct cgroup_subsys *ss,
479 struct cgroup *cgrp); 480 struct cgroup *cgrp);
480 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); 481 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
@@ -626,6 +627,7 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg,
626/* Get id and depth of css */ 627/* Get id and depth of css */
627unsigned short css_id(struct cgroup_subsys_state *css); 628unsigned short css_id(struct cgroup_subsys_state *css);
628unsigned short css_depth(struct cgroup_subsys_state *css); 629unsigned short css_depth(struct cgroup_subsys_state *css);
630struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
629 631
630#else /* !CONFIG_CGROUPS */ 632#else /* !CONFIG_CGROUPS */
631 633
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index ccefff02b6cb..cdbfcb8780ec 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -65,4 +65,8 @@ SUBSYS(net_cls)
65SUBSYS(blkio) 65SUBSYS(blkio)
66#endif 66#endif
67 67
68#ifdef CONFIG_CGROUP_PERF
69SUBSYS(perf)
70#endif
71
68/* */ 72/* */
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 68cd248f6d3e..66900e3c6eb1 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -101,8 +101,8 @@ struct ieee_pfc {
101 */ 101 */
102struct dcb_app { 102struct dcb_app {
103 __u8 selector; 103 __u8 selector;
104 __u32 protocol;
105 __u8 priority; 104 __u8 priority;
105 __u16 protocol;
106}; 106};
107 107
108struct dcbmsg { 108struct dcbmsg {
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 597692f1fc8d..65970b811e22 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -34,7 +34,10 @@ struct debug_obj {
34 34
35/** 35/**
36 * struct debug_obj_descr - object type specific debug description structure 36 * struct debug_obj_descr - object type specific debug description structure
37 *
37 * @name: name of the object typee 38 * @name: name of the object typee
39 * @debug_hint: function returning address, which have associated
40 * kernel symbol, to allow identify the object
38 * @fixup_init: fixup function, which is called when the init check 41 * @fixup_init: fixup function, which is called when the init check
39 * fails 42 * fails
40 * @fixup_activate: fixup function, which is called when the activate check 43 * @fixup_activate: fixup function, which is called when the activate check
@@ -46,7 +49,7 @@ struct debug_obj {
46 */ 49 */
47struct debug_obj_descr { 50struct debug_obj_descr {
48 const char *name; 51 const char *name;
49 52 void *(*debug_hint) (void *addr);
50 int (*fixup_init) (void *addr, enum debug_obj_state state); 53 int (*fixup_init) (void *addr, enum debug_obj_state state);
51 int (*fixup_activate) (void *addr, enum debug_obj_state state); 54 int (*fixup_activate) (void *addr, enum debug_obj_state state);
52 int (*fixup_destroy) (void *addr, enum debug_obj_state state); 55 int (*fixup_destroy) (void *addr, enum debug_obj_state state);
diff --git a/include/linux/device.h b/include/linux/device.h
index 1bf5cf0b4513..dba775a68752 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -128,9 +128,7 @@ struct device_driver {
128 128
129 bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ 129 bool suppress_bind_attrs; /* disables bind/unbind via sysfs */
130 130
131#if defined(CONFIG_OF)
132 const struct of_device_id *of_match_table; 131 const struct of_device_id *of_match_table;
133#endif
134 132
135 int (*probe) (struct device *dev); 133 int (*probe) (struct device *dev);
136 int (*remove) (struct device *dev); 134 int (*remove) (struct device *dev);
@@ -422,6 +420,7 @@ struct device {
422 void *platform_data; /* Platform specific data, device 420 void *platform_data; /* Platform specific data, device
423 core doesn't touch it */ 421 core doesn't touch it */
424 struct dev_pm_info power; 422 struct dev_pm_info power;
423 struct dev_power_domain *pwr_domain;
425 424
426#ifdef CONFIG_NUMA 425#ifdef CONFIG_NUMA
427 int numa_node; /* NUMA node this device is close to */ 426 int numa_node; /* NUMA node this device is close to */
@@ -441,9 +440,8 @@ struct device {
441 override */ 440 override */
442 /* arch specific additions */ 441 /* arch specific additions */
443 struct dev_archdata archdata; 442 struct dev_archdata archdata;
444#ifdef CONFIG_OF 443
445 struct device_node *of_node; 444 struct device_node *of_node; /* associated device tree node */
446#endif
447 445
448 dev_t devt; /* dev_t, creates the sysfs "dev" */ 446 dev_t devt; /* dev_t, creates the sysfs "dev" */
449 447
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 28028988c862..33a42f24b275 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -8,6 +8,9 @@ struct inode;
8struct super_block; 8struct super_block;
9struct vfsmount; 9struct vfsmount;
10 10
11/* limit the handle size to NFSv4 handle size now */
12#define MAX_HANDLE_SZ 128
13
11/* 14/*
12 * The fileid_type identifies how the file within the filesystem is encoded. 15 * The fileid_type identifies how the file within the filesystem is encoded.
13 * In theory this is freely set and parsed by the filesystem, but we try to 16 * In theory this is freely set and parsed by the filesystem, but we try to
@@ -121,8 +124,10 @@ struct fid {
121 * set, the encode_fh() should store sufficient information so that a good 124 * set, the encode_fh() should store sufficient information so that a good
122 * attempt can be made to find not only the file but also it's place in the 125 * attempt can be made to find not only the file but also it's place in the
123 * filesystem. This typically means storing a reference to de->d_parent in 126 * filesystem. This typically means storing a reference to de->d_parent in
124 * the filehandle fragment. encode_fh() should return the number of bytes 127 * the filehandle fragment. encode_fh() should return the fileid_type on
125 * stored or a negative error code such as %-ENOSPC 128 * success and on error returns 255 (if the space needed to encode fh is
129 * greater than @max_len*4 bytes). On error @max_len contains the minimum
130 * size(in 4 byte unit) needed to encode the file handle.
126 * 131 *
127 * fh_to_dentry: 132 * fh_to_dentry:
128 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle 133 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 65990ef612f5..6043c64c207a 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -884,7 +884,8 @@ extern int ext3fs_dirhash(const char *name, int len, struct
884 dx_hash_info *hinfo); 884 dx_hash_info *hinfo);
885 885
886/* ialloc.c */ 886/* ialloc.c */
887extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); 887extern struct inode * ext3_new_inode (handle_t *, struct inode *,
888 const struct qstr *, int);
888extern void ext3_free_inode (handle_t *, struct inode *); 889extern void ext3_free_inode (handle_t *, struct inode *);
889extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); 890extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
890extern unsigned long ext3_count_free_inodes (struct super_block *); 891extern unsigned long ext3_count_free_inodes (struct super_block *);
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a562fa5fb4e3..f550f894ba15 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -46,6 +46,7 @@
46 unlinking file. */ 46 unlinking file. */
47#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ 47#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
48#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ 48#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
49#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
49 50
50#ifdef __KERNEL__ 51#ifdef __KERNEL__
51 52
diff --git a/include/linux/file.h b/include/linux/file.h
index e85baebf6279..21a79958541c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -29,6 +29,8 @@ static inline void fput_light(struct file *file, int fput_needed)
29 29
30extern struct file *fget(unsigned int fd); 30extern struct file *fget(unsigned int fd);
31extern struct file *fget_light(unsigned int fd, int *fput_needed); 31extern struct file *fget_light(unsigned int fd, int *fput_needed);
32extern struct file *fget_raw(unsigned int fd);
33extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
32extern void set_close_on_exec(unsigned int fd, int flag); 34extern void set_close_on_exec(unsigned int fd, int flag);
33extern void put_filp(struct file *); 35extern void put_filp(struct file *);
34extern int alloc_fd(unsigned start, unsigned flags); 36extern int alloc_fd(unsigned start, unsigned flags);
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index da7e52b099f3..1effc8b56b4e 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -109,7 +109,7 @@ static inline void freezer_count(void)
109} 109}
110 110
111/* 111/*
112 * Check if the task should be counted as freezeable by the freezer 112 * Check if the task should be counted as freezable by the freezer
113 */ 113 */
114static inline int freezer_should_skip(struct task_struct *p) 114static inline int freezer_should_skip(struct task_struct *p)
115{ 115{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bd3215940c37..2f5a71d6d766 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -102,6 +102,9 @@ struct inodes_stat_t {
102/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ 102/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
103#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) 103#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
104 104
105/* File is opened with O_PATH; almost nothing can be done with it */
106#define FMODE_PATH ((__force fmode_t)0x4000)
107
105/* File was opened by fanotify and shouldn't generate fanotify events */ 108/* File was opened by fanotify and shouldn't generate fanotify events */
106#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 109#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
107 110
@@ -649,6 +652,7 @@ struct address_space {
649 spinlock_t private_lock; /* for use by the address_space */ 652 spinlock_t private_lock; /* for use by the address_space */
650 struct list_head private_list; /* ditto */ 653 struct list_head private_list; /* ditto */
651 struct address_space *assoc_mapping; /* ditto */ 654 struct address_space *assoc_mapping; /* ditto */
655 struct mutex unmap_mutex; /* to protect unmapping */
652} __attribute__((aligned(sizeof(long)))); 656} __attribute__((aligned(sizeof(long))));
653 /* 657 /*
654 * On most architectures that alignment is already the case; but 658 * On most architectures that alignment is already the case; but
@@ -797,8 +801,7 @@ struct inode {
797#endif 801#endif
798 802
799#ifdef CONFIG_IMA 803#ifdef CONFIG_IMA
800 /* protected by i_lock */ 804 atomic_t i_readcount; /* struct files open RO */
801 unsigned int i_readcount; /* struct files open RO */
802#endif 805#endif
803 atomic_t i_writecount; 806 atomic_t i_writecount;
804#ifdef CONFIG_SECURITY 807#ifdef CONFIG_SECURITY
@@ -977,6 +980,13 @@ struct file {
977#endif 980#endif
978}; 981};
979 982
983struct file_handle {
984 __u32 handle_bytes;
985 int handle_type;
986 /* file identifier */
987 unsigned char f_handle[0];
988};
989
980#define get_file(x) atomic_long_inc(&(x)->f_count) 990#define get_file(x) atomic_long_inc(&(x)->f_count)
981#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 991#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
982#define file_count(x) atomic_long_read(&(x)->f_count) 992#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -1400,6 +1410,7 @@ struct super_block {
1400 wait_queue_head_t s_wait_unfrozen; 1410 wait_queue_head_t s_wait_unfrozen;
1401 1411
1402 char s_id[32]; /* Informational name */ 1412 char s_id[32]; /* Informational name */
1413 u8 s_uuid[16]; /* UUID */
1403 1414
1404 void *s_fs_info; /* Filesystem private info */ 1415 void *s_fs_info; /* Filesystem private info */
1405 fmode_t s_mode; 1416 fmode_t s_mode;
@@ -1873,6 +1884,8 @@ extern void drop_collected_mounts(struct vfsmount *);
1873extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, 1884extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
1874 struct vfsmount *); 1885 struct vfsmount *);
1875extern int vfs_statfs(struct path *, struct kstatfs *); 1886extern int vfs_statfs(struct path *, struct kstatfs *);
1887extern int user_statfs(const char __user *, struct kstatfs *);
1888extern int fd_statfs(int, struct kstatfs *);
1876extern int statfs_by_dentry(struct dentry *, struct kstatfs *); 1889extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
1877extern int freeze_super(struct super_block *super); 1890extern int freeze_super(struct super_block *super);
1878extern int thaw_super(struct super_block *super); 1891extern int thaw_super(struct super_block *super);
@@ -1989,6 +2002,8 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset,
1989extern long do_sys_open(int dfd, const char __user *filename, int flags, 2002extern long do_sys_open(int dfd, const char __user *filename, int flags,
1990 int mode); 2003 int mode);
1991extern struct file *filp_open(const char *, int, int); 2004extern struct file *filp_open(const char *, int, int);
2005extern struct file *file_open_root(struct dentry *, struct vfsmount *,
2006 const char *, int);
1992extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, 2007extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
1993 const struct cred *); 2008 const struct cred *);
1994extern int filp_close(struct file *, fl_owner_t id); 2009extern int filp_close(struct file *, fl_owner_t id);
@@ -2139,7 +2154,7 @@ extern void check_disk_size_change(struct gendisk *disk,
2139 struct block_device *bdev); 2154 struct block_device *bdev);
2140extern int revalidate_disk(struct gendisk *); 2155extern int revalidate_disk(struct gendisk *);
2141extern int check_disk_change(struct block_device *); 2156extern int check_disk_change(struct block_device *);
2142extern int __invalidate_device(struct block_device *); 2157extern int __invalidate_device(struct block_device *, bool);
2143extern int invalidate_partition(struct gendisk *, int); 2158extern int invalidate_partition(struct gendisk *, int);
2144#endif 2159#endif
2145unsigned long invalidate_mapping_pages(struct address_space *mapping, 2160unsigned long invalidate_mapping_pages(struct address_space *mapping,
@@ -2199,15 +2214,31 @@ static inline void allow_write_access(struct file *file)
2199 if (file) 2214 if (file)
2200 atomic_inc(&file->f_path.dentry->d_inode->i_writecount); 2215 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
2201} 2216}
2217#ifdef CONFIG_IMA
2218static inline void i_readcount_dec(struct inode *inode)
2219{
2220 BUG_ON(!atomic_read(&inode->i_readcount));
2221 atomic_dec(&inode->i_readcount);
2222}
2223static inline void i_readcount_inc(struct inode *inode)
2224{
2225 atomic_inc(&inode->i_readcount);
2226}
2227#else
2228static inline void i_readcount_dec(struct inode *inode)
2229{
2230 return;
2231}
2232static inline void i_readcount_inc(struct inode *inode)
2233{
2234 return;
2235}
2236#endif
2202extern int do_pipe_flags(int *, int); 2237extern int do_pipe_flags(int *, int);
2203extern struct file *create_read_pipe(struct file *f, int flags); 2238extern struct file *create_read_pipe(struct file *f, int flags);
2204extern struct file *create_write_pipe(int flags); 2239extern struct file *create_write_pipe(int flags);
2205extern void free_write_pipe(struct file *); 2240extern void free_write_pipe(struct file *);
2206 2241
2207extern struct file *do_filp_open(int dfd, const char *pathname,
2208 int open_flag, int mode, int acc_mode);
2209extern int may_open(struct path *, int, int);
2210
2211extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2242extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2212extern struct file * open_exec(const char *); 2243extern struct file * open_exec(const char *);
2213 2244
@@ -2225,6 +2256,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
2225 2256
2226extern int inode_init_always(struct super_block *, struct inode *); 2257extern int inode_init_always(struct super_block *, struct inode *);
2227extern void inode_init_once(struct inode *); 2258extern void inode_init_once(struct inode *);
2259extern void address_space_init_once(struct address_space *mapping);
2228extern void ihold(struct inode * inode); 2260extern void ihold(struct inode * inode);
2229extern void iput(struct inode *); 2261extern void iput(struct inode *);
2230extern struct inode * igrab(struct inode *); 2262extern struct inode * igrab(struct inode *);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index dcd6a7c3a435..ca29e03c1fac 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -428,6 +428,7 @@ extern void unregister_ftrace_graph(void);
428 428
429extern void ftrace_graph_init_task(struct task_struct *t); 429extern void ftrace_graph_init_task(struct task_struct *t);
430extern void ftrace_graph_exit_task(struct task_struct *t); 430extern void ftrace_graph_exit_task(struct task_struct *t);
431extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu);
431 432
432static inline int task_curr_ret_stack(struct task_struct *t) 433static inline int task_curr_ret_stack(struct task_struct *t)
433{ 434{
@@ -451,6 +452,7 @@ static inline void unpause_graph_tracing(void)
451 452
452static inline void ftrace_graph_init_task(struct task_struct *t) { } 453static inline void ftrace_graph_init_task(struct task_struct *t) { }
453static inline void ftrace_graph_exit_task(struct task_struct *t) { } 454static inline void ftrace_graph_exit_task(struct task_struct *t) { }
455static inline void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { }
454 456
455static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc, 457static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc,
456 trace_func_graph_ent_t entryfunc) 458 trace_func_graph_ent_t entryfunc)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 47e3997f7b5c..22b32af1b5ec 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -37,7 +37,6 @@ struct trace_entry {
37 unsigned char flags; 37 unsigned char flags;
38 unsigned char preempt_count; 38 unsigned char preempt_count;
39 int pid; 39 int pid;
40 int lock_depth;
41}; 40};
42 41
43#define FTRACE_MAX_EVENT \ 42#define FTRACE_MAX_EVENT \
@@ -208,7 +207,6 @@ struct ftrace_event_call {
208 207
209#define PERF_MAX_TRACE_SIZE 2048 208#define PERF_MAX_TRACE_SIZE 2048
210 209
211#define MAX_FILTER_PRED 32
212#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ 210#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
213 211
214extern void destroy_preds(struct ftrace_event_call *call); 212extern void destroy_preds(struct ftrace_event_call *call);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0b84c61607e8..dca31761b311 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -332,16 +332,19 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
332 return alloc_pages_current(gfp_mask, order); 332 return alloc_pages_current(gfp_mask, order);
333} 333}
334extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, 334extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
335 struct vm_area_struct *vma, unsigned long addr); 335 struct vm_area_struct *vma, unsigned long addr,
336 int node);
336#else 337#else
337#define alloc_pages(gfp_mask, order) \ 338#define alloc_pages(gfp_mask, order) \
338 alloc_pages_node(numa_node_id(), gfp_mask, order) 339 alloc_pages_node(numa_node_id(), gfp_mask, order)
339#define alloc_pages_vma(gfp_mask, order, vma, addr) \ 340#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \
340 alloc_pages(gfp_mask, order) 341 alloc_pages(gfp_mask, order)
341#endif 342#endif
342#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) 343#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
343#define alloc_page_vma(gfp_mask, vma, addr) \ 344#define alloc_page_vma(gfp_mask, vma, addr) \
344 alloc_pages_vma(gfp_mask, 0, vma, addr) 345 alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
346#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
347 alloc_pages_vma(gfp_mask, 0, vma, addr, node)
345 348
346extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); 349extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
347extern unsigned long get_zeroed_page(gfp_t gfp_mask); 350extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f376ddc64c4d..62f500c724f9 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -54,11 +54,13 @@ enum hrtimer_restart {
54 * 0x00 inactive 54 * 0x00 inactive
55 * 0x01 enqueued into rbtree 55 * 0x01 enqueued into rbtree
56 * 0x02 callback function running 56 * 0x02 callback function running
57 * 0x04 timer is migrated to another cpu
57 * 58 *
58 * Special cases: 59 * Special cases:
59 * 0x03 callback function running and enqueued 60 * 0x03 callback function running and enqueued
60 * (was requeued on another CPU) 61 * (was requeued on another CPU)
61 * 0x09 timer was migrated on CPU hotunplug 62 * 0x05 timer was migrated on CPU hotunplug
63 *
62 * The "callback function running and enqueued" status is only possible on 64 * The "callback function running and enqueued" status is only possible on
63 * SMP. It happens for example when a posix timer expired and the callback 65 * SMP. It happens for example when a posix timer expired and the callback
64 * queued a signal. Between dropping the lock which protects the posix timer 66 * queued a signal. Between dropping the lock which protects the posix timer
@@ -67,8 +69,11 @@ enum hrtimer_restart {
67 * as otherwise the timer could be removed before the softirq code finishes the 69 * as otherwise the timer could be removed before the softirq code finishes the
68 * the handling of the timer. 70 * the handling of the timer.
69 * 71 *
70 * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state to 72 * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
71 * preserve the HRTIMER_STATE_CALLBACK bit in the above scenario. 73 * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
74 * also affects HRTIMER_STATE_MIGRATE where the preservation is not
75 * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
76 * enqueued on the new cpu.
72 * 77 *
73 * All state transitions are protected by cpu_base->lock. 78 * All state transitions are protected by cpu_base->lock.
74 */ 79 */
@@ -148,7 +153,12 @@ struct hrtimer_clock_base {
148#endif 153#endif
149}; 154};
150 155
151#define HRTIMER_MAX_CLOCK_BASES 2 156enum hrtimer_base_type {
157 HRTIMER_BASE_REALTIME,
158 HRTIMER_BASE_MONOTONIC,
159 HRTIMER_BASE_BOOTTIME,
160 HRTIMER_MAX_CLOCK_BASES,
161};
152 162
153/* 163/*
154 * struct hrtimer_cpu_base - the per cpu clock bases 164 * struct hrtimer_cpu_base - the per cpu clock bases
@@ -308,6 +318,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
308 318
309extern ktime_t ktime_get(void); 319extern ktime_t ktime_get(void);
310extern ktime_t ktime_get_real(void); 320extern ktime_t ktime_get_real(void);
321extern ktime_t ktime_get_boottime(void);
311 322
312 323
313DECLARE_PER_CPU(struct tick_device, tick_cpu_device); 324DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
@@ -370,8 +381,9 @@ extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
370extern ktime_t hrtimer_get_next_event(void); 381extern ktime_t hrtimer_get_next_event(void);
371 382
372/* 383/*
373 * A timer is active, when it is enqueued into the rbtree or the callback 384 * A timer is active, when it is enqueued into the rbtree or the
374 * function is running. 385 * callback function is running or it's in the state of being migrated
386 * to another cpu.
375 */ 387 */
376static inline int hrtimer_active(const struct hrtimer *timer) 388static inline int hrtimer_active(const struct hrtimer *timer)
377{ 389{
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 903576df88dc..06a8d9c7de98 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -258,9 +258,7 @@ struct i2c_board_info {
258 unsigned short addr; 258 unsigned short addr;
259 void *platform_data; 259 void *platform_data;
260 struct dev_archdata *archdata; 260 struct dev_archdata *archdata;
261#ifdef CONFIG_OF
262 struct device_node *of_node; 261 struct device_node *of_node;
263#endif
264 int irq; 262 int irq;
265}; 263};
266 264
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 975837e7d6c0..09e6e62f9953 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -20,7 +20,6 @@ extern void ima_inode_free(struct inode *inode);
20extern int ima_file_check(struct file *file, int mask); 20extern int ima_file_check(struct file *file, int mask);
21extern void ima_file_free(struct file *file); 21extern void ima_file_free(struct file *file);
22extern int ima_file_mmap(struct file *file, unsigned long prot); 22extern int ima_file_mmap(struct file *file, unsigned long prot);
23extern void ima_counts_get(struct file *file);
24 23
25#else 24#else
26static inline int ima_bprm_check(struct linux_binprm *bprm) 25static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -53,10 +52,5 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
53 return 0; 52 return 0;
54} 53}
55 54
56static inline void ima_counts_get(struct file *file)
57{
58 return;
59}
60
61#endif /* CONFIG_IMA_H */ 55#endif /* CONFIG_IMA_H */
62#endif /* _LINUX_IMA_H */ 56#endif /* _LINUX_IMA_H */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 55e0d4253e49..59b72ca1c5d1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,6 +14,8 @@
14#include <linux/smp.h> 14#include <linux/smp.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/hrtimer.h> 16#include <linux/hrtimer.h>
17#include <linux/kref.h>
18#include <linux/workqueue.h>
17 19
18#include <asm/atomic.h> 20#include <asm/atomic.h>
19#include <asm/ptrace.h> 21#include <asm/ptrace.h>
@@ -55,7 +57,8 @@
55 * Used by threaded interrupts which need to keep the 57 * Used by threaded interrupts which need to keep the
56 * irq line disabled until the threaded handler has been run. 58 * irq line disabled until the threaded handler has been run.
57 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend 59 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
58 * 60 * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
61 * IRQF_NO_THREAD - Interrupt cannot be threaded
59 */ 62 */
60#define IRQF_DISABLED 0x00000020 63#define IRQF_DISABLED 0x00000020
61#define IRQF_SAMPLE_RANDOM 0x00000040 64#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -67,22 +70,10 @@
67#define IRQF_IRQPOLL 0x00001000 70#define IRQF_IRQPOLL 0x00001000
68#define IRQF_ONESHOT 0x00002000 71#define IRQF_ONESHOT 0x00002000
69#define IRQF_NO_SUSPEND 0x00004000 72#define IRQF_NO_SUSPEND 0x00004000
73#define IRQF_FORCE_RESUME 0x00008000
74#define IRQF_NO_THREAD 0x00010000
70 75
71#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND) 76#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
72
73/*
74 * Bits used by threaded handlers:
75 * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
76 * IRQTF_DIED - handler thread died
77 * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
78 * IRQTF_AFFINITY - irq thread is requested to adjust affinity
79 */
80enum {
81 IRQTF_RUNTHREAD,
82 IRQTF_DIED,
83 IRQTF_WARNED,
84 IRQTF_AFFINITY,
85};
86 77
87/* 78/*
88 * These values can be returned by request_any_context_irq() and 79 * These values can be returned by request_any_context_irq() and
@@ -110,6 +101,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
110 * @thread_fn: interupt handler function for threaded interrupts 101 * @thread_fn: interupt handler function for threaded interrupts
111 * @thread: thread pointer for threaded interrupts 102 * @thread: thread pointer for threaded interrupts
112 * @thread_flags: flags related to @thread 103 * @thread_flags: flags related to @thread
104 * @thread_mask: bitmask for keeping track of @thread activity
113 */ 105 */
114struct irqaction { 106struct irqaction {
115 irq_handler_t handler; 107 irq_handler_t handler;
@@ -120,6 +112,7 @@ struct irqaction {
120 irq_handler_t thread_fn; 112 irq_handler_t thread_fn;
121 struct task_struct *thread; 113 struct task_struct *thread;
122 unsigned long thread_flags; 114 unsigned long thread_flags;
115 unsigned long thread_mask;
123 const char *name; 116 const char *name;
124 struct proc_dir_entry *dir; 117 struct proc_dir_entry *dir;
125} ____cacheline_internodealigned_in_smp; 118} ____cacheline_internodealigned_in_smp;
@@ -240,6 +233,35 @@ extern int irq_can_set_affinity(unsigned int irq);
240extern int irq_select_affinity(unsigned int irq); 233extern int irq_select_affinity(unsigned int irq);
241 234
242extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); 235extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
236
237/**
238 * struct irq_affinity_notify - context for notification of IRQ affinity changes
239 * @irq: Interrupt to which notification applies
240 * @kref: Reference count, for internal use
241 * @work: Work item, for internal use
242 * @notify: Function to be called on change. This will be
243 * called in process context.
244 * @release: Function to be called on release. This will be
245 * called in process context. Once registered, the
246 * structure must only be freed when this function is
247 * called or later.
248 */
249struct irq_affinity_notify {
250 unsigned int irq;
251 struct kref kref;
252 struct work_struct work;
253 void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
254 void (*release)(struct kref *ref);
255};
256
257extern int
258irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
259
260static inline void irq_run_affinity_notifiers(void)
261{
262 flush_scheduled_work();
263}
264
243#else /* CONFIG_SMP */ 265#else /* CONFIG_SMP */
244 266
245static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) 267static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -255,7 +277,7 @@ static inline int irq_can_set_affinity(unsigned int irq)
255static inline int irq_select_affinity(unsigned int irq) { return 0; } 277static inline int irq_select_affinity(unsigned int irq) { return 0; }
256 278
257static inline int irq_set_affinity_hint(unsigned int irq, 279static inline int irq_set_affinity_hint(unsigned int irq,
258 const struct cpumask *m) 280 const struct cpumask *m)
259{ 281{
260 return -EINVAL; 282 return -EINVAL;
261} 283}
@@ -314,16 +336,24 @@ static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long
314} 336}
315 337
316/* IRQ wakeup (PM) control: */ 338/* IRQ wakeup (PM) control: */
317extern int set_irq_wake(unsigned int irq, unsigned int on); 339extern int irq_set_irq_wake(unsigned int irq, unsigned int on);
340
341#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
342/* Please do not use: Use the replacement functions instead */
343static inline int set_irq_wake(unsigned int irq, unsigned int on)
344{
345 return irq_set_irq_wake(irq, on);
346}
347#endif
318 348
319static inline int enable_irq_wake(unsigned int irq) 349static inline int enable_irq_wake(unsigned int irq)
320{ 350{
321 return set_irq_wake(irq, 1); 351 return irq_set_irq_wake(irq, 1);
322} 352}
323 353
324static inline int disable_irq_wake(unsigned int irq) 354static inline int disable_irq_wake(unsigned int irq)
325{ 355{
326 return set_irq_wake(irq, 0); 356 return irq_set_irq_wake(irq, 0);
327} 357}
328 358
329#else /* !CONFIG_GENERIC_HARDIRQS */ 359#else /* !CONFIG_GENERIC_HARDIRQS */
@@ -353,6 +383,13 @@ static inline int disable_irq_wake(unsigned int irq)
353} 383}
354#endif /* CONFIG_GENERIC_HARDIRQS */ 384#endif /* CONFIG_GENERIC_HARDIRQS */
355 385
386
387#ifdef CONFIG_IRQ_FORCED_THREADING
388extern bool force_irqthreads;
389#else
390#define force_irqthreads (0)
391#endif
392
356#ifndef __ARCH_SET_SOFTIRQ_PENDING 393#ifndef __ARCH_SET_SOFTIRQ_PENDING
357#define set_softirq_pending(x) (local_softirq_pending() = (x)) 394#define set_softirq_pending(x) (local_softirq_pending() = (x))
358#define or_softirq_pending(x) (local_softirq_pending() |= (x)) 395#define or_softirq_pending(x) (local_softirq_pending() |= (x))
@@ -426,6 +463,13 @@ extern void raise_softirq(unsigned int nr);
426 */ 463 */
427DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); 464DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
428 465
466DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
467
468static inline struct task_struct *this_cpu_ksoftirqd(void)
469{
470 return this_cpu_read(ksoftirqd);
471}
472
429/* Try to send a softirq to a remote cpu. If this cannot be done, the 473/* Try to send a softirq to a remote cpu. If this cannot be done, the
430 * work will be queued to the local cpu. 474 * work will be queued to the local cpu.
431 */ 475 */
@@ -645,6 +689,7 @@ static inline void init_irq_proc(void)
645 689
646struct seq_file; 690struct seq_file;
647int show_interrupts(struct seq_file *p, void *v); 691int show_interrupts(struct seq_file *p, void *v);
692int arch_show_interrupts(struct seq_file *p, int prec);
648 693
649extern int early_irq_init(void); 694extern int early_irq_init(void);
650extern int arch_probe_nr_irqs(void); 695extern int arch_probe_nr_irqs(void);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 80fcb53057bc..1d3577f30d45 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -29,61 +29,104 @@
29#include <asm/irq_regs.h> 29#include <asm/irq_regs.h>
30 30
31struct irq_desc; 31struct irq_desc;
32struct irq_data;
32typedef void (*irq_flow_handler_t)(unsigned int irq, 33typedef void (*irq_flow_handler_t)(unsigned int irq,
33 struct irq_desc *desc); 34 struct irq_desc *desc);
34 35typedef void (*irq_preflow_handler_t)(struct irq_data *data);
35 36
36/* 37/*
37 * IRQ line status. 38 * IRQ line status.
38 * 39 *
39 * Bits 0-7 are reserved for the IRQF_* bits in linux/interrupt.h 40 * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h
41 *
42 * IRQ_TYPE_NONE - default, unspecified type
43 * IRQ_TYPE_EDGE_RISING - rising edge triggered
44 * IRQ_TYPE_EDGE_FALLING - falling edge triggered
45 * IRQ_TYPE_EDGE_BOTH - rising and falling edge triggered
46 * IRQ_TYPE_LEVEL_HIGH - high level triggered
47 * IRQ_TYPE_LEVEL_LOW - low level triggered
48 * IRQ_TYPE_LEVEL_MASK - Mask to filter out the level bits
49 * IRQ_TYPE_SENSE_MASK - Mask for all the above bits
50 * IRQ_TYPE_PROBE - Special flag for probing in progress
51 *
52 * Bits which can be modified via irq_set/clear/modify_status_flags()
53 * IRQ_LEVEL - Interrupt is level type. Will be also
54 * updated in the code when the above trigger
55 * bits are modified via set_irq_type()
56 * IRQ_PER_CPU - Mark an interrupt PER_CPU. Will protect
57 * it from affinity setting
58 * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing
59 * IRQ_NOREQUEST - Interrupt cannot be requested via
60 * request_irq()
61 * IRQ_NOAUTOEN - Interrupt is not automatically enabled in
62 * request/setup_irq()
63 * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set)
64 * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context
65 * IRQ_NESTED_TRHEAD - Interrupt nests into another thread
66 *
67 * Deprecated bits. They are kept updated as long as
68 * CONFIG_GENERIC_HARDIRQS_NO_COMPAT is not set. Will go away soon. These bits
69 * are internal state of the core code and if you really need to acces
70 * them then talk to the genirq maintainer instead of hacking
71 * something weird.
40 * 72 *
41 * IRQ types
42 */ 73 */
43#define IRQ_TYPE_NONE 0x00000000 /* Default, unspecified type */ 74enum {
44#define IRQ_TYPE_EDGE_RISING 0x00000001 /* Edge rising type */ 75 IRQ_TYPE_NONE = 0x00000000,
45#define IRQ_TYPE_EDGE_FALLING 0x00000002 /* Edge falling type */ 76 IRQ_TYPE_EDGE_RISING = 0x00000001,
46#define IRQ_TYPE_EDGE_BOTH (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING) 77 IRQ_TYPE_EDGE_FALLING = 0x00000002,
47#define IRQ_TYPE_LEVEL_HIGH 0x00000004 /* Level high type */ 78 IRQ_TYPE_EDGE_BOTH = (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING),
48#define IRQ_TYPE_LEVEL_LOW 0x00000008 /* Level low type */ 79 IRQ_TYPE_LEVEL_HIGH = 0x00000004,
49#define IRQ_TYPE_SENSE_MASK 0x0000000f /* Mask of the above */ 80 IRQ_TYPE_LEVEL_LOW = 0x00000008,
50#define IRQ_TYPE_PROBE 0x00000010 /* Probing in progress */ 81 IRQ_TYPE_LEVEL_MASK = (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH),
51 82 IRQ_TYPE_SENSE_MASK = 0x0000000f,
52/* Internal flags */ 83
53#define IRQ_INPROGRESS 0x00000100 /* IRQ handler active - do not enter! */ 84 IRQ_TYPE_PROBE = 0x00000010,
54#define IRQ_DISABLED 0x00000200 /* IRQ disabled - do not enter! */ 85
55#define IRQ_PENDING 0x00000400 /* IRQ pending - replay on enable */ 86 IRQ_LEVEL = (1 << 8),
56#define IRQ_REPLAY 0x00000800 /* IRQ has been replayed but not acked yet */ 87 IRQ_PER_CPU = (1 << 9),
57#define IRQ_AUTODETECT 0x00001000 /* IRQ is being autodetected */ 88 IRQ_NOPROBE = (1 << 10),
58#define IRQ_WAITING 0x00002000 /* IRQ not yet seen - for autodetection */ 89 IRQ_NOREQUEST = (1 << 11),
59#define IRQ_LEVEL 0x00004000 /* IRQ level triggered */ 90 IRQ_NOAUTOEN = (1 << 12),
60#define IRQ_MASKED 0x00008000 /* IRQ masked - shouldn't be seen again */ 91 IRQ_NO_BALANCING = (1 << 13),
61#define IRQ_PER_CPU 0x00010000 /* IRQ is per CPU */ 92 IRQ_MOVE_PCNTXT = (1 << 14),
62#define IRQ_NOPROBE 0x00020000 /* IRQ is not valid for probing */ 93 IRQ_NESTED_THREAD = (1 << 15),
63#define IRQ_NOREQUEST 0x00040000 /* IRQ cannot be requested */ 94
64#define IRQ_NOAUTOEN 0x00080000 /* IRQ will not be enabled on request irq */ 95#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
65#define IRQ_WAKEUP 0x00100000 /* IRQ triggers system wakeup */ 96 IRQ_INPROGRESS = (1 << 16),
66#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ 97 IRQ_REPLAY = (1 << 17),
67#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ 98 IRQ_WAITING = (1 << 18),
68#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ 99 IRQ_DISABLED = (1 << 19),
69#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ 100 IRQ_PENDING = (1 << 20),
70#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ 101 IRQ_MASKED = (1 << 21),
71#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */ 102 IRQ_MOVE_PENDING = (1 << 22),
72#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ 103 IRQ_AFFINITY_SET = (1 << 23),
73#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ 104 IRQ_WAKEUP = (1 << 24),
105#endif
106};
74 107
75#define IRQF_MODIFY_MASK \ 108#define IRQF_MODIFY_MASK \
76 (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ 109 (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
77 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ 110 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
78 IRQ_PER_CPU) 111 IRQ_PER_CPU | IRQ_NESTED_THREAD)
79 112
80#ifdef CONFIG_IRQ_PER_CPU 113#define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
81# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) 114
82# define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) 115static inline __deprecated bool CHECK_IRQ_PER_CPU(unsigned int status)
83#else 116{
84# define CHECK_IRQ_PER_CPU(var) 0 117 return status & IRQ_PER_CPU;
85# define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING 118}
86#endif 119
120/*
121 * Return value for chip->irq_set_affinity()
122 *
123 * IRQ_SET_MASK_OK - OK, core updates irq_data.affinity
124 * IRQ_SET_MASK_NOCPY - OK, chip did update irq_data.affinity
125 */
126enum {
127 IRQ_SET_MASK_OK = 0,
128 IRQ_SET_MASK_OK_NOCOPY,
129};
87 130
88struct msi_desc; 131struct msi_desc;
89 132
@@ -91,6 +134,8 @@ struct msi_desc;
91 * struct irq_data - per irq and irq chip data passed down to chip functions 134 * struct irq_data - per irq and irq chip data passed down to chip functions
92 * @irq: interrupt number 135 * @irq: interrupt number
93 * @node: node index useful for balancing 136 * @node: node index useful for balancing
137 * @state_use_accessor: status information for irq chip functions.
138 * Use accessor functions to deal with it
94 * @chip: low level interrupt hardware access 139 * @chip: low level interrupt hardware access
95 * @handler_data: per-IRQ data for the irq_chip methods 140 * @handler_data: per-IRQ data for the irq_chip methods
96 * @chip_data: platform-specific per-chip private data for the chip 141 * @chip_data: platform-specific per-chip private data for the chip
@@ -105,6 +150,7 @@ struct msi_desc;
105struct irq_data { 150struct irq_data {
106 unsigned int irq; 151 unsigned int irq;
107 unsigned int node; 152 unsigned int node;
153 unsigned int state_use_accessors;
108 struct irq_chip *chip; 154 struct irq_chip *chip;
109 void *handler_data; 155 void *handler_data;
110 void *chip_data; 156 void *chip_data;
@@ -114,6 +160,80 @@ struct irq_data {
114#endif 160#endif
115}; 161};
116 162
163/*
164 * Bit masks for irq_data.state
165 *
166 * IRQD_TRIGGER_MASK - Mask for the trigger type bits
167 * IRQD_SETAFFINITY_PENDING - Affinity setting is pending
168 * IRQD_NO_BALANCING - Balancing disabled for this IRQ
169 * IRQD_PER_CPU - Interrupt is per cpu
170 * IRQD_AFFINITY_SET - Interrupt affinity was set
171 * IRQD_LEVEL - Interrupt is level triggered
172 * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup
173 * from suspend
174 * IRDQ_MOVE_PCNTXT - Interrupt can be moved in process
175 * context
176 */
177enum {
178 IRQD_TRIGGER_MASK = 0xf,
179 IRQD_SETAFFINITY_PENDING = (1 << 8),
180 IRQD_NO_BALANCING = (1 << 10),
181 IRQD_PER_CPU = (1 << 11),
182 IRQD_AFFINITY_SET = (1 << 12),
183 IRQD_LEVEL = (1 << 13),
184 IRQD_WAKEUP_STATE = (1 << 14),
185 IRQD_MOVE_PCNTXT = (1 << 15),
186};
187
188static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
189{
190 return d->state_use_accessors & IRQD_SETAFFINITY_PENDING;
191}
192
193static inline bool irqd_is_per_cpu(struct irq_data *d)
194{
195 return d->state_use_accessors & IRQD_PER_CPU;
196}
197
198static inline bool irqd_can_balance(struct irq_data *d)
199{
200 return !(d->state_use_accessors & (IRQD_PER_CPU | IRQD_NO_BALANCING));
201}
202
203static inline bool irqd_affinity_was_set(struct irq_data *d)
204{
205 return d->state_use_accessors & IRQD_AFFINITY_SET;
206}
207
208static inline u32 irqd_get_trigger_type(struct irq_data *d)
209{
210 return d->state_use_accessors & IRQD_TRIGGER_MASK;
211}
212
213/*
214 * Must only be called inside irq_chip.irq_set_type() functions.
215 */
216static inline void irqd_set_trigger_type(struct irq_data *d, u32 type)
217{
218 d->state_use_accessors &= ~IRQD_TRIGGER_MASK;
219 d->state_use_accessors |= type & IRQD_TRIGGER_MASK;
220}
221
222static inline bool irqd_is_level_type(struct irq_data *d)
223{
224 return d->state_use_accessors & IRQD_LEVEL;
225}
226
227static inline bool irqd_is_wakeup_set(struct irq_data *d)
228{
229 return d->state_use_accessors & IRQD_WAKEUP_STATE;
230}
231
232static inline bool irqd_can_move_in_process_context(struct irq_data *d)
233{
234 return d->state_use_accessors & IRQD_MOVE_PCNTXT;
235}
236
117/** 237/**
118 * struct irq_chip - hardware interrupt chip descriptor 238 * struct irq_chip - hardware interrupt chip descriptor
119 * 239 *
@@ -150,6 +270,7 @@ struct irq_data {
150 * @irq_set_wake: enable/disable power-management wake-on of an IRQ 270 * @irq_set_wake: enable/disable power-management wake-on of an IRQ
151 * @irq_bus_lock: function to lock access to slow bus (i2c) chips 271 * @irq_bus_lock: function to lock access to slow bus (i2c) chips
152 * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips 272 * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
273 * @flags: chip specific flags
153 * 274 *
154 * @release: release function solely used by UML 275 * @release: release function solely used by UML
155 */ 276 */
@@ -196,12 +317,27 @@ struct irq_chip {
196 void (*irq_bus_lock)(struct irq_data *data); 317 void (*irq_bus_lock)(struct irq_data *data);
197 void (*irq_bus_sync_unlock)(struct irq_data *data); 318 void (*irq_bus_sync_unlock)(struct irq_data *data);
198 319
320 unsigned long flags;
321
199 /* Currently used only by UML, might disappear one day.*/ 322 /* Currently used only by UML, might disappear one day.*/
200#ifdef CONFIG_IRQ_RELEASE_METHOD 323#ifdef CONFIG_IRQ_RELEASE_METHOD
201 void (*release)(unsigned int irq, void *dev_id); 324 void (*release)(unsigned int irq, void *dev_id);
202#endif 325#endif
203}; 326};
204 327
328/*
329 * irq_chip specific flags
330 *
331 * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type()
332 * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled
333 * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path
334 */
335enum {
336 IRQCHIP_SET_TYPE_MASKED = (1 << 0),
337 IRQCHIP_EOI_IF_HANDLED = (1 << 1),
338 IRQCHIP_MASK_ON_SUSPEND = (1 << 2),
339};
340
205/* This include will go away once we isolated irq_desc usage to core code */ 341/* This include will go away once we isolated irq_desc usage to core code */
206#include <linux/irqdesc.h> 342#include <linux/irqdesc.h>
207 343
@@ -218,7 +354,7 @@ struct irq_chip {
218# define ARCH_IRQ_INIT_FLAGS 0 354# define ARCH_IRQ_INIT_FLAGS 0
219#endif 355#endif
220 356
221#define IRQ_DEFAULT_INIT_FLAGS (IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS) 357#define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS
222 358
223struct irqaction; 359struct irqaction;
224extern int setup_irq(unsigned int irq, struct irqaction *new); 360extern int setup_irq(unsigned int irq, struct irqaction *new);
@@ -229,9 +365,13 @@ extern void remove_irq(unsigned int irq, struct irqaction *act);
229#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) 365#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
230void move_native_irq(int irq); 366void move_native_irq(int irq);
231void move_masked_irq(int irq); 367void move_masked_irq(int irq);
368void irq_move_irq(struct irq_data *data);
369void irq_move_masked_irq(struct irq_data *data);
232#else 370#else
233static inline void move_native_irq(int irq) { } 371static inline void move_native_irq(int irq) { }
234static inline void move_masked_irq(int irq) { } 372static inline void move_masked_irq(int irq) { }
373static inline void irq_move_irq(struct irq_data *data) { }
374static inline void irq_move_masked_irq(struct irq_data *data) { }
235#endif 375#endif
236 376
237extern int no_irq_affinity; 377extern int no_irq_affinity;
@@ -267,23 +407,23 @@ extern struct irq_chip no_irq_chip;
267extern struct irq_chip dummy_irq_chip; 407extern struct irq_chip dummy_irq_chip;
268 408
269extern void 409extern void
270set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, 410irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
271 irq_flow_handler_t handle);
272extern void
273set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
274 irq_flow_handler_t handle, const char *name); 411 irq_flow_handler_t handle, const char *name);
275 412
413static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip,
414 irq_flow_handler_t handle)
415{
416 irq_set_chip_and_handler_name(irq, chip, handle, NULL);
417}
418
276extern void 419extern void
277__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, 420__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
278 const char *name); 421 const char *name);
279 422
280/*
281 * Set a highlevel flow handler for a given IRQ:
282 */
283static inline void 423static inline void
284set_irq_handler(unsigned int irq, irq_flow_handler_t handle) 424irq_set_handler(unsigned int irq, irq_flow_handler_t handle)
285{ 425{
286 __set_irq_handler(irq, handle, 0, NULL); 426 __irq_set_handler(irq, handle, 0, NULL);
287} 427}
288 428
289/* 429/*
@@ -292,14 +432,11 @@ set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
292 * IRQ_NOREQUEST and IRQ_NOPROBE) 432 * IRQ_NOREQUEST and IRQ_NOPROBE)
293 */ 433 */
294static inline void 434static inline void
295set_irq_chained_handler(unsigned int irq, 435irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle)
296 irq_flow_handler_t handle)
297{ 436{
298 __set_irq_handler(irq, handle, 1, NULL); 437 __irq_set_handler(irq, handle, 1, NULL);
299} 438}
300 439
301extern void set_irq_nested_thread(unsigned int irq, int nest);
302
303void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); 440void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set);
304 441
305static inline void irq_set_status_flags(unsigned int irq, unsigned long set) 442static inline void irq_set_status_flags(unsigned int irq, unsigned long set)
@@ -312,16 +449,24 @@ static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr)
312 irq_modify_status(irq, clr, 0); 449 irq_modify_status(irq, clr, 0);
313} 450}
314 451
315static inline void set_irq_noprobe(unsigned int irq) 452static inline void irq_set_noprobe(unsigned int irq)
316{ 453{
317 irq_modify_status(irq, 0, IRQ_NOPROBE); 454 irq_modify_status(irq, 0, IRQ_NOPROBE);
318} 455}
319 456
320static inline void set_irq_probe(unsigned int irq) 457static inline void irq_set_probe(unsigned int irq)
321{ 458{
322 irq_modify_status(irq, IRQ_NOPROBE, 0); 459 irq_modify_status(irq, IRQ_NOPROBE, 0);
323} 460}
324 461
462static inline void irq_set_nested_thread(unsigned int irq, bool nest)
463{
464 if (nest)
465 irq_set_status_flags(irq, IRQ_NESTED_THREAD);
466 else
467 irq_clear_status_flags(irq, IRQ_NESTED_THREAD);
468}
469
325/* Handle dynamic irq creation and destruction */ 470/* Handle dynamic irq creation and destruction */
326extern unsigned int create_irq_nr(unsigned int irq_want, int node); 471extern unsigned int create_irq_nr(unsigned int irq_want, int node);
327extern int create_irq(void); 472extern int create_irq(void);
@@ -338,14 +483,14 @@ static inline void dynamic_irq_init(unsigned int irq)
338} 483}
339 484
340/* Set/get chip/data for an IRQ: */ 485/* Set/get chip/data for an IRQ: */
341extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); 486extern int irq_set_chip(unsigned int irq, struct irq_chip *chip);
342extern int set_irq_data(unsigned int irq, void *data); 487extern int irq_set_handler_data(unsigned int irq, void *data);
343extern int set_irq_chip_data(unsigned int irq, void *data); 488extern int irq_set_chip_data(unsigned int irq, void *data);
344extern int set_irq_type(unsigned int irq, unsigned int type); 489extern int irq_set_irq_type(unsigned int irq, unsigned int type);
345extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); 490extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
346extern struct irq_data *irq_get_irq_data(unsigned int irq); 491extern struct irq_data *irq_get_irq_data(unsigned int irq);
347 492
348static inline struct irq_chip *get_irq_chip(unsigned int irq) 493static inline struct irq_chip *irq_get_chip(unsigned int irq)
349{ 494{
350 struct irq_data *d = irq_get_irq_data(irq); 495 struct irq_data *d = irq_get_irq_data(irq);
351 return d ? d->chip : NULL; 496 return d ? d->chip : NULL;
@@ -356,7 +501,7 @@ static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d)
356 return d->chip; 501 return d->chip;
357} 502}
358 503
359static inline void *get_irq_chip_data(unsigned int irq) 504static inline void *irq_get_chip_data(unsigned int irq)
360{ 505{
361 struct irq_data *d = irq_get_irq_data(irq); 506 struct irq_data *d = irq_get_irq_data(irq);
362 return d ? d->chip_data : NULL; 507 return d ? d->chip_data : NULL;
@@ -367,18 +512,18 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d)
367 return d->chip_data; 512 return d->chip_data;
368} 513}
369 514
370static inline void *get_irq_data(unsigned int irq) 515static inline void *irq_get_handler_data(unsigned int irq)
371{ 516{
372 struct irq_data *d = irq_get_irq_data(irq); 517 struct irq_data *d = irq_get_irq_data(irq);
373 return d ? d->handler_data : NULL; 518 return d ? d->handler_data : NULL;
374} 519}
375 520
376static inline void *irq_data_get_irq_data(struct irq_data *d) 521static inline void *irq_data_get_irq_handler_data(struct irq_data *d)
377{ 522{
378 return d->handler_data; 523 return d->handler_data;
379} 524}
380 525
381static inline struct msi_desc *get_irq_msi(unsigned int irq) 526static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
382{ 527{
383 struct irq_data *d = irq_get_irq_data(irq); 528 struct irq_data *d = irq_get_irq_data(irq);
384 return d ? d->msi_desc : NULL; 529 return d ? d->msi_desc : NULL;
@@ -389,6 +534,89 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d)
389 return d->msi_desc; 534 return d->msi_desc;
390} 535}
391 536
537#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
538/* Please do not use: Use the replacement functions instead */
539static inline int set_irq_chip(unsigned int irq, struct irq_chip *chip)
540{
541 return irq_set_chip(irq, chip);
542}
543static inline int set_irq_data(unsigned int irq, void *data)
544{
545 return irq_set_handler_data(irq, data);
546}
547static inline int set_irq_chip_data(unsigned int irq, void *data)
548{
549 return irq_set_chip_data(irq, data);
550}
551static inline int set_irq_type(unsigned int irq, unsigned int type)
552{
553 return irq_set_irq_type(irq, type);
554}
555static inline int set_irq_msi(unsigned int irq, struct msi_desc *entry)
556{
557 return irq_set_msi_desc(irq, entry);
558}
559static inline struct irq_chip *get_irq_chip(unsigned int irq)
560{
561 return irq_get_chip(irq);
562}
563static inline void *get_irq_chip_data(unsigned int irq)
564{
565 return irq_get_chip_data(irq);
566}
567static inline void *get_irq_data(unsigned int irq)
568{
569 return irq_get_handler_data(irq);
570}
571static inline void *irq_data_get_irq_data(struct irq_data *d)
572{
573 return irq_data_get_irq_handler_data(d);
574}
575static inline struct msi_desc *get_irq_msi(unsigned int irq)
576{
577 return irq_get_msi_desc(irq);
578}
579static inline void set_irq_noprobe(unsigned int irq)
580{
581 irq_set_noprobe(irq);
582}
583static inline void set_irq_probe(unsigned int irq)
584{
585 irq_set_probe(irq);
586}
587static inline void set_irq_nested_thread(unsigned int irq, int nest)
588{
589 irq_set_nested_thread(irq, nest);
590}
591static inline void
592set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
593 irq_flow_handler_t handle, const char *name)
594{
595 irq_set_chip_and_handler_name(irq, chip, handle, name);
596}
597static inline void
598set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
599 irq_flow_handler_t handle)
600{
601 irq_set_chip_and_handler(irq, chip, handle);
602}
603static inline void
604__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
605 const char *name)
606{
607 __irq_set_handler(irq, handle, is_chained, name);
608}
609static inline void set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
610{
611 irq_set_handler(irq, handle);
612}
613static inline void
614set_irq_chained_handler(unsigned int irq, irq_flow_handler_t handle)
615{
616 irq_set_chained_handler(irq, handle);
617}
618#endif
619
392int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); 620int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
393void irq_free_descs(unsigned int irq, unsigned int cnt); 621void irq_free_descs(unsigned int irq, unsigned int cnt);
394int irq_reserve_irqs(unsigned int from, unsigned int cnt); 622int irq_reserve_irqs(unsigned int from, unsigned int cnt);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c1a95b7b58de..00218371518b 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -8,6 +8,7 @@
8 * For now it's included from <linux/irq.h> 8 * For now it's included from <linux/irq.h>
9 */ 9 */
10 10
11struct irq_affinity_notify;
11struct proc_dir_entry; 12struct proc_dir_entry;
12struct timer_rand_state; 13struct timer_rand_state;
13/** 14/**
@@ -18,13 +19,16 @@ struct timer_rand_state;
18 * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] 19 * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()]
19 * @action: the irq action chain 20 * @action: the irq action chain
20 * @status: status information 21 * @status: status information
22 * @core_internal_state__do_not_mess_with_it: core internal status information
21 * @depth: disable-depth, for nested irq_disable() calls 23 * @depth: disable-depth, for nested irq_disable() calls
22 * @wake_depth: enable depth, for multiple set_irq_wake() callers 24 * @wake_depth: enable depth, for multiple set_irq_wake() callers
23 * @irq_count: stats field to detect stalled irqs 25 * @irq_count: stats field to detect stalled irqs
24 * @last_unhandled: aging timer for unhandled count 26 * @last_unhandled: aging timer for unhandled count
25 * @irqs_unhandled: stats field for spurious unhandled interrupts 27 * @irqs_unhandled: stats field for spurious unhandled interrupts
26 * @lock: locking for SMP 28 * @lock: locking for SMP
29 * @affinity_notify: context for notification of affinity changes
27 * @pending_mask: pending rebalanced interrupts 30 * @pending_mask: pending rebalanced interrupts
31 * @threads_oneshot: bitfield to handle shared oneshot threads
28 * @threads_active: number of irqaction threads currently running 32 * @threads_active: number of irqaction threads currently running
29 * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers 33 * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
30 * @dir: /proc/irq/ procfs entry 34 * @dir: /proc/irq/ procfs entry
@@ -45,6 +49,7 @@ struct irq_desc {
45 struct { 49 struct {
46 unsigned int irq; 50 unsigned int irq;
47 unsigned int node; 51 unsigned int node;
52 unsigned int pad_do_not_even_think_about_it;
48 struct irq_chip *chip; 53 struct irq_chip *chip;
49 void *handler_data; 54 void *handler_data;
50 void *chip_data; 55 void *chip_data;
@@ -59,9 +64,16 @@ struct irq_desc {
59 struct timer_rand_state *timer_rand_state; 64 struct timer_rand_state *timer_rand_state;
60 unsigned int __percpu *kstat_irqs; 65 unsigned int __percpu *kstat_irqs;
61 irq_flow_handler_t handle_irq; 66 irq_flow_handler_t handle_irq;
67#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
68 irq_preflow_handler_t preflow_handler;
69#endif
62 struct irqaction *action; /* IRQ action list */ 70 struct irqaction *action; /* IRQ action list */
71#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
72 unsigned int status_use_accessors;
73#else
63 unsigned int status; /* IRQ status */ 74 unsigned int status; /* IRQ status */
64 75#endif
76 unsigned int core_internal_state__do_not_mess_with_it;
65 unsigned int depth; /* nested irq disables */ 77 unsigned int depth; /* nested irq disables */
66 unsigned int wake_depth; /* nested wake enables */ 78 unsigned int wake_depth; /* nested wake enables */
67 unsigned int irq_count; /* For detecting broken IRQs */ 79 unsigned int irq_count; /* For detecting broken IRQs */
@@ -70,10 +82,12 @@ struct irq_desc {
70 raw_spinlock_t lock; 82 raw_spinlock_t lock;
71#ifdef CONFIG_SMP 83#ifdef CONFIG_SMP
72 const struct cpumask *affinity_hint; 84 const struct cpumask *affinity_hint;
85 struct irq_affinity_notify *affinity_notify;
73#ifdef CONFIG_GENERIC_PENDING_IRQ 86#ifdef CONFIG_GENERIC_PENDING_IRQ
74 cpumask_var_t pending_mask; 87 cpumask_var_t pending_mask;
75#endif 88#endif
76#endif 89#endif
90 unsigned long threads_oneshot;
77 atomic_t threads_active; 91 atomic_t threads_active;
78 wait_queue_head_t wait_for_threads; 92 wait_queue_head_t wait_for_threads;
79#ifdef CONFIG_PROC_FS 93#ifdef CONFIG_PROC_FS
@@ -95,10 +109,51 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
95 109
96#ifdef CONFIG_GENERIC_HARDIRQS 110#ifdef CONFIG_GENERIC_HARDIRQS
97 111
98#define get_irq_desc_chip(desc) ((desc)->irq_data.chip) 112static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
99#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data) 113{
100#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) 114 return &desc->irq_data;
101#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) 115}
116
117static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc)
118{
119 return desc->irq_data.chip;
120}
121
122static inline void *irq_desc_get_chip_data(struct irq_desc *desc)
123{
124 return desc->irq_data.chip_data;
125}
126
127static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
128{
129 return desc->irq_data.handler_data;
130}
131
132static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
133{
134 return desc->irq_data.msi_desc;
135}
136
137#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
138static inline struct irq_chip *get_irq_desc_chip(struct irq_desc *desc)
139{
140 return irq_desc_get_chip(desc);
141}
142static inline void *get_irq_desc_data(struct irq_desc *desc)
143{
144 return irq_desc_get_handler_data(desc);
145}
146
147static inline void *get_irq_desc_chip_data(struct irq_desc *desc)
148{
149 return irq_desc_get_chip_data(desc);
150}
151
152static inline struct msi_desc *get_irq_desc_msi(struct irq_desc *desc)
153{
154 return irq_desc_get_msi_desc(desc);
155}
156#endif
102 157
103/* 158/*
104 * Architectures call this to let the generic IRQ layer 159 * Architectures call this to let the generic IRQ layer
@@ -123,6 +178,7 @@ static inline int irq_has_action(unsigned int irq)
123 return desc->action != NULL; 178 return desc->action != NULL;
124} 179}
125 180
181#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
126static inline int irq_balancing_disabled(unsigned int irq) 182static inline int irq_balancing_disabled(unsigned int irq)
127{ 183{
128 struct irq_desc *desc; 184 struct irq_desc *desc;
@@ -130,6 +186,7 @@ static inline int irq_balancing_disabled(unsigned int irq)
130 desc = irq_to_desc(irq); 186 desc = irq_to_desc(irq);
131 return desc->status & IRQ_NO_BALANCING_MASK; 187 return desc->status & IRQ_NO_BALANCING_MASK;
132} 188}
189#endif
133 190
134/* caller has locked the irq_desc and both params are valid */ 191/* caller has locked the irq_desc and both params are valid */
135static inline void __set_irq_handler_unlocked(int irq, 192static inline void __set_irq_handler_unlocked(int irq,
@@ -140,6 +197,17 @@ static inline void __set_irq_handler_unlocked(int irq,
140 desc = irq_to_desc(irq); 197 desc = irq_to_desc(irq);
141 desc->handle_irq = handler; 198 desc->handle_irq = handler;
142} 199}
200
201#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
202static inline void
203__irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler)
204{
205 struct irq_desc *desc;
206
207 desc = irq_to_desc(irq);
208 desc->preflow_handler = handler;
209}
210#endif
143#endif 211#endif
144 212
145#endif 213#endif
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 6811f4bfc6e7..922aa313c9f9 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
307extern unsigned long clock_t_to_jiffies(unsigned long x); 307extern unsigned long clock_t_to_jiffies(unsigned long x);
308extern u64 jiffies_64_to_clock_t(u64 x); 308extern u64 jiffies_64_to_clock_t(u64 x);
309extern u64 nsec_to_clock_t(u64 x); 309extern u64 nsec_to_clock_t(u64 x);
310extern u64 nsecs_to_jiffies64(u64 n);
310extern unsigned long nsecs_to_jiffies(u64 n); 311extern unsigned long nsecs_to_jiffies(u64 n);
311 312
312#define TIMESTAMP_SIZE 30 313#define TIMESTAMP_SIZE 30
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 65833d4d5998..9efd081bb31e 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -41,6 +41,9 @@ struct key_type {
41 */ 41 */
42 size_t def_datalen; 42 size_t def_datalen;
43 43
44 /* vet a description */
45 int (*vet_description)(const char *description);
46
44 /* instantiate a key of this type 47 /* instantiate a key of this type
45 * - this method should call key_payload_reserve() to determine if the 48 * - this method should call key_payload_reserve() to determine if the
46 * user's quota will hold the payload 49 * user's quota will hold the payload
@@ -102,11 +105,20 @@ extern int key_instantiate_and_link(struct key *key,
102 size_t datalen, 105 size_t datalen,
103 struct key *keyring, 106 struct key *keyring,
104 struct key *instkey); 107 struct key *instkey);
105extern int key_negate_and_link(struct key *key, 108extern int key_reject_and_link(struct key *key,
106 unsigned timeout, 109 unsigned timeout,
110 unsigned error,
107 struct key *keyring, 111 struct key *keyring,
108 struct key *instkey); 112 struct key *instkey);
109extern void complete_request_key(struct key_construction *cons, int error); 113extern void complete_request_key(struct key_construction *cons, int error);
110 114
115static inline int key_negate_and_link(struct key *key,
116 unsigned timeout,
117 struct key *keyring,
118 struct key *instkey)
119{
120 return key_reject_and_link(key, timeout, ENOKEY, keyring, instkey);
121}
122
111#endif /* CONFIG_KEYS */ 123#endif /* CONFIG_KEYS */
112#endif /* _LINUX_KEY_TYPE_H */ 124#endif /* _LINUX_KEY_TYPE_H */
diff --git a/include/linux/key.h b/include/linux/key.h
index 3db0adce1fda..b2bb01719561 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -170,6 +170,7 @@ struct key {
170 struct list_head link; 170 struct list_head link;
171 unsigned long x[2]; 171 unsigned long x[2];
172 void *p[2]; 172 void *p[2];
173 int reject_error;
173 } type_data; 174 } type_data;
174 175
175 /* key data 176 /* key data
@@ -275,6 +276,10 @@ static inline key_serial_t key_serial(struct key *key)
275 return key ? key->serial : 0; 276 return key ? key->serial : 0;
276} 277}
277 278
279#define rcu_dereference_key(KEY) \
280 (rcu_dereference_protected((KEY)->payload.rcudata, \
281 rwsem_is_locked(&((struct key *)(KEY))->sem)))
282
278#ifdef CONFIG_SYSCTL 283#ifdef CONFIG_SYSCTL
279extern ctl_table key_sysctls[]; 284extern ctl_table key_sysctls[];
280#endif 285#endif
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
index bd383f1944fb..9b0b865ce622 100644
--- a/include/linux/keyctl.h
+++ b/include/linux/keyctl.h
@@ -53,5 +53,7 @@
53#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */ 53#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */
54#define KEYCTL_GET_SECURITY 17 /* get key security label */ 54#define KEYCTL_GET_SECURITY 17 /* get key security label */
55#define KEYCTL_SESSION_TO_PARENT 18 /* apply session keyring to parent process */ 55#define KEYCTL_SESSION_TO_PARENT 18 /* apply session keyring to parent process */
56#define KEYCTL_REJECT 19 /* reject a partially constructed key */
57#define KEYCTL_INSTANTIATE_IOV 20 /* instantiate a partially constructed key */
56 58
57#endif /* _LINUX_KEYCTL_H */ 59#endif /* _LINUX_KEYCTL_H */
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index ce0775aa64c3..7ff16f7d3ed4 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -64,7 +64,7 @@ struct kthread_work {
64}; 64};
65 65
66#define KTHREAD_WORKER_INIT(worker) { \ 66#define KTHREAD_WORKER_INIT(worker) { \
67 .lock = SPIN_LOCK_UNLOCKED, \ 67 .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
68 .work_list = LIST_HEAD_INIT((worker).work_list), \ 68 .work_list = LIST_HEAD_INIT((worker).work_list), \
69 } 69 }
70 70
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c9c5d7ad1a2b..c71f46960f39 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -179,10 +179,6 @@ enum {
179 ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ 179 ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */
180 /* (doesn't imply presence) */ 180 /* (doesn't imply presence) */
181 ATA_FLAG_SATA = (1 << 1), 181 ATA_FLAG_SATA = (1 << 1),
182 ATA_FLAG_NO_LEGACY = (1 << 2), /* no legacy mode check */
183 ATA_FLAG_MMIO = (1 << 3), /* use MMIO, not PIO */
184 ATA_FLAG_SRST = (1 << 4), /* (obsolete) use ATA SRST, not E.D.D. */
185 ATA_FLAG_SATA_RESET = (1 << 5), /* (obsolete) use COMRESET */
186 ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */ 182 ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */
187 ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */ 183 ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */
188 ATA_FLAG_PIO_LBA48 = (1 << 8), /* Host DMA engine is LBA28 only */ 184 ATA_FLAG_PIO_LBA48 = (1 << 8), /* Host DMA engine is LBA28 only */
@@ -198,7 +194,6 @@ enum {
198 ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ 194 ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */
199 ATA_FLAG_AN = (1 << 18), /* controller supports AN */ 195 ATA_FLAG_AN = (1 << 18), /* controller supports AN */
200 ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ 196 ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */
201 ATA_FLAG_LPM = (1 << 20), /* driver can handle LPM */
202 ATA_FLAG_EM = (1 << 21), /* driver supports enclosure 197 ATA_FLAG_EM = (1 << 21), /* driver supports enclosure
203 * management */ 198 * management */
204 ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity 199 ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity
@@ -1050,6 +1045,8 @@ extern int ata_scsi_change_queue_depth(struct scsi_device *sdev,
1050 int queue_depth, int reason); 1045 int queue_depth, int reason);
1051extern struct ata_device *ata_dev_pair(struct ata_device *adev); 1046extern struct ata_device *ata_dev_pair(struct ata_device *adev);
1052extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); 1047extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev);
1048extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap);
1049extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q);
1053 1050
1054extern int ata_cable_40wire(struct ata_port *ap); 1051extern int ata_cable_40wire(struct ata_port *ap);
1055extern int ata_cable_80wire(struct ata_port *ap); 1052extern int ata_cable_80wire(struct ata_port *ap);
@@ -1613,6 +1610,9 @@ extern void ata_sff_irq_on(struct ata_port *ap);
1613extern void ata_sff_irq_clear(struct ata_port *ap); 1610extern void ata_sff_irq_clear(struct ata_port *ap);
1614extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, 1611extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
1615 u8 status, int in_wq); 1612 u8 status, int in_wq);
1613extern void ata_sff_queue_work(struct work_struct *work);
1614extern void ata_sff_queue_delayed_work(struct delayed_work *dwork,
1615 unsigned long delay);
1616extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); 1616extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay);
1617extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); 1617extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc);
1618extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); 1618extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc);
diff --git a/include/linux/list.h b/include/linux/list.h
index 9a5f8a71810c..3a54266a1e85 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -96,6 +96,11 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
96 * in an undefined state. 96 * in an undefined state.
97 */ 97 */
98#ifndef CONFIG_DEBUG_LIST 98#ifndef CONFIG_DEBUG_LIST
99static inline void __list_del_entry(struct list_head *entry)
100{
101 __list_del(entry->prev, entry->next);
102}
103
99static inline void list_del(struct list_head *entry) 104static inline void list_del(struct list_head *entry)
100{ 105{
101 __list_del(entry->prev, entry->next); 106 __list_del(entry->prev, entry->next);
@@ -103,6 +108,7 @@ static inline void list_del(struct list_head *entry)
103 entry->prev = LIST_POISON2; 108 entry->prev = LIST_POISON2;
104} 109}
105#else 110#else
111extern void __list_del_entry(struct list_head *entry);
106extern void list_del(struct list_head *entry); 112extern void list_del(struct list_head *entry);
107#endif 113#endif
108 114
@@ -135,7 +141,7 @@ static inline void list_replace_init(struct list_head *old,
135 */ 141 */
136static inline void list_del_init(struct list_head *entry) 142static inline void list_del_init(struct list_head *entry)
137{ 143{
138 __list_del(entry->prev, entry->next); 144 __list_del_entry(entry);
139 INIT_LIST_HEAD(entry); 145 INIT_LIST_HEAD(entry);
140} 146}
141 147
@@ -146,7 +152,7 @@ static inline void list_del_init(struct list_head *entry)
146 */ 152 */
147static inline void list_move(struct list_head *list, struct list_head *head) 153static inline void list_move(struct list_head *list, struct list_head *head)
148{ 154{
149 __list_del(list->prev, list->next); 155 __list_del_entry(list);
150 list_add(list, head); 156 list_add(list, head);
151} 157}
152 158
@@ -158,7 +164,7 @@ static inline void list_move(struct list_head *list, struct list_head *head)
158static inline void list_move_tail(struct list_head *list, 164static inline void list_move_tail(struct list_head *list,
159 struct list_head *head) 165 struct list_head *head)
160{ 166{
161 __list_del(list->prev, list->next); 167 __list_del_entry(list);
162 list_add_tail(list, head); 168 list_add_tail(list, head);
163} 169}
164 170
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 3fd36845ca45..ef4f0b6083a3 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -71,6 +71,7 @@ struct wm8994 {
71 u16 irq_masks_cache[WM8994_NUM_IRQ_REGS]; 71 u16 irq_masks_cache[WM8994_NUM_IRQ_REGS];
72 72
73 /* Used over suspend/resume */ 73 /* Used over suspend/resume */
74 bool suspended;
74 u16 ldo_regs[WM8994_NUM_LDO_REGS]; 75 u16 ldo_regs[WM8994_NUM_LDO_REGS];
75 u16 gpio_regs[WM8994_NUM_GPIO_REGS]; 76 u16 gpio_regs[WM8994_NUM_GPIO_REGS];
76 77
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6385fc17ad4..679300c050f5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1309,8 +1309,6 @@ int add_from_early_node_map(struct range *range, int az,
1309 int nr_range, int nid); 1309 int nr_range, int nid);
1310u64 __init find_memory_core_early(int nid, u64 size, u64 align, 1310u64 __init find_memory_core_early(int nid, u64 size, u64 align,
1311 u64 goal, u64 limit); 1311 u64 goal, u64 limit);
1312void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
1313 u64 goal, u64 limit);
1314typedef int (*work_fn_t)(unsigned long, unsigned long, void *); 1312typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
1315extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); 1313extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
1316extern void sparse_memory_present_with_active_regions(int nid); 1314extern void sparse_memory_present_with_active_regions(int nid);
diff --git a/include/linux/module.h b/include/linux/module.h
index 9bdf27c7615b..5de42043dff0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -62,7 +62,7 @@ struct module_version_attribute {
62 struct module_attribute mattr; 62 struct module_attribute mattr;
63 const char *module_name; 63 const char *module_name;
64 const char *version; 64 const char *version;
65}; 65} __attribute__ ((__aligned__(sizeof(void *))));
66 66
67struct module_kobject 67struct module_kobject
68{ 68{
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f276d4fa01fc..9c8603872c36 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,6 @@ struct nameidata {
19 struct path path; 19 struct path path;
20 struct qstr last; 20 struct qstr last;
21 struct path root; 21 struct path root;
22 struct file *file;
23 struct inode *inode; /* path.dentry.d_inode */ 22 struct inode *inode; /* path.dentry.d_inode */
24 unsigned int flags; 23 unsigned int flags;
25 unsigned seq; 24 unsigned seq;
@@ -63,6 +62,10 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
63#define LOOKUP_EXCL 0x0400 62#define LOOKUP_EXCL 0x0400
64#define LOOKUP_RENAME_TARGET 0x0800 63#define LOOKUP_RENAME_TARGET 0x0800
65 64
65#define LOOKUP_JUMPED 0x1000
66#define LOOKUP_ROOT 0x2000
67#define LOOKUP_EMPTY 0x4000
68
66extern int user_path_at(int, const char __user *, unsigned, struct path *); 69extern int user_path_at(int, const char __user *, unsigned, struct path *);
67 70
68#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) 71#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
@@ -72,7 +75,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *);
72 75
73extern int kern_path(const char *, unsigned, struct path *); 76extern int kern_path(const char *, unsigned, struct path *);
74 77
75extern int path_lookup(const char *, unsigned, struct nameidata *); 78extern int kern_path_parent(const char *, struct nameidata *);
76extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 79extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
77 const char *, unsigned int, struct nameidata *); 80 const char *, unsigned int, struct nameidata *);
78 81
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d971346b0340..71caf7a5e6c6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2392,6 +2392,9 @@ extern int netdev_notice(const struct net_device *dev, const char *format, ...)
2392extern int netdev_info(const struct net_device *dev, const char *format, ...) 2392extern int netdev_info(const struct net_device *dev, const char *format, ...)
2393 __attribute__ ((format (printf, 2, 3))); 2393 __attribute__ ((format (printf, 2, 3)));
2394 2394
2395#define MODULE_ALIAS_NETDEV(device) \
2396 MODULE_ALIAS("netdev-" device)
2397
2395#if defined(DEBUG) 2398#if defined(DEBUG)
2396#define netdev_dbg(__dev, format, args...) \ 2399#define netdev_dbg(__dev, format, args...) \
2397 netdev_printk(KERN_DEBUG, __dev, format, ##args) 2400 netdev_printk(KERN_DEBUG, __dev, format, ##args)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b197563913bf..3e112de12d8d 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -68,11 +68,7 @@ struct nfs_client {
68 unsigned char cl_id_uniquifier; 68 unsigned char cl_id_uniquifier;
69 u32 cl_cb_ident; /* v4.0 callback identifier */ 69 u32 cl_cb_ident; /* v4.0 callback identifier */
70 const struct nfs4_minor_version_ops *cl_mvops; 70 const struct nfs4_minor_version_ops *cl_mvops;
71#endif /* CONFIG_NFS_V4 */
72 71
73#ifdef CONFIG_NFS_V4_1
74 /* clientid returned from EXCHANGE_ID, used by session operations */
75 u64 cl_ex_clid;
76 /* The sequence id to use for the next CREATE_SESSION */ 72 /* The sequence id to use for the next CREATE_SESSION */
77 u32 cl_seqid; 73 u32 cl_seqid;
78 /* The flags used for obtaining the clientid during EXCHANGE_ID */ 74 /* The flags used for obtaining the clientid during EXCHANGE_ID */
@@ -80,7 +76,7 @@ struct nfs_client {
80 struct nfs4_session *cl_session; /* sharred session */ 76 struct nfs4_session *cl_session; /* sharred session */
81 struct list_head cl_layouts; 77 struct list_head cl_layouts;
82 struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */ 78 struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
83#endif /* CONFIG_NFS_V4_1 */ 79#endif /* CONFIG_NFS_V4 */
84 80
85#ifdef CONFIG_NFS_FSCACHE 81#ifdef CONFIG_NFS_FSCACHE
86 struct fscache_cookie *fscache; /* client index cache cookie */ 82 struct fscache_cookie *fscache; /* client index cache cookie */
@@ -185,7 +181,7 @@ struct nfs_server {
185/* maximum number of slots to use */ 181/* maximum number of slots to use */
186#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE 182#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE
187 183
188#if defined(CONFIG_NFS_V4_1) 184#if defined(CONFIG_NFS_V4)
189 185
190/* Sessions */ 186/* Sessions */
191#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long))) 187#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long)))
@@ -225,5 +221,5 @@ struct nfs4_session {
225 struct nfs_client *clp; 221 struct nfs_client *clp;
226}; 222};
227 223
228#endif /* CONFIG_NFS_V4_1 */ 224#endif /* CONFIG_NFS_V4 */
229#endif 225#endif
diff --git a/include/linux/of.h b/include/linux/of.h
index cad7cf0ab278..266db1d0baa9 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -23,8 +23,6 @@
23 23
24#include <asm/byteorder.h> 24#include <asm/byteorder.h>
25 25
26#ifdef CONFIG_OF
27
28typedef u32 phandle; 26typedef u32 phandle;
29typedef u32 ihandle; 27typedef u32 ihandle;
30 28
@@ -65,11 +63,18 @@ struct device_node {
65#endif 63#endif
66}; 64};
67 65
66#ifdef CONFIG_OF
67
68/* Pointer for first entry in chain of all nodes. */ 68/* Pointer for first entry in chain of all nodes. */
69extern struct device_node *allnodes; 69extern struct device_node *allnodes;
70extern struct device_node *of_chosen; 70extern struct device_node *of_chosen;
71extern rwlock_t devtree_lock; 71extern rwlock_t devtree_lock;
72 72
73static inline bool of_have_populated_dt(void)
74{
75 return allnodes != NULL;
76}
77
73static inline bool of_node_is_root(const struct device_node *node) 78static inline bool of_node_is_root(const struct device_node *node)
74{ 79{
75 return node && (node->parent == NULL); 80 return node && (node->parent == NULL);
@@ -222,5 +227,12 @@ extern void of_attach_node(struct device_node *);
222extern void of_detach_node(struct device_node *); 227extern void of_detach_node(struct device_node *);
223#endif 228#endif
224 229
230#else
231
232static inline bool of_have_populated_dt(void)
233{
234 return false;
235}
236
225#endif /* CONFIG_OF */ 237#endif /* CONFIG_OF */
226#endif /* _LINUX_OF_H */ 238#endif /* _LINUX_OF_H */
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
new file mode 100644
index 000000000000..85a27b650d76
--- /dev/null
+++ b/include/linux/of_pci.h
@@ -0,0 +1,9 @@
1#ifndef __OF_PCI_H
2#define __OF_PCI_H
3
4#include <linux/pci.h>
5
6struct pci_dev;
7struct of_irq;
8int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
9#endif
diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h
new file mode 100644
index 000000000000..a6ee9aa898bb
--- /dev/null
+++ b/include/linux/pata_arasan_cf_data.h
@@ -0,0 +1,49 @@
1/*
2 * include/linux/pata_arasan_cf_data.h
3 *
4 * Arasan Compact Flash host controller platform data header file
5 *
6 * Copyright (C) 2011 ST Microelectronics
7 * Viresh Kumar <viresh.kumar@st.com>
8 *
9 * This file is licensed under the terms of the GNU General Public
10 * License version 2. This program is licensed "as is" without any
11 * warranty of any kind, whether express or implied.
12 */
13
14#ifndef _PATA_ARASAN_CF_DATA_H
15#define _PATA_ARASAN_CF_DATA_H
16
17#include <linux/platform_device.h>
18
19struct arasan_cf_pdata {
20 u8 cf_if_clk;
21 #define CF_IF_CLK_100M (0x0)
22 #define CF_IF_CLK_75M (0x1)
23 #define CF_IF_CLK_66M (0x2)
24 #define CF_IF_CLK_50M (0x3)
25 #define CF_IF_CLK_40M (0x4)
26 #define CF_IF_CLK_33M (0x5)
27 #define CF_IF_CLK_25M (0x6)
28 #define CF_IF_CLK_125M (0x7)
29 #define CF_IF_CLK_150M (0x8)
30 #define CF_IF_CLK_166M (0x9)
31 #define CF_IF_CLK_200M (0xA)
32 /*
33 * Platform specific incapabilities of CF controller is handled via
34 * quirks
35 */
36 u32 quirk;
37 #define CF_BROKEN_PIO (1)
38 #define CF_BROKEN_MWDMA (1 << 1)
39 #define CF_BROKEN_UDMA (1 << 2)
40 /* This is platform specific data for the DMA controller */
41 void *dma_priv;
42};
43
44static inline void
45set_arasan_cf_pdata(struct platform_device *pdev, struct arasan_cf_pdata *data)
46{
47 pdev->dev.platform_data = data;
48}
49#endif /* _PATA_ARASAN_CF_DATA_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3adb06ebf841..580de67f318b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
518#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 518#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
519#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 519#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
520#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603 520#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
521#define PCI_DEVICE_ID_AMD_15H_NB_LINK 0x1604
521#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 522#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
522#define PCI_DEVICE_ID_AMD_LANCE 0x2000 523#define PCI_DEVICE_ID_AMD_LANCE 0x2000
523#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 524#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 27c3c6fcfad3..3a5c4449fd36 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -255,6 +255,30 @@ extern void __bad_size_call_parameter(void);
255 pscr2_ret__; \ 255 pscr2_ret__; \
256}) 256})
257 257
258/*
259 * Special handling for cmpxchg_double. cmpxchg_double is passed two
260 * percpu variables. The first has to be aligned to a double word
261 * boundary and the second has to follow directly thereafter.
262 */
263#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \
264({ \
265 bool pdcrb_ret__; \
266 __verify_pcpu_ptr(&pcp1); \
267 BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \
268 VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \
269 VM_BUG_ON((unsigned long)(&pcp2) != \
270 (unsigned long)(&pcp1) + sizeof(pcp1)); \
271 switch(sizeof(pcp1)) { \
272 case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \
273 case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \
274 case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \
275 case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \
276 default: \
277 __bad_size_call_parameter(); break; \
278 } \
279 pdcrb_ret__; \
280})
281
258#define __pcpu_size_call(stem, variable, ...) \ 282#define __pcpu_size_call(stem, variable, ...) \
259do { \ 283do { \
260 __verify_pcpu_ptr(&(variable)); \ 284 __verify_pcpu_ptr(&(variable)); \
@@ -501,6 +525,45 @@ do { \
501#endif 525#endif
502 526
503/* 527/*
528 * cmpxchg_double replaces two adjacent scalars at once. The first
529 * two parameters are per cpu variables which have to be of the same
530 * size. A truth value is returned to indicate success or failure
531 * (since a double register result is difficult to handle). There is
532 * very limited hardware support for these operations, so only certain
533 * sizes may work.
534 */
535#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
536({ \
537 int ret__; \
538 preempt_disable(); \
539 ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \
540 oval1, oval2, nval1, nval2); \
541 preempt_enable(); \
542 ret__; \
543})
544
545#ifndef this_cpu_cmpxchg_double
546# ifndef this_cpu_cmpxchg_double_1
547# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
548 _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
549# endif
550# ifndef this_cpu_cmpxchg_double_2
551# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
552 _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
553# endif
554# ifndef this_cpu_cmpxchg_double_4
555# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
556 _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
557# endif
558# ifndef this_cpu_cmpxchg_double_8
559# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
560 _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
561# endif
562# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
563 __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
564#endif
565
566/*
504 * Generic percpu operations that do not require preemption handling. 567 * Generic percpu operations that do not require preemption handling.
505 * Either we do not care about races or the caller has the 568 * Either we do not care about races or the caller has the
506 * responsibility of handling preemptions issues. Arch code can still 569 * responsibility of handling preemptions issues. Arch code can still
@@ -703,6 +766,39 @@ do { \
703 __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval) 766 __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval)
704#endif 767#endif
705 768
769#define __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
770({ \
771 int __ret = 0; \
772 if (__this_cpu_read(pcp1) == (oval1) && \
773 __this_cpu_read(pcp2) == (oval2)) { \
774 __this_cpu_write(pcp1, (nval1)); \
775 __this_cpu_write(pcp2, (nval2)); \
776 __ret = 1; \
777 } \
778 (__ret); \
779})
780
781#ifndef __this_cpu_cmpxchg_double
782# ifndef __this_cpu_cmpxchg_double_1
783# define __this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
784 __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
785# endif
786# ifndef __this_cpu_cmpxchg_double_2
787# define __this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
788 __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
789# endif
790# ifndef __this_cpu_cmpxchg_double_4
791# define __this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
792 __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
793# endif
794# ifndef __this_cpu_cmpxchg_double_8
795# define __this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
796 __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
797# endif
798# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
799 __pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
800#endif
801
706/* 802/*
707 * IRQ safe versions of the per cpu RMW operations. Note that these operations 803 * IRQ safe versions of the per cpu RMW operations. Note that these operations
708 * are *not* safe against modification of the same variable from another 804 * are *not* safe against modification of the same variable from another
@@ -823,4 +919,36 @@ do { \
823 __pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval) 919 __pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval)
824#endif 920#endif
825 921
922#define irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
923({ \
924 int ret__; \
925 unsigned long flags; \
926 local_irq_save(flags); \
927 ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \
928 oval1, oval2, nval1, nval2); \
929 local_irq_restore(flags); \
930 ret__; \
931})
932
933#ifndef irqsafe_cpu_cmpxchg_double
934# ifndef irqsafe_cpu_cmpxchg_double_1
935# define irqsafe_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
936 irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
937# endif
938# ifndef irqsafe_cpu_cmpxchg_double_2
939# define irqsafe_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
940 irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
941# endif
942# ifndef irqsafe_cpu_cmpxchg_double_4
943# define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
944 irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
945# endif
946# ifndef irqsafe_cpu_cmpxchg_double_8
947# define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
948 irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
949# endif
950# define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
951 __pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
952#endif
953
826#endif /* __LINUX_PERCPU_H */ 954#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index dda5b0a3ff60..614615b8d42b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -225,8 +225,14 @@ struct perf_event_attr {
225 }; 225 };
226 226
227 __u32 bp_type; 227 __u32 bp_type;
228 __u64 bp_addr; 228 union {
229 __u64 bp_len; 229 __u64 bp_addr;
230 __u64 config1; /* extension of config */
231 };
232 union {
233 __u64 bp_len;
234 __u64 config2; /* extension of config1 */
235 };
230}; 236};
231 237
232/* 238/*
@@ -464,6 +470,7 @@ enum perf_callchain_context {
464 470
465#define PERF_FLAG_FD_NO_GROUP (1U << 0) 471#define PERF_FLAG_FD_NO_GROUP (1U << 0)
466#define PERF_FLAG_FD_OUTPUT (1U << 1) 472#define PERF_FLAG_FD_OUTPUT (1U << 1)
473#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
467 474
468#ifdef __KERNEL__ 475#ifdef __KERNEL__
469/* 476/*
@@ -471,6 +478,7 @@ enum perf_callchain_context {
471 */ 478 */
472 479
473#ifdef CONFIG_PERF_EVENTS 480#ifdef CONFIG_PERF_EVENTS
481# include <linux/cgroup.h>
474# include <asm/perf_event.h> 482# include <asm/perf_event.h>
475# include <asm/local64.h> 483# include <asm/local64.h>
476#endif 484#endif
@@ -539,6 +547,9 @@ struct hw_perf_event {
539 unsigned long event_base; 547 unsigned long event_base;
540 int idx; 548 int idx;
541 int last_cpu; 549 int last_cpu;
550 unsigned int extra_reg;
551 u64 extra_config;
552 int extra_alloc;
542 }; 553 };
543 struct { /* software */ 554 struct { /* software */
544 struct hrtimer hrtimer; 555 struct hrtimer hrtimer;
@@ -716,6 +727,22 @@ struct swevent_hlist {
716#define PERF_ATTACH_GROUP 0x02 727#define PERF_ATTACH_GROUP 0x02
717#define PERF_ATTACH_TASK 0x04 728#define PERF_ATTACH_TASK 0x04
718 729
730#ifdef CONFIG_CGROUP_PERF
731/*
732 * perf_cgroup_info keeps track of time_enabled for a cgroup.
733 * This is a per-cpu dynamically allocated data structure.
734 */
735struct perf_cgroup_info {
736 u64 time;
737 u64 timestamp;
738};
739
740struct perf_cgroup {
741 struct cgroup_subsys_state css;
742 struct perf_cgroup_info *info; /* timing info, one per cpu */
743};
744#endif
745
719/** 746/**
720 * struct perf_event - performance event kernel representation: 747 * struct perf_event - performance event kernel representation:
721 */ 748 */
@@ -832,6 +859,11 @@ struct perf_event {
832 struct event_filter *filter; 859 struct event_filter *filter;
833#endif 860#endif
834 861
862#ifdef CONFIG_CGROUP_PERF
863 struct perf_cgroup *cgrp; /* cgroup event is attach to */
864 int cgrp_defer_enabled;
865#endif
866
835#endif /* CONFIG_PERF_EVENTS */ 867#endif /* CONFIG_PERF_EVENTS */
836}; 868};
837 869
@@ -886,6 +918,7 @@ struct perf_event_context {
886 u64 generation; 918 u64 generation;
887 int pin_count; 919 int pin_count;
888 struct rcu_head rcu_head; 920 struct rcu_head rcu_head;
921 int nr_cgroups; /* cgroup events present */
889}; 922};
890 923
891/* 924/*
@@ -905,6 +938,9 @@ struct perf_cpu_context {
905 struct list_head rotation_list; 938 struct list_head rotation_list;
906 int jiffies_interval; 939 int jiffies_interval;
907 struct pmu *active_pmu; 940 struct pmu *active_pmu;
941#ifdef CONFIG_CGROUP_PERF
942 struct perf_cgroup *cgrp;
943#endif
908}; 944};
909 945
910struct perf_output_handle { 946struct perf_output_handle {
@@ -1040,11 +1076,11 @@ have_event:
1040 __perf_sw_event(event_id, nr, nmi, regs, addr); 1076 __perf_sw_event(event_id, nr, nmi, regs, addr);
1041} 1077}
1042 1078
1043extern atomic_t perf_task_events; 1079extern atomic_t perf_sched_events;
1044 1080
1045static inline void perf_event_task_sched_in(struct task_struct *task) 1081static inline void perf_event_task_sched_in(struct task_struct *task)
1046{ 1082{
1047 COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); 1083 COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task));
1048} 1084}
1049 1085
1050static inline 1086static inline
@@ -1052,7 +1088,7 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex
1052{ 1088{
1053 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); 1089 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1054 1090
1055 COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); 1091 COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next));
1056} 1092}
1057 1093
1058extern void perf_event_mmap(struct vm_area_struct *vma); 1094extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -1083,6 +1119,10 @@ extern int sysctl_perf_event_paranoid;
1083extern int sysctl_perf_event_mlock; 1119extern int sysctl_perf_event_mlock;
1084extern int sysctl_perf_event_sample_rate; 1120extern int sysctl_perf_event_sample_rate;
1085 1121
1122extern int perf_proc_update_handler(struct ctl_table *table, int write,
1123 void __user *buffer, size_t *lenp,
1124 loff_t *ppos);
1125
1086static inline bool perf_paranoid_tracepoint_raw(void) 1126static inline bool perf_paranoid_tracepoint_raw(void)
1087{ 1127{
1088 return sysctl_perf_event_paranoid > -1; 1128 return sysctl_perf_event_paranoid > -1;
diff --git a/include/linux/plist.h b/include/linux/plist.h
index 7254eda078e5..c9b9f322c8d8 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -31,15 +31,17 @@
31 * 31 *
32 * Simple ASCII art explanation: 32 * Simple ASCII art explanation:
33 * 33 *
34 * |HEAD | 34 * pl:prio_list (only for plist_node)
35 * | | 35 * nl:node_list
36 * |prio_list.prev|<------------------------------------| 36 * HEAD| NODE(S)
37 * |prio_list.next|<->|pl|<->|pl|<--------------->|pl|<-| 37 * |
38 * |10 | |10| |21| |21| |21| |40| (prio) 38 * ||------------------------------------|
39 * | | | | | | | | | | | | 39 * ||->|pl|<->|pl|<--------------->|pl|<-|
40 * | | | | | | | | | | | | 40 * | |10| |21| |21| |21| |40| (prio)
41 * |node_list.next|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-| 41 * | | | | | | | | | | |
42 * |node_list.prev|<------------------------------------| 42 * | | | | | | | | | | |
43 * |->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-|
44 * |-------------------------------------------|
43 * 45 *
44 * The nodes on the prio_list list are sorted by priority to simplify 46 * The nodes on the prio_list list are sorted by priority to simplify
45 * the insertion of new nodes. There are no nodes with duplicate 47 * the insertion of new nodes. There are no nodes with duplicate
@@ -78,7 +80,6 @@
78#include <linux/spinlock_types.h> 80#include <linux/spinlock_types.h>
79 81
80struct plist_head { 82struct plist_head {
81 struct list_head prio_list;
82 struct list_head node_list; 83 struct list_head node_list;
83#ifdef CONFIG_DEBUG_PI_LIST 84#ifdef CONFIG_DEBUG_PI_LIST
84 raw_spinlock_t *rawlock; 85 raw_spinlock_t *rawlock;
@@ -88,7 +89,8 @@ struct plist_head {
88 89
89struct plist_node { 90struct plist_node {
90 int prio; 91 int prio;
91 struct plist_head plist; 92 struct list_head prio_list;
93 struct list_head node_list;
92}; 94};
93 95
94#ifdef CONFIG_DEBUG_PI_LIST 96#ifdef CONFIG_DEBUG_PI_LIST
@@ -100,7 +102,6 @@ struct plist_node {
100#endif 102#endif
101 103
102#define _PLIST_HEAD_INIT(head) \ 104#define _PLIST_HEAD_INIT(head) \
103 .prio_list = LIST_HEAD_INIT((head).prio_list), \
104 .node_list = LIST_HEAD_INIT((head).node_list) 105 .node_list = LIST_HEAD_INIT((head).node_list)
105 106
106/** 107/**
@@ -133,7 +134,8 @@ struct plist_node {
133#define PLIST_NODE_INIT(node, __prio) \ 134#define PLIST_NODE_INIT(node, __prio) \
134{ \ 135{ \
135 .prio = (__prio), \ 136 .prio = (__prio), \
136 .plist = { _PLIST_HEAD_INIT((node).plist) }, \ 137 .prio_list = LIST_HEAD_INIT((node).prio_list), \
138 .node_list = LIST_HEAD_INIT((node).node_list), \
137} 139}
138 140
139/** 141/**
@@ -144,7 +146,6 @@ struct plist_node {
144static inline void 146static inline void
145plist_head_init(struct plist_head *head, spinlock_t *lock) 147plist_head_init(struct plist_head *head, spinlock_t *lock)
146{ 148{
147 INIT_LIST_HEAD(&head->prio_list);
148 INIT_LIST_HEAD(&head->node_list); 149 INIT_LIST_HEAD(&head->node_list);
149#ifdef CONFIG_DEBUG_PI_LIST 150#ifdef CONFIG_DEBUG_PI_LIST
150 head->spinlock = lock; 151 head->spinlock = lock;
@@ -160,7 +161,6 @@ plist_head_init(struct plist_head *head, spinlock_t *lock)
160static inline void 161static inline void
161plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock) 162plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
162{ 163{
163 INIT_LIST_HEAD(&head->prio_list);
164 INIT_LIST_HEAD(&head->node_list); 164 INIT_LIST_HEAD(&head->node_list);
165#ifdef CONFIG_DEBUG_PI_LIST 165#ifdef CONFIG_DEBUG_PI_LIST
166 head->rawlock = lock; 166 head->rawlock = lock;
@@ -176,7 +176,8 @@ plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
176static inline void plist_node_init(struct plist_node *node, int prio) 176static inline void plist_node_init(struct plist_node *node, int prio)
177{ 177{
178 node->prio = prio; 178 node->prio = prio;
179 plist_head_init(&node->plist, NULL); 179 INIT_LIST_HEAD(&node->prio_list);
180 INIT_LIST_HEAD(&node->node_list);
180} 181}
181 182
182extern void plist_add(struct plist_node *node, struct plist_head *head); 183extern void plist_add(struct plist_node *node, struct plist_head *head);
@@ -188,7 +189,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
188 * @head: the head for your list 189 * @head: the head for your list
189 */ 190 */
190#define plist_for_each(pos, head) \ 191#define plist_for_each(pos, head) \
191 list_for_each_entry(pos, &(head)->node_list, plist.node_list) 192 list_for_each_entry(pos, &(head)->node_list, node_list)
192 193
193/** 194/**
194 * plist_for_each_safe - iterate safely over a plist of given type 195 * plist_for_each_safe - iterate safely over a plist of given type
@@ -199,7 +200,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
199 * Iterate over a plist of given type, safe against removal of list entry. 200 * Iterate over a plist of given type, safe against removal of list entry.
200 */ 201 */
201#define plist_for_each_safe(pos, n, head) \ 202#define plist_for_each_safe(pos, n, head) \
202 list_for_each_entry_safe(pos, n, &(head)->node_list, plist.node_list) 203 list_for_each_entry_safe(pos, n, &(head)->node_list, node_list)
203 204
204/** 205/**
205 * plist_for_each_entry - iterate over list of given type 206 * plist_for_each_entry - iterate over list of given type
@@ -208,7 +209,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
208 * @mem: the name of the list_struct within the struct 209 * @mem: the name of the list_struct within the struct
209 */ 210 */
210#define plist_for_each_entry(pos, head, mem) \ 211#define plist_for_each_entry(pos, head, mem) \
211 list_for_each_entry(pos, &(head)->node_list, mem.plist.node_list) 212 list_for_each_entry(pos, &(head)->node_list, mem.node_list)
212 213
213/** 214/**
214 * plist_for_each_entry_safe - iterate safely over list of given type 215 * plist_for_each_entry_safe - iterate safely over list of given type
@@ -220,7 +221,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
220 * Iterate over list of given type, safe against removal of list entry. 221 * Iterate over list of given type, safe against removal of list entry.
221 */ 222 */
222#define plist_for_each_entry_safe(pos, n, head, m) \ 223#define plist_for_each_entry_safe(pos, n, head, m) \
223 list_for_each_entry_safe(pos, n, &(head)->node_list, m.plist.node_list) 224 list_for_each_entry_safe(pos, n, &(head)->node_list, m.node_list)
224 225
225/** 226/**
226 * plist_head_empty - return !0 if a plist_head is empty 227 * plist_head_empty - return !0 if a plist_head is empty
@@ -237,7 +238,7 @@ static inline int plist_head_empty(const struct plist_head *head)
237 */ 238 */
238static inline int plist_node_empty(const struct plist_node *node) 239static inline int plist_node_empty(const struct plist_node *node)
239{ 240{
240 return plist_head_empty(&node->plist); 241 return list_empty(&node->node_list);
241} 242}
242 243
243/* All functions below assume the plist_head is not empty. */ 244/* All functions below assume the plist_head is not empty. */
@@ -285,7 +286,7 @@ static inline int plist_node_empty(const struct plist_node *node)
285static inline struct plist_node *plist_first(const struct plist_head *head) 286static inline struct plist_node *plist_first(const struct plist_head *head)
286{ 287{
287 return list_entry(head->node_list.next, 288 return list_entry(head->node_list.next,
288 struct plist_node, plist.node_list); 289 struct plist_node, node_list);
289} 290}
290 291
291/** 292/**
@@ -297,7 +298,7 @@ static inline struct plist_node *plist_first(const struct plist_head *head)
297static inline struct plist_node *plist_last(const struct plist_head *head) 298static inline struct plist_node *plist_last(const struct plist_head *head)
298{ 299{
299 return list_entry(head->node_list.prev, 300 return list_entry(head->node_list.prev,
300 struct plist_node, plist.node_list); 301 struct plist_node, node_list);
301} 302}
302 303
303#endif 304#endif
diff --git a/include/linux/pm.h b/include/linux/pm.h
index dd9c7ab38270..6618216bb973 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -267,7 +267,7 @@ const struct dev_pm_ops name = { \
267 * callbacks provided by device drivers supporting both the system sleep PM and 267 * callbacks provided by device drivers supporting both the system sleep PM and
268 * runtime PM, make the pm member point to generic_subsys_pm_ops. 268 * runtime PM, make the pm member point to generic_subsys_pm_ops.
269 */ 269 */
270#ifdef CONFIG_PM_OPS 270#ifdef CONFIG_PM
271extern struct dev_pm_ops generic_subsys_pm_ops; 271extern struct dev_pm_ops generic_subsys_pm_ops;
272#define GENERIC_SUBSYS_PM_OPS (&generic_subsys_pm_ops) 272#define GENERIC_SUBSYS_PM_OPS (&generic_subsys_pm_ops)
273#else 273#else
@@ -431,6 +431,8 @@ struct dev_pm_info {
431 struct list_head entry; 431 struct list_head entry;
432 struct completion completion; 432 struct completion completion;
433 struct wakeup_source *wakeup; 433 struct wakeup_source *wakeup;
434#else
435 unsigned int should_wakeup:1;
434#endif 436#endif
435#ifdef CONFIG_PM_RUNTIME 437#ifdef CONFIG_PM_RUNTIME
436 struct timer_list suspend_timer; 438 struct timer_list suspend_timer;
@@ -463,6 +465,14 @@ struct dev_pm_info {
463 465
464extern void update_pm_runtime_accounting(struct device *dev); 466extern void update_pm_runtime_accounting(struct device *dev);
465 467
468/*
469 * Power domains provide callbacks that are executed during system suspend,
470 * hibernation, system resume and during runtime PM transitions along with
471 * subsystem-level and driver-level callbacks.
472 */
473struct dev_power_domain {
474 struct dev_pm_ops ops;
475};
466 476
467/* 477/*
468 * The PM_EVENT_ messages are also used by drivers implementing the legacy 478 * The PM_EVENT_ messages are also used by drivers implementing the legacy
@@ -563,15 +573,6 @@ enum dpm_order {
563 DPM_ORDER_DEV_LAST, 573 DPM_ORDER_DEV_LAST,
564}; 574};
565 575
566/*
567 * Global Power Management flags
568 * Used to keep APM and ACPI from both being active
569 */
570extern unsigned int pm_flags;
571
572#define PM_APM 1
573#define PM_ACPI 2
574
575extern int pm_generic_suspend(struct device *dev); 576extern int pm_generic_suspend(struct device *dev);
576extern int pm_generic_resume(struct device *dev); 577extern int pm_generic_resume(struct device *dev);
577extern int pm_generic_freeze(struct device *dev); 578extern int pm_generic_freeze(struct device *dev);
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d34f067e2a7f..8de9aa6e7def 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -87,6 +87,11 @@ static inline bool pm_runtime_enabled(struct device *dev)
87 return !dev->power.disable_depth; 87 return !dev->power.disable_depth;
88} 88}
89 89
90static inline bool pm_runtime_callbacks_present(struct device *dev)
91{
92 return !dev->power.no_callbacks;
93}
94
90static inline void pm_runtime_mark_last_busy(struct device *dev) 95static inline void pm_runtime_mark_last_busy(struct device *dev)
91{ 96{
92 ACCESS_ONCE(dev->power.last_busy) = jiffies; 97 ACCESS_ONCE(dev->power.last_busy) = jiffies;
@@ -133,6 +138,7 @@ static inline int pm_generic_runtime_resume(struct device *dev) { return 0; }
133static inline void pm_runtime_no_callbacks(struct device *dev) {} 138static inline void pm_runtime_no_callbacks(struct device *dev) {}
134static inline void pm_runtime_irq_safe(struct device *dev) {} 139static inline void pm_runtime_irq_safe(struct device *dev) {}
135 140
141static inline bool pm_runtime_callbacks_present(struct device *dev) { return false; }
136static inline void pm_runtime_mark_last_busy(struct device *dev) {} 142static inline void pm_runtime_mark_last_busy(struct device *dev) {}
137static inline void __pm_runtime_use_autosuspend(struct device *dev, 143static inline void __pm_runtime_use_autosuspend(struct device *dev,
138 bool use) {} 144 bool use) {}
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 9cff00dd6b63..a32da962d693 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -62,18 +62,11 @@ struct wakeup_source {
62 * Changes to device_may_wakeup take effect on the next pm state change. 62 * Changes to device_may_wakeup take effect on the next pm state change.
63 */ 63 */
64 64
65static inline void device_set_wakeup_capable(struct device *dev, bool capable)
66{
67 dev->power.can_wakeup = capable;
68}
69
70static inline bool device_can_wakeup(struct device *dev) 65static inline bool device_can_wakeup(struct device *dev)
71{ 66{
72 return dev->power.can_wakeup; 67 return dev->power.can_wakeup;
73} 68}
74 69
75
76
77static inline bool device_may_wakeup(struct device *dev) 70static inline bool device_may_wakeup(struct device *dev)
78{ 71{
79 return dev->power.can_wakeup && !!dev->power.wakeup; 72 return dev->power.can_wakeup && !!dev->power.wakeup;
@@ -88,6 +81,7 @@ extern struct wakeup_source *wakeup_source_register(const char *name);
88extern void wakeup_source_unregister(struct wakeup_source *ws); 81extern void wakeup_source_unregister(struct wakeup_source *ws);
89extern int device_wakeup_enable(struct device *dev); 82extern int device_wakeup_enable(struct device *dev);
90extern int device_wakeup_disable(struct device *dev); 83extern int device_wakeup_disable(struct device *dev);
84extern void device_set_wakeup_capable(struct device *dev, bool capable);
91extern int device_init_wakeup(struct device *dev, bool val); 85extern int device_init_wakeup(struct device *dev, bool val);
92extern int device_set_wakeup_enable(struct device *dev, bool enable); 86extern int device_set_wakeup_enable(struct device *dev, bool enable);
93extern void __pm_stay_awake(struct wakeup_source *ws); 87extern void __pm_stay_awake(struct wakeup_source *ws);
@@ -109,11 +103,6 @@ static inline bool device_can_wakeup(struct device *dev)
109 return dev->power.can_wakeup; 103 return dev->power.can_wakeup;
110} 104}
111 105
112static inline bool device_may_wakeup(struct device *dev)
113{
114 return false;
115}
116
117static inline struct wakeup_source *wakeup_source_create(const char *name) 106static inline struct wakeup_source *wakeup_source_create(const char *name)
118{ 107{
119 return NULL; 108 return NULL;
@@ -134,24 +123,32 @@ static inline void wakeup_source_unregister(struct wakeup_source *ws) {}
134 123
135static inline int device_wakeup_enable(struct device *dev) 124static inline int device_wakeup_enable(struct device *dev)
136{ 125{
137 return -EINVAL; 126 dev->power.should_wakeup = true;
127 return 0;
138} 128}
139 129
140static inline int device_wakeup_disable(struct device *dev) 130static inline int device_wakeup_disable(struct device *dev)
141{ 131{
132 dev->power.should_wakeup = false;
142 return 0; 133 return 0;
143} 134}
144 135
145static inline int device_init_wakeup(struct device *dev, bool val) 136static inline int device_set_wakeup_enable(struct device *dev, bool enable)
146{ 137{
147 dev->power.can_wakeup = val; 138 dev->power.should_wakeup = enable;
148 return val ? -EINVAL : 0; 139 return 0;
149} 140}
150 141
142static inline int device_init_wakeup(struct device *dev, bool val)
143{
144 device_set_wakeup_capable(dev, val);
145 device_set_wakeup_enable(dev, val);
146 return 0;
147}
151 148
152static inline int device_set_wakeup_enable(struct device *dev, bool enable) 149static inline bool device_may_wakeup(struct device *dev)
153{ 150{
154 return -EINVAL; 151 return dev->power.can_wakeup && dev->power.should_wakeup;
155} 152}
156 153
157static inline void __pm_stay_awake(struct wakeup_source *ws) {} 154static inline void __pm_stay_awake(struct wakeup_source *ws) {}
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
new file mode 100644
index 000000000000..369e19d3750b
--- /dev/null
+++ b/include/linux/posix-clock.h
@@ -0,0 +1,150 @@
1/*
2 * posix-clock.h - support for dynamic clock devices
3 *
4 * Copyright (C) 2010 OMICRON electronics GmbH
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#ifndef _LINUX_POSIX_CLOCK_H_
21#define _LINUX_POSIX_CLOCK_H_
22
23#include <linux/cdev.h>
24#include <linux/fs.h>
25#include <linux/poll.h>
26#include <linux/posix-timers.h>
27
28struct posix_clock;
29
30/**
31 * struct posix_clock_operations - functional interface to the clock
32 *
33 * Every posix clock is represented by a character device. Drivers may
34 * optionally offer extended capabilities by implementing the
35 * character device methods. The character device file operations are
36 * first handled by the clock device layer, then passed on to the
37 * driver by calling these functions.
38 *
39 * @owner: The clock driver should set to THIS_MODULE
40 * @clock_adjtime: Adjust the clock
41 * @clock_gettime: Read the current time
42 * @clock_getres: Get the clock resolution
43 * @clock_settime: Set the current time value
44 * @timer_create: Create a new timer
45 * @timer_delete: Remove a previously created timer
46 * @timer_gettime: Get remaining time and interval of a timer
47 * @timer_setttime: Set a timer's initial expiration and interval
48 * @fasync: Optional character device fasync method
49 * @mmap: Optional character device mmap method
50 * @open: Optional character device open method
51 * @release: Optional character device release method
52 * @ioctl: Optional character device ioctl method
53 * @read: Optional character device read method
54 * @poll: Optional character device poll method
55 */
56struct posix_clock_operations {
57 struct module *owner;
58
59 int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
60
61 int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts);
62
63 int (*clock_getres) (struct posix_clock *pc, struct timespec *ts);
64
65 int (*clock_settime)(struct posix_clock *pc,
66 const struct timespec *ts);
67
68 int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit);
69
70 int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit);
71
72 void (*timer_gettime)(struct posix_clock *pc,
73 struct k_itimer *kit, struct itimerspec *tsp);
74
75 int (*timer_settime)(struct posix_clock *pc,
76 struct k_itimer *kit, int flags,
77 struct itimerspec *tsp, struct itimerspec *old);
78 /*
79 * Optional character device methods:
80 */
81 int (*fasync) (struct posix_clock *pc,
82 int fd, struct file *file, int on);
83
84 long (*ioctl) (struct posix_clock *pc,
85 unsigned int cmd, unsigned long arg);
86
87 int (*mmap) (struct posix_clock *pc,
88 struct vm_area_struct *vma);
89
90 int (*open) (struct posix_clock *pc, fmode_t f_mode);
91
92 uint (*poll) (struct posix_clock *pc,
93 struct file *file, poll_table *wait);
94
95 int (*release) (struct posix_clock *pc);
96
97 ssize_t (*read) (struct posix_clock *pc,
98 uint flags, char __user *buf, size_t cnt);
99};
100
101/**
102 * struct posix_clock - represents a dynamic posix clock
103 *
104 * @ops: Functional interface to the clock
105 * @cdev: Character device instance for this clock
106 * @kref: Reference count.
107 * @mutex: Protects the 'zombie' field from concurrent access.
108 * @zombie: If 'zombie' is true, then the hardware has disappeared.
109 * @release: A function to free the structure when the reference count reaches
110 * zero. May be NULL if structure is statically allocated.
111 *
112 * Drivers should embed their struct posix_clock within a private
113 * structure, obtaining a reference to it during callbacks using
114 * container_of().
115 */
116struct posix_clock {
117 struct posix_clock_operations ops;
118 struct cdev cdev;
119 struct kref kref;
120 struct mutex mutex;
121 bool zombie;
122 void (*release)(struct posix_clock *clk);
123};
124
125/**
126 * posix_clock_register() - register a new clock
127 * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
128 * @devid: Allocated device id
129 *
130 * A clock driver calls this function to register itself with the
131 * clock device subsystem. If 'clk' points to dynamically allocated
132 * memory, then the caller must provide a 'release' function to free
133 * that memory.
134 *
135 * Returns zero on success, non-zero otherwise.
136 */
137int posix_clock_register(struct posix_clock *clk, dev_t devid);
138
139/**
140 * posix_clock_unregister() - unregister a clock
141 * @clk: Clock instance previously registered via posix_clock_register()
142 *
143 * A clock driver calls this function to remove itself from the clock
144 * device subsystem. The posix_clock itself will remain (in an
145 * inactive state) until its reference count drops to zero, at which
146 * point it will be deallocated with its 'release' method.
147 */
148void posix_clock_unregister(struct posix_clock *clk);
149
150#endif
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 3e23844a6990..d51243ae0726 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
4#include <linux/spinlock.h> 4#include <linux/spinlock.h>
5#include <linux/list.h> 5#include <linux/list.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/timex.h>
7 8
8union cpu_time_count { 9union cpu_time_count {
9 cputime_t cpu; 10 cputime_t cpu;
@@ -17,10 +18,21 @@ struct cpu_timer_list {
17 int firing; 18 int firing;
18}; 19};
19 20
21/*
22 * Bit fields within a clockid:
23 *
24 * The most significant 29 bits hold either a pid or a file descriptor.
25 *
26 * Bit 2 indicates whether a cpu clock refers to a thread or a process.
27 *
28 * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
29 *
30 * A clockid is invalid if bits 2, 1, and 0 are all set.
31 */
20#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) 32#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
21#define CPUCLOCK_PERTHREAD(clock) \ 33#define CPUCLOCK_PERTHREAD(clock) \
22 (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) 34 (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
23#define CPUCLOCK_PID_MASK 7 35
24#define CPUCLOCK_PERTHREAD_MASK 4 36#define CPUCLOCK_PERTHREAD_MASK 4
25#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) 37#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
26#define CPUCLOCK_CLOCK_MASK 3 38#define CPUCLOCK_CLOCK_MASK 3
@@ -28,12 +40,17 @@ struct cpu_timer_list {
28#define CPUCLOCK_VIRT 1 40#define CPUCLOCK_VIRT 1
29#define CPUCLOCK_SCHED 2 41#define CPUCLOCK_SCHED 2
30#define CPUCLOCK_MAX 3 42#define CPUCLOCK_MAX 3
43#define CLOCKFD CPUCLOCK_MAX
44#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
31 45
32#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ 46#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
33 ((~(clockid_t) (pid) << 3) | (clockid_t) (clock)) 47 ((~(clockid_t) (pid) << 3) | (clockid_t) (clock))
34#define MAKE_THREAD_CPUCLOCK(tid, clock) \ 48#define MAKE_THREAD_CPUCLOCK(tid, clock) \
35 MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK) 49 MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK)
36 50
51#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD)
52#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3))
53
37/* POSIX.1b interval timer structure. */ 54/* POSIX.1b interval timer structure. */
38struct k_itimer { 55struct k_itimer {
39 struct list_head list; /* free/ allocate list */ 56 struct list_head list; /* free/ allocate list */
@@ -67,10 +84,11 @@ struct k_itimer {
67}; 84};
68 85
69struct k_clock { 86struct k_clock {
70 int res; /* in nanoseconds */
71 int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); 87 int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
72 int (*clock_set) (const clockid_t which_clock, struct timespec * tp); 88 int (*clock_set) (const clockid_t which_clock,
89 const struct timespec *tp);
73 int (*clock_get) (const clockid_t which_clock, struct timespec * tp); 90 int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
91 int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
74 int (*timer_create) (struct k_itimer *timer); 92 int (*timer_create) (struct k_itimer *timer);
75 int (*nsleep) (const clockid_t which_clock, int flags, 93 int (*nsleep) (const clockid_t which_clock, int flags,
76 struct timespec *, struct timespec __user *); 94 struct timespec *, struct timespec __user *);
@@ -84,28 +102,14 @@ struct k_clock {
84 struct itimerspec * cur_setting); 102 struct itimerspec * cur_setting);
85}; 103};
86 104
87void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); 105extern struct k_clock clock_posix_cpu;
106extern struct k_clock clock_posix_dynamic;
88 107
89/* error handlers for timer_create, nanosleep and settime */ 108void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock);
90int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *,
91 struct timespec __user *);
92int do_posix_clock_nosettime(const clockid_t, struct timespec *tp);
93 109
94/* function to call to trigger timer event */ 110/* function to call to trigger timer event */
95int posix_timer_event(struct k_itimer *timr, int si_private); 111int posix_timer_event(struct k_itimer *timr, int si_private);
96 112
97int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts);
98int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts);
99int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts);
100int posix_cpu_timer_create(struct k_itimer *timer);
101int posix_cpu_nsleep(const clockid_t which_clock, int flags,
102 struct timespec *rqtp, struct timespec __user *rmtp);
103long posix_cpu_nsleep_restart(struct restart_block *restart_block);
104int posix_cpu_timer_set(struct k_itimer *timer, int flags,
105 struct itimerspec *new, struct itimerspec *old);
106int posix_cpu_timer_del(struct k_itimer *timer);
107void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp);
108
109void posix_cpu_timer_schedule(struct k_itimer *timer); 113void posix_cpu_timer_schedule(struct k_itimer *timer);
110 114
111void run_posix_cpu_timers(struct task_struct *task); 115void run_posix_cpu_timers(struct task_struct *task);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 092a04f874a8..a1147e5dd245 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -102,11 +102,8 @@
102 102
103extern long arch_ptrace(struct task_struct *child, long request, 103extern long arch_ptrace(struct task_struct *child, long request,
104 unsigned long addr, unsigned long data); 104 unsigned long addr, unsigned long data);
105extern int ptrace_traceme(void);
106extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); 105extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
107extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); 106extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
108extern int ptrace_attach(struct task_struct *tsk);
109extern int ptrace_detach(struct task_struct *, unsigned int);
110extern void ptrace_disable(struct task_struct *); 107extern void ptrace_disable(struct task_struct *);
111extern int ptrace_check_attach(struct task_struct *task, int kill); 108extern int ptrace_check_attach(struct task_struct *task, int kill);
112extern int ptrace_request(struct task_struct *child, long request, 109extern int ptrace_request(struct task_struct *child, long request,
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index 3b94c91f20a6..6deef5dc95fb 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -63,6 +63,7 @@ extern const struct xattr_handler reiserfs_xattr_trusted_handler;
63extern const struct xattr_handler reiserfs_xattr_security_handler; 63extern const struct xattr_handler reiserfs_xattr_security_handler;
64#ifdef CONFIG_REISERFS_FS_SECURITY 64#ifdef CONFIG_REISERFS_FS_SECURITY
65int reiserfs_security_init(struct inode *dir, struct inode *inode, 65int reiserfs_security_init(struct inode *dir, struct inode *inode,
66 const struct qstr *qstr,
66 struct reiserfs_security_handle *sec); 67 struct reiserfs_security_handle *sec);
67int reiserfs_security_write(struct reiserfs_transaction_handle *th, 68int reiserfs_security_write(struct reiserfs_transaction_handle *th,
68 struct inode *inode, 69 struct inode *inode,
@@ -130,6 +131,7 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
130#ifndef CONFIG_REISERFS_FS_SECURITY 131#ifndef CONFIG_REISERFS_FS_SECURITY
131static inline int reiserfs_security_init(struct inode *dir, 132static inline int reiserfs_security_init(struct inode *dir,
132 struct inode *inode, 133 struct inode *inode,
134 const struct qstr *qstr,
133 struct reiserfs_security_handle *sec) 135 struct reiserfs_security_handle *sec)
134{ 136{
135 return 0; 137 return 0;
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 8d3a2486544d..ab38ac80b0f9 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -100,6 +100,8 @@ void ring_buffer_free(struct ring_buffer *buffer);
100 100
101int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); 101int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
102 102
103void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val);
104
103struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, 105struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer,
104 unsigned long length); 106 unsigned long length);
105int ring_buffer_unlock_commit(struct ring_buffer *buffer, 107int ring_buffer_unlock_commit(struct ring_buffer *buffer,
diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h
index d63dcbaea169..9026b30238f3 100644
--- a/include/linux/rio_regs.h
+++ b/include/linux/rio_regs.h
@@ -14,10 +14,12 @@
14#define LINUX_RIO_REGS_H 14#define LINUX_RIO_REGS_H
15 15
16/* 16/*
17 * In RapidIO, each device has a 2MB configuration space that is 17 * In RapidIO, each device has a 16MB configuration space that is
18 * accessed via maintenance transactions. Portions of configuration 18 * accessed via maintenance transactions. Portions of configuration
19 * space are standardized and/or reserved. 19 * space are standardized and/or reserved.
20 */ 20 */
21#define RIO_MAINT_SPACE_SZ 0x1000000 /* 16MB of RapidIO mainenance space */
22
21#define RIO_DEV_ID_CAR 0x00 /* [I] Device Identity CAR */ 23#define RIO_DEV_ID_CAR 0x00 /* [I] Device Identity CAR */
22#define RIO_DEV_INFO_CAR 0x04 /* [I] Device Information CAR */ 24#define RIO_DEV_INFO_CAR 0x04 /* [I] Device Information CAR */
23#define RIO_ASM_ID_CAR 0x08 /* [I] Assembly Identity CAR */ 25#define RIO_ASM_ID_CAR 0x08 /* [I] Assembly Identity CAR */
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index a0b639f8e805..2ca7e8a78060 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -133,7 +133,6 @@ extern struct class *rtc_class;
133 * The (current) exceptions are mostly filesystem hooks: 133 * The (current) exceptions are mostly filesystem hooks:
134 * - the proc() hook for procfs 134 * - the proc() hook for procfs
135 * - non-ioctl() chardev hooks: open(), release(), read_callback() 135 * - non-ioctl() chardev hooks: open(), release(), read_callback()
136 * - periodic irq calls: irq_set_state(), irq_set_freq()
137 * 136 *
138 * REVISIT those periodic irq calls *do* have ops_lock when they're 137 * REVISIT those periodic irq calls *do* have ops_lock when they're
139 * issued through ioctl() ... 138 * issued through ioctl() ...
@@ -148,11 +147,8 @@ struct rtc_class_ops {
148 int (*set_alarm)(struct device *, struct rtc_wkalrm *); 147 int (*set_alarm)(struct device *, struct rtc_wkalrm *);
149 int (*proc)(struct device *, struct seq_file *); 148 int (*proc)(struct device *, struct seq_file *);
150 int (*set_mmss)(struct device *, unsigned long secs); 149 int (*set_mmss)(struct device *, unsigned long secs);
151 int (*irq_set_state)(struct device *, int enabled);
152 int (*irq_set_freq)(struct device *, int freq);
153 int (*read_callback)(struct device *, int data); 150 int (*read_callback)(struct device *, int data);
154 int (*alarm_irq_enable)(struct device *, unsigned int enabled); 151 int (*alarm_irq_enable)(struct device *, unsigned int enabled);
155 int (*update_irq_enable)(struct device *, unsigned int enabled);
156}; 152};
157 153
158#define RTC_DEVICE_NAME_SIZE 20 154#define RTC_DEVICE_NAME_SIZE 20
@@ -203,6 +199,18 @@ struct rtc_device
203 struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */ 199 struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */
204 int pie_enabled; 200 int pie_enabled;
205 struct work_struct irqwork; 201 struct work_struct irqwork;
202
203
204#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
205 struct work_struct uie_task;
206 struct timer_list uie_timer;
207 /* Those fields are protected by rtc->irq_lock */
208 unsigned int oldsecs;
209 unsigned int uie_irq_active:1;
210 unsigned int stop_uie_polling:1;
211 unsigned int uie_task_active:1;
212 unsigned int uie_timer_active:1;
213#endif
206}; 214};
207#define to_rtc_device(d) container_of(d, struct rtc_device, dev) 215#define to_rtc_device(d) container_of(d, struct rtc_device, dev)
208 216
@@ -215,6 +223,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
215extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); 223extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
216extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); 224extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
217extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); 225extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
226int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
218extern int rtc_read_alarm(struct rtc_device *rtc, 227extern int rtc_read_alarm(struct rtc_device *rtc,
219 struct rtc_wkalrm *alrm); 228 struct rtc_wkalrm *alrm);
220extern int rtc_set_alarm(struct rtc_device *rtc, 229extern int rtc_set_alarm(struct rtc_device *rtc,
@@ -235,7 +244,10 @@ extern int rtc_irq_set_freq(struct rtc_device *rtc,
235 struct rtc_task *task, int freq); 244 struct rtc_task *task, int freq);
236extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled); 245extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled);
237extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled); 246extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
247extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
248 unsigned int enabled);
238 249
250void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode);
239void rtc_aie_update_irq(void *private); 251void rtc_aie_update_irq(void *private);
240void rtc_uie_update_irq(void *private); 252void rtc_uie_update_irq(void *private);
241enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer); 253enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer);
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index bd31808c7d8e..cc0072e93e36 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -43,14 +43,6 @@ typedef struct {
43 RW_DEP_MAP_INIT(lockname) } 43 RW_DEP_MAP_INIT(lockname) }
44#endif 44#endif
45 45
46/*
47 * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence
48 * deprecated.
49 *
50 * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate.
51 */
52#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
53
54#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) 46#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
55 47
56#endif /* __LINUX_RWLOCK_TYPES_H */ 48#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index bdfcc2527970..34701241b673 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -12,15 +12,7 @@
12#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead" 12#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
13#endif 13#endif
14 14
15#include <linux/spinlock.h>
16#include <linux/list.h>
17
18#ifdef __KERNEL__ 15#ifdef __KERNEL__
19
20#include <linux/types.h>
21
22struct rwsem_waiter;
23
24/* 16/*
25 * the rw-semaphore definition 17 * the rw-semaphore definition
26 * - if activity is 0 then there are no active readers or writers 18 * - if activity is 0 then there are no active readers or writers
@@ -37,28 +29,7 @@ struct rw_semaphore {
37#endif 29#endif
38}; 30};
39 31
40#ifdef CONFIG_DEBUG_LOCK_ALLOC 32#define RWSEM_UNLOCKED_VALUE 0x00000000
41# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
42#else
43# define __RWSEM_DEP_MAP_INIT(lockname)
44#endif
45
46#define __RWSEM_INITIALIZER(name) \
47{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \
48 __RWSEM_DEP_MAP_INIT(name) }
49
50#define DECLARE_RWSEM(name) \
51 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
52
53extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
54 struct lock_class_key *key);
55
56#define init_rwsem(sem) \
57do { \
58 static struct lock_class_key __key; \
59 \
60 __init_rwsem((sem), #sem, &__key); \
61} while (0)
62 33
63extern void __down_read(struct rw_semaphore *sem); 34extern void __down_read(struct rw_semaphore *sem);
64extern int __down_read_trylock(struct rw_semaphore *sem); 35extern int __down_read_trylock(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index efd348fe8ca7..a8afe9cd000c 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -11,6 +11,9 @@
11 11
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16
14#include <asm/system.h> 17#include <asm/system.h>
15#include <asm/atomic.h> 18#include <asm/atomic.h>
16 19
@@ -19,9 +22,57 @@ struct rw_semaphore;
19#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK 22#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
20#include <linux/rwsem-spinlock.h> /* use a generic implementation */ 23#include <linux/rwsem-spinlock.h> /* use a generic implementation */
21#else 24#else
22#include <asm/rwsem.h> /* use an arch-specific implementation */ 25/* All arch specific implementations share the same struct */
26struct rw_semaphore {
27 long count;
28 spinlock_t wait_lock;
29 struct list_head wait_list;
30#ifdef CONFIG_DEBUG_LOCK_ALLOC
31 struct lockdep_map dep_map;
32#endif
33};
34
35extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
36extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
37extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
38extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
39
40/* Include the arch specific part */
41#include <asm/rwsem.h>
42
43/* In all implementations count != 0 means locked */
44static inline int rwsem_is_locked(struct rw_semaphore *sem)
45{
46 return sem->count != 0;
47}
48
49#endif
50
51/* Common initializer macros and functions */
52
53#ifdef CONFIG_DEBUG_LOCK_ALLOC
54# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
55#else
56# define __RWSEM_DEP_MAP_INIT(lockname)
23#endif 57#endif
24 58
59#define __RWSEM_INITIALIZER(name) \
60 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \
61 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
62
63#define DECLARE_RWSEM(name) \
64 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
65
66extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
67 struct lock_class_key *key);
68
69#define init_rwsem(sem) \
70do { \
71 static struct lock_class_key __key; \
72 \
73 __init_rwsem((sem), #sem, &__key); \
74} while (0)
75
25/* 76/*
26 * lock for reading 77 * lock for reading
27 */ 78 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d747f948b34e..c15936fe998b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1058,6 +1058,7 @@ struct sched_class {
1058 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); 1058 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
1059 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); 1059 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
1060 void (*yield_task) (struct rq *rq); 1060 void (*yield_task) (struct rq *rq);
1061 bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
1061 1062
1062 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); 1063 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
1063 1064
@@ -1084,12 +1085,10 @@ struct sched_class {
1084 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); 1085 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
1085 void (*task_fork) (struct task_struct *p); 1086 void (*task_fork) (struct task_struct *p);
1086 1087
1087 void (*switched_from) (struct rq *this_rq, struct task_struct *task, 1088 void (*switched_from) (struct rq *this_rq, struct task_struct *task);
1088 int running); 1089 void (*switched_to) (struct rq *this_rq, struct task_struct *task);
1089 void (*switched_to) (struct rq *this_rq, struct task_struct *task,
1090 int running);
1091 void (*prio_changed) (struct rq *this_rq, struct task_struct *task, 1090 void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
1092 int oldprio, int running); 1091 int oldprio);
1093 1092
1094 unsigned int (*get_rr_interval) (struct rq *rq, 1093 unsigned int (*get_rr_interval) (struct rq *rq,
1095 struct task_struct *task); 1094 struct task_struct *task);
@@ -1715,7 +1714,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1715/* 1714/*
1716 * Per process flags 1715 * Per process flags
1717 */ 1716 */
1718#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
1719#define PF_STARTING 0x00000002 /* being created */ 1717#define PF_STARTING 0x00000002 /* being created */
1720#define PF_EXITING 0x00000004 /* getting shut down */ 1718#define PF_EXITING 0x00000004 /* getting shut down */
1721#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ 1719#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
@@ -1744,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1744#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ 1742#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1745#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ 1743#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
1746#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ 1744#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1747#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ 1745#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
1748#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ 1746#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
1749 1747
1750/* 1748/*
@@ -1945,8 +1943,6 @@ int sched_rt_handler(struct ctl_table *table, int write,
1945 void __user *buffer, size_t *lenp, 1943 void __user *buffer, size_t *lenp,
1946 loff_t *ppos); 1944 loff_t *ppos);
1947 1945
1948extern unsigned int sysctl_sched_compat_yield;
1949
1950#ifdef CONFIG_SCHED_AUTOGROUP 1946#ifdef CONFIG_SCHED_AUTOGROUP
1951extern unsigned int sysctl_sched_autogroup_enabled; 1947extern unsigned int sysctl_sched_autogroup_enabled;
1952 1948
@@ -1977,6 +1973,7 @@ static inline int rt_mutex_getprio(struct task_struct *p)
1977# define rt_mutex_adjust_pi(p) do { } while (0) 1973# define rt_mutex_adjust_pi(p) do { } while (0)
1978#endif 1974#endif
1979 1975
1976extern bool yield_to(struct task_struct *p, bool preempt);
1980extern void set_user_nice(struct task_struct *p, long nice); 1977extern void set_user_nice(struct task_struct *p, long nice);
1981extern int task_prio(const struct task_struct *p); 1978extern int task_prio(const struct task_struct *p);
1982extern int task_nice(const struct task_struct *p); 1979extern int task_nice(const struct task_struct *p);
@@ -2049,7 +2046,7 @@ extern void release_uids(struct user_namespace *ns);
2049 2046
2050#include <asm/current.h> 2047#include <asm/current.h>
2051 2048
2052extern void do_timer(unsigned long ticks); 2049extern void xtime_update(unsigned long ticks);
2053 2050
2054extern int wake_up_state(struct task_struct *tsk, unsigned int state); 2051extern int wake_up_state(struct task_struct *tsk, unsigned int state);
2055extern int wake_up_process(struct task_struct *tsk); 2052extern int wake_up_process(struct task_struct *tsk);
@@ -2578,13 +2575,6 @@ static inline void inc_syscw(struct task_struct *tsk)
2578#define TASK_SIZE_OF(tsk) TASK_SIZE 2575#define TASK_SIZE_OF(tsk) TASK_SIZE
2579#endif 2576#endif
2580 2577
2581/*
2582 * Call the function if the target task is executing on a CPU right now:
2583 */
2584extern void task_oncpu_function_call(struct task_struct *p,
2585 void (*func) (void *info), void *info);
2586
2587
2588#ifdef CONFIG_MM_OWNER 2578#ifdef CONFIG_MM_OWNER
2589extern void mm_update_next_owner(struct mm_struct *mm); 2579extern void mm_update_next_owner(struct mm_struct *mm);
2590extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); 2580extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --git a/include/linux/security.h b/include/linux/security.h
index b2b7f9749f5e..83d9227abf02 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/fsnotify.h> 26#include <linux/fsnotify.h>
27#include <linux/binfmts.h> 27#include <linux/binfmts.h>
28#include <linux/dcache.h>
28#include <linux/signal.h> 29#include <linux/signal.h>
29#include <linux/resource.h> 30#include <linux/resource.h>
30#include <linux/sem.h> 31#include <linux/sem.h>
@@ -53,7 +54,7 @@ struct audit_krule;
53 */ 54 */
54extern int cap_capable(struct task_struct *tsk, const struct cred *cred, 55extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
55 int cap, int audit); 56 int cap, int audit);
56extern int cap_settime(struct timespec *ts, struct timezone *tz); 57extern int cap_settime(const struct timespec *ts, const struct timezone *tz);
57extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); 58extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
58extern int cap_ptrace_traceme(struct task_struct *parent); 59extern int cap_ptrace_traceme(struct task_struct *parent);
59extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); 60extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
@@ -267,6 +268,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
267 * @orig the original mount data copied from userspace. 268 * @orig the original mount data copied from userspace.
268 * @copy copied data which will be passed to the security module. 269 * @copy copied data which will be passed to the security module.
269 * Returns 0 if the copy was successful. 270 * Returns 0 if the copy was successful.
271 * @sb_remount:
272 * Extracts security system specifc mount options and verifys no changes
273 * are being made to those options.
274 * @sb superblock being remounted
275 * @data contains the filesystem-specific data.
276 * Return 0 if permission is granted.
270 * @sb_umount: 277 * @sb_umount:
271 * Check permission before the @mnt file system is unmounted. 278 * Check permission before the @mnt file system is unmounted.
272 * @mnt contains the mounted file system. 279 * @mnt contains the mounted file system.
@@ -315,6 +322,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
315 * then it should return -EOPNOTSUPP to skip this processing. 322 * then it should return -EOPNOTSUPP to skip this processing.
316 * @inode contains the inode structure of the newly created inode. 323 * @inode contains the inode structure of the newly created inode.
317 * @dir contains the inode structure of the parent directory. 324 * @dir contains the inode structure of the parent directory.
325 * @qstr contains the last path component of the new object
318 * @name will be set to the allocated name suffix (e.g. selinux). 326 * @name will be set to the allocated name suffix (e.g. selinux).
319 * @value will be set to the allocated attribute value. 327 * @value will be set to the allocated attribute value.
320 * @len will be set to the length of the value. 328 * @len will be set to the length of the value.
@@ -1257,12 +1265,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
1257 * @cap contains the capability <include/linux/capability.h>. 1265 * @cap contains the capability <include/linux/capability.h>.
1258 * @audit: Whether to write an audit message or not 1266 * @audit: Whether to write an audit message or not
1259 * Return 0 if the capability is granted for @tsk. 1267 * Return 0 if the capability is granted for @tsk.
1260 * @sysctl:
1261 * Check permission before accessing the @table sysctl variable in the
1262 * manner specified by @op.
1263 * @table contains the ctl_table structure for the sysctl variable.
1264 * @op contains the operation (001 = search, 002 = write, 004 = read).
1265 * Return 0 if permission is granted.
1266 * @syslog: 1268 * @syslog:
1267 * Check permission before accessing the kernel message ring or changing 1269 * Check permission before accessing the kernel message ring or changing
1268 * logging to the console. 1270 * logging to the console.
@@ -1383,11 +1385,10 @@ struct security_operations {
1383 const kernel_cap_t *permitted); 1385 const kernel_cap_t *permitted);
1384 int (*capable) (struct task_struct *tsk, const struct cred *cred, 1386 int (*capable) (struct task_struct *tsk, const struct cred *cred,
1385 int cap, int audit); 1387 int cap, int audit);
1386 int (*sysctl) (struct ctl_table *table, int op);
1387 int (*quotactl) (int cmds, int type, int id, struct super_block *sb); 1388 int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
1388 int (*quota_on) (struct dentry *dentry); 1389 int (*quota_on) (struct dentry *dentry);
1389 int (*syslog) (int type); 1390 int (*syslog) (int type);
1390 int (*settime) (struct timespec *ts, struct timezone *tz); 1391 int (*settime) (const struct timespec *ts, const struct timezone *tz);
1391 int (*vm_enough_memory) (struct mm_struct *mm, long pages); 1392 int (*vm_enough_memory) (struct mm_struct *mm, long pages);
1392 1393
1393 int (*bprm_set_creds) (struct linux_binprm *bprm); 1394 int (*bprm_set_creds) (struct linux_binprm *bprm);
@@ -1399,6 +1400,7 @@ struct security_operations {
1399 int (*sb_alloc_security) (struct super_block *sb); 1400 int (*sb_alloc_security) (struct super_block *sb);
1400 void (*sb_free_security) (struct super_block *sb); 1401 void (*sb_free_security) (struct super_block *sb);
1401 int (*sb_copy_data) (char *orig, char *copy); 1402 int (*sb_copy_data) (char *orig, char *copy);
1403 int (*sb_remount) (struct super_block *sb, void *data);
1402 int (*sb_kern_mount) (struct super_block *sb, int flags, void *data); 1404 int (*sb_kern_mount) (struct super_block *sb, int flags, void *data);
1403 int (*sb_show_options) (struct seq_file *m, struct super_block *sb); 1405 int (*sb_show_options) (struct seq_file *m, struct super_block *sb);
1404 int (*sb_statfs) (struct dentry *dentry); 1406 int (*sb_statfs) (struct dentry *dentry);
@@ -1435,7 +1437,8 @@ struct security_operations {
1435 int (*inode_alloc_security) (struct inode *inode); 1437 int (*inode_alloc_security) (struct inode *inode);
1436 void (*inode_free_security) (struct inode *inode); 1438 void (*inode_free_security) (struct inode *inode);
1437 int (*inode_init_security) (struct inode *inode, struct inode *dir, 1439 int (*inode_init_security) (struct inode *inode, struct inode *dir,
1438 char **name, void **value, size_t *len); 1440 const struct qstr *qstr, char **name,
1441 void **value, size_t *len);
1439 int (*inode_create) (struct inode *dir, 1442 int (*inode_create) (struct inode *dir,
1440 struct dentry *dentry, int mode); 1443 struct dentry *dentry, int mode);
1441 int (*inode_link) (struct dentry *old_dentry, 1444 int (*inode_link) (struct dentry *old_dentry,
@@ -1665,11 +1668,10 @@ int security_capset(struct cred *new, const struct cred *old,
1665int security_capable(const struct cred *cred, int cap); 1668int security_capable(const struct cred *cred, int cap);
1666int security_real_capable(struct task_struct *tsk, int cap); 1669int security_real_capable(struct task_struct *tsk, int cap);
1667int security_real_capable_noaudit(struct task_struct *tsk, int cap); 1670int security_real_capable_noaudit(struct task_struct *tsk, int cap);
1668int security_sysctl(struct ctl_table *table, int op);
1669int security_quotactl(int cmds, int type, int id, struct super_block *sb); 1671int security_quotactl(int cmds, int type, int id, struct super_block *sb);
1670int security_quota_on(struct dentry *dentry); 1672int security_quota_on(struct dentry *dentry);
1671int security_syslog(int type); 1673int security_syslog(int type);
1672int security_settime(struct timespec *ts, struct timezone *tz); 1674int security_settime(const struct timespec *ts, const struct timezone *tz);
1673int security_vm_enough_memory(long pages); 1675int security_vm_enough_memory(long pages);
1674int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); 1676int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
1675int security_vm_enough_memory_kern(long pages); 1677int security_vm_enough_memory_kern(long pages);
@@ -1681,6 +1683,7 @@ int security_bprm_secureexec(struct linux_binprm *bprm);
1681int security_sb_alloc(struct super_block *sb); 1683int security_sb_alloc(struct super_block *sb);
1682void security_sb_free(struct super_block *sb); 1684void security_sb_free(struct super_block *sb);
1683int security_sb_copy_data(char *orig, char *copy); 1685int security_sb_copy_data(char *orig, char *copy);
1686int security_sb_remount(struct super_block *sb, void *data);
1684int security_sb_kern_mount(struct super_block *sb, int flags, void *data); 1687int security_sb_kern_mount(struct super_block *sb, int flags, void *data);
1685int security_sb_show_options(struct seq_file *m, struct super_block *sb); 1688int security_sb_show_options(struct seq_file *m, struct super_block *sb);
1686int security_sb_statfs(struct dentry *dentry); 1689int security_sb_statfs(struct dentry *dentry);
@@ -1696,7 +1699,8 @@ int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts);
1696int security_inode_alloc(struct inode *inode); 1699int security_inode_alloc(struct inode *inode);
1697void security_inode_free(struct inode *inode); 1700void security_inode_free(struct inode *inode);
1698int security_inode_init_security(struct inode *inode, struct inode *dir, 1701int security_inode_init_security(struct inode *inode, struct inode *dir,
1699 char **name, void **value, size_t *len); 1702 const struct qstr *qstr, char **name,
1703 void **value, size_t *len);
1700int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); 1704int security_inode_create(struct inode *dir, struct dentry *dentry, int mode);
1701int security_inode_link(struct dentry *old_dentry, struct inode *dir, 1705int security_inode_link(struct dentry *old_dentry, struct inode *dir,
1702 struct dentry *new_dentry); 1706 struct dentry *new_dentry);
@@ -1883,11 +1887,6 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap)
1883 return ret; 1887 return ret;
1884} 1888}
1885 1889
1886static inline int security_sysctl(struct ctl_table *table, int op)
1887{
1888 return 0;
1889}
1890
1891static inline int security_quotactl(int cmds, int type, int id, 1890static inline int security_quotactl(int cmds, int type, int id,
1892 struct super_block *sb) 1891 struct super_block *sb)
1893{ 1892{
@@ -1904,7 +1903,8 @@ static inline int security_syslog(int type)
1904 return 0; 1903 return 0;
1905} 1904}
1906 1905
1907static inline int security_settime(struct timespec *ts, struct timezone *tz) 1906static inline int security_settime(const struct timespec *ts,
1907 const struct timezone *tz)
1908{ 1908{
1909 return cap_settime(ts, tz); 1909 return cap_settime(ts, tz);
1910} 1910}
@@ -1964,6 +1964,11 @@ static inline int security_sb_copy_data(char *orig, char *copy)
1964 return 0; 1964 return 0;
1965} 1965}
1966 1966
1967static inline int security_sb_remount(struct super_block *sb, void *data)
1968{
1969 return 0;
1970}
1971
1967static inline int security_sb_kern_mount(struct super_block *sb, int flags, void *data) 1972static inline int security_sb_kern_mount(struct super_block *sb, int flags, void *data)
1968{ 1973{
1969 return 0; 1974 return 0;
@@ -2023,6 +2028,7 @@ static inline void security_inode_free(struct inode *inode)
2023 2028
2024static inline int security_inode_init_security(struct inode *inode, 2029static inline int security_inode_init_security(struct inode *inode,
2025 struct inode *dir, 2030 struct inode *dir,
2031 const struct qstr *qstr,
2026 char **name, 2032 char **name,
2027 void **value, 2033 void **value,
2028 size_t *len) 2034 size_t *len)
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 851b7783720d..73548eb13a5d 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -81,14 +81,6 @@ typedef struct spinlock {
81#define __SPIN_LOCK_UNLOCKED(lockname) \ 81#define __SPIN_LOCK_UNLOCKED(lockname) \
82 (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) 82 (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
83 83
84/*
85 * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
86 * deprecated.
87 * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as
88 * appropriate.
89 */
90#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
91
92#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) 84#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
93 85
94#include <linux/rwlock_types.h> 86#include <linux/rwlock_types.h>
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 88513fd8e208..d81db8012c63 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -212,6 +212,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
212struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, 212struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
213 const struct rpc_call_ops *ops); 213 const struct rpc_call_ops *ops);
214void rpc_put_task(struct rpc_task *); 214void rpc_put_task(struct rpc_task *);
215void rpc_put_task_async(struct rpc_task *);
215void rpc_exit_task(struct rpc_task *); 216void rpc_exit_task(struct rpc_task *);
216void rpc_exit(struct rpc_task *, int); 217void rpc_exit(struct rpc_task *, int);
217void rpc_release_calldata(const struct rpc_call_ops *, void *); 218void rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 98664db1be47..1f5c18e6f4f1 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -62,6 +62,7 @@ struct robust_list_head;
62struct getcpu_cache; 62struct getcpu_cache;
63struct old_linux_dirent; 63struct old_linux_dirent;
64struct perf_event_attr; 64struct perf_event_attr;
65struct file_handle;
65 66
66#include <linux/types.h> 67#include <linux/types.h>
67#include <linux/aio_abi.h> 68#include <linux/aio_abi.h>
@@ -132,11 +133,11 @@ extern struct trace_event_functions exit_syscall_print_funcs;
132 .class = &event_class_syscall_enter, \ 133 .class = &event_class_syscall_enter, \
133 .event.funcs = &enter_syscall_print_funcs, \ 134 .event.funcs = &enter_syscall_print_funcs, \
134 .data = (void *)&__syscall_meta_##sname,\ 135 .data = (void *)&__syscall_meta_##sname,\
136 .flags = TRACE_EVENT_FL_CAP_ANY, \
135 }; \ 137 }; \
136 static struct ftrace_event_call __used \ 138 static struct ftrace_event_call __used \
137 __attribute__((section("_ftrace_events"))) \ 139 __attribute__((section("_ftrace_events"))) \
138 *__event_enter_##sname = &event_enter_##sname; \ 140 *__event_enter_##sname = &event_enter_##sname;
139 __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
140 141
141#define SYSCALL_TRACE_EXIT_EVENT(sname) \ 142#define SYSCALL_TRACE_EXIT_EVENT(sname) \
142 static struct syscall_metadata __syscall_meta_##sname; \ 143 static struct syscall_metadata __syscall_meta_##sname; \
@@ -146,11 +147,11 @@ extern struct trace_event_functions exit_syscall_print_funcs;
146 .class = &event_class_syscall_exit, \ 147 .class = &event_class_syscall_exit, \
147 .event.funcs = &exit_syscall_print_funcs, \ 148 .event.funcs = &exit_syscall_print_funcs, \
148 .data = (void *)&__syscall_meta_##sname,\ 149 .data = (void *)&__syscall_meta_##sname,\
150 .flags = TRACE_EVENT_FL_CAP_ANY, \
149 }; \ 151 }; \
150 static struct ftrace_event_call __used \ 152 static struct ftrace_event_call __used \
151 __attribute__((section("_ftrace_events"))) \ 153 __attribute__((section("_ftrace_events"))) \
152 *__event_exit_##sname = &event_exit_##sname; \ 154 *__event_exit_##sname = &event_exit_##sname;
153 __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
154 155
155#define SYSCALL_METADATA(sname, nb) \ 156#define SYSCALL_METADATA(sname, nb) \
156 SYSCALL_TRACE_ENTER_EVENT(sname); \ 157 SYSCALL_TRACE_ENTER_EVENT(sname); \
@@ -158,6 +159,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
158 static struct syscall_metadata __used \ 159 static struct syscall_metadata __used \
159 __syscall_meta_##sname = { \ 160 __syscall_meta_##sname = { \
160 .name = "sys"#sname, \ 161 .name = "sys"#sname, \
162 .syscall_nr = -1, /* Filled in at boot */ \
161 .nb_args = nb, \ 163 .nb_args = nb, \
162 .types = types_##sname, \ 164 .types = types_##sname, \
163 .args = args_##sname, \ 165 .args = args_##sname, \
@@ -175,6 +177,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
175 static struct syscall_metadata __used \ 177 static struct syscall_metadata __used \
176 __syscall_meta__##sname = { \ 178 __syscall_meta__##sname = { \
177 .name = "sys_"#sname, \ 179 .name = "sys_"#sname, \
180 .syscall_nr = -1, /* Filled in at boot */ \
178 .nb_args = 0, \ 181 .nb_args = 0, \
179 .enter_event = &event_enter__##sname, \ 182 .enter_event = &event_enter__##sname, \
180 .exit_event = &event_exit__##sname, \ 183 .exit_event = &event_exit__##sname, \
@@ -313,6 +316,8 @@ asmlinkage long sys_clock_settime(clockid_t which_clock,
313 const struct timespec __user *tp); 316 const struct timespec __user *tp);
314asmlinkage long sys_clock_gettime(clockid_t which_clock, 317asmlinkage long sys_clock_gettime(clockid_t which_clock,
315 struct timespec __user *tp); 318 struct timespec __user *tp);
319asmlinkage long sys_clock_adjtime(clockid_t which_clock,
320 struct timex __user *tx);
316asmlinkage long sys_clock_getres(clockid_t which_clock, 321asmlinkage long sys_clock_getres(clockid_t which_clock,
317 struct timespec __user *tp); 322 struct timespec __user *tp);
318asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, 323asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
@@ -832,5 +837,10 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
832 unsigned long prot, unsigned long flags, 837 unsigned long prot, unsigned long flags,
833 unsigned long fd, unsigned long pgoff); 838 unsigned long fd, unsigned long pgoff);
834asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); 839asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
835 840asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
841 struct file_handle __user *handle,
842 int __user *mnt_id, int flag);
843asmlinkage long sys_open_by_handle_at(int mountdirfd,
844 struct file_handle __user *handle,
845 int flags);
836#endif 846#endif
diff --git a/include/linux/syscore_ops.h b/include/linux/syscore_ops.h
new file mode 100644
index 000000000000..27b3b0bc41a9
--- /dev/null
+++ b/include/linux/syscore_ops.h
@@ -0,0 +1,29 @@
1/*
2 * syscore_ops.h - System core operations.
3 *
4 * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
5 *
6 * This file is released under the GPLv2.
7 */
8
9#ifndef _LINUX_SYSCORE_OPS_H
10#define _LINUX_SYSCORE_OPS_H
11
12#include <linux/list.h>
13
14struct syscore_ops {
15 struct list_head node;
16 int (*suspend)(void);
17 void (*resume)(void);
18 void (*shutdown)(void);
19};
20
21extern void register_syscore_ops(struct syscore_ops *ops);
22extern void unregister_syscore_ops(struct syscore_ops *ops);
23#ifdef CONFIG_PM_SLEEP
24extern int syscore_suspend(void);
25extern void syscore_resume(void);
26#endif
27extern void syscore_shutdown(void);
28
29#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7bb5cb64f3b8..11684d9e6bd2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -930,6 +930,7 @@ enum
930 930
931#ifdef __KERNEL__ 931#ifdef __KERNEL__
932#include <linux/list.h> 932#include <linux/list.h>
933#include <linux/rcupdate.h>
933 934
934/* For the /proc/sys support */ 935/* For the /proc/sys support */
935struct ctl_table; 936struct ctl_table;
@@ -1037,10 +1038,15 @@ struct ctl_table_root {
1037 struct ctl_table trees. */ 1038 struct ctl_table trees. */
1038struct ctl_table_header 1039struct ctl_table_header
1039{ 1040{
1040 struct ctl_table *ctl_table; 1041 union {
1041 struct list_head ctl_entry; 1042 struct {
1042 int used; 1043 struct ctl_table *ctl_table;
1043 int count; 1044 struct list_head ctl_entry;
1045 int used;
1046 int count;
1047 };
1048 struct rcu_head rcu;
1049 };
1044 struct completion *unregistering; 1050 struct completion *unregistering;
1045 struct ctl_table *ctl_table_arg; 1051 struct ctl_table *ctl_table_arg;
1046 struct ctl_table_root *root; 1052 struct ctl_table_root *root;
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 8651556dbd52..d3ec89fb4122 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -172,6 +172,14 @@ void thermal_zone_device_update(struct thermal_zone_device *);
172struct thermal_cooling_device *thermal_cooling_device_register(char *, void *, 172struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
173 const struct thermal_cooling_device_ops *); 173 const struct thermal_cooling_device_ops *);
174void thermal_cooling_device_unregister(struct thermal_cooling_device *); 174void thermal_cooling_device_unregister(struct thermal_cooling_device *);
175
176#ifdef CONFIG_NET
175extern int generate_netlink_event(u32 orig, enum events event); 177extern int generate_netlink_event(u32 orig, enum events event);
178#else
179static inline int generate_netlink_event(u32 orig, enum events event)
180{
181 return 0;
182}
183#endif
176 184
177#endif /* __THERMAL_H__ */ 185#endif /* __THERMAL_H__ */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c90696544176..20fc303947d3 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -18,9 +18,6 @@ struct compat_timespec;
18struct restart_block { 18struct restart_block {
19 long (*fn)(struct restart_block *); 19 long (*fn)(struct restart_block *);
20 union { 20 union {
21 struct {
22 unsigned long arg0, arg1, arg2, arg3;
23 };
24 /* For futex_wait and futex_wait_requeue_pi */ 21 /* For futex_wait and futex_wait_requeue_pi */
25 struct { 22 struct {
26 u32 __user *uaddr; 23 u32 __user *uaddr;
diff --git a/include/linux/time.h b/include/linux/time.h
index 1e6d3b59238d..454a26205787 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,8 +113,6 @@ static inline struct timespec timespec_sub(struct timespec lhs,
113#define timespec_valid(ts) \ 113#define timespec_valid(ts) \
114 (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC)) 114 (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
115 115
116extern seqlock_t xtime_lock;
117
118extern void read_persistent_clock(struct timespec *ts); 116extern void read_persistent_clock(struct timespec *ts);
119extern void read_boot_clock(struct timespec *ts); 117extern void read_boot_clock(struct timespec *ts);
120extern int update_persistent_clock(struct timespec now); 118extern int update_persistent_clock(struct timespec now);
@@ -125,8 +123,9 @@ extern int timekeeping_suspended;
125unsigned long get_seconds(void); 123unsigned long get_seconds(void);
126struct timespec current_kernel_time(void); 124struct timespec current_kernel_time(void);
127struct timespec __current_kernel_time(void); /* does not take xtime_lock */ 125struct timespec __current_kernel_time(void); /* does not take xtime_lock */
128struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */
129struct timespec get_monotonic_coarse(void); 126struct timespec get_monotonic_coarse(void);
127void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
128 struct timespec *wtom, struct timespec *sleep);
130 129
131#define CURRENT_TIME (current_kernel_time()) 130#define CURRENT_TIME (current_kernel_time())
132#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) 131#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 })
@@ -147,8 +146,9 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
147#endif 146#endif
148 147
149extern void do_gettimeofday(struct timeval *tv); 148extern void do_gettimeofday(struct timeval *tv);
150extern int do_settimeofday(struct timespec *tv); 149extern int do_settimeofday(const struct timespec *tv);
151extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); 150extern int do_sys_settimeofday(const struct timespec *tv,
151 const struct timezone *tz);
152#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) 152#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
153extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags); 153extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags);
154struct itimerval; 154struct itimerval;
@@ -162,12 +162,13 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw,
162 struct timespec *ts_real); 162 struct timespec *ts_real);
163extern void getboottime(struct timespec *ts); 163extern void getboottime(struct timespec *ts);
164extern void monotonic_to_bootbased(struct timespec *ts); 164extern void monotonic_to_bootbased(struct timespec *ts);
165extern void get_monotonic_boottime(struct timespec *ts);
165 166
166extern struct timespec timespec_trunc(struct timespec t, unsigned gran); 167extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
167extern int timekeeping_valid_for_hres(void); 168extern int timekeeping_valid_for_hres(void);
168extern u64 timekeeping_max_deferment(void); 169extern u64 timekeeping_max_deferment(void);
169extern void update_wall_time(void);
170extern void timekeeping_leap_insert(int leapsecond); 170extern void timekeeping_leap_insert(int leapsecond);
171extern int timekeeping_inject_offset(struct timespec *ts);
171 172
172struct tms; 173struct tms;
173extern void do_sys_times(struct tms *); 174extern void do_sys_times(struct tms *);
@@ -292,6 +293,7 @@ struct itimerval {
292#define CLOCK_MONOTONIC_RAW 4 293#define CLOCK_MONOTONIC_RAW 4
293#define CLOCK_REALTIME_COARSE 5 294#define CLOCK_REALTIME_COARSE 5
294#define CLOCK_MONOTONIC_COARSE 6 295#define CLOCK_MONOTONIC_COARSE 6
296#define CLOCK_BOOTTIME 7
295 297
296/* 298/*
297 * The IDs of various hardware clocks: 299 * The IDs of various hardware clocks:
diff --git a/include/linux/timex.h b/include/linux/timex.h
index d23999f9499d..aa60fe7b6ed6 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -73,7 +73,7 @@ struct timex {
73 long tolerance; /* clock frequency tolerance (ppm) 73 long tolerance; /* clock frequency tolerance (ppm)
74 * (read only) 74 * (read only)
75 */ 75 */
76 struct timeval time; /* (read only) */ 76 struct timeval time; /* (read only, except for ADJ_SETOFFSET) */
77 long tick; /* (modified) usecs between clock ticks */ 77 long tick; /* (modified) usecs between clock ticks */
78 78
79 long ppsfreq; /* pps frequency (scaled ppm) (ro) */ 79 long ppsfreq; /* pps frequency (scaled ppm) (ro) */
@@ -102,6 +102,7 @@ struct timex {
102#define ADJ_STATUS 0x0010 /* clock status */ 102#define ADJ_STATUS 0x0010 /* clock status */
103#define ADJ_TIMECONST 0x0020 /* pll time constant */ 103#define ADJ_TIMECONST 0x0020 /* pll time constant */
104#define ADJ_TAI 0x0080 /* set TAI offset */ 104#define ADJ_TAI 0x0080 /* set TAI offset */
105#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */
105#define ADJ_MICRO 0x1000 /* select microsecond resolution */ 106#define ADJ_MICRO 0x1000 /* select microsecond resolution */
106#define ADJ_NANO 0x2000 /* select nanosecond resolution */ 107#define ADJ_NANO 0x2000 /* select nanosecond resolution */
107#define ADJ_TICK 0x4000 /* tick value */ 108#define ADJ_TICK 0x4000 /* tick value */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 1ac11586a2f5..f584aba78ca9 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -250,7 +250,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
250enum { 250enum {
251 WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ 251 WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */
252 WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ 252 WQ_UNBOUND = 1 << 1, /* not bound to any cpu */
253 WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ 253 WQ_FREEZABLE = 1 << 2, /* freeze during suspend */
254 WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ 254 WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */
255 WQ_HIGHPRI = 1 << 4, /* high priority */ 255 WQ_HIGHPRI = 1 << 4, /* high priority */
256 WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ 256 WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */
@@ -286,11 +286,15 @@ enum {
286 * any specific CPU, not concurrency managed, and all queued works are 286 * any specific CPU, not concurrency managed, and all queued works are
287 * executed immediately as long as max_active limit is not reached and 287 * executed immediately as long as max_active limit is not reached and
288 * resources are available. 288 * resources are available.
289 *
290 * system_freezable_wq is equivalent to system_wq except that it's
291 * freezable.
289 */ 292 */
290extern struct workqueue_struct *system_wq; 293extern struct workqueue_struct *system_wq;
291extern struct workqueue_struct *system_long_wq; 294extern struct workqueue_struct *system_long_wq;
292extern struct workqueue_struct *system_nrt_wq; 295extern struct workqueue_struct *system_nrt_wq;
293extern struct workqueue_struct *system_unbound_wq; 296extern struct workqueue_struct *system_unbound_wq;
297extern struct workqueue_struct *system_freezable_wq;
294 298
295extern struct workqueue_struct * 299extern struct workqueue_struct *
296__alloc_workqueue_key(const char *name, unsigned int flags, int max_active, 300__alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
@@ -318,7 +322,7 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
318/** 322/**
319 * alloc_ordered_workqueue - allocate an ordered workqueue 323 * alloc_ordered_workqueue - allocate an ordered workqueue
320 * @name: name of the workqueue 324 * @name: name of the workqueue
321 * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_MEM_RECLAIM are meaningful) 325 * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
322 * 326 *
323 * Allocate an ordered workqueue. An ordered workqueue executes at 327 * Allocate an ordered workqueue. An ordered workqueue executes at
324 * most one work item at any given time in the queued order. They are 328 * most one work item at any given time in the queued order. They are
@@ -335,8 +339,8 @@ alloc_ordered_workqueue(const char *name, unsigned int flags)
335 339
336#define create_workqueue(name) \ 340#define create_workqueue(name) \
337 alloc_workqueue((name), WQ_MEM_RECLAIM, 1) 341 alloc_workqueue((name), WQ_MEM_RECLAIM, 1)
338#define create_freezeable_workqueue(name) \ 342#define create_freezable_workqueue(name) \
339 alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) 343 alloc_workqueue((name), WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
340#define create_singlethread_workqueue(name) \ 344#define create_singlethread_workqueue(name) \
341 alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1) 345 alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
342 346
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index e6131ef98d8f..6050783005bd 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -42,11 +42,13 @@
42#define XATTR_SMACK_IPOUT "SMACK64IPOUT" 42#define XATTR_SMACK_IPOUT "SMACK64IPOUT"
43#define XATTR_SMACK_EXEC "SMACK64EXEC" 43#define XATTR_SMACK_EXEC "SMACK64EXEC"
44#define XATTR_SMACK_TRANSMUTE "SMACK64TRANSMUTE" 44#define XATTR_SMACK_TRANSMUTE "SMACK64TRANSMUTE"
45#define XATTR_SMACK_MMAP "SMACK64MMAP"
45#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX 46#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
46#define XATTR_NAME_SMACKIPIN XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN 47#define XATTR_NAME_SMACKIPIN XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
47#define XATTR_NAME_SMACKIPOUT XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT 48#define XATTR_NAME_SMACKIPOUT XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
48#define XATTR_NAME_SMACKEXEC XATTR_SECURITY_PREFIX XATTR_SMACK_EXEC 49#define XATTR_NAME_SMACKEXEC XATTR_SECURITY_PREFIX XATTR_SMACK_EXEC
49#define XATTR_NAME_SMACKTRANSMUTE XATTR_SECURITY_PREFIX XATTR_SMACK_TRANSMUTE 50#define XATTR_NAME_SMACKTRANSMUTE XATTR_SECURITY_PREFIX XATTR_SMACK_TRANSMUTE
51#define XATTR_NAME_SMACKMMAP XATTR_SECURITY_PREFIX XATTR_SMACK_MMAP
50 52
51#define XATTR_CAPS_SUFFIX "capability" 53#define XATTR_CAPS_SUFFIX "capability"
52#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX 54#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 071fd7a8d781..6b75a6971346 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -139,6 +139,8 @@ do { \
139 */ 139 */
140 140
141enum p9_msg_t { 141enum p9_msg_t {
142 P9_TSYNCFS = 0,
143 P9_RSYNCFS,
142 P9_TLERROR = 6, 144 P9_TLERROR = 6,
143 P9_RLERROR, 145 P9_RLERROR,
144 P9_TSTATFS = 8, 146 P9_TSTATFS = 8,
@@ -688,7 +690,11 @@ struct p9_rwstat {
688 * @id: protocol operating identifier of type &p9_msg_t 690 * @id: protocol operating identifier of type &p9_msg_t
689 * @tag: transaction id of the request 691 * @tag: transaction id of the request
690 * @offset: used by marshalling routines to track currentposition in buffer 692 * @offset: used by marshalling routines to track currentposition in buffer
691 * @capacity: used by marshalling routines to track total capacity 693 * @capacity: used by marshalling routines to track total malloc'd capacity
694 * @pubuf: Payload user buffer given by the caller
695 * @pubuf: Payload kernel buffer given by the caller
696 * @pbuf_size: pubuf/pkbuf(only one will be !NULL) size to be read/write.
697 * @private: For transport layer's use.
692 * @sdata: payload 698 * @sdata: payload
693 * 699 *
694 * &p9_fcall represents the structure for all 9P RPC 700 * &p9_fcall represents the structure for all 9P RPC
@@ -705,6 +711,10 @@ struct p9_fcall {
705 711
706 size_t offset; 712 size_t offset;
707 size_t capacity; 713 size_t capacity;
714 char __user *pubuf;
715 char *pkbuf;
716 size_t pbuf_size;
717 void *private;
708 718
709 uint8_t *sdata; 719 uint8_t *sdata;
710}; 720};
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 83ba6a4d58a3..0a30977e3c1f 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -230,6 +230,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
230 gid_t gid, struct p9_qid *qid); 230 gid_t gid, struct p9_qid *qid);
231int p9_client_clunk(struct p9_fid *fid); 231int p9_client_clunk(struct p9_fid *fid);
232int p9_client_fsync(struct p9_fid *fid, int datasync); 232int p9_client_fsync(struct p9_fid *fid, int datasync);
233int p9_client_sync_fs(struct p9_fid *fid);
233int p9_client_remove(struct p9_fid *fid); 234int p9_client_remove(struct p9_fid *fid);
234int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, 235int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
235 u64 offset, u32 count); 236 u64 offset, u32 count);
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 6d5886efb102..82868f18c573 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -26,11 +26,19 @@
26#ifndef NET_9P_TRANSPORT_H 26#ifndef NET_9P_TRANSPORT_H
27#define NET_9P_TRANSPORT_H 27#define NET_9P_TRANSPORT_H
28 28
29#define P9_TRANS_PREF_PAYLOAD_MASK 0x1
30
31/* Default. Add Payload to PDU before sending it down to transport layer */
32#define P9_TRANS_PREF_PAYLOAD_DEF 0x0
33/* Send pay load seperately to transport layer along with PDU.*/
34#define P9_TRANS_PREF_PAYLOAD_SEP 0x1
35
29/** 36/**
30 * struct p9_trans_module - transport module interface 37 * struct p9_trans_module - transport module interface
31 * @list: used to maintain a list of currently available transports 38 * @list: used to maintain a list of currently available transports
32 * @name: the human-readable name of the transport 39 * @name: the human-readable name of the transport
33 * @maxsize: transport provided maximum packet size 40 * @maxsize: transport provided maximum packet size
41 * @pref: Preferences of this transport
34 * @def: set if this transport should be considered the default 42 * @def: set if this transport should be considered the default
35 * @create: member function to create a new connection on this transport 43 * @create: member function to create a new connection on this transport
36 * @request: member function to issue a request to the transport 44 * @request: member function to issue a request to the transport
@@ -47,6 +55,7 @@ struct p9_trans_module {
47 struct list_head list; 55 struct list_head list;
48 char *name; /* name of transport */ 56 char *name; /* name of transport */
49 int maxsize; /* max message size of transport */ 57 int maxsize; /* max message size of transport */
58 int pref; /* Preferences of this transport */
50 int def; /* this transport should be default */ 59 int def; /* this transport should be default */
51 struct module *owner; 60 struct module *owner;
52 int (*create)(struct p9_client *, const char *, char *); 61 int (*create)(struct p9_client *, const char *, char *);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4a3cd2cd2f5e..96e50e0ce3ca 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -89,6 +89,18 @@
89#define IPV6_ADDR_SCOPE_GLOBAL 0x0e 89#define IPV6_ADDR_SCOPE_GLOBAL 0x0e
90 90
91/* 91/*
92 * Addr flags
93 */
94#ifdef __KERNEL__
95#define IPV6_ADDR_MC_FLAG_TRANSIENT(a) \
96 ((a)->s6_addr[1] & 0x10)
97#define IPV6_ADDR_MC_FLAG_PREFIX(a) \
98 ((a)->s6_addr[1] & 0x20)
99#define IPV6_ADDR_MC_FLAG_RENDEZVOUS(a) \
100 ((a)->s6_addr[1] & 0x40)
101#endif
102
103/*
92 * fragmentation header 104 * fragmentation header
93 */ 105 */
94 106
diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h
index cd85b3bc8327..e505358d8999 100644
--- a/include/net/netfilter/nf_tproxy_core.h
+++ b/include/net/netfilter/nf_tproxy_core.h
@@ -201,18 +201,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
201} 201}
202#endif 202#endif
203 203
204static inline void
205nf_tproxy_put_sock(struct sock *sk)
206{
207 /* TIME_WAIT inet sockets have to be handled differently */
208 if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT))
209 inet_twsk_put(inet_twsk(sk));
210 else
211 sock_put(sk);
212}
213
214/* assign a socket to the skb -- consumes sk */ 204/* assign a socket to the skb -- consumes sk */
215int 205void
216nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); 206nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk);
217 207
218#endif 208#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 160a407c1963..04f8556313d5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -199,7 +199,7 @@ struct tcf_proto {
199 199
200struct qdisc_skb_cb { 200struct qdisc_skb_cb {
201 unsigned int pkt_len; 201 unsigned int pkt_len;
202 char data[]; 202 long data[];
203}; 203};
204 204
205static inline int qdisc_qlen(struct Qdisc *q) 205static inline int qdisc_qlen(struct Qdisc *q)
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 8479b66c067b..3fd5064dd43a 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -261,6 +261,7 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev);
261#define CONF_ENABLE_ESR 0x0008 261#define CONF_ENABLE_ESR 0x0008
262#define CONF_ENABLE_IOCARD 0x0010 /* auto-enabled if IO resources or IRQ 262#define CONF_ENABLE_IOCARD 0x0010 /* auto-enabled if IO resources or IRQ
263 * (CONF_ENABLE_IRQ) in use */ 263 * (CONF_ENABLE_IRQ) in use */
264#define CONF_ENABLE_ZVCARD 0x0020
264 265
265/* flags used by pcmcia_loop_config() autoconfiguration */ 266/* flags used by pcmcia_loop_config() autoconfiguration */
266#define CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */ 267#define CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */
diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h
index c583193ae929..9c159f74c6d0 100644
--- a/include/scsi/sas_ata.h
+++ b/include/scsi/sas_ata.h
@@ -39,6 +39,11 @@ int sas_ata_init_host_and_port(struct domain_device *found_dev,
39 struct scsi_target *starget); 39 struct scsi_target *starget);
40 40
41void sas_ata_task_abort(struct sas_task *task); 41void sas_ata_task_abort(struct sas_task *task);
42void sas_ata_strategy_handler(struct Scsi_Host *shost);
43int sas_ata_timed_out(struct scsi_cmnd *cmd, struct sas_task *task,
44 enum blk_eh_timer_return *rtn);
45int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
46 struct list_head *done_q);
42 47
43#else 48#else
44 49
@@ -55,6 +60,23 @@ static inline int sas_ata_init_host_and_port(struct domain_device *found_dev,
55static inline void sas_ata_task_abort(struct sas_task *task) 60static inline void sas_ata_task_abort(struct sas_task *task)
56{ 61{
57} 62}
63
64static inline void sas_ata_strategy_handler(struct Scsi_Host *shost)
65{
66}
67
68static inline int sas_ata_timed_out(struct scsi_cmnd *cmd,
69 struct sas_task *task,
70 enum blk_eh_timer_return *rtn)
71{
72 return 0;
73}
74static inline int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
75 struct list_head *done_q)
76{
77 return 0;
78}
79
58#endif 80#endif
59 81
60#endif /* _SAS_ATA_H_ */ 82#endif /* _SAS_ATA_H_ */
diff --git a/include/sound/wm8903.h b/include/sound/wm8903.h
index b4a0db2307ef..1eeebd534f7e 100644
--- a/include/sound/wm8903.h
+++ b/include/sound/wm8903.h
@@ -17,13 +17,9 @@
17/* 17/*
18 * R6 (0x06) - Mic Bias Control 0 18 * R6 (0x06) - Mic Bias Control 0
19 */ 19 */
20#define WM8903_MICDET_HYST_ENA 0x0080 /* MICDET_HYST_ENA */ 20#define WM8903_MICDET_THR_MASK 0x0030 /* MICDET_THR - [5:4] */
21#define WM8903_MICDET_HYST_ENA_MASK 0x0080 /* MICDET_HYST_ENA */ 21#define WM8903_MICDET_THR_SHIFT 4 /* MICDET_THR - [5:4] */
22#define WM8903_MICDET_HYST_ENA_SHIFT 7 /* MICDET_HYST_ENA */ 22#define WM8903_MICDET_THR_WIDTH 2 /* MICDET_THR - [5:4] */
23#define WM8903_MICDET_HYST_ENA_WIDTH 1 /* MICDET_HYST_ENA */
24#define WM8903_MICDET_THR_MASK 0x0070 /* MICDET_THR - [6:4] */
25#define WM8903_MICDET_THR_SHIFT 4 /* MICDET_THR - [6:4] */
26#define WM8903_MICDET_THR_WIDTH 3 /* MICDET_THR - [6:4] */
27#define WM8903_MICSHORT_THR_MASK 0x000C /* MICSHORT_THR - [3:2] */ 23#define WM8903_MICSHORT_THR_MASK 0x000C /* MICSHORT_THR - [3:2] */
28#define WM8903_MICSHORT_THR_SHIFT 2 /* MICSHORT_THR - [3:2] */ 24#define WM8903_MICSHORT_THR_SHIFT 2 /* MICSHORT_THR - [3:2] */
29#define WM8903_MICSHORT_THR_WIDTH 2 /* MICSHORT_THR - [3:2] */ 25#define WM8903_MICSHORT_THR_WIDTH 2 /* MICSHORT_THR - [3:2] */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 07fdfb6b9a9a..0828b6c8610a 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -8,7 +8,6 @@
8#include <scsi/scsi_cmnd.h> 8#include <scsi/scsi_cmnd.h>
9#include <net/sock.h> 9#include <net/sock.h>
10#include <net/tcp.h> 10#include <net/tcp.h>
11#include "target_core_mib.h"
12 11
13#define TARGET_CORE_MOD_VERSION "v4.0.0-rc6" 12#define TARGET_CORE_MOD_VERSION "v4.0.0-rc6"
14#define SHUTDOWN_SIGS (sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGABRT)) 13#define SHUTDOWN_SIGS (sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGABRT))
@@ -195,6 +194,21 @@ typedef enum {
195 SAM_TASK_ATTR_EMULATED 194 SAM_TASK_ATTR_EMULATED
196} t10_task_attr_index_t; 195} t10_task_attr_index_t;
197 196
197/*
198 * Used for target SCSI statistics
199 */
200typedef enum {
201 SCSI_INST_INDEX,
202 SCSI_DEVICE_INDEX,
203 SCSI_AUTH_INTR_INDEX,
204 SCSI_INDEX_TYPE_MAX
205} scsi_index_t;
206
207struct scsi_index_table {
208 spinlock_t lock;
209 u32 scsi_mib_index[SCSI_INDEX_TYPE_MAX];
210} ____cacheline_aligned;
211
198struct se_cmd; 212struct se_cmd;
199 213
200struct t10_alua { 214struct t10_alua {
@@ -578,8 +592,6 @@ struct se_node_acl {
578 spinlock_t stats_lock; 592 spinlock_t stats_lock;
579 /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ 593 /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
580 atomic_t acl_pr_ref_count; 594 atomic_t acl_pr_ref_count;
581 /* Used for MIB access */
582 atomic_t mib_ref_count;
583 struct se_dev_entry *device_list; 595 struct se_dev_entry *device_list;
584 struct se_session *nacl_sess; 596 struct se_session *nacl_sess;
585 struct se_portal_group *se_tpg; 597 struct se_portal_group *se_tpg;
@@ -595,8 +607,6 @@ struct se_node_acl {
595} ____cacheline_aligned; 607} ____cacheline_aligned;
596 608
597struct se_session { 609struct se_session {
598 /* Used for MIB access */
599 atomic_t mib_ref_count;
600 u64 sess_bin_isid; 610 u64 sess_bin_isid;
601 struct se_node_acl *se_node_acl; 611 struct se_node_acl *se_node_acl;
602 struct se_portal_group *se_tpg; 612 struct se_portal_group *se_tpg;
@@ -806,7 +816,6 @@ struct se_hba {
806 /* Virtual iSCSI devices attached. */ 816 /* Virtual iSCSI devices attached. */
807 u32 dev_count; 817 u32 dev_count;
808 u32 hba_index; 818 u32 hba_index;
809 atomic_t dev_mib_access_count;
810 atomic_t load_balance_queue; 819 atomic_t load_balance_queue;
811 atomic_t left_queue_depth; 820 atomic_t left_queue_depth;
812 /* Maximum queue depth the HBA can handle. */ 821 /* Maximum queue depth the HBA can handle. */
@@ -845,6 +854,12 @@ struct se_lun {
845 854
846#define SE_LUN(c) ((struct se_lun *)(c)->se_lun) 855#define SE_LUN(c) ((struct se_lun *)(c)->se_lun)
847 856
857struct scsi_port_stats {
858 u64 cmd_pdus;
859 u64 tx_data_octets;
860 u64 rx_data_octets;
861} ____cacheline_aligned;
862
848struct se_port { 863struct se_port {
849 /* RELATIVE TARGET PORT IDENTIFER */ 864 /* RELATIVE TARGET PORT IDENTIFER */
850 u16 sep_rtpi; 865 u16 sep_rtpi;
@@ -867,6 +882,7 @@ struct se_port {
867} ____cacheline_aligned; 882} ____cacheline_aligned;
868 883
869struct se_tpg_np { 884struct se_tpg_np {
885 struct se_portal_group *tpg_np_parent;
870 struct config_group tpg_np_group; 886 struct config_group tpg_np_group;
871} ____cacheline_aligned; 887} ____cacheline_aligned;
872 888
diff --git a/include/target/target_core_transport.h b/include/target/target_core_transport.h
index 66f44e56eb80..2e8ec51f0615 100644
--- a/include/target/target_core_transport.h
+++ b/include/target/target_core_transport.h
@@ -111,6 +111,8 @@ struct se_subsystem_api;
111 111
112extern int init_se_global(void); 112extern int init_se_global(void);
113extern void release_se_global(void); 113extern void release_se_global(void);
114extern void init_scsi_index_table(void);
115extern u32 scsi_get_new_index(scsi_index_t);
114extern void transport_init_queue_obj(struct se_queue_obj *); 116extern void transport_init_queue_obj(struct se_queue_obj *);
115extern int transport_subsystem_check_init(void); 117extern int transport_subsystem_check_init(void);
116extern int transport_subsystem_register(struct se_subsystem_api *); 118extern int transport_subsystem_register(struct se_subsystem_api *);
@@ -133,6 +135,8 @@ extern void transport_complete_task(struct se_task *, int);
133extern void transport_add_task_to_execute_queue(struct se_task *, 135extern void transport_add_task_to_execute_queue(struct se_task *,
134 struct se_task *, 136 struct se_task *,
135 struct se_device *); 137 struct se_device *);
138extern void transport_remove_task_from_execute_queue(struct se_task *,
139 struct se_device *);
136unsigned char *transport_dump_cmd_direction(struct se_cmd *); 140unsigned char *transport_dump_cmd_direction(struct se_cmd *);
137extern void transport_dump_dev_state(struct se_device *, char *, int *); 141extern void transport_dump_dev_state(struct se_device *, char *, int *);
138extern void transport_dump_dev_info(struct se_device *, struct se_lun *, 142extern void transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index aba421d68f6f..78f18adb49c8 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
31 0 : blk_rq_sectors(rq); 31 0 : blk_rq_sectors(rq);
32 __entry->errors = rq->errors; 32 __entry->errors = rq->errors;
33 33
34 blk_fill_rwbs_rq(__entry->rwbs, rq); 34 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
35 blk_dump_cmd(__get_str(cmd), rq); 35 blk_dump_cmd(__get_str(cmd), rq);
36 ), 36 ),
37 37
@@ -118,7 +118,7 @@ DECLARE_EVENT_CLASS(block_rq,
118 __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 118 __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
119 blk_rq_bytes(rq) : 0; 119 blk_rq_bytes(rq) : 0;
120 120
121 blk_fill_rwbs_rq(__entry->rwbs, rq); 121 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
122 blk_dump_cmd(__get_str(cmd), rq); 122 blk_dump_cmd(__get_str(cmd), rq);
123 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 123 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
124 ), 124 ),
@@ -563,7 +563,7 @@ TRACE_EVENT(block_rq_remap,
563 __entry->nr_sector = blk_rq_sectors(rq); 563 __entry->nr_sector = blk_rq_sectors(rq);
564 __entry->old_dev = dev; 564 __entry->old_dev = dev;
565 __entry->old_sector = from; 565 __entry->old_sector = from;
566 blk_fill_rwbs_rq(__entry->rwbs, rq); 566 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
567 ), 567 ),
568 568
569 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", 569 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 7eee77895cb3..4cbbcef6baa8 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -17,36 +17,36 @@ TRACE_EVENT(mce_record,
17 TP_STRUCT__entry( 17 TP_STRUCT__entry(
18 __field( u64, mcgcap ) 18 __field( u64, mcgcap )
19 __field( u64, mcgstatus ) 19 __field( u64, mcgstatus )
20 __field( u8, bank )
21 __field( u64, status ) 20 __field( u64, status )
22 __field( u64, addr ) 21 __field( u64, addr )
23 __field( u64, misc ) 22 __field( u64, misc )
24 __field( u64, ip ) 23 __field( u64, ip )
25 __field( u8, cs )
26 __field( u64, tsc ) 24 __field( u64, tsc )
27 __field( u64, walltime ) 25 __field( u64, walltime )
28 __field( u32, cpu ) 26 __field( u32, cpu )
29 __field( u32, cpuid ) 27 __field( u32, cpuid )
30 __field( u32, apicid ) 28 __field( u32, apicid )
31 __field( u32, socketid ) 29 __field( u32, socketid )
30 __field( u8, cs )
31 __field( u8, bank )
32 __field( u8, cpuvendor ) 32 __field( u8, cpuvendor )
33 ), 33 ),
34 34
35 TP_fast_assign( 35 TP_fast_assign(
36 __entry->mcgcap = m->mcgcap; 36 __entry->mcgcap = m->mcgcap;
37 __entry->mcgstatus = m->mcgstatus; 37 __entry->mcgstatus = m->mcgstatus;
38 __entry->bank = m->bank;
39 __entry->status = m->status; 38 __entry->status = m->status;
40 __entry->addr = m->addr; 39 __entry->addr = m->addr;
41 __entry->misc = m->misc; 40 __entry->misc = m->misc;
42 __entry->ip = m->ip; 41 __entry->ip = m->ip;
43 __entry->cs = m->cs;
44 __entry->tsc = m->tsc; 42 __entry->tsc = m->tsc;
45 __entry->walltime = m->time; 43 __entry->walltime = m->time;
46 __entry->cpu = m->extcpu; 44 __entry->cpu = m->extcpu;
47 __entry->cpuid = m->cpuid; 45 __entry->cpuid = m->cpuid;
48 __entry->apicid = m->apicid; 46 __entry->apicid = m->apicid;
49 __entry->socketid = m->socketid; 47 __entry->socketid = m->socketid;
48 __entry->cs = m->cs;
49 __entry->bank = m->bank;
50 __entry->cpuvendor = m->cpuvendor; 50 __entry->cpuvendor = m->cpuvendor;
51 ), 51 ),
52 52
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index c6bae36547e5..21a546d27c0c 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -108,14 +108,14 @@ TRACE_EVENT(module_request,
108 TP_ARGS(name, wait, ip), 108 TP_ARGS(name, wait, ip),
109 109
110 TP_STRUCT__entry( 110 TP_STRUCT__entry(
111 __field( bool, wait )
112 __field( unsigned long, ip ) 111 __field( unsigned long, ip )
112 __field( bool, wait )
113 __string( name, name ) 113 __string( name, name )
114 ), 114 ),
115 115
116 TP_fast_assign( 116 TP_fast_assign(
117 __entry->wait = wait;
118 __entry->ip = ip; 117 __entry->ip = ip;
118 __entry->wait = wait;
119 __assign_str(name, name); 119 __assign_str(name, name);
120 ), 120 ),
121 121
@@ -129,4 +129,3 @@ TRACE_EVENT(module_request,
129 129
130/* This part must be outside protection */ 130/* This part must be outside protection */
131#include <trace/define_trace.h> 131#include <trace/define_trace.h>
132
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index f10293c41b1e..0c68ae22da22 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -19,14 +19,14 @@ TRACE_EVENT(kfree_skb,
19 19
20 TP_STRUCT__entry( 20 TP_STRUCT__entry(
21 __field( void *, skbaddr ) 21 __field( void *, skbaddr )
22 __field( unsigned short, protocol )
23 __field( void *, location ) 22 __field( void *, location )
23 __field( unsigned short, protocol )
24 ), 24 ),
25 25
26 TP_fast_assign( 26 TP_fast_assign(
27 __entry->skbaddr = skb; 27 __entry->skbaddr = skb;
28 __entry->protocol = ntohs(skb->protocol);
29 __entry->location = location; 28 __entry->location = location;
29 __entry->protocol = ntohs(skb->protocol);
30 ), 30 ),
31 31
32 TP_printk("skbaddr=%p protocol=%u location=%p", 32 TP_printk("skbaddr=%p protocol=%u location=%p",
diff --git a/include/xen/events.h b/include/xen/events.h
index 00f53ddcc062..962da2ced5b4 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -75,11 +75,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
75int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); 75int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
76 76
77#ifdef CONFIG_PCI_MSI 77#ifdef CONFIG_PCI_MSI
78/* Allocate an irq and a pirq to be used with MSIs. */ 78int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
79#define XEN_ALLOC_PIRQ (1 << 0) 79int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
80#define XEN_ALLOC_IRQ (1 << 1) 80 int pirq, int vector, const char *name);
81void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask);
82int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
83#endif 81#endif
84 82
85/* De-allocates the above mentioned physical interrupt. */ 83/* De-allocates the above mentioned physical interrupt. */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index c2d1fa4dc1ee..61e523af3c46 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t;
51 */ 51 */
52#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 52#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
53 53
54struct blkif_request { 54struct blkif_request_rw {
55 uint8_t operation; /* BLKIF_OP_??? */
56 uint8_t nr_segments; /* number of segments */
57 blkif_vdev_t handle; /* only for read/write requests */
58 uint64_t id; /* private guest value, echoed in resp */
59 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 55 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
60 struct blkif_request_segment { 56 struct blkif_request_segment {
61 grant_ref_t gref; /* reference to I/O buffer frame */ 57 grant_ref_t gref; /* reference to I/O buffer frame */
@@ -65,6 +61,16 @@ struct blkif_request {
65 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 61 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
66}; 62};
67 63
64struct blkif_request {
65 uint8_t operation; /* BLKIF_OP_??? */
66 uint8_t nr_segments; /* number of segments */
67 blkif_vdev_t handle; /* only for read/write requests */
68 uint64_t id; /* private guest value, echoed in resp */
69 union {
70 struct blkif_request_rw rw;
71 } u;
72};
73
68struct blkif_response { 74struct blkif_response {
69 uint64_t id; /* copied from request */ 75 uint64_t id; /* copied from request */
70 uint8_t operation; /* copied from request */ 76 uint8_t operation; /* copied from request */
@@ -91,4 +97,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
91#define VDISK_REMOVABLE 0x2 97#define VDISK_REMOVABLE 0x2
92#define VDISK_READONLY 0x4 98#define VDISK_READONLY 0x4
93 99
100/* Xen-defined major numbers for virtual disks, they look strangely
101 * familiar */
102#define XEN_IDE0_MAJOR 3
103#define XEN_IDE1_MAJOR 22
104#define XEN_SCSI_DISK0_MAJOR 8
105#define XEN_SCSI_DISK1_MAJOR 65
106#define XEN_SCSI_DISK2_MAJOR 66
107#define XEN_SCSI_DISK3_MAJOR 67
108#define XEN_SCSI_DISK4_MAJOR 68
109#define XEN_SCSI_DISK5_MAJOR 69
110#define XEN_SCSI_DISK6_MAJOR 70
111#define XEN_SCSI_DISK7_MAJOR 71
112#define XEN_SCSI_DISK8_MAJOR 128
113#define XEN_SCSI_DISK9_MAJOR 129
114#define XEN_SCSI_DISK10_MAJOR 130
115#define XEN_SCSI_DISK11_MAJOR 131
116#define XEN_SCSI_DISK12_MAJOR 132
117#define XEN_SCSI_DISK13_MAJOR 133
118#define XEN_SCSI_DISK14_MAJOR 134
119#define XEN_SCSI_DISK15_MAJOR 135
120
94#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ 121#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e2f1bc..b33257bc7e83 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -30,7 +30,7 @@
30#define __HYPERVISOR_stack_switch 3 30#define __HYPERVISOR_stack_switch 3
31#define __HYPERVISOR_set_callbacks 4 31#define __HYPERVISOR_set_callbacks 4
32#define __HYPERVISOR_fpu_taskswitch 5 32#define __HYPERVISOR_fpu_taskswitch 5
33#define __HYPERVISOR_sched_op 6 33#define __HYPERVISOR_sched_op_compat 6
34#define __HYPERVISOR_dom0_op 7 34#define __HYPERVISOR_dom0_op 7
35#define __HYPERVISOR_set_debugreg 8 35#define __HYPERVISOR_set_debugreg 8
36#define __HYPERVISOR_get_debugreg 9 36#define __HYPERVISOR_get_debugreg 9
@@ -52,7 +52,7 @@
52#define __HYPERVISOR_mmuext_op 26 52#define __HYPERVISOR_mmuext_op 26
53#define __HYPERVISOR_acm_op 27 53#define __HYPERVISOR_acm_op 27
54#define __HYPERVISOR_nmi_op 28 54#define __HYPERVISOR_nmi_op 28
55#define __HYPERVISOR_sched_op_new 29 55#define __HYPERVISOR_sched_op 29
56#define __HYPERVISOR_callback_op 30 56#define __HYPERVISOR_callback_op 30
57#define __HYPERVISOR_xenoprof_op 31 57#define __HYPERVISOR_xenoprof_op 31
58#define __HYPERVISOR_event_channel_op 32 58#define __HYPERVISOR_event_channel_op 32
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 98b92154a264..03c85d7387fb 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,9 +5,9 @@
5 5
6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); 6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
7 7
8void xen_pre_suspend(void); 8void xen_arch_pre_suspend(void);
9void xen_post_suspend(int suspend_cancelled); 9void xen_arch_post_suspend(int suspend_cancelled);
10void xen_hvm_post_suspend(int suspend_cancelled); 10void xen_arch_hvm_post_suspend(int suspend_cancelled);
11 11
12void xen_mm_pin_all(void); 12void xen_mm_pin_all(void);
13void xen_mm_unpin_all(void); 13void xen_mm_unpin_all(void);
diff --git a/init/Kconfig b/init/Kconfig
index be788c0957d4..5721d27af626 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -287,6 +287,18 @@ config BSD_PROCESS_ACCT_V3
287 for processing it. A preliminary version of these tools is available 287 for processing it. A preliminary version of these tools is available
288 at <http://www.gnu.org/software/acct/>. 288 at <http://www.gnu.org/software/acct/>.
289 289
290config FHANDLE
291 bool "open by fhandle syscalls"
292 select EXPORTFS
293 help
294 If you say Y here, a user level program will be able to map
295 file names to handle and then later use the handle for
296 different file system operations. This is useful in implementing
297 userspace file servers, which now track files using handles instead
298 of names. The handle would remain the same even if file names
299 get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
300 syscalls.
301
290config TASKSTATS 302config TASKSTATS
291 bool "Export task/process statistics through netlink (EXPERIMENTAL)" 303 bool "Export task/process statistics through netlink (EXPERIMENTAL)"
292 depends on NET 304 depends on NET
@@ -683,6 +695,16 @@ config CGROUP_MEM_RES_CTLR_SWAP_ENABLED
683 select this option (if, for some reason, they need to disable it 695 select this option (if, for some reason, they need to disable it
684 then noswapaccount does the trick). 696 then noswapaccount does the trick).
685 697
698config CGROUP_PERF
699 bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
700 depends on PERF_EVENTS && CGROUPS
701 help
702 This option extends the per-cpu mode to restrict monitoring to
703 threads which belong to the cgroup specified and run on the
704 designated cpu.
705
706 Say N if unsure.
707
686menuconfig CGROUP_SCHED 708menuconfig CGROUP_SCHED
687 bool "Group CPU scheduler" 709 bool "Group CPU scheduler"
688 depends on EXPERIMENTAL 710 depends on EXPERIMENTAL
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d2e3c7866460..e683869365d9 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
144} 144}
145 145
146/* Initialize a parent watch entry. */ 146/* Initialize a parent watch entry. */
147static struct audit_parent *audit_init_parent(struct nameidata *ndp) 147static struct audit_parent *audit_init_parent(struct path *path)
148{ 148{
149 struct inode *inode = ndp->path.dentry->d_inode; 149 struct inode *inode = path->dentry->d_inode;
150 struct audit_parent *parent; 150 struct audit_parent *parent;
151 int ret; 151 int ret;
152 152
@@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
353} 353}
354 354
355/* Get path information necessary for adding watches. */ 355/* Get path information necessary for adding watches. */
356static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw) 356static int audit_get_nd(struct audit_watch *watch, struct path *parent)
357{ 357{
358 struct nameidata *ndparent, *ndwatch; 358 struct nameidata nd;
359 struct dentry *d;
359 int err; 360 int err;
360 361
361 ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); 362 err = kern_path_parent(watch->path, &nd);
362 if (unlikely(!ndparent)) 363 if (err)
363 return -ENOMEM; 364 return err;
364 365
365 ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); 366 if (nd.last_type != LAST_NORM) {
366 if (unlikely(!ndwatch)) { 367 path_put(&nd.path);
367 kfree(ndparent); 368 return -EINVAL;
368 return -ENOMEM;
369 } 369 }
370 370
371 err = path_lookup(path, LOOKUP_PARENT, ndparent); 371 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
372 if (err) { 372 d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
373 kfree(ndparent); 373 if (IS_ERR(d)) {
374 kfree(ndwatch); 374 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
375 return err; 375 path_put(&nd.path);
376 return PTR_ERR(d);
376 } 377 }
377 378 if (d->d_inode) {
378 err = path_lookup(path, 0, ndwatch); 379 /* update watch filter fields */
379 if (err) { 380 watch->dev = d->d_inode->i_sb->s_dev;
380 kfree(ndwatch); 381 watch->ino = d->d_inode->i_ino;
381 ndwatch = NULL;
382 } 382 }
383 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
383 384
384 *ndp = ndparent; 385 *parent = nd.path;
385 *ndw = ndwatch; 386 dput(d);
386
387 return 0; 387 return 0;
388} 388}
389 389
390/* Release resources used for watch path information. */
391static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
392{
393 if (ndp) {
394 path_put(&ndp->path);
395 kfree(ndp);
396 }
397 if (ndw) {
398 path_put(&ndw->path);
399 kfree(ndw);
400 }
401}
402
403/* Associate the given rule with an existing parent. 390/* Associate the given rule with an existing parent.
404 * Caller must hold audit_filter_mutex. */ 391 * Caller must hold audit_filter_mutex. */
405static void audit_add_to_parent(struct audit_krule *krule, 392static void audit_add_to_parent(struct audit_krule *krule,
@@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
440{ 427{
441 struct audit_watch *watch = krule->watch; 428 struct audit_watch *watch = krule->watch;
442 struct audit_parent *parent; 429 struct audit_parent *parent;
443 struct nameidata *ndp = NULL, *ndw = NULL; 430 struct path parent_path;
444 int h, ret = 0; 431 int h, ret = 0;
445 432
446 mutex_unlock(&audit_filter_mutex); 433 mutex_unlock(&audit_filter_mutex);
447 434
448 /* Avoid calling path_lookup under audit_filter_mutex. */ 435 /* Avoid calling path_lookup under audit_filter_mutex. */
449 ret = audit_get_nd(watch->path, &ndp, &ndw); 436 ret = audit_get_nd(watch, &parent_path);
450 if (ret) {
451 /* caller expects mutex locked */
452 mutex_lock(&audit_filter_mutex);
453 goto error;
454 }
455 437
438 /* caller expects mutex locked */
456 mutex_lock(&audit_filter_mutex); 439 mutex_lock(&audit_filter_mutex);
457 440
458 /* update watch filter fields */ 441 if (ret)
459 if (ndw) { 442 return ret;
460 watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
461 watch->ino = ndw->path.dentry->d_inode->i_ino;
462 }
463 443
464 /* either find an old parent or attach a new one */ 444 /* either find an old parent or attach a new one */
465 parent = audit_find_parent(ndp->path.dentry->d_inode); 445 parent = audit_find_parent(parent_path.dentry->d_inode);
466 if (!parent) { 446 if (!parent) {
467 parent = audit_init_parent(ndp); 447 parent = audit_init_parent(&parent_path);
468 if (IS_ERR(parent)) { 448 if (IS_ERR(parent)) {
469 ret = PTR_ERR(parent); 449 ret = PTR_ERR(parent);
470 goto error; 450 goto error;
@@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
479 h = audit_hash_ino((u32)watch->ino); 459 h = audit_hash_ino((u32)watch->ino);
480 *list = &audit_inode_hash[h]; 460 *list = &audit_inode_hash[h];
481error: 461error:
482 audit_put_nd(ndp, ndw); /* NULL args OK */ 462 path_put(&parent_path);
483 return ret; 463 return ret;
484
485} 464}
486 465
487void audit_remove_watch_rule(struct audit_krule *krule) 466void audit_remove_watch_rule(struct audit_krule *krule)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b24d7027b83c..95362d15128c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4230,20 +4230,8 @@ void cgroup_post_fork(struct task_struct *child)
4230 */ 4230 */
4231void cgroup_exit(struct task_struct *tsk, int run_callbacks) 4231void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4232{ 4232{
4233 int i;
4234 struct css_set *cg; 4233 struct css_set *cg;
4235 4234 int i;
4236 if (run_callbacks && need_forkexit_callback) {
4237 /*
4238 * modular subsystems can't use callbacks, so no need to lock
4239 * the subsys array
4240 */
4241 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4242 struct cgroup_subsys *ss = subsys[i];
4243 if (ss->exit)
4244 ss->exit(ss, tsk);
4245 }
4246 }
4247 4235
4248 /* 4236 /*
4249 * Unlink from the css_set task list if necessary. 4237 * Unlink from the css_set task list if necessary.
@@ -4261,7 +4249,24 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4261 task_lock(tsk); 4249 task_lock(tsk);
4262 cg = tsk->cgroups; 4250 cg = tsk->cgroups;
4263 tsk->cgroups = &init_css_set; 4251 tsk->cgroups = &init_css_set;
4252
4253 if (run_callbacks && need_forkexit_callback) {
4254 /*
4255 * modular subsystems can't use callbacks, so no need to lock
4256 * the subsys array
4257 */
4258 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4259 struct cgroup_subsys *ss = subsys[i];
4260 if (ss->exit) {
4261 struct cgroup *old_cgrp =
4262 rcu_dereference_raw(cg->subsys[i])->cgroup;
4263 struct cgroup *cgrp = task_cgroup(tsk, i);
4264 ss->exit(ss, cgrp, old_cgrp, tsk);
4265 }
4266 }
4267 }
4264 task_unlock(tsk); 4268 task_unlock(tsk);
4269
4265 if (cg) 4270 if (cg)
4266 put_css_set_taskexit(cg); 4271 put_css_set_taskexit(cg);
4267} 4272}
@@ -4813,6 +4818,29 @@ css_get_next(struct cgroup_subsys *ss, int id,
4813 return ret; 4818 return ret;
4814} 4819}
4815 4820
4821/*
4822 * get corresponding css from file open on cgroupfs directory
4823 */
4824struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
4825{
4826 struct cgroup *cgrp;
4827 struct inode *inode;
4828 struct cgroup_subsys_state *css;
4829
4830 inode = f->f_dentry->d_inode;
4831 /* check in cgroup filesystem dir */
4832 if (inode->i_op != &cgroup_dir_inode_operations)
4833 return ERR_PTR(-EBADF);
4834
4835 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
4836 return ERR_PTR(-EINVAL);
4837
4838 /* get cgroup */
4839 cgrp = __d_cgrp(f->f_dentry);
4840 css = cgrp->subsys[id];
4841 return css ? css : ERR_PTR(-ENOENT);
4842}
4843
4816#ifdef CONFIG_CGROUP_DEBUG 4844#ifdef CONFIG_CGROUP_DEBUG
4817static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, 4845static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
4818 struct cgroup *cont) 4846 struct cgroup *cont)
diff --git a/kernel/compat.c b/kernel/compat.c
index c9e2ec0b34a8..38b1d2c1cbe8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -52,6 +52,64 @@ static int compat_put_timeval(struct compat_timeval __user *o,
52 put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0; 52 put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
53} 53}
54 54
55static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp)
56{
57 memset(txc, 0, sizeof(struct timex));
58
59 if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
60 __get_user(txc->modes, &utp->modes) ||
61 __get_user(txc->offset, &utp->offset) ||
62 __get_user(txc->freq, &utp->freq) ||
63 __get_user(txc->maxerror, &utp->maxerror) ||
64 __get_user(txc->esterror, &utp->esterror) ||
65 __get_user(txc->status, &utp->status) ||
66 __get_user(txc->constant, &utp->constant) ||
67 __get_user(txc->precision, &utp->precision) ||
68 __get_user(txc->tolerance, &utp->tolerance) ||
69 __get_user(txc->time.tv_sec, &utp->time.tv_sec) ||
70 __get_user(txc->time.tv_usec, &utp->time.tv_usec) ||
71 __get_user(txc->tick, &utp->tick) ||
72 __get_user(txc->ppsfreq, &utp->ppsfreq) ||
73 __get_user(txc->jitter, &utp->jitter) ||
74 __get_user(txc->shift, &utp->shift) ||
75 __get_user(txc->stabil, &utp->stabil) ||
76 __get_user(txc->jitcnt, &utp->jitcnt) ||
77 __get_user(txc->calcnt, &utp->calcnt) ||
78 __get_user(txc->errcnt, &utp->errcnt) ||
79 __get_user(txc->stbcnt, &utp->stbcnt))
80 return -EFAULT;
81
82 return 0;
83}
84
85static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc)
86{
87 if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
88 __put_user(txc->modes, &utp->modes) ||
89 __put_user(txc->offset, &utp->offset) ||
90 __put_user(txc->freq, &utp->freq) ||
91 __put_user(txc->maxerror, &utp->maxerror) ||
92 __put_user(txc->esterror, &utp->esterror) ||
93 __put_user(txc->status, &utp->status) ||
94 __put_user(txc->constant, &utp->constant) ||
95 __put_user(txc->precision, &utp->precision) ||
96 __put_user(txc->tolerance, &utp->tolerance) ||
97 __put_user(txc->time.tv_sec, &utp->time.tv_sec) ||
98 __put_user(txc->time.tv_usec, &utp->time.tv_usec) ||
99 __put_user(txc->tick, &utp->tick) ||
100 __put_user(txc->ppsfreq, &utp->ppsfreq) ||
101 __put_user(txc->jitter, &utp->jitter) ||
102 __put_user(txc->shift, &utp->shift) ||
103 __put_user(txc->stabil, &utp->stabil) ||
104 __put_user(txc->jitcnt, &utp->jitcnt) ||
105 __put_user(txc->calcnt, &utp->calcnt) ||
106 __put_user(txc->errcnt, &utp->errcnt) ||
107 __put_user(txc->stbcnt, &utp->stbcnt) ||
108 __put_user(txc->tai, &utp->tai))
109 return -EFAULT;
110 return 0;
111}
112
55asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, 113asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
56 struct timezone __user *tz) 114 struct timezone __user *tz)
57{ 115{
@@ -617,6 +675,29 @@ long compat_sys_clock_gettime(clockid_t which_clock,
617 return err; 675 return err;
618} 676}
619 677
678long compat_sys_clock_adjtime(clockid_t which_clock,
679 struct compat_timex __user *utp)
680{
681 struct timex txc;
682 mm_segment_t oldfs;
683 int err, ret;
684
685 err = compat_get_timex(&txc, utp);
686 if (err)
687 return err;
688
689 oldfs = get_fs();
690 set_fs(KERNEL_DS);
691 ret = sys_clock_adjtime(which_clock, (struct timex __user *) &txc);
692 set_fs(oldfs);
693
694 err = compat_put_timex(utp, &txc);
695 if (err)
696 return err;
697
698 return ret;
699}
700
620long compat_sys_clock_getres(clockid_t which_clock, 701long compat_sys_clock_getres(clockid_t which_clock,
621 struct compat_timespec __user *tp) 702 struct compat_timespec __user *tp)
622{ 703{
@@ -951,58 +1032,17 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat
951asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) 1032asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
952{ 1033{
953 struct timex txc; 1034 struct timex txc;
954 int ret; 1035 int err, ret;
955
956 memset(&txc, 0, sizeof(struct timex));
957 1036
958 if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) || 1037 err = compat_get_timex(&txc, utp);
959 __get_user(txc.modes, &utp->modes) || 1038 if (err)
960 __get_user(txc.offset, &utp->offset) || 1039 return err;
961 __get_user(txc.freq, &utp->freq) ||
962 __get_user(txc.maxerror, &utp->maxerror) ||
963 __get_user(txc.esterror, &utp->esterror) ||
964 __get_user(txc.status, &utp->status) ||
965 __get_user(txc.constant, &utp->constant) ||
966 __get_user(txc.precision, &utp->precision) ||
967 __get_user(txc.tolerance, &utp->tolerance) ||
968 __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
969 __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
970 __get_user(txc.tick, &utp->tick) ||
971 __get_user(txc.ppsfreq, &utp->ppsfreq) ||
972 __get_user(txc.jitter, &utp->jitter) ||
973 __get_user(txc.shift, &utp->shift) ||
974 __get_user(txc.stabil, &utp->stabil) ||
975 __get_user(txc.jitcnt, &utp->jitcnt) ||
976 __get_user(txc.calcnt, &utp->calcnt) ||
977 __get_user(txc.errcnt, &utp->errcnt) ||
978 __get_user(txc.stbcnt, &utp->stbcnt))
979 return -EFAULT;
980 1040
981 ret = do_adjtimex(&txc); 1041 ret = do_adjtimex(&txc);
982 1042
983 if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) || 1043 err = compat_put_timex(utp, &txc);
984 __put_user(txc.modes, &utp->modes) || 1044 if (err)
985 __put_user(txc.offset, &utp->offset) || 1045 return err;
986 __put_user(txc.freq, &utp->freq) ||
987 __put_user(txc.maxerror, &utp->maxerror) ||
988 __put_user(txc.esterror, &utp->esterror) ||
989 __put_user(txc.status, &utp->status) ||
990 __put_user(txc.constant, &utp->constant) ||
991 __put_user(txc.precision, &utp->precision) ||
992 __put_user(txc.tolerance, &utp->tolerance) ||
993 __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
994 __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
995 __put_user(txc.tick, &utp->tick) ||
996 __put_user(txc.ppsfreq, &utp->ppsfreq) ||
997 __put_user(txc.jitter, &utp->jitter) ||
998 __put_user(txc.shift, &utp->shift) ||
999 __put_user(txc.stabil, &utp->stabil) ||
1000 __put_user(txc.jitcnt, &utp->jitcnt) ||
1001 __put_user(txc.calcnt, &utp->calcnt) ||
1002 __put_user(txc.errcnt, &utp->errcnt) ||
1003 __put_user(txc.stbcnt, &utp->stbcnt) ||
1004 __put_user(txc.tai, &utp->tai))
1005 ret = -EFAULT;
1006 1046
1007 return ret; 1047 return ret;
1008} 1048}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4349935c2ad8..e92e98189032 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1575,8 +1575,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1575 return -ENODEV; 1575 return -ENODEV;
1576 1576
1577 trialcs = alloc_trial_cpuset(cs); 1577 trialcs = alloc_trial_cpuset(cs);
1578 if (!trialcs) 1578 if (!trialcs) {
1579 return -ENOMEM; 1579 retval = -ENOMEM;
1580 goto out;
1581 }
1580 1582
1581 switch (cft->private) { 1583 switch (cft->private) {
1582 case FILE_CPULIST: 1584 case FILE_CPULIST:
@@ -1591,6 +1593,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1591 } 1593 }
1592 1594
1593 free_trial_cpuset(trialcs); 1595 free_trial_cpuset(trialcs);
1596out:
1594 cgroup_unlock(); 1597 cgroup_unlock();
1595 return retval; 1598 return retval;
1596} 1599}
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a9d6dd53a6c..2343c132c5a7 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar;
35static struct thread_group_cred init_tgcred = { 35static struct thread_group_cred init_tgcred = {
36 .usage = ATOMIC_INIT(2), 36 .usage = ATOMIC_INIT(2),
37 .tgid = 0, 37 .tgid = 0,
38 .lock = SPIN_LOCK_UNLOCKED, 38 .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
39}; 39};
40#endif 40#endif
41 41
diff --git a/kernel/futex.c b/kernel/futex.c
index b766d28accd6..bda415715382 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -381,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
381 return NULL; 381 return NULL;
382} 382}
383 383
384static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 384static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
385 u32 uval, u32 newval)
385{ 386{
386 u32 curval; 387 int ret;
387 388
388 pagefault_disable(); 389 pagefault_disable();
389 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); 390 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
390 pagefault_enable(); 391 pagefault_enable();
391 392
392 return curval; 393 return ret;
393} 394}
394 395
395static int get_futex_value_locked(u32 *dest, u32 __user *from) 396static int get_futex_value_locked(u32 *dest, u32 __user *from)
@@ -674,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
674 struct task_struct *task, int set_waiters) 675 struct task_struct *task, int set_waiters)
675{ 676{
676 int lock_taken, ret, ownerdied = 0; 677 int lock_taken, ret, ownerdied = 0;
677 u32 uval, newval, curval; 678 u32 uval, newval, curval, vpid = task_pid_vnr(task);
678 679
679retry: 680retry:
680 ret = lock_taken = 0; 681 ret = lock_taken = 0;
@@ -684,19 +685,17 @@ retry:
684 * (by doing a 0 -> TID atomic cmpxchg), while holding all 685 * (by doing a 0 -> TID atomic cmpxchg), while holding all
685 * the locks. It will most likely not succeed. 686 * the locks. It will most likely not succeed.
686 */ 687 */
687 newval = task_pid_vnr(task); 688 newval = vpid;
688 if (set_waiters) 689 if (set_waiters)
689 newval |= FUTEX_WAITERS; 690 newval |= FUTEX_WAITERS;
690 691
691 curval = cmpxchg_futex_value_locked(uaddr, 0, newval); 692 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
692
693 if (unlikely(curval == -EFAULT))
694 return -EFAULT; 693 return -EFAULT;
695 694
696 /* 695 /*
697 * Detect deadlocks. 696 * Detect deadlocks.
698 */ 697 */
699 if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) 698 if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
700 return -EDEADLK; 699 return -EDEADLK;
701 700
702 /* 701 /*
@@ -723,14 +722,12 @@ retry:
723 */ 722 */
724 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { 723 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
725 /* Keep the OWNER_DIED bit */ 724 /* Keep the OWNER_DIED bit */
726 newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); 725 newval = (curval & ~FUTEX_TID_MASK) | vpid;
727 ownerdied = 0; 726 ownerdied = 0;
728 lock_taken = 1; 727 lock_taken = 1;
729 } 728 }
730 729
731 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 730 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
732
733 if (unlikely(curval == -EFAULT))
734 return -EFAULT; 731 return -EFAULT;
735 if (unlikely(curval != uval)) 732 if (unlikely(curval != uval))
736 goto retry; 733 goto retry;
@@ -775,6 +772,24 @@ retry:
775 return ret; 772 return ret;
776} 773}
777 774
775/**
776 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
777 * @q: The futex_q to unqueue
778 *
779 * The q->lock_ptr must not be NULL and must be held by the caller.
780 */
781static void __unqueue_futex(struct futex_q *q)
782{
783 struct futex_hash_bucket *hb;
784
785 if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
786 || plist_node_empty(&q->list)))
787 return;
788
789 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
790 plist_del(&q->list, &hb->chain);
791}
792
778/* 793/*
779 * The hash bucket lock must be held when this is called. 794 * The hash bucket lock must be held when this is called.
780 * Afterwards, the futex_q must not be accessed. 795 * Afterwards, the futex_q must not be accessed.
@@ -792,7 +807,7 @@ static void wake_futex(struct futex_q *q)
792 */ 807 */
793 get_task_struct(p); 808 get_task_struct(p);
794 809
795 plist_del(&q->list, &q->list.plist); 810 __unqueue_futex(q);
796 /* 811 /*
797 * The waiting task can free the futex_q as soon as 812 * The waiting task can free the futex_q as soon as
798 * q->lock_ptr = NULL is written, without taking any locks. A 813 * q->lock_ptr = NULL is written, without taking any locks. A
@@ -843,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
843 858
844 newval = FUTEX_WAITERS | task_pid_vnr(new_owner); 859 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
845 860
846 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 861 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
847
848 if (curval == -EFAULT)
849 ret = -EFAULT; 862 ret = -EFAULT;
850 else if (curval != uval) 863 else if (curval != uval)
851 ret = -EINVAL; 864 ret = -EINVAL;
@@ -880,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
880 * There is no waiter, so we unlock the futex. The owner died 893 * There is no waiter, so we unlock the futex. The owner died
881 * bit has not to be preserved here. We are the owner: 894 * bit has not to be preserved here. We are the owner:
882 */ 895 */
883 oldval = cmpxchg_futex_value_locked(uaddr, uval, 0); 896 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
884 897 return -EFAULT;
885 if (oldval == -EFAULT)
886 return oldval;
887 if (oldval != uval) 898 if (oldval != uval)
888 return -EAGAIN; 899 return -EAGAIN;
889 900
@@ -1071,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1071 plist_del(&q->list, &hb1->chain); 1082 plist_del(&q->list, &hb1->chain);
1072 plist_add(&q->list, &hb2->chain); 1083 plist_add(&q->list, &hb2->chain);
1073 q->lock_ptr = &hb2->lock; 1084 q->lock_ptr = &hb2->lock;
1074#ifdef CONFIG_DEBUG_PI_LIST
1075 q->list.plist.spinlock = &hb2->lock;
1076#endif
1077 } 1085 }
1078 get_futex_key_refs(key2); 1086 get_futex_key_refs(key2);
1079 q->key = *key2; 1087 q->key = *key2;
@@ -1100,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1100 get_futex_key_refs(key); 1108 get_futex_key_refs(key);
1101 q->key = *key; 1109 q->key = *key;
1102 1110
1103 WARN_ON(plist_node_empty(&q->list)); 1111 __unqueue_futex(q);
1104 plist_del(&q->list, &q->list.plist);
1105 1112
1106 WARN_ON(!q->rt_waiter); 1113 WARN_ON(!q->rt_waiter);
1107 q->rt_waiter = NULL; 1114 q->rt_waiter = NULL;
1108 1115
1109 q->lock_ptr = &hb->lock; 1116 q->lock_ptr = &hb->lock;
1110#ifdef CONFIG_DEBUG_PI_LIST
1111 q->list.plist.spinlock = &hb->lock;
1112#endif
1113 1117
1114 wake_up_state(q->task, TASK_NORMAL); 1118 wake_up_state(q->task, TASK_NORMAL);
1115} 1119}
@@ -1457,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1457 prio = min(current->normal_prio, MAX_RT_PRIO); 1461 prio = min(current->normal_prio, MAX_RT_PRIO);
1458 1462
1459 plist_node_init(&q->list, prio); 1463 plist_node_init(&q->list, prio);
1460#ifdef CONFIG_DEBUG_PI_LIST
1461 q->list.plist.spinlock = &hb->lock;
1462#endif
1463 plist_add(&q->list, &hb->chain); 1464 plist_add(&q->list, &hb->chain);
1464 q->task = current; 1465 q->task = current;
1465 spin_unlock(&hb->lock); 1466 spin_unlock(&hb->lock);
@@ -1504,8 +1505,7 @@ retry:
1504 spin_unlock(lock_ptr); 1505 spin_unlock(lock_ptr);
1505 goto retry; 1506 goto retry;
1506 } 1507 }
1507 WARN_ON(plist_node_empty(&q->list)); 1508 __unqueue_futex(q);
1508 plist_del(&q->list, &q->list.plist);
1509 1509
1510 BUG_ON(q->pi_state); 1510 BUG_ON(q->pi_state);
1511 1511
@@ -1525,8 +1525,7 @@ retry:
1525static void unqueue_me_pi(struct futex_q *q) 1525static void unqueue_me_pi(struct futex_q *q)
1526 __releases(q->lock_ptr) 1526 __releases(q->lock_ptr)
1527{ 1527{
1528 WARN_ON(plist_node_empty(&q->list)); 1528 __unqueue_futex(q);
1529 plist_del(&q->list, &q->list.plist);
1530 1529
1531 BUG_ON(!q->pi_state); 1530 BUG_ON(!q->pi_state);
1532 free_pi_state(q->pi_state); 1531 free_pi_state(q->pi_state);
@@ -1556,10 +1555,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1556 1555
1557 /* 1556 /*
1558 * We are here either because we stole the rtmutex from the 1557 * We are here either because we stole the rtmutex from the
1559 * pending owner or we are the pending owner which failed to 1558 * previous highest priority waiter or we are the highest priority
1560 * get the rtmutex. We have to replace the pending owner TID 1559 * waiter but failed to get the rtmutex the first time.
1561 * in the user space variable. This must be atomic as we have 1560 * We have to replace the newowner TID in the user space variable.
1562 * to preserve the owner died bit here. 1561 * This must be atomic as we have to preserve the owner died bit here.
1563 * 1562 *
1564 * Note: We write the user space value _before_ changing the pi_state 1563 * Note: We write the user space value _before_ changing the pi_state
1565 * because we can fault here. Imagine swapped out pages or a fork 1564 * because we can fault here. Imagine swapped out pages or a fork
@@ -1578,9 +1577,7 @@ retry:
1578 while (1) { 1577 while (1) {
1579 newval = (uval & FUTEX_OWNER_DIED) | newtid; 1578 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1580 1579
1581 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 1580 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1582
1583 if (curval == -EFAULT)
1584 goto handle_fault; 1581 goto handle_fault;
1585 if (curval == uval) 1582 if (curval == uval)
1586 break; 1583 break;
@@ -1608,8 +1605,8 @@ retry:
1608 1605
1609 /* 1606 /*
1610 * To handle the page fault we need to drop the hash bucket 1607 * To handle the page fault we need to drop the hash bucket
1611 * lock here. That gives the other task (either the pending 1608 * lock here. That gives the other task (either the highest priority
1612 * owner itself or the task which stole the rtmutex) the 1609 * waiter itself or the task which stole the rtmutex) the
1613 * chance to try the fixup of the pi_state. So once we are 1610 * chance to try the fixup of the pi_state. So once we are
1614 * back from handling the fault we need to check the pi_state 1611 * back from handling the fault we need to check the pi_state
1615 * after reacquiring the hash bucket lock and before trying to 1612 * after reacquiring the hash bucket lock and before trying to
@@ -1685,18 +1682,20 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1685 /* 1682 /*
1686 * pi_state is incorrect, some other task did a lock steal and 1683 * pi_state is incorrect, some other task did a lock steal and
1687 * we returned due to timeout or signal without taking the 1684 * we returned due to timeout or signal without taking the
1688 * rt_mutex. Too late. We can access the rt_mutex_owner without 1685 * rt_mutex. Too late.
1689 * locking, as the other task is now blocked on the hash bucket
1690 * lock. Fix the state up.
1691 */ 1686 */
1687 raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1692 owner = rt_mutex_owner(&q->pi_state->pi_mutex); 1688 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1689 if (!owner)
1690 owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1691 raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1693 ret = fixup_pi_state_owner(uaddr, q, owner); 1692 ret = fixup_pi_state_owner(uaddr, q, owner);
1694 goto out; 1693 goto out;
1695 } 1694 }
1696 1695
1697 /* 1696 /*
1698 * Paranoia check. If we did not take the lock, then we should not be 1697 * Paranoia check. If we did not take the lock, then we should not be
1699 * the owner, nor the pending owner, of the rt_mutex. 1698 * the owner of the rt_mutex.
1700 */ 1699 */
1701 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) 1700 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1702 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " 1701 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
@@ -1781,13 +1780,14 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1781 * 1780 *
1782 * The basic logical guarantee of a futex is that it blocks ONLY 1781 * The basic logical guarantee of a futex is that it blocks ONLY
1783 * if cond(var) is known to be true at the time of blocking, for 1782 * if cond(var) is known to be true at the time of blocking, for
1784 * any cond. If we queued after testing *uaddr, that would open 1783 * any cond. If we locked the hash-bucket after testing *uaddr, that
1785 * a race condition where we could block indefinitely with 1784 * would open a race condition where we could block indefinitely with
1786 * cond(var) false, which would violate the guarantee. 1785 * cond(var) false, which would violate the guarantee.
1787 * 1786 *
1788 * A consequence is that futex_wait() can return zero and absorb 1787 * On the other hand, we insert q and release the hash-bucket only
1789 * a wakeup when *uaddr != val on entry to the syscall. This is 1788 * after testing *uaddr. This guarantees that futex_wait() will NOT
1790 * rare, but normal. 1789 * absorb a wakeup if *uaddr does not match the desired values
1790 * while the syscall executes.
1791 */ 1791 */
1792retry: 1792retry:
1793 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); 1793 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
@@ -2046,9 +2046,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2046{ 2046{
2047 struct futex_hash_bucket *hb; 2047 struct futex_hash_bucket *hb;
2048 struct futex_q *this, *next; 2048 struct futex_q *this, *next;
2049 u32 uval;
2050 struct plist_head *head; 2049 struct plist_head *head;
2051 union futex_key key = FUTEX_KEY_INIT; 2050 union futex_key key = FUTEX_KEY_INIT;
2051 u32 uval, vpid = task_pid_vnr(current);
2052 int ret; 2052 int ret;
2053 2053
2054retry: 2054retry:
@@ -2057,7 +2057,7 @@ retry:
2057 /* 2057 /*
2058 * We release only a lock we actually own: 2058 * We release only a lock we actually own:
2059 */ 2059 */
2060 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 2060 if ((uval & FUTEX_TID_MASK) != vpid)
2061 return -EPERM; 2061 return -EPERM;
2062 2062
2063 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); 2063 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
@@ -2072,17 +2072,14 @@ retry:
2072 * again. If it succeeds then we can return without waking 2072 * again. If it succeeds then we can return without waking
2073 * anyone else up: 2073 * anyone else up:
2074 */ 2074 */
2075 if (!(uval & FUTEX_OWNER_DIED)) 2075 if (!(uval & FUTEX_OWNER_DIED) &&
2076 uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0); 2076 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2077
2078
2079 if (unlikely(uval == -EFAULT))
2080 goto pi_faulted; 2077 goto pi_faulted;
2081 /* 2078 /*
2082 * Rare case: we managed to release the lock atomically, 2079 * Rare case: we managed to release the lock atomically,
2083 * no need to wake anyone else up: 2080 * no need to wake anyone else up:
2084 */ 2081 */
2085 if (unlikely(uval == task_pid_vnr(current))) 2082 if (unlikely(uval == vpid))
2086 goto out_unlock; 2083 goto out_unlock;
2087 2084
2088 /* 2085 /*
@@ -2167,7 +2164,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2167 * We were woken prior to requeue by a timeout or a signal. 2164 * We were woken prior to requeue by a timeout or a signal.
2168 * Unqueue the futex_q and determine which it was. 2165 * Unqueue the futex_q and determine which it was.
2169 */ 2166 */
2170 plist_del(&q->list, &q->list.plist); 2167 plist_del(&q->list, &hb->chain);
2171 2168
2172 /* Handle spurious wakeups gracefully */ 2169 /* Handle spurious wakeups gracefully */
2173 ret = -EWOULDBLOCK; 2170 ret = -EWOULDBLOCK;
@@ -2463,11 +2460,20 @@ retry:
2463 * userspace. 2460 * userspace.
2464 */ 2461 */
2465 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 2462 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2466 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); 2463 /*
2467 2464 * We are not holding a lock here, but we want to have
2468 if (nval == -EFAULT) 2465 * the pagefault_disable/enable() protection because
2469 return -1; 2466 * we want to handle the fault gracefully. If the
2470 2467 * access fails we try to fault in the futex with R/W
2468 * verification via get_user_pages. get_user() above
2469 * does not guarantee R/W access. If that fails we
2470 * give up and leave the futex locked.
2471 */
2472 if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2473 if (fault_in_user_writeable(uaddr))
2474 return -1;
2475 goto retry;
2476 }
2471 if (nval != uval) 2477 if (nval != uval)
2472 goto retry; 2478 goto retry;
2473 2479
@@ -2678,8 +2684,7 @@ static int __init futex_init(void)
2678 * implementation, the non-functional ones will return 2684 * implementation, the non-functional ones will return
2679 * -ENOSYS. 2685 * -ENOSYS.
2680 */ 2686 */
2681 curval = cmpxchg_futex_value_locked(NULL, 0, 0); 2687 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2682 if (curval == -EFAULT)
2683 futex_cmpxchg_enabled = 1; 2688 futex_cmpxchg_enabled = 1;
2684 2689
2685 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { 2690 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0c8d7c048615..9017478c5d4c 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -53,11 +53,10 @@
53/* 53/*
54 * The timer bases: 54 * The timer bases:
55 * 55 *
56 * Note: If we want to add new timer bases, we have to skip the two 56 * There are more clockids then hrtimer bases. Thus, we index
57 * clock ids captured by the cpu-timers. We do this by holding empty 57 * into the timer bases by the hrtimer_base_type enum. When trying
58 * entries rather than doing math adjustment of the clock ids. 58 * to reach a base using a clockid, hrtimer_clockid_to_base()
59 * This ensures that we capture erroneous accesses to these clock ids 59 * is used to convert from clockid to the proper hrtimer_base_type.
60 * rather than moving them into the range of valid clock id's.
61 */ 60 */
62DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 61DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
63{ 62{
@@ -74,30 +73,39 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
74 .get_time = &ktime_get, 73 .get_time = &ktime_get,
75 .resolution = KTIME_LOW_RES, 74 .resolution = KTIME_LOW_RES,
76 }, 75 },
76 {
77 .index = CLOCK_BOOTTIME,
78 .get_time = &ktime_get_boottime,
79 .resolution = KTIME_LOW_RES,
80 },
77 } 81 }
78}; 82};
79 83
84static int hrtimer_clock_to_base_table[MAX_CLOCKS];
85
86static inline int hrtimer_clockid_to_base(clockid_t clock_id)
87{
88 return hrtimer_clock_to_base_table[clock_id];
89}
90
91
80/* 92/*
81 * Get the coarse grained time at the softirq based on xtime and 93 * Get the coarse grained time at the softirq based on xtime and
82 * wall_to_monotonic. 94 * wall_to_monotonic.
83 */ 95 */
84static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) 96static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
85{ 97{
86 ktime_t xtim, tomono; 98 ktime_t xtim, mono, boot;
87 struct timespec xts, tom; 99 struct timespec xts, tom, slp;
88 unsigned long seq;
89 100
90 do { 101 get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
91 seq = read_seqbegin(&xtime_lock);
92 xts = __current_kernel_time();
93 tom = __get_wall_to_monotonic();
94 } while (read_seqretry(&xtime_lock, seq));
95 102
96 xtim = timespec_to_ktime(xts); 103 xtim = timespec_to_ktime(xts);
97 tomono = timespec_to_ktime(tom); 104 mono = ktime_add(xtim, timespec_to_ktime(tom));
98 base->clock_base[CLOCK_REALTIME].softirq_time = xtim; 105 boot = ktime_add(mono, timespec_to_ktime(slp));
99 base->clock_base[CLOCK_MONOTONIC].softirq_time = 106 base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
100 ktime_add(xtim, tomono); 107 base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
108 base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
101} 109}
102 110
103/* 111/*
@@ -184,10 +192,11 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
184 struct hrtimer_cpu_base *new_cpu_base; 192 struct hrtimer_cpu_base *new_cpu_base;
185 int this_cpu = smp_processor_id(); 193 int this_cpu = smp_processor_id();
186 int cpu = hrtimer_get_target(this_cpu, pinned); 194 int cpu = hrtimer_get_target(this_cpu, pinned);
195 int basenum = hrtimer_clockid_to_base(base->index);
187 196
188again: 197again:
189 new_cpu_base = &per_cpu(hrtimer_bases, cpu); 198 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
190 new_base = &new_cpu_base->clock_base[base->index]; 199 new_base = &new_cpu_base->clock_base[basenum];
191 200
192 if (base != new_base) { 201 if (base != new_base) {
193 /* 202 /*
@@ -334,6 +343,11 @@ EXPORT_SYMBOL_GPL(ktime_add_safe);
334 343
335static struct debug_obj_descr hrtimer_debug_descr; 344static struct debug_obj_descr hrtimer_debug_descr;
336 345
346static void *hrtimer_debug_hint(void *addr)
347{
348 return ((struct hrtimer *) addr)->function;
349}
350
337/* 351/*
338 * fixup_init is called when: 352 * fixup_init is called when:
339 * - an active object is initialized 353 * - an active object is initialized
@@ -393,6 +407,7 @@ static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
393 407
394static struct debug_obj_descr hrtimer_debug_descr = { 408static struct debug_obj_descr hrtimer_debug_descr = {
395 .name = "hrtimer", 409 .name = "hrtimer",
410 .debug_hint = hrtimer_debug_hint,
396 .fixup_init = hrtimer_fixup_init, 411 .fixup_init = hrtimer_fixup_init,
397 .fixup_activate = hrtimer_fixup_activate, 412 .fixup_activate = hrtimer_fixup_activate,
398 .fixup_free = hrtimer_fixup_free, 413 .fixup_free = hrtimer_fixup_free,
@@ -611,24 +626,23 @@ static int hrtimer_reprogram(struct hrtimer *timer,
611static void retrigger_next_event(void *arg) 626static void retrigger_next_event(void *arg)
612{ 627{
613 struct hrtimer_cpu_base *base; 628 struct hrtimer_cpu_base *base;
614 struct timespec realtime_offset, wtm; 629 struct timespec realtime_offset, wtm, sleep;
615 unsigned long seq;
616 630
617 if (!hrtimer_hres_active()) 631 if (!hrtimer_hres_active())
618 return; 632 return;
619 633
620 do { 634 get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm,
621 seq = read_seqbegin(&xtime_lock); 635 &sleep);
622 wtm = __get_wall_to_monotonic();
623 } while (read_seqretry(&xtime_lock, seq));
624 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); 636 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
625 637
626 base = &__get_cpu_var(hrtimer_bases); 638 base = &__get_cpu_var(hrtimer_bases);
627 639
628 /* Adjust CLOCK_REALTIME offset */ 640 /* Adjust CLOCK_REALTIME offset */
629 raw_spin_lock(&base->lock); 641 raw_spin_lock(&base->lock);
630 base->clock_base[CLOCK_REALTIME].offset = 642 base->clock_base[HRTIMER_BASE_REALTIME].offset =
631 timespec_to_ktime(realtime_offset); 643 timespec_to_ktime(realtime_offset);
644 base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
645 timespec_to_ktime(sleep);
632 646
633 hrtimer_force_reprogram(base, 0); 647 hrtimer_force_reprogram(base, 0);
634 raw_spin_unlock(&base->lock); 648 raw_spin_unlock(&base->lock);
@@ -673,14 +687,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
673} 687}
674 688
675/* 689/*
676 * Initialize the high resolution related parts of a hrtimer
677 */
678static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
679{
680}
681
682
683/*
684 * When High resolution timers are active, try to reprogram. Note, that in case 690 * When High resolution timers are active, try to reprogram. Note, that in case
685 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry 691 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
686 * check happens. The timer gets enqueued into the rbtree. The reprogramming 692 * check happens. The timer gets enqueued into the rbtree. The reprogramming
@@ -725,8 +731,9 @@ static int hrtimer_switch_to_hres(void)
725 return 0; 731 return 0;
726 } 732 }
727 base->hres_active = 1; 733 base->hres_active = 1;
728 base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES; 734 base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES;
729 base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES; 735 base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES;
736 base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES;
730 737
731 tick_setup_sched_timer(); 738 tick_setup_sched_timer();
732 739
@@ -750,7 +757,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
750 return 0; 757 return 0;
751} 758}
752static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } 759static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
753static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
754 760
755#endif /* CONFIG_HIGH_RES_TIMERS */ 761#endif /* CONFIG_HIGH_RES_TIMERS */
756 762
@@ -1121,6 +1127,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1121 enum hrtimer_mode mode) 1127 enum hrtimer_mode mode)
1122{ 1128{
1123 struct hrtimer_cpu_base *cpu_base; 1129 struct hrtimer_cpu_base *cpu_base;
1130 int base;
1124 1131
1125 memset(timer, 0, sizeof(struct hrtimer)); 1132 memset(timer, 0, sizeof(struct hrtimer));
1126 1133
@@ -1129,8 +1136,8 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1129 if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) 1136 if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
1130 clock_id = CLOCK_MONOTONIC; 1137 clock_id = CLOCK_MONOTONIC;
1131 1138
1132 timer->base = &cpu_base->clock_base[clock_id]; 1139 base = hrtimer_clockid_to_base(clock_id);
1133 hrtimer_init_timer_hres(timer); 1140 timer->base = &cpu_base->clock_base[base];
1134 timerqueue_init(&timer->node); 1141 timerqueue_init(&timer->node);
1135 1142
1136#ifdef CONFIG_TIMER_STATS 1143#ifdef CONFIG_TIMER_STATS
@@ -1165,9 +1172,10 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
1165int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) 1172int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
1166{ 1173{
1167 struct hrtimer_cpu_base *cpu_base; 1174 struct hrtimer_cpu_base *cpu_base;
1175 int base = hrtimer_clockid_to_base(which_clock);
1168 1176
1169 cpu_base = &__raw_get_cpu_var(hrtimer_bases); 1177 cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1170 *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution); 1178 *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
1171 1179
1172 return 0; 1180 return 0;
1173} 1181}
@@ -1714,6 +1722,10 @@ static struct notifier_block __cpuinitdata hrtimers_nb = {
1714 1722
1715void __init hrtimers_init(void) 1723void __init hrtimers_init(void)
1716{ 1724{
1725 hrtimer_clock_to_base_table[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME;
1726 hrtimer_clock_to_base_table[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC;
1727 hrtimer_clock_to_base_table[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME;
1728
1717 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, 1729 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1718 (void *)(long)smp_processor_id()); 1730 (void *)(long)smp_processor_id());
1719 register_cpu_notifier(&hrtimers_nb); 1731 register_cpu_notifier(&hrtimers_nb);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 8e42fec7686d..09bef82d74cb 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -1,5 +1,6 @@
1# Select this to activate the generic irq options below
1config HAVE_GENERIC_HARDIRQS 2config HAVE_GENERIC_HARDIRQS
2 def_bool n 3 bool
3 4
4if HAVE_GENERIC_HARDIRQS 5if HAVE_GENERIC_HARDIRQS
5menu "IRQ subsystem" 6menu "IRQ subsystem"
@@ -11,26 +12,44 @@ config GENERIC_HARDIRQS
11 12
12# Select this to disable the deprecated stuff 13# Select this to disable the deprecated stuff
13config GENERIC_HARDIRQS_NO_DEPRECATED 14config GENERIC_HARDIRQS_NO_DEPRECATED
14 def_bool n 15 bool
16
17config GENERIC_HARDIRQS_NO_COMPAT
18 bool
15 19
16# Options selectable by the architecture code 20# Options selectable by the architecture code
21
22# Make sparse irq Kconfig switch below available
17config HAVE_SPARSE_IRQ 23config HAVE_SPARSE_IRQ
18 def_bool n 24 bool
19 25
26# Enable the generic irq autoprobe mechanism
20config GENERIC_IRQ_PROBE 27config GENERIC_IRQ_PROBE
21 def_bool n 28 bool
29
30# Use the generic /proc/interrupts implementation
31config GENERIC_IRQ_SHOW
32 bool
22 33
34# Support for delayed migration from interrupt context
23config GENERIC_PENDING_IRQ 35config GENERIC_PENDING_IRQ
24 def_bool n 36 bool
25 37
38# Alpha specific irq affinity mechanism
26config AUTO_IRQ_AFFINITY 39config AUTO_IRQ_AFFINITY
27 def_bool n 40 bool
28
29config IRQ_PER_CPU
30 def_bool n
31 41
42# Tasklet based software resend for pending interrupts on enable_irq()
32config HARDIRQS_SW_RESEND 43config HARDIRQS_SW_RESEND
33 def_bool n 44 bool
45
46# Preflow handler support for fasteoi (sparc64)
47config IRQ_PREFLOW_FASTEOI
48 bool
49
50# Support forced irq threading
51config IRQ_FORCED_THREADING
52 bool
34 53
35config SPARSE_IRQ 54config SPARSE_IRQ
36 bool "Support sparse irq numbering" 55 bool "Support sparse irq numbering"
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 505798f86c36..394784c57060 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -17,7 +17,7 @@
17/* 17/*
18 * Autodetection depends on the fact that any interrupt that 18 * Autodetection depends on the fact that any interrupt that
19 * comes in on to an unassigned handler will get stuck with 19 * comes in on to an unassigned handler will get stuck with
20 * "IRQ_WAITING" cleared and the interrupt disabled. 20 * "IRQS_WAITING" cleared and the interrupt disabled.
21 */ 21 */
22static DEFINE_MUTEX(probing_active); 22static DEFINE_MUTEX(probing_active);
23 23
@@ -32,7 +32,6 @@ unsigned long probe_irq_on(void)
32{ 32{
33 struct irq_desc *desc; 33 struct irq_desc *desc;
34 unsigned long mask = 0; 34 unsigned long mask = 0;
35 unsigned int status;
36 int i; 35 int i;
37 36
38 /* 37 /*
@@ -46,13 +45,7 @@ unsigned long probe_irq_on(void)
46 */ 45 */
47 for_each_irq_desc_reverse(i, desc) { 46 for_each_irq_desc_reverse(i, desc) {
48 raw_spin_lock_irq(&desc->lock); 47 raw_spin_lock_irq(&desc->lock);
49 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 48 if (!desc->action && irq_settings_can_probe(desc)) {
50 /*
51 * An old-style architecture might still have
52 * the handle_bad_irq handler there:
53 */
54 compat_irq_chip_set_default_handler(desc);
55
56 /* 49 /*
57 * Some chips need to know about probing in 50 * Some chips need to know about probing in
58 * progress: 51 * progress:
@@ -60,7 +53,7 @@ unsigned long probe_irq_on(void)
60 if (desc->irq_data.chip->irq_set_type) 53 if (desc->irq_data.chip->irq_set_type)
61 desc->irq_data.chip->irq_set_type(&desc->irq_data, 54 desc->irq_data.chip->irq_set_type(&desc->irq_data,
62 IRQ_TYPE_PROBE); 55 IRQ_TYPE_PROBE);
63 desc->irq_data.chip->irq_startup(&desc->irq_data); 56 irq_startup(desc);
64 } 57 }
65 raw_spin_unlock_irq(&desc->lock); 58 raw_spin_unlock_irq(&desc->lock);
66 } 59 }
@@ -75,10 +68,12 @@ unsigned long probe_irq_on(void)
75 */ 68 */
76 for_each_irq_desc_reverse(i, desc) { 69 for_each_irq_desc_reverse(i, desc) {
77 raw_spin_lock_irq(&desc->lock); 70 raw_spin_lock_irq(&desc->lock);
78 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 71 if (!desc->action && irq_settings_can_probe(desc)) {
79 desc->status |= IRQ_AUTODETECT | IRQ_WAITING; 72 desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
80 if (desc->irq_data.chip->irq_startup(&desc->irq_data)) 73 if (irq_startup(desc)) {
81 desc->status |= IRQ_PENDING; 74 irq_compat_set_pending(desc);
75 desc->istate |= IRQS_PENDING;
76 }
82 } 77 }
83 raw_spin_unlock_irq(&desc->lock); 78 raw_spin_unlock_irq(&desc->lock);
84 } 79 }
@@ -93,13 +88,12 @@ unsigned long probe_irq_on(void)
93 */ 88 */
94 for_each_irq_desc(i, desc) { 89 for_each_irq_desc(i, desc) {
95 raw_spin_lock_irq(&desc->lock); 90 raw_spin_lock_irq(&desc->lock);
96 status = desc->status;
97 91
98 if (status & IRQ_AUTODETECT) { 92 if (desc->istate & IRQS_AUTODETECT) {
99 /* It triggered already - consider it spurious. */ 93 /* It triggered already - consider it spurious. */
100 if (!(status & IRQ_WAITING)) { 94 if (!(desc->istate & IRQS_WAITING)) {
101 desc->status = status & ~IRQ_AUTODETECT; 95 desc->istate &= ~IRQS_AUTODETECT;
102 desc->irq_data.chip->irq_shutdown(&desc->irq_data); 96 irq_shutdown(desc);
103 } else 97 } else
104 if (i < 32) 98 if (i < 32)
105 mask |= 1 << i; 99 mask |= 1 << i;
@@ -125,20 +119,18 @@ EXPORT_SYMBOL(probe_irq_on);
125 */ 119 */
126unsigned int probe_irq_mask(unsigned long val) 120unsigned int probe_irq_mask(unsigned long val)
127{ 121{
128 unsigned int status, mask = 0; 122 unsigned int mask = 0;
129 struct irq_desc *desc; 123 struct irq_desc *desc;
130 int i; 124 int i;
131 125
132 for_each_irq_desc(i, desc) { 126 for_each_irq_desc(i, desc) {
133 raw_spin_lock_irq(&desc->lock); 127 raw_spin_lock_irq(&desc->lock);
134 status = desc->status; 128 if (desc->istate & IRQS_AUTODETECT) {
135 129 if (i < 16 && !(desc->istate & IRQS_WAITING))
136 if (status & IRQ_AUTODETECT) {
137 if (i < 16 && !(status & IRQ_WAITING))
138 mask |= 1 << i; 130 mask |= 1 << i;
139 131
140 desc->status = status & ~IRQ_AUTODETECT; 132 desc->istate &= ~IRQS_AUTODETECT;
141 desc->irq_data.chip->irq_shutdown(&desc->irq_data); 133 irq_shutdown(desc);
142 } 134 }
143 raw_spin_unlock_irq(&desc->lock); 135 raw_spin_unlock_irq(&desc->lock);
144 } 136 }
@@ -169,20 +161,18 @@ int probe_irq_off(unsigned long val)
169{ 161{
170 int i, irq_found = 0, nr_of_irqs = 0; 162 int i, irq_found = 0, nr_of_irqs = 0;
171 struct irq_desc *desc; 163 struct irq_desc *desc;
172 unsigned int status;
173 164
174 for_each_irq_desc(i, desc) { 165 for_each_irq_desc(i, desc) {
175 raw_spin_lock_irq(&desc->lock); 166 raw_spin_lock_irq(&desc->lock);
176 status = desc->status;
177 167
178 if (status & IRQ_AUTODETECT) { 168 if (desc->istate & IRQS_AUTODETECT) {
179 if (!(status & IRQ_WAITING)) { 169 if (!(desc->istate & IRQS_WAITING)) {
180 if (!nr_of_irqs) 170 if (!nr_of_irqs)
181 irq_found = i; 171 irq_found = i;
182 nr_of_irqs++; 172 nr_of_irqs++;
183 } 173 }
184 desc->status = status & ~IRQ_AUTODETECT; 174 desc->istate &= ~IRQS_AUTODETECT;
185 desc->irq_data.chip->irq_shutdown(&desc->irq_data); 175 irq_shutdown(desc);
186 } 176 }
187 raw_spin_unlock_irq(&desc->lock); 177 raw_spin_unlock_irq(&desc->lock);
188 } 178 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index baa5c4acad83..c9c0601f0615 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -19,140 +19,110 @@
19#include "internals.h" 19#include "internals.h"
20 20
21/** 21/**
22 * set_irq_chip - set the irq chip for an irq 22 * irq_set_chip - set the irq chip for an irq
23 * @irq: irq number 23 * @irq: irq number
24 * @chip: pointer to irq chip description structure 24 * @chip: pointer to irq chip description structure
25 */ 25 */
26int set_irq_chip(unsigned int irq, struct irq_chip *chip) 26int irq_set_chip(unsigned int irq, struct irq_chip *chip)
27{ 27{
28 struct irq_desc *desc = irq_to_desc(irq);
29 unsigned long flags; 28 unsigned long flags;
29 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
30 30
31 if (!desc) { 31 if (!desc)
32 WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
33 return -EINVAL; 32 return -EINVAL;
34 }
35 33
36 if (!chip) 34 if (!chip)
37 chip = &no_irq_chip; 35 chip = &no_irq_chip;
38 36
39 raw_spin_lock_irqsave(&desc->lock, flags);
40 irq_chip_set_defaults(chip); 37 irq_chip_set_defaults(chip);
41 desc->irq_data.chip = chip; 38 desc->irq_data.chip = chip;
42 raw_spin_unlock_irqrestore(&desc->lock, flags); 39 irq_put_desc_unlock(desc, flags);
43
44 return 0; 40 return 0;
45} 41}
46EXPORT_SYMBOL(set_irq_chip); 42EXPORT_SYMBOL(irq_set_chip);
47 43
48/** 44/**
49 * set_irq_type - set the irq trigger type for an irq 45 * irq_set_type - set the irq trigger type for an irq
50 * @irq: irq number 46 * @irq: irq number
51 * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h 47 * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
52 */ 48 */
53int set_irq_type(unsigned int irq, unsigned int type) 49int irq_set_irq_type(unsigned int irq, unsigned int type)
54{ 50{
55 struct irq_desc *desc = irq_to_desc(irq);
56 unsigned long flags; 51 unsigned long flags;
57 int ret = -ENXIO; 52 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
53 int ret = 0;
58 54
59 if (!desc) { 55 if (!desc)
60 printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq); 56 return -EINVAL;
61 return -ENODEV;
62 }
63 57
64 type &= IRQ_TYPE_SENSE_MASK; 58 type &= IRQ_TYPE_SENSE_MASK;
65 if (type == IRQ_TYPE_NONE) 59 if (type != IRQ_TYPE_NONE)
66 return 0; 60 ret = __irq_set_trigger(desc, irq, type);
67 61 irq_put_desc_busunlock(desc, flags);
68 raw_spin_lock_irqsave(&desc->lock, flags);
69 ret = __irq_set_trigger(desc, irq, type);
70 raw_spin_unlock_irqrestore(&desc->lock, flags);
71 return ret; 62 return ret;
72} 63}
73EXPORT_SYMBOL(set_irq_type); 64EXPORT_SYMBOL(irq_set_irq_type);
74 65
75/** 66/**
76 * set_irq_data - set irq type data for an irq 67 * irq_set_handler_data - set irq handler data for an irq
77 * @irq: Interrupt number 68 * @irq: Interrupt number
78 * @data: Pointer to interrupt specific data 69 * @data: Pointer to interrupt specific data
79 * 70 *
80 * Set the hardware irq controller data for an irq 71 * Set the hardware irq controller data for an irq
81 */ 72 */
82int set_irq_data(unsigned int irq, void *data) 73int irq_set_handler_data(unsigned int irq, void *data)
83{ 74{
84 struct irq_desc *desc = irq_to_desc(irq);
85 unsigned long flags; 75 unsigned long flags;
76 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
86 77
87 if (!desc) { 78 if (!desc)
88 printk(KERN_ERR
89 "Trying to install controller data for IRQ%d\n", irq);
90 return -EINVAL; 79 return -EINVAL;
91 }
92
93 raw_spin_lock_irqsave(&desc->lock, flags);
94 desc->irq_data.handler_data = data; 80 desc->irq_data.handler_data = data;
95 raw_spin_unlock_irqrestore(&desc->lock, flags); 81 irq_put_desc_unlock(desc, flags);
96 return 0; 82 return 0;
97} 83}
98EXPORT_SYMBOL(set_irq_data); 84EXPORT_SYMBOL(irq_set_handler_data);
99 85
100/** 86/**
101 * set_irq_msi - set MSI descriptor data for an irq 87 * irq_set_msi_desc - set MSI descriptor data for an irq
102 * @irq: Interrupt number 88 * @irq: Interrupt number
103 * @entry: Pointer to MSI descriptor data 89 * @entry: Pointer to MSI descriptor data
104 * 90 *
105 * Set the MSI descriptor entry for an irq 91 * Set the MSI descriptor entry for an irq
106 */ 92 */
107int set_irq_msi(unsigned int irq, struct msi_desc *entry) 93int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
108{ 94{
109 struct irq_desc *desc = irq_to_desc(irq);
110 unsigned long flags; 95 unsigned long flags;
96 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
111 97
112 if (!desc) { 98 if (!desc)
113 printk(KERN_ERR
114 "Trying to install msi data for IRQ%d\n", irq);
115 return -EINVAL; 99 return -EINVAL;
116 }
117
118 raw_spin_lock_irqsave(&desc->lock, flags);
119 desc->irq_data.msi_desc = entry; 100 desc->irq_data.msi_desc = entry;
120 if (entry) 101 if (entry)
121 entry->irq = irq; 102 entry->irq = irq;
122 raw_spin_unlock_irqrestore(&desc->lock, flags); 103 irq_put_desc_unlock(desc, flags);
123 return 0; 104 return 0;
124} 105}
125 106
126/** 107/**
127 * set_irq_chip_data - set irq chip data for an irq 108 * irq_set_chip_data - set irq chip data for an irq
128 * @irq: Interrupt number 109 * @irq: Interrupt number
129 * @data: Pointer to chip specific data 110 * @data: Pointer to chip specific data
130 * 111 *
131 * Set the hardware irq chip data for an irq 112 * Set the hardware irq chip data for an irq
132 */ 113 */
133int set_irq_chip_data(unsigned int irq, void *data) 114int irq_set_chip_data(unsigned int irq, void *data)
134{ 115{
135 struct irq_desc *desc = irq_to_desc(irq);
136 unsigned long flags; 116 unsigned long flags;
117 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
137 118
138 if (!desc) { 119 if (!desc)
139 printk(KERN_ERR
140 "Trying to install chip data for IRQ%d\n", irq);
141 return -EINVAL;
142 }
143
144 if (!desc->irq_data.chip) {
145 printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
146 return -EINVAL; 120 return -EINVAL;
147 }
148
149 raw_spin_lock_irqsave(&desc->lock, flags);
150 desc->irq_data.chip_data = data; 121 desc->irq_data.chip_data = data;
151 raw_spin_unlock_irqrestore(&desc->lock, flags); 122 irq_put_desc_unlock(desc, flags);
152
153 return 0; 123 return 0;
154} 124}
155EXPORT_SYMBOL(set_irq_chip_data); 125EXPORT_SYMBOL(irq_set_chip_data);
156 126
157struct irq_data *irq_get_irq_data(unsigned int irq) 127struct irq_data *irq_get_irq_data(unsigned int irq)
158{ 128{
@@ -162,72 +132,75 @@ struct irq_data *irq_get_irq_data(unsigned int irq)
162} 132}
163EXPORT_SYMBOL_GPL(irq_get_irq_data); 133EXPORT_SYMBOL_GPL(irq_get_irq_data);
164 134
165/** 135static void irq_state_clr_disabled(struct irq_desc *desc)
166 * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq
167 *
168 * @irq: Interrupt number
169 * @nest: 0 to clear / 1 to set the IRQ_NESTED_THREAD flag
170 *
171 * The IRQ_NESTED_THREAD flag indicates that on
172 * request_threaded_irq() no separate interrupt thread should be
173 * created for the irq as the handler are called nested in the
174 * context of a demultiplexing interrupt handler thread.
175 */
176void set_irq_nested_thread(unsigned int irq, int nest)
177{ 136{
178 struct irq_desc *desc = irq_to_desc(irq); 137 desc->istate &= ~IRQS_DISABLED;
179 unsigned long flags; 138 irq_compat_clr_disabled(desc);
180
181 if (!desc)
182 return;
183
184 raw_spin_lock_irqsave(&desc->lock, flags);
185 if (nest)
186 desc->status |= IRQ_NESTED_THREAD;
187 else
188 desc->status &= ~IRQ_NESTED_THREAD;
189 raw_spin_unlock_irqrestore(&desc->lock, flags);
190} 139}
191EXPORT_SYMBOL_GPL(set_irq_nested_thread);
192 140
193/* 141static void irq_state_set_disabled(struct irq_desc *desc)
194 * default enable function
195 */
196static void default_enable(struct irq_data *data)
197{ 142{
198 struct irq_desc *desc = irq_data_to_desc(data); 143 desc->istate |= IRQS_DISABLED;
144 irq_compat_set_disabled(desc);
145}
199 146
200 desc->irq_data.chip->irq_unmask(&desc->irq_data); 147static void irq_state_clr_masked(struct irq_desc *desc)
201 desc->status &= ~IRQ_MASKED; 148{
149 desc->istate &= ~IRQS_MASKED;
150 irq_compat_clr_masked(desc);
202} 151}
203 152
204/* 153static void irq_state_set_masked(struct irq_desc *desc)
205 * default disable function
206 */
207static void default_disable(struct irq_data *data)
208{ 154{
155 desc->istate |= IRQS_MASKED;
156 irq_compat_set_masked(desc);
209} 157}
210 158
211/* 159int irq_startup(struct irq_desc *desc)
212 * default startup function
213 */
214static unsigned int default_startup(struct irq_data *data)
215{ 160{
216 struct irq_desc *desc = irq_data_to_desc(data); 161 irq_state_clr_disabled(desc);
162 desc->depth = 0;
163
164 if (desc->irq_data.chip->irq_startup) {
165 int ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
166 irq_state_clr_masked(desc);
167 return ret;
168 }
217 169
218 desc->irq_data.chip->irq_enable(data); 170 irq_enable(desc);
219 return 0; 171 return 0;
220} 172}
221 173
222/* 174void irq_shutdown(struct irq_desc *desc)
223 * default shutdown function
224 */
225static void default_shutdown(struct irq_data *data)
226{ 175{
227 struct irq_desc *desc = irq_data_to_desc(data); 176 irq_state_set_disabled(desc);
177 desc->depth = 1;
178 if (desc->irq_data.chip->irq_shutdown)
179 desc->irq_data.chip->irq_shutdown(&desc->irq_data);
180 if (desc->irq_data.chip->irq_disable)
181 desc->irq_data.chip->irq_disable(&desc->irq_data);
182 else
183 desc->irq_data.chip->irq_mask(&desc->irq_data);
184 irq_state_set_masked(desc);
185}
228 186
229 desc->irq_data.chip->irq_mask(&desc->irq_data); 187void irq_enable(struct irq_desc *desc)
230 desc->status |= IRQ_MASKED; 188{
189 irq_state_clr_disabled(desc);
190 if (desc->irq_data.chip->irq_enable)
191 desc->irq_data.chip->irq_enable(&desc->irq_data);
192 else
193 desc->irq_data.chip->irq_unmask(&desc->irq_data);
194 irq_state_clr_masked(desc);
195}
196
197void irq_disable(struct irq_desc *desc)
198{
199 irq_state_set_disabled(desc);
200 if (desc->irq_data.chip->irq_disable) {
201 desc->irq_data.chip->irq_disable(&desc->irq_data);
202 irq_state_set_masked(desc);
203 }
231} 204}
232 205
233#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED 206#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
@@ -315,10 +288,6 @@ static void compat_bus_sync_unlock(struct irq_data *data)
315void irq_chip_set_defaults(struct irq_chip *chip) 288void irq_chip_set_defaults(struct irq_chip *chip)
316{ 289{
317#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED 290#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
318 /*
319 * Compat fixup functions need to be before we set the
320 * defaults for enable/disable/startup/shutdown
321 */
322 if (chip->enable) 291 if (chip->enable)
323 chip->irq_enable = compat_irq_enable; 292 chip->irq_enable = compat_irq_enable;
324 if (chip->disable) 293 if (chip->disable)
@@ -327,33 +296,8 @@ void irq_chip_set_defaults(struct irq_chip *chip)
327 chip->irq_shutdown = compat_irq_shutdown; 296 chip->irq_shutdown = compat_irq_shutdown;
328 if (chip->startup) 297 if (chip->startup)
329 chip->irq_startup = compat_irq_startup; 298 chip->irq_startup = compat_irq_startup;
330#endif
331 /*
332 * The real defaults
333 */
334 if (!chip->irq_enable)
335 chip->irq_enable = default_enable;
336 if (!chip->irq_disable)
337 chip->irq_disable = default_disable;
338 if (!chip->irq_startup)
339 chip->irq_startup = default_startup;
340 /*
341 * We use chip->irq_disable, when the user provided its own. When
342 * we have default_disable set for chip->irq_disable, then we need
343 * to use default_shutdown, otherwise the irq line is not
344 * disabled on free_irq():
345 */
346 if (!chip->irq_shutdown)
347 chip->irq_shutdown = chip->irq_disable != default_disable ?
348 chip->irq_disable : default_shutdown;
349
350#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
351 if (!chip->end) 299 if (!chip->end)
352 chip->end = dummy_irq_chip.end; 300 chip->end = dummy_irq_chip.end;
353
354 /*
355 * Now fix up the remaining compat handlers
356 */
357 if (chip->bus_lock) 301 if (chip->bus_lock)
358 chip->irq_bus_lock = compat_bus_lock; 302 chip->irq_bus_lock = compat_bus_lock;
359 if (chip->bus_sync_unlock) 303 if (chip->bus_sync_unlock)
@@ -388,22 +332,22 @@ static inline void mask_ack_irq(struct irq_desc *desc)
388 if (desc->irq_data.chip->irq_ack) 332 if (desc->irq_data.chip->irq_ack)
389 desc->irq_data.chip->irq_ack(&desc->irq_data); 333 desc->irq_data.chip->irq_ack(&desc->irq_data);
390 } 334 }
391 desc->status |= IRQ_MASKED; 335 irq_state_set_masked(desc);
392} 336}
393 337
394static inline void mask_irq(struct irq_desc *desc) 338void mask_irq(struct irq_desc *desc)
395{ 339{
396 if (desc->irq_data.chip->irq_mask) { 340 if (desc->irq_data.chip->irq_mask) {
397 desc->irq_data.chip->irq_mask(&desc->irq_data); 341 desc->irq_data.chip->irq_mask(&desc->irq_data);
398 desc->status |= IRQ_MASKED; 342 irq_state_set_masked(desc);
399 } 343 }
400} 344}
401 345
402static inline void unmask_irq(struct irq_desc *desc) 346void unmask_irq(struct irq_desc *desc)
403{ 347{
404 if (desc->irq_data.chip->irq_unmask) { 348 if (desc->irq_data.chip->irq_unmask) {
405 desc->irq_data.chip->irq_unmask(&desc->irq_data); 349 desc->irq_data.chip->irq_unmask(&desc->irq_data);
406 desc->status &= ~IRQ_MASKED; 350 irq_state_clr_masked(desc);
407 } 351 }
408} 352}
409 353
@@ -428,10 +372,11 @@ void handle_nested_irq(unsigned int irq)
428 kstat_incr_irqs_this_cpu(irq, desc); 372 kstat_incr_irqs_this_cpu(irq, desc);
429 373
430 action = desc->action; 374 action = desc->action;
431 if (unlikely(!action || (desc->status & IRQ_DISABLED))) 375 if (unlikely(!action || (desc->istate & IRQS_DISABLED)))
432 goto out_unlock; 376 goto out_unlock;
433 377
434 desc->status |= IRQ_INPROGRESS; 378 irq_compat_set_progress(desc);
379 desc->istate |= IRQS_INPROGRESS;
435 raw_spin_unlock_irq(&desc->lock); 380 raw_spin_unlock_irq(&desc->lock);
436 381
437 action_ret = action->thread_fn(action->irq, action->dev_id); 382 action_ret = action->thread_fn(action->irq, action->dev_id);
@@ -439,13 +384,21 @@ void handle_nested_irq(unsigned int irq)
439 note_interrupt(irq, desc, action_ret); 384 note_interrupt(irq, desc, action_ret);
440 385
441 raw_spin_lock_irq(&desc->lock); 386 raw_spin_lock_irq(&desc->lock);
442 desc->status &= ~IRQ_INPROGRESS; 387 desc->istate &= ~IRQS_INPROGRESS;
388 irq_compat_clr_progress(desc);
443 389
444out_unlock: 390out_unlock:
445 raw_spin_unlock_irq(&desc->lock); 391 raw_spin_unlock_irq(&desc->lock);
446} 392}
447EXPORT_SYMBOL_GPL(handle_nested_irq); 393EXPORT_SYMBOL_GPL(handle_nested_irq);
448 394
395static bool irq_check_poll(struct irq_desc *desc)
396{
397 if (!(desc->istate & IRQS_POLL_INPROGRESS))
398 return false;
399 return irq_wait_for_poll(desc);
400}
401
449/** 402/**
450 * handle_simple_irq - Simple and software-decoded IRQs. 403 * handle_simple_irq - Simple and software-decoded IRQs.
451 * @irq: the interrupt number 404 * @irq: the interrupt number
@@ -461,29 +414,20 @@ EXPORT_SYMBOL_GPL(handle_nested_irq);
461void 414void
462handle_simple_irq(unsigned int irq, struct irq_desc *desc) 415handle_simple_irq(unsigned int irq, struct irq_desc *desc)
463{ 416{
464 struct irqaction *action;
465 irqreturn_t action_ret;
466
467 raw_spin_lock(&desc->lock); 417 raw_spin_lock(&desc->lock);
468 418
469 if (unlikely(desc->status & IRQ_INPROGRESS)) 419 if (unlikely(desc->istate & IRQS_INPROGRESS))
470 goto out_unlock; 420 if (!irq_check_poll(desc))
471 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 421 goto out_unlock;
422
423 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
472 kstat_incr_irqs_this_cpu(irq, desc); 424 kstat_incr_irqs_this_cpu(irq, desc);
473 425
474 action = desc->action; 426 if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
475 if (unlikely(!action || (desc->status & IRQ_DISABLED)))
476 goto out_unlock; 427 goto out_unlock;
477 428
478 desc->status |= IRQ_INPROGRESS; 429 handle_irq_event(desc);
479 raw_spin_unlock(&desc->lock);
480 430
481 action_ret = handle_IRQ_event(irq, action);
482 if (!noirqdebug)
483 note_interrupt(irq, desc, action_ret);
484
485 raw_spin_lock(&desc->lock);
486 desc->status &= ~IRQ_INPROGRESS;
487out_unlock: 431out_unlock:
488 raw_spin_unlock(&desc->lock); 432 raw_spin_unlock(&desc->lock);
489} 433}
@@ -501,42 +445,42 @@ out_unlock:
501void 445void
502handle_level_irq(unsigned int irq, struct irq_desc *desc) 446handle_level_irq(unsigned int irq, struct irq_desc *desc)
503{ 447{
504 struct irqaction *action;
505 irqreturn_t action_ret;
506
507 raw_spin_lock(&desc->lock); 448 raw_spin_lock(&desc->lock);
508 mask_ack_irq(desc); 449 mask_ack_irq(desc);
509 450
510 if (unlikely(desc->status & IRQ_INPROGRESS)) 451 if (unlikely(desc->istate & IRQS_INPROGRESS))
511 goto out_unlock; 452 if (!irq_check_poll(desc))
512 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 453 goto out_unlock;
454
455 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
513 kstat_incr_irqs_this_cpu(irq, desc); 456 kstat_incr_irqs_this_cpu(irq, desc);
514 457
515 /* 458 /*
516 * If its disabled or no action available 459 * If its disabled or no action available
517 * keep it masked and get out of here 460 * keep it masked and get out of here
518 */ 461 */
519 action = desc->action; 462 if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
520 if (unlikely(!action || (desc->status & IRQ_DISABLED)))
521 goto out_unlock; 463 goto out_unlock;
522 464
523 desc->status |= IRQ_INPROGRESS; 465 handle_irq_event(desc);
524 raw_spin_unlock(&desc->lock);
525
526 action_ret = handle_IRQ_event(irq, action);
527 if (!noirqdebug)
528 note_interrupt(irq, desc, action_ret);
529 466
530 raw_spin_lock(&desc->lock); 467 if (!(desc->istate & (IRQS_DISABLED | IRQS_ONESHOT)))
531 desc->status &= ~IRQ_INPROGRESS;
532
533 if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT)))
534 unmask_irq(desc); 468 unmask_irq(desc);
535out_unlock: 469out_unlock:
536 raw_spin_unlock(&desc->lock); 470 raw_spin_unlock(&desc->lock);
537} 471}
538EXPORT_SYMBOL_GPL(handle_level_irq); 472EXPORT_SYMBOL_GPL(handle_level_irq);
539 473
474#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
475static inline void preflow_handler(struct irq_desc *desc)
476{
477 if (desc->preflow_handler)
478 desc->preflow_handler(&desc->irq_data);
479}
480#else
481static inline void preflow_handler(struct irq_desc *desc) { }
482#endif
483
540/** 484/**
541 * handle_fasteoi_irq - irq handler for transparent controllers 485 * handle_fasteoi_irq - irq handler for transparent controllers
542 * @irq: the interrupt number 486 * @irq: the interrupt number
@@ -550,42 +494,41 @@ EXPORT_SYMBOL_GPL(handle_level_irq);
550void 494void
551handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) 495handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
552{ 496{
553 struct irqaction *action;
554 irqreturn_t action_ret;
555
556 raw_spin_lock(&desc->lock); 497 raw_spin_lock(&desc->lock);
557 498
558 if (unlikely(desc->status & IRQ_INPROGRESS)) 499 if (unlikely(desc->istate & IRQS_INPROGRESS))
559 goto out; 500 if (!irq_check_poll(desc))
501 goto out;
560 502
561 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 503 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
562 kstat_incr_irqs_this_cpu(irq, desc); 504 kstat_incr_irqs_this_cpu(irq, desc);
563 505
564 /* 506 /*
565 * If its disabled or no action available 507 * If its disabled or no action available
566 * then mask it and get out of here: 508 * then mask it and get out of here:
567 */ 509 */
568 action = desc->action; 510 if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) {
569 if (unlikely(!action || (desc->status & IRQ_DISABLED))) { 511 irq_compat_set_pending(desc);
570 desc->status |= IRQ_PENDING; 512 desc->istate |= IRQS_PENDING;
571 mask_irq(desc); 513 mask_irq(desc);
572 goto out; 514 goto out;
573 } 515 }
574 516
575 desc->status |= IRQ_INPROGRESS; 517 if (desc->istate & IRQS_ONESHOT)
576 desc->status &= ~IRQ_PENDING; 518 mask_irq(desc);
577 raw_spin_unlock(&desc->lock);
578 519
579 action_ret = handle_IRQ_event(irq, action); 520 preflow_handler(desc);
580 if (!noirqdebug) 521 handle_irq_event(desc);
581 note_interrupt(irq, desc, action_ret);
582 522
583 raw_spin_lock(&desc->lock); 523out_eoi:
584 desc->status &= ~IRQ_INPROGRESS;
585out:
586 desc->irq_data.chip->irq_eoi(&desc->irq_data); 524 desc->irq_data.chip->irq_eoi(&desc->irq_data);
587 525out_unlock:
588 raw_spin_unlock(&desc->lock); 526 raw_spin_unlock(&desc->lock);
527 return;
528out:
529 if (!(desc->irq_data.chip->flags & IRQCHIP_EOI_IF_HANDLED))
530 goto out_eoi;
531 goto out_unlock;
589} 532}
590 533
591/** 534/**
@@ -609,32 +552,28 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
609{ 552{
610 raw_spin_lock(&desc->lock); 553 raw_spin_lock(&desc->lock);
611 554
612 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 555 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
613
614 /* 556 /*
615 * If we're currently running this IRQ, or its disabled, 557 * If we're currently running this IRQ, or its disabled,
616 * we shouldn't process the IRQ. Mark it pending, handle 558 * we shouldn't process the IRQ. Mark it pending, handle
617 * the necessary masking and go out 559 * the necessary masking and go out
618 */ 560 */
619 if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) || 561 if (unlikely((desc->istate & (IRQS_DISABLED | IRQS_INPROGRESS) ||
620 !desc->action)) { 562 !desc->action))) {
621 desc->status |= (IRQ_PENDING | IRQ_MASKED); 563 if (!irq_check_poll(desc)) {
622 mask_ack_irq(desc); 564 irq_compat_set_pending(desc);
623 goto out_unlock; 565 desc->istate |= IRQS_PENDING;
566 mask_ack_irq(desc);
567 goto out_unlock;
568 }
624 } 569 }
625 kstat_incr_irqs_this_cpu(irq, desc); 570 kstat_incr_irqs_this_cpu(irq, desc);
626 571
627 /* Start handling the irq */ 572 /* Start handling the irq */
628 desc->irq_data.chip->irq_ack(&desc->irq_data); 573 desc->irq_data.chip->irq_ack(&desc->irq_data);
629 574
630 /* Mark the IRQ currently in progress.*/
631 desc->status |= IRQ_INPROGRESS;
632
633 do { 575 do {
634 struct irqaction *action = desc->action; 576 if (unlikely(!desc->action)) {
635 irqreturn_t action_ret;
636
637 if (unlikely(!action)) {
638 mask_irq(desc); 577 mask_irq(desc);
639 goto out_unlock; 578 goto out_unlock;
640 } 579 }
@@ -644,22 +583,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
644 * one, we could have masked the irq. 583 * one, we could have masked the irq.
645 * Renable it, if it was not disabled in meantime. 584 * Renable it, if it was not disabled in meantime.
646 */ 585 */
647 if (unlikely((desc->status & 586 if (unlikely(desc->istate & IRQS_PENDING)) {
648 (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == 587 if (!(desc->istate & IRQS_DISABLED) &&
649 (IRQ_PENDING | IRQ_MASKED))) { 588 (desc->istate & IRQS_MASKED))
650 unmask_irq(desc); 589 unmask_irq(desc);
651 } 590 }
652 591
653 desc->status &= ~IRQ_PENDING; 592 handle_irq_event(desc);
654 raw_spin_unlock(&desc->lock);
655 action_ret = handle_IRQ_event(irq, action);
656 if (!noirqdebug)
657 note_interrupt(irq, desc, action_ret);
658 raw_spin_lock(&desc->lock);
659 593
660 } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); 594 } while ((desc->istate & IRQS_PENDING) &&
595 !(desc->istate & IRQS_DISABLED));
661 596
662 desc->status &= ~IRQ_INPROGRESS;
663out_unlock: 597out_unlock:
664 raw_spin_unlock(&desc->lock); 598 raw_spin_unlock(&desc->lock);
665} 599}
@@ -674,103 +608,84 @@ out_unlock:
674void 608void
675handle_percpu_irq(unsigned int irq, struct irq_desc *desc) 609handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
676{ 610{
677 irqreturn_t action_ret; 611 struct irq_chip *chip = irq_desc_get_chip(desc);
678 612
679 kstat_incr_irqs_this_cpu(irq, desc); 613 kstat_incr_irqs_this_cpu(irq, desc);
680 614
681 if (desc->irq_data.chip->irq_ack) 615 if (chip->irq_ack)
682 desc->irq_data.chip->irq_ack(&desc->irq_data); 616 chip->irq_ack(&desc->irq_data);
683 617
684 action_ret = handle_IRQ_event(irq, desc->action); 618 handle_irq_event_percpu(desc, desc->action);
685 if (!noirqdebug)
686 note_interrupt(irq, desc, action_ret);
687 619
688 if (desc->irq_data.chip->irq_eoi) 620 if (chip->irq_eoi)
689 desc->irq_data.chip->irq_eoi(&desc->irq_data); 621 chip->irq_eoi(&desc->irq_data);
690} 622}
691 623
692void 624void
693__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, 625__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
694 const char *name) 626 const char *name)
695{ 627{
696 struct irq_desc *desc = irq_to_desc(irq);
697 unsigned long flags; 628 unsigned long flags;
629 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
698 630
699 if (!desc) { 631 if (!desc)
700 printk(KERN_ERR
701 "Trying to install type control for IRQ%d\n", irq);
702 return; 632 return;
703 }
704 633
705 if (!handle) 634 if (!handle) {
706 handle = handle_bad_irq; 635 handle = handle_bad_irq;
707 else if (desc->irq_data.chip == &no_irq_chip) { 636 } else {
708 printk(KERN_WARNING "Trying to install %sinterrupt handler " 637 if (WARN_ON(desc->irq_data.chip == &no_irq_chip))
709 "for IRQ%d\n", is_chained ? "chained " : "", irq); 638 goto out;
710 /*
711 * Some ARM implementations install a handler for really dumb
712 * interrupt hardware without setting an irq_chip. This worked
713 * with the ARM no_irq_chip but the check in setup_irq would
714 * prevent us to setup the interrupt at all. Switch it to
715 * dummy_irq_chip for easy transition.
716 */
717 desc->irq_data.chip = &dummy_irq_chip;
718 } 639 }
719 640
720 chip_bus_lock(desc);
721 raw_spin_lock_irqsave(&desc->lock, flags);
722
723 /* Uninstall? */ 641 /* Uninstall? */
724 if (handle == handle_bad_irq) { 642 if (handle == handle_bad_irq) {
725 if (desc->irq_data.chip != &no_irq_chip) 643 if (desc->irq_data.chip != &no_irq_chip)
726 mask_ack_irq(desc); 644 mask_ack_irq(desc);
727 desc->status |= IRQ_DISABLED; 645 irq_compat_set_disabled(desc);
646 desc->istate |= IRQS_DISABLED;
728 desc->depth = 1; 647 desc->depth = 1;
729 } 648 }
730 desc->handle_irq = handle; 649 desc->handle_irq = handle;
731 desc->name = name; 650 desc->name = name;
732 651
733 if (handle != handle_bad_irq && is_chained) { 652 if (handle != handle_bad_irq && is_chained) {
734 desc->status &= ~IRQ_DISABLED; 653 irq_settings_set_noprobe(desc);
735 desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; 654 irq_settings_set_norequest(desc);
736 desc->depth = 0; 655 irq_startup(desc);
737 desc->irq_data.chip->irq_startup(&desc->irq_data);
738 } 656 }
739 raw_spin_unlock_irqrestore(&desc->lock, flags); 657out:
740 chip_bus_sync_unlock(desc); 658 irq_put_desc_busunlock(desc, flags);
741}
742EXPORT_SYMBOL_GPL(__set_irq_handler);
743
744void
745set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
746 irq_flow_handler_t handle)
747{
748 set_irq_chip(irq, chip);
749 __set_irq_handler(irq, handle, 0, NULL);
750} 659}
660EXPORT_SYMBOL_GPL(__irq_set_handler);
751 661
752void 662void
753set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, 663irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
754 irq_flow_handler_t handle, const char *name) 664 irq_flow_handler_t handle, const char *name)
755{ 665{
756 set_irq_chip(irq, chip); 666 irq_set_chip(irq, chip);
757 __set_irq_handler(irq, handle, 0, name); 667 __irq_set_handler(irq, handle, 0, name);
758} 668}
759 669
760void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) 670void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
761{ 671{
762 struct irq_desc *desc = irq_to_desc(irq);
763 unsigned long flags; 672 unsigned long flags;
673 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
764 674
765 if (!desc) 675 if (!desc)
766 return; 676 return;
677 irq_settings_clr_and_set(desc, clr, set);
678
679 irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
680 IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT);
681 if (irq_settings_has_no_balance_set(desc))
682 irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
683 if (irq_settings_is_per_cpu(desc))
684 irqd_set(&desc->irq_data, IRQD_PER_CPU);
685 if (irq_settings_can_move_pcntxt(desc))
686 irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT);
767 687
768 /* Sanitize flags */ 688 irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
769 set &= IRQF_MODIFY_MASK;
770 clr &= IRQF_MODIFY_MASK;
771 689
772 raw_spin_lock_irqsave(&desc->lock, flags); 690 irq_put_desc_unlock(desc, flags);
773 desc->status &= ~clr;
774 desc->status |= set;
775 raw_spin_unlock_irqrestore(&desc->lock, flags);
776} 691}
diff --git a/kernel/irq/compat.h b/kernel/irq/compat.h
new file mode 100644
index 000000000000..6bbaf66aca85
--- /dev/null
+++ b/kernel/irq/compat.h
@@ -0,0 +1,72 @@
1/*
2 * Compat layer for transition period
3 */
4#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
5static inline void irq_compat_set_progress(struct irq_desc *desc)
6{
7 desc->status |= IRQ_INPROGRESS;
8}
9
10static inline void irq_compat_clr_progress(struct irq_desc *desc)
11{
12 desc->status &= ~IRQ_INPROGRESS;
13}
14static inline void irq_compat_set_disabled(struct irq_desc *desc)
15{
16 desc->status |= IRQ_DISABLED;
17}
18static inline void irq_compat_clr_disabled(struct irq_desc *desc)
19{
20 desc->status &= ~IRQ_DISABLED;
21}
22static inline void irq_compat_set_pending(struct irq_desc *desc)
23{
24 desc->status |= IRQ_PENDING;
25}
26
27static inline void irq_compat_clr_pending(struct irq_desc *desc)
28{
29 desc->status &= ~IRQ_PENDING;
30}
31static inline void irq_compat_set_masked(struct irq_desc *desc)
32{
33 desc->status |= IRQ_MASKED;
34}
35
36static inline void irq_compat_clr_masked(struct irq_desc *desc)
37{
38 desc->status &= ~IRQ_MASKED;
39}
40static inline void irq_compat_set_move_pending(struct irq_desc *desc)
41{
42 desc->status |= IRQ_MOVE_PENDING;
43}
44
45static inline void irq_compat_clr_move_pending(struct irq_desc *desc)
46{
47 desc->status &= ~IRQ_MOVE_PENDING;
48}
49static inline void irq_compat_set_affinity(struct irq_desc *desc)
50{
51 desc->status |= IRQ_AFFINITY_SET;
52}
53
54static inline void irq_compat_clr_affinity(struct irq_desc *desc)
55{
56 desc->status &= ~IRQ_AFFINITY_SET;
57}
58#else
59static inline void irq_compat_set_progress(struct irq_desc *desc) { }
60static inline void irq_compat_clr_progress(struct irq_desc *desc) { }
61static inline void irq_compat_set_disabled(struct irq_desc *desc) { }
62static inline void irq_compat_clr_disabled(struct irq_desc *desc) { }
63static inline void irq_compat_set_pending(struct irq_desc *desc) { }
64static inline void irq_compat_clr_pending(struct irq_desc *desc) { }
65static inline void irq_compat_set_masked(struct irq_desc *desc) { }
66static inline void irq_compat_clr_masked(struct irq_desc *desc) { }
67static inline void irq_compat_set_move_pending(struct irq_desc *desc) { }
68static inline void irq_compat_clr_move_pending(struct irq_desc *desc) { }
69static inline void irq_compat_set_affinity(struct irq_desc *desc) { }
70static inline void irq_compat_clr_affinity(struct irq_desc *desc) { }
71#endif
72
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
new file mode 100644
index 000000000000..d1a33b7fa61d
--- /dev/null
+++ b/kernel/irq/debug.h
@@ -0,0 +1,40 @@
1/*
2 * Debugging printout:
3 */
4
5#include <linux/kallsyms.h>
6
7#define P(f) if (desc->status & f) printk("%14s set\n", #f)
8#define PS(f) if (desc->istate & f) printk("%14s set\n", #f)
9
10static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
11{
12 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
13 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
14 printk("->handle_irq(): %p, ", desc->handle_irq);
15 print_symbol("%s\n", (unsigned long)desc->handle_irq);
16 printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
17 print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
18 printk("->action(): %p\n", desc->action);
19 if (desc->action) {
20 printk("->action->handler(): %p, ", desc->action->handler);
21 print_symbol("%s\n", (unsigned long)desc->action->handler);
22 }
23
24 P(IRQ_LEVEL);
25 P(IRQ_PER_CPU);
26 P(IRQ_NOPROBE);
27 P(IRQ_NOREQUEST);
28 P(IRQ_NOAUTOEN);
29
30 PS(IRQS_AUTODETECT);
31 PS(IRQS_INPROGRESS);
32 PS(IRQS_REPLAY);
33 PS(IRQS_WAITING);
34 PS(IRQS_DISABLED);
35 PS(IRQS_PENDING);
36 PS(IRQS_MASKED);
37}
38
39#undef P
40#undef PS
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3540a7190122..517561fc7317 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -51,30 +51,92 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
51 "but no thread function available.", irq, action->name); 51 "but no thread function available.", irq, action->name);
52} 52}
53 53
54/** 54static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
55 * handle_IRQ_event - irq action chain handler 55{
56 * @irq: the interrupt number 56 /*
57 * @action: the interrupt action chain for this irq 57 * Wake up the handler thread for this action. In case the
58 * 58 * thread crashed and was killed we just pretend that we
59 * Handles the action chain of an irq event 59 * handled the interrupt. The hardirq handler has disabled the
60 */ 60 * device interrupt, so no irq storm is lurking. If the
61irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) 61 * RUNTHREAD bit is already set, nothing to do.
62 */
63 if (test_bit(IRQTF_DIED, &action->thread_flags) ||
64 test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
65 return;
66
67 /*
68 * It's safe to OR the mask lockless here. We have only two
69 * places which write to threads_oneshot: This code and the
70 * irq thread.
71 *
72 * This code is the hard irq context and can never run on two
73 * cpus in parallel. If it ever does we have more serious
74 * problems than this bitmask.
75 *
76 * The irq threads of this irq which clear their "running" bit
77 * in threads_oneshot are serialized via desc->lock against
78 * each other and they are serialized against this code by
79 * IRQS_INPROGRESS.
80 *
81 * Hard irq handler:
82 *
83 * spin_lock(desc->lock);
84 * desc->state |= IRQS_INPROGRESS;
85 * spin_unlock(desc->lock);
86 * set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
87 * desc->threads_oneshot |= mask;
88 * spin_lock(desc->lock);
89 * desc->state &= ~IRQS_INPROGRESS;
90 * spin_unlock(desc->lock);
91 *
92 * irq thread:
93 *
94 * again:
95 * spin_lock(desc->lock);
96 * if (desc->state & IRQS_INPROGRESS) {
97 * spin_unlock(desc->lock);
98 * while(desc->state & IRQS_INPROGRESS)
99 * cpu_relax();
100 * goto again;
101 * }
102 * if (!test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
103 * desc->threads_oneshot &= ~mask;
104 * spin_unlock(desc->lock);
105 *
106 * So either the thread waits for us to clear IRQS_INPROGRESS
107 * or we are waiting in the flow handler for desc->lock to be
108 * released before we reach this point. The thread also checks
109 * IRQTF_RUNTHREAD under desc->lock. If set it leaves
110 * threads_oneshot untouched and runs the thread another time.
111 */
112 desc->threads_oneshot |= action->thread_mask;
113 wake_up_process(action->thread);
114}
115
116irqreturn_t
117handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
62{ 118{
63 irqreturn_t ret, retval = IRQ_NONE; 119 irqreturn_t retval = IRQ_NONE;
64 unsigned int status = 0; 120 unsigned int random = 0, irq = desc->irq_data.irq;
65 121
66 do { 122 do {
123 irqreturn_t res;
124
67 trace_irq_handler_entry(irq, action); 125 trace_irq_handler_entry(irq, action);
68 ret = action->handler(irq, action->dev_id); 126 res = action->handler(irq, action->dev_id);
69 trace_irq_handler_exit(irq, action, ret); 127 trace_irq_handler_exit(irq, action, res);
70 128
71 switch (ret) { 129 if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
130 irq, action->handler))
131 local_irq_disable();
132
133 switch (res) {
72 case IRQ_WAKE_THREAD: 134 case IRQ_WAKE_THREAD:
73 /* 135 /*
74 * Set result to handled so the spurious check 136 * Set result to handled so the spurious check
75 * does not trigger. 137 * does not trigger.
76 */ 138 */
77 ret = IRQ_HANDLED; 139 res = IRQ_HANDLED;
78 140
79 /* 141 /*
80 * Catch drivers which return WAKE_THREAD but 142 * Catch drivers which return WAKE_THREAD but
@@ -85,36 +147,56 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
85 break; 147 break;
86 } 148 }
87 149
88 /* 150 irq_wake_thread(desc, action);
89 * Wake up the handler thread for this
90 * action. In case the thread crashed and was
91 * killed we just pretend that we handled the
92 * interrupt. The hardirq handler above has
93 * disabled the device interrupt, so no irq
94 * storm is lurking.
95 */
96 if (likely(!test_bit(IRQTF_DIED,
97 &action->thread_flags))) {
98 set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
99 wake_up_process(action->thread);
100 }
101 151
102 /* Fall through to add to randomness */ 152 /* Fall through to add to randomness */
103 case IRQ_HANDLED: 153 case IRQ_HANDLED:
104 status |= action->flags; 154 random |= action->flags;
105 break; 155 break;
106 156
107 default: 157 default:
108 break; 158 break;
109 } 159 }
110 160
111 retval |= ret; 161 retval |= res;
112 action = action->next; 162 action = action->next;
113 } while (action); 163 } while (action);
114 164
115 if (status & IRQF_SAMPLE_RANDOM) 165 if (random & IRQF_SAMPLE_RANDOM)
116 add_interrupt_randomness(irq); 166 add_interrupt_randomness(irq);
117 local_irq_disable();
118 167
168 if (!noirqdebug)
169 note_interrupt(irq, desc, retval);
119 return retval; 170 return retval;
120} 171}
172
173irqreturn_t handle_irq_event(struct irq_desc *desc)
174{
175 struct irqaction *action = desc->action;
176 irqreturn_t ret;
177
178 irq_compat_clr_pending(desc);
179 desc->istate &= ~IRQS_PENDING;
180 irq_compat_set_progress(desc);
181 desc->istate |= IRQS_INPROGRESS;
182 raw_spin_unlock(&desc->lock);
183
184 ret = handle_irq_event_percpu(desc, action);
185
186 raw_spin_lock(&desc->lock);
187 desc->istate &= ~IRQS_INPROGRESS;
188 irq_compat_clr_progress(desc);
189 return ret;
190}
191
192/**
193 * handle_IRQ_event - irq action chain handler
194 * @irq: the interrupt number
195 * @action: the interrupt action chain for this irq
196 *
197 * Handles the action chain of an irq event
198 */
199irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
200{
201 return handle_irq_event_percpu(irq_to_desc(irq), action);
202}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4571ae7e085a..6c6ec9a49027 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -1,27 +1,101 @@
1/* 1/*
2 * IRQ subsystem internal functions and variables: 2 * IRQ subsystem internal functions and variables:
3 *
4 * Do not ever include this file from anything else than
5 * kernel/irq/. Do not even think about using any information outside
6 * of this file for your non core code.
3 */ 7 */
4#include <linux/irqdesc.h> 8#include <linux/irqdesc.h>
5 9
10#ifdef CONFIG_SPARSE_IRQ
11# define IRQ_BITMAP_BITS (NR_IRQS + 8196)
12#else
13# define IRQ_BITMAP_BITS NR_IRQS
14#endif
15
16#define istate core_internal_state__do_not_mess_with_it
17
18#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
19# define status status_use_accessors
20#endif
21
6extern int noirqdebug; 22extern int noirqdebug;
7 23
24/*
25 * Bits used by threaded handlers:
26 * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
27 * IRQTF_DIED - handler thread died
28 * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
29 * IRQTF_AFFINITY - irq thread is requested to adjust affinity
30 * IRQTF_FORCED_THREAD - irq action is force threaded
31 */
32enum {
33 IRQTF_RUNTHREAD,
34 IRQTF_DIED,
35 IRQTF_WARNED,
36 IRQTF_AFFINITY,
37 IRQTF_FORCED_THREAD,
38};
39
40/*
41 * Bit masks for desc->state
42 *
43 * IRQS_AUTODETECT - autodetection in progress
44 * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt
45 * detection
46 * IRQS_POLL_INPROGRESS - polling in progress
47 * IRQS_INPROGRESS - Interrupt in progress
48 * IRQS_ONESHOT - irq is not unmasked in primary handler
49 * IRQS_REPLAY - irq is replayed
50 * IRQS_WAITING - irq is waiting
51 * IRQS_DISABLED - irq is disabled
52 * IRQS_PENDING - irq is pending and replayed later
53 * IRQS_MASKED - irq is masked
54 * IRQS_SUSPENDED - irq is suspended
55 */
56enum {
57 IRQS_AUTODETECT = 0x00000001,
58 IRQS_SPURIOUS_DISABLED = 0x00000002,
59 IRQS_POLL_INPROGRESS = 0x00000008,
60 IRQS_INPROGRESS = 0x00000010,
61 IRQS_ONESHOT = 0x00000020,
62 IRQS_REPLAY = 0x00000040,
63 IRQS_WAITING = 0x00000080,
64 IRQS_DISABLED = 0x00000100,
65 IRQS_PENDING = 0x00000200,
66 IRQS_MASKED = 0x00000400,
67 IRQS_SUSPENDED = 0x00000800,
68};
69
70#include "compat.h"
71#include "debug.h"
72#include "settings.h"
73
8#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) 74#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data)
9 75
10/* Set default functions for irq_chip structures: */ 76/* Set default functions for irq_chip structures: */
11extern void irq_chip_set_defaults(struct irq_chip *chip); 77extern void irq_chip_set_defaults(struct irq_chip *chip);
12 78
13/* Set default handler: */
14extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
15
16extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, 79extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
17 unsigned long flags); 80 unsigned long flags);
18extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); 81extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
19extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); 82extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
20 83
84extern int irq_startup(struct irq_desc *desc);
85extern void irq_shutdown(struct irq_desc *desc);
86extern void irq_enable(struct irq_desc *desc);
87extern void irq_disable(struct irq_desc *desc);
88extern void mask_irq(struct irq_desc *desc);
89extern void unmask_irq(struct irq_desc *desc);
90
21extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); 91extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
22 92
93irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action);
94irqreturn_t handle_irq_event(struct irq_desc *desc);
95
23/* Resending of interrupts :*/ 96/* Resending of interrupts :*/
24void check_irq_resend(struct irq_desc *desc, unsigned int irq); 97void check_irq_resend(struct irq_desc *desc, unsigned int irq);
98bool irq_wait_for_poll(struct irq_desc *desc);
25 99
26#ifdef CONFIG_PROC_FS 100#ifdef CONFIG_PROC_FS
27extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); 101extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
@@ -37,20 +111,10 @@ static inline void unregister_handler_proc(unsigned int irq,
37 struct irqaction *action) { } 111 struct irqaction *action) { }
38#endif 112#endif
39 113
40extern int irq_select_affinity_usr(unsigned int irq); 114extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
41 115
42extern void irq_set_thread_affinity(struct irq_desc *desc); 116extern void irq_set_thread_affinity(struct irq_desc *desc);
43 117
44#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
45static inline void irq_end(unsigned int irq, struct irq_desc *desc)
46{
47 if (desc->irq_data.chip && desc->irq_data.chip->end)
48 desc->irq_data.chip->end(irq);
49}
50#else
51static inline void irq_end(unsigned int irq, struct irq_desc *desc) { }
52#endif
53
54/* Inline functions for support of irq chips on slow busses */ 118/* Inline functions for support of irq chips on slow busses */
55static inline void chip_bus_lock(struct irq_desc *desc) 119static inline void chip_bus_lock(struct irq_desc *desc)
56{ 120{
@@ -64,43 +128,60 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc)
64 desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); 128 desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data);
65} 129}
66 130
131struct irq_desc *
132__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus);
133void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus);
134
135static inline struct irq_desc *
136irq_get_desc_buslock(unsigned int irq, unsigned long *flags)
137{
138 return __irq_get_desc_lock(irq, flags, true);
139}
140
141static inline void
142irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags)
143{
144 __irq_put_desc_unlock(desc, flags, true);
145}
146
147static inline struct irq_desc *
148irq_get_desc_lock(unsigned int irq, unsigned long *flags)
149{
150 return __irq_get_desc_lock(irq, flags, false);
151}
152
153static inline void
154irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
155{
156 __irq_put_desc_unlock(desc, flags, false);
157}
158
67/* 159/*
68 * Debugging printout: 160 * Manipulation functions for irq_data.state
69 */ 161 */
162static inline void irqd_set_move_pending(struct irq_data *d)
163{
164 d->state_use_accessors |= IRQD_SETAFFINITY_PENDING;
165 irq_compat_set_move_pending(irq_data_to_desc(d));
166}
70 167
71#include <linux/kallsyms.h> 168static inline void irqd_clr_move_pending(struct irq_data *d)
72 169{
73#define P(f) if (desc->status & f) printk("%14s set\n", #f) 170 d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING;
171 irq_compat_clr_move_pending(irq_data_to_desc(d));
172}
74 173
75static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) 174static inline void irqd_clear(struct irq_data *d, unsigned int mask)
76{ 175{
77 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", 176 d->state_use_accessors &= ~mask;
78 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
79 printk("->handle_irq(): %p, ", desc->handle_irq);
80 print_symbol("%s\n", (unsigned long)desc->handle_irq);
81 printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
82 print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
83 printk("->action(): %p\n", desc->action);
84 if (desc->action) {
85 printk("->action->handler(): %p, ", desc->action->handler);
86 print_symbol("%s\n", (unsigned long)desc->action->handler);
87 }
88
89 P(IRQ_INPROGRESS);
90 P(IRQ_DISABLED);
91 P(IRQ_PENDING);
92 P(IRQ_REPLAY);
93 P(IRQ_AUTODETECT);
94 P(IRQ_WAITING);
95 P(IRQ_LEVEL);
96 P(IRQ_MASKED);
97#ifdef CONFIG_IRQ_PER_CPU
98 P(IRQ_PER_CPU);
99#endif
100 P(IRQ_NOPROBE);
101 P(IRQ_NOREQUEST);
102 P(IRQ_NOAUTOEN);
103} 177}
104 178
105#undef P 179static inline void irqd_set(struct irq_data *d, unsigned int mask)
180{
181 d->state_use_accessors |= mask;
182}
106 183
184static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
185{
186 return d->state_use_accessors & mask;
187}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 282f20230e67..dbccc799407f 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -79,7 +79,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
79 desc->irq_data.chip_data = NULL; 79 desc->irq_data.chip_data = NULL;
80 desc->irq_data.handler_data = NULL; 80 desc->irq_data.handler_data = NULL;
81 desc->irq_data.msi_desc = NULL; 81 desc->irq_data.msi_desc = NULL;
82 desc->status = IRQ_DEFAULT_INIT_FLAGS; 82 irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
83 desc->istate = IRQS_DISABLED;
83 desc->handle_irq = handle_bad_irq; 84 desc->handle_irq = handle_bad_irq;
84 desc->depth = 1; 85 desc->depth = 1;
85 desc->irq_count = 0; 86 desc->irq_count = 0;
@@ -94,7 +95,7 @@ int nr_irqs = NR_IRQS;
94EXPORT_SYMBOL_GPL(nr_irqs); 95EXPORT_SYMBOL_GPL(nr_irqs);
95 96
96static DEFINE_MUTEX(sparse_irq_lock); 97static DEFINE_MUTEX(sparse_irq_lock);
97static DECLARE_BITMAP(allocated_irqs, NR_IRQS); 98static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
98 99
99#ifdef CONFIG_SPARSE_IRQ 100#ifdef CONFIG_SPARSE_IRQ
100 101
@@ -206,6 +207,14 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
206 return NULL; 207 return NULL;
207} 208}
208 209
210static int irq_expand_nr_irqs(unsigned int nr)
211{
212 if (nr > IRQ_BITMAP_BITS)
213 return -ENOMEM;
214 nr_irqs = nr;
215 return 0;
216}
217
209int __init early_irq_init(void) 218int __init early_irq_init(void)
210{ 219{
211 int i, initcnt, node = first_online_node; 220 int i, initcnt, node = first_online_node;
@@ -217,6 +226,15 @@ int __init early_irq_init(void)
217 initcnt = arch_probe_nr_irqs(); 226 initcnt = arch_probe_nr_irqs();
218 printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt); 227 printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt);
219 228
229 if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
230 nr_irqs = IRQ_BITMAP_BITS;
231
232 if (WARN_ON(initcnt > IRQ_BITMAP_BITS))
233 initcnt = IRQ_BITMAP_BITS;
234
235 if (initcnt > nr_irqs)
236 nr_irqs = initcnt;
237
220 for (i = 0; i < initcnt; i++) { 238 for (i = 0; i < initcnt; i++) {
221 desc = alloc_desc(i, node); 239 desc = alloc_desc(i, node);
222 set_bit(i, allocated_irqs); 240 set_bit(i, allocated_irqs);
@@ -229,7 +247,7 @@ int __init early_irq_init(void)
229 247
230struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { 248struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
231 [0 ... NR_IRQS-1] = { 249 [0 ... NR_IRQS-1] = {
232 .status = IRQ_DEFAULT_INIT_FLAGS, 250 .istate = IRQS_DISABLED,
233 .handle_irq = handle_bad_irq, 251 .handle_irq = handle_bad_irq,
234 .depth = 1, 252 .depth = 1,
235 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), 253 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
@@ -251,8 +269,8 @@ int __init early_irq_init(void)
251 for (i = 0; i < count; i++) { 269 for (i = 0; i < count; i++) {
252 desc[i].irq_data.irq = i; 270 desc[i].irq_data.irq = i;
253 desc[i].irq_data.chip = &no_irq_chip; 271 desc[i].irq_data.chip = &no_irq_chip;
254 /* TODO : do this allocation on-demand ... */
255 desc[i].kstat_irqs = alloc_percpu(unsigned int); 272 desc[i].kstat_irqs = alloc_percpu(unsigned int);
273 irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
256 alloc_masks(desc + i, GFP_KERNEL, node); 274 alloc_masks(desc + i, GFP_KERNEL, node);
257 desc_smp_init(desc + i, node); 275 desc_smp_init(desc + i, node);
258 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 276 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
@@ -277,24 +295,14 @@ static void free_desc(unsigned int irq)
277 295
278static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) 296static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
279{ 297{
280#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
281 struct irq_desc *desc;
282 unsigned int i;
283
284 for (i = 0; i < cnt; i++) {
285 desc = irq_to_desc(start + i);
286 if (desc && !desc->kstat_irqs) {
287 unsigned int __percpu *stats = alloc_percpu(unsigned int);
288
289 if (!stats)
290 return -1;
291 if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
292 free_percpu(stats);
293 }
294 }
295#endif
296 return start; 298 return start;
297} 299}
300
301static int irq_expand_nr_irqs(unsigned int nr)
302{
303 return -ENOMEM;
304}
305
298#endif /* !CONFIG_SPARSE_IRQ */ 306#endif /* !CONFIG_SPARSE_IRQ */
299 307
300/* Dynamic interrupt handling */ 308/* Dynamic interrupt handling */
@@ -338,14 +346,17 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
338 346
339 mutex_lock(&sparse_irq_lock); 347 mutex_lock(&sparse_irq_lock);
340 348
341 start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0); 349 start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
350 from, cnt, 0);
342 ret = -EEXIST; 351 ret = -EEXIST;
343 if (irq >=0 && start != irq) 352 if (irq >=0 && start != irq)
344 goto err; 353 goto err;
345 354
346 ret = -ENOMEM; 355 if (start + cnt > nr_irqs) {
347 if (start >= nr_irqs) 356 ret = irq_expand_nr_irqs(start + cnt);
348 goto err; 357 if (ret)
358 goto err;
359 }
349 360
350 bitmap_set(allocated_irqs, start, cnt); 361 bitmap_set(allocated_irqs, start, cnt);
351 mutex_unlock(&sparse_irq_lock); 362 mutex_unlock(&sparse_irq_lock);
@@ -392,6 +403,26 @@ unsigned int irq_get_next_irq(unsigned int offset)
392 return find_next_bit(allocated_irqs, nr_irqs, offset); 403 return find_next_bit(allocated_irqs, nr_irqs, offset);
393} 404}
394 405
406struct irq_desc *
407__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus)
408{
409 struct irq_desc *desc = irq_to_desc(irq);
410
411 if (desc) {
412 if (bus)
413 chip_bus_lock(desc);
414 raw_spin_lock_irqsave(&desc->lock, *flags);
415 }
416 return desc;
417}
418
419void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus)
420{
421 raw_spin_unlock_irqrestore(&desc->lock, flags);
422 if (bus)
423 chip_bus_sync_unlock(desc);
424}
425
395/** 426/**
396 * dynamic_irq_cleanup - cleanup a dynamically allocated irq 427 * dynamic_irq_cleanup - cleanup a dynamically allocated irq
397 * @irq: irq number to initialize 428 * @irq: irq number to initialize
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0caa59f747dd..acd599a43bfb 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -17,6 +17,17 @@
17 17
18#include "internals.h" 18#include "internals.h"
19 19
20#ifdef CONFIG_IRQ_FORCED_THREADING
21__read_mostly bool force_irqthreads;
22
23static int __init setup_forced_irqthreads(char *arg)
24{
25 force_irqthreads = true;
26 return 0;
27}
28early_param("threadirqs", setup_forced_irqthreads);
29#endif
30
20/** 31/**
21 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 32 * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
22 * @irq: interrupt number to wait for 33 * @irq: interrupt number to wait for
@@ -30,7 +41,7 @@
30void synchronize_irq(unsigned int irq) 41void synchronize_irq(unsigned int irq)
31{ 42{
32 struct irq_desc *desc = irq_to_desc(irq); 43 struct irq_desc *desc = irq_to_desc(irq);
33 unsigned int status; 44 unsigned int state;
34 45
35 if (!desc) 46 if (!desc)
36 return; 47 return;
@@ -42,16 +53,16 @@ void synchronize_irq(unsigned int irq)
42 * Wait until we're out of the critical section. This might 53 * Wait until we're out of the critical section. This might
43 * give the wrong answer due to the lack of memory barriers. 54 * give the wrong answer due to the lack of memory barriers.
44 */ 55 */
45 while (desc->status & IRQ_INPROGRESS) 56 while (desc->istate & IRQS_INPROGRESS)
46 cpu_relax(); 57 cpu_relax();
47 58
48 /* Ok, that indicated we're done: double-check carefully. */ 59 /* Ok, that indicated we're done: double-check carefully. */
49 raw_spin_lock_irqsave(&desc->lock, flags); 60 raw_spin_lock_irqsave(&desc->lock, flags);
50 status = desc->status; 61 state = desc->istate;
51 raw_spin_unlock_irqrestore(&desc->lock, flags); 62 raw_spin_unlock_irqrestore(&desc->lock, flags);
52 63
53 /* Oops, that failed? */ 64 /* Oops, that failed? */
54 } while (status & IRQ_INPROGRESS); 65 } while (state & IRQS_INPROGRESS);
55 66
56 /* 67 /*
57 * We made sure that no hardirq handler is running. Now verify 68 * We made sure that no hardirq handler is running. Now verify
@@ -73,8 +84,8 @@ int irq_can_set_affinity(unsigned int irq)
73{ 84{
74 struct irq_desc *desc = irq_to_desc(irq); 85 struct irq_desc *desc = irq_to_desc(irq);
75 86
76 if (CHECK_IRQ_PER_CPU(desc->status) || !desc->irq_data.chip || 87 if (!desc || !irqd_can_balance(&desc->irq_data) ||
77 !desc->irq_data.chip->irq_set_affinity) 88 !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity)
78 return 0; 89 return 0;
79 90
80 return 1; 91 return 1;
@@ -100,67 +111,169 @@ void irq_set_thread_affinity(struct irq_desc *desc)
100 } 111 }
101} 112}
102 113
114#ifdef CONFIG_GENERIC_PENDING_IRQ
115static inline bool irq_can_move_pcntxt(struct irq_desc *desc)
116{
117 return irq_settings_can_move_pcntxt(desc);
118}
119static inline bool irq_move_pending(struct irq_desc *desc)
120{
121 return irqd_is_setaffinity_pending(&desc->irq_data);
122}
123static inline void
124irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
125{
126 cpumask_copy(desc->pending_mask, mask);
127}
128static inline void
129irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
130{
131 cpumask_copy(mask, desc->pending_mask);
132}
133#else
134static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; }
135static inline bool irq_move_pending(struct irq_desc *desc) { return false; }
136static inline void
137irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
138static inline void
139irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
140#endif
141
103/** 142/**
104 * irq_set_affinity - Set the irq affinity of a given irq 143 * irq_set_affinity - Set the irq affinity of a given irq
105 * @irq: Interrupt to set affinity 144 * @irq: Interrupt to set affinity
106 * @cpumask: cpumask 145 * @cpumask: cpumask
107 * 146 *
108 */ 147 */
109int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) 148int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
110{ 149{
111 struct irq_desc *desc = irq_to_desc(irq); 150 struct irq_desc *desc = irq_to_desc(irq);
112 struct irq_chip *chip = desc->irq_data.chip; 151 struct irq_chip *chip = desc->irq_data.chip;
113 unsigned long flags; 152 unsigned long flags;
153 int ret = 0;
114 154
115 if (!chip->irq_set_affinity) 155 if (!chip->irq_set_affinity)
116 return -EINVAL; 156 return -EINVAL;
117 157
118 raw_spin_lock_irqsave(&desc->lock, flags); 158 raw_spin_lock_irqsave(&desc->lock, flags);
119 159
120#ifdef CONFIG_GENERIC_PENDING_IRQ 160 if (irq_can_move_pcntxt(desc)) {
121 if (desc->status & IRQ_MOVE_PCNTXT) { 161 ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
122 if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) { 162 switch (ret) {
123 cpumask_copy(desc->irq_data.affinity, cpumask); 163 case IRQ_SET_MASK_OK:
164 cpumask_copy(desc->irq_data.affinity, mask);
165 case IRQ_SET_MASK_OK_NOCOPY:
124 irq_set_thread_affinity(desc); 166 irq_set_thread_affinity(desc);
167 ret = 0;
125 } 168 }
169 } else {
170 irqd_set_move_pending(&desc->irq_data);
171 irq_copy_pending(desc, mask);
126 } 172 }
127 else { 173
128 desc->status |= IRQ_MOVE_PENDING; 174 if (desc->affinity_notify) {
129 cpumask_copy(desc->pending_mask, cpumask); 175 kref_get(&desc->affinity_notify->kref);
130 } 176 schedule_work(&desc->affinity_notify->work);
131#else
132 if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) {
133 cpumask_copy(desc->irq_data.affinity, cpumask);
134 irq_set_thread_affinity(desc);
135 } 177 }
136#endif 178 irq_compat_set_affinity(desc);
137 desc->status |= IRQ_AFFINITY_SET; 179 irqd_set(&desc->irq_data, IRQD_AFFINITY_SET);
138 raw_spin_unlock_irqrestore(&desc->lock, flags); 180 raw_spin_unlock_irqrestore(&desc->lock, flags);
139 return 0; 181 return ret;
140} 182}
141 183
142int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) 184int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
143{ 185{
186 unsigned long flags;
187 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
188
189 if (!desc)
190 return -EINVAL;
191 desc->affinity_hint = m;
192 irq_put_desc_unlock(desc, flags);
193 return 0;
194}
195EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
196
197static void irq_affinity_notify(struct work_struct *work)
198{
199 struct irq_affinity_notify *notify =
200 container_of(work, struct irq_affinity_notify, work);
201 struct irq_desc *desc = irq_to_desc(notify->irq);
202 cpumask_var_t cpumask;
203 unsigned long flags;
204
205 if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL))
206 goto out;
207
208 raw_spin_lock_irqsave(&desc->lock, flags);
209 if (irq_move_pending(desc))
210 irq_get_pending(cpumask, desc);
211 else
212 cpumask_copy(cpumask, desc->irq_data.affinity);
213 raw_spin_unlock_irqrestore(&desc->lock, flags);
214
215 notify->notify(notify, cpumask);
216
217 free_cpumask_var(cpumask);
218out:
219 kref_put(&notify->kref, notify->release);
220}
221
222/**
223 * irq_set_affinity_notifier - control notification of IRQ affinity changes
224 * @irq: Interrupt for which to enable/disable notification
225 * @notify: Context for notification, or %NULL to disable
226 * notification. Function pointers must be initialised;
227 * the other fields will be initialised by this function.
228 *
229 * Must be called in process context. Notification may only be enabled
230 * after the IRQ is allocated and must be disabled before the IRQ is
231 * freed using free_irq().
232 */
233int
234irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
235{
144 struct irq_desc *desc = irq_to_desc(irq); 236 struct irq_desc *desc = irq_to_desc(irq);
237 struct irq_affinity_notify *old_notify;
145 unsigned long flags; 238 unsigned long flags;
146 239
240 /* The release function is promised process context */
241 might_sleep();
242
147 if (!desc) 243 if (!desc)
148 return -EINVAL; 244 return -EINVAL;
149 245
246 /* Complete initialisation of *notify */
247 if (notify) {
248 notify->irq = irq;
249 kref_init(&notify->kref);
250 INIT_WORK(&notify->work, irq_affinity_notify);
251 }
252
150 raw_spin_lock_irqsave(&desc->lock, flags); 253 raw_spin_lock_irqsave(&desc->lock, flags);
151 desc->affinity_hint = m; 254 old_notify = desc->affinity_notify;
255 desc->affinity_notify = notify;
152 raw_spin_unlock_irqrestore(&desc->lock, flags); 256 raw_spin_unlock_irqrestore(&desc->lock, flags);
153 257
258 if (old_notify)
259 kref_put(&old_notify->kref, old_notify->release);
260
154 return 0; 261 return 0;
155} 262}
156EXPORT_SYMBOL_GPL(irq_set_affinity_hint); 263EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
157 264
158#ifndef CONFIG_AUTO_IRQ_AFFINITY 265#ifndef CONFIG_AUTO_IRQ_AFFINITY
159/* 266/*
160 * Generic version of the affinity autoselector. 267 * Generic version of the affinity autoselector.
161 */ 268 */
162static int setup_affinity(unsigned int irq, struct irq_desc *desc) 269static int
270setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
163{ 271{
272 struct irq_chip *chip = irq_desc_get_chip(desc);
273 struct cpumask *set = irq_default_affinity;
274 int ret;
275
276 /* Excludes PER_CPU and NO_BALANCE interrupts */
164 if (!irq_can_set_affinity(irq)) 277 if (!irq_can_set_affinity(irq))
165 return 0; 278 return 0;
166 279
@@ -168,22 +281,29 @@ static int setup_affinity(unsigned int irq, struct irq_desc *desc)
168 * Preserve an userspace affinity setup, but make sure that 281 * Preserve an userspace affinity setup, but make sure that
169 * one of the targets is online. 282 * one of the targets is online.
170 */ 283 */
171 if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { 284 if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) {
172 if (cpumask_any_and(desc->irq_data.affinity, cpu_online_mask) 285 if (cpumask_intersects(desc->irq_data.affinity,
173 < nr_cpu_ids) 286 cpu_online_mask))
174 goto set_affinity; 287 set = desc->irq_data.affinity;
175 else 288 else {
176 desc->status &= ~IRQ_AFFINITY_SET; 289 irq_compat_clr_affinity(desc);
290 irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
291 }
177 } 292 }
178 293
179 cpumask_and(desc->irq_data.affinity, cpu_online_mask, irq_default_affinity); 294 cpumask_and(mask, cpu_online_mask, set);
180set_affinity: 295 ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
181 desc->irq_data.chip->irq_set_affinity(&desc->irq_data, desc->irq_data.affinity, false); 296 switch (ret) {
182 297 case IRQ_SET_MASK_OK:
298 cpumask_copy(desc->irq_data.affinity, mask);
299 case IRQ_SET_MASK_OK_NOCOPY:
300 irq_set_thread_affinity(desc);
301 }
183 return 0; 302 return 0;
184} 303}
185#else 304#else
186static inline int setup_affinity(unsigned int irq, struct irq_desc *d) 305static inline int
306setup_affinity(unsigned int irq, struct irq_desc *d, struct cpumask *mask)
187{ 307{
188 return irq_select_affinity(irq); 308 return irq_select_affinity(irq);
189} 309}
@@ -192,23 +312,21 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
192/* 312/*
193 * Called when affinity is set via /proc/irq 313 * Called when affinity is set via /proc/irq
194 */ 314 */
195int irq_select_affinity_usr(unsigned int irq) 315int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask)
196{ 316{
197 struct irq_desc *desc = irq_to_desc(irq); 317 struct irq_desc *desc = irq_to_desc(irq);
198 unsigned long flags; 318 unsigned long flags;
199 int ret; 319 int ret;
200 320
201 raw_spin_lock_irqsave(&desc->lock, flags); 321 raw_spin_lock_irqsave(&desc->lock, flags);
202 ret = setup_affinity(irq, desc); 322 ret = setup_affinity(irq, desc, mask);
203 if (!ret)
204 irq_set_thread_affinity(desc);
205 raw_spin_unlock_irqrestore(&desc->lock, flags); 323 raw_spin_unlock_irqrestore(&desc->lock, flags);
206
207 return ret; 324 return ret;
208} 325}
209 326
210#else 327#else
211static inline int setup_affinity(unsigned int irq, struct irq_desc *desc) 328static inline int
329setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
212{ 330{
213 return 0; 331 return 0;
214} 332}
@@ -219,13 +337,23 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
219 if (suspend) { 337 if (suspend) {
220 if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)) 338 if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
221 return; 339 return;
222 desc->status |= IRQ_SUSPENDED; 340 desc->istate |= IRQS_SUSPENDED;
223 } 341 }
224 342
225 if (!desc->depth++) { 343 if (!desc->depth++)
226 desc->status |= IRQ_DISABLED; 344 irq_disable(desc);
227 desc->irq_data.chip->irq_disable(&desc->irq_data); 345}
228 } 346
347static int __disable_irq_nosync(unsigned int irq)
348{
349 unsigned long flags;
350 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
351
352 if (!desc)
353 return -EINVAL;
354 __disable_irq(desc, irq, false);
355 irq_put_desc_busunlock(desc, flags);
356 return 0;
229} 357}
230 358
231/** 359/**
@@ -241,17 +369,7 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
241 */ 369 */
242void disable_irq_nosync(unsigned int irq) 370void disable_irq_nosync(unsigned int irq)
243{ 371{
244 struct irq_desc *desc = irq_to_desc(irq); 372 __disable_irq_nosync(irq);
245 unsigned long flags;
246
247 if (!desc)
248 return;
249
250 chip_bus_lock(desc);
251 raw_spin_lock_irqsave(&desc->lock, flags);
252 __disable_irq(desc, irq, false);
253 raw_spin_unlock_irqrestore(&desc->lock, flags);
254 chip_bus_sync_unlock(desc);
255} 373}
256EXPORT_SYMBOL(disable_irq_nosync); 374EXPORT_SYMBOL(disable_irq_nosync);
257 375
@@ -269,21 +387,24 @@ EXPORT_SYMBOL(disable_irq_nosync);
269 */ 387 */
270void disable_irq(unsigned int irq) 388void disable_irq(unsigned int irq)
271{ 389{
272 struct irq_desc *desc = irq_to_desc(irq); 390 if (!__disable_irq_nosync(irq))
273
274 if (!desc)
275 return;
276
277 disable_irq_nosync(irq);
278 if (desc->action)
279 synchronize_irq(irq); 391 synchronize_irq(irq);
280} 392}
281EXPORT_SYMBOL(disable_irq); 393EXPORT_SYMBOL(disable_irq);
282 394
283void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) 395void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
284{ 396{
285 if (resume) 397 if (resume) {
286 desc->status &= ~IRQ_SUSPENDED; 398 if (!(desc->istate & IRQS_SUSPENDED)) {
399 if (!desc->action)
400 return;
401 if (!(desc->action->flags & IRQF_FORCE_RESUME))
402 return;
403 /* Pretend that it got disabled ! */
404 desc->depth++;
405 }
406 desc->istate &= ~IRQS_SUSPENDED;
407 }
287 408
288 switch (desc->depth) { 409 switch (desc->depth) {
289 case 0: 410 case 0:
@@ -291,12 +412,11 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
291 WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); 412 WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
292 break; 413 break;
293 case 1: { 414 case 1: {
294 unsigned int status = desc->status & ~IRQ_DISABLED; 415 if (desc->istate & IRQS_SUSPENDED)
295
296 if (desc->status & IRQ_SUSPENDED)
297 goto err_out; 416 goto err_out;
298 /* Prevent probing on this irq: */ 417 /* Prevent probing on this irq: */
299 desc->status = status | IRQ_NOPROBE; 418 irq_settings_set_noprobe(desc);
419 irq_enable(desc);
300 check_irq_resend(desc, irq); 420 check_irq_resend(desc, irq);
301 /* fall-through */ 421 /* fall-through */
302 } 422 }
@@ -318,21 +438,18 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
318 */ 438 */
319void enable_irq(unsigned int irq) 439void enable_irq(unsigned int irq)
320{ 440{
321 struct irq_desc *desc = irq_to_desc(irq);
322 unsigned long flags; 441 unsigned long flags;
442 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
323 443
324 if (!desc) 444 if (!desc)
325 return; 445 return;
446 if (WARN(!desc->irq_data.chip,
447 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
448 goto out;
326 449
327 if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable,
328 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
329 return;
330
331 chip_bus_lock(desc);
332 raw_spin_lock_irqsave(&desc->lock, flags);
333 __enable_irq(desc, irq, false); 450 __enable_irq(desc, irq, false);
334 raw_spin_unlock_irqrestore(&desc->lock, flags); 451out:
335 chip_bus_sync_unlock(desc); 452 irq_put_desc_busunlock(desc, flags);
336} 453}
337EXPORT_SYMBOL(enable_irq); 454EXPORT_SYMBOL(enable_irq);
338 455
@@ -348,7 +465,7 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
348} 465}
349 466
350/** 467/**
351 * set_irq_wake - control irq power management wakeup 468 * irq_set_irq_wake - control irq power management wakeup
352 * @irq: interrupt to control 469 * @irq: interrupt to control
353 * @on: enable/disable power management wakeup 470 * @on: enable/disable power management wakeup
354 * 471 *
@@ -359,23 +476,22 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
359 * Wakeup mode lets this IRQ wake the system from sleep 476 * Wakeup mode lets this IRQ wake the system from sleep
360 * states like "suspend to RAM". 477 * states like "suspend to RAM".
361 */ 478 */
362int set_irq_wake(unsigned int irq, unsigned int on) 479int irq_set_irq_wake(unsigned int irq, unsigned int on)
363{ 480{
364 struct irq_desc *desc = irq_to_desc(irq);
365 unsigned long flags; 481 unsigned long flags;
482 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
366 int ret = 0; 483 int ret = 0;
367 484
368 /* wakeup-capable irqs can be shared between drivers that 485 /* wakeup-capable irqs can be shared between drivers that
369 * don't need to have the same sleep mode behaviors. 486 * don't need to have the same sleep mode behaviors.
370 */ 487 */
371 raw_spin_lock_irqsave(&desc->lock, flags);
372 if (on) { 488 if (on) {
373 if (desc->wake_depth++ == 0) { 489 if (desc->wake_depth++ == 0) {
374 ret = set_irq_wake_real(irq, on); 490 ret = set_irq_wake_real(irq, on);
375 if (ret) 491 if (ret)
376 desc->wake_depth = 0; 492 desc->wake_depth = 0;
377 else 493 else
378 desc->status |= IRQ_WAKEUP; 494 irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE);
379 } 495 }
380 } else { 496 } else {
381 if (desc->wake_depth == 0) { 497 if (desc->wake_depth == 0) {
@@ -385,14 +501,13 @@ int set_irq_wake(unsigned int irq, unsigned int on)
385 if (ret) 501 if (ret)
386 desc->wake_depth = 1; 502 desc->wake_depth = 1;
387 else 503 else
388 desc->status &= ~IRQ_WAKEUP; 504 irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
389 } 505 }
390 } 506 }
391 507 irq_put_desc_busunlock(desc, flags);
392 raw_spin_unlock_irqrestore(&desc->lock, flags);
393 return ret; 508 return ret;
394} 509}
395EXPORT_SYMBOL(set_irq_wake); 510EXPORT_SYMBOL(irq_set_irq_wake);
396 511
397/* 512/*
398 * Internal function that tells the architecture code whether a 513 * Internal function that tells the architecture code whether a
@@ -401,43 +516,27 @@ EXPORT_SYMBOL(set_irq_wake);
401 */ 516 */
402int can_request_irq(unsigned int irq, unsigned long irqflags) 517int can_request_irq(unsigned int irq, unsigned long irqflags)
403{ 518{
404 struct irq_desc *desc = irq_to_desc(irq);
405 struct irqaction *action;
406 unsigned long flags; 519 unsigned long flags;
520 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
521 int canrequest = 0;
407 522
408 if (!desc) 523 if (!desc)
409 return 0; 524 return 0;
410 525
411 if (desc->status & IRQ_NOREQUEST) 526 if (irq_settings_can_request(desc)) {
412 return 0; 527 if (desc->action)
413 528 if (irqflags & desc->action->flags & IRQF_SHARED)
414 raw_spin_lock_irqsave(&desc->lock, flags); 529 canrequest =1;
415 action = desc->action; 530 }
416 if (action) 531 irq_put_desc_unlock(desc, flags);
417 if (irqflags & action->flags & IRQF_SHARED) 532 return canrequest;
418 action = NULL;
419
420 raw_spin_unlock_irqrestore(&desc->lock, flags);
421
422 return !action;
423}
424
425void compat_irq_chip_set_default_handler(struct irq_desc *desc)
426{
427 /*
428 * If the architecture still has not overriden
429 * the flow handler then zap the default. This
430 * should catch incorrect flow-type setting.
431 */
432 if (desc->handle_irq == &handle_bad_irq)
433 desc->handle_irq = NULL;
434} 533}
435 534
436int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, 535int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
437 unsigned long flags) 536 unsigned long flags)
438{ 537{
439 int ret;
440 struct irq_chip *chip = desc->irq_data.chip; 538 struct irq_chip *chip = desc->irq_data.chip;
539 int ret, unmask = 0;
441 540
442 if (!chip || !chip->irq_set_type) { 541 if (!chip || !chip->irq_set_type) {
443 /* 542 /*
@@ -449,23 +548,43 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
449 return 0; 548 return 0;
450 } 549 }
451 550
551 flags &= IRQ_TYPE_SENSE_MASK;
552
553 if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
554 if (!(desc->istate & IRQS_MASKED))
555 mask_irq(desc);
556 if (!(desc->istate & IRQS_DISABLED))
557 unmask = 1;
558 }
559
452 /* caller masked out all except trigger mode flags */ 560 /* caller masked out all except trigger mode flags */
453 ret = chip->irq_set_type(&desc->irq_data, flags); 561 ret = chip->irq_set_type(&desc->irq_data, flags);
454 562
455 if (ret) 563 switch (ret) {
456 pr_err("setting trigger mode %lu for irq %u failed (%pF)\n", 564 case IRQ_SET_MASK_OK:
457 flags, irq, chip->irq_set_type); 565 irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK);
458 else { 566 irqd_set(&desc->irq_data, flags);
459 if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) 567
460 flags |= IRQ_LEVEL; 568 case IRQ_SET_MASK_OK_NOCOPY:
461 /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */ 569 flags = irqd_get_trigger_type(&desc->irq_data);
462 desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); 570 irq_settings_set_trigger_mask(desc, flags);
463 desc->status |= flags; 571 irqd_clear(&desc->irq_data, IRQD_LEVEL);
572 irq_settings_clr_level(desc);
573 if (flags & IRQ_TYPE_LEVEL_MASK) {
574 irq_settings_set_level(desc);
575 irqd_set(&desc->irq_data, IRQD_LEVEL);
576 }
464 577
465 if (chip != desc->irq_data.chip) 578 if (chip != desc->irq_data.chip)
466 irq_chip_set_defaults(desc->irq_data.chip); 579 irq_chip_set_defaults(desc->irq_data.chip);
580 ret = 0;
581 break;
582 default:
583 pr_err("setting trigger mode %lu for irq %u failed (%pF)\n",
584 flags, irq, chip->irq_set_type);
467 } 585 }
468 586 if (unmask)
587 unmask_irq(desc);
469 return ret; 588 return ret;
470} 589}
471 590
@@ -509,8 +628,11 @@ static int irq_wait_for_interrupt(struct irqaction *action)
509 * handler finished. unmask if the interrupt has not been disabled and 628 * handler finished. unmask if the interrupt has not been disabled and
510 * is marked MASKED. 629 * is marked MASKED.
511 */ 630 */
512static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) 631static void irq_finalize_oneshot(struct irq_desc *desc,
632 struct irqaction *action, bool force)
513{ 633{
634 if (!(desc->istate & IRQS_ONESHOT))
635 return;
514again: 636again:
515 chip_bus_lock(desc); 637 chip_bus_lock(desc);
516 raw_spin_lock_irq(&desc->lock); 638 raw_spin_lock_irq(&desc->lock);
@@ -522,26 +644,44 @@ again:
522 * The thread is faster done than the hard interrupt handler 644 * The thread is faster done than the hard interrupt handler
523 * on the other CPU. If we unmask the irq line then the 645 * on the other CPU. If we unmask the irq line then the
524 * interrupt can come in again and masks the line, leaves due 646 * interrupt can come in again and masks the line, leaves due
525 * to IRQ_INPROGRESS and the irq line is masked forever. 647 * to IRQS_INPROGRESS and the irq line is masked forever.
648 *
649 * This also serializes the state of shared oneshot handlers
650 * versus "desc->threads_onehsot |= action->thread_mask;" in
651 * irq_wake_thread(). See the comment there which explains the
652 * serialization.
526 */ 653 */
527 if (unlikely(desc->status & IRQ_INPROGRESS)) { 654 if (unlikely(desc->istate & IRQS_INPROGRESS)) {
528 raw_spin_unlock_irq(&desc->lock); 655 raw_spin_unlock_irq(&desc->lock);
529 chip_bus_sync_unlock(desc); 656 chip_bus_sync_unlock(desc);
530 cpu_relax(); 657 cpu_relax();
531 goto again; 658 goto again;
532 } 659 }
533 660
534 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { 661 /*
535 desc->status &= ~IRQ_MASKED; 662 * Now check again, whether the thread should run. Otherwise
663 * we would clear the threads_oneshot bit of this thread which
664 * was just set.
665 */
666 if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
667 goto out_unlock;
668
669 desc->threads_oneshot &= ~action->thread_mask;
670
671 if (!desc->threads_oneshot && !(desc->istate & IRQS_DISABLED) &&
672 (desc->istate & IRQS_MASKED)) {
673 irq_compat_clr_masked(desc);
674 desc->istate &= ~IRQS_MASKED;
536 desc->irq_data.chip->irq_unmask(&desc->irq_data); 675 desc->irq_data.chip->irq_unmask(&desc->irq_data);
537 } 676 }
677out_unlock:
538 raw_spin_unlock_irq(&desc->lock); 678 raw_spin_unlock_irq(&desc->lock);
539 chip_bus_sync_unlock(desc); 679 chip_bus_sync_unlock(desc);
540} 680}
541 681
542#ifdef CONFIG_SMP 682#ifdef CONFIG_SMP
543/* 683/*
544 * Check whether we need to change the affinity of the interrupt thread. 684 * Check whether we need to chasnge the affinity of the interrupt thread.
545 */ 685 */
546static void 686static void
547irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) 687irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
@@ -573,6 +713,32 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
573#endif 713#endif
574 714
575/* 715/*
716 * Interrupts which are not explicitely requested as threaded
717 * interrupts rely on the implicit bh/preempt disable of the hard irq
718 * context. So we need to disable bh here to avoid deadlocks and other
719 * side effects.
720 */
721static void
722irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
723{
724 local_bh_disable();
725 action->thread_fn(action->irq, action->dev_id);
726 irq_finalize_oneshot(desc, action, false);
727 local_bh_enable();
728}
729
730/*
731 * Interrupts explicitely requested as threaded interupts want to be
732 * preemtible - many of them need to sleep and wait for slow busses to
733 * complete.
734 */
735static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action)
736{
737 action->thread_fn(action->irq, action->dev_id);
738 irq_finalize_oneshot(desc, action, false);
739}
740
741/*
576 * Interrupt handler thread 742 * Interrupt handler thread
577 */ 743 */
578static int irq_thread(void *data) 744static int irq_thread(void *data)
@@ -582,7 +748,14 @@ static int irq_thread(void *data)
582 }; 748 };
583 struct irqaction *action = data; 749 struct irqaction *action = data;
584 struct irq_desc *desc = irq_to_desc(action->irq); 750 struct irq_desc *desc = irq_to_desc(action->irq);
585 int wake, oneshot = desc->status & IRQ_ONESHOT; 751 void (*handler_fn)(struct irq_desc *desc, struct irqaction *action);
752 int wake;
753
754 if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD,
755 &action->thread_flags))
756 handler_fn = irq_forced_thread_fn;
757 else
758 handler_fn = irq_thread_fn;
586 759
587 sched_setscheduler(current, SCHED_FIFO, &param); 760 sched_setscheduler(current, SCHED_FIFO, &param);
588 current->irqaction = action; 761 current->irqaction = action;
@@ -594,23 +767,20 @@ static int irq_thread(void *data)
594 atomic_inc(&desc->threads_active); 767 atomic_inc(&desc->threads_active);
595 768
596 raw_spin_lock_irq(&desc->lock); 769 raw_spin_lock_irq(&desc->lock);
597 if (unlikely(desc->status & IRQ_DISABLED)) { 770 if (unlikely(desc->istate & IRQS_DISABLED)) {
598 /* 771 /*
599 * CHECKME: We might need a dedicated 772 * CHECKME: We might need a dedicated
600 * IRQ_THREAD_PENDING flag here, which 773 * IRQ_THREAD_PENDING flag here, which
601 * retriggers the thread in check_irq_resend() 774 * retriggers the thread in check_irq_resend()
602 * but AFAICT IRQ_PENDING should be fine as it 775 * but AFAICT IRQS_PENDING should be fine as it
603 * retriggers the interrupt itself --- tglx 776 * retriggers the interrupt itself --- tglx
604 */ 777 */
605 desc->status |= IRQ_PENDING; 778 irq_compat_set_pending(desc);
779 desc->istate |= IRQS_PENDING;
606 raw_spin_unlock_irq(&desc->lock); 780 raw_spin_unlock_irq(&desc->lock);
607 } else { 781 } else {
608 raw_spin_unlock_irq(&desc->lock); 782 raw_spin_unlock_irq(&desc->lock);
609 783 handler_fn(desc, action);
610 action->thread_fn(action->irq, action->dev_id);
611
612 if (oneshot)
613 irq_finalize_oneshot(action->irq, desc);
614 } 784 }
615 785
616 wake = atomic_dec_and_test(&desc->threads_active); 786 wake = atomic_dec_and_test(&desc->threads_active);
@@ -619,6 +789,9 @@ static int irq_thread(void *data)
619 wake_up(&desc->wait_for_threads); 789 wake_up(&desc->wait_for_threads);
620 } 790 }
621 791
792 /* Prevent a stale desc->threads_oneshot */
793 irq_finalize_oneshot(desc, action, true);
794
622 /* 795 /*
623 * Clear irqaction. Otherwise exit_irq_thread() would make 796 * Clear irqaction. Otherwise exit_irq_thread() would make
624 * fuzz about an active irq thread going into nirvana. 797 * fuzz about an active irq thread going into nirvana.
@@ -633,6 +806,7 @@ static int irq_thread(void *data)
633void exit_irq_thread(void) 806void exit_irq_thread(void)
634{ 807{
635 struct task_struct *tsk = current; 808 struct task_struct *tsk = current;
809 struct irq_desc *desc;
636 810
637 if (!tsk->irqaction) 811 if (!tsk->irqaction)
638 return; 812 return;
@@ -641,6 +815,14 @@ void exit_irq_thread(void)
641 "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", 815 "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
642 tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq); 816 tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
643 817
818 desc = irq_to_desc(tsk->irqaction->irq);
819
820 /*
821 * Prevent a stale desc->threads_oneshot. Must be called
822 * before setting the IRQTF_DIED flag.
823 */
824 irq_finalize_oneshot(desc, tsk->irqaction, true);
825
644 /* 826 /*
645 * Set the THREAD DIED flag to prevent further wakeups of the 827 * Set the THREAD DIED flag to prevent further wakeups of the
646 * soon to be gone threaded handler. 828 * soon to be gone threaded handler.
@@ -648,6 +830,22 @@ void exit_irq_thread(void)
648 set_bit(IRQTF_DIED, &tsk->irqaction->flags); 830 set_bit(IRQTF_DIED, &tsk->irqaction->flags);
649} 831}
650 832
833static void irq_setup_forced_threading(struct irqaction *new)
834{
835 if (!force_irqthreads)
836 return;
837 if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
838 return;
839
840 new->flags |= IRQF_ONESHOT;
841
842 if (!new->thread_fn) {
843 set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
844 new->thread_fn = new->handler;
845 new->handler = irq_default_primary_handler;
846 }
847}
848
651/* 849/*
652 * Internal function to register an irqaction - typically used to 850 * Internal function to register an irqaction - typically used to
653 * allocate special interrupts that are part of the architecture. 851 * allocate special interrupts that are part of the architecture.
@@ -657,9 +855,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
657{ 855{
658 struct irqaction *old, **old_ptr; 856 struct irqaction *old, **old_ptr;
659 const char *old_name = NULL; 857 const char *old_name = NULL;
660 unsigned long flags; 858 unsigned long flags, thread_mask = 0;
661 int nested, shared = 0; 859 int ret, nested, shared = 0;
662 int ret; 860 cpumask_var_t mask;
663 861
664 if (!desc) 862 if (!desc)
665 return -EINVAL; 863 return -EINVAL;
@@ -683,15 +881,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
683 rand_initialize_irq(irq); 881 rand_initialize_irq(irq);
684 } 882 }
685 883
686 /* Oneshot interrupts are not allowed with shared */
687 if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED))
688 return -EINVAL;
689
690 /* 884 /*
691 * Check whether the interrupt nests into another interrupt 885 * Check whether the interrupt nests into another interrupt
692 * thread. 886 * thread.
693 */ 887 */
694 nested = desc->status & IRQ_NESTED_THREAD; 888 nested = irq_settings_is_nested_thread(desc);
695 if (nested) { 889 if (nested) {
696 if (!new->thread_fn) 890 if (!new->thread_fn)
697 return -EINVAL; 891 return -EINVAL;
@@ -701,6 +895,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
701 * dummy function which warns when called. 895 * dummy function which warns when called.
702 */ 896 */
703 new->handler = irq_nested_primary_handler; 897 new->handler = irq_nested_primary_handler;
898 } else {
899 irq_setup_forced_threading(new);
704 } 900 }
705 901
706 /* 902 /*
@@ -724,6 +920,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
724 new->thread = t; 920 new->thread = t;
725 } 921 }
726 922
923 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
924 ret = -ENOMEM;
925 goto out_thread;
926 }
927
727 /* 928 /*
728 * The following block of code has to be executed atomically 929 * The following block of code has to be executed atomically
729 */ 930 */
@@ -735,29 +936,40 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
735 * Can't share interrupts unless both agree to and are 936 * Can't share interrupts unless both agree to and are
736 * the same type (level, edge, polarity). So both flag 937 * the same type (level, edge, polarity). So both flag
737 * fields must have IRQF_SHARED set and the bits which 938 * fields must have IRQF_SHARED set and the bits which
738 * set the trigger type must match. 939 * set the trigger type must match. Also all must
940 * agree on ONESHOT.
739 */ 941 */
740 if (!((old->flags & new->flags) & IRQF_SHARED) || 942 if (!((old->flags & new->flags) & IRQF_SHARED) ||
741 ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) { 943 ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) ||
944 ((old->flags ^ new->flags) & IRQF_ONESHOT)) {
742 old_name = old->name; 945 old_name = old->name;
743 goto mismatch; 946 goto mismatch;
744 } 947 }
745 948
746#if defined(CONFIG_IRQ_PER_CPU)
747 /* All handlers must agree on per-cpuness */ 949 /* All handlers must agree on per-cpuness */
748 if ((old->flags & IRQF_PERCPU) != 950 if ((old->flags & IRQF_PERCPU) !=
749 (new->flags & IRQF_PERCPU)) 951 (new->flags & IRQF_PERCPU))
750 goto mismatch; 952 goto mismatch;
751#endif
752 953
753 /* add new interrupt at end of irq queue */ 954 /* add new interrupt at end of irq queue */
754 do { 955 do {
956 thread_mask |= old->thread_mask;
755 old_ptr = &old->next; 957 old_ptr = &old->next;
756 old = *old_ptr; 958 old = *old_ptr;
757 } while (old); 959 } while (old);
758 shared = 1; 960 shared = 1;
759 } 961 }
760 962
963 /*
964 * Setup the thread mask for this irqaction. Unlikely to have
965 * 32 resp 64 irqs sharing one line, but who knows.
966 */
967 if (new->flags & IRQF_ONESHOT && thread_mask == ~0UL) {
968 ret = -EBUSY;
969 goto out_mask;
970 }
971 new->thread_mask = 1 << ffz(thread_mask);
972
761 if (!shared) { 973 if (!shared) {
762 irq_chip_set_defaults(desc->irq_data.chip); 974 irq_chip_set_defaults(desc->irq_data.chip);
763 975
@@ -769,42 +981,44 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
769 new->flags & IRQF_TRIGGER_MASK); 981 new->flags & IRQF_TRIGGER_MASK);
770 982
771 if (ret) 983 if (ret)
772 goto out_thread; 984 goto out_mask;
773 } else 985 }
774 compat_irq_chip_set_default_handler(desc);
775#if defined(CONFIG_IRQ_PER_CPU)
776 if (new->flags & IRQF_PERCPU)
777 desc->status |= IRQ_PER_CPU;
778#endif
779 986
780 desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | IRQ_ONESHOT | 987 desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
781 IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED); 988 IRQS_INPROGRESS | IRQS_ONESHOT | \
989 IRQS_WAITING);
990
991 if (new->flags & IRQF_PERCPU) {
992 irqd_set(&desc->irq_data, IRQD_PER_CPU);
993 irq_settings_set_per_cpu(desc);
994 }
782 995
783 if (new->flags & IRQF_ONESHOT) 996 if (new->flags & IRQF_ONESHOT)
784 desc->status |= IRQ_ONESHOT; 997 desc->istate |= IRQS_ONESHOT;
785 998
786 if (!(desc->status & IRQ_NOAUTOEN)) { 999 if (irq_settings_can_autoenable(desc))
787 desc->depth = 0; 1000 irq_startup(desc);
788 desc->status &= ~IRQ_DISABLED; 1001 else
789 desc->irq_data.chip->irq_startup(&desc->irq_data);
790 } else
791 /* Undo nested disables: */ 1002 /* Undo nested disables: */
792 desc->depth = 1; 1003 desc->depth = 1;
793 1004
794 /* Exclude IRQ from balancing if requested */ 1005 /* Exclude IRQ from balancing if requested */
795 if (new->flags & IRQF_NOBALANCING) 1006 if (new->flags & IRQF_NOBALANCING) {
796 desc->status |= IRQ_NO_BALANCING; 1007 irq_settings_set_no_balancing(desc);
1008 irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
1009 }
797 1010
798 /* Set default affinity mask once everything is setup */ 1011 /* Set default affinity mask once everything is setup */
799 setup_affinity(irq, desc); 1012 setup_affinity(irq, desc, mask);
800 1013
801 } else if ((new->flags & IRQF_TRIGGER_MASK) 1014 } else if (new->flags & IRQF_TRIGGER_MASK) {
802 && (new->flags & IRQF_TRIGGER_MASK) 1015 unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
803 != (desc->status & IRQ_TYPE_SENSE_MASK)) { 1016 unsigned int omsk = irq_settings_get_trigger_mask(desc);
804 /* hope the handler works with the actual trigger mode... */ 1017
805 pr_warning("IRQ %d uses trigger mode %d; requested %d\n", 1018 if (nmsk != omsk)
806 irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK), 1019 /* hope the handler works with current trigger mode */
807 (int)(new->flags & IRQF_TRIGGER_MASK)); 1020 pr_warning("IRQ %d uses trigger mode %u; requested %u\n",
1021 irq, nmsk, omsk);
808 } 1022 }
809 1023
810 new->irq = irq; 1024 new->irq = irq;
@@ -818,8 +1032,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
818 * Check whether we disabled the irq via the spurious handler 1032 * Check whether we disabled the irq via the spurious handler
819 * before. Reenable it and give it another chance. 1033 * before. Reenable it and give it another chance.
820 */ 1034 */
821 if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) { 1035 if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
822 desc->status &= ~IRQ_SPURIOUS_DISABLED; 1036 desc->istate &= ~IRQS_SPURIOUS_DISABLED;
823 __enable_irq(desc, irq, false); 1037 __enable_irq(desc, irq, false);
824 } 1038 }
825 1039
@@ -849,6 +1063,9 @@ mismatch:
849#endif 1063#endif
850 ret = -EBUSY; 1064 ret = -EBUSY;
851 1065
1066out_mask:
1067 free_cpumask_var(mask);
1068
852out_thread: 1069out_thread:
853 raw_spin_unlock_irqrestore(&desc->lock, flags); 1070 raw_spin_unlock_irqrestore(&desc->lock, flags);
854 if (new->thread) { 1071 if (new->thread) {
@@ -871,9 +1088,14 @@ out_thread:
871 */ 1088 */
872int setup_irq(unsigned int irq, struct irqaction *act) 1089int setup_irq(unsigned int irq, struct irqaction *act)
873{ 1090{
1091 int retval;
874 struct irq_desc *desc = irq_to_desc(irq); 1092 struct irq_desc *desc = irq_to_desc(irq);
875 1093
876 return __setup_irq(irq, desc, act); 1094 chip_bus_lock(desc);
1095 retval = __setup_irq(irq, desc, act);
1096 chip_bus_sync_unlock(desc);
1097
1098 return retval;
877} 1099}
878EXPORT_SYMBOL_GPL(setup_irq); 1100EXPORT_SYMBOL_GPL(setup_irq);
879 1101
@@ -924,13 +1146,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
924#endif 1146#endif
925 1147
926 /* If this was the last handler, shut down the IRQ line: */ 1148 /* If this was the last handler, shut down the IRQ line: */
927 if (!desc->action) { 1149 if (!desc->action)
928 desc->status |= IRQ_DISABLED; 1150 irq_shutdown(desc);
929 if (desc->irq_data.chip->irq_shutdown)
930 desc->irq_data.chip->irq_shutdown(&desc->irq_data);
931 else
932 desc->irq_data.chip->irq_disable(&desc->irq_data);
933 }
934 1151
935#ifdef CONFIG_SMP 1152#ifdef CONFIG_SMP
936 /* make sure affinity_hint is cleaned up */ 1153 /* make sure affinity_hint is cleaned up */
@@ -1004,6 +1221,11 @@ void free_irq(unsigned int irq, void *dev_id)
1004 if (!desc) 1221 if (!desc)
1005 return; 1222 return;
1006 1223
1224#ifdef CONFIG_SMP
1225 if (WARN_ON(desc->affinity_notify))
1226 desc->affinity_notify = NULL;
1227#endif
1228
1007 chip_bus_lock(desc); 1229 chip_bus_lock(desc);
1008 kfree(__free_irq(irq, dev_id)); 1230 kfree(__free_irq(irq, dev_id));
1009 chip_bus_sync_unlock(desc); 1231 chip_bus_sync_unlock(desc);
@@ -1074,7 +1296,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1074 if (!desc) 1296 if (!desc)
1075 return -EINVAL; 1297 return -EINVAL;
1076 1298
1077 if (desc->status & IRQ_NOREQUEST) 1299 if (!irq_settings_can_request(desc))
1078 return -EINVAL; 1300 return -EINVAL;
1079 1301
1080 if (!handler) { 1302 if (!handler) {
@@ -1100,7 +1322,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1100 if (retval) 1322 if (retval)
1101 kfree(action); 1323 kfree(action);
1102 1324
1103#ifdef CONFIG_DEBUG_SHIRQ 1325#ifdef CONFIG_DEBUG_SHIRQ_FIXME
1104 if (!retval && (irqflags & IRQF_SHARED)) { 1326 if (!retval && (irqflags & IRQF_SHARED)) {
1105 /* 1327 /*
1106 * It's a shared IRQ -- the driver ought to be prepared for it 1328 * It's a shared IRQ -- the driver ought to be prepared for it
@@ -1149,7 +1371,7 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
1149 if (!desc) 1371 if (!desc)
1150 return -EINVAL; 1372 return -EINVAL;
1151 1373
1152 if (desc->status & IRQ_NESTED_THREAD) { 1374 if (irq_settings_is_nested_thread(desc)) {
1153 ret = request_threaded_irq(irq, NULL, handler, 1375 ret = request_threaded_irq(irq, NULL, handler,
1154 flags, name, dev_id); 1376 flags, name, dev_id);
1155 return !ret ? IRQC_IS_NESTED : ret; 1377 return !ret ? IRQC_IS_NESTED : ret;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 441fd629ff04..ec4806d4778b 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,23 +4,23 @@
4 4
5#include "internals.h" 5#include "internals.h"
6 6
7void move_masked_irq(int irq) 7void irq_move_masked_irq(struct irq_data *idata)
8{ 8{
9 struct irq_desc *desc = irq_to_desc(irq); 9 struct irq_desc *desc = irq_data_to_desc(idata);
10 struct irq_chip *chip = desc->irq_data.chip; 10 struct irq_chip *chip = idata->chip;
11 11
12 if (likely(!(desc->status & IRQ_MOVE_PENDING))) 12 if (likely(!irqd_is_setaffinity_pending(&desc->irq_data)))
13 return; 13 return;
14 14
15 /* 15 /*
16 * Paranoia: cpu-local interrupts shouldn't be calling in here anyway. 16 * Paranoia: cpu-local interrupts shouldn't be calling in here anyway.
17 */ 17 */
18 if (CHECK_IRQ_PER_CPU(desc->status)) { 18 if (!irqd_can_balance(&desc->irq_data)) {
19 WARN_ON(1); 19 WARN_ON(1);
20 return; 20 return;
21 } 21 }
22 22
23 desc->status &= ~IRQ_MOVE_PENDING; 23 irqd_clr_move_pending(&desc->irq_data);
24 24
25 if (unlikely(cpumask_empty(desc->pending_mask))) 25 if (unlikely(cpumask_empty(desc->pending_mask)))
26 return; 26 return;
@@ -53,15 +53,20 @@ void move_masked_irq(int irq)
53 cpumask_clear(desc->pending_mask); 53 cpumask_clear(desc->pending_mask);
54} 54}
55 55
56void move_native_irq(int irq) 56void move_masked_irq(int irq)
57{
58 irq_move_masked_irq(irq_get_irq_data(irq));
59}
60
61void irq_move_irq(struct irq_data *idata)
57{ 62{
58 struct irq_desc *desc = irq_to_desc(irq); 63 struct irq_desc *desc = irq_data_to_desc(idata);
59 bool masked; 64 bool masked;
60 65
61 if (likely(!(desc->status & IRQ_MOVE_PENDING))) 66 if (likely(!irqd_is_setaffinity_pending(idata)))
62 return; 67 return;
63 68
64 if (unlikely(desc->status & IRQ_DISABLED)) 69 if (unlikely(desc->istate & IRQS_DISABLED))
65 return; 70 return;
66 71
67 /* 72 /*
@@ -69,10 +74,15 @@ void move_native_irq(int irq)
69 * threaded interrupt with ONESHOT set, we can end up with an 74 * threaded interrupt with ONESHOT set, we can end up with an
70 * interrupt storm. 75 * interrupt storm.
71 */ 76 */
72 masked = desc->status & IRQ_MASKED; 77 masked = desc->istate & IRQS_MASKED;
73 if (!masked) 78 if (!masked)
74 desc->irq_data.chip->irq_mask(&desc->irq_data); 79 idata->chip->irq_mask(idata);
75 move_masked_irq(irq); 80 irq_move_masked_irq(idata);
76 if (!masked) 81 if (!masked)
77 desc->irq_data.chip->irq_unmask(&desc->irq_data); 82 idata->chip->irq_unmask(idata);
83}
84
85void move_native_irq(int irq)
86{
87 irq_move_irq(irq_get_irq_data(irq));
78} 88}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 0d4005d85b03..f76fc00c9877 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -18,7 +18,7 @@
18 * During system-wide suspend or hibernation device drivers need to be prevented 18 * During system-wide suspend or hibernation device drivers need to be prevented
19 * from receiving interrupts and this function is provided for this purpose. 19 * from receiving interrupts and this function is provided for this purpose.
20 * It marks all interrupt lines in use, except for the timer ones, as disabled 20 * It marks all interrupt lines in use, except for the timer ones, as disabled
21 * and sets the IRQ_SUSPENDED flag for each of them. 21 * and sets the IRQS_SUSPENDED flag for each of them.
22 */ 22 */
23void suspend_device_irqs(void) 23void suspend_device_irqs(void)
24{ 24{
@@ -34,7 +34,7 @@ void suspend_device_irqs(void)
34 } 34 }
35 35
36 for_each_irq_desc(irq, desc) 36 for_each_irq_desc(irq, desc)
37 if (desc->status & IRQ_SUSPENDED) 37 if (desc->istate & IRQS_SUSPENDED)
38 synchronize_irq(irq); 38 synchronize_irq(irq);
39} 39}
40EXPORT_SYMBOL_GPL(suspend_device_irqs); 40EXPORT_SYMBOL_GPL(suspend_device_irqs);
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs);
43 * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() 43 * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs()
44 * 44 *
45 * Enable all interrupt lines previously disabled by suspend_device_irqs() that 45 * Enable all interrupt lines previously disabled by suspend_device_irqs() that
46 * have the IRQ_SUSPENDED flag set. 46 * have the IRQS_SUSPENDED flag set.
47 */ 47 */
48void resume_device_irqs(void) 48void resume_device_irqs(void)
49{ 49{
@@ -53,9 +53,6 @@ void resume_device_irqs(void)
53 for_each_irq_desc(irq, desc) { 53 for_each_irq_desc(irq, desc) {
54 unsigned long flags; 54 unsigned long flags;
55 55
56 if (!(desc->status & IRQ_SUSPENDED))
57 continue;
58
59 raw_spin_lock_irqsave(&desc->lock, flags); 56 raw_spin_lock_irqsave(&desc->lock, flags);
60 __enable_irq(desc, irq, true); 57 __enable_irq(desc, irq, true);
61 raw_spin_unlock_irqrestore(&desc->lock, flags); 58 raw_spin_unlock_irqrestore(&desc->lock, flags);
@@ -71,9 +68,24 @@ int check_wakeup_irqs(void)
71 struct irq_desc *desc; 68 struct irq_desc *desc;
72 int irq; 69 int irq;
73 70
74 for_each_irq_desc(irq, desc) 71 for_each_irq_desc(irq, desc) {
75 if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING)) 72 if (irqd_is_wakeup_set(&desc->irq_data)) {
76 return -EBUSY; 73 if (desc->istate & IRQS_PENDING)
74 return -EBUSY;
75 continue;
76 }
77 /*
78 * Check the non wakeup interrupts whether they need
79 * to be masked before finally going into suspend
80 * state. That's for hardware which has no wakeup
81 * source configuration facility. The chip
82 * implementation indicates that with
83 * IRQCHIP_MASK_ON_SUSPEND.
84 */
85 if (desc->istate & IRQS_SUSPENDED &&
86 irq_desc_get_chip(desc)->flags & IRQCHIP_MASK_ON_SUSPEND)
87 mask_irq(desc);
88 }
77 89
78 return 0; 90 return 0;
79} 91}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6c8a2a9f8a7b..4cc2e5ed0bec 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -11,6 +11,7 @@
11#include <linux/proc_fs.h> 11#include <linux/proc_fs.h>
12#include <linux/seq_file.h> 12#include <linux/seq_file.h>
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/kernel_stat.h>
14 15
15#include "internals.h" 16#include "internals.h"
16 17
@@ -24,7 +25,7 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
24 const struct cpumask *mask = desc->irq_data.affinity; 25 const struct cpumask *mask = desc->irq_data.affinity;
25 26
26#ifdef CONFIG_GENERIC_PENDING_IRQ 27#ifdef CONFIG_GENERIC_PENDING_IRQ
27 if (desc->status & IRQ_MOVE_PENDING) 28 if (irqd_is_setaffinity_pending(&desc->irq_data))
28 mask = desc->pending_mask; 29 mask = desc->pending_mask;
29#endif 30#endif
30 seq_cpumask(m, mask); 31 seq_cpumask(m, mask);
@@ -65,8 +66,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
65 cpumask_var_t new_value; 66 cpumask_var_t new_value;
66 int err; 67 int err;
67 68
68 if (!irq_to_desc(irq)->irq_data.chip->irq_set_affinity || no_irq_affinity || 69 if (!irq_can_set_affinity(irq) || no_irq_affinity)
69 irq_balancing_disabled(irq))
70 return -EIO; 70 return -EIO;
71 71
72 if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) 72 if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
@@ -89,7 +89,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
89 if (!cpumask_intersects(new_value, cpu_online_mask)) { 89 if (!cpumask_intersects(new_value, cpu_online_mask)) {
90 /* Special case for empty set - allow the architecture 90 /* Special case for empty set - allow the architecture
91 code to set default SMP affinity. */ 91 code to set default SMP affinity. */
92 err = irq_select_affinity_usr(irq) ? -EINVAL : count; 92 err = irq_select_affinity_usr(irq, new_value) ? -EINVAL : count;
93 } else { 93 } else {
94 irq_set_affinity(irq, new_value); 94 irq_set_affinity(irq, new_value);
95 err = count; 95 err = count;
@@ -357,3 +357,65 @@ void init_irq_proc(void)
357 } 357 }
358} 358}
359 359
360#ifdef CONFIG_GENERIC_IRQ_SHOW
361
362int __weak arch_show_interrupts(struct seq_file *p, int prec)
363{
364 return 0;
365}
366
367int show_interrupts(struct seq_file *p, void *v)
368{
369 static int prec;
370
371 unsigned long flags, any_count = 0;
372 int i = *(loff_t *) v, j;
373 struct irqaction *action;
374 struct irq_desc *desc;
375
376 if (i > nr_irqs)
377 return 0;
378
379 if (i == nr_irqs)
380 return arch_show_interrupts(p, prec);
381
382 /* print header and calculate the width of the first column */
383 if (i == 0) {
384 for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
385 j *= 10;
386
387 seq_printf(p, "%*s", prec + 8, "");
388 for_each_online_cpu(j)
389 seq_printf(p, "CPU%-8d", j);
390 seq_putc(p, '\n');
391 }
392
393 desc = irq_to_desc(i);
394 if (!desc)
395 return 0;
396
397 raw_spin_lock_irqsave(&desc->lock, flags);
398 for_each_online_cpu(j)
399 any_count |= kstat_irqs_cpu(i, j);
400 action = desc->action;
401 if (!action && !any_count)
402 goto out;
403
404 seq_printf(p, "%*d: ", prec, i);
405 for_each_online_cpu(j)
406 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
407 seq_printf(p, " %8s", desc->irq_data.chip->name);
408 seq_printf(p, "-%-8s", desc->name);
409
410 if (action) {
411 seq_printf(p, " %s", action->name);
412 while ((action = action->next) != NULL)
413 seq_printf(p, ", %s", action->name);
414 }
415
416 seq_putc(p, '\n');
417out:
418 raw_spin_unlock_irqrestore(&desc->lock, flags);
419 return 0;
420}
421#endif
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 891115a929aa..ad683a99b1ec 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -23,7 +23,7 @@
23#ifdef CONFIG_HARDIRQS_SW_RESEND 23#ifdef CONFIG_HARDIRQS_SW_RESEND
24 24
25/* Bitmap to handle software resend of interrupts: */ 25/* Bitmap to handle software resend of interrupts: */
26static DECLARE_BITMAP(irqs_resend, NR_IRQS); 26static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
27 27
28/* 28/*
29 * Run software resends of IRQ's 29 * Run software resends of IRQ's
@@ -55,20 +55,19 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
55 */ 55 */
56void check_irq_resend(struct irq_desc *desc, unsigned int irq) 56void check_irq_resend(struct irq_desc *desc, unsigned int irq)
57{ 57{
58 unsigned int status = desc->status;
59
60 /*
61 * Make sure the interrupt is enabled, before resending it:
62 */
63 desc->irq_data.chip->irq_enable(&desc->irq_data);
64
65 /* 58 /*
66 * We do not resend level type interrupts. Level type 59 * We do not resend level type interrupts. Level type
67 * interrupts are resent by hardware when they are still 60 * interrupts are resent by hardware when they are still
68 * active. 61 * active.
69 */ 62 */
70 if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { 63 if (irq_settings_is_level(desc))
71 desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; 64 return;
65 if (desc->istate & IRQS_REPLAY)
66 return;
67 if (desc->istate & IRQS_PENDING) {
68 irq_compat_clr_pending(desc);
69 desc->istate &= ~IRQS_PENDING;
70 desc->istate |= IRQS_REPLAY;
72 71
73 if (!desc->irq_data.chip->irq_retrigger || 72 if (!desc->irq_data.chip->irq_retrigger ||
74 !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { 73 !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) {
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
new file mode 100644
index 000000000000..0227ad358272
--- /dev/null
+++ b/kernel/irq/settings.h
@@ -0,0 +1,138 @@
1/*
2 * Internal header to deal with irq_desc->status which will be renamed
3 * to irq_desc->settings.
4 */
5enum {
6 _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS,
7 _IRQ_PER_CPU = IRQ_PER_CPU,
8 _IRQ_LEVEL = IRQ_LEVEL,
9 _IRQ_NOPROBE = IRQ_NOPROBE,
10 _IRQ_NOREQUEST = IRQ_NOREQUEST,
11 _IRQ_NOAUTOEN = IRQ_NOAUTOEN,
12 _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT,
13 _IRQ_NO_BALANCING = IRQ_NO_BALANCING,
14 _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
15 _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
16};
17
18#define IRQ_INPROGRESS GOT_YOU_MORON
19#define IRQ_REPLAY GOT_YOU_MORON
20#define IRQ_WAITING GOT_YOU_MORON
21#define IRQ_DISABLED GOT_YOU_MORON
22#define IRQ_PENDING GOT_YOU_MORON
23#define IRQ_MASKED GOT_YOU_MORON
24#define IRQ_WAKEUP GOT_YOU_MORON
25#define IRQ_MOVE_PENDING GOT_YOU_MORON
26#define IRQ_PER_CPU GOT_YOU_MORON
27#define IRQ_NO_BALANCING GOT_YOU_MORON
28#define IRQ_AFFINITY_SET GOT_YOU_MORON
29#define IRQ_LEVEL GOT_YOU_MORON
30#define IRQ_NOPROBE GOT_YOU_MORON
31#define IRQ_NOREQUEST GOT_YOU_MORON
32#define IRQ_NOAUTOEN GOT_YOU_MORON
33#define IRQ_NESTED_THREAD GOT_YOU_MORON
34#undef IRQF_MODIFY_MASK
35#define IRQF_MODIFY_MASK GOT_YOU_MORON
36
37static inline void
38irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
39{
40 desc->status &= ~(clr & _IRQF_MODIFY_MASK);
41 desc->status |= (set & _IRQF_MODIFY_MASK);
42}
43
44static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
45{
46 return desc->status & _IRQ_PER_CPU;
47}
48
49static inline void irq_settings_set_per_cpu(struct irq_desc *desc)
50{
51 desc->status |= _IRQ_PER_CPU;
52}
53
54static inline void irq_settings_set_no_balancing(struct irq_desc *desc)
55{
56 desc->status |= _IRQ_NO_BALANCING;
57}
58
59static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc)
60{
61 return desc->status & _IRQ_NO_BALANCING;
62}
63
64static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc)
65{
66 return desc->status & IRQ_TYPE_SENSE_MASK;
67}
68
69static inline void
70irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask)
71{
72 desc->status &= ~IRQ_TYPE_SENSE_MASK;
73 desc->status |= mask & IRQ_TYPE_SENSE_MASK;
74}
75
76static inline bool irq_settings_is_level(struct irq_desc *desc)
77{
78 return desc->status & _IRQ_LEVEL;
79}
80
81static inline void irq_settings_clr_level(struct irq_desc *desc)
82{
83 desc->status &= ~_IRQ_LEVEL;
84}
85
86static inline void irq_settings_set_level(struct irq_desc *desc)
87{
88 desc->status |= _IRQ_LEVEL;
89}
90
91static inline bool irq_settings_can_request(struct irq_desc *desc)
92{
93 return !(desc->status & _IRQ_NOREQUEST);
94}
95
96static inline void irq_settings_clr_norequest(struct irq_desc *desc)
97{
98 desc->status &= ~_IRQ_NOREQUEST;
99}
100
101static inline void irq_settings_set_norequest(struct irq_desc *desc)
102{
103 desc->status |= _IRQ_NOREQUEST;
104}
105
106static inline bool irq_settings_can_probe(struct irq_desc *desc)
107{
108 return !(desc->status & _IRQ_NOPROBE);
109}
110
111static inline void irq_settings_clr_noprobe(struct irq_desc *desc)
112{
113 desc->status &= ~_IRQ_NOPROBE;
114}
115
116static inline void irq_settings_set_noprobe(struct irq_desc *desc)
117{
118 desc->status |= _IRQ_NOPROBE;
119}
120
121static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc)
122{
123 return desc->status & _IRQ_MOVE_PCNTXT;
124}
125
126static inline bool irq_settings_can_autoenable(struct irq_desc *desc)
127{
128 return !(desc->status & _IRQ_NOAUTOEN);
129}
130
131static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
132{
133 return desc->status & _IRQ_NESTED_THREAD;
134}
135
136/* Nothing should touch desc->status from now on */
137#undef status
138#define status USE_THE_PROPER_WRAPPERS_YOU_MORON
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3089d3b9d5f3..dd586ebf9c8c 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -21,70 +21,94 @@ static int irqfixup __read_mostly;
21#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) 21#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
22static void poll_spurious_irqs(unsigned long dummy); 22static void poll_spurious_irqs(unsigned long dummy);
23static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0); 23static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
24static int irq_poll_cpu;
25static atomic_t irq_poll_active;
26
27/*
28 * We wait here for a poller to finish.
29 *
30 * If the poll runs on this CPU, then we yell loudly and return
31 * false. That will leave the interrupt line disabled in the worst
32 * case, but it should never happen.
33 *
34 * We wait until the poller is done and then recheck disabled and
35 * action (about to be disabled). Only if it's still active, we return
36 * true and let the handler run.
37 */
38bool irq_wait_for_poll(struct irq_desc *desc)
39{
40 if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
41 "irq poll in progress on cpu %d for irq %d\n",
42 smp_processor_id(), desc->irq_data.irq))
43 return false;
44
45#ifdef CONFIG_SMP
46 do {
47 raw_spin_unlock(&desc->lock);
48 while (desc->istate & IRQS_INPROGRESS)
49 cpu_relax();
50 raw_spin_lock(&desc->lock);
51 } while (desc->istate & IRQS_INPROGRESS);
52 /* Might have been disabled in meantime */
53 return !(desc->istate & IRQS_DISABLED) && desc->action;
54#else
55 return false;
56#endif
57}
58
24 59
25/* 60/*
26 * Recovery handler for misrouted interrupts. 61 * Recovery handler for misrouted interrupts.
27 */ 62 */
28static int try_one_irq(int irq, struct irq_desc *desc) 63static int try_one_irq(int irq, struct irq_desc *desc, bool force)
29{ 64{
65 irqreturn_t ret = IRQ_NONE;
30 struct irqaction *action; 66 struct irqaction *action;
31 int ok = 0, work = 0;
32 67
33 raw_spin_lock(&desc->lock); 68 raw_spin_lock(&desc->lock);
34 /* Already running on another processor */
35 if (desc->status & IRQ_INPROGRESS) {
36 /*
37 * Already running: If it is shared get the other
38 * CPU to go looking for our mystery interrupt too
39 */
40 if (desc->action && (desc->action->flags & IRQF_SHARED))
41 desc->status |= IRQ_PENDING;
42 raw_spin_unlock(&desc->lock);
43 return ok;
44 }
45 /* Honour the normal IRQ locking */
46 desc->status |= IRQ_INPROGRESS;
47 action = desc->action;
48 raw_spin_unlock(&desc->lock);
49 69
50 while (action) { 70 /* PER_CPU and nested thread interrupts are never polled */
51 /* Only shared IRQ handlers are safe to call */ 71 if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc))
52 if (action->flags & IRQF_SHARED) { 72 goto out;
53 if (action->handler(irq, action->dev_id) ==
54 IRQ_HANDLED)
55 ok = 1;
56 }
57 action = action->next;
58 }
59 local_irq_disable();
60 /* Now clean up the flags */
61 raw_spin_lock(&desc->lock);
62 action = desc->action;
63 73
64 /* 74 /*
65 * While we were looking for a fixup someone queued a real 75 * Do not poll disabled interrupts unless the spurious
66 * IRQ clashing with our walk: 76 * disabled poller asks explicitely.
67 */ 77 */
68 while ((desc->status & IRQ_PENDING) && action) { 78 if ((desc->istate & IRQS_DISABLED) && !force)
79 goto out;
80
81 /*
82 * All handlers must agree on IRQF_SHARED, so we test just the
83 * first. Check for action->next as well.
84 */
85 action = desc->action;
86 if (!action || !(action->flags & IRQF_SHARED) ||
87 (action->flags & __IRQF_TIMER) || !action->next)
88 goto out;
89
90 /* Already running on another processor */
91 if (desc->istate & IRQS_INPROGRESS) {
69 /* 92 /*
70 * Perform real IRQ processing for the IRQ we deferred 93 * Already running: If it is shared get the other
94 * CPU to go looking for our mystery interrupt too
71 */ 95 */
72 work = 1; 96 irq_compat_set_pending(desc);
73 raw_spin_unlock(&desc->lock); 97 desc->istate |= IRQS_PENDING;
74 handle_IRQ_event(irq, action); 98 goto out;
75 raw_spin_lock(&desc->lock);
76 desc->status &= ~IRQ_PENDING;
77 } 99 }
78 desc->status &= ~IRQ_INPROGRESS;
79 /*
80 * If we did actual work for the real IRQ line we must let the
81 * IRQ controller clean up too
82 */
83 if (work)
84 irq_end(irq, desc);
85 raw_spin_unlock(&desc->lock);
86 100
87 return ok; 101 /* Mark it poll in progress */
102 desc->istate |= IRQS_POLL_INPROGRESS;
103 do {
104 if (handle_irq_event(desc) == IRQ_HANDLED)
105 ret = IRQ_HANDLED;
106 action = desc->action;
107 } while ((desc->istate & IRQS_PENDING) && action);
108 desc->istate &= ~IRQS_POLL_INPROGRESS;
109out:
110 raw_spin_unlock(&desc->lock);
111 return ret == IRQ_HANDLED;
88} 112}
89 113
90static int misrouted_irq(int irq) 114static int misrouted_irq(int irq)
@@ -92,6 +116,11 @@ static int misrouted_irq(int irq)
92 struct irq_desc *desc; 116 struct irq_desc *desc;
93 int i, ok = 0; 117 int i, ok = 0;
94 118
119 if (atomic_inc_return(&irq_poll_active) == 1)
120 goto out;
121
122 irq_poll_cpu = smp_processor_id();
123
95 for_each_irq_desc(i, desc) { 124 for_each_irq_desc(i, desc) {
96 if (!i) 125 if (!i)
97 continue; 126 continue;
@@ -99,9 +128,11 @@ static int misrouted_irq(int irq)
99 if (i == irq) /* Already tried */ 128 if (i == irq) /* Already tried */
100 continue; 129 continue;
101 130
102 if (try_one_irq(i, desc)) 131 if (try_one_irq(i, desc, false))
103 ok = 1; 132 ok = 1;
104 } 133 }
134out:
135 atomic_dec(&irq_poll_active);
105 /* So the caller can adjust the irq error counts */ 136 /* So the caller can adjust the irq error counts */
106 return ok; 137 return ok;
107} 138}
@@ -111,23 +142,28 @@ static void poll_spurious_irqs(unsigned long dummy)
111 struct irq_desc *desc; 142 struct irq_desc *desc;
112 int i; 143 int i;
113 144
145 if (atomic_inc_return(&irq_poll_active) != 1)
146 goto out;
147 irq_poll_cpu = smp_processor_id();
148
114 for_each_irq_desc(i, desc) { 149 for_each_irq_desc(i, desc) {
115 unsigned int status; 150 unsigned int state;
116 151
117 if (!i) 152 if (!i)
118 continue; 153 continue;
119 154
120 /* Racy but it doesn't matter */ 155 /* Racy but it doesn't matter */
121 status = desc->status; 156 state = desc->istate;
122 barrier(); 157 barrier();
123 if (!(status & IRQ_SPURIOUS_DISABLED)) 158 if (!(state & IRQS_SPURIOUS_DISABLED))
124 continue; 159 continue;
125 160
126 local_irq_disable(); 161 local_irq_disable();
127 try_one_irq(i, desc); 162 try_one_irq(i, desc, true);
128 local_irq_enable(); 163 local_irq_enable();
129 } 164 }
130 165out:
166 atomic_dec(&irq_poll_active);
131 mod_timer(&poll_spurious_irq_timer, 167 mod_timer(&poll_spurious_irq_timer,
132 jiffies + POLL_SPURIOUS_IRQ_INTERVAL); 168 jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
133} 169}
@@ -139,15 +175,13 @@ static void poll_spurious_irqs(unsigned long dummy)
139 * 175 *
140 * (The other 100-of-100,000 interrupts may have been a correctly 176 * (The other 100-of-100,000 interrupts may have been a correctly
141 * functioning device sharing an IRQ with the failing one) 177 * functioning device sharing an IRQ with the failing one)
142 *
143 * Called under desc->lock
144 */ 178 */
145
146static void 179static void
147__report_bad_irq(unsigned int irq, struct irq_desc *desc, 180__report_bad_irq(unsigned int irq, struct irq_desc *desc,
148 irqreturn_t action_ret) 181 irqreturn_t action_ret)
149{ 182{
150 struct irqaction *action; 183 struct irqaction *action;
184 unsigned long flags;
151 185
152 if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) { 186 if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
153 printk(KERN_ERR "irq event %d: bogus return value %x\n", 187 printk(KERN_ERR "irq event %d: bogus return value %x\n",
@@ -159,6 +193,13 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc,
159 dump_stack(); 193 dump_stack();
160 printk(KERN_ERR "handlers:\n"); 194 printk(KERN_ERR "handlers:\n");
161 195
196 /*
197 * We need to take desc->lock here. note_interrupt() is called
198 * w/o desc->lock held, but IRQ_PROGRESS set. We might race
199 * with something else removing an action. It's ok to take
200 * desc->lock here. See synchronize_irq().
201 */
202 raw_spin_lock_irqsave(&desc->lock, flags);
162 action = desc->action; 203 action = desc->action;
163 while (action) { 204 while (action) {
164 printk(KERN_ERR "[<%p>]", action->handler); 205 printk(KERN_ERR "[<%p>]", action->handler);
@@ -167,6 +208,7 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc,
167 printk("\n"); 208 printk("\n");
168 action = action->next; 209 action = action->next;
169 } 210 }
211 raw_spin_unlock_irqrestore(&desc->lock, flags);
170} 212}
171 213
172static void 214static void
@@ -218,6 +260,9 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
218void note_interrupt(unsigned int irq, struct irq_desc *desc, 260void note_interrupt(unsigned int irq, struct irq_desc *desc,
219 irqreturn_t action_ret) 261 irqreturn_t action_ret)
220{ 262{
263 if (desc->istate & IRQS_POLL_INPROGRESS)
264 return;
265
221 if (unlikely(action_ret != IRQ_HANDLED)) { 266 if (unlikely(action_ret != IRQ_HANDLED)) {
222 /* 267 /*
223 * If we are seeing only the odd spurious IRQ caused by 268 * If we are seeing only the odd spurious IRQ caused by
@@ -254,9 +299,9 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
254 * Now kill the IRQ 299 * Now kill the IRQ
255 */ 300 */
256 printk(KERN_EMERG "Disabling IRQ #%d\n", irq); 301 printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
257 desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; 302 desc->istate |= IRQS_SPURIOUS_DISABLED;
258 desc->depth++; 303 desc->depth++;
259 desc->irq_data.chip->irq_disable(&desc->irq_data); 304 irq_disable(desc);
260 305
261 mod_timer(&poll_spurious_irq_timer, 306 mod_timer(&poll_spurious_irq_timer,
262 jiffies + POLL_SPURIOUS_IRQ_INTERVAL); 307 jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 999835b6112b..ed253aa24ba4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,13 +38,96 @@
38 38
39#include <asm/irq_regs.h> 39#include <asm/irq_regs.h>
40 40
41struct remote_function_call {
42 struct task_struct *p;
43 int (*func)(void *info);
44 void *info;
45 int ret;
46};
47
48static void remote_function(void *data)
49{
50 struct remote_function_call *tfc = data;
51 struct task_struct *p = tfc->p;
52
53 if (p) {
54 tfc->ret = -EAGAIN;
55 if (task_cpu(p) != smp_processor_id() || !task_curr(p))
56 return;
57 }
58
59 tfc->ret = tfc->func(tfc->info);
60}
61
62/**
63 * task_function_call - call a function on the cpu on which a task runs
64 * @p: the task to evaluate
65 * @func: the function to be called
66 * @info: the function call argument
67 *
68 * Calls the function @func when the task is currently running. This might
69 * be on the current CPU, which just calls the function directly
70 *
71 * returns: @func return value, or
72 * -ESRCH - when the process isn't running
73 * -EAGAIN - when the process moved away
74 */
75static int
76task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
77{
78 struct remote_function_call data = {
79 .p = p,
80 .func = func,
81 .info = info,
82 .ret = -ESRCH, /* No such (running) process */
83 };
84
85 if (task_curr(p))
86 smp_call_function_single(task_cpu(p), remote_function, &data, 1);
87
88 return data.ret;
89}
90
91/**
92 * cpu_function_call - call a function on the cpu
93 * @func: the function to be called
94 * @info: the function call argument
95 *
96 * Calls the function @func on the remote cpu.
97 *
98 * returns: @func return value or -ENXIO when the cpu is offline
99 */
100static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
101{
102 struct remote_function_call data = {
103 .p = NULL,
104 .func = func,
105 .info = info,
106 .ret = -ENXIO, /* No such CPU */
107 };
108
109 smp_call_function_single(cpu, remote_function, &data, 1);
110
111 return data.ret;
112}
113
114#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
115 PERF_FLAG_FD_OUTPUT |\
116 PERF_FLAG_PID_CGROUP)
117
41enum event_type_t { 118enum event_type_t {
42 EVENT_FLEXIBLE = 0x1, 119 EVENT_FLEXIBLE = 0x1,
43 EVENT_PINNED = 0x2, 120 EVENT_PINNED = 0x2,
44 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, 121 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
45}; 122};
46 123
47atomic_t perf_task_events __read_mostly; 124/*
125 * perf_sched_events : >0 events exist
126 * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
127 */
128atomic_t perf_sched_events __read_mostly;
129static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
130
48static atomic_t nr_mmap_events __read_mostly; 131static atomic_t nr_mmap_events __read_mostly;
49static atomic_t nr_comm_events __read_mostly; 132static atomic_t nr_comm_events __read_mostly;
50static atomic_t nr_task_events __read_mostly; 133static atomic_t nr_task_events __read_mostly;
@@ -67,7 +150,24 @@ int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
67/* 150/*
68 * max perf event sample rate 151 * max perf event sample rate
69 */ 152 */
70int sysctl_perf_event_sample_rate __read_mostly = 100000; 153#define DEFAULT_MAX_SAMPLE_RATE 100000
154int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
155static int max_samples_per_tick __read_mostly =
156 DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
157
158int perf_proc_update_handler(struct ctl_table *table, int write,
159 void __user *buffer, size_t *lenp,
160 loff_t *ppos)
161{
162 int ret = proc_dointvec(table, write, buffer, lenp, ppos);
163
164 if (ret || !write)
165 return ret;
166
167 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
168
169 return 0;
170}
71 171
72static atomic64_t perf_event_id; 172static atomic64_t perf_event_id;
73 173
@@ -75,7 +175,11 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
75 enum event_type_t event_type); 175 enum event_type_t event_type);
76 176
77static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, 177static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
78 enum event_type_t event_type); 178 enum event_type_t event_type,
179 struct task_struct *task);
180
181static void update_context_time(struct perf_event_context *ctx);
182static u64 perf_event_time(struct perf_event *event);
79 183
80void __weak perf_event_print_debug(void) { } 184void __weak perf_event_print_debug(void) { }
81 185
@@ -89,6 +193,360 @@ static inline u64 perf_clock(void)
89 return local_clock(); 193 return local_clock();
90} 194}
91 195
196static inline struct perf_cpu_context *
197__get_cpu_context(struct perf_event_context *ctx)
198{
199 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
200}
201
202#ifdef CONFIG_CGROUP_PERF
203
204/*
205 * Must ensure cgroup is pinned (css_get) before calling
206 * this function. In other words, we cannot call this function
207 * if there is no cgroup event for the current CPU context.
208 */
209static inline struct perf_cgroup *
210perf_cgroup_from_task(struct task_struct *task)
211{
212 return container_of(task_subsys_state(task, perf_subsys_id),
213 struct perf_cgroup, css);
214}
215
216static inline bool
217perf_cgroup_match(struct perf_event *event)
218{
219 struct perf_event_context *ctx = event->ctx;
220 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
221
222 return !event->cgrp || event->cgrp == cpuctx->cgrp;
223}
224
225static inline void perf_get_cgroup(struct perf_event *event)
226{
227 css_get(&event->cgrp->css);
228}
229
230static inline void perf_put_cgroup(struct perf_event *event)
231{
232 css_put(&event->cgrp->css);
233}
234
235static inline void perf_detach_cgroup(struct perf_event *event)
236{
237 perf_put_cgroup(event);
238 event->cgrp = NULL;
239}
240
241static inline int is_cgroup_event(struct perf_event *event)
242{
243 return event->cgrp != NULL;
244}
245
246static inline u64 perf_cgroup_event_time(struct perf_event *event)
247{
248 struct perf_cgroup_info *t;
249
250 t = per_cpu_ptr(event->cgrp->info, event->cpu);
251 return t->time;
252}
253
254static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
255{
256 struct perf_cgroup_info *info;
257 u64 now;
258
259 now = perf_clock();
260
261 info = this_cpu_ptr(cgrp->info);
262
263 info->time += now - info->timestamp;
264 info->timestamp = now;
265}
266
267static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
268{
269 struct perf_cgroup *cgrp_out = cpuctx->cgrp;
270 if (cgrp_out)
271 __update_cgrp_time(cgrp_out);
272}
273
274static inline void update_cgrp_time_from_event(struct perf_event *event)
275{
276 struct perf_cgroup *cgrp;
277
278 /*
279 * ensure we access cgroup data only when needed and
280 * when we know the cgroup is pinned (css_get)
281 */
282 if (!is_cgroup_event(event))
283 return;
284
285 cgrp = perf_cgroup_from_task(current);
286 /*
287 * Do not update time when cgroup is not active
288 */
289 if (cgrp == event->cgrp)
290 __update_cgrp_time(event->cgrp);
291}
292
293static inline void
294perf_cgroup_set_timestamp(struct task_struct *task,
295 struct perf_event_context *ctx)
296{
297 struct perf_cgroup *cgrp;
298 struct perf_cgroup_info *info;
299
300 /*
301 * ctx->lock held by caller
302 * ensure we do not access cgroup data
303 * unless we have the cgroup pinned (css_get)
304 */
305 if (!task || !ctx->nr_cgroups)
306 return;
307
308 cgrp = perf_cgroup_from_task(task);
309 info = this_cpu_ptr(cgrp->info);
310 info->timestamp = ctx->timestamp;
311}
312
313#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */
314#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */
315
316/*
317 * reschedule events based on the cgroup constraint of task.
318 *
319 * mode SWOUT : schedule out everything
320 * mode SWIN : schedule in based on cgroup for next
321 */
322void perf_cgroup_switch(struct task_struct *task, int mode)
323{
324 struct perf_cpu_context *cpuctx;
325 struct pmu *pmu;
326 unsigned long flags;
327
328 /*
329 * disable interrupts to avoid geting nr_cgroup
330 * changes via __perf_event_disable(). Also
331 * avoids preemption.
332 */
333 local_irq_save(flags);
334
335 /*
336 * we reschedule only in the presence of cgroup
337 * constrained events.
338 */
339 rcu_read_lock();
340
341 list_for_each_entry_rcu(pmu, &pmus, entry) {
342
343 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
344
345 perf_pmu_disable(cpuctx->ctx.pmu);
346
347 /*
348 * perf_cgroup_events says at least one
349 * context on this CPU has cgroup events.
350 *
351 * ctx->nr_cgroups reports the number of cgroup
352 * events for a context.
353 */
354 if (cpuctx->ctx.nr_cgroups > 0) {
355
356 if (mode & PERF_CGROUP_SWOUT) {
357 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
358 /*
359 * must not be done before ctxswout due
360 * to event_filter_match() in event_sched_out()
361 */
362 cpuctx->cgrp = NULL;
363 }
364
365 if (mode & PERF_CGROUP_SWIN) {
366 /* set cgrp before ctxsw in to
367 * allow event_filter_match() to not
368 * have to pass task around
369 */
370 cpuctx->cgrp = perf_cgroup_from_task(task);
371 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
372 }
373 }
374
375 perf_pmu_enable(cpuctx->ctx.pmu);
376 }
377
378 rcu_read_unlock();
379
380 local_irq_restore(flags);
381}
382
383static inline void perf_cgroup_sched_out(struct task_struct *task)
384{
385 perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
386}
387
388static inline void perf_cgroup_sched_in(struct task_struct *task)
389{
390 perf_cgroup_switch(task, PERF_CGROUP_SWIN);
391}
392
393static inline int perf_cgroup_connect(int fd, struct perf_event *event,
394 struct perf_event_attr *attr,
395 struct perf_event *group_leader)
396{
397 struct perf_cgroup *cgrp;
398 struct cgroup_subsys_state *css;
399 struct file *file;
400 int ret = 0, fput_needed;
401
402 file = fget_light(fd, &fput_needed);
403 if (!file)
404 return -EBADF;
405
406 css = cgroup_css_from_dir(file, perf_subsys_id);
407 if (IS_ERR(css)) {
408 ret = PTR_ERR(css);
409 goto out;
410 }
411
412 cgrp = container_of(css, struct perf_cgroup, css);
413 event->cgrp = cgrp;
414
415 /* must be done before we fput() the file */
416 perf_get_cgroup(event);
417
418 /*
419 * all events in a group must monitor
420 * the same cgroup because a task belongs
421 * to only one perf cgroup at a time
422 */
423 if (group_leader && group_leader->cgrp != cgrp) {
424 perf_detach_cgroup(event);
425 ret = -EINVAL;
426 }
427out:
428 fput_light(file, fput_needed);
429 return ret;
430}
431
432static inline void
433perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
434{
435 struct perf_cgroup_info *t;
436 t = per_cpu_ptr(event->cgrp->info, event->cpu);
437 event->shadow_ctx_time = now - t->timestamp;
438}
439
440static inline void
441perf_cgroup_defer_enabled(struct perf_event *event)
442{
443 /*
444 * when the current task's perf cgroup does not match
445 * the event's, we need to remember to call the
446 * perf_mark_enable() function the first time a task with
447 * a matching perf cgroup is scheduled in.
448 */
449 if (is_cgroup_event(event) && !perf_cgroup_match(event))
450 event->cgrp_defer_enabled = 1;
451}
452
453static inline void
454perf_cgroup_mark_enabled(struct perf_event *event,
455 struct perf_event_context *ctx)
456{
457 struct perf_event *sub;
458 u64 tstamp = perf_event_time(event);
459
460 if (!event->cgrp_defer_enabled)
461 return;
462
463 event->cgrp_defer_enabled = 0;
464
465 event->tstamp_enabled = tstamp - event->total_time_enabled;
466 list_for_each_entry(sub, &event->sibling_list, group_entry) {
467 if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
468 sub->tstamp_enabled = tstamp - sub->total_time_enabled;
469 sub->cgrp_defer_enabled = 0;
470 }
471 }
472}
473#else /* !CONFIG_CGROUP_PERF */
474
475static inline bool
476perf_cgroup_match(struct perf_event *event)
477{
478 return true;
479}
480
481static inline void perf_detach_cgroup(struct perf_event *event)
482{}
483
484static inline int is_cgroup_event(struct perf_event *event)
485{
486 return 0;
487}
488
489static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
490{
491 return 0;
492}
493
494static inline void update_cgrp_time_from_event(struct perf_event *event)
495{
496}
497
498static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
499{
500}
501
502static inline void perf_cgroup_sched_out(struct task_struct *task)
503{
504}
505
506static inline void perf_cgroup_sched_in(struct task_struct *task)
507{
508}
509
510static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
511 struct perf_event_attr *attr,
512 struct perf_event *group_leader)
513{
514 return -EINVAL;
515}
516
517static inline void
518perf_cgroup_set_timestamp(struct task_struct *task,
519 struct perf_event_context *ctx)
520{
521}
522
523void
524perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
525{
526}
527
528static inline void
529perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
530{
531}
532
533static inline u64 perf_cgroup_event_time(struct perf_event *event)
534{
535 return 0;
536}
537
538static inline void
539perf_cgroup_defer_enabled(struct perf_event *event)
540{
541}
542
543static inline void
544perf_cgroup_mark_enabled(struct perf_event *event,
545 struct perf_event_context *ctx)
546{
547}
548#endif
549
92void perf_pmu_disable(struct pmu *pmu) 550void perf_pmu_disable(struct pmu *pmu)
93{ 551{
94 int *count = this_cpu_ptr(pmu->pmu_disable_count); 552 int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -254,7 +712,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
254 raw_spin_lock_irqsave(&ctx->lock, flags); 712 raw_spin_lock_irqsave(&ctx->lock, flags);
255 --ctx->pin_count; 713 --ctx->pin_count;
256 raw_spin_unlock_irqrestore(&ctx->lock, flags); 714 raw_spin_unlock_irqrestore(&ctx->lock, flags);
257 put_ctx(ctx);
258} 715}
259 716
260/* 717/*
@@ -271,6 +728,10 @@ static void update_context_time(struct perf_event_context *ctx)
271static u64 perf_event_time(struct perf_event *event) 728static u64 perf_event_time(struct perf_event *event)
272{ 729{
273 struct perf_event_context *ctx = event->ctx; 730 struct perf_event_context *ctx = event->ctx;
731
732 if (is_cgroup_event(event))
733 return perf_cgroup_event_time(event);
734
274 return ctx ? ctx->time : 0; 735 return ctx ? ctx->time : 0;
275} 736}
276 737
@@ -285,9 +746,20 @@ static void update_event_times(struct perf_event *event)
285 if (event->state < PERF_EVENT_STATE_INACTIVE || 746 if (event->state < PERF_EVENT_STATE_INACTIVE ||
286 event->group_leader->state < PERF_EVENT_STATE_INACTIVE) 747 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
287 return; 748 return;
288 749 /*
289 if (ctx->is_active) 750 * in cgroup mode, time_enabled represents
751 * the time the event was enabled AND active
752 * tasks were in the monitored cgroup. This is
753 * independent of the activity of the context as
754 * there may be a mix of cgroup and non-cgroup events.
755 *
756 * That is why we treat cgroup events differently
757 * here.
758 */
759 if (is_cgroup_event(event))
290 run_end = perf_event_time(event); 760 run_end = perf_event_time(event);
761 else if (ctx->is_active)
762 run_end = ctx->time;
291 else 763 else
292 run_end = event->tstamp_stopped; 764 run_end = event->tstamp_stopped;
293 765
@@ -299,6 +771,7 @@ static void update_event_times(struct perf_event *event)
299 run_end = perf_event_time(event); 771 run_end = perf_event_time(event);
300 772
301 event->total_time_running = run_end - event->tstamp_running; 773 event->total_time_running = run_end - event->tstamp_running;
774
302} 775}
303 776
304/* 777/*
@@ -347,6 +820,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
347 list_add_tail(&event->group_entry, list); 820 list_add_tail(&event->group_entry, list);
348 } 821 }
349 822
823 if (is_cgroup_event(event))
824 ctx->nr_cgroups++;
825
350 list_add_rcu(&event->event_entry, &ctx->event_list); 826 list_add_rcu(&event->event_entry, &ctx->event_list);
351 if (!ctx->nr_events) 827 if (!ctx->nr_events)
352 perf_pmu_rotate_start(ctx->pmu); 828 perf_pmu_rotate_start(ctx->pmu);
@@ -473,6 +949,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
473 949
474 event->attach_state &= ~PERF_ATTACH_CONTEXT; 950 event->attach_state &= ~PERF_ATTACH_CONTEXT;
475 951
952 if (is_cgroup_event(event))
953 ctx->nr_cgroups--;
954
476 ctx->nr_events--; 955 ctx->nr_events--;
477 if (event->attr.inherit_stat) 956 if (event->attr.inherit_stat)
478 ctx->nr_stat--; 957 ctx->nr_stat--;
@@ -544,7 +1023,8 @@ out:
544static inline int 1023static inline int
545event_filter_match(struct perf_event *event) 1024event_filter_match(struct perf_event *event)
546{ 1025{
547 return event->cpu == -1 || event->cpu == smp_processor_id(); 1026 return (event->cpu == -1 || event->cpu == smp_processor_id())
1027 && perf_cgroup_match(event);
548} 1028}
549 1029
550static void 1030static void
@@ -562,7 +1042,7 @@ event_sched_out(struct perf_event *event,
562 */ 1042 */
563 if (event->state == PERF_EVENT_STATE_INACTIVE 1043 if (event->state == PERF_EVENT_STATE_INACTIVE
564 && !event_filter_match(event)) { 1044 && !event_filter_match(event)) {
565 delta = ctx->time - event->tstamp_stopped; 1045 delta = tstamp - event->tstamp_stopped;
566 event->tstamp_running += delta; 1046 event->tstamp_running += delta;
567 event->tstamp_stopped = tstamp; 1047 event->tstamp_stopped = tstamp;
568 } 1048 }
@@ -606,47 +1086,30 @@ group_sched_out(struct perf_event *group_event,
606 cpuctx->exclusive = 0; 1086 cpuctx->exclusive = 0;
607} 1087}
608 1088
609static inline struct perf_cpu_context *
610__get_cpu_context(struct perf_event_context *ctx)
611{
612 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
613}
614
615/* 1089/*
616 * Cross CPU call to remove a performance event 1090 * Cross CPU call to remove a performance event
617 * 1091 *
618 * We disable the event on the hardware level first. After that we 1092 * We disable the event on the hardware level first. After that we
619 * remove it from the context list. 1093 * remove it from the context list.
620 */ 1094 */
621static void __perf_event_remove_from_context(void *info) 1095static int __perf_remove_from_context(void *info)
622{ 1096{
623 struct perf_event *event = info; 1097 struct perf_event *event = info;
624 struct perf_event_context *ctx = event->ctx; 1098 struct perf_event_context *ctx = event->ctx;
625 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1099 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
626 1100
627 /*
628 * If this is a task context, we need to check whether it is
629 * the current task context of this cpu. If not it has been
630 * scheduled out before the smp call arrived.
631 */
632 if (ctx->task && cpuctx->task_ctx != ctx)
633 return;
634
635 raw_spin_lock(&ctx->lock); 1101 raw_spin_lock(&ctx->lock);
636
637 event_sched_out(event, cpuctx, ctx); 1102 event_sched_out(event, cpuctx, ctx);
638
639 list_del_event(event, ctx); 1103 list_del_event(event, ctx);
640
641 raw_spin_unlock(&ctx->lock); 1104 raw_spin_unlock(&ctx->lock);
1105
1106 return 0;
642} 1107}
643 1108
644 1109
645/* 1110/*
646 * Remove the event from a task's (or a CPU's) list of events. 1111 * Remove the event from a task's (or a CPU's) list of events.
647 * 1112 *
648 * Must be called with ctx->mutex held.
649 *
650 * CPU events are removed with a smp call. For task events we only 1113 * CPU events are removed with a smp call. For task events we only
651 * call when the task is on a CPU. 1114 * call when the task is on a CPU.
652 * 1115 *
@@ -657,49 +1120,48 @@ static void __perf_event_remove_from_context(void *info)
657 * When called from perf_event_exit_task, it's OK because the 1120 * When called from perf_event_exit_task, it's OK because the
658 * context has been detached from its task. 1121 * context has been detached from its task.
659 */ 1122 */
660static void perf_event_remove_from_context(struct perf_event *event) 1123static void perf_remove_from_context(struct perf_event *event)
661{ 1124{
662 struct perf_event_context *ctx = event->ctx; 1125 struct perf_event_context *ctx = event->ctx;
663 struct task_struct *task = ctx->task; 1126 struct task_struct *task = ctx->task;
664 1127
1128 lockdep_assert_held(&ctx->mutex);
1129
665 if (!task) { 1130 if (!task) {
666 /* 1131 /*
667 * Per cpu events are removed via an smp call and 1132 * Per cpu events are removed via an smp call and
668 * the removal is always successful. 1133 * the removal is always successful.
669 */ 1134 */
670 smp_call_function_single(event->cpu, 1135 cpu_function_call(event->cpu, __perf_remove_from_context, event);
671 __perf_event_remove_from_context,
672 event, 1);
673 return; 1136 return;
674 } 1137 }
675 1138
676retry: 1139retry:
677 task_oncpu_function_call(task, __perf_event_remove_from_context, 1140 if (!task_function_call(task, __perf_remove_from_context, event))
678 event); 1141 return;
679 1142
680 raw_spin_lock_irq(&ctx->lock); 1143 raw_spin_lock_irq(&ctx->lock);
681 /* 1144 /*
682 * If the context is active we need to retry the smp call. 1145 * If we failed to find a running task, but find the context active now
1146 * that we've acquired the ctx->lock, retry.
683 */ 1147 */
684 if (ctx->nr_active && !list_empty(&event->group_entry)) { 1148 if (ctx->is_active) {
685 raw_spin_unlock_irq(&ctx->lock); 1149 raw_spin_unlock_irq(&ctx->lock);
686 goto retry; 1150 goto retry;
687 } 1151 }
688 1152
689 /* 1153 /*
690 * The lock prevents that this context is scheduled in so we 1154 * Since the task isn't running, its safe to remove the event, us
691 * can remove the event safely, if the call above did not 1155 * holding the ctx->lock ensures the task won't get scheduled in.
692 * succeed.
693 */ 1156 */
694 if (!list_empty(&event->group_entry)) 1157 list_del_event(event, ctx);
695 list_del_event(event, ctx);
696 raw_spin_unlock_irq(&ctx->lock); 1158 raw_spin_unlock_irq(&ctx->lock);
697} 1159}
698 1160
699/* 1161/*
700 * Cross CPU call to disable a performance event 1162 * Cross CPU call to disable a performance event
701 */ 1163 */
702static void __perf_event_disable(void *info) 1164static int __perf_event_disable(void *info)
703{ 1165{
704 struct perf_event *event = info; 1166 struct perf_event *event = info;
705 struct perf_event_context *ctx = event->ctx; 1167 struct perf_event_context *ctx = event->ctx;
@@ -708,9 +1170,12 @@ static void __perf_event_disable(void *info)
708 /* 1170 /*
709 * If this is a per-task event, need to check whether this 1171 * If this is a per-task event, need to check whether this
710 * event's task is the current task on this cpu. 1172 * event's task is the current task on this cpu.
1173 *
1174 * Can trigger due to concurrent perf_event_context_sched_out()
1175 * flipping contexts around.
711 */ 1176 */
712 if (ctx->task && cpuctx->task_ctx != ctx) 1177 if (ctx->task && cpuctx->task_ctx != ctx)
713 return; 1178 return -EINVAL;
714 1179
715 raw_spin_lock(&ctx->lock); 1180 raw_spin_lock(&ctx->lock);
716 1181
@@ -720,6 +1185,7 @@ static void __perf_event_disable(void *info)
720 */ 1185 */
721 if (event->state >= PERF_EVENT_STATE_INACTIVE) { 1186 if (event->state >= PERF_EVENT_STATE_INACTIVE) {
722 update_context_time(ctx); 1187 update_context_time(ctx);
1188 update_cgrp_time_from_event(event);
723 update_group_times(event); 1189 update_group_times(event);
724 if (event == event->group_leader) 1190 if (event == event->group_leader)
725 group_sched_out(event, cpuctx, ctx); 1191 group_sched_out(event, cpuctx, ctx);
@@ -729,6 +1195,8 @@ static void __perf_event_disable(void *info)
729 } 1195 }
730 1196
731 raw_spin_unlock(&ctx->lock); 1197 raw_spin_unlock(&ctx->lock);
1198
1199 return 0;
732} 1200}
733 1201
734/* 1202/*
@@ -753,13 +1221,13 @@ void perf_event_disable(struct perf_event *event)
753 /* 1221 /*
754 * Disable the event on the cpu that it's on 1222 * Disable the event on the cpu that it's on
755 */ 1223 */
756 smp_call_function_single(event->cpu, __perf_event_disable, 1224 cpu_function_call(event->cpu, __perf_event_disable, event);
757 event, 1);
758 return; 1225 return;
759 } 1226 }
760 1227
761retry: 1228retry:
762 task_oncpu_function_call(task, __perf_event_disable, event); 1229 if (!task_function_call(task, __perf_event_disable, event))
1230 return;
763 1231
764 raw_spin_lock_irq(&ctx->lock); 1232 raw_spin_lock_irq(&ctx->lock);
765 /* 1233 /*
@@ -767,6 +1235,11 @@ retry:
767 */ 1235 */
768 if (event->state == PERF_EVENT_STATE_ACTIVE) { 1236 if (event->state == PERF_EVENT_STATE_ACTIVE) {
769 raw_spin_unlock_irq(&ctx->lock); 1237 raw_spin_unlock_irq(&ctx->lock);
1238 /*
1239 * Reload the task pointer, it might have been changed by
1240 * a concurrent perf_event_context_sched_out().
1241 */
1242 task = ctx->task;
770 goto retry; 1243 goto retry;
771 } 1244 }
772 1245
@@ -778,10 +1251,48 @@ retry:
778 update_group_times(event); 1251 update_group_times(event);
779 event->state = PERF_EVENT_STATE_OFF; 1252 event->state = PERF_EVENT_STATE_OFF;
780 } 1253 }
781
782 raw_spin_unlock_irq(&ctx->lock); 1254 raw_spin_unlock_irq(&ctx->lock);
783} 1255}
784 1256
1257static void perf_set_shadow_time(struct perf_event *event,
1258 struct perf_event_context *ctx,
1259 u64 tstamp)
1260{
1261 /*
1262 * use the correct time source for the time snapshot
1263 *
1264 * We could get by without this by leveraging the
1265 * fact that to get to this function, the caller
1266 * has most likely already called update_context_time()
1267 * and update_cgrp_time_xx() and thus both timestamp
1268 * are identical (or very close). Given that tstamp is,
1269 * already adjusted for cgroup, we could say that:
1270 * tstamp - ctx->timestamp
1271 * is equivalent to
1272 * tstamp - cgrp->timestamp.
1273 *
1274 * Then, in perf_output_read(), the calculation would
1275 * work with no changes because:
1276 * - event is guaranteed scheduled in
1277 * - no scheduled out in between
1278 * - thus the timestamp would be the same
1279 *
1280 * But this is a bit hairy.
1281 *
1282 * So instead, we have an explicit cgroup call to remain
1283 * within the time time source all along. We believe it
1284 * is cleaner and simpler to understand.
1285 */
1286 if (is_cgroup_event(event))
1287 perf_cgroup_set_shadow_time(event, tstamp);
1288 else
1289 event->shadow_ctx_time = tstamp - ctx->timestamp;
1290}
1291
1292#define MAX_INTERRUPTS (~0ULL)
1293
1294static void perf_log_throttle(struct perf_event *event, int enable);
1295
785static int 1296static int
786event_sched_in(struct perf_event *event, 1297event_sched_in(struct perf_event *event,
787 struct perf_cpu_context *cpuctx, 1298 struct perf_cpu_context *cpuctx,
@@ -794,6 +1305,17 @@ event_sched_in(struct perf_event *event,
794 1305
795 event->state = PERF_EVENT_STATE_ACTIVE; 1306 event->state = PERF_EVENT_STATE_ACTIVE;
796 event->oncpu = smp_processor_id(); 1307 event->oncpu = smp_processor_id();
1308
1309 /*
1310 * Unthrottle events, since we scheduled we might have missed several
1311 * ticks already, also for a heavily scheduling task there is little
1312 * guarantee it'll get a tick in a timely manner.
1313 */
1314 if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
1315 perf_log_throttle(event, 1);
1316 event->hw.interrupts = 0;
1317 }
1318
797 /* 1319 /*
798 * The new state must be visible before we turn it on in the hardware: 1320 * The new state must be visible before we turn it on in the hardware:
799 */ 1321 */
@@ -807,7 +1329,7 @@ event_sched_in(struct perf_event *event,
807 1329
808 event->tstamp_running += tstamp - event->tstamp_stopped; 1330 event->tstamp_running += tstamp - event->tstamp_stopped;
809 1331
810 event->shadow_ctx_time = tstamp - ctx->timestamp; 1332 perf_set_shadow_time(event, ctx, tstamp);
811 1333
812 if (!is_software_event(event)) 1334 if (!is_software_event(event))
813 cpuctx->active_oncpu++; 1335 cpuctx->active_oncpu++;
@@ -928,12 +1450,15 @@ static void add_event_to_ctx(struct perf_event *event,
928 event->tstamp_stopped = tstamp; 1450 event->tstamp_stopped = tstamp;
929} 1451}
930 1452
1453static void perf_event_context_sched_in(struct perf_event_context *ctx,
1454 struct task_struct *tsk);
1455
931/* 1456/*
932 * Cross CPU call to install and enable a performance event 1457 * Cross CPU call to install and enable a performance event
933 * 1458 *
934 * Must be called with ctx->mutex held 1459 * Must be called with ctx->mutex held
935 */ 1460 */
936static void __perf_install_in_context(void *info) 1461static int __perf_install_in_context(void *info)
937{ 1462{
938 struct perf_event *event = info; 1463 struct perf_event *event = info;
939 struct perf_event_context *ctx = event->ctx; 1464 struct perf_event_context *ctx = event->ctx;
@@ -942,21 +1467,22 @@ static void __perf_install_in_context(void *info)
942 int err; 1467 int err;
943 1468
944 /* 1469 /*
945 * If this is a task context, we need to check whether it is 1470 * In case we're installing a new context to an already running task,
946 * the current task context of this cpu. If not it has been 1471 * could also happen before perf_event_task_sched_in() on architectures
947 * scheduled out before the smp call arrived. 1472 * which do context switches with IRQs enabled.
948 * Or possibly this is the right context but it isn't
949 * on this cpu because it had no events.
950 */ 1473 */
951 if (ctx->task && cpuctx->task_ctx != ctx) { 1474 if (ctx->task && !cpuctx->task_ctx)
952 if (cpuctx->task_ctx || ctx->task != current) 1475 perf_event_context_sched_in(ctx, ctx->task);
953 return;
954 cpuctx->task_ctx = ctx;
955 }
956 1476
957 raw_spin_lock(&ctx->lock); 1477 raw_spin_lock(&ctx->lock);
958 ctx->is_active = 1; 1478 ctx->is_active = 1;
959 update_context_time(ctx); 1479 update_context_time(ctx);
1480 /*
1481 * update cgrp time only if current cgrp
1482 * matches event->cgrp. Must be done before
1483 * calling add_event_to_ctx()
1484 */
1485 update_cgrp_time_from_event(event);
960 1486
961 add_event_to_ctx(event, ctx); 1487 add_event_to_ctx(event, ctx);
962 1488
@@ -997,6 +1523,8 @@ static void __perf_install_in_context(void *info)
997 1523
998unlock: 1524unlock:
999 raw_spin_unlock(&ctx->lock); 1525 raw_spin_unlock(&ctx->lock);
1526
1527 return 0;
1000} 1528}
1001 1529
1002/* 1530/*
@@ -1008,8 +1536,6 @@ unlock:
1008 * If the event is attached to a task which is on a CPU we use a smp 1536 * If the event is attached to a task which is on a CPU we use a smp
1009 * call to enable it in the task context. The task might have been 1537 * call to enable it in the task context. The task might have been
1010 * scheduled away, but we check this in the smp call again. 1538 * scheduled away, but we check this in the smp call again.
1011 *
1012 * Must be called with ctx->mutex held.
1013 */ 1539 */
1014static void 1540static void
1015perf_install_in_context(struct perf_event_context *ctx, 1541perf_install_in_context(struct perf_event_context *ctx,
@@ -1018,6 +1544,8 @@ perf_install_in_context(struct perf_event_context *ctx,
1018{ 1544{
1019 struct task_struct *task = ctx->task; 1545 struct task_struct *task = ctx->task;
1020 1546
1547 lockdep_assert_held(&ctx->mutex);
1548
1021 event->ctx = ctx; 1549 event->ctx = ctx;
1022 1550
1023 if (!task) { 1551 if (!task) {
@@ -1025,31 +1553,29 @@ perf_install_in_context(struct perf_event_context *ctx,
1025 * Per cpu events are installed via an smp call and 1553 * Per cpu events are installed via an smp call and
1026 * the install is always successful. 1554 * the install is always successful.
1027 */ 1555 */
1028 smp_call_function_single(cpu, __perf_install_in_context, 1556 cpu_function_call(cpu, __perf_install_in_context, event);
1029 event, 1);
1030 return; 1557 return;
1031 } 1558 }
1032 1559
1033retry: 1560retry:
1034 task_oncpu_function_call(task, __perf_install_in_context, 1561 if (!task_function_call(task, __perf_install_in_context, event))
1035 event); 1562 return;
1036 1563
1037 raw_spin_lock_irq(&ctx->lock); 1564 raw_spin_lock_irq(&ctx->lock);
1038 /* 1565 /*
1039 * we need to retry the smp call. 1566 * If we failed to find a running task, but find the context active now
1567 * that we've acquired the ctx->lock, retry.
1040 */ 1568 */
1041 if (ctx->is_active && list_empty(&event->group_entry)) { 1569 if (ctx->is_active) {
1042 raw_spin_unlock_irq(&ctx->lock); 1570 raw_spin_unlock_irq(&ctx->lock);
1043 goto retry; 1571 goto retry;
1044 } 1572 }
1045 1573
1046 /* 1574 /*
1047 * The lock prevents that this context is scheduled in so we 1575 * Since the task isn't running, its safe to add the event, us holding
1048 * can add the event safely, if it the call above did not 1576 * the ctx->lock ensures the task won't get scheduled in.
1049 * succeed.
1050 */ 1577 */
1051 if (list_empty(&event->group_entry)) 1578 add_event_to_ctx(event, ctx);
1052 add_event_to_ctx(event, ctx);
1053 raw_spin_unlock_irq(&ctx->lock); 1579 raw_spin_unlock_irq(&ctx->lock);
1054} 1580}
1055 1581
@@ -1078,7 +1604,7 @@ static void __perf_event_mark_enabled(struct perf_event *event,
1078/* 1604/*
1079 * Cross CPU call to enable a performance event 1605 * Cross CPU call to enable a performance event
1080 */ 1606 */
1081static void __perf_event_enable(void *info) 1607static int __perf_event_enable(void *info)
1082{ 1608{
1083 struct perf_event *event = info; 1609 struct perf_event *event = info;
1084 struct perf_event_context *ctx = event->ctx; 1610 struct perf_event_context *ctx = event->ctx;
@@ -1086,26 +1612,27 @@ static void __perf_event_enable(void *info)
1086 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1612 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1087 int err; 1613 int err;
1088 1614
1089 /* 1615 if (WARN_ON_ONCE(!ctx->is_active))
1090 * If this is a per-task event, need to check whether this 1616 return -EINVAL;
1091 * event's task is the current task on this cpu.
1092 */
1093 if (ctx->task && cpuctx->task_ctx != ctx) {
1094 if (cpuctx->task_ctx || ctx->task != current)
1095 return;
1096 cpuctx->task_ctx = ctx;
1097 }
1098 1617
1099 raw_spin_lock(&ctx->lock); 1618 raw_spin_lock(&ctx->lock);
1100 ctx->is_active = 1;
1101 update_context_time(ctx); 1619 update_context_time(ctx);
1102 1620
1103 if (event->state >= PERF_EVENT_STATE_INACTIVE) 1621 if (event->state >= PERF_EVENT_STATE_INACTIVE)
1104 goto unlock; 1622 goto unlock;
1623
1624 /*
1625 * set current task's cgroup time reference point
1626 */
1627 perf_cgroup_set_timestamp(current, ctx);
1628
1105 __perf_event_mark_enabled(event, ctx); 1629 __perf_event_mark_enabled(event, ctx);
1106 1630
1107 if (!event_filter_match(event)) 1631 if (!event_filter_match(event)) {
1632 if (is_cgroup_event(event))
1633 perf_cgroup_defer_enabled(event);
1108 goto unlock; 1634 goto unlock;
1635 }
1109 1636
1110 /* 1637 /*
1111 * If the event is in a group and isn't the group leader, 1638 * If the event is in a group and isn't the group leader,
@@ -1138,6 +1665,8 @@ static void __perf_event_enable(void *info)
1138 1665
1139unlock: 1666unlock:
1140 raw_spin_unlock(&ctx->lock); 1667 raw_spin_unlock(&ctx->lock);
1668
1669 return 0;
1141} 1670}
1142 1671
1143/* 1672/*
@@ -1158,8 +1687,7 @@ void perf_event_enable(struct perf_event *event)
1158 /* 1687 /*
1159 * Enable the event on the cpu that it's on 1688 * Enable the event on the cpu that it's on
1160 */ 1689 */
1161 smp_call_function_single(event->cpu, __perf_event_enable, 1690 cpu_function_call(event->cpu, __perf_event_enable, event);
1162 event, 1);
1163 return; 1691 return;
1164 } 1692 }
1165 1693
@@ -1178,8 +1706,15 @@ void perf_event_enable(struct perf_event *event)
1178 event->state = PERF_EVENT_STATE_OFF; 1706 event->state = PERF_EVENT_STATE_OFF;
1179 1707
1180retry: 1708retry:
1709 if (!ctx->is_active) {
1710 __perf_event_mark_enabled(event, ctx);
1711 goto out;
1712 }
1713
1181 raw_spin_unlock_irq(&ctx->lock); 1714 raw_spin_unlock_irq(&ctx->lock);
1182 task_oncpu_function_call(task, __perf_event_enable, event); 1715
1716 if (!task_function_call(task, __perf_event_enable, event))
1717 return;
1183 1718
1184 raw_spin_lock_irq(&ctx->lock); 1719 raw_spin_lock_irq(&ctx->lock);
1185 1720
@@ -1187,15 +1722,14 @@ retry:
1187 * If the context is active and the event is still off, 1722 * If the context is active and the event is still off,
1188 * we need to retry the cross-call. 1723 * we need to retry the cross-call.
1189 */ 1724 */
1190 if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) 1725 if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
1726 /*
1727 * task could have been flipped by a concurrent
1728 * perf_event_context_sched_out()
1729 */
1730 task = ctx->task;
1191 goto retry; 1731 goto retry;
1192 1732 }
1193 /*
1194 * Since we have the lock this context can't be scheduled
1195 * in, so we can change the state safely.
1196 */
1197 if (event->state == PERF_EVENT_STATE_OFF)
1198 __perf_event_mark_enabled(event, ctx);
1199 1733
1200out: 1734out:
1201 raw_spin_unlock_irq(&ctx->lock); 1735 raw_spin_unlock_irq(&ctx->lock);
@@ -1227,6 +1761,7 @@ static void ctx_sched_out(struct perf_event_context *ctx,
1227 if (likely(!ctx->nr_events)) 1761 if (likely(!ctx->nr_events))
1228 goto out; 1762 goto out;
1229 update_context_time(ctx); 1763 update_context_time(ctx);
1764 update_cgrp_time_from_cpuctx(cpuctx);
1230 1765
1231 if (!ctx->nr_active) 1766 if (!ctx->nr_active)
1232 goto out; 1767 goto out;
@@ -1339,8 +1874,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1339 } 1874 }
1340} 1875}
1341 1876
1342void perf_event_context_sched_out(struct task_struct *task, int ctxn, 1877static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
1343 struct task_struct *next) 1878 struct task_struct *next)
1344{ 1879{
1345 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; 1880 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
1346 struct perf_event_context *next_ctx; 1881 struct perf_event_context *next_ctx;
@@ -1416,6 +1951,14 @@ void __perf_event_task_sched_out(struct task_struct *task,
1416 1951
1417 for_each_task_context_nr(ctxn) 1952 for_each_task_context_nr(ctxn)
1418 perf_event_context_sched_out(task, ctxn, next); 1953 perf_event_context_sched_out(task, ctxn, next);
1954
1955 /*
1956 * if cgroup events exist on this CPU, then we need
1957 * to check if we have to switch out PMU state.
1958 * cgroup event are system-wide mode only
1959 */
1960 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
1961 perf_cgroup_sched_out(task);
1419} 1962}
1420 1963
1421static void task_ctx_sched_out(struct perf_event_context *ctx, 1964static void task_ctx_sched_out(struct perf_event_context *ctx,
@@ -1454,6 +1997,10 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
1454 if (!event_filter_match(event)) 1997 if (!event_filter_match(event))
1455 continue; 1998 continue;
1456 1999
2000 /* may need to reset tstamp_enabled */
2001 if (is_cgroup_event(event))
2002 perf_cgroup_mark_enabled(event, ctx);
2003
1457 if (group_can_go_on(event, cpuctx, 1)) 2004 if (group_can_go_on(event, cpuctx, 1))
1458 group_sched_in(event, cpuctx, ctx); 2005 group_sched_in(event, cpuctx, ctx);
1459 2006
@@ -1486,6 +2033,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
1486 if (!event_filter_match(event)) 2033 if (!event_filter_match(event))
1487 continue; 2034 continue;
1488 2035
2036 /* may need to reset tstamp_enabled */
2037 if (is_cgroup_event(event))
2038 perf_cgroup_mark_enabled(event, ctx);
2039
1489 if (group_can_go_on(event, cpuctx, can_add_hw)) { 2040 if (group_can_go_on(event, cpuctx, can_add_hw)) {
1490 if (group_sched_in(event, cpuctx, ctx)) 2041 if (group_sched_in(event, cpuctx, ctx))
1491 can_add_hw = 0; 2042 can_add_hw = 0;
@@ -1496,15 +2047,19 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
1496static void 2047static void
1497ctx_sched_in(struct perf_event_context *ctx, 2048ctx_sched_in(struct perf_event_context *ctx,
1498 struct perf_cpu_context *cpuctx, 2049 struct perf_cpu_context *cpuctx,
1499 enum event_type_t event_type) 2050 enum event_type_t event_type,
2051 struct task_struct *task)
1500{ 2052{
2053 u64 now;
2054
1501 raw_spin_lock(&ctx->lock); 2055 raw_spin_lock(&ctx->lock);
1502 ctx->is_active = 1; 2056 ctx->is_active = 1;
1503 if (likely(!ctx->nr_events)) 2057 if (likely(!ctx->nr_events))
1504 goto out; 2058 goto out;
1505 2059
1506 ctx->timestamp = perf_clock(); 2060 now = perf_clock();
1507 2061 ctx->timestamp = now;
2062 perf_cgroup_set_timestamp(task, ctx);
1508 /* 2063 /*
1509 * First go through the list and put on any pinned groups 2064 * First go through the list and put on any pinned groups
1510 * in order to give them the best chance of going on. 2065 * in order to give them the best chance of going on.
@@ -1521,11 +2076,12 @@ out:
1521} 2076}
1522 2077
1523static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, 2078static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
1524 enum event_type_t event_type) 2079 enum event_type_t event_type,
2080 struct task_struct *task)
1525{ 2081{
1526 struct perf_event_context *ctx = &cpuctx->ctx; 2082 struct perf_event_context *ctx = &cpuctx->ctx;
1527 2083
1528 ctx_sched_in(ctx, cpuctx, event_type); 2084 ctx_sched_in(ctx, cpuctx, event_type, task);
1529} 2085}
1530 2086
1531static void task_ctx_sched_in(struct perf_event_context *ctx, 2087static void task_ctx_sched_in(struct perf_event_context *ctx,
@@ -1533,15 +2089,16 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
1533{ 2089{
1534 struct perf_cpu_context *cpuctx; 2090 struct perf_cpu_context *cpuctx;
1535 2091
1536 cpuctx = __get_cpu_context(ctx); 2092 cpuctx = __get_cpu_context(ctx);
1537 if (cpuctx->task_ctx == ctx) 2093 if (cpuctx->task_ctx == ctx)
1538 return; 2094 return;
1539 2095
1540 ctx_sched_in(ctx, cpuctx, event_type); 2096 ctx_sched_in(ctx, cpuctx, event_type, NULL);
1541 cpuctx->task_ctx = ctx; 2097 cpuctx->task_ctx = ctx;
1542} 2098}
1543 2099
1544void perf_event_context_sched_in(struct perf_event_context *ctx) 2100static void perf_event_context_sched_in(struct perf_event_context *ctx,
2101 struct task_struct *task)
1545{ 2102{
1546 struct perf_cpu_context *cpuctx; 2103 struct perf_cpu_context *cpuctx;
1547 2104
@@ -1557,9 +2114,9 @@ void perf_event_context_sched_in(struct perf_event_context *ctx)
1557 */ 2114 */
1558 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 2115 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1559 2116
1560 ctx_sched_in(ctx, cpuctx, EVENT_PINNED); 2117 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
1561 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 2118 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
1562 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); 2119 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
1563 2120
1564 cpuctx->task_ctx = ctx; 2121 cpuctx->task_ctx = ctx;
1565 2122
@@ -1592,14 +2149,17 @@ void __perf_event_task_sched_in(struct task_struct *task)
1592 if (likely(!ctx)) 2149 if (likely(!ctx))
1593 continue; 2150 continue;
1594 2151
1595 perf_event_context_sched_in(ctx); 2152 perf_event_context_sched_in(ctx, task);
1596 } 2153 }
2154 /*
2155 * if cgroup events exist on this CPU, then we need
2156 * to check if we have to switch in PMU state.
2157 * cgroup event are system-wide mode only
2158 */
2159 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
2160 perf_cgroup_sched_in(task);
1597} 2161}
1598 2162
1599#define MAX_INTERRUPTS (~0ULL)
1600
1601static void perf_log_throttle(struct perf_event *event, int enable);
1602
1603static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) 2163static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
1604{ 2164{
1605 u64 frequency = event->attr.sample_freq; 2165 u64 frequency = event->attr.sample_freq;
@@ -1627,7 +2187,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
1627 * Reduce accuracy by one bit such that @a and @b converge 2187 * Reduce accuracy by one bit such that @a and @b converge
1628 * to a similar magnitude. 2188 * to a similar magnitude.
1629 */ 2189 */
1630#define REDUCE_FLS(a, b) \ 2190#define REDUCE_FLS(a, b) \
1631do { \ 2191do { \
1632 if (a##_fls > b##_fls) { \ 2192 if (a##_fls > b##_fls) { \
1633 a >>= 1; \ 2193 a >>= 1; \
@@ -1797,7 +2357,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
1797 if (ctx) 2357 if (ctx)
1798 rotate_ctx(ctx); 2358 rotate_ctx(ctx);
1799 2359
1800 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 2360 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current);
1801 if (ctx) 2361 if (ctx)
1802 task_ctx_sched_in(ctx, EVENT_FLEXIBLE); 2362 task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
1803 2363
@@ -1876,7 +2436,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
1876 2436
1877 raw_spin_unlock(&ctx->lock); 2437 raw_spin_unlock(&ctx->lock);
1878 2438
1879 perf_event_context_sched_in(ctx); 2439 perf_event_context_sched_in(ctx, ctx->task);
1880out: 2440out:
1881 local_irq_restore(flags); 2441 local_irq_restore(flags);
1882} 2442}
@@ -1901,8 +2461,10 @@ static void __perf_event_read(void *info)
1901 return; 2461 return;
1902 2462
1903 raw_spin_lock(&ctx->lock); 2463 raw_spin_lock(&ctx->lock);
1904 if (ctx->is_active) 2464 if (ctx->is_active) {
1905 update_context_time(ctx); 2465 update_context_time(ctx);
2466 update_cgrp_time_from_event(event);
2467 }
1906 update_event_times(event); 2468 update_event_times(event);
1907 if (event->state == PERF_EVENT_STATE_ACTIVE) 2469 if (event->state == PERF_EVENT_STATE_ACTIVE)
1908 event->pmu->read(event); 2470 event->pmu->read(event);
@@ -1933,8 +2495,10 @@ static u64 perf_event_read(struct perf_event *event)
1933 * (e.g., thread is blocked), in that case 2495 * (e.g., thread is blocked), in that case
1934 * we cannot update context time 2496 * we cannot update context time
1935 */ 2497 */
1936 if (ctx->is_active) 2498 if (ctx->is_active) {
1937 update_context_time(ctx); 2499 update_context_time(ctx);
2500 update_cgrp_time_from_event(event);
2501 }
1938 update_event_times(event); 2502 update_event_times(event);
1939 raw_spin_unlock_irqrestore(&ctx->lock, flags); 2503 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1940 } 2504 }
@@ -2213,6 +2777,9 @@ errout:
2213 2777
2214} 2778}
2215 2779
2780/*
2781 * Returns a matching context with refcount and pincount.
2782 */
2216static struct perf_event_context * 2783static struct perf_event_context *
2217find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) 2784find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2218{ 2785{
@@ -2237,6 +2804,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2237 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 2804 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
2238 ctx = &cpuctx->ctx; 2805 ctx = &cpuctx->ctx;
2239 get_ctx(ctx); 2806 get_ctx(ctx);
2807 ++ctx->pin_count;
2240 2808
2241 return ctx; 2809 return ctx;
2242 } 2810 }
@@ -2250,6 +2818,7 @@ retry:
2250 ctx = perf_lock_task_context(task, ctxn, &flags); 2818 ctx = perf_lock_task_context(task, ctxn, &flags);
2251 if (ctx) { 2819 if (ctx) {
2252 unclone_ctx(ctx); 2820 unclone_ctx(ctx);
2821 ++ctx->pin_count;
2253 raw_spin_unlock_irqrestore(&ctx->lock, flags); 2822 raw_spin_unlock_irqrestore(&ctx->lock, flags);
2254 } 2823 }
2255 2824
@@ -2271,8 +2840,10 @@ retry:
2271 err = -ESRCH; 2840 err = -ESRCH;
2272 else if (task->perf_event_ctxp[ctxn]) 2841 else if (task->perf_event_ctxp[ctxn])
2273 err = -EAGAIN; 2842 err = -EAGAIN;
2274 else 2843 else {
2844 ++ctx->pin_count;
2275 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); 2845 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
2846 }
2276 mutex_unlock(&task->perf_event_mutex); 2847 mutex_unlock(&task->perf_event_mutex);
2277 2848
2278 if (unlikely(err)) { 2849 if (unlikely(err)) {
@@ -2312,7 +2883,7 @@ static void free_event(struct perf_event *event)
2312 2883
2313 if (!event->parent) { 2884 if (!event->parent) {
2314 if (event->attach_state & PERF_ATTACH_TASK) 2885 if (event->attach_state & PERF_ATTACH_TASK)
2315 jump_label_dec(&perf_task_events); 2886 jump_label_dec(&perf_sched_events);
2316 if (event->attr.mmap || event->attr.mmap_data) 2887 if (event->attr.mmap || event->attr.mmap_data)
2317 atomic_dec(&nr_mmap_events); 2888 atomic_dec(&nr_mmap_events);
2318 if (event->attr.comm) 2889 if (event->attr.comm)
@@ -2321,6 +2892,10 @@ static void free_event(struct perf_event *event)
2321 atomic_dec(&nr_task_events); 2892 atomic_dec(&nr_task_events);
2322 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) 2893 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
2323 put_callchain_buffers(); 2894 put_callchain_buffers();
2895 if (is_cgroup_event(event)) {
2896 atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
2897 jump_label_dec(&perf_sched_events);
2898 }
2324 } 2899 }
2325 2900
2326 if (event->buffer) { 2901 if (event->buffer) {
@@ -2328,6 +2903,9 @@ static void free_event(struct perf_event *event)
2328 event->buffer = NULL; 2903 event->buffer = NULL;
2329 } 2904 }
2330 2905
2906 if (is_cgroup_event(event))
2907 perf_detach_cgroup(event);
2908
2331 if (event->destroy) 2909 if (event->destroy)
2332 event->destroy(event); 2910 event->destroy(event);
2333 2911
@@ -4395,26 +4973,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
4395 if (unlikely(!is_sampling_event(event))) 4973 if (unlikely(!is_sampling_event(event)))
4396 return 0; 4974 return 0;
4397 4975
4398 if (!throttle) { 4976 if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
4399 hwc->interrupts++; 4977 if (throttle) {
4400 } else { 4978 hwc->interrupts = MAX_INTERRUPTS;
4401 if (hwc->interrupts != MAX_INTERRUPTS) { 4979 perf_log_throttle(event, 0);
4402 hwc->interrupts++;
4403 if (HZ * hwc->interrupts >
4404 (u64)sysctl_perf_event_sample_rate) {
4405 hwc->interrupts = MAX_INTERRUPTS;
4406 perf_log_throttle(event, 0);
4407 ret = 1;
4408 }
4409 } else {
4410 /*
4411 * Keep re-disabling events even though on the previous
4412 * pass we disabled it - just in case we raced with a
4413 * sched-in and the event got enabled again:
4414 */
4415 ret = 1; 4980 ret = 1;
4416 } 4981 }
4417 } 4982 } else
4983 hwc->interrupts++;
4418 4984
4419 if (event->attr.freq) { 4985 if (event->attr.freq) {
4420 u64 now = perf_clock(); 4986 u64 now = perf_clock();
@@ -5051,6 +5617,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
5051 u64 period; 5617 u64 period;
5052 5618
5053 event = container_of(hrtimer, struct perf_event, hw.hrtimer); 5619 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
5620
5621 if (event->state != PERF_EVENT_STATE_ACTIVE)
5622 return HRTIMER_NORESTART;
5623
5054 event->pmu->read(event); 5624 event->pmu->read(event);
5055 5625
5056 perf_sample_data_init(&data, 0); 5626 perf_sample_data_init(&data, 0);
@@ -5077,9 +5647,6 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
5077 if (!is_sampling_event(event)) 5647 if (!is_sampling_event(event))
5078 return; 5648 return;
5079 5649
5080 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
5081 hwc->hrtimer.function = perf_swevent_hrtimer;
5082
5083 period = local64_read(&hwc->period_left); 5650 period = local64_read(&hwc->period_left);
5084 if (period) { 5651 if (period) {
5085 if (period < 0) 5652 if (period < 0)
@@ -5106,6 +5673,30 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
5106 } 5673 }
5107} 5674}
5108 5675
5676static void perf_swevent_init_hrtimer(struct perf_event *event)
5677{
5678 struct hw_perf_event *hwc = &event->hw;
5679
5680 if (!is_sampling_event(event))
5681 return;
5682
5683 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
5684 hwc->hrtimer.function = perf_swevent_hrtimer;
5685
5686 /*
5687 * Since hrtimers have a fixed rate, we can do a static freq->period
5688 * mapping and avoid the whole period adjust feedback stuff.
5689 */
5690 if (event->attr.freq) {
5691 long freq = event->attr.sample_freq;
5692
5693 event->attr.sample_period = NSEC_PER_SEC / freq;
5694 hwc->sample_period = event->attr.sample_period;
5695 local64_set(&hwc->period_left, hwc->sample_period);
5696 event->attr.freq = 0;
5697 }
5698}
5699
5109/* 5700/*
5110 * Software event: cpu wall time clock 5701 * Software event: cpu wall time clock
5111 */ 5702 */
@@ -5158,6 +5749,8 @@ static int cpu_clock_event_init(struct perf_event *event)
5158 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) 5749 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
5159 return -ENOENT; 5750 return -ENOENT;
5160 5751
5752 perf_swevent_init_hrtimer(event);
5753
5161 return 0; 5754 return 0;
5162} 5755}
5163 5756
@@ -5213,16 +5806,9 @@ static void task_clock_event_del(struct perf_event *event, int flags)
5213 5806
5214static void task_clock_event_read(struct perf_event *event) 5807static void task_clock_event_read(struct perf_event *event)
5215{ 5808{
5216 u64 time; 5809 u64 now = perf_clock();
5217 5810 u64 delta = now - event->ctx->timestamp;
5218 if (!in_nmi()) { 5811 u64 time = event->ctx->time + delta;
5219 update_context_time(event->ctx);
5220 time = event->ctx->time;
5221 } else {
5222 u64 now = perf_clock();
5223 u64 delta = now - event->ctx->timestamp;
5224 time = event->ctx->time + delta;
5225 }
5226 5812
5227 task_clock_event_update(event, time); 5813 task_clock_event_update(event, time);
5228} 5814}
@@ -5235,6 +5821,8 @@ static int task_clock_event_init(struct perf_event *event)
5235 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) 5821 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
5236 return -ENOENT; 5822 return -ENOENT;
5237 5823
5824 perf_swevent_init_hrtimer(event);
5825
5238 return 0; 5826 return 0;
5239} 5827}
5240 5828
@@ -5506,17 +6094,22 @@ struct pmu *perf_init_event(struct perf_event *event)
5506{ 6094{
5507 struct pmu *pmu = NULL; 6095 struct pmu *pmu = NULL;
5508 int idx; 6096 int idx;
6097 int ret;
5509 6098
5510 idx = srcu_read_lock(&pmus_srcu); 6099 idx = srcu_read_lock(&pmus_srcu);
5511 6100
5512 rcu_read_lock(); 6101 rcu_read_lock();
5513 pmu = idr_find(&pmu_idr, event->attr.type); 6102 pmu = idr_find(&pmu_idr, event->attr.type);
5514 rcu_read_unlock(); 6103 rcu_read_unlock();
5515 if (pmu) 6104 if (pmu) {
6105 ret = pmu->event_init(event);
6106 if (ret)
6107 pmu = ERR_PTR(ret);
5516 goto unlock; 6108 goto unlock;
6109 }
5517 6110
5518 list_for_each_entry_rcu(pmu, &pmus, entry) { 6111 list_for_each_entry_rcu(pmu, &pmus, entry) {
5519 int ret = pmu->event_init(event); 6112 ret = pmu->event_init(event);
5520 if (!ret) 6113 if (!ret)
5521 goto unlock; 6114 goto unlock;
5522 6115
@@ -5642,7 +6235,7 @@ done:
5642 6235
5643 if (!event->parent) { 6236 if (!event->parent) {
5644 if (event->attach_state & PERF_ATTACH_TASK) 6237 if (event->attach_state & PERF_ATTACH_TASK)
5645 jump_label_inc(&perf_task_events); 6238 jump_label_inc(&perf_sched_events);
5646 if (event->attr.mmap || event->attr.mmap_data) 6239 if (event->attr.mmap || event->attr.mmap_data)
5647 atomic_inc(&nr_mmap_events); 6240 atomic_inc(&nr_mmap_events);
5648 if (event->attr.comm) 6241 if (event->attr.comm)
@@ -5817,7 +6410,7 @@ SYSCALL_DEFINE5(perf_event_open,
5817 int err; 6410 int err;
5818 6411
5819 /* for future expandability... */ 6412 /* for future expandability... */
5820 if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) 6413 if (flags & ~PERF_FLAG_ALL)
5821 return -EINVAL; 6414 return -EINVAL;
5822 6415
5823 err = perf_copy_attr(attr_uptr, &attr); 6416 err = perf_copy_attr(attr_uptr, &attr);
@@ -5834,6 +6427,15 @@ SYSCALL_DEFINE5(perf_event_open,
5834 return -EINVAL; 6427 return -EINVAL;
5835 } 6428 }
5836 6429
6430 /*
6431 * In cgroup mode, the pid argument is used to pass the fd
6432 * opened to the cgroup directory in cgroupfs. The cpu argument
6433 * designates the cpu on which to monitor threads from that
6434 * cgroup.
6435 */
6436 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1))
6437 return -EINVAL;
6438
5837 event_fd = get_unused_fd_flags(O_RDWR); 6439 event_fd = get_unused_fd_flags(O_RDWR);
5838 if (event_fd < 0) 6440 if (event_fd < 0)
5839 return event_fd; 6441 return event_fd;
@@ -5851,7 +6453,7 @@ SYSCALL_DEFINE5(perf_event_open,
5851 group_leader = NULL; 6453 group_leader = NULL;
5852 } 6454 }
5853 6455
5854 if (pid != -1) { 6456 if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
5855 task = find_lively_task_by_vpid(pid); 6457 task = find_lively_task_by_vpid(pid);
5856 if (IS_ERR(task)) { 6458 if (IS_ERR(task)) {
5857 err = PTR_ERR(task); 6459 err = PTR_ERR(task);
@@ -5865,6 +6467,19 @@ SYSCALL_DEFINE5(perf_event_open,
5865 goto err_task; 6467 goto err_task;
5866 } 6468 }
5867 6469
6470 if (flags & PERF_FLAG_PID_CGROUP) {
6471 err = perf_cgroup_connect(pid, event, &attr, group_leader);
6472 if (err)
6473 goto err_alloc;
6474 /*
6475 * one more event:
6476 * - that has cgroup constraint on event->cpu
6477 * - that may need work on context switch
6478 */
6479 atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
6480 jump_label_inc(&perf_sched_events);
6481 }
6482
5868 /* 6483 /*
5869 * Special case software events and allow them to be part of 6484 * Special case software events and allow them to be part of
5870 * any hardware group. 6485 * any hardware group.
@@ -5950,10 +6565,10 @@ SYSCALL_DEFINE5(perf_event_open,
5950 struct perf_event_context *gctx = group_leader->ctx; 6565 struct perf_event_context *gctx = group_leader->ctx;
5951 6566
5952 mutex_lock(&gctx->mutex); 6567 mutex_lock(&gctx->mutex);
5953 perf_event_remove_from_context(group_leader); 6568 perf_remove_from_context(group_leader);
5954 list_for_each_entry(sibling, &group_leader->sibling_list, 6569 list_for_each_entry(sibling, &group_leader->sibling_list,
5955 group_entry) { 6570 group_entry) {
5956 perf_event_remove_from_context(sibling); 6571 perf_remove_from_context(sibling);
5957 put_ctx(gctx); 6572 put_ctx(gctx);
5958 } 6573 }
5959 mutex_unlock(&gctx->mutex); 6574 mutex_unlock(&gctx->mutex);
@@ -5976,6 +6591,7 @@ SYSCALL_DEFINE5(perf_event_open,
5976 6591
5977 perf_install_in_context(ctx, event, cpu); 6592 perf_install_in_context(ctx, event, cpu);
5978 ++ctx->generation; 6593 ++ctx->generation;
6594 perf_unpin_context(ctx);
5979 mutex_unlock(&ctx->mutex); 6595 mutex_unlock(&ctx->mutex);
5980 6596
5981 event->owner = current; 6597 event->owner = current;
@@ -6001,6 +6617,7 @@ SYSCALL_DEFINE5(perf_event_open,
6001 return event_fd; 6617 return event_fd;
6002 6618
6003err_context: 6619err_context:
6620 perf_unpin_context(ctx);
6004 put_ctx(ctx); 6621 put_ctx(ctx);
6005err_alloc: 6622err_alloc:
6006 free_event(event); 6623 free_event(event);
@@ -6051,6 +6668,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
6051 mutex_lock(&ctx->mutex); 6668 mutex_lock(&ctx->mutex);
6052 perf_install_in_context(ctx, event, cpu); 6669 perf_install_in_context(ctx, event, cpu);
6053 ++ctx->generation; 6670 ++ctx->generation;
6671 perf_unpin_context(ctx);
6054 mutex_unlock(&ctx->mutex); 6672 mutex_unlock(&ctx->mutex);
6055 6673
6056 return event; 6674 return event;
@@ -6104,7 +6722,7 @@ __perf_event_exit_task(struct perf_event *child_event,
6104{ 6722{
6105 struct perf_event *parent_event; 6723 struct perf_event *parent_event;
6106 6724
6107 perf_event_remove_from_context(child_event); 6725 perf_remove_from_context(child_event);
6108 6726
6109 parent_event = child_event->parent; 6727 parent_event = child_event->parent;
6110 /* 6728 /*
@@ -6411,7 +7029,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
6411 return 0; 7029 return 0;
6412 } 7030 }
6413 7031
6414 child_ctx = child->perf_event_ctxp[ctxn]; 7032 child_ctx = child->perf_event_ctxp[ctxn];
6415 if (!child_ctx) { 7033 if (!child_ctx) {
6416 /* 7034 /*
6417 * This is executed from the parent task context, so 7035 * This is executed from the parent task context, so
@@ -6526,6 +7144,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6526 mutex_unlock(&parent_ctx->mutex); 7144 mutex_unlock(&parent_ctx->mutex);
6527 7145
6528 perf_unpin_context(parent_ctx); 7146 perf_unpin_context(parent_ctx);
7147 put_ctx(parent_ctx);
6529 7148
6530 return ret; 7149 return ret;
6531} 7150}
@@ -6595,9 +7214,9 @@ static void __perf_event_exit_context(void *__info)
6595 perf_pmu_rotate_stop(ctx->pmu); 7214 perf_pmu_rotate_stop(ctx->pmu);
6596 7215
6597 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 7216 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
6598 __perf_event_remove_from_context(event); 7217 __perf_remove_from_context(event);
6599 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) 7218 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
6600 __perf_event_remove_from_context(event); 7219 __perf_remove_from_context(event);
6601} 7220}
6602 7221
6603static void perf_event_exit_cpu_context(int cpu) 7222static void perf_event_exit_cpu_context(int cpu)
@@ -6721,3 +7340,83 @@ unlock:
6721 return ret; 7340 return ret;
6722} 7341}
6723device_initcall(perf_event_sysfs_init); 7342device_initcall(perf_event_sysfs_init);
7343
7344#ifdef CONFIG_CGROUP_PERF
7345static struct cgroup_subsys_state *perf_cgroup_create(
7346 struct cgroup_subsys *ss, struct cgroup *cont)
7347{
7348 struct perf_cgroup *jc;
7349
7350 jc = kzalloc(sizeof(*jc), GFP_KERNEL);
7351 if (!jc)
7352 return ERR_PTR(-ENOMEM);
7353
7354 jc->info = alloc_percpu(struct perf_cgroup_info);
7355 if (!jc->info) {
7356 kfree(jc);
7357 return ERR_PTR(-ENOMEM);
7358 }
7359
7360 return &jc->css;
7361}
7362
7363static void perf_cgroup_destroy(struct cgroup_subsys *ss,
7364 struct cgroup *cont)
7365{
7366 struct perf_cgroup *jc;
7367 jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
7368 struct perf_cgroup, css);
7369 free_percpu(jc->info);
7370 kfree(jc);
7371}
7372
7373static int __perf_cgroup_move(void *info)
7374{
7375 struct task_struct *task = info;
7376 perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
7377 return 0;
7378}
7379
7380static void perf_cgroup_move(struct task_struct *task)
7381{
7382 task_function_call(task, __perf_cgroup_move, task);
7383}
7384
7385static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7386 struct cgroup *old_cgrp, struct task_struct *task,
7387 bool threadgroup)
7388{
7389 perf_cgroup_move(task);
7390 if (threadgroup) {
7391 struct task_struct *c;
7392 rcu_read_lock();
7393 list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
7394 perf_cgroup_move(c);
7395 }
7396 rcu_read_unlock();
7397 }
7398}
7399
7400static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
7401 struct cgroup *old_cgrp, struct task_struct *task)
7402{
7403 /*
7404 * cgroup_exit() is called in the copy_process() failure path.
7405 * Ignore this case since the task hasn't ran yet, this avoids
7406 * trying to poke a half freed task state from generic code.
7407 */
7408 if (!(task->flags & PF_EXITING))
7409 return;
7410
7411 perf_cgroup_move(task);
7412}
7413
7414struct cgroup_subsys perf_subsys = {
7415 .name = "perf_event",
7416 .subsys_id = perf_subsys_id,
7417 .create = perf_cgroup_create,
7418 .destroy = perf_cgroup_destroy,
7419 .exit = perf_cgroup_exit,
7420 .attach = perf_cgroup_attach,
7421};
7422#endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index aeaa7f846821..0da058bff8eb 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -103,11 +103,14 @@ static struct pm_qos_object *pm_qos_array[] = {
103 103
104static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, 104static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
105 size_t count, loff_t *f_pos); 105 size_t count, loff_t *f_pos);
106static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
107 size_t count, loff_t *f_pos);
106static int pm_qos_power_open(struct inode *inode, struct file *filp); 108static int pm_qos_power_open(struct inode *inode, struct file *filp);
107static int pm_qos_power_release(struct inode *inode, struct file *filp); 109static int pm_qos_power_release(struct inode *inode, struct file *filp);
108 110
109static const struct file_operations pm_qos_power_fops = { 111static const struct file_operations pm_qos_power_fops = {
110 .write = pm_qos_power_write, 112 .write = pm_qos_power_write,
113 .read = pm_qos_power_read,
111 .open = pm_qos_power_open, 114 .open = pm_qos_power_open,
112 .release = pm_qos_power_release, 115 .release = pm_qos_power_release,
113 .llseek = noop_llseek, 116 .llseek = noop_llseek,
@@ -376,6 +379,27 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp)
376} 379}
377 380
378 381
382static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
383 size_t count, loff_t *f_pos)
384{
385 s32 value;
386 unsigned long flags;
387 struct pm_qos_object *o;
388 struct pm_qos_request_list *pm_qos_req = filp->private_data;;
389
390 if (!pm_qos_req)
391 return -EINVAL;
392 if (!pm_qos_request_active(pm_qos_req))
393 return -EINVAL;
394
395 o = pm_qos_array[pm_qos_req->pm_qos_class];
396 spin_lock_irqsave(&pm_qos_lock, flags);
397 value = pm_qos_get_value(o);
398 spin_unlock_irqrestore(&pm_qos_lock, flags);
399
400 return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32));
401}
402
379static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, 403static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
380 size_t count, loff_t *f_pos) 404 size_t count, loff_t *f_pos)
381{ 405{
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 05bb7173850e..67fea9d25d55 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -176,7 +176,8 @@ static inline cputime_t virt_ticks(struct task_struct *p)
176 return p->utime; 176 return p->utime;
177} 177}
178 178
179int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) 179static int
180posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
180{ 181{
181 int error = check_clock(which_clock); 182 int error = check_clock(which_clock);
182 if (!error) { 183 if (!error) {
@@ -194,7 +195,8 @@ int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
194 return error; 195 return error;
195} 196}
196 197
197int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) 198static int
199posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
198{ 200{
199 /* 201 /*
200 * You can never reset a CPU clock, but we check for other errors 202 * You can never reset a CPU clock, but we check for other errors
@@ -317,7 +319,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
317} 319}
318 320
319 321
320int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) 322static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
321{ 323{
322 const pid_t pid = CPUCLOCK_PID(which_clock); 324 const pid_t pid = CPUCLOCK_PID(which_clock);
323 int error = -EINVAL; 325 int error = -EINVAL;
@@ -379,7 +381,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
379 * This is called from sys_timer_create() and do_cpu_nanosleep() with the 381 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
380 * new timer already all-zeros initialized. 382 * new timer already all-zeros initialized.
381 */ 383 */
382int posix_cpu_timer_create(struct k_itimer *new_timer) 384static int posix_cpu_timer_create(struct k_itimer *new_timer)
383{ 385{
384 int ret = 0; 386 int ret = 0;
385 const pid_t pid = CPUCLOCK_PID(new_timer->it_clock); 387 const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
@@ -425,7 +427,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
425 * If we return TIMER_RETRY, it's necessary to release the timer's lock 427 * If we return TIMER_RETRY, it's necessary to release the timer's lock
426 * and try again. (This happens when the timer is in the middle of firing.) 428 * and try again. (This happens when the timer is in the middle of firing.)
427 */ 429 */
428int posix_cpu_timer_del(struct k_itimer *timer) 430static int posix_cpu_timer_del(struct k_itimer *timer)
429{ 431{
430 struct task_struct *p = timer->it.cpu.task; 432 struct task_struct *p = timer->it.cpu.task;
431 int ret = 0; 433 int ret = 0;
@@ -665,8 +667,8 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
665 * If we return TIMER_RETRY, it's necessary to release the timer's lock 667 * If we return TIMER_RETRY, it's necessary to release the timer's lock
666 * and try again. (This happens when the timer is in the middle of firing.) 668 * and try again. (This happens when the timer is in the middle of firing.)
667 */ 669 */
668int posix_cpu_timer_set(struct k_itimer *timer, int flags, 670static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
669 struct itimerspec *new, struct itimerspec *old) 671 struct itimerspec *new, struct itimerspec *old)
670{ 672{
671 struct task_struct *p = timer->it.cpu.task; 673 struct task_struct *p = timer->it.cpu.task;
672 union cpu_time_count old_expires, new_expires, old_incr, val; 674 union cpu_time_count old_expires, new_expires, old_incr, val;
@@ -820,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
820 return ret; 822 return ret;
821} 823}
822 824
823void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) 825static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
824{ 826{
825 union cpu_time_count now; 827 union cpu_time_count now;
826 struct task_struct *p = timer->it.cpu.task; 828 struct task_struct *p = timer->it.cpu.task;
@@ -1481,11 +1483,13 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1481 return error; 1483 return error;
1482} 1484}
1483 1485
1484int posix_cpu_nsleep(const clockid_t which_clock, int flags, 1486static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1485 struct timespec *rqtp, struct timespec __user *rmtp) 1487
1488static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1489 struct timespec *rqtp, struct timespec __user *rmtp)
1486{ 1490{
1487 struct restart_block *restart_block = 1491 struct restart_block *restart_block =
1488 &current_thread_info()->restart_block; 1492 &current_thread_info()->restart_block;
1489 struct itimerspec it; 1493 struct itimerspec it;
1490 int error; 1494 int error;
1491 1495
@@ -1501,56 +1505,47 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1501 1505
1502 if (error == -ERESTART_RESTARTBLOCK) { 1506 if (error == -ERESTART_RESTARTBLOCK) {
1503 1507
1504 if (flags & TIMER_ABSTIME) 1508 if (flags & TIMER_ABSTIME)
1505 return -ERESTARTNOHAND; 1509 return -ERESTARTNOHAND;
1506 /* 1510 /*
1507 * Report back to the user the time still remaining. 1511 * Report back to the user the time still remaining.
1508 */ 1512 */
1509 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) 1513 if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1510 return -EFAULT; 1514 return -EFAULT;
1511 1515
1512 restart_block->fn = posix_cpu_nsleep_restart; 1516 restart_block->fn = posix_cpu_nsleep_restart;
1513 restart_block->arg0 = which_clock; 1517 restart_block->nanosleep.index = which_clock;
1514 restart_block->arg1 = (unsigned long) rmtp; 1518 restart_block->nanosleep.rmtp = rmtp;
1515 restart_block->arg2 = rqtp->tv_sec; 1519 restart_block->nanosleep.expires = timespec_to_ns(rqtp);
1516 restart_block->arg3 = rqtp->tv_nsec;
1517 } 1520 }
1518 return error; 1521 return error;
1519} 1522}
1520 1523
1521long posix_cpu_nsleep_restart(struct restart_block *restart_block) 1524static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1522{ 1525{
1523 clockid_t which_clock = restart_block->arg0; 1526 clockid_t which_clock = restart_block->nanosleep.index;
1524 struct timespec __user *rmtp;
1525 struct timespec t; 1527 struct timespec t;
1526 struct itimerspec it; 1528 struct itimerspec it;
1527 int error; 1529 int error;
1528 1530
1529 rmtp = (struct timespec __user *) restart_block->arg1; 1531 t = ns_to_timespec(restart_block->nanosleep.expires);
1530 t.tv_sec = restart_block->arg2;
1531 t.tv_nsec = restart_block->arg3;
1532 1532
1533 restart_block->fn = do_no_restart_syscall;
1534 error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it); 1533 error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1535 1534
1536 if (error == -ERESTART_RESTARTBLOCK) { 1535 if (error == -ERESTART_RESTARTBLOCK) {
1536 struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
1537 /* 1537 /*
1538 * Report back to the user the time still remaining. 1538 * Report back to the user the time still remaining.
1539 */ 1539 */
1540 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) 1540 if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1541 return -EFAULT; 1541 return -EFAULT;
1542 1542
1543 restart_block->fn = posix_cpu_nsleep_restart; 1543 restart_block->nanosleep.expires = timespec_to_ns(&t);
1544 restart_block->arg0 = which_clock;
1545 restart_block->arg1 = (unsigned long) rmtp;
1546 restart_block->arg2 = t.tv_sec;
1547 restart_block->arg3 = t.tv_nsec;
1548 } 1544 }
1549 return error; 1545 return error;
1550 1546
1551} 1547}
1552 1548
1553
1554#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) 1549#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
1555#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) 1550#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1556 1551
@@ -1594,38 +1589,37 @@ static int thread_cpu_timer_create(struct k_itimer *timer)
1594 timer->it_clock = THREAD_CLOCK; 1589 timer->it_clock = THREAD_CLOCK;
1595 return posix_cpu_timer_create(timer); 1590 return posix_cpu_timer_create(timer);
1596} 1591}
1597static int thread_cpu_nsleep(const clockid_t which_clock, int flags, 1592
1598 struct timespec *rqtp, struct timespec __user *rmtp) 1593struct k_clock clock_posix_cpu = {
1599{ 1594 .clock_getres = posix_cpu_clock_getres,
1600 return -EINVAL; 1595 .clock_set = posix_cpu_clock_set,
1601} 1596 .clock_get = posix_cpu_clock_get,
1602static long thread_cpu_nsleep_restart(struct restart_block *restart_block) 1597 .timer_create = posix_cpu_timer_create,
1603{ 1598 .nsleep = posix_cpu_nsleep,
1604 return -EINVAL; 1599 .nsleep_restart = posix_cpu_nsleep_restart,
1605} 1600 .timer_set = posix_cpu_timer_set,
1601 .timer_del = posix_cpu_timer_del,
1602 .timer_get = posix_cpu_timer_get,
1603};
1606 1604
1607static __init int init_posix_cpu_timers(void) 1605static __init int init_posix_cpu_timers(void)
1608{ 1606{
1609 struct k_clock process = { 1607 struct k_clock process = {
1610 .clock_getres = process_cpu_clock_getres, 1608 .clock_getres = process_cpu_clock_getres,
1611 .clock_get = process_cpu_clock_get, 1609 .clock_get = process_cpu_clock_get,
1612 .clock_set = do_posix_clock_nosettime, 1610 .timer_create = process_cpu_timer_create,
1613 .timer_create = process_cpu_timer_create, 1611 .nsleep = process_cpu_nsleep,
1614 .nsleep = process_cpu_nsleep, 1612 .nsleep_restart = process_cpu_nsleep_restart,
1615 .nsleep_restart = process_cpu_nsleep_restart,
1616 }; 1613 };
1617 struct k_clock thread = { 1614 struct k_clock thread = {
1618 .clock_getres = thread_cpu_clock_getres, 1615 .clock_getres = thread_cpu_clock_getres,
1619 .clock_get = thread_cpu_clock_get, 1616 .clock_get = thread_cpu_clock_get,
1620 .clock_set = do_posix_clock_nosettime, 1617 .timer_create = thread_cpu_timer_create,
1621 .timer_create = thread_cpu_timer_create,
1622 .nsleep = thread_cpu_nsleep,
1623 .nsleep_restart = thread_cpu_nsleep_restart,
1624 }; 1618 };
1625 struct timespec ts; 1619 struct timespec ts;
1626 1620
1627 register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); 1621 posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1628 register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); 1622 posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1629 1623
1630 cputime_to_timespec(cputime_one_jiffy, &ts); 1624 cputime_to_timespec(cputime_one_jiffy, &ts);
1631 onecputick = ts.tv_nsec; 1625 onecputick = ts.tv_nsec;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 93bd2eb2bc53..4c0124919f9a 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -41,6 +41,7 @@
41#include <linux/init.h> 41#include <linux/init.h>
42#include <linux/compiler.h> 42#include <linux/compiler.h>
43#include <linux/idr.h> 43#include <linux/idr.h>
44#include <linux/posix-clock.h>
44#include <linux/posix-timers.h> 45#include <linux/posix-timers.h>
45#include <linux/syscalls.h> 46#include <linux/syscalls.h>
46#include <linux/wait.h> 47#include <linux/wait.h>
@@ -81,6 +82,14 @@ static DEFINE_SPINLOCK(idr_lock);
81#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" 82#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
82#endif 83#endif
83 84
85/*
86 * parisc wants ENOTSUP instead of EOPNOTSUPP
87 */
88#ifndef ENOTSUP
89# define ENANOSLEEP_NOTSUP EOPNOTSUPP
90#else
91# define ENANOSLEEP_NOTSUP ENOTSUP
92#endif
84 93
85/* 94/*
86 * The timer ID is turned into a timer address by idr_find(). 95 * The timer ID is turned into a timer address by idr_find().
@@ -94,11 +103,7 @@ static DEFINE_SPINLOCK(idr_lock);
94/* 103/*
95 * CLOCKs: The POSIX standard calls for a couple of clocks and allows us 104 * CLOCKs: The POSIX standard calls for a couple of clocks and allows us
96 * to implement others. This structure defines the various 105 * to implement others. This structure defines the various
97 * clocks and allows the possibility of adding others. We 106 * clocks.
98 * provide an interface to add clocks to the table and expect
99 * the "arch" code to add at least one clock that is high
100 * resolution. Here we define the standard CLOCK_REALTIME as a
101 * 1/HZ resolution clock.
102 * 107 *
103 * RESOLUTION: Clock resolution is used to round up timer and interval 108 * RESOLUTION: Clock resolution is used to round up timer and interval
104 * times, NOT to report clock times, which are reported with as 109 * times, NOT to report clock times, which are reported with as
@@ -108,20 +113,13 @@ static DEFINE_SPINLOCK(idr_lock);
108 * necessary code is written. The standard says we should say 113 * necessary code is written. The standard says we should say
109 * something about this issue in the documentation... 114 * something about this issue in the documentation...
110 * 115 *
111 * FUNCTIONS: The CLOCKs structure defines possible functions to handle 116 * FUNCTIONS: The CLOCKs structure defines possible functions to
112 * various clock functions. For clocks that use the standard 117 * handle various clock functions.
113 * system timer code these entries should be NULL. This will
114 * allow dispatch without the overhead of indirect function
115 * calls. CLOCKS that depend on other sources (e.g. WWV or GPS)
116 * must supply functions here, even if the function just returns
117 * ENOSYS. The standard POSIX timer management code assumes the
118 * following: 1.) The k_itimer struct (sched.h) is used for the
119 * timer. 2.) The list, it_lock, it_clock, it_id and it_pid
120 * fields are not modified by timer code.
121 * 118 *
122 * At this time all functions EXCEPT clock_nanosleep can be 119 * The standard POSIX timer management code assumes the
123 * redirected by the CLOCKS structure. Clock_nanosleep is in 120 * following: 1.) The k_itimer struct (sched.h) is used for
124 * there, but the code ignores it. 121 * the timer. 2.) The list, it_lock, it_clock, it_id and
122 * it_pid fields are not modified by timer code.
125 * 123 *
126 * Permissions: It is assumed that the clock_settime() function defined 124 * Permissions: It is assumed that the clock_settime() function defined
127 * for each clock will take care of permission checks. Some 125 * for each clock will take care of permission checks. Some
@@ -138,6 +136,7 @@ static struct k_clock posix_clocks[MAX_CLOCKS];
138 */ 136 */
139static int common_nsleep(const clockid_t, int flags, struct timespec *t, 137static int common_nsleep(const clockid_t, int flags, struct timespec *t,
140 struct timespec __user *rmtp); 138 struct timespec __user *rmtp);
139static int common_timer_create(struct k_itimer *new_timer);
141static void common_timer_get(struct k_itimer *, struct itimerspec *); 140static void common_timer_get(struct k_itimer *, struct itimerspec *);
142static int common_timer_set(struct k_itimer *, int, 141static int common_timer_set(struct k_itimer *, int,
143 struct itimerspec *, struct itimerspec *); 142 struct itimerspec *, struct itimerspec *);
@@ -158,76 +157,24 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
158 spin_unlock_irqrestore(&timr->it_lock, flags); 157 spin_unlock_irqrestore(&timr->it_lock, flags);
159} 158}
160 159
161/* 160/* Get clock_realtime */
162 * Call the k_clock hook function if non-null, or the default function. 161static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
163 */
164#define CLOCK_DISPATCH(clock, call, arglist) \
165 ((clock) < 0 ? posix_cpu_##call arglist : \
166 (posix_clocks[clock].call != NULL \
167 ? (*posix_clocks[clock].call) arglist : common_##call arglist))
168
169/*
170 * Default clock hook functions when the struct k_clock passed
171 * to register_posix_clock leaves a function pointer null.
172 *
173 * The function common_CALL is the default implementation for
174 * the function pointer CALL in struct k_clock.
175 */
176
177static inline int common_clock_getres(const clockid_t which_clock,
178 struct timespec *tp)
179{
180 tp->tv_sec = 0;
181 tp->tv_nsec = posix_clocks[which_clock].res;
182 return 0;
183}
184
185/*
186 * Get real time for posix timers
187 */
188static int common_clock_get(clockid_t which_clock, struct timespec *tp)
189{ 162{
190 ktime_get_real_ts(tp); 163 ktime_get_real_ts(tp);
191 return 0; 164 return 0;
192} 165}
193 166
194static inline int common_clock_set(const clockid_t which_clock, 167/* Set clock_realtime */
195 struct timespec *tp) 168static int posix_clock_realtime_set(const clockid_t which_clock,
169 const struct timespec *tp)
196{ 170{
197 return do_sys_settimeofday(tp, NULL); 171 return do_sys_settimeofday(tp, NULL);
198} 172}
199 173
200static int common_timer_create(struct k_itimer *new_timer) 174static int posix_clock_realtime_adj(const clockid_t which_clock,
201{ 175 struct timex *t)
202 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
203 return 0;
204}
205
206static int no_timer_create(struct k_itimer *new_timer)
207{
208 return -EOPNOTSUPP;
209}
210
211static int no_nsleep(const clockid_t which_clock, int flags,
212 struct timespec *tsave, struct timespec __user *rmtp)
213{
214 return -EOPNOTSUPP;
215}
216
217/*
218 * Return nonzero if we know a priori this clockid_t value is bogus.
219 */
220static inline int invalid_clockid(const clockid_t which_clock)
221{ 176{
222 if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */ 177 return do_adjtimex(t);
223 return 0;
224 if ((unsigned) which_clock >= MAX_CLOCKS)
225 return 1;
226 if (posix_clocks[which_clock].clock_getres != NULL)
227 return 0;
228 if (posix_clocks[which_clock].res != 0)
229 return 0;
230 return 1;
231} 178}
232 179
233/* 180/*
@@ -240,7 +187,7 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
240} 187}
241 188
242/* 189/*
243 * Get monotonic time for posix timers 190 * Get monotonic-raw time for posix timers
244 */ 191 */
245static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) 192static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
246{ 193{
@@ -267,46 +214,70 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp
267 *tp = ktime_to_timespec(KTIME_LOW_RES); 214 *tp = ktime_to_timespec(KTIME_LOW_RES);
268 return 0; 215 return 0;
269} 216}
217
218static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
219{
220 get_monotonic_boottime(tp);
221 return 0;
222}
223
224
270/* 225/*
271 * Initialize everything, well, just everything in Posix clocks/timers ;) 226 * Initialize everything, well, just everything in Posix clocks/timers ;)
272 */ 227 */
273static __init int init_posix_timers(void) 228static __init int init_posix_timers(void)
274{ 229{
275 struct k_clock clock_realtime = { 230 struct k_clock clock_realtime = {
276 .clock_getres = hrtimer_get_res, 231 .clock_getres = hrtimer_get_res,
232 .clock_get = posix_clock_realtime_get,
233 .clock_set = posix_clock_realtime_set,
234 .clock_adj = posix_clock_realtime_adj,
235 .nsleep = common_nsleep,
236 .nsleep_restart = hrtimer_nanosleep_restart,
237 .timer_create = common_timer_create,
238 .timer_set = common_timer_set,
239 .timer_get = common_timer_get,
240 .timer_del = common_timer_del,
277 }; 241 };
278 struct k_clock clock_monotonic = { 242 struct k_clock clock_monotonic = {
279 .clock_getres = hrtimer_get_res, 243 .clock_getres = hrtimer_get_res,
280 .clock_get = posix_ktime_get_ts, 244 .clock_get = posix_ktime_get_ts,
281 .clock_set = do_posix_clock_nosettime, 245 .nsleep = common_nsleep,
246 .nsleep_restart = hrtimer_nanosleep_restart,
247 .timer_create = common_timer_create,
248 .timer_set = common_timer_set,
249 .timer_get = common_timer_get,
250 .timer_del = common_timer_del,
282 }; 251 };
283 struct k_clock clock_monotonic_raw = { 252 struct k_clock clock_monotonic_raw = {
284 .clock_getres = hrtimer_get_res, 253 .clock_getres = hrtimer_get_res,
285 .clock_get = posix_get_monotonic_raw, 254 .clock_get = posix_get_monotonic_raw,
286 .clock_set = do_posix_clock_nosettime,
287 .timer_create = no_timer_create,
288 .nsleep = no_nsleep,
289 }; 255 };
290 struct k_clock clock_realtime_coarse = { 256 struct k_clock clock_realtime_coarse = {
291 .clock_getres = posix_get_coarse_res, 257 .clock_getres = posix_get_coarse_res,
292 .clock_get = posix_get_realtime_coarse, 258 .clock_get = posix_get_realtime_coarse,
293 .clock_set = do_posix_clock_nosettime,
294 .timer_create = no_timer_create,
295 .nsleep = no_nsleep,
296 }; 259 };
297 struct k_clock clock_monotonic_coarse = { 260 struct k_clock clock_monotonic_coarse = {
298 .clock_getres = posix_get_coarse_res, 261 .clock_getres = posix_get_coarse_res,
299 .clock_get = posix_get_monotonic_coarse, 262 .clock_get = posix_get_monotonic_coarse,
300 .clock_set = do_posix_clock_nosettime, 263 };
301 .timer_create = no_timer_create, 264 struct k_clock clock_boottime = {
302 .nsleep = no_nsleep, 265 .clock_getres = hrtimer_get_res,
266 .clock_get = posix_get_boottime,
267 .nsleep = common_nsleep,
268 .nsleep_restart = hrtimer_nanosleep_restart,
269 .timer_create = common_timer_create,
270 .timer_set = common_timer_set,
271 .timer_get = common_timer_get,
272 .timer_del = common_timer_del,
303 }; 273 };
304 274
305 register_posix_clock(CLOCK_REALTIME, &clock_realtime); 275 posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
306 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); 276 posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
307 register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); 277 posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
308 register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); 278 posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
309 register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); 279 posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
280 posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
310 281
311 posix_timers_cache = kmem_cache_create("posix_timers_cache", 282 posix_timers_cache = kmem_cache_create("posix_timers_cache",
312 sizeof (struct k_itimer), 0, SLAB_PANIC, 283 sizeof (struct k_itimer), 0, SLAB_PANIC,
@@ -482,17 +453,29 @@ static struct pid *good_sigevent(sigevent_t * event)
482 return task_pid(rtn); 453 return task_pid(rtn);
483} 454}
484 455
485void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) 456void posix_timers_register_clock(const clockid_t clock_id,
457 struct k_clock *new_clock)
486{ 458{
487 if ((unsigned) clock_id >= MAX_CLOCKS) { 459 if ((unsigned) clock_id >= MAX_CLOCKS) {
488 printk("POSIX clock register failed for clock_id %d\n", 460 printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
461 clock_id);
462 return;
463 }
464
465 if (!new_clock->clock_get) {
466 printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
467 clock_id);
468 return;
469 }
470 if (!new_clock->clock_getres) {
471 printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
489 clock_id); 472 clock_id);
490 return; 473 return;
491 } 474 }
492 475
493 posix_clocks[clock_id] = *new_clock; 476 posix_clocks[clock_id] = *new_clock;
494} 477}
495EXPORT_SYMBOL_GPL(register_posix_clock); 478EXPORT_SYMBOL_GPL(posix_timers_register_clock);
496 479
497static struct k_itimer * alloc_posix_timer(void) 480static struct k_itimer * alloc_posix_timer(void)
498{ 481{
@@ -523,19 +506,39 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
523 kmem_cache_free(posix_timers_cache, tmr); 506 kmem_cache_free(posix_timers_cache, tmr);
524} 507}
525 508
509static struct k_clock *clockid_to_kclock(const clockid_t id)
510{
511 if (id < 0)
512 return (id & CLOCKFD_MASK) == CLOCKFD ?
513 &clock_posix_dynamic : &clock_posix_cpu;
514
515 if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres)
516 return NULL;
517 return &posix_clocks[id];
518}
519
520static int common_timer_create(struct k_itimer *new_timer)
521{
522 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
523 return 0;
524}
525
526/* Create a POSIX.1b interval timer. */ 526/* Create a POSIX.1b interval timer. */
527 527
528SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, 528SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
529 struct sigevent __user *, timer_event_spec, 529 struct sigevent __user *, timer_event_spec,
530 timer_t __user *, created_timer_id) 530 timer_t __user *, created_timer_id)
531{ 531{
532 struct k_clock *kc = clockid_to_kclock(which_clock);
532 struct k_itimer *new_timer; 533 struct k_itimer *new_timer;
533 int error, new_timer_id; 534 int error, new_timer_id;
534 sigevent_t event; 535 sigevent_t event;
535 int it_id_set = IT_ID_NOT_SET; 536 int it_id_set = IT_ID_NOT_SET;
536 537
537 if (invalid_clockid(which_clock)) 538 if (!kc)
538 return -EINVAL; 539 return -EINVAL;
540 if (!kc->timer_create)
541 return -EOPNOTSUPP;
539 542
540 new_timer = alloc_posix_timer(); 543 new_timer = alloc_posix_timer();
541 if (unlikely(!new_timer)) 544 if (unlikely(!new_timer))
@@ -597,7 +600,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
597 goto out; 600 goto out;
598 } 601 }
599 602
600 error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); 603 error = kc->timer_create(new_timer);
601 if (error) 604 if (error)
602 goto out; 605 goto out;
603 606
@@ -607,7 +610,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
607 spin_unlock_irq(&current->sighand->siglock); 610 spin_unlock_irq(&current->sighand->siglock);
608 611
609 return 0; 612 return 0;
610 /* 613 /*
611 * In the case of the timer belonging to another task, after 614 * In the case of the timer belonging to another task, after
612 * the task is unlocked, the timer is owned by the other task 615 * the task is unlocked, the timer is owned by the other task
613 * and may cease to exist at any time. Don't use or modify 616 * and may cease to exist at any time. Don't use or modify
@@ -709,22 +712,28 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
709SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, 712SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
710 struct itimerspec __user *, setting) 713 struct itimerspec __user *, setting)
711{ 714{
712 struct k_itimer *timr;
713 struct itimerspec cur_setting; 715 struct itimerspec cur_setting;
716 struct k_itimer *timr;
717 struct k_clock *kc;
714 unsigned long flags; 718 unsigned long flags;
719 int ret = 0;
715 720
716 timr = lock_timer(timer_id, &flags); 721 timr = lock_timer(timer_id, &flags);
717 if (!timr) 722 if (!timr)
718 return -EINVAL; 723 return -EINVAL;
719 724
720 CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting)); 725 kc = clockid_to_kclock(timr->it_clock);
726 if (WARN_ON_ONCE(!kc || !kc->timer_get))
727 ret = -EINVAL;
728 else
729 kc->timer_get(timr, &cur_setting);
721 730
722 unlock_timer(timr, flags); 731 unlock_timer(timr, flags);
723 732
724 if (copy_to_user(setting, &cur_setting, sizeof (cur_setting))) 733 if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
725 return -EFAULT; 734 return -EFAULT;
726 735
727 return 0; 736 return ret;
728} 737}
729 738
730/* 739/*
@@ -813,6 +822,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
813 int error = 0; 822 int error = 0;
814 unsigned long flag; 823 unsigned long flag;
815 struct itimerspec *rtn = old_setting ? &old_spec : NULL; 824 struct itimerspec *rtn = old_setting ? &old_spec : NULL;
825 struct k_clock *kc;
816 826
817 if (!new_setting) 827 if (!new_setting)
818 return -EINVAL; 828 return -EINVAL;
@@ -828,8 +838,11 @@ retry:
828 if (!timr) 838 if (!timr)
829 return -EINVAL; 839 return -EINVAL;
830 840
831 error = CLOCK_DISPATCH(timr->it_clock, timer_set, 841 kc = clockid_to_kclock(timr->it_clock);
832 (timr, flags, &new_spec, rtn)); 842 if (WARN_ON_ONCE(!kc || !kc->timer_set))
843 error = -EINVAL;
844 else
845 error = kc->timer_set(timr, flags, &new_spec, rtn);
833 846
834 unlock_timer(timr, flag); 847 unlock_timer(timr, flag);
835 if (error == TIMER_RETRY) { 848 if (error == TIMER_RETRY) {
@@ -844,7 +857,7 @@ retry:
844 return error; 857 return error;
845} 858}
846 859
847static inline int common_timer_del(struct k_itimer *timer) 860static int common_timer_del(struct k_itimer *timer)
848{ 861{
849 timer->it.real.interval.tv64 = 0; 862 timer->it.real.interval.tv64 = 0;
850 863
@@ -855,7 +868,11 @@ static inline int common_timer_del(struct k_itimer *timer)
855 868
856static inline int timer_delete_hook(struct k_itimer *timer) 869static inline int timer_delete_hook(struct k_itimer *timer)
857{ 870{
858 return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer)); 871 struct k_clock *kc = clockid_to_kclock(timer->it_clock);
872
873 if (WARN_ON_ONCE(!kc || !kc->timer_del))
874 return -EINVAL;
875 return kc->timer_del(timer);
859} 876}
860 877
861/* Delete a POSIX.1b interval timer. */ 878/* Delete a POSIX.1b interval timer. */
@@ -927,69 +944,76 @@ void exit_itimers(struct signal_struct *sig)
927 } 944 }
928} 945}
929 946
930/* Not available / possible... functions */
931int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
932{
933 return -EINVAL;
934}
935EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
936
937int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
938 struct timespec *t, struct timespec __user *r)
939{
940#ifndef ENOTSUP
941 return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */
942#else /* parisc does define it separately. */
943 return -ENOTSUP;
944#endif
945}
946EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
947
948SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, 947SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
949 const struct timespec __user *, tp) 948 const struct timespec __user *, tp)
950{ 949{
950 struct k_clock *kc = clockid_to_kclock(which_clock);
951 struct timespec new_tp; 951 struct timespec new_tp;
952 952
953 if (invalid_clockid(which_clock)) 953 if (!kc || !kc->clock_set)
954 return -EINVAL; 954 return -EINVAL;
955
955 if (copy_from_user(&new_tp, tp, sizeof (*tp))) 956 if (copy_from_user(&new_tp, tp, sizeof (*tp)))
956 return -EFAULT; 957 return -EFAULT;
957 958
958 return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); 959 return kc->clock_set(which_clock, &new_tp);
959} 960}
960 961
961SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, 962SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
962 struct timespec __user *,tp) 963 struct timespec __user *,tp)
963{ 964{
965 struct k_clock *kc = clockid_to_kclock(which_clock);
964 struct timespec kernel_tp; 966 struct timespec kernel_tp;
965 int error; 967 int error;
966 968
967 if (invalid_clockid(which_clock)) 969 if (!kc)
968 return -EINVAL; 970 return -EINVAL;
969 error = CLOCK_DISPATCH(which_clock, clock_get, 971
970 (which_clock, &kernel_tp)); 972 error = kc->clock_get(which_clock, &kernel_tp);
973
971 if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) 974 if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
972 error = -EFAULT; 975 error = -EFAULT;
973 976
974 return error; 977 return error;
978}
979
980SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
981 struct timex __user *, utx)
982{
983 struct k_clock *kc = clockid_to_kclock(which_clock);
984 struct timex ktx;
985 int err;
986
987 if (!kc)
988 return -EINVAL;
989 if (!kc->clock_adj)
990 return -EOPNOTSUPP;
991
992 if (copy_from_user(&ktx, utx, sizeof(ktx)))
993 return -EFAULT;
994
995 err = kc->clock_adj(which_clock, &ktx);
996
997 if (!err && copy_to_user(utx, &ktx, sizeof(ktx)))
998 return -EFAULT;
975 999
1000 return err;
976} 1001}
977 1002
978SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, 1003SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
979 struct timespec __user *, tp) 1004 struct timespec __user *, tp)
980{ 1005{
1006 struct k_clock *kc = clockid_to_kclock(which_clock);
981 struct timespec rtn_tp; 1007 struct timespec rtn_tp;
982 int error; 1008 int error;
983 1009
984 if (invalid_clockid(which_clock)) 1010 if (!kc)
985 return -EINVAL; 1011 return -EINVAL;
986 1012
987 error = CLOCK_DISPATCH(which_clock, clock_getres, 1013 error = kc->clock_getres(which_clock, &rtn_tp);
988 (which_clock, &rtn_tp));
989 1014
990 if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) { 1015 if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
991 error = -EFAULT; 1016 error = -EFAULT;
992 }
993 1017
994 return error; 1018 return error;
995} 1019}
@@ -1009,10 +1033,13 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
1009 const struct timespec __user *, rqtp, 1033 const struct timespec __user *, rqtp,
1010 struct timespec __user *, rmtp) 1034 struct timespec __user *, rmtp)
1011{ 1035{
1036 struct k_clock *kc = clockid_to_kclock(which_clock);
1012 struct timespec t; 1037 struct timespec t;
1013 1038
1014 if (invalid_clockid(which_clock)) 1039 if (!kc)
1015 return -EINVAL; 1040 return -EINVAL;
1041 if (!kc->nsleep)
1042 return -ENANOSLEEP_NOTSUP;
1016 1043
1017 if (copy_from_user(&t, rqtp, sizeof (struct timespec))) 1044 if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
1018 return -EFAULT; 1045 return -EFAULT;
@@ -1020,27 +1047,20 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
1020 if (!timespec_valid(&t)) 1047 if (!timespec_valid(&t))
1021 return -EINVAL; 1048 return -EINVAL;
1022 1049
1023 return CLOCK_DISPATCH(which_clock, nsleep, 1050 return kc->nsleep(which_clock, flags, &t, rmtp);
1024 (which_clock, flags, &t, rmtp));
1025}
1026
1027/*
1028 * nanosleep_restart for monotonic and realtime clocks
1029 */
1030static int common_nsleep_restart(struct restart_block *restart_block)
1031{
1032 return hrtimer_nanosleep_restart(restart_block);
1033} 1051}
1034 1052
1035/* 1053/*
1036 * This will restart clock_nanosleep. This is required only by 1054 * This will restart clock_nanosleep. This is required only by
1037 * compat_clock_nanosleep_restart for now. 1055 * compat_clock_nanosleep_restart for now.
1038 */ 1056 */
1039long 1057long clock_nanosleep_restart(struct restart_block *restart_block)
1040clock_nanosleep_restart(struct restart_block *restart_block)
1041{ 1058{
1042 clockid_t which_clock = restart_block->arg0; 1059 clockid_t which_clock = restart_block->nanosleep.index;
1060 struct k_clock *kc = clockid_to_kclock(which_clock);
1061
1062 if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
1063 return -EINVAL;
1043 1064
1044 return CLOCK_DISPATCH(which_clock, nsleep_restart, 1065 return kc->nsleep_restart(restart_block);
1045 (restart_block));
1046} 1066}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 265729966ece..4603f08dc47b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -1,125 +1,12 @@
1config PM
2 bool "Power Management support"
3 depends on !IA64_HP_SIM
4 ---help---
5 "Power Management" means that parts of your computer are shut
6 off or put into a power conserving "sleep" mode if they are not
7 being used. There are two competing standards for doing this: APM
8 and ACPI. If you want to use either one, say Y here and then also
9 to the requisite support below.
10
11 Power Management is most important for battery powered laptop
12 computers; if you have a laptop, check out the Linux Laptop home
13 page on the WWW at <http://www.linux-on-laptops.com/> or
14 Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
15 and the Battery Powered Linux mini-HOWTO, available from
16 <http://www.tldp.org/docs.html#howto>.
17
18 Note that, even if you say N here, Linux on the x86 architecture
19 will issue the hlt instruction if nothing is to be done, thereby
20 sending the processor to sleep and saving power.
21
22config PM_DEBUG
23 bool "Power Management Debug Support"
24 depends on PM
25 ---help---
26 This option enables various debugging support in the Power Management
27 code. This is helpful when debugging and reporting PM bugs, like
28 suspend support.
29
30config PM_ADVANCED_DEBUG
31 bool "Extra PM attributes in sysfs for low-level debugging/testing"
32 depends on PM_DEBUG
33 default n
34 ---help---
35 Add extra sysfs attributes allowing one to access some Power Management
36 fields of device objects from user space. If you are not a kernel
37 developer interested in debugging/testing Power Management, say "no".
38
39config PM_VERBOSE
40 bool "Verbose Power Management debugging"
41 depends on PM_DEBUG
42 default n
43 ---help---
44 This option enables verbose messages from the Power Management code.
45
46config CAN_PM_TRACE
47 def_bool y
48 depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
49
50config PM_TRACE
51 bool
52 help
53 This enables code to save the last PM event point across
54 reboot. The architecture needs to support this, x86 for
55 example does by saving things in the RTC, see below.
56
57 The architecture specific code must provide the extern
58 functions from <linux/resume-trace.h> as well as the
59 <asm/resume-trace.h> header with a TRACE_RESUME() macro.
60
61 The way the information is presented is architecture-
62 dependent, x86 will print the information during a
63 late_initcall.
64
65config PM_TRACE_RTC
66 bool "Suspend/resume event tracing"
67 depends on CAN_PM_TRACE
68 depends on X86
69 select PM_TRACE
70 default n
71 ---help---
72 This enables some cheesy code to save the last PM event point in the
73 RTC across reboots, so that you can debug a machine that just hangs
74 during suspend (or more commonly, during resume).
75
76 To use this debugging feature you should attempt to suspend the
77 machine, reboot it and then run
78
79 dmesg -s 1000000 | grep 'hash matches'
80
81 CAUTION: this option will cause your machine's real-time clock to be
82 set to an invalid time after a resume.
83
84config PM_SLEEP_SMP
85 bool
86 depends on SMP
87 depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
88 depends on PM_SLEEP
89 select HOTPLUG
90 select HOTPLUG_CPU
91 default y
92
93config PM_SLEEP
94 bool
95 depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
96 default y
97
98config PM_SLEEP_ADVANCED_DEBUG
99 bool
100 depends on PM_ADVANCED_DEBUG
101 default n
102
103config SUSPEND 1config SUSPEND
104 bool "Suspend to RAM and standby" 2 bool "Suspend to RAM and standby"
105 depends on PM && ARCH_SUSPEND_POSSIBLE 3 depends on ARCH_SUSPEND_POSSIBLE
106 default y 4 default y
107 ---help--- 5 ---help---
108 Allow the system to enter sleep states in which main memory is 6 Allow the system to enter sleep states in which main memory is
109 powered and thus its contents are preserved, such as the 7 powered and thus its contents are preserved, such as the
110 suspend-to-RAM state (e.g. the ACPI S3 state). 8 suspend-to-RAM state (e.g. the ACPI S3 state).
111 9
112config PM_TEST_SUSPEND
113 bool "Test suspend/resume and wakealarm during bootup"
114 depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
115 ---help---
116 This option will let you suspend your machine during bootup, and
117 make it wake up a few seconds later using an RTC wakeup alarm.
118 Enable this with a kernel parameter like "test_suspend=mem".
119
120 You probably want to have your system's RTC driver statically
121 linked, ensuring that it's available when this test runs.
122
123config SUSPEND_FREEZER 10config SUSPEND_FREEZER
124 bool "Enable freezer for suspend to RAM/standby" \ 11 bool "Enable freezer for suspend to RAM/standby" \
125 if ARCH_WANTS_FREEZER_CONTROL || BROKEN 12 if ARCH_WANTS_FREEZER_CONTROL || BROKEN
@@ -133,7 +20,7 @@ config SUSPEND_FREEZER
133 20
134config HIBERNATION 21config HIBERNATION
135 bool "Hibernation (aka 'suspend to disk')" 22 bool "Hibernation (aka 'suspend to disk')"
136 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE 23 depends on SWAP && ARCH_HIBERNATION_POSSIBLE
137 select LZO_COMPRESS 24 select LZO_COMPRESS
138 select LZO_DECOMPRESS 25 select LZO_DECOMPRESS
139 ---help--- 26 ---help---
@@ -196,6 +83,106 @@ config PM_STD_PARTITION
196 suspended image to. It will simply pick the first available swap 83 suspended image to. It will simply pick the first available swap
197 device. 84 device.
198 85
86config PM_SLEEP
87 def_bool y
88 depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
89
90config PM_SLEEP_SMP
91 def_bool y
92 depends on SMP
93 depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
94 depends on PM_SLEEP
95 select HOTPLUG
96 select HOTPLUG_CPU
97
98config PM_RUNTIME
99 bool "Run-time PM core functionality"
100 depends on !IA64_HP_SIM
101 ---help---
102 Enable functionality allowing I/O devices to be put into energy-saving
103 (low power) states at run time (or autosuspended) after a specified
104 period of inactivity and woken up in response to a hardware-generated
105 wake-up event or a driver's request.
106
107 Hardware support is generally required for this functionality to work
108 and the bus type drivers of the buses the devices are on are
109 responsible for the actual handling of the autosuspend requests and
110 wake-up events.
111
112config PM
113 def_bool y
114 depends on PM_SLEEP || PM_RUNTIME
115
116config PM_DEBUG
117 bool "Power Management Debug Support"
118 depends on PM
119 ---help---
120 This option enables various debugging support in the Power Management
121 code. This is helpful when debugging and reporting PM bugs, like
122 suspend support.
123
124config PM_VERBOSE
125 bool "Verbose Power Management debugging"
126 depends on PM_DEBUG
127 ---help---
128 This option enables verbose messages from the Power Management code.
129
130config PM_ADVANCED_DEBUG
131 bool "Extra PM attributes in sysfs for low-level debugging/testing"
132 depends on PM_DEBUG
133 ---help---
134 Add extra sysfs attributes allowing one to access some Power Management
135 fields of device objects from user space. If you are not a kernel
136 developer interested in debugging/testing Power Management, say "no".
137
138config PM_TEST_SUSPEND
139 bool "Test suspend/resume and wakealarm during bootup"
140 depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
141 ---help---
142 This option will let you suspend your machine during bootup, and
143 make it wake up a few seconds later using an RTC wakeup alarm.
144 Enable this with a kernel parameter like "test_suspend=mem".
145
146 You probably want to have your system's RTC driver statically
147 linked, ensuring that it's available when this test runs.
148
149config CAN_PM_TRACE
150 def_bool y
151 depends on PM_DEBUG && PM_SLEEP
152
153config PM_TRACE
154 bool
155 help
156 This enables code to save the last PM event point across
157 reboot. The architecture needs to support this, x86 for
158 example does by saving things in the RTC, see below.
159
160 The architecture specific code must provide the extern
161 functions from <linux/resume-trace.h> as well as the
162 <asm/resume-trace.h> header with a TRACE_RESUME() macro.
163
164 The way the information is presented is architecture-
165 dependent, x86 will print the information during a
166 late_initcall.
167
168config PM_TRACE_RTC
169 bool "Suspend/resume event tracing"
170 depends on CAN_PM_TRACE
171 depends on X86
172 select PM_TRACE
173 ---help---
174 This enables some cheesy code to save the last PM event point in the
175 RTC across reboots, so that you can debug a machine that just hangs
176 during suspend (or more commonly, during resume).
177
178 To use this debugging feature you should attempt to suspend the
179 machine, reboot it and then run
180
181 dmesg -s 1000000 | grep 'hash matches'
182
183 CAUTION: this option will cause your machine's real-time clock to be
184 set to an invalid time after a resume.
185
199config APM_EMULATION 186config APM_EMULATION
200 tristate "Advanced Power Management Emulation" 187 tristate "Advanced Power Management Emulation"
201 depends on PM && SYS_SUPPORTS_APM_EMULATION 188 depends on PM && SYS_SUPPORTS_APM_EMULATION
@@ -222,31 +209,11 @@ config APM_EMULATION
222 anything, try disabling/enabling this option (or disabling/enabling 209 anything, try disabling/enabling this option (or disabling/enabling
223 APM in your BIOS). 210 APM in your BIOS).
224 211
225config PM_RUNTIME
226 bool "Run-time PM core functionality"
227 depends on PM
228 ---help---
229 Enable functionality allowing I/O devices to be put into energy-saving
230 (low power) states at run time (or autosuspended) after a specified
231 period of inactivity and woken up in response to a hardware-generated
232 wake-up event or a driver's request.
233
234 Hardware support is generally required for this functionality to work
235 and the bus type drivers of the buses the devices are on are
236 responsible for the actual handling of the autosuspend requests and
237 wake-up events.
238
239config PM_OPS
240 bool
241 depends on PM_SLEEP || PM_RUNTIME
242 default y
243
244config ARCH_HAS_OPP 212config ARCH_HAS_OPP
245 bool 213 bool
246 214
247config PM_OPP 215config PM_OPP
248 bool "Operating Performance Point (OPP) Layer library" 216 bool "Operating Performance Point (OPP) Layer library"
249 depends on PM
250 depends on ARCH_HAS_OPP 217 depends on ARCH_HAS_OPP
251 ---help--- 218 ---help---
252 SOCs have a standard set of tuples consisting of frequency and 219 SOCs have a standard set of tuples consisting of frequency and
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1832bd264219..aeabd26e3342 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -23,6 +23,7 @@
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/gfp.h> 25#include <linux/gfp.h>
26#include <linux/syscore_ops.h>
26#include <scsi/scsi_scan.h> 27#include <scsi/scsi_scan.h>
27#include <asm/suspend.h> 28#include <asm/suspend.h>
28 29
@@ -272,6 +273,8 @@ static int create_image(int platform_mode)
272 local_irq_disable(); 273 local_irq_disable();
273 274
274 error = sysdev_suspend(PMSG_FREEZE); 275 error = sysdev_suspend(PMSG_FREEZE);
276 if (!error)
277 error = syscore_suspend();
275 if (error) { 278 if (error) {
276 printk(KERN_ERR "PM: Some system devices failed to power down, " 279 printk(KERN_ERR "PM: Some system devices failed to power down, "
277 "aborting hibernation\n"); 280 "aborting hibernation\n");
@@ -295,6 +298,7 @@ static int create_image(int platform_mode)
295 } 298 }
296 299
297 Power_up: 300 Power_up:
301 syscore_resume();
298 sysdev_resume(); 302 sysdev_resume();
299 /* NOTE: dpm_resume_noirq() is just a resume() for devices 303 /* NOTE: dpm_resume_noirq() is just a resume() for devices
300 * that suspended with irqs off ... no overall powerup. 304 * that suspended with irqs off ... no overall powerup.
@@ -403,6 +407,8 @@ static int resume_target_kernel(bool platform_mode)
403 local_irq_disable(); 407 local_irq_disable();
404 408
405 error = sysdev_suspend(PMSG_QUIESCE); 409 error = sysdev_suspend(PMSG_QUIESCE);
410 if (!error)
411 error = syscore_suspend();
406 if (error) 412 if (error)
407 goto Enable_irqs; 413 goto Enable_irqs;
408 414
@@ -429,6 +435,7 @@ static int resume_target_kernel(bool platform_mode)
429 restore_processor_state(); 435 restore_processor_state();
430 touch_softlockup_watchdog(); 436 touch_softlockup_watchdog();
431 437
438 syscore_resume();
432 sysdev_resume(); 439 sysdev_resume();
433 440
434 Enable_irqs: 441 Enable_irqs:
@@ -516,6 +523,7 @@ int hibernation_platform_enter(void)
516 523
517 local_irq_disable(); 524 local_irq_disable();
518 sysdev_suspend(PMSG_HIBERNATE); 525 sysdev_suspend(PMSG_HIBERNATE);
526 syscore_suspend();
519 if (pm_wakeup_pending()) { 527 if (pm_wakeup_pending()) {
520 error = -EAGAIN; 528 error = -EAGAIN;
521 goto Power_up; 529 goto Power_up;
@@ -526,6 +534,7 @@ int hibernation_platform_enter(void)
526 while (1); 534 while (1);
527 535
528 Power_up: 536 Power_up:
537 syscore_resume();
529 sysdev_resume(); 538 sysdev_resume();
530 local_irq_enable(); 539 local_irq_enable();
531 enable_nonboot_cpus(); 540 enable_nonboot_cpus();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7b5db6a8561e..8eaba5f27b10 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -17,9 +17,6 @@
17 17
18DEFINE_MUTEX(pm_mutex); 18DEFINE_MUTEX(pm_mutex);
19 19
20unsigned int pm_flags;
21EXPORT_SYMBOL(pm_flags);
22
23#ifdef CONFIG_PM_SLEEP 20#ifdef CONFIG_PM_SLEEP
24 21
25/* Routines for PM-transition notifications */ 22/* Routines for PM-transition notifications */
@@ -326,7 +323,7 @@ EXPORT_SYMBOL_GPL(pm_wq);
326 323
327static int __init pm_start_workqueue(void) 324static int __init pm_start_workqueue(void)
328{ 325{
329 pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0); 326 pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0);
330 327
331 return pm_wq ? 0 : -ENOMEM; 328 return pm_wq ? 0 : -ENOMEM;
332} 329}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d6d2a10320e0..0cf3a27a6c9d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,7 +22,7 @@
22 */ 22 */
23#define TIMEOUT (20 * HZ) 23#define TIMEOUT (20 * HZ)
24 24
25static inline int freezeable(struct task_struct * p) 25static inline int freezable(struct task_struct * p)
26{ 26{
27 if ((p == current) || 27 if ((p == current) ||
28 (p->flags & PF_NOFREEZE) || 28 (p->flags & PF_NOFREEZE) ||
@@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only)
53 todo = 0; 53 todo = 0;
54 read_lock(&tasklist_lock); 54 read_lock(&tasklist_lock);
55 do_each_thread(g, p) { 55 do_each_thread(g, p) {
56 if (frozen(p) || !freezeable(p)) 56 if (frozen(p) || !freezable(p))
57 continue; 57 continue;
58 58
59 if (!freeze_task(p, sig_only)) 59 if (!freeze_task(p, sig_only))
@@ -167,7 +167,7 @@ static void thaw_tasks(bool nosig_only)
167 167
168 read_lock(&tasklist_lock); 168 read_lock(&tasklist_lock);
169 do_each_thread(g, p) { 169 do_each_thread(g, p) {
170 if (!freezeable(p)) 170 if (!freezable(p))
171 continue; 171 continue;
172 172
173 if (nosig_only && should_send_signal(p)) 173 if (nosig_only && should_send_signal(p))
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 0dac75ea4456..ca0aacc24874 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -42,15 +42,15 @@ static void swsusp_unset_page_forbidden(struct page *);
42 42
43/* 43/*
44 * Preferred image size in bytes (tunable via /sys/power/image_size). 44 * Preferred image size in bytes (tunable via /sys/power/image_size).
45 * When it is set to N, swsusp will do its best to ensure the image 45 * When it is set to N, the image creating code will do its best to
46 * size will not exceed N bytes, but if that is impossible, it will 46 * ensure the image size will not exceed N bytes, but if that is
47 * try to create the smallest image possible. 47 * impossible, it will try to create the smallest image possible.
48 */ 48 */
49unsigned long image_size; 49unsigned long image_size;
50 50
51void __init hibernate_image_size_init(void) 51void __init hibernate_image_size_init(void)
52{ 52{
53 image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; 53 image_size = (totalram_pages / 3) * PAGE_SIZE;
54} 54}
55 55
56/* List of PBEs needed for restoring the pages that were allocated before 56/* List of PBEs needed for restoring the pages that were allocated before
@@ -1519,11 +1519,8 @@ static int
1519swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, 1519swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1520 unsigned int nr_pages, unsigned int nr_highmem) 1520 unsigned int nr_pages, unsigned int nr_highmem)
1521{ 1521{
1522 int error = 0;
1523
1524 if (nr_highmem > 0) { 1522 if (nr_highmem > 0) {
1525 error = get_highmem_buffer(PG_ANY); 1523 if (get_highmem_buffer(PG_ANY))
1526 if (error)
1527 goto err_out; 1524 goto err_out;
1528 if (nr_highmem > alloc_highmem) { 1525 if (nr_highmem > alloc_highmem) {
1529 nr_highmem -= alloc_highmem; 1526 nr_highmem -= alloc_highmem;
@@ -1546,7 +1543,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1546 1543
1547 err_out: 1544 err_out:
1548 swsusp_free(); 1545 swsusp_free();
1549 return error; 1546 return -ENOMEM;
1550} 1547}
1551 1548
1552asmlinkage int swsusp_save(void) 1549asmlinkage int swsusp_save(void)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index de6f86bfa303..2814c32aed51 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -22,6 +22,7 @@
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/suspend.h> 24#include <linux/suspend.h>
25#include <linux/syscore_ops.h>
25#include <trace/events/power.h> 26#include <trace/events/power.h>
26 27
27#include "power.h" 28#include "power.h"
@@ -163,11 +164,14 @@ static int suspend_enter(suspend_state_t state)
163 BUG_ON(!irqs_disabled()); 164 BUG_ON(!irqs_disabled());
164 165
165 error = sysdev_suspend(PMSG_SUSPEND); 166 error = sysdev_suspend(PMSG_SUSPEND);
167 if (!error)
168 error = syscore_suspend();
166 if (!error) { 169 if (!error) {
167 if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { 170 if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
168 error = suspend_ops->enter(state); 171 error = suspend_ops->enter(state);
169 events_check_enabled = false; 172 events_check_enabled = false;
170 } 173 }
174 syscore_resume();
171 sysdev_resume(); 175 sysdev_resume();
172 } 176 }
173 177
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1708b1e2972d..e2302e40b360 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -163,7 +163,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
163 return !err; 163 return !err;
164} 164}
165 165
166int ptrace_attach(struct task_struct *task) 166static int ptrace_attach(struct task_struct *task)
167{ 167{
168 int retval; 168 int retval;
169 169
@@ -219,7 +219,7 @@ out:
219 * Performs checks and sets PT_PTRACED. 219 * Performs checks and sets PT_PTRACED.
220 * Should be used by all ptrace implementations for PTRACE_TRACEME. 220 * Should be used by all ptrace implementations for PTRACE_TRACEME.
221 */ 221 */
222int ptrace_traceme(void) 222static int ptrace_traceme(void)
223{ 223{
224 int ret = -EPERM; 224 int ret = -EPERM;
225 225
@@ -293,7 +293,7 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
293 return false; 293 return false;
294} 294}
295 295
296int ptrace_detach(struct task_struct *child, unsigned int data) 296static int ptrace_detach(struct task_struct *child, unsigned int data)
297{ 297{
298 bool dead = false; 298 bool dead = false;
299 299
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a23a57a976d1..f3240e987928 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -214,11 +214,12 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
214 * Ensure that queued callbacks are all executed. 214 * Ensure that queued callbacks are all executed.
215 * If we detect that we are nested in a RCU read-side critical 215 * If we detect that we are nested in a RCU read-side critical
216 * section, we should simply fail, otherwise we would deadlock. 216 * section, we should simply fail, otherwise we would deadlock.
217 * Note that the machinery to reliably determine whether
218 * or not we are in an RCU read-side critical section
219 * exists only in the preemptible RCU implementations
220 * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why
221 * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT.
217 */ 222 */
218#ifndef CONFIG_PREEMPT
219 WARN_ON(1);
220 return 0;
221#else
222 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 223 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
223 irqs_disabled()) { 224 irqs_disabled()) {
224 WARN_ON(1); 225 WARN_ON(1);
@@ -229,7 +230,6 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
229 rcu_barrier_bh(); 230 rcu_barrier_bh();
230 debug_object_free(head, &rcuhead_debug_descr); 231 debug_object_free(head, &rcuhead_debug_descr);
231 return 1; 232 return 1;
232#endif
233 default: 233 default:
234 return 0; 234 return 0;
235 } 235 }
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 015abaea962a..3cb8e362e883 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -852,7 +852,7 @@ void exit_rcu(void)
852 if (t->rcu_read_lock_nesting == 0) 852 if (t->rcu_read_lock_nesting == 0)
853 return; 853 return;
854 t->rcu_read_lock_nesting = 1; 854 t->rcu_read_lock_nesting = 1;
855 rcu_read_unlock(); 855 __rcu_read_unlock();
856} 856}
857 857
858#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ 858#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 89613f97ff26..c224da41890c 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -47,7 +47,6 @@
47#include <linux/srcu.h> 47#include <linux/srcu.h>
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <asm/byteorder.h> 49#include <asm/byteorder.h>
50#include <linux/sched.h>
51 50
52MODULE_LICENSE("GPL"); 51MODULE_LICENSE("GPL");
53MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " 52MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index ddabb54bb5c8..3c7cbc2c33be 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -215,7 +215,6 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
215 put_pid(waiter->deadlock_task_pid); 215 put_pid(waiter->deadlock_task_pid);
216 TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); 216 TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
217 TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); 217 TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
218 TRACE_WARN_ON(waiter->task);
219 memset(waiter, 0x22, sizeof(*waiter)); 218 memset(waiter, 0x22, sizeof(*waiter));
220} 219}
221 220
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 66cb89bc5ef1..5c9ccd380966 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -9,7 +9,6 @@
9#include <linux/kthread.h> 9#include <linux/kthread.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/smp_lock.h>
13#include <linux/spinlock.h> 12#include <linux/spinlock.h>
14#include <linux/sysdev.h> 13#include <linux/sysdev.h>
15#include <linux/timer.h> 14#include <linux/timer.h>
@@ -27,7 +26,6 @@ struct test_thread_data {
27 int opcode; 26 int opcode;
28 int opdata; 27 int opdata;
29 int mutexes[MAX_RT_TEST_MUTEXES]; 28 int mutexes[MAX_RT_TEST_MUTEXES];
30 int bkl;
31 int event; 29 int event;
32 struct sys_device sysdev; 30 struct sys_device sysdev;
33}; 31};
@@ -46,9 +44,8 @@ enum test_opcodes {
46 RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ 44 RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
47 RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ 45 RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
48 RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ 46 RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
49 RTTEST_LOCKBKL, /* 9 Lock BKL */ 47 /* 9, 10 - reserved for BKL commemoration */
50 RTTEST_UNLOCKBKL, /* 10 Unlock BKL */ 48 RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */
51 RTTEST_SIGNAL, /* 11 Signal other test thread, data = thread id */
52 RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ 49 RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
53 RTTEST_RESET = 99, /* 99 Reset all pending operations */ 50 RTTEST_RESET = 99, /* 99 Reset all pending operations */
54}; 51};
@@ -74,13 +71,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
74 td->mutexes[i] = 0; 71 td->mutexes[i] = 0;
75 } 72 }
76 } 73 }
77
78 if (!lockwakeup && td->bkl == 4) {
79#ifdef CONFIG_LOCK_KERNEL
80 unlock_kernel();
81#endif
82 td->bkl = 0;
83 }
84 return 0; 74 return 0;
85 75
86 case RTTEST_RESETEVENT: 76 case RTTEST_RESETEVENT:
@@ -131,25 +121,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
131 td->mutexes[id] = 0; 121 td->mutexes[id] = 0;
132 return 0; 122 return 0;
133 123
134 case RTTEST_LOCKBKL:
135 if (td->bkl)
136 return 0;
137 td->bkl = 1;
138#ifdef CONFIG_LOCK_KERNEL
139 lock_kernel();
140#endif
141 td->bkl = 4;
142 return 0;
143
144 case RTTEST_UNLOCKBKL:
145 if (td->bkl != 4)
146 break;
147#ifdef CONFIG_LOCK_KERNEL
148 unlock_kernel();
149#endif
150 td->bkl = 0;
151 return 0;
152
153 default: 124 default:
154 break; 125 break;
155 } 126 }
@@ -196,7 +167,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
196 td->event = atomic_add_return(1, &rttest_event); 167 td->event = atomic_add_return(1, &rttest_event);
197 break; 168 break;
198 169
199 case RTTEST_LOCKBKL:
200 default: 170 default:
201 break; 171 break;
202 } 172 }
@@ -229,8 +199,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
229 td->event = atomic_add_return(1, &rttest_event); 199 td->event = atomic_add_return(1, &rttest_event);
230 return; 200 return;
231 201
232 case RTTEST_LOCKBKL:
233 return;
234 default: 202 default:
235 return; 203 return;
236 } 204 }
@@ -380,11 +348,11 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute
380 spin_lock(&rttest_lock); 348 spin_lock(&rttest_lock);
381 349
382 curr += sprintf(curr, 350 curr += sprintf(curr,
383 "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:", 351 "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
384 td->opcode, td->event, tsk->state, 352 td->opcode, td->event, tsk->state,
385 (MAX_RT_PRIO - 1) - tsk->prio, 353 (MAX_RT_PRIO - 1) - tsk->prio,
386 (MAX_RT_PRIO - 1) - tsk->normal_prio, 354 (MAX_RT_PRIO - 1) - tsk->normal_prio,
387 tsk->pi_blocked_on, td->bkl); 355 tsk->pi_blocked_on);
388 356
389 for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--) 357 for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
390 curr += sprintf(curr, "%d", td->mutexes[i]); 358 curr += sprintf(curr, "%d", td->mutexes[i]);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a9604815786a..ab449117aaf2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -20,41 +20,34 @@
20/* 20/*
21 * lock->owner state tracking: 21 * lock->owner state tracking:
22 * 22 *
23 * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1 23 * lock->owner holds the task_struct pointer of the owner. Bit 0
24 * are used to keep track of the "owner is pending" and "lock has 24 * is used to keep track of the "lock has waiters" state.
25 * waiters" state.
26 * 25 *
27 * owner bit1 bit0 26 * owner bit0
28 * NULL 0 0 lock is free (fast acquire possible) 27 * NULL 0 lock is free (fast acquire possible)
29 * NULL 0 1 invalid state 28 * NULL 1 lock is free and has waiters and the top waiter
30 * NULL 1 0 Transitional State* 29 * is going to take the lock*
31 * NULL 1 1 invalid state 30 * taskpointer 0 lock is held (fast release possible)
32 * taskpointer 0 0 lock is held (fast release possible) 31 * taskpointer 1 lock is held and has waiters**
33 * taskpointer 0 1 task is pending owner
34 * taskpointer 1 0 lock is held and has waiters
35 * taskpointer 1 1 task is pending owner and lock has more waiters
36 *
37 * Pending ownership is assigned to the top (highest priority)
38 * waiter of the lock, when the lock is released. The thread is woken
39 * up and can now take the lock. Until the lock is taken (bit 0
40 * cleared) a competing higher priority thread can steal the lock
41 * which puts the woken up thread back on the waiters list.
42 * 32 *
43 * The fast atomic compare exchange based acquire and release is only 33 * The fast atomic compare exchange based acquire and release is only
44 * possible when bit 0 and 1 of lock->owner are 0. 34 * possible when bit 0 of lock->owner is 0.
35 *
36 * (*) It also can be a transitional state when grabbing the lock
37 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
38 * we need to set the bit0 before looking at the lock, and the owner may be
39 * NULL in this small time, hence this can be a transitional state.
45 * 40 *
46 * (*) There's a small time where the owner can be NULL and the 41 * (**) There is a small time when bit 0 is set but there are no
47 * "lock has waiters" bit is set. This can happen when grabbing the lock. 42 * waiters. This can happen when grabbing the lock in the slow path.
48 * To prevent a cmpxchg of the owner releasing the lock, we need to set this 43 * To prevent a cmpxchg of the owner releasing the lock, we need to
49 * bit before looking at the lock, hence the reason this is a transitional 44 * set this bit before looking at the lock.
50 * state.
51 */ 45 */
52 46
53static void 47static void
54rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, 48rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
55 unsigned long mask)
56{ 49{
57 unsigned long val = (unsigned long)owner | mask; 50 unsigned long val = (unsigned long)owner;
58 51
59 if (rt_mutex_has_waiters(lock)) 52 if (rt_mutex_has_waiters(lock))
60 val |= RT_MUTEX_HAS_WAITERS; 53 val |= RT_MUTEX_HAS_WAITERS;
@@ -203,15 +196,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
203 * reached or the state of the chain has changed while we 196 * reached or the state of the chain has changed while we
204 * dropped the locks. 197 * dropped the locks.
205 */ 198 */
206 if (!waiter || !waiter->task) 199 if (!waiter)
207 goto out_unlock_pi; 200 goto out_unlock_pi;
208 201
209 /* 202 /*
210 * Check the orig_waiter state. After we dropped the locks, 203 * Check the orig_waiter state. After we dropped the locks,
211 * the previous owner of the lock might have released the lock 204 * the previous owner of the lock might have released the lock.
212 * and made us the pending owner:
213 */ 205 */
214 if (orig_waiter && !orig_waiter->task) 206 if (orig_waiter && !rt_mutex_owner(orig_lock))
215 goto out_unlock_pi; 207 goto out_unlock_pi;
216 208
217 /* 209 /*
@@ -254,6 +246,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
254 246
255 /* Release the task */ 247 /* Release the task */
256 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 248 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
249 if (!rt_mutex_owner(lock)) {
250 /*
251 * If the requeue above changed the top waiter, then we need
252 * to wake the new top waiter up to try to get the lock.
253 */
254
255 if (top_waiter != rt_mutex_top_waiter(lock))
256 wake_up_process(rt_mutex_top_waiter(lock)->task);
257 raw_spin_unlock(&lock->wait_lock);
258 goto out_put_task;
259 }
257 put_task_struct(task); 260 put_task_struct(task);
258 261
259 /* Grab the next task */ 262 /* Grab the next task */
@@ -296,78 +299,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
296} 299}
297 300
298/* 301/*
299 * Optimization: check if we can steal the lock from the
300 * assigned pending owner [which might not have taken the
301 * lock yet]:
302 */
303static inline int try_to_steal_lock(struct rt_mutex *lock,
304 struct task_struct *task)
305{
306 struct task_struct *pendowner = rt_mutex_owner(lock);
307 struct rt_mutex_waiter *next;
308 unsigned long flags;
309
310 if (!rt_mutex_owner_pending(lock))
311 return 0;
312
313 if (pendowner == task)
314 return 1;
315
316 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
317 if (task->prio >= pendowner->prio) {
318 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
319 return 0;
320 }
321
322 /*
323 * Check if a waiter is enqueued on the pending owners
324 * pi_waiters list. Remove it and readjust pending owners
325 * priority.
326 */
327 if (likely(!rt_mutex_has_waiters(lock))) {
328 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
329 return 1;
330 }
331
332 /* No chain handling, pending owner is not blocked on anything: */
333 next = rt_mutex_top_waiter(lock);
334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
335 __rt_mutex_adjust_prio(pendowner);
336 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
337
338 /*
339 * We are going to steal the lock and a waiter was
340 * enqueued on the pending owners pi_waiters queue. So
341 * we have to enqueue this waiter into
342 * task->pi_waiters list. This covers the case,
343 * where task is boosted because it holds another
344 * lock and gets unboosted because the booster is
345 * interrupted, so we would delay a waiter with higher
346 * priority as task->normal_prio.
347 *
348 * Note: in the rare case of a SCHED_OTHER task changing
349 * its priority and thus stealing the lock, next->task
350 * might be task:
351 */
352 if (likely(next->task != task)) {
353 raw_spin_lock_irqsave(&task->pi_lock, flags);
354 plist_add(&next->pi_list_entry, &task->pi_waiters);
355 __rt_mutex_adjust_prio(task);
356 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
357 }
358 return 1;
359}
360
361/*
362 * Try to take an rt-mutex 302 * Try to take an rt-mutex
363 * 303 *
364 * This fails
365 * - when the lock has a real owner
366 * - when a different pending owner exists and has higher priority than current
367 *
368 * Must be called with lock->wait_lock held. 304 * Must be called with lock->wait_lock held.
305 *
306 * @lock: the lock to be acquired.
307 * @task: the task which wants to acquire the lock
308 * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
369 */ 309 */
370static int try_to_take_rt_mutex(struct rt_mutex *lock) 310static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
311 struct rt_mutex_waiter *waiter)
371{ 312{
372 /* 313 /*
373 * We have to be careful here if the atomic speedups are 314 * We have to be careful here if the atomic speedups are
@@ -390,15 +331,52 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
390 */ 331 */
391 mark_rt_mutex_waiters(lock); 332 mark_rt_mutex_waiters(lock);
392 333
393 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) 334 if (rt_mutex_owner(lock))
394 return 0; 335 return 0;
395 336
337 /*
338 * It will get the lock because of one of these conditions:
339 * 1) there is no waiter
340 * 2) higher priority than waiters
341 * 3) it is top waiter
342 */
343 if (rt_mutex_has_waiters(lock)) {
344 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
345 if (!waiter || waiter != rt_mutex_top_waiter(lock))
346 return 0;
347 }
348 }
349
350 if (waiter || rt_mutex_has_waiters(lock)) {
351 unsigned long flags;
352 struct rt_mutex_waiter *top;
353
354 raw_spin_lock_irqsave(&task->pi_lock, flags);
355
356 /* remove the queued waiter. */
357 if (waiter) {
358 plist_del(&waiter->list_entry, &lock->wait_list);
359 task->pi_blocked_on = NULL;
360 }
361
362 /*
363 * We have to enqueue the top waiter(if it exists) into
364 * task->pi_waiters list.
365 */
366 if (rt_mutex_has_waiters(lock)) {
367 top = rt_mutex_top_waiter(lock);
368 top->pi_list_entry.prio = top->list_entry.prio;
369 plist_add(&top->pi_list_entry, &task->pi_waiters);
370 }
371 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
372 }
373
396 /* We got the lock. */ 374 /* We got the lock. */
397 debug_rt_mutex_lock(lock); 375 debug_rt_mutex_lock(lock);
398 376
399 rt_mutex_set_owner(lock, current, 0); 377 rt_mutex_set_owner(lock, task);
400 378
401 rt_mutex_deadlock_account_lock(lock, current); 379 rt_mutex_deadlock_account_lock(lock, task);
402 380
403 return 1; 381 return 1;
404} 382}
@@ -436,6 +414,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
436 414
437 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 415 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
438 416
417 if (!owner)
418 return 0;
419
439 if (waiter == rt_mutex_top_waiter(lock)) { 420 if (waiter == rt_mutex_top_waiter(lock)) {
440 raw_spin_lock_irqsave(&owner->pi_lock, flags); 421 raw_spin_lock_irqsave(&owner->pi_lock, flags);
441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 422 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -472,21 +453,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
472/* 453/*
473 * Wake up the next waiter on the lock. 454 * Wake up the next waiter on the lock.
474 * 455 *
475 * Remove the top waiter from the current tasks waiter list and from 456 * Remove the top waiter from the current tasks waiter list and wake it up.
476 * the lock waiter list. Set it as pending owner. Then wake it up.
477 * 457 *
478 * Called with lock->wait_lock held. 458 * Called with lock->wait_lock held.
479 */ 459 */
480static void wakeup_next_waiter(struct rt_mutex *lock) 460static void wakeup_next_waiter(struct rt_mutex *lock)
481{ 461{
482 struct rt_mutex_waiter *waiter; 462 struct rt_mutex_waiter *waiter;
483 struct task_struct *pendowner;
484 unsigned long flags; 463 unsigned long flags;
485 464
486 raw_spin_lock_irqsave(&current->pi_lock, flags); 465 raw_spin_lock_irqsave(&current->pi_lock, flags);
487 466
488 waiter = rt_mutex_top_waiter(lock); 467 waiter = rt_mutex_top_waiter(lock);
489 plist_del(&waiter->list_entry, &lock->wait_list);
490 468
491 /* 469 /*
492 * Remove it from current->pi_waiters. We do not adjust a 470 * Remove it from current->pi_waiters. We do not adjust a
@@ -495,43 +473,19 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
495 * lock->wait_lock. 473 * lock->wait_lock.
496 */ 474 */
497 plist_del(&waiter->pi_list_entry, &current->pi_waiters); 475 plist_del(&waiter->pi_list_entry, &current->pi_waiters);
498 pendowner = waiter->task;
499 waiter->task = NULL;
500 476
501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); 477 rt_mutex_set_owner(lock, NULL);
502 478
503 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 479 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
504 480
505 /* 481 wake_up_process(waiter->task);
506 * Clear the pi_blocked_on variable and enqueue a possible
507 * waiter into the pi_waiters list of the pending owner. This
508 * prevents that in case the pending owner gets unboosted a
509 * waiter with higher priority than pending-owner->normal_prio
510 * is blocked on the unboosted (pending) owner.
511 */
512 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
513
514 WARN_ON(!pendowner->pi_blocked_on);
515 WARN_ON(pendowner->pi_blocked_on != waiter);
516 WARN_ON(pendowner->pi_blocked_on->lock != lock);
517
518 pendowner->pi_blocked_on = NULL;
519
520 if (rt_mutex_has_waiters(lock)) {
521 struct rt_mutex_waiter *next;
522
523 next = rt_mutex_top_waiter(lock);
524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
525 }
526 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
527
528 wake_up_process(pendowner);
529} 482}
530 483
531/* 484/*
532 * Remove a waiter from a lock 485 * Remove a waiter from a lock and give up
533 * 486 *
534 * Must be called with lock->wait_lock held 487 * Must be called with lock->wait_lock held and
488 * have just failed to try_to_take_rt_mutex().
535 */ 489 */
536static void remove_waiter(struct rt_mutex *lock, 490static void remove_waiter(struct rt_mutex *lock,
537 struct rt_mutex_waiter *waiter) 491 struct rt_mutex_waiter *waiter)
@@ -543,11 +497,13 @@ static void remove_waiter(struct rt_mutex *lock,
543 497
544 raw_spin_lock_irqsave(&current->pi_lock, flags); 498 raw_spin_lock_irqsave(&current->pi_lock, flags);
545 plist_del(&waiter->list_entry, &lock->wait_list); 499 plist_del(&waiter->list_entry, &lock->wait_list);
546 waiter->task = NULL;
547 current->pi_blocked_on = NULL; 500 current->pi_blocked_on = NULL;
548 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 501 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
549 502
550 if (first && owner != current) { 503 if (!owner)
504 return;
505
506 if (first) {
551 507
552 raw_spin_lock_irqsave(&owner->pi_lock, flags); 508 raw_spin_lock_irqsave(&owner->pi_lock, flags);
553 509
@@ -614,21 +570,19 @@ void rt_mutex_adjust_pi(struct task_struct *task)
614 * or TASK_UNINTERRUPTIBLE) 570 * or TASK_UNINTERRUPTIBLE)
615 * @timeout: the pre-initialized and started timer, or NULL for none 571 * @timeout: the pre-initialized and started timer, or NULL for none
616 * @waiter: the pre-initialized rt_mutex_waiter 572 * @waiter: the pre-initialized rt_mutex_waiter
617 * @detect_deadlock: passed to task_blocks_on_rt_mutex
618 * 573 *
619 * lock->wait_lock must be held by the caller. 574 * lock->wait_lock must be held by the caller.
620 */ 575 */
621static int __sched 576static int __sched
622__rt_mutex_slowlock(struct rt_mutex *lock, int state, 577__rt_mutex_slowlock(struct rt_mutex *lock, int state,
623 struct hrtimer_sleeper *timeout, 578 struct hrtimer_sleeper *timeout,
624 struct rt_mutex_waiter *waiter, 579 struct rt_mutex_waiter *waiter)
625 int detect_deadlock)
626{ 580{
627 int ret = 0; 581 int ret = 0;
628 582
629 for (;;) { 583 for (;;) {
630 /* Try to acquire the lock: */ 584 /* Try to acquire the lock: */
631 if (try_to_take_rt_mutex(lock)) 585 if (try_to_take_rt_mutex(lock, current, waiter))
632 break; 586 break;
633 587
634 /* 588 /*
@@ -645,39 +599,11 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
645 break; 599 break;
646 } 600 }
647 601
648 /*
649 * waiter->task is NULL the first time we come here and
650 * when we have been woken up by the previous owner
651 * but the lock got stolen by a higher prio task.
652 */
653 if (!waiter->task) {
654 ret = task_blocks_on_rt_mutex(lock, waiter, current,
655 detect_deadlock);
656 /*
657 * If we got woken up by the owner then start loop
658 * all over without going into schedule to try
659 * to get the lock now:
660 */
661 if (unlikely(!waiter->task)) {
662 /*
663 * Reset the return value. We might
664 * have returned with -EDEADLK and the
665 * owner released the lock while we
666 * were walking the pi chain.
667 */
668 ret = 0;
669 continue;
670 }
671 if (unlikely(ret))
672 break;
673 }
674
675 raw_spin_unlock(&lock->wait_lock); 602 raw_spin_unlock(&lock->wait_lock);
676 603
677 debug_rt_mutex_print_deadlock(waiter); 604 debug_rt_mutex_print_deadlock(waiter);
678 605
679 if (waiter->task) 606 schedule_rt_mutex(lock);
680 schedule_rt_mutex(lock);
681 607
682 raw_spin_lock(&lock->wait_lock); 608 raw_spin_lock(&lock->wait_lock);
683 set_current_state(state); 609 set_current_state(state);
@@ -698,12 +624,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
698 int ret = 0; 624 int ret = 0;
699 625
700 debug_rt_mutex_init_waiter(&waiter); 626 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL;
702 627
703 raw_spin_lock(&lock->wait_lock); 628 raw_spin_lock(&lock->wait_lock);
704 629
705 /* Try to acquire the lock again: */ 630 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) { 631 if (try_to_take_rt_mutex(lock, current, NULL)) {
707 raw_spin_unlock(&lock->wait_lock); 632 raw_spin_unlock(&lock->wait_lock);
708 return 0; 633 return 0;
709 } 634 }
@@ -717,12 +642,14 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
717 timeout->task = NULL; 642 timeout->task = NULL;
718 } 643 }
719 644
720 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, 645 ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
721 detect_deadlock); 646
647 if (likely(!ret))
648 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
722 649
723 set_current_state(TASK_RUNNING); 650 set_current_state(TASK_RUNNING);
724 651
725 if (unlikely(waiter.task)) 652 if (unlikely(ret))
726 remove_waiter(lock, &waiter); 653 remove_waiter(lock, &waiter);
727 654
728 /* 655 /*
@@ -737,14 +664,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
737 if (unlikely(timeout)) 664 if (unlikely(timeout))
738 hrtimer_cancel(&timeout->timer); 665 hrtimer_cancel(&timeout->timer);
739 666
740 /*
741 * Readjust priority, when we did not get the lock. We might
742 * have been the pending owner and boosted. Since we did not
743 * take the lock, the PI boost has to go.
744 */
745 if (unlikely(ret))
746 rt_mutex_adjust_prio(current);
747
748 debug_rt_mutex_free_waiter(&waiter); 667 debug_rt_mutex_free_waiter(&waiter);
749 668
750 return ret; 669 return ret;
@@ -762,7 +681,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
762 681
763 if (likely(rt_mutex_owner(lock) != current)) { 682 if (likely(rt_mutex_owner(lock) != current)) {
764 683
765 ret = try_to_take_rt_mutex(lock); 684 ret = try_to_take_rt_mutex(lock, current, NULL);
766 /* 685 /*
767 * try_to_take_rt_mutex() sets the lock waiters 686 * try_to_take_rt_mutex() sets the lock waiters
768 * bit unconditionally. Clean this up. 687 * bit unconditionally. Clean this up.
@@ -992,7 +911,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
992{ 911{
993 __rt_mutex_init(lock, NULL); 912 __rt_mutex_init(lock, NULL);
994 debug_rt_mutex_proxy_lock(lock, proxy_owner); 913 debug_rt_mutex_proxy_lock(lock, proxy_owner);
995 rt_mutex_set_owner(lock, proxy_owner, 0); 914 rt_mutex_set_owner(lock, proxy_owner);
996 rt_mutex_deadlock_account_lock(lock, proxy_owner); 915 rt_mutex_deadlock_account_lock(lock, proxy_owner);
997} 916}
998 917
@@ -1008,7 +927,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1008 struct task_struct *proxy_owner) 927 struct task_struct *proxy_owner)
1009{ 928{
1010 debug_rt_mutex_proxy_unlock(lock); 929 debug_rt_mutex_proxy_unlock(lock);
1011 rt_mutex_set_owner(lock, NULL, 0); 930 rt_mutex_set_owner(lock, NULL);
1012 rt_mutex_deadlock_account_unlock(proxy_owner); 931 rt_mutex_deadlock_account_unlock(proxy_owner);
1013} 932}
1014 933
@@ -1034,20 +953,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1034 953
1035 raw_spin_lock(&lock->wait_lock); 954 raw_spin_lock(&lock->wait_lock);
1036 955
1037 mark_rt_mutex_waiters(lock); 956 if (try_to_take_rt_mutex(lock, task, NULL)) {
1038
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock);
1042 rt_mutex_set_owner(lock, task, 0);
1043 raw_spin_unlock(&lock->wait_lock); 957 raw_spin_unlock(&lock->wait_lock);
1044 rt_mutex_deadlock_account_lock(lock, task);
1045 return 1; 958 return 1;
1046 } 959 }
1047 960
1048 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 961 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1049 962
1050 if (ret && !waiter->task) { 963 if (ret && !rt_mutex_owner(lock)) {
1051 /* 964 /*
1052 * Reset the return value. We might have 965 * Reset the return value. We might have
1053 * returned with -EDEADLK and the owner 966 * returned with -EDEADLK and the owner
@@ -1056,6 +969,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1056 */ 969 */
1057 ret = 0; 970 ret = 0;
1058 } 971 }
972
973 if (unlikely(ret))
974 remove_waiter(lock, waiter);
975
1059 raw_spin_unlock(&lock->wait_lock); 976 raw_spin_unlock(&lock->wait_lock);
1060 977
1061 debug_rt_mutex_print_deadlock(waiter); 978 debug_rt_mutex_print_deadlock(waiter);
@@ -1110,12 +1027,11 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1110 1027
1111 set_current_state(TASK_INTERRUPTIBLE); 1028 set_current_state(TASK_INTERRUPTIBLE);
1112 1029
1113 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, 1030 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1114 detect_deadlock);
1115 1031
1116 set_current_state(TASK_RUNNING); 1032 set_current_state(TASK_RUNNING);
1117 1033
1118 if (unlikely(waiter->task)) 1034 if (unlikely(ret))
1119 remove_waiter(lock, waiter); 1035 remove_waiter(lock, waiter);
1120 1036
1121 /* 1037 /*
@@ -1126,13 +1042,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1126 1042
1127 raw_spin_unlock(&lock->wait_lock); 1043 raw_spin_unlock(&lock->wait_lock);
1128 1044
1129 /*
1130 * Readjust priority, when we did not get the lock. We might have been
1131 * the pending owner and boosted. Since we did not take the lock, the
1132 * PI boost has to go.
1133 */
1134 if (unlikely(ret))
1135 rt_mutex_adjust_prio(current);
1136
1137 return ret; 1045 return ret;
1138} 1046}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 97a2f81866af..53a66c85261b 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -91,9 +91,8 @@ task_top_pi_waiter(struct task_struct *p)
91/* 91/*
92 * lock->owner state tracking: 92 * lock->owner state tracking:
93 */ 93 */
94#define RT_MUTEX_OWNER_PENDING 1UL 94#define RT_MUTEX_HAS_WAITERS 1UL
95#define RT_MUTEX_HAS_WAITERS 2UL 95#define RT_MUTEX_OWNER_MASKALL 1UL
96#define RT_MUTEX_OWNER_MASKALL 3UL
97 96
98static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) 97static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
99{ 98{
@@ -101,17 +100,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
101 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); 100 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
102} 101}
103 102
104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
105{
106 return (struct task_struct *)
107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
108}
109
110static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
111{
112 return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
113}
114
115/* 103/*
116 * PI-futex support (proxy locking functions, etc.): 104 * PI-futex support (proxy locking functions, etc.):
117 */ 105 */
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7ba..c8e40b7005c0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq {
324 * 'curr' points to currently running entity on this cfs_rq. 324 * 'curr' points to currently running entity on this cfs_rq.
325 * It is set to NULL otherwise (i.e when none are currently running). 325 * It is set to NULL otherwise (i.e when none are currently running).
326 */ 326 */
327 struct sched_entity *curr, *next, *last; 327 struct sched_entity *curr, *next, *last, *skip;
328 328
329 unsigned int nr_spread_over; 329 unsigned int nr_spread_over;
330 330
@@ -606,9 +606,6 @@ static inline struct task_group *task_group(struct task_struct *p)
606 struct task_group *tg; 606 struct task_group *tg;
607 struct cgroup_subsys_state *css; 607 struct cgroup_subsys_state *css;
608 608
609 if (p->flags & PF_EXITING)
610 return &root_task_group;
611
612 css = task_subsys_state_check(p, cpu_cgroup_subsys_id, 609 css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
613 lockdep_is_held(&task_rq(p)->lock)); 610 lockdep_is_held(&task_rq(p)->lock));
614 tg = container_of(css, struct task_group, css); 611 tg = container_of(css, struct task_group, css);
@@ -1686,6 +1683,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1686 __release(rq2->lock); 1683 __release(rq2->lock);
1687} 1684}
1688 1685
1686#else /* CONFIG_SMP */
1687
1688/*
1689 * double_rq_lock - safely lock two runqueues
1690 *
1691 * Note this does not disable interrupts like task_rq_lock,
1692 * you need to do so manually before calling.
1693 */
1694static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1695 __acquires(rq1->lock)
1696 __acquires(rq2->lock)
1697{
1698 BUG_ON(!irqs_disabled());
1699 BUG_ON(rq1 != rq2);
1700 raw_spin_lock(&rq1->lock);
1701 __acquire(rq2->lock); /* Fake it out ;) */
1702}
1703
1704/*
1705 * double_rq_unlock - safely unlock two runqueues
1706 *
1707 * Note this does not restore interrupts like task_rq_unlock,
1708 * you need to do so manually after calling.
1709 */
1710static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1711 __releases(rq1->lock)
1712 __releases(rq2->lock)
1713{
1714 BUG_ON(rq1 != rq2);
1715 raw_spin_unlock(&rq1->lock);
1716 __release(rq2->lock);
1717}
1718
1689#endif 1719#endif
1690 1720
1691static void calc_load_account_idle(struct rq *this_rq); 1721static void calc_load_account_idle(struct rq *this_rq);
@@ -1880,7 +1910,7 @@ void account_system_vtime(struct task_struct *curr)
1880 */ 1910 */
1881 if (hardirq_count()) 1911 if (hardirq_count())
1882 __this_cpu_add(cpu_hardirq_time, delta); 1912 __this_cpu_add(cpu_hardirq_time, delta);
1883 else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) 1913 else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
1884 __this_cpu_add(cpu_softirq_time, delta); 1914 __this_cpu_add(cpu_softirq_time, delta);
1885 1915
1886 irq_time_write_end(); 1916 irq_time_write_end();
@@ -1920,8 +1950,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
1920 sched_rt_avg_update(rq, irq_delta); 1950 sched_rt_avg_update(rq, irq_delta);
1921} 1951}
1922 1952
1953static int irqtime_account_hi_update(void)
1954{
1955 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
1956 unsigned long flags;
1957 u64 latest_ns;
1958 int ret = 0;
1959
1960 local_irq_save(flags);
1961 latest_ns = this_cpu_read(cpu_hardirq_time);
1962 if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
1963 ret = 1;
1964 local_irq_restore(flags);
1965 return ret;
1966}
1967
1968static int irqtime_account_si_update(void)
1969{
1970 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
1971 unsigned long flags;
1972 u64 latest_ns;
1973 int ret = 0;
1974
1975 local_irq_save(flags);
1976 latest_ns = this_cpu_read(cpu_softirq_time);
1977 if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
1978 ret = 1;
1979 local_irq_restore(flags);
1980 return ret;
1981}
1982
1923#else /* CONFIG_IRQ_TIME_ACCOUNTING */ 1983#else /* CONFIG_IRQ_TIME_ACCOUNTING */
1924 1984
1985#define sched_clock_irqtime (0)
1986
1925static void update_rq_clock_task(struct rq *rq, s64 delta) 1987static void update_rq_clock_task(struct rq *rq, s64 delta)
1926{ 1988{
1927 rq->clock_task += delta; 1989 rq->clock_task += delta;
@@ -2025,14 +2087,14 @@ inline int task_curr(const struct task_struct *p)
2025 2087
2026static inline void check_class_changed(struct rq *rq, struct task_struct *p, 2088static inline void check_class_changed(struct rq *rq, struct task_struct *p,
2027 const struct sched_class *prev_class, 2089 const struct sched_class *prev_class,
2028 int oldprio, int running) 2090 int oldprio)
2029{ 2091{
2030 if (prev_class != p->sched_class) { 2092 if (prev_class != p->sched_class) {
2031 if (prev_class->switched_from) 2093 if (prev_class->switched_from)
2032 prev_class->switched_from(rq, p, running); 2094 prev_class->switched_from(rq, p);
2033 p->sched_class->switched_to(rq, p, running); 2095 p->sched_class->switched_to(rq, p);
2034 } else 2096 } else if (oldprio != p->prio)
2035 p->sched_class->prio_changed(rq, p, oldprio, running); 2097 p->sched_class->prio_changed(rq, p, oldprio);
2036} 2098}
2037 2099
2038static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) 2100static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
@@ -2224,7 +2286,10 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
2224 * yield - it could be a while. 2286 * yield - it could be a while.
2225 */ 2287 */
2226 if (unlikely(on_rq)) { 2288 if (unlikely(on_rq)) {
2227 schedule_timeout_uninterruptible(1); 2289 ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
2290
2291 set_current_state(TASK_UNINTERRUPTIBLE);
2292 schedule_hrtimeout(&to, HRTIMER_MODE_REL);
2228 continue; 2293 continue;
2229 } 2294 }
2230 2295
@@ -2265,27 +2330,6 @@ void kick_process(struct task_struct *p)
2265EXPORT_SYMBOL_GPL(kick_process); 2330EXPORT_SYMBOL_GPL(kick_process);
2266#endif /* CONFIG_SMP */ 2331#endif /* CONFIG_SMP */
2267 2332
2268/**
2269 * task_oncpu_function_call - call a function on the cpu on which a task runs
2270 * @p: the task to evaluate
2271 * @func: the function to be called
2272 * @info: the function call argument
2273 *
2274 * Calls the function @func when the task is currently running. This might
2275 * be on the current CPU, which just calls the function directly
2276 */
2277void task_oncpu_function_call(struct task_struct *p,
2278 void (*func) (void *info), void *info)
2279{
2280 int cpu;
2281
2282 preempt_disable();
2283 cpu = task_cpu(p);
2284 if (task_curr(p))
2285 smp_call_function_single(cpu, func, info, 1);
2286 preempt_enable();
2287}
2288
2289#ifdef CONFIG_SMP 2333#ifdef CONFIG_SMP
2290/* 2334/*
2291 * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. 2335 * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@ -2566,6 +2610,7 @@ static void __sched_fork(struct task_struct *p)
2566 p->se.sum_exec_runtime = 0; 2610 p->se.sum_exec_runtime = 0;
2567 p->se.prev_sum_exec_runtime = 0; 2611 p->se.prev_sum_exec_runtime = 0;
2568 p->se.nr_migrations = 0; 2612 p->se.nr_migrations = 0;
2613 p->se.vruntime = 0;
2569 2614
2570#ifdef CONFIG_SCHEDSTATS 2615#ifdef CONFIG_SCHEDSTATS
2571 memset(&p->se.statistics, 0, sizeof(p->se.statistics)); 2616 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -2776,9 +2821,12 @@ static inline void
2776prepare_task_switch(struct rq *rq, struct task_struct *prev, 2821prepare_task_switch(struct rq *rq, struct task_struct *prev,
2777 struct task_struct *next) 2822 struct task_struct *next)
2778{ 2823{
2824 sched_info_switch(prev, next);
2825 perf_event_task_sched_out(prev, next);
2779 fire_sched_out_preempt_notifiers(prev, next); 2826 fire_sched_out_preempt_notifiers(prev, next);
2780 prepare_lock_switch(rq, next); 2827 prepare_lock_switch(rq, next);
2781 prepare_arch_switch(next); 2828 prepare_arch_switch(next);
2829 trace_sched_switch(prev, next);
2782} 2830}
2783 2831
2784/** 2832/**
@@ -2911,7 +2959,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2911 struct mm_struct *mm, *oldmm; 2959 struct mm_struct *mm, *oldmm;
2912 2960
2913 prepare_task_switch(rq, prev, next); 2961 prepare_task_switch(rq, prev, next);
2914 trace_sched_switch(prev, next); 2962
2915 mm = next->mm; 2963 mm = next->mm;
2916 oldmm = prev->active_mm; 2964 oldmm = prev->active_mm;
2917 /* 2965 /*
@@ -3568,6 +3616,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
3568} 3616}
3569 3617
3570/* 3618/*
3619 * Account system cpu time to a process and desired cpustat field
3620 * @p: the process that the cpu time gets accounted to
3621 * @cputime: the cpu time spent in kernel space since the last update
3622 * @cputime_scaled: cputime scaled by cpu frequency
3623 * @target_cputime64: pointer to cpustat field that has to be updated
3624 */
3625static inline
3626void __account_system_time(struct task_struct *p, cputime_t cputime,
3627 cputime_t cputime_scaled, cputime64_t *target_cputime64)
3628{
3629 cputime64_t tmp = cputime_to_cputime64(cputime);
3630
3631 /* Add system time to process. */
3632 p->stime = cputime_add(p->stime, cputime);
3633 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
3634 account_group_system_time(p, cputime);
3635
3636 /* Add system time to cpustat. */
3637 *target_cputime64 = cputime64_add(*target_cputime64, tmp);
3638 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
3639
3640 /* Account for system time used */
3641 acct_update_integrals(p);
3642}
3643
3644/*
3571 * Account system cpu time to a process. 3645 * Account system cpu time to a process.
3572 * @p: the process that the cpu time gets accounted to 3646 * @p: the process that the cpu time gets accounted to
3573 * @hardirq_offset: the offset to subtract from hardirq_count() 3647 * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -3578,36 +3652,26 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
3578 cputime_t cputime, cputime_t cputime_scaled) 3652 cputime_t cputime, cputime_t cputime_scaled)
3579{ 3653{
3580 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 3654 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3581 cputime64_t tmp; 3655 cputime64_t *target_cputime64;
3582 3656
3583 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { 3657 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
3584 account_guest_time(p, cputime, cputime_scaled); 3658 account_guest_time(p, cputime, cputime_scaled);
3585 return; 3659 return;
3586 } 3660 }
3587 3661
3588 /* Add system time to process. */
3589 p->stime = cputime_add(p->stime, cputime);
3590 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
3591 account_group_system_time(p, cputime);
3592
3593 /* Add system time to cpustat. */
3594 tmp = cputime_to_cputime64(cputime);
3595 if (hardirq_count() - hardirq_offset) 3662 if (hardirq_count() - hardirq_offset)
3596 cpustat->irq = cputime64_add(cpustat->irq, tmp); 3663 target_cputime64 = &cpustat->irq;
3597 else if (in_serving_softirq()) 3664 else if (in_serving_softirq())
3598 cpustat->softirq = cputime64_add(cpustat->softirq, tmp); 3665 target_cputime64 = &cpustat->softirq;
3599 else 3666 else
3600 cpustat->system = cputime64_add(cpustat->system, tmp); 3667 target_cputime64 = &cpustat->system;
3601 3668
3602 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); 3669 __account_system_time(p, cputime, cputime_scaled, target_cputime64);
3603
3604 /* Account for system time used */
3605 acct_update_integrals(p);
3606} 3670}
3607 3671
3608/* 3672/*
3609 * Account for involuntary wait time. 3673 * Account for involuntary wait time.
3610 * @steal: the cpu time spent in involuntary wait 3674 * @cputime: the cpu time spent in involuntary wait
3611 */ 3675 */
3612void account_steal_time(cputime_t cputime) 3676void account_steal_time(cputime_t cputime)
3613{ 3677{
@@ -3635,6 +3699,73 @@ void account_idle_time(cputime_t cputime)
3635 3699
3636#ifndef CONFIG_VIRT_CPU_ACCOUNTING 3700#ifndef CONFIG_VIRT_CPU_ACCOUNTING
3637 3701
3702#ifdef CONFIG_IRQ_TIME_ACCOUNTING
3703/*
3704 * Account a tick to a process and cpustat
3705 * @p: the process that the cpu time gets accounted to
3706 * @user_tick: is the tick from userspace
3707 * @rq: the pointer to rq
3708 *
3709 * Tick demultiplexing follows the order
3710 * - pending hardirq update
3711 * - pending softirq update
3712 * - user_time
3713 * - idle_time
3714 * - system time
3715 * - check for guest_time
3716 * - else account as system_time
3717 *
3718 * Check for hardirq is done both for system and user time as there is
3719 * no timer going off while we are on hardirq and hence we may never get an
3720 * opportunity to update it solely in system time.
3721 * p->stime and friends are only updated on system time and not on irq
3722 * softirq as those do not count in task exec_runtime any more.
3723 */
3724static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
3725 struct rq *rq)
3726{
3727 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
3728 cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
3729 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3730
3731 if (irqtime_account_hi_update()) {
3732 cpustat->irq = cputime64_add(cpustat->irq, tmp);
3733 } else if (irqtime_account_si_update()) {
3734 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
3735 } else if (this_cpu_ksoftirqd() == p) {
3736 /*
3737 * ksoftirqd time do not get accounted in cpu_softirq_time.
3738 * So, we have to handle it separately here.
3739 * Also, p->stime needs to be updated for ksoftirqd.
3740 */
3741 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
3742 &cpustat->softirq);
3743 } else if (user_tick) {
3744 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
3745 } else if (p == rq->idle) {
3746 account_idle_time(cputime_one_jiffy);
3747 } else if (p->flags & PF_VCPU) { /* System time or guest time */
3748 account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
3749 } else {
3750 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
3751 &cpustat->system);
3752 }
3753}
3754
3755static void irqtime_account_idle_ticks(int ticks)
3756{
3757 int i;
3758 struct rq *rq = this_rq();
3759
3760 for (i = 0; i < ticks; i++)
3761 irqtime_account_process_tick(current, 0, rq);
3762}
3763#else /* CONFIG_IRQ_TIME_ACCOUNTING */
3764static void irqtime_account_idle_ticks(int ticks) {}
3765static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
3766 struct rq *rq) {}
3767#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
3768
3638/* 3769/*
3639 * Account a single tick of cpu time. 3770 * Account a single tick of cpu time.
3640 * @p: the process that the cpu time gets accounted to 3771 * @p: the process that the cpu time gets accounted to
@@ -3645,6 +3776,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
3645 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 3776 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
3646 struct rq *rq = this_rq(); 3777 struct rq *rq = this_rq();
3647 3778
3779 if (sched_clock_irqtime) {
3780 irqtime_account_process_tick(p, user_tick, rq);
3781 return;
3782 }
3783
3648 if (user_tick) 3784 if (user_tick)
3649 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); 3785 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
3650 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) 3786 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -3670,6 +3806,12 @@ void account_steal_ticks(unsigned long ticks)
3670 */ 3806 */
3671void account_idle_ticks(unsigned long ticks) 3807void account_idle_ticks(unsigned long ticks)
3672{ 3808{
3809
3810 if (sched_clock_irqtime) {
3811 irqtime_account_idle_ticks(ticks);
3812 return;
3813 }
3814
3673 account_idle_time(jiffies_to_cputime(ticks)); 3815 account_idle_time(jiffies_to_cputime(ticks));
3674} 3816}
3675 3817
@@ -3989,9 +4131,6 @@ need_resched_nonpreemptible:
3989 rq->skip_clock_update = 0; 4131 rq->skip_clock_update = 0;
3990 4132
3991 if (likely(prev != next)) { 4133 if (likely(prev != next)) {
3992 sched_info_switch(prev, next);
3993 perf_event_task_sched_out(prev, next);
3994
3995 rq->nr_switches++; 4134 rq->nr_switches++;
3996 rq->curr = next; 4135 rq->curr = next;
3997 ++*switch_count; 4136 ++*switch_count;
@@ -4213,6 +4352,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
4213{ 4352{
4214 __wake_up_common(q, mode, 1, 0, key); 4353 __wake_up_common(q, mode, 1, 0, key);
4215} 4354}
4355EXPORT_SYMBOL_GPL(__wake_up_locked_key);
4216 4356
4217/** 4357/**
4218 * __wake_up_sync_key - wake up threads blocked on a waitqueue. 4358 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
@@ -4570,11 +4710,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
4570 4710
4571 if (running) 4711 if (running)
4572 p->sched_class->set_curr_task(rq); 4712 p->sched_class->set_curr_task(rq);
4573 if (on_rq) { 4713 if (on_rq)
4574 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); 4714 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
4575 4715
4576 check_class_changed(rq, p, prev_class, oldprio, running); 4716 check_class_changed(rq, p, prev_class, oldprio);
4577 }
4578 task_rq_unlock(rq, &flags); 4717 task_rq_unlock(rq, &flags);
4579} 4718}
4580 4719
@@ -4822,12 +4961,15 @@ recheck:
4822 param->sched_priority > rlim_rtprio) 4961 param->sched_priority > rlim_rtprio)
4823 return -EPERM; 4962 return -EPERM;
4824 } 4963 }
4964
4825 /* 4965 /*
4826 * Like positive nice levels, dont allow tasks to 4966 * Treat SCHED_IDLE as nice 20. Only allow a switch to
4827 * move out of SCHED_IDLE either: 4967 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
4828 */ 4968 */
4829 if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) 4969 if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
4830 return -EPERM; 4970 if (!can_nice(p, TASK_NICE(p)))
4971 return -EPERM;
4972 }
4831 4973
4832 /* can't change other user's priorities */ 4974 /* can't change other user's priorities */
4833 if (!check_same_owner(p)) 4975 if (!check_same_owner(p))
@@ -4902,11 +5044,10 @@ recheck:
4902 5044
4903 if (running) 5045 if (running)
4904 p->sched_class->set_curr_task(rq); 5046 p->sched_class->set_curr_task(rq);
4905 if (on_rq) { 5047 if (on_rq)
4906 activate_task(rq, p, 0); 5048 activate_task(rq, p, 0);
4907 5049
4908 check_class_changed(rq, p, prev_class, oldprio, running); 5050 check_class_changed(rq, p, prev_class, oldprio);
4909 }
4910 __task_rq_unlock(rq); 5051 __task_rq_unlock(rq);
4911 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 5052 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4912 5053
@@ -5323,6 +5464,65 @@ void __sched yield(void)
5323} 5464}
5324EXPORT_SYMBOL(yield); 5465EXPORT_SYMBOL(yield);
5325 5466
5467/**
5468 * yield_to - yield the current processor to another thread in
5469 * your thread group, or accelerate that thread toward the
5470 * processor it's on.
5471 *
5472 * It's the caller's job to ensure that the target task struct
5473 * can't go away on us before we can do any checks.
5474 *
5475 * Returns true if we indeed boosted the target task.
5476 */
5477bool __sched yield_to(struct task_struct *p, bool preempt)
5478{
5479 struct task_struct *curr = current;
5480 struct rq *rq, *p_rq;
5481 unsigned long flags;
5482 bool yielded = 0;
5483
5484 local_irq_save(flags);
5485 rq = this_rq();
5486
5487again:
5488 p_rq = task_rq(p);
5489 double_rq_lock(rq, p_rq);
5490 while (task_rq(p) != p_rq) {
5491 double_rq_unlock(rq, p_rq);
5492 goto again;
5493 }
5494
5495 if (!curr->sched_class->yield_to_task)
5496 goto out;
5497
5498 if (curr->sched_class != p->sched_class)
5499 goto out;
5500
5501 if (task_running(p_rq, p) || p->state)
5502 goto out;
5503
5504 yielded = curr->sched_class->yield_to_task(rq, p, preempt);
5505 if (yielded) {
5506 schedstat_inc(rq, yld_count);
5507 /*
5508 * Make p's CPU reschedule; pick_next_entity takes care of
5509 * fairness.
5510 */
5511 if (preempt && rq != p_rq)
5512 resched_task(p_rq->curr);
5513 }
5514
5515out:
5516 double_rq_unlock(rq, p_rq);
5517 local_irq_restore(flags);
5518
5519 if (yielded)
5520 schedule();
5521
5522 return yielded;
5523}
5524EXPORT_SYMBOL_GPL(yield_to);
5525
5326/* 5526/*
5327 * This task is about to go to sleep on IO. Increment rq->nr_iowait so 5527 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
5328 * that process accounting knows that this is a task in IO wait state. 5528 * that process accounting knows that this is a task in IO wait state.
@@ -5571,7 +5771,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5571 * The idle tasks have their own, simple scheduling class: 5771 * The idle tasks have their own, simple scheduling class:
5572 */ 5772 */
5573 idle->sched_class = &idle_sched_class; 5773 idle->sched_class = &idle_sched_class;
5574 ftrace_graph_init_task(idle); 5774 ftrace_graph_init_idle_task(idle, cpu);
5575} 5775}
5576 5776
5577/* 5777/*
@@ -7796,6 +7996,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
7796 INIT_LIST_HEAD(&cfs_rq->tasks); 7996 INIT_LIST_HEAD(&cfs_rq->tasks);
7797#ifdef CONFIG_FAIR_GROUP_SCHED 7997#ifdef CONFIG_FAIR_GROUP_SCHED
7798 cfs_rq->rq = rq; 7998 cfs_rq->rq = rq;
7999 /* allow initial update_cfs_load() to truncate */
8000#ifdef CONFIG_SMP
8001 cfs_rq->load_stamp = 1;
8002#endif
7799#endif 8003#endif
7800 cfs_rq->min_vruntime = (u64)(-(1LL << 20)); 8004 cfs_rq->min_vruntime = (u64)(-(1LL << 20));
7801} 8005}
@@ -8109,6 +8313,8 @@ EXPORT_SYMBOL(__might_sleep);
8109#ifdef CONFIG_MAGIC_SYSRQ 8313#ifdef CONFIG_MAGIC_SYSRQ
8110static void normalize_task(struct rq *rq, struct task_struct *p) 8314static void normalize_task(struct rq *rq, struct task_struct *p)
8111{ 8315{
8316 const struct sched_class *prev_class = p->sched_class;
8317 int old_prio = p->prio;
8112 int on_rq; 8318 int on_rq;
8113 8319
8114 on_rq = p->se.on_rq; 8320 on_rq = p->se.on_rq;
@@ -8119,6 +8325,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
8119 activate_task(rq, p, 0); 8325 activate_task(rq, p, 0);
8120 resched_task(rq->curr); 8326 resched_task(rq->curr);
8121 } 8327 }
8328
8329 check_class_changed(rq, p, prev_class, old_prio);
8122} 8330}
8123 8331
8124void normalize_rt_tasks(void) 8332void normalize_rt_tasks(void)
@@ -8510,7 +8718,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8510 /* Propagate contribution to hierarchy */ 8718 /* Propagate contribution to hierarchy */
8511 raw_spin_lock_irqsave(&rq->lock, flags); 8719 raw_spin_lock_irqsave(&rq->lock, flags);
8512 for_each_sched_entity(se) 8720 for_each_sched_entity(se)
8513 update_cfs_shares(group_cfs_rq(se), 0); 8721 update_cfs_shares(group_cfs_rq(se));
8514 raw_spin_unlock_irqrestore(&rq->lock, flags); 8722 raw_spin_unlock_irqrestore(&rq->lock, flags);
8515 } 8723 }
8516 8724
@@ -8884,7 +9092,8 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
8884} 9092}
8885 9093
8886static void 9094static void
8887cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task) 9095cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
9096 struct cgroup *old_cgrp, struct task_struct *task)
8888{ 9097{
8889 /* 9098 /*
8890 * cgroup_exit() is called in the copy_process() failure path. 9099 * cgroup_exit() is called in the copy_process() failure path.
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 9fb656283157..5946ac515602 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -12,7 +12,6 @@ static atomic_t autogroup_seq_nr;
12static void __init autogroup_init(struct task_struct *init_task) 12static void __init autogroup_init(struct task_struct *init_task)
13{ 13{
14 autogroup_default.tg = &root_task_group; 14 autogroup_default.tg = &root_task_group;
15 root_task_group.autogroup = &autogroup_default;
16 kref_init(&autogroup_default.kref); 15 kref_init(&autogroup_default.kref);
17 init_rwsem(&autogroup_default.lock); 16 init_rwsem(&autogroup_default.lock);
18 init_task->signal->autogroup = &autogroup_default; 17 init_task->signal->autogroup = &autogroup_default;
@@ -130,7 +129,7 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
130 129
131static inline bool task_group_is_autogroup(struct task_group *tg) 130static inline bool task_group_is_autogroup(struct task_group *tg)
132{ 131{
133 return tg != &root_task_group && tg->autogroup; 132 return !!tg->autogroup;
134} 133}
135 134
136static inline struct task_group * 135static inline struct task_group *
@@ -161,11 +160,15 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
161 160
162 p->signal->autogroup = autogroup_kref_get(ag); 161 p->signal->autogroup = autogroup_kref_get(ag);
163 162
163 if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
164 goto out;
165
164 t = p; 166 t = p;
165 do { 167 do {
166 sched_move_task(t); 168 sched_move_task(t);
167 } while_each_thread(p, t); 169 } while_each_thread(p, t);
168 170
171out:
169 unlock_task_sighand(p, &flags); 172 unlock_task_sighand(p, &flags);
170 autogroup_kref_put(prev); 173 autogroup_kref_put(prev);
171} 174}
@@ -247,10 +250,14 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
247{ 250{
248 struct autogroup *ag = autogroup_task_get(p); 251 struct autogroup *ag = autogroup_task_get(p);
249 252
253 if (!task_group_is_autogroup(ag->tg))
254 goto out;
255
250 down_read(&ag->lock); 256 down_read(&ag->lock);
251 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); 257 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
252 up_read(&ag->lock); 258 up_read(&ag->lock);
253 259
260out:
254 autogroup_kref_put(ag); 261 autogroup_kref_put(ag);
255} 262}
256#endif /* CONFIG_PROC_FS */ 263#endif /* CONFIG_PROC_FS */
@@ -258,9 +265,7 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
258#ifdef CONFIG_SCHED_DEBUG 265#ifdef CONFIG_SCHED_DEBUG
259static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) 266static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
260{ 267{
261 int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); 268 if (!task_group_is_autogroup(tg))
262
263 if (!enabled || !tg->autogroup)
264 return 0; 269 return 0;
265 270
266 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); 271 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 7b859ffe5dad..05577055cfca 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -1,6 +1,11 @@
1#ifdef CONFIG_SCHED_AUTOGROUP 1#ifdef CONFIG_SCHED_AUTOGROUP
2 2
3struct autogroup { 3struct autogroup {
4 /*
5 * reference doesn't mean how many thread attach to this
6 * autogroup now. It just stands for the number of task
7 * could use this autogroup.
8 */
4 struct kref kref; 9 struct kref kref;
5 struct task_group *tg; 10 struct task_group *tg;
6 struct rw_semaphore lock; 11 struct rw_semaphore lock;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index eb6cb8edd075..7bacd83a4158 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -179,7 +179,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
179 179
180 raw_spin_lock_irqsave(&rq->lock, flags); 180 raw_spin_lock_irqsave(&rq->lock, flags);
181 if (cfs_rq->rb_leftmost) 181 if (cfs_rq->rb_leftmost)
182 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; 182 MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
183 last = __pick_last_entity(cfs_rq); 183 last = __pick_last_entity(cfs_rq);
184 if (last) 184 if (last)
185 max_vruntime = last->vruntime; 185 max_vruntime = last->vruntime;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0c26e2df450e..3f7ec9e27ee1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8;
69unsigned int sysctl_sched_child_runs_first __read_mostly; 69unsigned int sysctl_sched_child_runs_first __read_mostly;
70 70
71/* 71/*
72 * sys_sched_yield() compat mode
73 *
74 * This option switches the agressive yield implementation of the
75 * old scheduler back on.
76 */
77unsigned int __read_mostly sysctl_sched_compat_yield;
78
79/*
80 * SCHED_OTHER wake-up granularity. 72 * SCHED_OTHER wake-up granularity.
81 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) 73 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
82 * 74 *
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
419 rb_erase(&se->run_node, &cfs_rq->tasks_timeline); 411 rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
420} 412}
421 413
422static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) 414static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
423{ 415{
424 struct rb_node *left = cfs_rq->rb_leftmost; 416 struct rb_node *left = cfs_rq->rb_leftmost;
425 417
@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
429 return rb_entry(left, struct sched_entity, run_node); 421 return rb_entry(left, struct sched_entity, run_node);
430} 422}
431 423
424static struct sched_entity *__pick_next_entity(struct sched_entity *se)
425{
426 struct rb_node *next = rb_next(&se->run_node);
427
428 if (!next)
429 return NULL;
430
431 return rb_entry(next, struct sched_entity, run_node);
432}
433
434#ifdef CONFIG_SCHED_DEBUG
432static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) 435static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
433{ 436{
434 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); 437 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
443 * Scheduling class statistics methods: 446 * Scheduling class statistics methods:
444 */ 447 */
445 448
446#ifdef CONFIG_SCHED_DEBUG
447int sched_proc_update_handler(struct ctl_table *table, int write, 449int sched_proc_update_handler(struct ctl_table *table, int write,
448 void __user *buffer, size_t *lenp, 450 void __user *buffer, size_t *lenp,
449 loff_t *ppos) 451 loff_t *ppos)
@@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
540} 542}
541 543
542static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); 544static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
543static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); 545static void update_cfs_shares(struct cfs_rq *cfs_rq);
544 546
545/* 547/*
546 * Update the current task's runtime statistics. Skip current tasks that 548 * Update the current task's runtime statistics. Skip current tasks that
@@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
733 now - cfs_rq->load_last > 4 * period) { 735 now - cfs_rq->load_last > 4 * period) {
734 cfs_rq->load_period = 0; 736 cfs_rq->load_period = 0;
735 cfs_rq->load_avg = 0; 737 cfs_rq->load_avg = 0;
738 delta = period - 1;
736 } 739 }
737 740
738 cfs_rq->load_stamp = now; 741 cfs_rq->load_stamp = now;
@@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
763 list_del_leaf_cfs_rq(cfs_rq); 766 list_del_leaf_cfs_rq(cfs_rq);
764} 767}
765 768
766static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, 769static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
767 long weight_delta)
768{ 770{
769 long load_weight, load, shares; 771 long load_weight, load, shares;
770 772
771 load = cfs_rq->load.weight + weight_delta; 773 load = cfs_rq->load.weight;
772 774
773 load_weight = atomic_read(&tg->load_weight); 775 load_weight = atomic_read(&tg->load_weight);
774 load_weight -= cfs_rq->load_contribution;
775 load_weight += load; 776 load_weight += load;
777 load_weight -= cfs_rq->load_contribution;
776 778
777 shares = (tg->shares * load); 779 shares = (tg->shares * load);
778 if (load_weight) 780 if (load_weight)
@@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
790{ 792{
791 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { 793 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
792 update_cfs_load(cfs_rq, 0); 794 update_cfs_load(cfs_rq, 0);
793 update_cfs_shares(cfs_rq, 0); 795 update_cfs_shares(cfs_rq);
794 } 796 }
795} 797}
796# else /* CONFIG_SMP */ 798# else /* CONFIG_SMP */
@@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
798{ 800{
799} 801}
800 802
801static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, 803static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
802 long weight_delta)
803{ 804{
804 return tg->shares; 805 return tg->shares;
805} 806}
@@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
824 account_entity_enqueue(cfs_rq, se); 825 account_entity_enqueue(cfs_rq, se);
825} 826}
826 827
827static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) 828static void update_cfs_shares(struct cfs_rq *cfs_rq)
828{ 829{
829 struct task_group *tg; 830 struct task_group *tg;
830 struct sched_entity *se; 831 struct sched_entity *se;
@@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
838 if (likely(se->load.weight == tg->shares)) 839 if (likely(se->load.weight == tg->shares))
839 return; 840 return;
840#endif 841#endif
841 shares = calc_cfs_shares(cfs_rq, tg, weight_delta); 842 shares = calc_cfs_shares(cfs_rq, tg);
842 843
843 reweight_entity(cfs_rq_of(se), se, shares); 844 reweight_entity(cfs_rq_of(se), se, shares);
844} 845}
@@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
847{ 848{
848} 849}
849 850
850static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) 851static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
851{ 852{
852} 853}
853 854
@@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
978 */ 979 */
979 update_curr(cfs_rq); 980 update_curr(cfs_rq);
980 update_cfs_load(cfs_rq, 0); 981 update_cfs_load(cfs_rq, 0);
981 update_cfs_shares(cfs_rq, se->load.weight);
982 account_entity_enqueue(cfs_rq, se); 982 account_entity_enqueue(cfs_rq, se);
983 update_cfs_shares(cfs_rq);
983 984
984 if (flags & ENQUEUE_WAKEUP) { 985 if (flags & ENQUEUE_WAKEUP) {
985 place_entity(cfs_rq, se, 0); 986 place_entity(cfs_rq, se, 0);
@@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
996 list_add_leaf_cfs_rq(cfs_rq); 997 list_add_leaf_cfs_rq(cfs_rq);
997} 998}
998 999
999static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 1000static void __clear_buddies_last(struct sched_entity *se)
1001{
1002 for_each_sched_entity(se) {
1003 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1004 if (cfs_rq->last == se)
1005 cfs_rq->last = NULL;
1006 else
1007 break;
1008 }
1009}
1010
1011static void __clear_buddies_next(struct sched_entity *se)
1000{ 1012{
1001 if (!se || cfs_rq->last == se) 1013 for_each_sched_entity(se) {
1002 cfs_rq->last = NULL; 1014 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1015 if (cfs_rq->next == se)
1016 cfs_rq->next = NULL;
1017 else
1018 break;
1019 }
1020}
1003 1021
1004 if (!se || cfs_rq->next == se) 1022static void __clear_buddies_skip(struct sched_entity *se)
1005 cfs_rq->next = NULL; 1023{
1024 for_each_sched_entity(se) {
1025 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1026 if (cfs_rq->skip == se)
1027 cfs_rq->skip = NULL;
1028 else
1029 break;
1030 }
1006} 1031}
1007 1032
1008static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 1033static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
1009{ 1034{
1010 for_each_sched_entity(se) 1035 if (cfs_rq->last == se)
1011 __clear_buddies(cfs_rq_of(se), se); 1036 __clear_buddies_last(se);
1037
1038 if (cfs_rq->next == se)
1039 __clear_buddies_next(se);
1040
1041 if (cfs_rq->skip == se)
1042 __clear_buddies_skip(se);
1012} 1043}
1013 1044
1014static void 1045static void
@@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1041 update_cfs_load(cfs_rq, 0); 1072 update_cfs_load(cfs_rq, 0);
1042 account_entity_dequeue(cfs_rq, se); 1073 account_entity_dequeue(cfs_rq, se);
1043 update_min_vruntime(cfs_rq); 1074 update_min_vruntime(cfs_rq);
1044 update_cfs_shares(cfs_rq, 0); 1075 update_cfs_shares(cfs_rq);
1045 1076
1046 /* 1077 /*
1047 * Normalize the entity after updating the min_vruntime because the 1078 * Normalize the entity after updating the min_vruntime because the
@@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
1084 return; 1115 return;
1085 1116
1086 if (cfs_rq->nr_running > 1) { 1117 if (cfs_rq->nr_running > 1) {
1087 struct sched_entity *se = __pick_next_entity(cfs_rq); 1118 struct sched_entity *se = __pick_first_entity(cfs_rq);
1088 s64 delta = curr->vruntime - se->vruntime; 1119 s64 delta = curr->vruntime - se->vruntime;
1089 1120
1090 if (delta < 0) 1121 if (delta < 0)
@@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
1128static int 1159static int
1129wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); 1160wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
1130 1161
1162/*
1163 * Pick the next process, keeping these things in mind, in this order:
1164 * 1) keep things fair between processes/task groups
1165 * 2) pick the "next" process, since someone really wants that to run
1166 * 3) pick the "last" process, for cache locality
1167 * 4) do not run the "skip" process, if something else is available
1168 */
1131static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) 1169static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
1132{ 1170{
1133 struct sched_entity *se = __pick_next_entity(cfs_rq); 1171 struct sched_entity *se = __pick_first_entity(cfs_rq);
1134 struct sched_entity *left = se; 1172 struct sched_entity *left = se;
1135 1173
1136 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) 1174 /*
1137 se = cfs_rq->next; 1175 * Avoid running the skip buddy, if running something else can
1176 * be done without getting too unfair.
1177 */
1178 if (cfs_rq->skip == se) {
1179 struct sched_entity *second = __pick_next_entity(se);
1180 if (second && wakeup_preempt_entity(second, left) < 1)
1181 se = second;
1182 }
1138 1183
1139 /* 1184 /*
1140 * Prefer last buddy, try to return the CPU to a preempted task. 1185 * Prefer last buddy, try to return the CPU to a preempted task.
@@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
1142 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) 1187 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
1143 se = cfs_rq->last; 1188 se = cfs_rq->last;
1144 1189
1190 /*
1191 * Someone really wants this to run. If it's not unfair, run it.
1192 */
1193 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
1194 se = cfs_rq->next;
1195
1145 clear_buddies(cfs_rq, se); 1196 clear_buddies(cfs_rq, se);
1146 1197
1147 return se; 1198 return se;
@@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1282 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1333 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1283 1334
1284 update_cfs_load(cfs_rq, 0); 1335 update_cfs_load(cfs_rq, 0);
1285 update_cfs_shares(cfs_rq, 0); 1336 update_cfs_shares(cfs_rq);
1286 } 1337 }
1287 1338
1288 hrtick_update(rq); 1339 hrtick_update(rq);
@@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1312 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1363 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1313 1364
1314 update_cfs_load(cfs_rq, 0); 1365 update_cfs_load(cfs_rq, 0);
1315 update_cfs_shares(cfs_rq, 0); 1366 update_cfs_shares(cfs_rq);
1316 } 1367 }
1317 1368
1318 hrtick_update(rq); 1369 hrtick_update(rq);
1319} 1370}
1320 1371
1321/*
1322 * sched_yield() support is very simple - we dequeue and enqueue.
1323 *
1324 * If compat_yield is turned on then we requeue to the end of the tree.
1325 */
1326static void yield_task_fair(struct rq *rq)
1327{
1328 struct task_struct *curr = rq->curr;
1329 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1330 struct sched_entity *rightmost, *se = &curr->se;
1331
1332 /*
1333 * Are we the only task in the tree?
1334 */
1335 if (unlikely(cfs_rq->nr_running == 1))
1336 return;
1337
1338 clear_buddies(cfs_rq, se);
1339
1340 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
1341 update_rq_clock(rq);
1342 /*
1343 * Update run-time statistics of the 'current'.
1344 */
1345 update_curr(cfs_rq);
1346
1347 return;
1348 }
1349 /*
1350 * Find the rightmost entry in the rbtree:
1351 */
1352 rightmost = __pick_last_entity(cfs_rq);
1353 /*
1354 * Already in the rightmost position?
1355 */
1356 if (unlikely(!rightmost || entity_before(rightmost, se)))
1357 return;
1358
1359 /*
1360 * Minimally necessary key value to be last in the tree:
1361 * Upon rescheduling, sched_class::put_prev_task() will place
1362 * 'current' within the tree based on its new key value.
1363 */
1364 se->vruntime = rightmost->vruntime + 1;
1365}
1366
1367#ifdef CONFIG_SMP 1372#ifdef CONFIG_SMP
1368 1373
1369static void task_waking_fair(struct rq *rq, struct task_struct *p) 1374static void task_waking_fair(struct rq *rq, struct task_struct *p)
@@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se)
1834 } 1839 }
1835} 1840}
1836 1841
1842static void set_skip_buddy(struct sched_entity *se)
1843{
1844 if (likely(task_of(se)->policy != SCHED_IDLE)) {
1845 for_each_sched_entity(se)
1846 cfs_rq_of(se)->skip = se;
1847 }
1848}
1849
1837/* 1850/*
1838 * Preempt the current task with a newly woken task if needed: 1851 * Preempt the current task with a newly woken task if needed:
1839 */ 1852 */
@@ -1857,16 +1870,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1857 if (test_tsk_need_resched(curr)) 1870 if (test_tsk_need_resched(curr))
1858 return; 1871 return;
1859 1872
1873 /* Idle tasks are by definition preempted by non-idle tasks. */
1874 if (unlikely(curr->policy == SCHED_IDLE) &&
1875 likely(p->policy != SCHED_IDLE))
1876 goto preempt;
1877
1860 /* 1878 /*
1861 * Batch and idle tasks do not preempt (their preemption is driven by 1879 * Batch and idle tasks do not preempt non-idle tasks (their preemption
1862 * the tick): 1880 * is driven by the tick):
1863 */ 1881 */
1864 if (unlikely(p->policy != SCHED_NORMAL)) 1882 if (unlikely(p->policy != SCHED_NORMAL))
1865 return; 1883 return;
1866 1884
1867 /* Idle tasks are by definition preempted by everybody. */
1868 if (unlikely(curr->policy == SCHED_IDLE))
1869 goto preempt;
1870 1885
1871 if (!sched_feat(WAKEUP_PREEMPT)) 1886 if (!sched_feat(WAKEUP_PREEMPT))
1872 return; 1887 return;
@@ -1932,6 +1947,51 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
1932 } 1947 }
1933} 1948}
1934 1949
1950/*
1951 * sched_yield() is very simple
1952 *
1953 * The magic of dealing with the ->skip buddy is in pick_next_entity.
1954 */
1955static void yield_task_fair(struct rq *rq)
1956{
1957 struct task_struct *curr = rq->curr;
1958 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1959 struct sched_entity *se = &curr->se;
1960
1961 /*
1962 * Are we the only task in the tree?
1963 */
1964 if (unlikely(rq->nr_running == 1))
1965 return;
1966
1967 clear_buddies(cfs_rq, se);
1968
1969 if (curr->policy != SCHED_BATCH) {
1970 update_rq_clock(rq);
1971 /*
1972 * Update run-time statistics of the 'current'.
1973 */
1974 update_curr(cfs_rq);
1975 }
1976
1977 set_skip_buddy(se);
1978}
1979
1980static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
1981{
1982 struct sched_entity *se = &p->se;
1983
1984 if (!se->on_rq)
1985 return false;
1986
1987 /* Tell the scheduler that we'd really like pse to run next. */
1988 set_next_buddy(se);
1989
1990 yield_task_fair(rq);
1991
1992 return true;
1993}
1994
1935#ifdef CONFIG_SMP 1995#ifdef CONFIG_SMP
1936/************************************************** 1996/**************************************************
1937 * Fair scheduling class load-balancing methods: 1997 * Fair scheduling class load-balancing methods:
@@ -2123,7 +2183,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
2123 * We need to update shares after updating tg->load_weight in 2183 * We need to update shares after updating tg->load_weight in
2124 * order to adjust the weight of groups with long running tasks. 2184 * order to adjust the weight of groups with long running tasks.
2125 */ 2185 */
2126 update_cfs_shares(cfs_rq, 0); 2186 update_cfs_shares(cfs_rq);
2127 2187
2128 raw_spin_unlock_irqrestore(&rq->lock, flags); 2188 raw_spin_unlock_irqrestore(&rq->lock, flags);
2129 2189
@@ -2610,7 +2670,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
2610 * @this_cpu: Cpu for which load balance is currently performed. 2670 * @this_cpu: Cpu for which load balance is currently performed.
2611 * @idle: Idle status of this_cpu 2671 * @idle: Idle status of this_cpu
2612 * @load_idx: Load index of sched_domain of this_cpu for load calc. 2672 * @load_idx: Load index of sched_domain of this_cpu for load calc.
2613 * @sd_idle: Idle status of the sched_domain containing group.
2614 * @local_group: Does group contain this_cpu. 2673 * @local_group: Does group contain this_cpu.
2615 * @cpus: Set of cpus considered for load balancing. 2674 * @cpus: Set of cpus considered for load balancing.
2616 * @balance: Should we balance. 2675 * @balance: Should we balance.
@@ -2618,7 +2677,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
2618 */ 2677 */
2619static inline void update_sg_lb_stats(struct sched_domain *sd, 2678static inline void update_sg_lb_stats(struct sched_domain *sd,
2620 struct sched_group *group, int this_cpu, 2679 struct sched_group *group, int this_cpu,
2621 enum cpu_idle_type idle, int load_idx, int *sd_idle, 2680 enum cpu_idle_type idle, int load_idx,
2622 int local_group, const struct cpumask *cpus, 2681 int local_group, const struct cpumask *cpus,
2623 int *balance, struct sg_lb_stats *sgs) 2682 int *balance, struct sg_lb_stats *sgs)
2624{ 2683{
@@ -2638,9 +2697,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2638 for_each_cpu_and(i, sched_group_cpus(group), cpus) { 2697 for_each_cpu_and(i, sched_group_cpus(group), cpus) {
2639 struct rq *rq = cpu_rq(i); 2698 struct rq *rq = cpu_rq(i);
2640 2699
2641 if (*sd_idle && rq->nr_running)
2642 *sd_idle = 0;
2643
2644 /* Bias balancing toward cpus of our domain */ 2700 /* Bias balancing toward cpus of our domain */
2645 if (local_group) { 2701 if (local_group) {
2646 if (idle_cpu(i) && !first_idle_cpu) { 2702 if (idle_cpu(i) && !first_idle_cpu) {
@@ -2685,7 +2741,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2685 2741
2686 /* 2742 /*
2687 * Consider the group unbalanced when the imbalance is larger 2743 * Consider the group unbalanced when the imbalance is larger
2688 * than the average weight of two tasks. 2744 * than the average weight of a task.
2689 * 2745 *
2690 * APZ: with cgroup the avg task weight can vary wildly and 2746 * APZ: with cgroup the avg task weight can vary wildly and
2691 * might not be a suitable number - should we keep a 2747 * might not be a suitable number - should we keep a
@@ -2695,7 +2751,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2695 if (sgs->sum_nr_running) 2751 if (sgs->sum_nr_running)
2696 avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; 2752 avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
2697 2753
2698 if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1) 2754 if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
2699 sgs->group_imb = 1; 2755 sgs->group_imb = 1;
2700 2756
2701 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); 2757 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
@@ -2755,15 +2811,13 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
2755 * @sd: sched_domain whose statistics are to be updated. 2811 * @sd: sched_domain whose statistics are to be updated.
2756 * @this_cpu: Cpu for which load balance is currently performed. 2812 * @this_cpu: Cpu for which load balance is currently performed.
2757 * @idle: Idle status of this_cpu 2813 * @idle: Idle status of this_cpu
2758 * @sd_idle: Idle status of the sched_domain containing sg.
2759 * @cpus: Set of cpus considered for load balancing. 2814 * @cpus: Set of cpus considered for load balancing.
2760 * @balance: Should we balance. 2815 * @balance: Should we balance.
2761 * @sds: variable to hold the statistics for this sched_domain. 2816 * @sds: variable to hold the statistics for this sched_domain.
2762 */ 2817 */
2763static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, 2818static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
2764 enum cpu_idle_type idle, int *sd_idle, 2819 enum cpu_idle_type idle, const struct cpumask *cpus,
2765 const struct cpumask *cpus, int *balance, 2820 int *balance, struct sd_lb_stats *sds)
2766 struct sd_lb_stats *sds)
2767{ 2821{
2768 struct sched_domain *child = sd->child; 2822 struct sched_domain *child = sd->child;
2769 struct sched_group *sg = sd->groups; 2823 struct sched_group *sg = sd->groups;
@@ -2781,7 +2835,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
2781 2835
2782 local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg)); 2836 local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
2783 memset(&sgs, 0, sizeof(sgs)); 2837 memset(&sgs, 0, sizeof(sgs));
2784 update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle, 2838 update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
2785 local_group, cpus, balance, &sgs); 2839 local_group, cpus, balance, &sgs);
2786 2840
2787 if (local_group && !(*balance)) 2841 if (local_group && !(*balance))
@@ -3033,7 +3087,6 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
3033 * @imbalance: Variable which stores amount of weighted load which should 3087 * @imbalance: Variable which stores amount of weighted load which should
3034 * be moved to restore balance/put a group to idle. 3088 * be moved to restore balance/put a group to idle.
3035 * @idle: The idle status of this_cpu. 3089 * @idle: The idle status of this_cpu.
3036 * @sd_idle: The idleness of sd
3037 * @cpus: The set of CPUs under consideration for load-balancing. 3090 * @cpus: The set of CPUs under consideration for load-balancing.
3038 * @balance: Pointer to a variable indicating if this_cpu 3091 * @balance: Pointer to a variable indicating if this_cpu
3039 * is the appropriate cpu to perform load balancing at this_level. 3092 * is the appropriate cpu to perform load balancing at this_level.
@@ -3046,7 +3099,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
3046static struct sched_group * 3099static struct sched_group *
3047find_busiest_group(struct sched_domain *sd, int this_cpu, 3100find_busiest_group(struct sched_domain *sd, int this_cpu,
3048 unsigned long *imbalance, enum cpu_idle_type idle, 3101 unsigned long *imbalance, enum cpu_idle_type idle,
3049 int *sd_idle, const struct cpumask *cpus, int *balance) 3102 const struct cpumask *cpus, int *balance)
3050{ 3103{
3051 struct sd_lb_stats sds; 3104 struct sd_lb_stats sds;
3052 3105
@@ -3056,22 +3109,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3056 * Compute the various statistics relavent for load balancing at 3109 * Compute the various statistics relavent for load balancing at
3057 * this level. 3110 * this level.
3058 */ 3111 */
3059 update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus, 3112 update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);
3060 balance, &sds); 3113
3061 3114 /*
3062 /* Cases where imbalance does not exist from POV of this_cpu */ 3115 * this_cpu is not the appropriate cpu to perform load balancing at
3063 /* 1) this_cpu is not the appropriate cpu to perform load balancing 3116 * this level.
3064 * at this level.
3065 * 2) There is no busy sibling group to pull from.
3066 * 3) This group is the busiest group.
3067 * 4) This group is more busy than the avg busieness at this
3068 * sched_domain.
3069 * 5) The imbalance is within the specified limit.
3070 *
3071 * Note: when doing newidle balance, if the local group has excess
3072 * capacity (i.e. nr_running < group_capacity) and the busiest group
3073 * does not have any capacity, we force a load balance to pull tasks
3074 * to the local group. In this case, we skip past checks 3, 4 and 5.
3075 */ 3117 */
3076 if (!(*balance)) 3118 if (!(*balance))
3077 goto ret; 3119 goto ret;
@@ -3080,41 +3122,55 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3080 check_asym_packing(sd, &sds, this_cpu, imbalance)) 3122 check_asym_packing(sd, &sds, this_cpu, imbalance))
3081 return sds.busiest; 3123 return sds.busiest;
3082 3124
3125 /* There is no busy sibling group to pull tasks from */
3083 if (!sds.busiest || sds.busiest_nr_running == 0) 3126 if (!sds.busiest || sds.busiest_nr_running == 0)
3084 goto out_balanced; 3127 goto out_balanced;
3085 3128
3086 /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ 3129 /*
3130 * If the busiest group is imbalanced the below checks don't
3131 * work because they assumes all things are equal, which typically
3132 * isn't true due to cpus_allowed constraints and the like.
3133 */
3134 if (sds.group_imb)
3135 goto force_balance;
3136
3137 /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
3087 if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && 3138 if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
3088 !sds.busiest_has_capacity) 3139 !sds.busiest_has_capacity)
3089 goto force_balance; 3140 goto force_balance;
3090 3141
3142 /*
3143 * If the local group is more busy than the selected busiest group
3144 * don't try and pull any tasks.
3145 */
3091 if (sds.this_load >= sds.max_load) 3146 if (sds.this_load >= sds.max_load)
3092 goto out_balanced; 3147 goto out_balanced;
3093 3148
3149 /*
3150 * Don't pull any tasks if this group is already above the domain
3151 * average load.
3152 */
3094 sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; 3153 sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
3095
3096 if (sds.this_load >= sds.avg_load) 3154 if (sds.this_load >= sds.avg_load)
3097 goto out_balanced; 3155 goto out_balanced;
3098 3156
3099 /* 3157 if (idle == CPU_IDLE) {
3100 * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
3101 * And to check for busy balance use !idle_cpu instead of
3102 * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
3103 * even when they are idle.
3104 */
3105 if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
3106 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
3107 goto out_balanced;
3108 } else {
3109 /* 3158 /*
3110 * This cpu is idle. If the busiest group load doesn't 3159 * This cpu is idle. If the busiest group load doesn't
3111 * have more tasks than the number of available cpu's and 3160 * have more tasks than the number of available cpu's and
3112 * there is no imbalance between this and busiest group 3161 * there is no imbalance between this and busiest group
3113 * wrt to idle cpu's, it is balanced. 3162 * wrt to idle cpu's, it is balanced.
3114 */ 3163 */
3115 if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && 3164 if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
3116 sds.busiest_nr_running <= sds.busiest_group_weight) 3165 sds.busiest_nr_running <= sds.busiest_group_weight)
3117 goto out_balanced; 3166 goto out_balanced;
3167 } else {
3168 /*
3169 * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use
3170 * imbalance_pct to be conservative.
3171 */
3172 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
3173 goto out_balanced;
3118 } 3174 }
3119 3175
3120force_balance: 3176force_balance:
@@ -3193,7 +3249,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
3193/* Working cpumask for load_balance and load_balance_newidle. */ 3249/* Working cpumask for load_balance and load_balance_newidle. */
3194static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); 3250static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
3195 3251
3196static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle, 3252static int need_active_balance(struct sched_domain *sd, int idle,
3197 int busiest_cpu, int this_cpu) 3253 int busiest_cpu, int this_cpu)
3198{ 3254{
3199 if (idle == CPU_NEWLY_IDLE) { 3255 if (idle == CPU_NEWLY_IDLE) {
@@ -3225,10 +3281,6 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
3225 * move_tasks() will succeed. ld_moved will be true and this 3281 * move_tasks() will succeed. ld_moved will be true and this
3226 * active balance code will not be triggered. 3282 * active balance code will not be triggered.
3227 */ 3283 */
3228 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
3229 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3230 return 0;
3231
3232 if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) 3284 if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
3233 return 0; 3285 return 0;
3234 } 3286 }
@@ -3246,7 +3298,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3246 struct sched_domain *sd, enum cpu_idle_type idle, 3298 struct sched_domain *sd, enum cpu_idle_type idle,
3247 int *balance) 3299 int *balance)
3248{ 3300{
3249 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 3301 int ld_moved, all_pinned = 0, active_balance = 0;
3250 struct sched_group *group; 3302 struct sched_group *group;
3251 unsigned long imbalance; 3303 unsigned long imbalance;
3252 struct rq *busiest; 3304 struct rq *busiest;
@@ -3255,20 +3307,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3255 3307
3256 cpumask_copy(cpus, cpu_active_mask); 3308 cpumask_copy(cpus, cpu_active_mask);
3257 3309
3258 /*
3259 * When power savings policy is enabled for the parent domain, idle
3260 * sibling can pick up load irrespective of busy siblings. In this case,
3261 * let the state of idle sibling percolate up as CPU_IDLE, instead of
3262 * portraying it as CPU_NOT_IDLE.
3263 */
3264 if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
3265 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3266 sd_idle = 1;
3267
3268 schedstat_inc(sd, lb_count[idle]); 3310 schedstat_inc(sd, lb_count[idle]);
3269 3311
3270redo: 3312redo:
3271 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, 3313 group = find_busiest_group(sd, this_cpu, &imbalance, idle,
3272 cpus, balance); 3314 cpus, balance);
3273 3315
3274 if (*balance == 0) 3316 if (*balance == 0)
@@ -3330,8 +3372,7 @@ redo:
3330 if (idle != CPU_NEWLY_IDLE) 3372 if (idle != CPU_NEWLY_IDLE)
3331 sd->nr_balance_failed++; 3373 sd->nr_balance_failed++;
3332 3374
3333 if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest), 3375 if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
3334 this_cpu)) {
3335 raw_spin_lock_irqsave(&busiest->lock, flags); 3376 raw_spin_lock_irqsave(&busiest->lock, flags);
3336 3377
3337 /* don't kick the active_load_balance_cpu_stop, 3378 /* don't kick the active_load_balance_cpu_stop,
@@ -3386,10 +3427,6 @@ redo:
3386 sd->balance_interval *= 2; 3427 sd->balance_interval *= 2;
3387 } 3428 }
3388 3429
3389 if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
3390 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3391 ld_moved = -1;
3392
3393 goto out; 3430 goto out;
3394 3431
3395out_balanced: 3432out_balanced:
@@ -3403,11 +3440,7 @@ out_one_pinned:
3403 (sd->balance_interval < sd->max_interval)) 3440 (sd->balance_interval < sd->max_interval))
3404 sd->balance_interval *= 2; 3441 sd->balance_interval *= 2;
3405 3442
3406 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 3443 ld_moved = 0;
3407 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3408 ld_moved = -1;
3409 else
3410 ld_moved = 0;
3411out: 3444out:
3412 return ld_moved; 3445 return ld_moved;
3413} 3446}
@@ -3831,8 +3864,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3831 if (load_balance(cpu, rq, sd, idle, &balance)) { 3864 if (load_balance(cpu, rq, sd, idle, &balance)) {
3832 /* 3865 /*
3833 * We've pulled tasks over so either we're no 3866 * We've pulled tasks over so either we're no
3834 * longer idle, or one of our SMT siblings is 3867 * longer idle.
3835 * not idle.
3836 */ 3868 */
3837 idle = CPU_NOT_IDLE; 3869 idle = CPU_NOT_IDLE;
3838 } 3870 }
@@ -4079,33 +4111,62 @@ static void task_fork_fair(struct task_struct *p)
4079 * Priority of the task has changed. Check to see if we preempt 4111 * Priority of the task has changed. Check to see if we preempt
4080 * the current task. 4112 * the current task.
4081 */ 4113 */
4082static void prio_changed_fair(struct rq *rq, struct task_struct *p, 4114static void
4083 int oldprio, int running) 4115prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
4084{ 4116{
4117 if (!p->se.on_rq)
4118 return;
4119
4085 /* 4120 /*
4086 * Reschedule if we are currently running on this runqueue and 4121 * Reschedule if we are currently running on this runqueue and
4087 * our priority decreased, or if we are not currently running on 4122 * our priority decreased, or if we are not currently running on
4088 * this runqueue and our priority is higher than the current's 4123 * this runqueue and our priority is higher than the current's
4089 */ 4124 */
4090 if (running) { 4125 if (rq->curr == p) {
4091 if (p->prio > oldprio) 4126 if (p->prio > oldprio)
4092 resched_task(rq->curr); 4127 resched_task(rq->curr);
4093 } else 4128 } else
4094 check_preempt_curr(rq, p, 0); 4129 check_preempt_curr(rq, p, 0);
4095} 4130}
4096 4131
4132static void switched_from_fair(struct rq *rq, struct task_struct *p)
4133{
4134 struct sched_entity *se = &p->se;
4135 struct cfs_rq *cfs_rq = cfs_rq_of(se);
4136
4137 /*
4138 * Ensure the task's vruntime is normalized, so that when its
4139 * switched back to the fair class the enqueue_entity(.flags=0) will
4140 * do the right thing.
4141 *
4142 * If it was on_rq, then the dequeue_entity(.flags=0) will already
4143 * have normalized the vruntime, if it was !on_rq, then only when
4144 * the task is sleeping will it still have non-normalized vruntime.
4145 */
4146 if (!se->on_rq && p->state != TASK_RUNNING) {
4147 /*
4148 * Fix up our vruntime so that the current sleep doesn't
4149 * cause 'unlimited' sleep bonus.
4150 */
4151 place_entity(cfs_rq, se, 0);
4152 se->vruntime -= cfs_rq->min_vruntime;
4153 }
4154}
4155
4097/* 4156/*
4098 * We switched to the sched_fair class. 4157 * We switched to the sched_fair class.
4099 */ 4158 */
4100static void switched_to_fair(struct rq *rq, struct task_struct *p, 4159static void switched_to_fair(struct rq *rq, struct task_struct *p)
4101 int running)
4102{ 4160{
4161 if (!p->se.on_rq)
4162 return;
4163
4103 /* 4164 /*
4104 * We were most likely switched from sched_rt, so 4165 * We were most likely switched from sched_rt, so
4105 * kick off the schedule if running, otherwise just see 4166 * kick off the schedule if running, otherwise just see
4106 * if we can still preempt the current task. 4167 * if we can still preempt the current task.
4107 */ 4168 */
4108 if (running) 4169 if (rq->curr == p)
4109 resched_task(rq->curr); 4170 resched_task(rq->curr);
4110 else 4171 else
4111 check_preempt_curr(rq, p, 0); 4172 check_preempt_curr(rq, p, 0);
@@ -4171,6 +4232,7 @@ static const struct sched_class fair_sched_class = {
4171 .enqueue_task = enqueue_task_fair, 4232 .enqueue_task = enqueue_task_fair,
4172 .dequeue_task = dequeue_task_fair, 4233 .dequeue_task = dequeue_task_fair,
4173 .yield_task = yield_task_fair, 4234 .yield_task = yield_task_fair,
4235 .yield_to_task = yield_to_task_fair,
4174 4236
4175 .check_preempt_curr = check_preempt_wakeup, 4237 .check_preempt_curr = check_preempt_wakeup,
4176 4238
@@ -4191,6 +4253,7 @@ static const struct sched_class fair_sched_class = {
4191 .task_fork = task_fork_fair, 4253 .task_fork = task_fork_fair,
4192 4254
4193 .prio_changed = prio_changed_fair, 4255 .prio_changed = prio_changed_fair,
4256 .switched_from = switched_from_fair,
4194 .switched_to = switched_to_fair, 4257 .switched_to = switched_to_fair,
4195 4258
4196 .get_rr_interval = get_rr_interval_fair, 4259 .get_rr_interval = get_rr_interval_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 9fa0f402c87c..c82f26c1b7c3 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -52,31 +52,15 @@ static void set_curr_task_idle(struct rq *rq)
52{ 52{
53} 53}
54 54
55static void switched_to_idle(struct rq *rq, struct task_struct *p, 55static void switched_to_idle(struct rq *rq, struct task_struct *p)
56 int running)
57{ 56{
58 /* Can this actually happen?? */ 57 BUG();
59 if (running)
60 resched_task(rq->curr);
61 else
62 check_preempt_curr(rq, p, 0);
63} 58}
64 59
65static void prio_changed_idle(struct rq *rq, struct task_struct *p, 60static void
66 int oldprio, int running) 61prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
67{ 62{
68 /* This can happen for hot plug CPUS */ 63 BUG();
69
70 /*
71 * Reschedule if we are currently running on this runqueue and
72 * our priority decreased, or if we are not currently running on
73 * this runqueue and our priority is higher than the current's
74 */
75 if (running) {
76 if (p->prio > oldprio)
77 resched_task(rq->curr);
78 } else
79 check_preempt_curr(rq, p, 0);
80} 64}
81 65
82static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) 66static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ad6267714c84..db308cb08b75 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -210,11 +210,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
210 210
211static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 211static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
212{ 212{
213 int this_cpu = smp_processor_id();
214 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; 213 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
215 struct sched_rt_entity *rt_se; 214 struct sched_rt_entity *rt_se;
216 215
217 rt_se = rt_rq->tg->rt_se[this_cpu]; 216 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
217
218 rt_se = rt_rq->tg->rt_se[cpu];
218 219
219 if (rt_rq->rt_nr_running) { 220 if (rt_rq->rt_nr_running) {
220 if (rt_se && !on_rt_rq(rt_se)) 221 if (rt_se && !on_rt_rq(rt_se))
@@ -226,10 +227,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
226 227
227static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 228static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
228{ 229{
229 int this_cpu = smp_processor_id();
230 struct sched_rt_entity *rt_se; 230 struct sched_rt_entity *rt_se;
231 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
231 232
232 rt_se = rt_rq->tg->rt_se[this_cpu]; 233 rt_se = rt_rq->tg->rt_se[cpu];
233 234
234 if (rt_se && on_rt_rq(rt_se)) 235 if (rt_se && on_rt_rq(rt_se))
235 dequeue_rt_entity(rt_se); 236 dequeue_rt_entity(rt_se);
@@ -565,8 +566,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
565 if (rt_rq->rt_time || rt_rq->rt_nr_running) 566 if (rt_rq->rt_time || rt_rq->rt_nr_running)
566 idle = 0; 567 idle = 0;
567 raw_spin_unlock(&rt_rq->rt_runtime_lock); 568 raw_spin_unlock(&rt_rq->rt_runtime_lock);
568 } else if (rt_rq->rt_nr_running) 569 } else if (rt_rq->rt_nr_running) {
569 idle = 0; 570 idle = 0;
571 if (!rt_rq_throttled(rt_rq))
572 enqueue = 1;
573 }
570 574
571 if (enqueue) 575 if (enqueue)
572 sched_rt_rq_enqueue(rt_rq); 576 sched_rt_rq_enqueue(rt_rq);
@@ -1595,8 +1599,7 @@ static void rq_offline_rt(struct rq *rq)
1595 * When switch from the rt queue, we bring ourselves to a position 1599 * When switch from the rt queue, we bring ourselves to a position
1596 * that we might want to pull RT tasks from other runqueues. 1600 * that we might want to pull RT tasks from other runqueues.
1597 */ 1601 */
1598static void switched_from_rt(struct rq *rq, struct task_struct *p, 1602static void switched_from_rt(struct rq *rq, struct task_struct *p)
1599 int running)
1600{ 1603{
1601 /* 1604 /*
1602 * If there are other RT tasks then we will reschedule 1605 * If there are other RT tasks then we will reschedule
@@ -1605,7 +1608,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
1605 * we may need to handle the pulling of RT tasks 1608 * we may need to handle the pulling of RT tasks
1606 * now. 1609 * now.
1607 */ 1610 */
1608 if (!rq->rt.rt_nr_running) 1611 if (p->se.on_rq && !rq->rt.rt_nr_running)
1609 pull_rt_task(rq); 1612 pull_rt_task(rq);
1610} 1613}
1611 1614
@@ -1624,8 +1627,7 @@ static inline void init_sched_rt_class(void)
1624 * with RT tasks. In this case we try to push them off to 1627 * with RT tasks. In this case we try to push them off to
1625 * other runqueues. 1628 * other runqueues.
1626 */ 1629 */
1627static void switched_to_rt(struct rq *rq, struct task_struct *p, 1630static void switched_to_rt(struct rq *rq, struct task_struct *p)
1628 int running)
1629{ 1631{
1630 int check_resched = 1; 1632 int check_resched = 1;
1631 1633
@@ -1636,7 +1638,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
1636 * If that current running task is also an RT task 1638 * If that current running task is also an RT task
1637 * then see if we can move to another run queue. 1639 * then see if we can move to another run queue.
1638 */ 1640 */
1639 if (!running) { 1641 if (p->se.on_rq && rq->curr != p) {
1640#ifdef CONFIG_SMP 1642#ifdef CONFIG_SMP
1641 if (rq->rt.overloaded && push_rt_task(rq) && 1643 if (rq->rt.overloaded && push_rt_task(rq) &&
1642 /* Don't resched if we changed runqueues */ 1644 /* Don't resched if we changed runqueues */
@@ -1652,10 +1654,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
1652 * Priority of the task has changed. This may cause 1654 * Priority of the task has changed. This may cause
1653 * us to initiate a push or pull. 1655 * us to initiate a push or pull.
1654 */ 1656 */
1655static void prio_changed_rt(struct rq *rq, struct task_struct *p, 1657static void
1656 int oldprio, int running) 1658prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
1657{ 1659{
1658 if (running) { 1660 if (!p->se.on_rq)
1661 return;
1662
1663 if (rq->curr == p) {
1659#ifdef CONFIG_SMP 1664#ifdef CONFIG_SMP
1660 /* 1665 /*
1661 * If our priority decreases while running, we 1666 * If our priority decreases while running, we
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 2bf6b47058c1..84ec9bcf82d9 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq)
59{ 59{
60} 60}
61 61
62static void switched_to_stop(struct rq *rq, struct task_struct *p, 62static void switched_to_stop(struct rq *rq, struct task_struct *p)
63 int running)
64{ 63{
65 BUG(); /* its impossible to change to this class */ 64 BUG(); /* its impossible to change to this class */
66} 65}
67 66
68static void prio_changed_stop(struct rq *rq, struct task_struct *p, 67static void
69 int oldprio, int running) 68prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
70{ 69{
71 BUG(); /* how!?, what priority? */ 70 BUG(); /* how!?, what priority? */
72} 71}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 68eb5efec388..56e5dec837f0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat);
54 54
55static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; 55static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56 56
57static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); 57DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58 58
59char *softirq_to_name[NR_SOFTIRQS] = { 59char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", 60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -311,9 +311,21 @@ void irq_enter(void)
311} 311}
312 312
313#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 313#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
314# define invoke_softirq() __do_softirq() 314static inline void invoke_softirq(void)
315{
316 if (!force_irqthreads)
317 __do_softirq();
318 else
319 wakeup_softirqd();
320}
315#else 321#else
316# define invoke_softirq() do_softirq() 322static inline void invoke_softirq(void)
323{
324 if (!force_irqthreads)
325 do_softirq();
326 else
327 wakeup_softirqd();
328}
317#endif 329#endif
318 330
319/* 331/*
@@ -721,7 +733,6 @@ static int run_ksoftirqd(void * __bind_cpu)
721{ 733{
722 set_current_state(TASK_INTERRUPTIBLE); 734 set_current_state(TASK_INTERRUPTIBLE);
723 735
724 current->flags |= PF_KSOFTIRQD;
725 while (!kthread_should_stop()) { 736 while (!kthread_should_stop()) {
726 preempt_disable(); 737 preempt_disable();
727 if (!local_softirq_pending()) { 738 if (!local_softirq_pending()) {
@@ -738,7 +749,10 @@ static int run_ksoftirqd(void * __bind_cpu)
738 don't process */ 749 don't process */
739 if (cpu_is_offline((long)__bind_cpu)) 750 if (cpu_is_offline((long)__bind_cpu))
740 goto wait_to_die; 751 goto wait_to_die;
741 do_softirq(); 752 local_irq_disable();
753 if (local_softirq_pending())
754 __do_softirq();
755 local_irq_enable();
742 preempt_enable_no_resched(); 756 preempt_enable_no_resched();
743 cond_resched(); 757 cond_resched();
744 preempt_disable(); 758 preempt_disable();
diff --git a/kernel/sys.c b/kernel/sys.c
index 18da702ec813..1ad48b3b9068 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -37,6 +37,7 @@
37#include <linux/ptrace.h> 37#include <linux/ptrace.h>
38#include <linux/fs_struct.h> 38#include <linux/fs_struct.h>
39#include <linux/gfp.h> 39#include <linux/gfp.h>
40#include <linux/syscore_ops.h>
40 41
41#include <linux/compat.h> 42#include <linux/compat.h>
42#include <linux/syscalls.h> 43#include <linux/syscalls.h>
@@ -298,6 +299,7 @@ void kernel_restart_prepare(char *cmd)
298 system_state = SYSTEM_RESTART; 299 system_state = SYSTEM_RESTART;
299 device_shutdown(); 300 device_shutdown();
300 sysdev_shutdown(); 301 sysdev_shutdown();
302 syscore_shutdown();
301} 303}
302 304
303/** 305/**
@@ -336,6 +338,7 @@ void kernel_halt(void)
336{ 338{
337 kernel_shutdown_prepare(SYSTEM_HALT); 339 kernel_shutdown_prepare(SYSTEM_HALT);
338 sysdev_shutdown(); 340 sysdev_shutdown();
341 syscore_shutdown();
339 printk(KERN_EMERG "System halted.\n"); 342 printk(KERN_EMERG "System halted.\n");
340 kmsg_dump(KMSG_DUMP_HALT); 343 kmsg_dump(KMSG_DUMP_HALT);
341 machine_halt(); 344 machine_halt();
@@ -355,6 +358,7 @@ void kernel_power_off(void)
355 pm_power_off_prepare(); 358 pm_power_off_prepare();
356 disable_nonboot_cpus(); 359 disable_nonboot_cpus();
357 sysdev_shutdown(); 360 sysdev_shutdown();
361 syscore_shutdown();
358 printk(KERN_EMERG "Power down.\n"); 362 printk(KERN_EMERG "Power down.\n");
359 kmsg_dump(KMSG_DUMP_POWEROFF); 363 kmsg_dump(KMSG_DUMP_POWEROFF);
360 machine_power_off(); 364 machine_power_off();
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c782fe9924c7..25cc41cd8f33 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,3 +186,8 @@ cond_syscall(sys_perf_event_open);
186/* fanotify! */ 186/* fanotify! */
187cond_syscall(sys_fanotify_init); 187cond_syscall(sys_fanotify_init);
188cond_syscall(sys_fanotify_mark); 188cond_syscall(sys_fanotify_mark);
189
190/* open by handle */
191cond_syscall(sys_name_to_handle_at);
192cond_syscall(sys_open_by_handle_at);
193cond_syscall(compat_sys_open_by_handle_at);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83db985..40245d697602 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -194,9 +194,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
194static struct ctl_table root_table[]; 194static struct ctl_table root_table[];
195static struct ctl_table_root sysctl_table_root; 195static struct ctl_table_root sysctl_table_root;
196static struct ctl_table_header root_table_header = { 196static struct ctl_table_header root_table_header = {
197 .count = 1, 197 {{.count = 1,
198 .ctl_table = root_table, 198 .ctl_table = root_table,
199 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), 199 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
200 .root = &sysctl_table_root, 200 .root = &sysctl_table_root,
201 .set = &sysctl_table_root.default_set, 201 .set = &sysctl_table_root.default_set,
202}; 202};
@@ -361,20 +361,13 @@ static struct ctl_table kern_table[] = {
361 .mode = 0644, 361 .mode = 0644,
362 .proc_handler = sched_rt_handler, 362 .proc_handler = sched_rt_handler,
363 }, 363 },
364 {
365 .procname = "sched_compat_yield",
366 .data = &sysctl_sched_compat_yield,
367 .maxlen = sizeof(unsigned int),
368 .mode = 0644,
369 .proc_handler = proc_dointvec,
370 },
371#ifdef CONFIG_SCHED_AUTOGROUP 364#ifdef CONFIG_SCHED_AUTOGROUP
372 { 365 {
373 .procname = "sched_autogroup_enabled", 366 .procname = "sched_autogroup_enabled",
374 .data = &sysctl_sched_autogroup_enabled, 367 .data = &sysctl_sched_autogroup_enabled,
375 .maxlen = sizeof(unsigned int), 368 .maxlen = sizeof(unsigned int),
376 .mode = 0644, 369 .mode = 0644,
377 .proc_handler = proc_dointvec, 370 .proc_handler = proc_dointvec_minmax,
378 .extra1 = &zero, 371 .extra1 = &zero,
379 .extra2 = &one, 372 .extra2 = &one,
380 }, 373 },
@@ -948,7 +941,7 @@ static struct ctl_table kern_table[] = {
948 .data = &sysctl_perf_event_sample_rate, 941 .data = &sysctl_perf_event_sample_rate,
949 .maxlen = sizeof(sysctl_perf_event_sample_rate), 942 .maxlen = sizeof(sysctl_perf_event_sample_rate),
950 .mode = 0644, 943 .mode = 0644,
951 .proc_handler = proc_dointvec, 944 .proc_handler = perf_proc_update_handler,
952 }, 945 },
953#endif 946#endif
954#ifdef CONFIG_KMEMCHECK 947#ifdef CONFIG_KMEMCHECK
@@ -1567,11 +1560,16 @@ void sysctl_head_get(struct ctl_table_header *head)
1567 spin_unlock(&sysctl_lock); 1560 spin_unlock(&sysctl_lock);
1568} 1561}
1569 1562
1563static void free_head(struct rcu_head *rcu)
1564{
1565 kfree(container_of(rcu, struct ctl_table_header, rcu));
1566}
1567
1570void sysctl_head_put(struct ctl_table_header *head) 1568void sysctl_head_put(struct ctl_table_header *head)
1571{ 1569{
1572 spin_lock(&sysctl_lock); 1570 spin_lock(&sysctl_lock);
1573 if (!--head->count) 1571 if (!--head->count)
1574 kfree(head); 1572 call_rcu(&head->rcu, free_head);
1575 spin_unlock(&sysctl_lock); 1573 spin_unlock(&sysctl_lock);
1576} 1574}
1577 1575
@@ -1685,13 +1683,8 @@ static int test_perm(int mode, int op)
1685 1683
1686int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) 1684int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1687{ 1685{
1688 int error;
1689 int mode; 1686 int mode;
1690 1687
1691 error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1692 if (error)
1693 return error;
1694
1695 if (root->permissions) 1688 if (root->permissions)
1696 mode = root->permissions(root, current->nsproxy, table); 1689 mode = root->permissions(root, current->nsproxy, table);
1697 else 1690 else
@@ -1948,10 +1941,10 @@ void unregister_sysctl_table(struct ctl_table_header * header)
1948 start_unregistering(header); 1941 start_unregistering(header);
1949 if (!--header->parent->count) { 1942 if (!--header->parent->count) {
1950 WARN_ON(1); 1943 WARN_ON(1);
1951 kfree(header->parent); 1944 call_rcu(&header->parent->rcu, free_head);
1952 } 1945 }
1953 if (!--header->count) 1946 if (!--header->count)
1954 kfree(header); 1947 call_rcu(&header->rcu, free_head);
1955 spin_unlock(&sysctl_lock); 1948 spin_unlock(&sysctl_lock);
1956} 1949}
1957 1950
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b875bedf7c9a..3b8e028b9601 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) 1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1322{ 1322{
1323 const struct bin_table *table = NULL; 1323 const struct bin_table *table = NULL;
1324 struct nameidata nd;
1325 struct vfsmount *mnt; 1324 struct vfsmount *mnt;
1326 struct file *file; 1325 struct file *file;
1327 ssize_t result; 1326 ssize_t result;
1328 char *pathname; 1327 char *pathname;
1329 int flags; 1328 int flags;
1330 int acc_mode;
1331 1329
1332 pathname = sysctl_getname(name, nlen, &table); 1330 pathname = sysctl_getname(name, nlen, &table);
1333 result = PTR_ERR(pathname); 1331 result = PTR_ERR(pathname);
@@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1337 /* How should the sysctl be accessed? */ 1335 /* How should the sysctl be accessed? */
1338 if (oldval && oldlen && newval && newlen) { 1336 if (oldval && oldlen && newval && newlen) {
1339 flags = O_RDWR; 1337 flags = O_RDWR;
1340 acc_mode = MAY_READ | MAY_WRITE;
1341 } else if (newval && newlen) { 1338 } else if (newval && newlen) {
1342 flags = O_WRONLY; 1339 flags = O_WRONLY;
1343 acc_mode = MAY_WRITE;
1344 } else if (oldval && oldlen) { 1340 } else if (oldval && oldlen) {
1345 flags = O_RDONLY; 1341 flags = O_RDONLY;
1346 acc_mode = MAY_READ;
1347 } else { 1342 } else {
1348 result = 0; 1343 result = 0;
1349 goto out_putname; 1344 goto out_putname;
1350 } 1345 }
1351 1346
1352 mnt = current->nsproxy->pid_ns->proc_mnt; 1347 mnt = current->nsproxy->pid_ns->proc_mnt;
1353 result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd); 1348 file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
1354 if (result)
1355 goto out_putname;
1356
1357 result = may_open(&nd.path, acc_mode, flags);
1358 if (result)
1359 goto out_putpath;
1360
1361 file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
1362 result = PTR_ERR(file); 1349 result = PTR_ERR(file);
1363 if (IS_ERR(file)) 1350 if (IS_ERR(file))
1364 goto out_putname; 1351 goto out_putname;
@@ -1370,10 +1357,6 @@ out_putname:
1370 putname(pathname); 1357 putname(pathname);
1371out: 1358out:
1372 return result; 1359 return result;
1373
1374out_putpath:
1375 path_put(&nd.path);
1376 goto out_putname;
1377} 1360}
1378 1361
1379 1362
diff --git a/kernel/time.c b/kernel/time.c
index 32174359576f..8e8dc6d705c9 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -150,7 +150,7 @@ static inline void warp_clock(void)
150 * various programs will get confused when the clock gets warped. 150 * various programs will get confused when the clock gets warped.
151 */ 151 */
152 152
153int do_sys_settimeofday(struct timespec *tv, struct timezone *tz) 153int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
154{ 154{
155 static int firsttime = 1; 155 static int firsttime = 1;
156 int error = 0; 156 int error = 0;
@@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x)
645} 645}
646 646
647/** 647/**
648 * nsecs_to_jiffies - Convert nsecs in u64 to jiffies 648 * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
649 * 649 *
650 * @n: nsecs in u64 650 * @n: nsecs in u64
651 * 651 *
@@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x)
657 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) 657 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
658 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years 658 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
659 */ 659 */
660unsigned long nsecs_to_jiffies(u64 n) 660u64 nsecs_to_jiffies64(u64 n)
661{ 661{
662#if (NSEC_PER_SEC % HZ) == 0 662#if (NSEC_PER_SEC % HZ) == 0
663 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ 663 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
@@ -674,22 +674,23 @@ unsigned long nsecs_to_jiffies(u64 n)
674#endif 674#endif
675} 675}
676 676
677#if (BITS_PER_LONG < 64) 677/**
678u64 get_jiffies_64(void) 678 * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
679 *
680 * @n: nsecs in u64
681 *
682 * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
683 * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
684 * for scheduler, not for use in device drivers to calculate timeout value.
685 *
686 * note:
687 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
688 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
689 */
690unsigned long nsecs_to_jiffies(u64 n)
679{ 691{
680 unsigned long seq; 692 return (unsigned long)nsecs_to_jiffies64(n);
681 u64 ret;
682
683 do {
684 seq = read_seqbegin(&xtime_lock);
685 ret = jiffies_64;
686 } while (read_seqretry(&xtime_lock, seq));
687 return ret;
688} 693}
689EXPORT_SYMBOL(get_jiffies_64);
690#endif
691
692EXPORT_SYMBOL(jiffies);
693 694
694/* 695/*
695 * Add two timespec values and do a safety check for overflow. 696 * Add two timespec values and do a safety check for overflow.
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ee266620b06c..b0425991e9ac 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,5 @@
1obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o 1obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
2obj-y += timeconv.o posix-clock.o
2 3
3obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o 4obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
4obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o 5obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d7395fdfb9f3..0d74b9ba90c8 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,7 +18,6 @@
18#include <linux/notifier.h> 18#include <linux/notifier.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/sysdev.h> 20#include <linux/sysdev.h>
21#include <linux/tick.h>
22 21
23#include "tick-internal.h" 22#include "tick-internal.h"
24 23
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 5404a8456909..b2fa506667c0 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -22,8 +22,11 @@
22************************************************************************/ 22************************************************************************/
23#include <linux/clocksource.h> 23#include <linux/clocksource.h>
24#include <linux/jiffies.h> 24#include <linux/jiffies.h>
25#include <linux/module.h>
25#include <linux/init.h> 26#include <linux/init.h>
26 27
28#include "tick-internal.h"
29
27/* The Jiffies based clocksource is the lowest common 30/* The Jiffies based clocksource is the lowest common
28 * denominator clock source which should function on 31 * denominator clock source which should function on
29 * all systems. It has the same coarse resolution as 32 * all systems. It has the same coarse resolution as
@@ -64,6 +67,23 @@ struct clocksource clocksource_jiffies = {
64 .shift = JIFFIES_SHIFT, 67 .shift = JIFFIES_SHIFT,
65}; 68};
66 69
70#if (BITS_PER_LONG < 64)
71u64 get_jiffies_64(void)
72{
73 unsigned long seq;
74 u64 ret;
75
76 do {
77 seq = read_seqbegin(&xtime_lock);
78 ret = jiffies_64;
79 } while (read_seqretry(&xtime_lock, seq));
80 return ret;
81}
82EXPORT_SYMBOL(get_jiffies_64);
83#endif
84
85EXPORT_SYMBOL(jiffies);
86
67static int __init init_jiffies_clocksource(void) 87static int __init init_jiffies_clocksource(void)
68{ 88{
69 return clocksource_register(&clocksource_jiffies); 89 return clocksource_register(&clocksource_jiffies);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5c00242fa921..5f1bb8e2008f 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -16,6 +16,8 @@
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/module.h> 17#include <linux/module.h>
18 18
19#include "tick-internal.h"
20
19/* 21/*
20 * NTP timekeeping variables: 22 * NTP timekeeping variables:
21 */ 23 */
@@ -646,6 +648,17 @@ int do_adjtimex(struct timex *txc)
646 hrtimer_cancel(&leap_timer); 648 hrtimer_cancel(&leap_timer);
647 } 649 }
648 650
651 if (txc->modes & ADJ_SETOFFSET) {
652 struct timespec delta;
653 delta.tv_sec = txc->time.tv_sec;
654 delta.tv_nsec = txc->time.tv_usec;
655 if (!(txc->modes & ADJ_NANO))
656 delta.tv_nsec *= 1000;
657 result = timekeeping_inject_offset(&delta);
658 if (result)
659 return result;
660 }
661
649 getnstimeofday(&ts); 662 getnstimeofday(&ts);
650 663
651 write_seqlock_irq(&xtime_lock); 664 write_seqlock_irq(&xtime_lock);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
new file mode 100644
index 000000000000..25028dd4fa18
--- /dev/null
+++ b/kernel/time/posix-clock.c
@@ -0,0 +1,451 @@
1/*
2 * posix-clock.c - support for dynamic clock devices
3 *
4 * Copyright (C) 2010 OMICRON electronics GmbH
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#include <linux/device.h>
21#include <linux/file.h>
22#include <linux/mutex.h>
23#include <linux/posix-clock.h>
24#include <linux/slab.h>
25#include <linux/syscalls.h>
26#include <linux/uaccess.h>
27
28static void delete_clock(struct kref *kref);
29
30/*
31 * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
32 */
33static struct posix_clock *get_posix_clock(struct file *fp)
34{
35 struct posix_clock *clk = fp->private_data;
36
37 mutex_lock(&clk->mutex);
38
39 if (!clk->zombie)
40 return clk;
41
42 mutex_unlock(&clk->mutex);
43
44 return NULL;
45}
46
47static void put_posix_clock(struct posix_clock *clk)
48{
49 mutex_unlock(&clk->mutex);
50}
51
52static ssize_t posix_clock_read(struct file *fp, char __user *buf,
53 size_t count, loff_t *ppos)
54{
55 struct posix_clock *clk = get_posix_clock(fp);
56 int err = -EINVAL;
57
58 if (!clk)
59 return -ENODEV;
60
61 if (clk->ops.read)
62 err = clk->ops.read(clk, fp->f_flags, buf, count);
63
64 put_posix_clock(clk);
65
66 return err;
67}
68
69static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
70{
71 struct posix_clock *clk = get_posix_clock(fp);
72 int result = 0;
73
74 if (!clk)
75 return -ENODEV;
76
77 if (clk->ops.poll)
78 result = clk->ops.poll(clk, fp, wait);
79
80 put_posix_clock(clk);
81
82 return result;
83}
84
85static int posix_clock_fasync(int fd, struct file *fp, int on)
86{
87 struct posix_clock *clk = get_posix_clock(fp);
88 int err = 0;
89
90 if (!clk)
91 return -ENODEV;
92
93 if (clk->ops.fasync)
94 err = clk->ops.fasync(clk, fd, fp, on);
95
96 put_posix_clock(clk);
97
98 return err;
99}
100
101static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma)
102{
103 struct posix_clock *clk = get_posix_clock(fp);
104 int err = -ENODEV;
105
106 if (!clk)
107 return -ENODEV;
108
109 if (clk->ops.mmap)
110 err = clk->ops.mmap(clk, vma);
111
112 put_posix_clock(clk);
113
114 return err;
115}
116
117static long posix_clock_ioctl(struct file *fp,
118 unsigned int cmd, unsigned long arg)
119{
120 struct posix_clock *clk = get_posix_clock(fp);
121 int err = -ENOTTY;
122
123 if (!clk)
124 return -ENODEV;
125
126 if (clk->ops.ioctl)
127 err = clk->ops.ioctl(clk, cmd, arg);
128
129 put_posix_clock(clk);
130
131 return err;
132}
133
134#ifdef CONFIG_COMPAT
135static long posix_clock_compat_ioctl(struct file *fp,
136 unsigned int cmd, unsigned long arg)
137{
138 struct posix_clock *clk = get_posix_clock(fp);
139 int err = -ENOTTY;
140
141 if (!clk)
142 return -ENODEV;
143
144 if (clk->ops.ioctl)
145 err = clk->ops.ioctl(clk, cmd, arg);
146
147 put_posix_clock(clk);
148
149 return err;
150}
151#endif
152
153static int posix_clock_open(struct inode *inode, struct file *fp)
154{
155 int err;
156 struct posix_clock *clk =
157 container_of(inode->i_cdev, struct posix_clock, cdev);
158
159 mutex_lock(&clk->mutex);
160
161 if (clk->zombie) {
162 err = -ENODEV;
163 goto out;
164 }
165 if (clk->ops.open)
166 err = clk->ops.open(clk, fp->f_mode);
167 else
168 err = 0;
169
170 if (!err) {
171 kref_get(&clk->kref);
172 fp->private_data = clk;
173 }
174out:
175 mutex_unlock(&clk->mutex);
176 return err;
177}
178
179static int posix_clock_release(struct inode *inode, struct file *fp)
180{
181 struct posix_clock *clk = fp->private_data;
182 int err = 0;
183
184 if (clk->ops.release)
185 err = clk->ops.release(clk);
186
187 kref_put(&clk->kref, delete_clock);
188
189 fp->private_data = NULL;
190
191 return err;
192}
193
194static const struct file_operations posix_clock_file_operations = {
195 .owner = THIS_MODULE,
196 .llseek = no_llseek,
197 .read = posix_clock_read,
198 .poll = posix_clock_poll,
199 .unlocked_ioctl = posix_clock_ioctl,
200 .open = posix_clock_open,
201 .release = posix_clock_release,
202 .fasync = posix_clock_fasync,
203 .mmap = posix_clock_mmap,
204#ifdef CONFIG_COMPAT
205 .compat_ioctl = posix_clock_compat_ioctl,
206#endif
207};
208
209int posix_clock_register(struct posix_clock *clk, dev_t devid)
210{
211 int err;
212
213 kref_init(&clk->kref);
214 mutex_init(&clk->mutex);
215
216 cdev_init(&clk->cdev, &posix_clock_file_operations);
217 clk->cdev.owner = clk->ops.owner;
218 err = cdev_add(&clk->cdev, devid, 1);
219 if (err)
220 goto no_cdev;
221
222 return err;
223no_cdev:
224 mutex_destroy(&clk->mutex);
225 return err;
226}
227EXPORT_SYMBOL_GPL(posix_clock_register);
228
229static void delete_clock(struct kref *kref)
230{
231 struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
232 mutex_destroy(&clk->mutex);
233 if (clk->release)
234 clk->release(clk);
235}
236
237void posix_clock_unregister(struct posix_clock *clk)
238{
239 cdev_del(&clk->cdev);
240
241 mutex_lock(&clk->mutex);
242 clk->zombie = true;
243 mutex_unlock(&clk->mutex);
244
245 kref_put(&clk->kref, delete_clock);
246}
247EXPORT_SYMBOL_GPL(posix_clock_unregister);
248
249struct posix_clock_desc {
250 struct file *fp;
251 struct posix_clock *clk;
252};
253
254static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
255{
256 struct file *fp = fget(CLOCKID_TO_FD(id));
257 int err = -EINVAL;
258
259 if (!fp)
260 return err;
261
262 if (fp->f_op->open != posix_clock_open || !fp->private_data)
263 goto out;
264
265 cd->fp = fp;
266 cd->clk = get_posix_clock(fp);
267
268 err = cd->clk ? 0 : -ENODEV;
269out:
270 if (err)
271 fput(fp);
272 return err;
273}
274
275static void put_clock_desc(struct posix_clock_desc *cd)
276{
277 put_posix_clock(cd->clk);
278 fput(cd->fp);
279}
280
281static int pc_clock_adjtime(clockid_t id, struct timex *tx)
282{
283 struct posix_clock_desc cd;
284 int err;
285
286 err = get_clock_desc(id, &cd);
287 if (err)
288 return err;
289
290 if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
291 err = -EACCES;
292 goto out;
293 }
294
295 if (cd.clk->ops.clock_adjtime)
296 err = cd.clk->ops.clock_adjtime(cd.clk, tx);
297 else
298 err = -EOPNOTSUPP;
299out:
300 put_clock_desc(&cd);
301
302 return err;
303}
304
305static int pc_clock_gettime(clockid_t id, struct timespec *ts)
306{
307 struct posix_clock_desc cd;
308 int err;
309
310 err = get_clock_desc(id, &cd);
311 if (err)
312 return err;
313
314 if (cd.clk->ops.clock_gettime)
315 err = cd.clk->ops.clock_gettime(cd.clk, ts);
316 else
317 err = -EOPNOTSUPP;
318
319 put_clock_desc(&cd);
320
321 return err;
322}
323
324static int pc_clock_getres(clockid_t id, struct timespec *ts)
325{
326 struct posix_clock_desc cd;
327 int err;
328
329 err = get_clock_desc(id, &cd);
330 if (err)
331 return err;
332
333 if (cd.clk->ops.clock_getres)
334 err = cd.clk->ops.clock_getres(cd.clk, ts);
335 else
336 err = -EOPNOTSUPP;
337
338 put_clock_desc(&cd);
339
340 return err;
341}
342
343static int pc_clock_settime(clockid_t id, const struct timespec *ts)
344{
345 struct posix_clock_desc cd;
346 int err;
347
348 err = get_clock_desc(id, &cd);
349 if (err)
350 return err;
351
352 if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
353 err = -EACCES;
354 goto out;
355 }
356
357 if (cd.clk->ops.clock_settime)
358 err = cd.clk->ops.clock_settime(cd.clk, ts);
359 else
360 err = -EOPNOTSUPP;
361out:
362 put_clock_desc(&cd);
363
364 return err;
365}
366
367static int pc_timer_create(struct k_itimer *kit)
368{
369 clockid_t id = kit->it_clock;
370 struct posix_clock_desc cd;
371 int err;
372
373 err = get_clock_desc(id, &cd);
374 if (err)
375 return err;
376
377 if (cd.clk->ops.timer_create)
378 err = cd.clk->ops.timer_create(cd.clk, kit);
379 else
380 err = -EOPNOTSUPP;
381
382 put_clock_desc(&cd);
383
384 return err;
385}
386
387static int pc_timer_delete(struct k_itimer *kit)
388{
389 clockid_t id = kit->it_clock;
390 struct posix_clock_desc cd;
391 int err;
392
393 err = get_clock_desc(id, &cd);
394 if (err)
395 return err;
396
397 if (cd.clk->ops.timer_delete)
398 err = cd.clk->ops.timer_delete(cd.clk, kit);
399 else
400 err = -EOPNOTSUPP;
401
402 put_clock_desc(&cd);
403
404 return err;
405}
406
407static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts)
408{
409 clockid_t id = kit->it_clock;
410 struct posix_clock_desc cd;
411
412 if (get_clock_desc(id, &cd))
413 return;
414
415 if (cd.clk->ops.timer_gettime)
416 cd.clk->ops.timer_gettime(cd.clk, kit, ts);
417
418 put_clock_desc(&cd);
419}
420
421static int pc_timer_settime(struct k_itimer *kit, int flags,
422 struct itimerspec *ts, struct itimerspec *old)
423{
424 clockid_t id = kit->it_clock;
425 struct posix_clock_desc cd;
426 int err;
427
428 err = get_clock_desc(id, &cd);
429 if (err)
430 return err;
431
432 if (cd.clk->ops.timer_settime)
433 err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old);
434 else
435 err = -EOPNOTSUPP;
436
437 put_clock_desc(&cd);
438
439 return err;
440}
441
442struct k_clock clock_posix_dynamic = {
443 .clock_getres = pc_clock_getres,
444 .clock_set = pc_clock_settime,
445 .clock_get = pc_clock_gettime,
446 .clock_adj = pc_clock_adjtime,
447 .timer_create = pc_timer_create,
448 .timer_set = pc_timer_settime,
449 .timer_del = pc_timer_delete,
450 .timer_get = pc_timer_gettime,
451};
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 48b2761b5668..da800ffa810c 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -18,7 +18,6 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/tick.h>
22 21
23#include "tick-internal.h" 22#include "tick-internal.h"
24 23
@@ -600,4 +599,14 @@ int tick_broadcast_oneshot_active(void)
600 return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; 599 return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
601} 600}
602 601
602/*
603 * Check whether the broadcast device supports oneshot.
604 */
605bool tick_broadcast_oneshot_available(void)
606{
607 struct clock_event_device *bc = tick_broadcast_device.evtdev;
608
609 return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
610}
611
603#endif 612#endif
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 051bc80a0c43..119528de8235 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -18,7 +18,6 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/tick.h>
22 21
23#include <asm/irq_regs.h> 22#include <asm/irq_regs.h>
24 23
@@ -51,7 +50,11 @@ int tick_is_oneshot_available(void)
51{ 50{
52 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 51 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
53 52
54 return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); 53 if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
54 return 0;
55 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
56 return 1;
57 return tick_broadcast_oneshot_available();
55} 58}
56 59
57/* 60/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 290eefbc1f60..1009b06d6f89 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -1,6 +1,10 @@
1/* 1/*
2 * tick internal variable and functions used by low/high res code 2 * tick internal variable and functions used by low/high res code
3 */ 3 */
4#include <linux/hrtimer.h>
5#include <linux/tick.h>
6
7#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
4 8
5#define TICK_DO_TIMER_NONE -1 9#define TICK_DO_TIMER_NONE -1
6#define TICK_DO_TIMER_BOOT -2 10#define TICK_DO_TIMER_BOOT -2
@@ -36,6 +40,7 @@ extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
36extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); 40extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
37extern int tick_broadcast_oneshot_active(void); 41extern int tick_broadcast_oneshot_active(void);
38extern void tick_check_oneshot_broadcast(int cpu); 42extern void tick_check_oneshot_broadcast(int cpu);
43bool tick_broadcast_oneshot_available(void);
39# else /* BROADCAST */ 44# else /* BROADCAST */
40static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) 45static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
41{ 46{
@@ -46,6 +51,7 @@ static inline void tick_broadcast_switch_to_oneshot(void) { }
46static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } 51static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
47static inline int tick_broadcast_oneshot_active(void) { return 0; } 52static inline int tick_broadcast_oneshot_active(void) { return 0; }
48static inline void tick_check_oneshot_broadcast(int cpu) { } 53static inline void tick_check_oneshot_broadcast(int cpu) { }
54static inline bool tick_broadcast_oneshot_available(void) { return true; }
49# endif /* !BROADCAST */ 55# endif /* !BROADCAST */
50 56
51#else /* !ONESHOT */ 57#else /* !ONESHOT */
@@ -76,6 +82,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
76 return 0; 82 return 0;
77} 83}
78static inline int tick_broadcast_oneshot_active(void) { return 0; } 84static inline int tick_broadcast_oneshot_active(void) { return 0; }
85static inline bool tick_broadcast_oneshot_available(void) { return false; }
79#endif /* !TICK_ONESHOT */ 86#endif /* !TICK_ONESHOT */
80 87
81/* 88/*
@@ -132,3 +139,8 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
132{ 139{
133 return !(dev->features & CLOCK_EVT_FEAT_DUMMY); 140 return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
134} 141}
142
143#endif
144
145extern void do_timer(unsigned long ticks);
146extern seqlock_t xtime_lock;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 5cbc101f908b..2d04411a5f05 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -18,7 +18,6 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/tick.h>
22 21
23#include "tick-internal.h" 22#include "tick-internal.h"
24 23
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c55ea2433471..d5097c44b407 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -19,7 +19,6 @@
19#include <linux/percpu.h> 19#include <linux/percpu.h>
20#include <linux/profile.h> 20#include <linux/profile.h>
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/tick.h>
23#include <linux/module.h> 22#include <linux/module.h>
24 23
25#include <asm/irq_regs.h> 24#include <asm/irq_regs.h>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d27c7562902c..3bd7e3d5c632 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -353,7 +353,7 @@ EXPORT_SYMBOL(do_gettimeofday);
353 * 353 *
354 * Sets the time of day to the new time and update NTP and notify hrtimers 354 * Sets the time of day to the new time and update NTP and notify hrtimers
355 */ 355 */
356int do_settimeofday(struct timespec *tv) 356int do_settimeofday(const struct timespec *tv)
357{ 357{
358 struct timespec ts_delta; 358 struct timespec ts_delta;
359 unsigned long flags; 359 unsigned long flags;
@@ -387,6 +387,42 @@ int do_settimeofday(struct timespec *tv)
387 387
388EXPORT_SYMBOL(do_settimeofday); 388EXPORT_SYMBOL(do_settimeofday);
389 389
390
391/**
392 * timekeeping_inject_offset - Adds or subtracts from the current time.
393 * @tv: pointer to the timespec variable containing the offset
394 *
395 * Adds or subtracts an offset value from the current time.
396 */
397int timekeeping_inject_offset(struct timespec *ts)
398{
399 unsigned long flags;
400
401 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
402 return -EINVAL;
403
404 write_seqlock_irqsave(&xtime_lock, flags);
405
406 timekeeping_forward_now();
407
408 xtime = timespec_add(xtime, *ts);
409 wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
410
411 timekeeper.ntp_error = 0;
412 ntp_clear();
413
414 update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
415 timekeeper.mult);
416
417 write_sequnlock_irqrestore(&xtime_lock, flags);
418
419 /* signal hrtimers about time change */
420 clock_was_set();
421
422 return 0;
423}
424EXPORT_SYMBOL(timekeeping_inject_offset);
425
390/** 426/**
391 * change_clocksource - Swaps clocksources if a new one is available 427 * change_clocksource - Swaps clocksources if a new one is available
392 * 428 *
@@ -779,7 +815,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
779 * 815 *
780 * Called from the timer interrupt, must hold a write on xtime_lock. 816 * Called from the timer interrupt, must hold a write on xtime_lock.
781 */ 817 */
782void update_wall_time(void) 818static void update_wall_time(void)
783{ 819{
784 struct clocksource *clock; 820 struct clocksource *clock;
785 cycle_t offset; 821 cycle_t offset;
@@ -871,7 +907,7 @@ void update_wall_time(void)
871 * getboottime - Return the real time of system boot. 907 * getboottime - Return the real time of system boot.
872 * @ts: pointer to the timespec to be set 908 * @ts: pointer to the timespec to be set
873 * 909 *
874 * Returns the time of day in a timespec. 910 * Returns the wall-time of boot in a timespec.
875 * 911 *
876 * This is based on the wall_to_monotonic offset and the total suspend 912 * This is based on the wall_to_monotonic offset and the total suspend
877 * time. Calls to settimeofday will affect the value returned (which 913 * time. Calls to settimeofday will affect the value returned (which
@@ -889,6 +925,55 @@ void getboottime(struct timespec *ts)
889} 925}
890EXPORT_SYMBOL_GPL(getboottime); 926EXPORT_SYMBOL_GPL(getboottime);
891 927
928
929/**
930 * get_monotonic_boottime - Returns monotonic time since boot
931 * @ts: pointer to the timespec to be set
932 *
933 * Returns the monotonic time since boot in a timespec.
934 *
935 * This is similar to CLOCK_MONTONIC/ktime_get_ts, but also
936 * includes the time spent in suspend.
937 */
938void get_monotonic_boottime(struct timespec *ts)
939{
940 struct timespec tomono, sleep;
941 unsigned int seq;
942 s64 nsecs;
943
944 WARN_ON(timekeeping_suspended);
945
946 do {
947 seq = read_seqbegin(&xtime_lock);
948 *ts = xtime;
949 tomono = wall_to_monotonic;
950 sleep = total_sleep_time;
951 nsecs = timekeeping_get_ns();
952
953 } while (read_seqretry(&xtime_lock, seq));
954
955 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
956 ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
957}
958EXPORT_SYMBOL_GPL(get_monotonic_boottime);
959
960/**
961 * ktime_get_boottime - Returns monotonic time since boot in a ktime
962 *
963 * Returns the monotonic time since boot in a ktime
964 *
965 * This is similar to CLOCK_MONTONIC/ktime_get, but also
966 * includes the time spent in suspend.
967 */
968ktime_t ktime_get_boottime(void)
969{
970 struct timespec ts;
971
972 get_monotonic_boottime(&ts);
973 return timespec_to_ktime(ts);
974}
975EXPORT_SYMBOL_GPL(ktime_get_boottime);
976
892/** 977/**
893 * monotonic_to_bootbased - Convert the monotonic time to boot based. 978 * monotonic_to_bootbased - Convert the monotonic time to boot based.
894 * @ts: pointer to the timespec to be converted 979 * @ts: pointer to the timespec to be converted
@@ -910,11 +995,6 @@ struct timespec __current_kernel_time(void)
910 return xtime; 995 return xtime;
911} 996}
912 997
913struct timespec __get_wall_to_monotonic(void)
914{
915 return wall_to_monotonic;
916}
917
918struct timespec current_kernel_time(void) 998struct timespec current_kernel_time(void)
919{ 999{
920 struct timespec now; 1000 struct timespec now;
@@ -946,3 +1026,48 @@ struct timespec get_monotonic_coarse(void)
946 now.tv_nsec + mono.tv_nsec); 1026 now.tv_nsec + mono.tv_nsec);
947 return now; 1027 return now;
948} 1028}
1029
1030/*
1031 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1032 * without sampling the sequence number in xtime_lock.
1033 * jiffies is defined in the linker script...
1034 */
1035void do_timer(unsigned long ticks)
1036{
1037 jiffies_64 += ticks;
1038 update_wall_time();
1039 calc_global_load(ticks);
1040}
1041
1042/**
1043 * get_xtime_and_monotonic_and_sleep_offset() - get xtime, wall_to_monotonic,
1044 * and sleep offsets.
1045 * @xtim: pointer to timespec to be set with xtime
1046 * @wtom: pointer to timespec to be set with wall_to_monotonic
1047 * @sleep: pointer to timespec to be set with time in suspend
1048 */
1049void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1050 struct timespec *wtom, struct timespec *sleep)
1051{
1052 unsigned long seq;
1053
1054 do {
1055 seq = read_seqbegin(&xtime_lock);
1056 *xtim = xtime;
1057 *wtom = wall_to_monotonic;
1058 *sleep = total_sleep_time;
1059 } while (read_seqretry(&xtime_lock, seq));
1060}
1061
1062/**
1063 * xtime_update() - advances the timekeeping infrastructure
1064 * @ticks: number of ticks, that have elapsed since the last call.
1065 *
1066 * Must be called with interrupts disabled.
1067 */
1068void xtime_update(unsigned long ticks)
1069{
1070 write_seqlock(&xtime_lock);
1071 do_timer(ticks);
1072 write_sequnlock(&xtime_lock);
1073}
diff --git a/kernel/timer.c b/kernel/timer.c
index d6459923d245..fd6198692b57 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -404,6 +404,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {}
404 404
405static struct debug_obj_descr timer_debug_descr; 405static struct debug_obj_descr timer_debug_descr;
406 406
407static void *timer_debug_hint(void *addr)
408{
409 return ((struct timer_list *) addr)->function;
410}
411
407/* 412/*
408 * fixup_init is called when: 413 * fixup_init is called when:
409 * - an active object is initialized 414 * - an active object is initialized
@@ -477,6 +482,7 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
477 482
478static struct debug_obj_descr timer_debug_descr = { 483static struct debug_obj_descr timer_debug_descr = {
479 .name = "timer_list", 484 .name = "timer_list",
485 .debug_hint = timer_debug_hint,
480 .fixup_init = timer_fixup_init, 486 .fixup_init = timer_fixup_init,
481 .fixup_activate = timer_fixup_activate, 487 .fixup_activate = timer_fixup_activate,
482 .fixup_free = timer_fixup_free, 488 .fixup_free = timer_fixup_free,
@@ -964,6 +970,25 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
964 * add_timer_on(). Upon exit the timer is not queued and the handler is 970 * add_timer_on(). Upon exit the timer is not queued and the handler is
965 * not running on any CPU. 971 * not running on any CPU.
966 * 972 *
973 * Note: You must not hold locks that are held in interrupt context
974 * while calling this function. Even if the lock has nothing to do
975 * with the timer in question. Here's why:
976 *
977 * CPU0 CPU1
978 * ---- ----
979 * <SOFTIRQ>
980 * call_timer_fn();
981 * base->running_timer = mytimer;
982 * spin_lock_irq(somelock);
983 * <IRQ>
984 * spin_lock(somelock);
985 * del_timer_sync(mytimer);
986 * while (base->running_timer == mytimer);
987 *
988 * Now del_timer_sync() will never return and never release somelock.
989 * The interrupt on the other CPU is waiting to grab somelock but
990 * it has interrupted the softirq that CPU0 is waiting to finish.
991 *
967 * The function returns whether it has deactivated a pending timer or not. 992 * The function returns whether it has deactivated a pending timer or not.
968 */ 993 */
969int del_timer_sync(struct timer_list *timer) 994int del_timer_sync(struct timer_list *timer)
@@ -971,6 +996,10 @@ int del_timer_sync(struct timer_list *timer)
971#ifdef CONFIG_LOCKDEP 996#ifdef CONFIG_LOCKDEP
972 unsigned long flags; 997 unsigned long flags;
973 998
999 /*
1000 * If lockdep gives a backtrace here, please reference
1001 * the synchronization rules above.
1002 */
974 local_irq_save(flags); 1003 local_irq_save(flags);
975 lock_map_acquire(&timer->lockdep_map); 1004 lock_map_acquire(&timer->lockdep_map);
976 lock_map_release(&timer->lockdep_map); 1005 lock_map_release(&timer->lockdep_map);
@@ -1295,19 +1324,6 @@ void run_local_timers(void)
1295 raise_softirq(TIMER_SOFTIRQ); 1324 raise_softirq(TIMER_SOFTIRQ);
1296} 1325}
1297 1326
1298/*
1299 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1300 * without sampling the sequence number in xtime_lock.
1301 * jiffies is defined in the linker script...
1302 */
1303
1304void do_timer(unsigned long ticks)
1305{
1306 jiffies_64 += ticks;
1307 update_wall_time();
1308 calc_global_load(ticks);
1309}
1310
1311#ifdef __ARCH_WANT_SYS_ALARM 1327#ifdef __ARCH_WANT_SYS_ALARM
1312 1328
1313/* 1329/*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d95721f33702..cbafed7d4f38 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1827,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1827 rwbs[i] = '\0'; 1827 rwbs[i] = '\0';
1828} 1828}
1829 1829
1830void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
1831{
1832 int rw = rq->cmd_flags & 0x03;
1833 int bytes;
1834
1835 if (rq->cmd_flags & REQ_DISCARD)
1836 rw |= REQ_DISCARD;
1837
1838 if (rq->cmd_flags & REQ_SECURE)
1839 rw |= REQ_SECURE;
1840
1841 bytes = blk_rq_bytes(rq);
1842
1843 blk_fill_rwbs(rwbs, rw, bytes);
1844}
1845
1846#endif /* CONFIG_EVENT_TRACING */ 1830#endif /* CONFIG_EVENT_TRACING */
1847 1831
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3dadae83883..888b611897d3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3328,7 +3328,7 @@ static int start_graph_tracing(void)
3328 /* The cpu_boot init_task->ret_stack will never be freed */ 3328 /* The cpu_boot init_task->ret_stack will never be freed */
3329 for_each_online_cpu(cpu) { 3329 for_each_online_cpu(cpu) {
3330 if (!idle_task(cpu)->ret_stack) 3330 if (!idle_task(cpu)->ret_stack)
3331 ftrace_graph_init_task(idle_task(cpu)); 3331 ftrace_graph_init_idle_task(idle_task(cpu), cpu);
3332 } 3332 }
3333 3333
3334 do { 3334 do {
@@ -3418,6 +3418,49 @@ void unregister_ftrace_graph(void)
3418 mutex_unlock(&ftrace_lock); 3418 mutex_unlock(&ftrace_lock);
3419} 3419}
3420 3420
3421static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
3422
3423static void
3424graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
3425{
3426 atomic_set(&t->tracing_graph_pause, 0);
3427 atomic_set(&t->trace_overrun, 0);
3428 t->ftrace_timestamp = 0;
3429 /* make curr_ret_stack visable before we add the ret_stack */
3430 smp_wmb();
3431 t->ret_stack = ret_stack;
3432}
3433
3434/*
3435 * Allocate a return stack for the idle task. May be the first
3436 * time through, or it may be done by CPU hotplug online.
3437 */
3438void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
3439{
3440 t->curr_ret_stack = -1;
3441 /*
3442 * The idle task has no parent, it either has its own
3443 * stack or no stack at all.
3444 */
3445 if (t->ret_stack)
3446 WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
3447
3448 if (ftrace_graph_active) {
3449 struct ftrace_ret_stack *ret_stack;
3450
3451 ret_stack = per_cpu(idle_ret_stack, cpu);
3452 if (!ret_stack) {
3453 ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
3454 * sizeof(struct ftrace_ret_stack),
3455 GFP_KERNEL);
3456 if (!ret_stack)
3457 return;
3458 per_cpu(idle_ret_stack, cpu) = ret_stack;
3459 }
3460 graph_init_task(t, ret_stack);
3461 }
3462}
3463
3421/* Allocate a return stack for newly created task */ 3464/* Allocate a return stack for newly created task */
3422void ftrace_graph_init_task(struct task_struct *t) 3465void ftrace_graph_init_task(struct task_struct *t)
3423{ 3466{
@@ -3433,12 +3476,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3433 GFP_KERNEL); 3476 GFP_KERNEL);
3434 if (!ret_stack) 3477 if (!ret_stack)
3435 return; 3478 return;
3436 atomic_set(&t->tracing_graph_pause, 0); 3479 graph_init_task(t, ret_stack);
3437 atomic_set(&t->trace_overrun, 0);
3438 t->ftrace_timestamp = 0;
3439 /* make curr_ret_stack visable before we add the ret_stack */
3440 smp_wmb();
3441 t->ret_stack = ret_stack;
3442 } 3480 }
3443} 3481}
3444 3482
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd1c35a4fbcc..db7b439d23ee 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5,7 +5,6 @@
5 */ 5 */
6#include <linux/ring_buffer.h> 6#include <linux/ring_buffer.h>
7#include <linux/trace_clock.h> 7#include <linux/trace_clock.h>
8#include <linux/ftrace_irq.h>
9#include <linux/spinlock.h> 8#include <linux/spinlock.h>
10#include <linux/debugfs.h> 9#include <linux/debugfs.h>
11#include <linux/uaccess.h> 10#include <linux/uaccess.h>
@@ -1429,6 +1428,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1429} 1428}
1430EXPORT_SYMBOL_GPL(ring_buffer_resize); 1429EXPORT_SYMBOL_GPL(ring_buffer_resize);
1431 1430
1431void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1432{
1433 mutex_lock(&buffer->mutex);
1434 if (val)
1435 buffer->flags |= RB_FL_OVERWRITE;
1436 else
1437 buffer->flags &= ~RB_FL_OVERWRITE;
1438 mutex_unlock(&buffer->mutex);
1439}
1440EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1441
1432static inline void * 1442static inline void *
1433__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) 1443__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1434{ 1444{
@@ -2162,11 +2172,19 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2162 if (likely(ts >= cpu_buffer->write_stamp)) { 2172 if (likely(ts >= cpu_buffer->write_stamp)) {
2163 delta = diff; 2173 delta = diff;
2164 if (unlikely(test_time_stamp(delta))) { 2174 if (unlikely(test_time_stamp(delta))) {
2175 int local_clock_stable = 1;
2176#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2177 local_clock_stable = sched_clock_stable;
2178#endif
2165 WARN_ONCE(delta > (1ULL << 59), 2179 WARN_ONCE(delta > (1ULL << 59),
2166 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", 2180 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2167 (unsigned long long)delta, 2181 (unsigned long long)delta,
2168 (unsigned long long)ts, 2182 (unsigned long long)ts,
2169 (unsigned long long)cpu_buffer->write_stamp); 2183 (unsigned long long)cpu_buffer->write_stamp,
2184 local_clock_stable ? "" :
2185 "If you just came from a suspend/resume,\n"
2186 "please switch to the trace global clock:\n"
2187 " echo global > /sys/kernel/debug/tracing/trace_clock\n");
2170 add_timestamp = 1; 2188 add_timestamp = 1;
2171 } 2189 }
2172 } 2190 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dc53ecb80589..9541c27c1cf2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -41,8 +41,6 @@
41#include "trace.h" 41#include "trace.h"
42#include "trace_output.h" 42#include "trace_output.h"
43 43
44#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
45
46/* 44/*
47 * On boot up, the ring buffer is set to the minimum size, so that 45 * On boot up, the ring buffer is set to the minimum size, so that
48 * we do not waste memory on systems that are not using tracing. 46 * we do not waste memory on systems that are not using tracing.
@@ -340,7 +338,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
340/* trace_flags holds trace_options default values */ 338/* trace_flags holds trace_options default values */
341unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 339unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
342 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 340 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
343 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD; 341 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
344 342
345static int trace_stop_count; 343static int trace_stop_count;
346static DEFINE_SPINLOCK(tracing_start_lock); 344static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +423,7 @@ static const char *trace_options[] = {
425 "sleep-time", 423 "sleep-time",
426 "graph-time", 424 "graph-time",
427 "record-cmd", 425 "record-cmd",
426 "overwrite",
428 NULL 427 NULL
429}; 428};
430 429
@@ -780,6 +779,11 @@ __acquires(kernel_lock)
780 tracing_reset_online_cpus(tr); 779 tracing_reset_online_cpus(tr);
781 780
782 current_trace = type; 781 current_trace = type;
782
783 /* If we expanded the buffers, make sure the max is expanded too */
784 if (ring_buffer_expanded && type->use_max_tr)
785 ring_buffer_resize(max_tr.buffer, trace_buf_size);
786
783 /* the test is responsible for initializing and enabling */ 787 /* the test is responsible for initializing and enabling */
784 pr_info("Testing tracer %s: ", type->name); 788 pr_info("Testing tracer %s: ", type->name);
785 ret = type->selftest(type, tr); 789 ret = type->selftest(type, tr);
@@ -792,6 +796,10 @@ __acquires(kernel_lock)
792 /* Only reset on passing, to avoid touching corrupted buffers */ 796 /* Only reset on passing, to avoid touching corrupted buffers */
793 tracing_reset_online_cpus(tr); 797 tracing_reset_online_cpus(tr);
794 798
799 /* Shrink the max buffer again */
800 if (ring_buffer_expanded && type->use_max_tr)
801 ring_buffer_resize(max_tr.buffer, 1);
802
795 printk(KERN_CONT "PASSED\n"); 803 printk(KERN_CONT "PASSED\n");
796 } 804 }
797#endif 805#endif
@@ -1102,7 +1110,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1102 1110
1103 entry->preempt_count = pc & 0xff; 1111 entry->preempt_count = pc & 0xff;
1104 entry->pid = (tsk) ? tsk->pid : 0; 1112 entry->pid = (tsk) ? tsk->pid : 0;
1105 entry->lock_depth = (tsk) ? tsk->lock_depth : 0;
1106 entry->flags = 1113 entry->flags =
1107#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 1114#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1108 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 1115 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1749,10 +1756,9 @@ static void print_lat_help_header(struct seq_file *m)
1749 seq_puts(m, "# | / _----=> need-resched \n"); 1756 seq_puts(m, "# | / _----=> need-resched \n");
1750 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1757 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1751 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1758 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1752 seq_puts(m, "# |||| /_--=> lock-depth \n"); 1759 seq_puts(m, "# |||| / delay \n");
1753 seq_puts(m, "# |||||/ delay \n"); 1760 seq_puts(m, "# cmd pid ||||| time | caller \n");
1754 seq_puts(m, "# cmd pid |||||| time | caller \n"); 1761 seq_puts(m, "# \\ / ||||| \\ | / \n");
1755 seq_puts(m, "# \\ / |||||| \\ | / \n");
1756} 1762}
1757 1763
1758static void print_func_help_header(struct seq_file *m) 1764static void print_func_help_header(struct seq_file *m)
@@ -2529,6 +2535,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2529 2535
2530 if (mask == TRACE_ITER_RECORD_CMD) 2536 if (mask == TRACE_ITER_RECORD_CMD)
2531 trace_event_enable_cmd_record(enabled); 2537 trace_event_enable_cmd_record(enabled);
2538
2539 if (mask == TRACE_ITER_OVERWRITE)
2540 ring_buffer_change_overwrite(global_trace.buffer, enabled);
2532} 2541}
2533 2542
2534static ssize_t 2543static ssize_t
@@ -2710,6 +2719,10 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2710 2719
2711 mutex_lock(&trace_types_lock); 2720 mutex_lock(&trace_types_lock);
2712 if (tracer_enabled ^ val) { 2721 if (tracer_enabled ^ val) {
2722
2723 /* Only need to warn if this is used to change the state */
2724 WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
2725
2713 if (val) { 2726 if (val) {
2714 tracer_enabled = 1; 2727 tracer_enabled = 1;
2715 if (current_trace->start) 2728 if (current_trace->start)
@@ -4551,9 +4564,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
4551__init static int tracer_alloc_buffers(void) 4564__init static int tracer_alloc_buffers(void)
4552{ 4565{
4553 int ring_buf_size; 4566 int ring_buf_size;
4567 enum ring_buffer_flags rb_flags;
4554 int i; 4568 int i;
4555 int ret = -ENOMEM; 4569 int ret = -ENOMEM;
4556 4570
4571
4557 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 4572 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
4558 goto out; 4573 goto out;
4559 4574
@@ -4566,12 +4581,13 @@ __init static int tracer_alloc_buffers(void)
4566 else 4581 else
4567 ring_buf_size = 1; 4582 ring_buf_size = 1;
4568 4583
4584 rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
4585
4569 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 4586 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
4570 cpumask_copy(tracing_cpumask, cpu_all_mask); 4587 cpumask_copy(tracing_cpumask, cpu_all_mask);
4571 4588
4572 /* TODO: make the number of buffers hot pluggable with CPUS */ 4589 /* TODO: make the number of buffers hot pluggable with CPUS */
4573 global_trace.buffer = ring_buffer_alloc(ring_buf_size, 4590 global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
4574 TRACE_BUFFER_FLAGS);
4575 if (!global_trace.buffer) { 4591 if (!global_trace.buffer) {
4576 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); 4592 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
4577 WARN_ON(1); 4593 WARN_ON(1);
@@ -4581,7 +4597,7 @@ __init static int tracer_alloc_buffers(void)
4581 4597
4582 4598
4583#ifdef CONFIG_TRACER_MAX_TRACE 4599#ifdef CONFIG_TRACER_MAX_TRACE
4584 max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS); 4600 max_tr.buffer = ring_buffer_alloc(1, rb_flags);
4585 if (!max_tr.buffer) { 4601 if (!max_tr.buffer) {
4586 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); 4602 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4587 WARN_ON(1); 4603 WARN_ON(1);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9021f8c0c0c3..5e9dfc6286dd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,8 +272,8 @@ struct tracer {
272 /* If you handled the flag setting, return 0 */ 272 /* If you handled the flag setting, return 0 */
273 int (*set_flag)(u32 old_flags, u32 bit, int set); 273 int (*set_flag)(u32 old_flags, u32 bit, int set);
274 struct tracer *next; 274 struct tracer *next;
275 int print_max;
276 struct tracer_flags *flags; 275 struct tracer_flags *flags;
276 int print_max;
277 int use_max_tr; 277 int use_max_tr;
278}; 278};
279 279
@@ -606,6 +606,7 @@ enum trace_iterator_flags {
606 TRACE_ITER_SLEEP_TIME = 0x40000, 606 TRACE_ITER_SLEEP_TIME = 0x40000,
607 TRACE_ITER_GRAPH_TIME = 0x80000, 607 TRACE_ITER_GRAPH_TIME = 0x80000,
608 TRACE_ITER_RECORD_CMD = 0x100000, 608 TRACE_ITER_RECORD_CMD = 0x100000,
609 TRACE_ITER_OVERWRITE = 0x200000,
609}; 610};
610 611
611/* 612/*
@@ -661,8 +662,10 @@ struct ftrace_event_field {
661}; 662};
662 663
663struct event_filter { 664struct event_filter {
664 int n_preds; 665 int n_preds; /* Number assigned */
665 struct filter_pred **preds; 666 int a_preds; /* allocated */
667 struct filter_pred *preds;
668 struct filter_pred *root;
666 char *filter_string; 669 char *filter_string;
667}; 670};
668 671
@@ -674,11 +677,23 @@ struct event_subsystem {
674 int nr_events; 677 int nr_events;
675}; 678};
676 679
680#define FILTER_PRED_INVALID ((unsigned short)-1)
681#define FILTER_PRED_IS_RIGHT (1 << 15)
682#define FILTER_PRED_FOLD (1 << 15)
683
684/*
685 * The max preds is the size of unsigned short with
686 * two flags at the MSBs. One bit is used for both the IS_RIGHT
687 * and FOLD flags. The other is reserved.
688 *
689 * 2^14 preds is way more than enough.
690 */
691#define MAX_FILTER_PRED 16384
692
677struct filter_pred; 693struct filter_pred;
678struct regex; 694struct regex;
679 695
680typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 696typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
681 int val1, int val2);
682 697
683typedef int (*regex_match_func)(char *str, struct regex *r, int len); 698typedef int (*regex_match_func)(char *str, struct regex *r, int len);
684 699
@@ -700,11 +715,23 @@ struct filter_pred {
700 filter_pred_fn_t fn; 715 filter_pred_fn_t fn;
701 u64 val; 716 u64 val;
702 struct regex regex; 717 struct regex regex;
703 char *field_name; 718 /*
719 * Leaf nodes use field_name, ops is used by AND and OR
720 * nodes. The field_name is always freed when freeing a pred.
721 * We can overload field_name for ops and have it freed
722 * as well.
723 */
724 union {
725 char *field_name;
726 unsigned short *ops;
727 };
704 int offset; 728 int offset;
705 int not; 729 int not;
706 int op; 730 int op;
707 int pop_n; 731 unsigned short index;
732 unsigned short parent;
733 unsigned short left;
734 unsigned short right;
708}; 735};
709 736
710extern struct list_head ftrace_common_fields; 737extern struct list_head ftrace_common_fields;
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 6cf223764be8..1516cb3ec549 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -109,12 +109,12 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
109 */ 109 */
110#define FTRACE_CTX_FIELDS \ 110#define FTRACE_CTX_FIELDS \
111 __field( unsigned int, prev_pid ) \ 111 __field( unsigned int, prev_pid ) \
112 __field( unsigned int, next_pid ) \
113 __field( unsigned int, next_cpu ) \
112 __field( unsigned char, prev_prio ) \ 114 __field( unsigned char, prev_prio ) \
113 __field( unsigned char, prev_state ) \ 115 __field( unsigned char, prev_state ) \
114 __field( unsigned int, next_pid ) \
115 __field( unsigned char, next_prio ) \ 116 __field( unsigned char, next_prio ) \
116 __field( unsigned char, next_state ) \ 117 __field( unsigned char, next_state )
117 __field( unsigned int, next_cpu )
118 118
119FTRACE_ENTRY(context_switch, ctx_switch_entry, 119FTRACE_ENTRY(context_switch, ctx_switch_entry,
120 120
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5f499e0438a4..e88f74fe1d4c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
116 __common_field(unsigned char, flags); 116 __common_field(unsigned char, flags);
117 __common_field(unsigned char, preempt_count); 117 __common_field(unsigned char, preempt_count);
118 __common_field(int, pid); 118 __common_field(int, pid);
119 __common_field(int, lock_depth);
120 119
121 return ret; 120 return ret;
122} 121}
@@ -326,6 +325,7 @@ int trace_set_clr_event(const char *system, const char *event, int set)
326{ 325{
327 return __ftrace_set_clr_event(NULL, system, event, set); 326 return __ftrace_set_clr_event(NULL, system, event, set);
328} 327}
328EXPORT_SYMBOL_GPL(trace_set_clr_event);
329 329
330/* 128 should be much more than enough */ 330/* 128 should be much more than enough */
331#define EVENT_BUF_SIZE 127 331#define EVENT_BUF_SIZE 127
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 36d40104b17f..3249b4f77ef0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -123,9 +123,13 @@ struct filter_parse_state {
123 } operand; 123 } operand;
124}; 124};
125 125
126struct pred_stack {
127 struct filter_pred **preds;
128 int index;
129};
130
126#define DEFINE_COMPARISON_PRED(type) \ 131#define DEFINE_COMPARISON_PRED(type) \
127static int filter_pred_##type(struct filter_pred *pred, void *event, \ 132static int filter_pred_##type(struct filter_pred *pred, void *event) \
128 int val1, int val2) \
129{ \ 133{ \
130 type *addr = (type *)(event + pred->offset); \ 134 type *addr = (type *)(event + pred->offset); \
131 type val = (type)pred->val; \ 135 type val = (type)pred->val; \
@@ -152,8 +156,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event, \
152} 156}
153 157
154#define DEFINE_EQUALITY_PRED(size) \ 158#define DEFINE_EQUALITY_PRED(size) \
155static int filter_pred_##size(struct filter_pred *pred, void *event, \ 159static int filter_pred_##size(struct filter_pred *pred, void *event) \
156 int val1, int val2) \
157{ \ 160{ \
158 u##size *addr = (u##size *)(event + pred->offset); \ 161 u##size *addr = (u##size *)(event + pred->offset); \
159 u##size val = (u##size)pred->val; \ 162 u##size val = (u##size)pred->val; \
@@ -178,23 +181,8 @@ DEFINE_EQUALITY_PRED(32);
178DEFINE_EQUALITY_PRED(16); 181DEFINE_EQUALITY_PRED(16);
179DEFINE_EQUALITY_PRED(8); 182DEFINE_EQUALITY_PRED(8);
180 183
181static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
182 void *event __attribute((unused)),
183 int val1, int val2)
184{
185 return val1 && val2;
186}
187
188static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
189 void *event __attribute((unused)),
190 int val1, int val2)
191{
192 return val1 || val2;
193}
194
195/* Filter predicate for fixed sized arrays of characters */ 184/* Filter predicate for fixed sized arrays of characters */
196static int filter_pred_string(struct filter_pred *pred, void *event, 185static int filter_pred_string(struct filter_pred *pred, void *event)
197 int val1, int val2)
198{ 186{
199 char *addr = (char *)(event + pred->offset); 187 char *addr = (char *)(event + pred->offset);
200 int cmp, match; 188 int cmp, match;
@@ -207,8 +195,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
207} 195}
208 196
209/* Filter predicate for char * pointers */ 197/* Filter predicate for char * pointers */
210static int filter_pred_pchar(struct filter_pred *pred, void *event, 198static int filter_pred_pchar(struct filter_pred *pred, void *event)
211 int val1, int val2)
212{ 199{
213 char **addr = (char **)(event + pred->offset); 200 char **addr = (char **)(event + pred->offset);
214 int cmp, match; 201 int cmp, match;
@@ -231,8 +218,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
231 * and add it to the address of the entry, and at last we have 218 * and add it to the address of the entry, and at last we have
232 * the address of the string. 219 * the address of the string.
233 */ 220 */
234static int filter_pred_strloc(struct filter_pred *pred, void *event, 221static int filter_pred_strloc(struct filter_pred *pred, void *event)
235 int val1, int val2)
236{ 222{
237 u32 str_item = *(u32 *)(event + pred->offset); 223 u32 str_item = *(u32 *)(event + pred->offset);
238 int str_loc = str_item & 0xffff; 224 int str_loc = str_item & 0xffff;
@@ -247,8 +233,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
247 return match; 233 return match;
248} 234}
249 235
250static int filter_pred_none(struct filter_pred *pred, void *event, 236static int filter_pred_none(struct filter_pred *pred, void *event)
251 int val1, int val2)
252{ 237{
253 return 0; 238 return 0;
254} 239}
@@ -377,32 +362,147 @@ static void filter_build_regex(struct filter_pred *pred)
377 pred->not ^= not; 362 pred->not ^= not;
378} 363}
379 364
365enum move_type {
366 MOVE_DOWN,
367 MOVE_UP_FROM_LEFT,
368 MOVE_UP_FROM_RIGHT
369};
370
371static struct filter_pred *
372get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
373 int index, enum move_type *move)
374{
375 if (pred->parent & FILTER_PRED_IS_RIGHT)
376 *move = MOVE_UP_FROM_RIGHT;
377 else
378 *move = MOVE_UP_FROM_LEFT;
379 pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
380
381 return pred;
382}
383
384/*
385 * A series of AND or ORs where found together. Instead of
386 * climbing up and down the tree branches, an array of the
387 * ops were made in order of checks. We can just move across
388 * the array and short circuit if needed.
389 */
390static int process_ops(struct filter_pred *preds,
391 struct filter_pred *op, void *rec)
392{
393 struct filter_pred *pred;
394 int type;
395 int match;
396 int i;
397
398 /*
399 * Micro-optimization: We set type to true if op
400 * is an OR and false otherwise (AND). Then we
401 * just need to test if the match is equal to
402 * the type, and if it is, we can short circuit the
403 * rest of the checks:
404 *
405 * if ((match && op->op == OP_OR) ||
406 * (!match && op->op == OP_AND))
407 * return match;
408 */
409 type = op->op == OP_OR;
410
411 for (i = 0; i < op->val; i++) {
412 pred = &preds[op->ops[i]];
413 match = pred->fn(pred, rec);
414 if (!!match == type)
415 return match;
416 }
417 return match;
418}
419
380/* return 1 if event matches, 0 otherwise (discard) */ 420/* return 1 if event matches, 0 otherwise (discard) */
381int filter_match_preds(struct event_filter *filter, void *rec) 421int filter_match_preds(struct event_filter *filter, void *rec)
382{ 422{
383 int match, top = 0, val1 = 0, val2 = 0; 423 int match = -1;
384 int stack[MAX_FILTER_PRED]; 424 enum move_type move = MOVE_DOWN;
425 struct filter_pred *preds;
385 struct filter_pred *pred; 426 struct filter_pred *pred;
386 int i; 427 struct filter_pred *root;
428 int n_preds;
429 int done = 0;
430
431 /* no filter is considered a match */
432 if (!filter)
433 return 1;
434
435 n_preds = filter->n_preds;
436
437 if (!n_preds)
438 return 1;
439
440 /*
441 * n_preds, root and filter->preds are protect with preemption disabled.
442 */
443 preds = rcu_dereference_sched(filter->preds);
444 root = rcu_dereference_sched(filter->root);
445 if (!root)
446 return 1;
447
448 pred = root;
387 449
388 for (i = 0; i < filter->n_preds; i++) { 450 /* match is currently meaningless */
389 pred = filter->preds[i]; 451 match = -1;
390 if (!pred->pop_n) { 452
391 match = pred->fn(pred, rec, val1, val2); 453 do {
392 stack[top++] = match; 454 switch (move) {
455 case MOVE_DOWN:
456 /* only AND and OR have children */
457 if (pred->left != FILTER_PRED_INVALID) {
458 /* If ops is set, then it was folded. */
459 if (!pred->ops) {
460 /* keep going to down the left side */
461 pred = &preds[pred->left];
462 continue;
463 }
464 /* We can treat folded ops as a leaf node */
465 match = process_ops(preds, pred, rec);
466 } else
467 match = pred->fn(pred, rec);
468 /* If this pred is the only pred */
469 if (pred == root)
470 break;
471 pred = get_pred_parent(pred, preds,
472 pred->parent, &move);
473 continue;
474 case MOVE_UP_FROM_LEFT:
475 /*
476 * Check for short circuits.
477 *
478 * Optimization: !!match == (pred->op == OP_OR)
479 * is the same as:
480 * if ((match && pred->op == OP_OR) ||
481 * (!match && pred->op == OP_AND))
482 */
483 if (!!match == (pred->op == OP_OR)) {
484 if (pred == root)
485 break;
486 pred = get_pred_parent(pred, preds,
487 pred->parent, &move);
488 continue;
489 }
490 /* now go down the right side of the tree. */
491 pred = &preds[pred->right];
492 move = MOVE_DOWN;
493 continue;
494 case MOVE_UP_FROM_RIGHT:
495 /* We finished this equation. */
496 if (pred == root)
497 break;
498 pred = get_pred_parent(pred, preds,
499 pred->parent, &move);
393 continue; 500 continue;
394 } 501 }
395 if (pred->pop_n > top) { 502 done = 1;
396 WARN_ON_ONCE(1); 503 } while (!done);
397 return 0;
398 }
399 val1 = stack[--top];
400 val2 = stack[--top];
401 match = pred->fn(pred, rec, val1, val2);
402 stack[top++] = match;
403 }
404 504
405 return stack[--top]; 505 return match;
406} 506}
407EXPORT_SYMBOL_GPL(filter_match_preds); 507EXPORT_SYMBOL_GPL(filter_match_preds);
408 508
@@ -414,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos)
414 514
415static void remove_filter_string(struct event_filter *filter) 515static void remove_filter_string(struct event_filter *filter)
416{ 516{
517 if (!filter)
518 return;
519
417 kfree(filter->filter_string); 520 kfree(filter->filter_string);
418 filter->filter_string = NULL; 521 filter->filter_string = NULL;
419} 522}
@@ -473,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps,
473 576
474void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) 577void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
475{ 578{
476 struct event_filter *filter = call->filter; 579 struct event_filter *filter;
477 580
478 mutex_lock(&event_mutex); 581 mutex_lock(&event_mutex);
582 filter = call->filter;
479 if (filter && filter->filter_string) 583 if (filter && filter->filter_string)
480 trace_seq_printf(s, "%s\n", filter->filter_string); 584 trace_seq_printf(s, "%s\n", filter->filter_string);
481 else 585 else
@@ -486,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
486void print_subsystem_event_filter(struct event_subsystem *system, 590void print_subsystem_event_filter(struct event_subsystem *system,
487 struct trace_seq *s) 591 struct trace_seq *s)
488{ 592{
489 struct event_filter *filter = system->filter; 593 struct event_filter *filter;
490 594
491 mutex_lock(&event_mutex); 595 mutex_lock(&event_mutex);
596 filter = system->filter;
492 if (filter && filter->filter_string) 597 if (filter && filter->filter_string)
493 trace_seq_printf(s, "%s\n", filter->filter_string); 598 trace_seq_printf(s, "%s\n", filter->filter_string);
494 else 599 else
@@ -539,10 +644,58 @@ static void filter_clear_pred(struct filter_pred *pred)
539 pred->regex.len = 0; 644 pred->regex.len = 0;
540} 645}
541 646
542static int filter_set_pred(struct filter_pred *dest, 647static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
648{
649 stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
650 if (!stack->preds)
651 return -ENOMEM;
652 stack->index = n_preds;
653 return 0;
654}
655
656static void __free_pred_stack(struct pred_stack *stack)
657{
658 kfree(stack->preds);
659 stack->index = 0;
660}
661
662static int __push_pred_stack(struct pred_stack *stack,
663 struct filter_pred *pred)
664{
665 int index = stack->index;
666
667 if (WARN_ON(index == 0))
668 return -ENOSPC;
669
670 stack->preds[--index] = pred;
671 stack->index = index;
672 return 0;
673}
674
675static struct filter_pred *
676__pop_pred_stack(struct pred_stack *stack)
677{
678 struct filter_pred *pred;
679 int index = stack->index;
680
681 pred = stack->preds[index++];
682 if (!pred)
683 return NULL;
684
685 stack->index = index;
686 return pred;
687}
688
689static int filter_set_pred(struct event_filter *filter,
690 int idx,
691 struct pred_stack *stack,
543 struct filter_pred *src, 692 struct filter_pred *src,
544 filter_pred_fn_t fn) 693 filter_pred_fn_t fn)
545{ 694{
695 struct filter_pred *dest = &filter->preds[idx];
696 struct filter_pred *left;
697 struct filter_pred *right;
698
546 *dest = *src; 699 *dest = *src;
547 if (src->field_name) { 700 if (src->field_name) {
548 dest->field_name = kstrdup(src->field_name, GFP_KERNEL); 701 dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,116 +703,140 @@ static int filter_set_pred(struct filter_pred *dest,
550 return -ENOMEM; 703 return -ENOMEM;
551 } 704 }
552 dest->fn = fn; 705 dest->fn = fn;
706 dest->index = idx;
553 707
554 return 0; 708 if (dest->op == OP_OR || dest->op == OP_AND) {
709 right = __pop_pred_stack(stack);
710 left = __pop_pred_stack(stack);
711 if (!left || !right)
712 return -EINVAL;
713 /*
714 * If both children can be folded
715 * and they are the same op as this op or a leaf,
716 * then this op can be folded.
717 */
718 if (left->index & FILTER_PRED_FOLD &&
719 (left->op == dest->op ||
720 left->left == FILTER_PRED_INVALID) &&
721 right->index & FILTER_PRED_FOLD &&
722 (right->op == dest->op ||
723 right->left == FILTER_PRED_INVALID))
724 dest->index |= FILTER_PRED_FOLD;
725
726 dest->left = left->index & ~FILTER_PRED_FOLD;
727 dest->right = right->index & ~FILTER_PRED_FOLD;
728 left->parent = dest->index & ~FILTER_PRED_FOLD;
729 right->parent = dest->index | FILTER_PRED_IS_RIGHT;
730 } else {
731 /*
732 * Make dest->left invalid to be used as a quick
733 * way to know this is a leaf node.
734 */
735 dest->left = FILTER_PRED_INVALID;
736
737 /* All leafs allow folding the parent ops. */
738 dest->index |= FILTER_PRED_FOLD;
739 }
740
741 return __push_pred_stack(stack, dest);
555} 742}
556 743
557static void filter_disable_preds(struct ftrace_event_call *call) 744static void __free_preds(struct event_filter *filter)
558{ 745{
559 struct event_filter *filter = call->filter;
560 int i; 746 int i;
561 747
562 call->flags &= ~TRACE_EVENT_FL_FILTERED; 748 if (filter->preds) {
749 for (i = 0; i < filter->a_preds; i++)
750 kfree(filter->preds[i].field_name);
751 kfree(filter->preds);
752 filter->preds = NULL;
753 }
754 filter->a_preds = 0;
563 filter->n_preds = 0; 755 filter->n_preds = 0;
564
565 for (i = 0; i < MAX_FILTER_PRED; i++)
566 filter->preds[i]->fn = filter_pred_none;
567} 756}
568 757
569static void __free_preds(struct event_filter *filter) 758static void filter_disable(struct ftrace_event_call *call)
570{ 759{
571 int i; 760 call->flags &= ~TRACE_EVENT_FL_FILTERED;
761}
572 762
763static void __free_filter(struct event_filter *filter)
764{
573 if (!filter) 765 if (!filter)
574 return; 766 return;
575 767
576 for (i = 0; i < MAX_FILTER_PRED; i++) { 768 __free_preds(filter);
577 if (filter->preds[i])
578 filter_free_pred(filter->preds[i]);
579 }
580 kfree(filter->preds);
581 kfree(filter->filter_string); 769 kfree(filter->filter_string);
582 kfree(filter); 770 kfree(filter);
583} 771}
584 772
773/*
774 * Called when destroying the ftrace_event_call.
775 * The call is being freed, so we do not need to worry about
776 * the call being currently used. This is for module code removing
777 * the tracepoints from within it.
778 */
585void destroy_preds(struct ftrace_event_call *call) 779void destroy_preds(struct ftrace_event_call *call)
586{ 780{
587 __free_preds(call->filter); 781 __free_filter(call->filter);
588 call->filter = NULL; 782 call->filter = NULL;
589 call->flags &= ~TRACE_EVENT_FL_FILTERED;
590} 783}
591 784
592static struct event_filter *__alloc_preds(void) 785static struct event_filter *__alloc_filter(void)
593{ 786{
594 struct event_filter *filter; 787 struct event_filter *filter;
788
789 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
790 return filter;
791}
792
793static int __alloc_preds(struct event_filter *filter, int n_preds)
794{
595 struct filter_pred *pred; 795 struct filter_pred *pred;
596 int i; 796 int i;
597 797
598 filter = kzalloc(sizeof(*filter), GFP_KERNEL); 798 if (filter->preds)
599 if (!filter) 799 __free_preds(filter);
600 return ERR_PTR(-ENOMEM);
601 800
602 filter->n_preds = 0; 801 filter->preds =
802 kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
603 803
604 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
605 if (!filter->preds) 804 if (!filter->preds)
606 goto oom; 805 return -ENOMEM;
607 806
608 for (i = 0; i < MAX_FILTER_PRED; i++) { 807 filter->a_preds = n_preds;
609 pred = kzalloc(sizeof(*pred), GFP_KERNEL); 808 filter->n_preds = 0;
610 if (!pred) 809
611 goto oom; 810 for (i = 0; i < n_preds; i++) {
811 pred = &filter->preds[i];
612 pred->fn = filter_pred_none; 812 pred->fn = filter_pred_none;
613 filter->preds[i] = pred;
614 } 813 }
615 814
616 return filter;
617
618oom:
619 __free_preds(filter);
620 return ERR_PTR(-ENOMEM);
621}
622
623static int init_preds(struct ftrace_event_call *call)
624{
625 if (call->filter)
626 return 0;
627
628 call->flags &= ~TRACE_EVENT_FL_FILTERED;
629 call->filter = __alloc_preds();
630 if (IS_ERR(call->filter))
631 return PTR_ERR(call->filter);
632
633 return 0; 815 return 0;
634} 816}
635 817
636static int init_subsystem_preds(struct event_subsystem *system) 818static void filter_free_subsystem_preds(struct event_subsystem *system)
637{ 819{
638 struct ftrace_event_call *call; 820 struct ftrace_event_call *call;
639 int err;
640 821
641 list_for_each_entry(call, &ftrace_events, list) { 822 list_for_each_entry(call, &ftrace_events, list) {
642 if (strcmp(call->class->system, system->name) != 0) 823 if (strcmp(call->class->system, system->name) != 0)
643 continue; 824 continue;
644 825
645 err = init_preds(call); 826 filter_disable(call);
646 if (err) 827 remove_filter_string(call->filter);
647 return err;
648 } 828 }
649
650 return 0;
651} 829}
652 830
653static void filter_free_subsystem_preds(struct event_subsystem *system) 831static void filter_free_subsystem_filters(struct event_subsystem *system)
654{ 832{
655 struct ftrace_event_call *call; 833 struct ftrace_event_call *call;
656 834
657 list_for_each_entry(call, &ftrace_events, list) { 835 list_for_each_entry(call, &ftrace_events, list) {
658 if (strcmp(call->class->system, system->name) != 0) 836 if (strcmp(call->class->system, system->name) != 0)
659 continue; 837 continue;
660 838 __free_filter(call->filter);
661 filter_disable_preds(call); 839 call->filter = NULL;
662 remove_filter_string(call->filter);
663 } 840 }
664} 841}
665 842
@@ -667,18 +844,19 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
667 struct ftrace_event_call *call, 844 struct ftrace_event_call *call,
668 struct event_filter *filter, 845 struct event_filter *filter,
669 struct filter_pred *pred, 846 struct filter_pred *pred,
847 struct pred_stack *stack,
670 filter_pred_fn_t fn) 848 filter_pred_fn_t fn)
671{ 849{
672 int idx, err; 850 int idx, err;
673 851
674 if (filter->n_preds == MAX_FILTER_PRED) { 852 if (WARN_ON(filter->n_preds == filter->a_preds)) {
675 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); 853 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
676 return -ENOSPC; 854 return -ENOSPC;
677 } 855 }
678 856
679 idx = filter->n_preds; 857 idx = filter->n_preds;
680 filter_clear_pred(filter->preds[idx]); 858 filter_clear_pred(&filter->preds[idx]);
681 err = filter_set_pred(filter->preds[idx], pred, fn); 859 err = filter_set_pred(filter, idx, stack, pred, fn);
682 if (err) 860 if (err)
683 return err; 861 return err;
684 862
@@ -763,6 +941,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
763 struct ftrace_event_call *call, 941 struct ftrace_event_call *call,
764 struct event_filter *filter, 942 struct event_filter *filter,
765 struct filter_pred *pred, 943 struct filter_pred *pred,
944 struct pred_stack *stack,
766 bool dry_run) 945 bool dry_run)
767{ 946{
768 struct ftrace_event_field *field; 947 struct ftrace_event_field *field;
@@ -770,17 +949,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
770 unsigned long long val; 949 unsigned long long val;
771 int ret; 950 int ret;
772 951
773 pred->fn = filter_pred_none; 952 fn = pred->fn = filter_pred_none;
774 953
775 if (pred->op == OP_AND) { 954 if (pred->op == OP_AND)
776 pred->pop_n = 2;
777 fn = filter_pred_and;
778 goto add_pred_fn; 955 goto add_pred_fn;
779 } else if (pred->op == OP_OR) { 956 else if (pred->op == OP_OR)
780 pred->pop_n = 2;
781 fn = filter_pred_or;
782 goto add_pred_fn; 957 goto add_pred_fn;
783 }
784 958
785 field = find_event_field(call, pred->field_name); 959 field = find_event_field(call, pred->field_name);
786 if (!field) { 960 if (!field) {
@@ -829,7 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
829 1003
830add_pred_fn: 1004add_pred_fn:
831 if (!dry_run) 1005 if (!dry_run)
832 return filter_add_pred_fn(ps, call, filter, pred, fn); 1006 return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
833 return 0; 1007 return 0;
834} 1008}
835 1009
@@ -1187,6 +1361,234 @@ static int check_preds(struct filter_parse_state *ps)
1187 return 0; 1361 return 0;
1188} 1362}
1189 1363
1364static int count_preds(struct filter_parse_state *ps)
1365{
1366 struct postfix_elt *elt;
1367 int n_preds = 0;
1368
1369 list_for_each_entry(elt, &ps->postfix, list) {
1370 if (elt->op == OP_NONE)
1371 continue;
1372 n_preds++;
1373 }
1374
1375 return n_preds;
1376}
1377
1378/*
1379 * The tree is walked at filtering of an event. If the tree is not correctly
1380 * built, it may cause an infinite loop. Check here that the tree does
1381 * indeed terminate.
1382 */
1383static int check_pred_tree(struct event_filter *filter,
1384 struct filter_pred *root)
1385{
1386 struct filter_pred *preds;
1387 struct filter_pred *pred;
1388 enum move_type move = MOVE_DOWN;
1389 int count = 0;
1390 int done = 0;
1391 int max;
1392
1393 /*
1394 * The max that we can hit a node is three times.
1395 * Once going down, once coming up from left, and
1396 * once coming up from right. This is more than enough
1397 * since leafs are only hit a single time.
1398 */
1399 max = 3 * filter->n_preds;
1400
1401 preds = filter->preds;
1402 if (!preds)
1403 return -EINVAL;
1404 pred = root;
1405
1406 do {
1407 if (WARN_ON(count++ > max))
1408 return -EINVAL;
1409
1410 switch (move) {
1411 case MOVE_DOWN:
1412 if (pred->left != FILTER_PRED_INVALID) {
1413 pred = &preds[pred->left];
1414 continue;
1415 }
1416 /* A leaf at the root is just a leaf in the tree */
1417 if (pred == root)
1418 break;
1419 pred = get_pred_parent(pred, preds,
1420 pred->parent, &move);
1421 continue;
1422 case MOVE_UP_FROM_LEFT:
1423 pred = &preds[pred->right];
1424 move = MOVE_DOWN;
1425 continue;
1426 case MOVE_UP_FROM_RIGHT:
1427 if (pred == root)
1428 break;
1429 pred = get_pred_parent(pred, preds,
1430 pred->parent, &move);
1431 continue;
1432 }
1433 done = 1;
1434 } while (!done);
1435
1436 /* We are fine. */
1437 return 0;
1438}
1439
1440static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
1441{
1442 struct filter_pred *pred;
1443 enum move_type move = MOVE_DOWN;
1444 int count = 0;
1445 int done = 0;
1446
1447 pred = root;
1448
1449 do {
1450 switch (move) {
1451 case MOVE_DOWN:
1452 if (pred->left != FILTER_PRED_INVALID) {
1453 pred = &preds[pred->left];
1454 continue;
1455 }
1456 /* A leaf at the root is just a leaf in the tree */
1457 if (pred == root)
1458 return 1;
1459 count++;
1460 pred = get_pred_parent(pred, preds,
1461 pred->parent, &move);
1462 continue;
1463 case MOVE_UP_FROM_LEFT:
1464 pred = &preds[pred->right];
1465 move = MOVE_DOWN;
1466 continue;
1467 case MOVE_UP_FROM_RIGHT:
1468 if (pred == root)
1469 break;
1470 pred = get_pred_parent(pred, preds,
1471 pred->parent, &move);
1472 continue;
1473 }
1474 done = 1;
1475 } while (!done);
1476
1477 return count;
1478}
1479
1480static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
1481{
1482 struct filter_pred *pred;
1483 enum move_type move = MOVE_DOWN;
1484 int count = 0;
1485 int children;
1486 int done = 0;
1487
1488 /* No need to keep the fold flag */
1489 root->index &= ~FILTER_PRED_FOLD;
1490
1491 /* If the root is a leaf then do nothing */
1492 if (root->left == FILTER_PRED_INVALID)
1493 return 0;
1494
1495 /* count the children */
1496 children = count_leafs(preds, &preds[root->left]);
1497 children += count_leafs(preds, &preds[root->right]);
1498
1499 root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL);
1500 if (!root->ops)
1501 return -ENOMEM;
1502
1503 root->val = children;
1504
1505 pred = root;
1506 do {
1507 switch (move) {
1508 case MOVE_DOWN:
1509 if (pred->left != FILTER_PRED_INVALID) {
1510 pred = &preds[pred->left];
1511 continue;
1512 }
1513 if (WARN_ON(count == children))
1514 return -EINVAL;
1515 pred->index &= ~FILTER_PRED_FOLD;
1516 root->ops[count++] = pred->index;
1517 pred = get_pred_parent(pred, preds,
1518 pred->parent, &move);
1519 continue;
1520 case MOVE_UP_FROM_LEFT:
1521 pred = &preds[pred->right];
1522 move = MOVE_DOWN;
1523 continue;
1524 case MOVE_UP_FROM_RIGHT:
1525 if (pred == root)
1526 break;
1527 pred = get_pred_parent(pred, preds,
1528 pred->parent, &move);
1529 continue;
1530 }
1531 done = 1;
1532 } while (!done);
1533
1534 return 0;
1535}
1536
1537/*
1538 * To optimize the processing of the ops, if we have several "ors" or
1539 * "ands" together, we can put them in an array and process them all
1540 * together speeding up the filter logic.
1541 */
1542static int fold_pred_tree(struct event_filter *filter,
1543 struct filter_pred *root)
1544{
1545 struct filter_pred *preds;
1546 struct filter_pred *pred;
1547 enum move_type move = MOVE_DOWN;
1548 int done = 0;
1549 int err;
1550
1551 preds = filter->preds;
1552 if (!preds)
1553 return -EINVAL;
1554 pred = root;
1555
1556 do {
1557 switch (move) {
1558 case MOVE_DOWN:
1559 if (pred->index & FILTER_PRED_FOLD) {
1560 err = fold_pred(preds, pred);
1561 if (err)
1562 return err;
1563 /* Folded nodes are like leafs */
1564 } else if (pred->left != FILTER_PRED_INVALID) {
1565 pred = &preds[pred->left];
1566 continue;
1567 }
1568
1569 /* A leaf at the root is just a leaf in the tree */
1570 if (pred == root)
1571 break;
1572 pred = get_pred_parent(pred, preds,
1573 pred->parent, &move);
1574 continue;
1575 case MOVE_UP_FROM_LEFT:
1576 pred = &preds[pred->right];
1577 move = MOVE_DOWN;
1578 continue;
1579 case MOVE_UP_FROM_RIGHT:
1580 if (pred == root)
1581 break;
1582 pred = get_pred_parent(pred, preds,
1583 pred->parent, &move);
1584 continue;
1585 }
1586 done = 1;
1587 } while (!done);
1588
1589 return 0;
1590}
1591
1190static int replace_preds(struct ftrace_event_call *call, 1592static int replace_preds(struct ftrace_event_call *call,
1191 struct event_filter *filter, 1593 struct event_filter *filter,
1192 struct filter_parse_state *ps, 1594 struct filter_parse_state *ps,
@@ -1195,14 +1597,32 @@ static int replace_preds(struct ftrace_event_call *call,
1195{ 1597{
1196 char *operand1 = NULL, *operand2 = NULL; 1598 char *operand1 = NULL, *operand2 = NULL;
1197 struct filter_pred *pred; 1599 struct filter_pred *pred;
1600 struct filter_pred *root;
1198 struct postfix_elt *elt; 1601 struct postfix_elt *elt;
1602 struct pred_stack stack = { }; /* init to NULL */
1199 int err; 1603 int err;
1200 int n_preds = 0; 1604 int n_preds = 0;
1201 1605
1606 n_preds = count_preds(ps);
1607 if (n_preds >= MAX_FILTER_PRED) {
1608 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1609 return -ENOSPC;
1610 }
1611
1202 err = check_preds(ps); 1612 err = check_preds(ps);
1203 if (err) 1613 if (err)
1204 return err; 1614 return err;
1205 1615
1616 if (!dry_run) {
1617 err = __alloc_pred_stack(&stack, n_preds);
1618 if (err)
1619 return err;
1620 err = __alloc_preds(filter, n_preds);
1621 if (err)
1622 goto fail;
1623 }
1624
1625 n_preds = 0;
1206 list_for_each_entry(elt, &ps->postfix, list) { 1626 list_for_each_entry(elt, &ps->postfix, list) {
1207 if (elt->op == OP_NONE) { 1627 if (elt->op == OP_NONE) {
1208 if (!operand1) 1628 if (!operand1)
@@ -1211,14 +1631,16 @@ static int replace_preds(struct ftrace_event_call *call,
1211 operand2 = elt->operand; 1631 operand2 = elt->operand;
1212 else { 1632 else {
1213 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); 1633 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
1214 return -EINVAL; 1634 err = -EINVAL;
1635 goto fail;
1215 } 1636 }
1216 continue; 1637 continue;
1217 } 1638 }
1218 1639
1219 if (n_preds++ == MAX_FILTER_PRED) { 1640 if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
1220 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); 1641 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1221 return -ENOSPC; 1642 err = -ENOSPC;
1643 goto fail;
1222 } 1644 }
1223 1645
1224 if (elt->op == OP_AND || elt->op == OP_OR) { 1646 if (elt->op == OP_AND || elt->op == OP_OR) {
@@ -1228,76 +1650,181 @@ static int replace_preds(struct ftrace_event_call *call,
1228 1650
1229 if (!operand1 || !operand2) { 1651 if (!operand1 || !operand2) {
1230 parse_error(ps, FILT_ERR_MISSING_FIELD, 0); 1652 parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
1231 return -EINVAL; 1653 err = -EINVAL;
1654 goto fail;
1232 } 1655 }
1233 1656
1234 pred = create_pred(elt->op, operand1, operand2); 1657 pred = create_pred(elt->op, operand1, operand2);
1235add_pred: 1658add_pred:
1236 if (!pred) 1659 if (!pred) {
1237 return -ENOMEM; 1660 err = -ENOMEM;
1238 err = filter_add_pred(ps, call, filter, pred, dry_run); 1661 goto fail;
1662 }
1663 err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
1239 filter_free_pred(pred); 1664 filter_free_pred(pred);
1240 if (err) 1665 if (err)
1241 return err; 1666 goto fail;
1242 1667
1243 operand1 = operand2 = NULL; 1668 operand1 = operand2 = NULL;
1244 } 1669 }
1245 1670
1246 return 0; 1671 if (!dry_run) {
1672 /* We should have one item left on the stack */
1673 pred = __pop_pred_stack(&stack);
1674 if (!pred)
1675 return -EINVAL;
1676 /* This item is where we start from in matching */
1677 root = pred;
1678 /* Make sure the stack is empty */
1679 pred = __pop_pred_stack(&stack);
1680 if (WARN_ON(pred)) {
1681 err = -EINVAL;
1682 filter->root = NULL;
1683 goto fail;
1684 }
1685 err = check_pred_tree(filter, root);
1686 if (err)
1687 goto fail;
1688
1689 /* Optimize the tree */
1690 err = fold_pred_tree(filter, root);
1691 if (err)
1692 goto fail;
1693
1694 /* We don't set root until we know it works */
1695 barrier();
1696 filter->root = root;
1697 }
1698
1699 err = 0;
1700fail:
1701 __free_pred_stack(&stack);
1702 return err;
1247} 1703}
1248 1704
1705struct filter_list {
1706 struct list_head list;
1707 struct event_filter *filter;
1708};
1709
1249static int replace_system_preds(struct event_subsystem *system, 1710static int replace_system_preds(struct event_subsystem *system,
1250 struct filter_parse_state *ps, 1711 struct filter_parse_state *ps,
1251 char *filter_string) 1712 char *filter_string)
1252{ 1713{
1253 struct ftrace_event_call *call; 1714 struct ftrace_event_call *call;
1715 struct filter_list *filter_item;
1716 struct filter_list *tmp;
1717 LIST_HEAD(filter_list);
1254 bool fail = true; 1718 bool fail = true;
1255 int err; 1719 int err;
1256 1720
1257 list_for_each_entry(call, &ftrace_events, list) { 1721 list_for_each_entry(call, &ftrace_events, list) {
1258 struct event_filter *filter = call->filter;
1259 1722
1260 if (strcmp(call->class->system, system->name) != 0) 1723 if (strcmp(call->class->system, system->name) != 0)
1261 continue; 1724 continue;
1262 1725
1263 /* try to see if the filter can be applied */ 1726 /*
1264 err = replace_preds(call, filter, ps, filter_string, true); 1727 * Try to see if the filter can be applied
1728 * (filter arg is ignored on dry_run)
1729 */
1730 err = replace_preds(call, NULL, ps, filter_string, true);
1265 if (err) 1731 if (err)
1732 goto fail;
1733 }
1734
1735 list_for_each_entry(call, &ftrace_events, list) {
1736 struct event_filter *filter;
1737
1738 if (strcmp(call->class->system, system->name) != 0)
1266 continue; 1739 continue;
1267 1740
1268 /* really apply the filter */ 1741 filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
1269 filter_disable_preds(call); 1742 if (!filter_item)
1270 err = replace_preds(call, filter, ps, filter_string, false); 1743 goto fail_mem;
1744
1745 list_add_tail(&filter_item->list, &filter_list);
1746
1747 filter_item->filter = __alloc_filter();
1748 if (!filter_item->filter)
1749 goto fail_mem;
1750 filter = filter_item->filter;
1751
1752 /* Can only fail on no memory */
1753 err = replace_filter_string(filter, filter_string);
1271 if (err) 1754 if (err)
1272 filter_disable_preds(call); 1755 goto fail_mem;
1273 else { 1756
1757 err = replace_preds(call, filter, ps, filter_string, false);
1758 if (err) {
1759 filter_disable(call);
1760 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1761 append_filter_err(ps, filter);
1762 } else
1274 call->flags |= TRACE_EVENT_FL_FILTERED; 1763 call->flags |= TRACE_EVENT_FL_FILTERED;
1275 replace_filter_string(filter, filter_string); 1764 /*
1276 } 1765 * Regardless of if this returned an error, we still
1766 * replace the filter for the call.
1767 */
1768 filter = call->filter;
1769 call->filter = filter_item->filter;
1770 filter_item->filter = filter;
1771
1277 fail = false; 1772 fail = false;
1278 } 1773 }
1279 1774
1280 if (fail) { 1775 if (fail)
1281 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); 1776 goto fail;
1282 return -EINVAL; 1777
1778 /*
1779 * The calls can still be using the old filters.
1780 * Do a synchronize_sched() to ensure all calls are
1781 * done with them before we free them.
1782 */
1783 synchronize_sched();
1784 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1785 __free_filter(filter_item->filter);
1786 list_del(&filter_item->list);
1787 kfree(filter_item);
1283 } 1788 }
1284 return 0; 1789 return 0;
1790 fail:
1791 /* No call succeeded */
1792 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1793 list_del(&filter_item->list);
1794 kfree(filter_item);
1795 }
1796 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1797 return -EINVAL;
1798 fail_mem:
1799 /* If any call succeeded, we still need to sync */
1800 if (!fail)
1801 synchronize_sched();
1802 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1803 __free_filter(filter_item->filter);
1804 list_del(&filter_item->list);
1805 kfree(filter_item);
1806 }
1807 return -ENOMEM;
1285} 1808}
1286 1809
1287int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1810int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1288{ 1811{
1289 int err;
1290 struct filter_parse_state *ps; 1812 struct filter_parse_state *ps;
1813 struct event_filter *filter;
1814 struct event_filter *tmp;
1815 int err = 0;
1291 1816
1292 mutex_lock(&event_mutex); 1817 mutex_lock(&event_mutex);
1293 1818
1294 err = init_preds(call);
1295 if (err)
1296 goto out_unlock;
1297
1298 if (!strcmp(strstrip(filter_string), "0")) { 1819 if (!strcmp(strstrip(filter_string), "0")) {
1299 filter_disable_preds(call); 1820 filter_disable(call);
1300 remove_filter_string(call->filter); 1821 filter = call->filter;
1822 if (!filter)
1823 goto out_unlock;
1824 call->filter = NULL;
1825 /* Make sure the filter is not being used */
1826 synchronize_sched();
1827 __free_filter(filter);
1301 goto out_unlock; 1828 goto out_unlock;
1302 } 1829 }
1303 1830
@@ -1306,22 +1833,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1306 if (!ps) 1833 if (!ps)
1307 goto out_unlock; 1834 goto out_unlock;
1308 1835
1309 filter_disable_preds(call); 1836 filter = __alloc_filter();
1310 replace_filter_string(call->filter, filter_string); 1837 if (!filter) {
1838 kfree(ps);
1839 goto out_unlock;
1840 }
1841
1842 replace_filter_string(filter, filter_string);
1311 1843
1312 parse_init(ps, filter_ops, filter_string); 1844 parse_init(ps, filter_ops, filter_string);
1313 err = filter_parse(ps); 1845 err = filter_parse(ps);
1314 if (err) { 1846 if (err) {
1315 append_filter_err(ps, call->filter); 1847 append_filter_err(ps, filter);
1316 goto out; 1848 goto out;
1317 } 1849 }
1318 1850
1319 err = replace_preds(call, call->filter, ps, filter_string, false); 1851 err = replace_preds(call, filter, ps, filter_string, false);
1320 if (err) 1852 if (err) {
1321 append_filter_err(ps, call->filter); 1853 filter_disable(call);
1322 else 1854 append_filter_err(ps, filter);
1855 } else
1323 call->flags |= TRACE_EVENT_FL_FILTERED; 1856 call->flags |= TRACE_EVENT_FL_FILTERED;
1324out: 1857out:
1858 /*
1859 * Always swap the call filter with the new filter
1860 * even if there was an error. If there was an error
1861 * in the filter, we disable the filter and show the error
1862 * string
1863 */
1864 tmp = call->filter;
1865 call->filter = filter;
1866 if (tmp) {
1867 /* Make sure the call is done with the filter */
1868 synchronize_sched();
1869 __free_filter(tmp);
1870 }
1325 filter_opstack_clear(ps); 1871 filter_opstack_clear(ps);
1326 postfix_clear(ps); 1872 postfix_clear(ps);
1327 kfree(ps); 1873 kfree(ps);
@@ -1334,18 +1880,21 @@ out_unlock:
1334int apply_subsystem_event_filter(struct event_subsystem *system, 1880int apply_subsystem_event_filter(struct event_subsystem *system,
1335 char *filter_string) 1881 char *filter_string)
1336{ 1882{
1337 int err;
1338 struct filter_parse_state *ps; 1883 struct filter_parse_state *ps;
1884 struct event_filter *filter;
1885 int err = 0;
1339 1886
1340 mutex_lock(&event_mutex); 1887 mutex_lock(&event_mutex);
1341 1888
1342 err = init_subsystem_preds(system);
1343 if (err)
1344 goto out_unlock;
1345
1346 if (!strcmp(strstrip(filter_string), "0")) { 1889 if (!strcmp(strstrip(filter_string), "0")) {
1347 filter_free_subsystem_preds(system); 1890 filter_free_subsystem_preds(system);
1348 remove_filter_string(system->filter); 1891 remove_filter_string(system->filter);
1892 filter = system->filter;
1893 system->filter = NULL;
1894 /* Ensure all filters are no longer used */
1895 synchronize_sched();
1896 filter_free_subsystem_filters(system);
1897 __free_filter(filter);
1349 goto out_unlock; 1898 goto out_unlock;
1350 } 1899 }
1351 1900
@@ -1354,7 +1903,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1354 if (!ps) 1903 if (!ps)
1355 goto out_unlock; 1904 goto out_unlock;
1356 1905
1357 replace_filter_string(system->filter, filter_string); 1906 filter = __alloc_filter();
1907 if (!filter)
1908 goto out;
1909
1910 replace_filter_string(filter, filter_string);
1911 /*
1912 * No event actually uses the system filter
1913 * we can free it without synchronize_sched().
1914 */
1915 __free_filter(system->filter);
1916 system->filter = filter;
1358 1917
1359 parse_init(ps, filter_ops, filter_string); 1918 parse_init(ps, filter_ops, filter_string);
1360 err = filter_parse(ps); 1919 err = filter_parse(ps);
@@ -1384,7 +1943,7 @@ void ftrace_profile_free_filter(struct perf_event *event)
1384 struct event_filter *filter = event->filter; 1943 struct event_filter *filter = event->filter;
1385 1944
1386 event->filter = NULL; 1945 event->filter = NULL;
1387 __free_preds(filter); 1946 __free_filter(filter);
1388} 1947}
1389 1948
1390int ftrace_profile_set_filter(struct perf_event *event, int event_id, 1949int ftrace_profile_set_filter(struct perf_event *event, int event_id,
@@ -1410,8 +1969,8 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1410 if (event->filter) 1969 if (event->filter)
1411 goto out_unlock; 1970 goto out_unlock;
1412 1971
1413 filter = __alloc_preds(); 1972 filter = __alloc_filter();
1414 if (IS_ERR(filter)) { 1973 if (!filter) {
1415 err = PTR_ERR(filter); 1974 err = PTR_ERR(filter);
1416 goto out_unlock; 1975 goto out_unlock;
1417 } 1976 }
@@ -1419,7 +1978,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1419 err = -ENOMEM; 1978 err = -ENOMEM;
1420 ps = kzalloc(sizeof(*ps), GFP_KERNEL); 1979 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1421 if (!ps) 1980 if (!ps)
1422 goto free_preds; 1981 goto free_filter;
1423 1982
1424 parse_init(ps, filter_ops, filter_str); 1983 parse_init(ps, filter_ops, filter_str);
1425 err = filter_parse(ps); 1984 err = filter_parse(ps);
@@ -1435,9 +1994,9 @@ free_ps:
1435 postfix_clear(ps); 1994 postfix_clear(ps);
1436 kfree(ps); 1995 kfree(ps);
1437 1996
1438free_preds: 1997free_filter:
1439 if (err) 1998 if (err)
1440 __free_preds(filter); 1999 __free_filter(filter);
1441 2000
1442out_unlock: 2001out_unlock:
1443 mutex_unlock(&event_mutex); 2002 mutex_unlock(&event_mutex);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2dec9bcde8b4..8435b43b1782 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -353,6 +353,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
353 kfree(data); 353 kfree(data);
354} 354}
355 355
356/* Bitfield fetch function */
357struct bitfield_fetch_param {
358 struct fetch_param orig;
359 unsigned char hi_shift;
360 unsigned char low_shift;
361};
362
363#define DEFINE_FETCH_bitfield(type) \
364static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
365 void *data, void *dest) \
366{ \
367 struct bitfield_fetch_param *bprm = data; \
368 type buf = 0; \
369 call_fetch(&bprm->orig, regs, &buf); \
370 if (buf) { \
371 buf <<= bprm->hi_shift; \
372 buf >>= bprm->low_shift; \
373 } \
374 *(type *)dest = buf; \
375}
376DEFINE_BASIC_FETCH_FUNCS(bitfield)
377#define fetch_bitfield_string NULL
378#define fetch_bitfield_string_size NULL
379
380static __kprobes void
381free_bitfield_fetch_param(struct bitfield_fetch_param *data)
382{
383 /*
384 * Don't check the bitfield itself, because this must be the
385 * last fetch function.
386 */
387 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
388 free_deref_fetch_param(data->orig.data);
389 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
390 free_symbol_cache(data->orig.data);
391 kfree(data);
392}
356/* Default (unsigned long) fetch type */ 393/* Default (unsigned long) fetch type */
357#define __DEFAULT_FETCH_TYPE(t) u##t 394#define __DEFAULT_FETCH_TYPE(t) u##t
358#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) 395#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -367,6 +404,7 @@ enum {
367 FETCH_MTD_memory, 404 FETCH_MTD_memory,
368 FETCH_MTD_symbol, 405 FETCH_MTD_symbol,
369 FETCH_MTD_deref, 406 FETCH_MTD_deref,
407 FETCH_MTD_bitfield,
370 FETCH_MTD_END, 408 FETCH_MTD_END,
371}; 409};
372 410
@@ -387,6 +425,7 @@ ASSIGN_FETCH_FUNC(retval, ftype), \
387ASSIGN_FETCH_FUNC(memory, ftype), \ 425ASSIGN_FETCH_FUNC(memory, ftype), \
388ASSIGN_FETCH_FUNC(symbol, ftype), \ 426ASSIGN_FETCH_FUNC(symbol, ftype), \
389ASSIGN_FETCH_FUNC(deref, ftype), \ 427ASSIGN_FETCH_FUNC(deref, ftype), \
428ASSIGN_FETCH_FUNC(bitfield, ftype), \
390 } \ 429 } \
391 } 430 }
392 431
@@ -430,9 +469,33 @@ static const struct fetch_type *find_fetch_type(const char *type)
430 if (!type) 469 if (!type)
431 type = DEFAULT_FETCH_TYPE_STR; 470 type = DEFAULT_FETCH_TYPE_STR;
432 471
472 /* Special case: bitfield */
473 if (*type == 'b') {
474 unsigned long bs;
475 type = strchr(type, '/');
476 if (!type)
477 goto fail;
478 type++;
479 if (strict_strtoul(type, 0, &bs))
480 goto fail;
481 switch (bs) {
482 case 8:
483 return find_fetch_type("u8");
484 case 16:
485 return find_fetch_type("u16");
486 case 32:
487 return find_fetch_type("u32");
488 case 64:
489 return find_fetch_type("u64");
490 default:
491 goto fail;
492 }
493 }
494
433 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) 495 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
434 if (strcmp(type, fetch_type_table[i].name) == 0) 496 if (strcmp(type, fetch_type_table[i].name) == 0)
435 return &fetch_type_table[i]; 497 return &fetch_type_table[i];
498fail:
436 return NULL; 499 return NULL;
437} 500}
438 501
@@ -586,7 +649,9 @@ error:
586 649
587static void free_probe_arg(struct probe_arg *arg) 650static void free_probe_arg(struct probe_arg *arg)
588{ 651{
589 if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) 652 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
653 free_bitfield_fetch_param(arg->fetch.data);
654 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
590 free_deref_fetch_param(arg->fetch.data); 655 free_deref_fetch_param(arg->fetch.data);
591 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) 656 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
592 free_symbol_cache(arg->fetch.data); 657 free_symbol_cache(arg->fetch.data);
@@ -767,16 +832,15 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
767 } 832 }
768 break; 833 break;
769 case '+': /* deref memory */ 834 case '+': /* deref memory */
835 arg++; /* Skip '+', because strict_strtol() rejects it. */
770 case '-': 836 case '-':
771 tmp = strchr(arg, '('); 837 tmp = strchr(arg, '(');
772 if (!tmp) 838 if (!tmp)
773 break; 839 break;
774 *tmp = '\0'; 840 *tmp = '\0';
775 ret = strict_strtol(arg + 1, 0, &offset); 841 ret = strict_strtol(arg, 0, &offset);
776 if (ret) 842 if (ret)
777 break; 843 break;
778 if (arg[0] == '-')
779 offset = -offset;
780 arg = tmp + 1; 844 arg = tmp + 1;
781 tmp = strrchr(arg, ')'); 845 tmp = strrchr(arg, ')');
782 if (tmp) { 846 if (tmp) {
@@ -807,6 +871,41 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
807 return ret; 871 return ret;
808} 872}
809 873
874#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
875
876/* Bitfield type needs to be parsed into a fetch function */
877static int __parse_bitfield_probe_arg(const char *bf,
878 const struct fetch_type *t,
879 struct fetch_param *f)
880{
881 struct bitfield_fetch_param *bprm;
882 unsigned long bw, bo;
883 char *tail;
884
885 if (*bf != 'b')
886 return 0;
887
888 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
889 if (!bprm)
890 return -ENOMEM;
891 bprm->orig = *f;
892 f->fn = t->fetch[FETCH_MTD_bitfield];
893 f->data = (void *)bprm;
894
895 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
896 if (bw == 0 || *tail != '@')
897 return -EINVAL;
898
899 bf = tail + 1;
900 bo = simple_strtoul(bf, &tail, 0);
901 if (tail == bf || *tail != '/')
902 return -EINVAL;
903
904 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
905 bprm->low_shift = bprm->hi_shift + bo;
906 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
907}
908
810/* String length checking wrapper */ 909/* String length checking wrapper */
811static int parse_probe_arg(char *arg, struct trace_probe *tp, 910static int parse_probe_arg(char *arg, struct trace_probe *tp,
812 struct probe_arg *parg, int is_return) 911 struct probe_arg *parg, int is_return)
@@ -836,6 +935,8 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
836 parg->offset = tp->size; 935 parg->offset = tp->size;
837 tp->size += parg->type->size; 936 tp->size += parg->type->size;
838 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); 937 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
938 if (ret >= 0 && t != NULL)
939 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
839 if (ret >= 0) { 940 if (ret >= 0) {
840 parg->fetch_size.fn = get_fetch_size_function(parg->type, 941 parg->fetch_size.fn = get_fetch_size_function(parg->type,
841 parg->fetch.fn); 942 parg->fetch.fn);
@@ -1130,7 +1231,7 @@ static int command_trace_probe(const char *buf)
1130 return ret; 1231 return ret;
1131} 1232}
1132 1233
1133#define WRITE_BUFSIZE 128 1234#define WRITE_BUFSIZE 4096
1134 1235
1135static ssize_t probes_write(struct file *file, const char __user *buffer, 1236static ssize_t probes_write(struct file *file, const char __user *buffer,
1136 size_t count, loff_t *ppos) 1237 size_t count, loff_t *ppos)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 02272baa2206..456be9063c2d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -529,24 +529,34 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
529 * @entry: The trace entry field from the ring buffer 529 * @entry: The trace entry field from the ring buffer
530 * 530 *
531 * Prints the generic fields of irqs off, in hard or softirq, preempt 531 * Prints the generic fields of irqs off, in hard or softirq, preempt
532 * count and lock depth. 532 * count.
533 */ 533 */
534int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) 534int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
535{ 535{
536 int hardirq, softirq; 536 char hardsoft_irq;
537 char need_resched;
538 char irqs_off;
539 int hardirq;
540 int softirq;
537 int ret; 541 int ret;
538 542
539 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 543 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
540 softirq = entry->flags & TRACE_FLAG_SOFTIRQ; 544 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
541 545
546 irqs_off =
547 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
548 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
549 '.';
550 need_resched =
551 (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
552 hardsoft_irq =
553 (hardirq && softirq) ? 'H' :
554 hardirq ? 'h' :
555 softirq ? 's' :
556 '.';
557
542 if (!trace_seq_printf(s, "%c%c%c", 558 if (!trace_seq_printf(s, "%c%c%c",
543 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : 559 irqs_off, need_resched, hardsoft_irq))
544 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
545 'X' : '.',
546 (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
547 'N' : '.',
548 (hardirq && softirq) ? 'H' :
549 hardirq ? 'h' : softirq ? 's' : '.'))
550 return 0; 560 return 0;
551 561
552 if (entry->preempt_count) 562 if (entry->preempt_count)
@@ -554,13 +564,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
554 else 564 else
555 ret = trace_seq_putc(s, '.'); 565 ret = trace_seq_putc(s, '.');
556 566
557 if (!ret) 567 return ret;
558 return 0;
559
560 if (entry->lock_depth < 0)
561 return trace_seq_putc(s, '.');
562
563 return trace_seq_printf(s, "%d", entry->lock_depth);
564} 568}
565 569
566static int 570static int
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 8f758d070c43..7e62c0a18456 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -247,51 +247,3 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
247 ctx_trace = tr; 247 ctx_trace = tr;
248} 248}
249 249
250static void stop_sched_trace(struct trace_array *tr)
251{
252 tracing_stop_sched_switch_record();
253}
254
255static int sched_switch_trace_init(struct trace_array *tr)
256{
257 ctx_trace = tr;
258 tracing_reset_online_cpus(tr);
259 tracing_start_sched_switch_record();
260 return 0;
261}
262
263static void sched_switch_trace_reset(struct trace_array *tr)
264{
265 if (sched_ref)
266 stop_sched_trace(tr);
267}
268
269static void sched_switch_trace_start(struct trace_array *tr)
270{
271 sched_stopped = 0;
272}
273
274static void sched_switch_trace_stop(struct trace_array *tr)
275{
276 sched_stopped = 1;
277}
278
279static struct tracer sched_switch_trace __read_mostly =
280{
281 .name = "sched_switch",
282 .init = sched_switch_trace_init,
283 .reset = sched_switch_trace_reset,
284 .start = sched_switch_trace_start,
285 .stop = sched_switch_trace_stop,
286 .wait_pipe = poll_wait_pipe,
287#ifdef CONFIG_FTRACE_SELFTEST
288 .selftest = trace_selftest_startup_sched_switch,
289#endif
290};
291
292__init static int init_sched_switch_trace(void)
293{
294 return register_tracer(&sched_switch_trace);
295}
296device_initcall(init_sched_switch_trace);
297
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5c9fe08d2093..ee7b5a0bb9f8 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -60,6 +60,19 @@ extern struct syscall_metadata *__stop_syscalls_metadata[];
60 60
61static struct syscall_metadata **syscalls_metadata; 61static struct syscall_metadata **syscalls_metadata;
62 62
63#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
64static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
65{
66 /*
67 * Only compare after the "sys" prefix. Archs that use
68 * syscall wrappers may have syscalls symbols aliases prefixed
69 * with "SyS" instead of "sys", leading to an unwanted
70 * mismatch.
71 */
72 return !strcmp(sym + 3, name + 3);
73}
74#endif
75
63static __init struct syscall_metadata * 76static __init struct syscall_metadata *
64find_syscall_meta(unsigned long syscall) 77find_syscall_meta(unsigned long syscall)
65{ 78{
@@ -72,14 +85,11 @@ find_syscall_meta(unsigned long syscall)
72 stop = __stop_syscalls_metadata; 85 stop = __stop_syscalls_metadata;
73 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 86 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
74 87
88 if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
89 return NULL;
90
75 for ( ; start < stop; start++) { 91 for ( ; start < stop; start++) {
76 /* 92 if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
77 * Only compare after the "sys" prefix. Archs that use
78 * syscall wrappers may have syscalls symbols aliases prefixed
79 * with "SyS" instead of "sys", leading to an unwanted
80 * mismatch.
81 */
82 if ((*start)->name && !strcmp((*start)->name + 3, str + 3))
83 return *start; 93 return *start;
84 } 94 }
85 return NULL; 95 return NULL;
@@ -359,7 +369,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
359 int num; 369 int num;
360 370
361 num = ((struct syscall_metadata *)call->data)->syscall_nr; 371 num = ((struct syscall_metadata *)call->data)->syscall_nr;
362 if (num < 0 || num >= NR_syscalls) 372 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
363 return -ENOSYS; 373 return -ENOSYS;
364 mutex_lock(&syscall_trace_lock); 374 mutex_lock(&syscall_trace_lock);
365 if (!sys_refcount_enter) 375 if (!sys_refcount_enter)
@@ -377,7 +387,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
377 int num; 387 int num;
378 388
379 num = ((struct syscall_metadata *)call->data)->syscall_nr; 389 num = ((struct syscall_metadata *)call->data)->syscall_nr;
380 if (num < 0 || num >= NR_syscalls) 390 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
381 return; 391 return;
382 mutex_lock(&syscall_trace_lock); 392 mutex_lock(&syscall_trace_lock);
383 sys_refcount_enter--; 393 sys_refcount_enter--;
@@ -393,7 +403,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
393 int num; 403 int num;
394 404
395 num = ((struct syscall_metadata *)call->data)->syscall_nr; 405 num = ((struct syscall_metadata *)call->data)->syscall_nr;
396 if (num < 0 || num >= NR_syscalls) 406 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
397 return -ENOSYS; 407 return -ENOSYS;
398 mutex_lock(&syscall_trace_lock); 408 mutex_lock(&syscall_trace_lock);
399 if (!sys_refcount_exit) 409 if (!sys_refcount_exit)
@@ -411,7 +421,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
411 int num; 421 int num;
412 422
413 num = ((struct syscall_metadata *)call->data)->syscall_nr; 423 num = ((struct syscall_metadata *)call->data)->syscall_nr;
414 if (num < 0 || num >= NR_syscalls) 424 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
415 return; 425 return;
416 mutex_lock(&syscall_trace_lock); 426 mutex_lock(&syscall_trace_lock);
417 sys_refcount_exit--; 427 sys_refcount_exit--;
@@ -424,6 +434,14 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
424int init_syscall_trace(struct ftrace_event_call *call) 434int init_syscall_trace(struct ftrace_event_call *call)
425{ 435{
426 int id; 436 int id;
437 int num;
438
439 num = ((struct syscall_metadata *)call->data)->syscall_nr;
440 if (num < 0 || num >= NR_syscalls) {
441 pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
442 ((struct syscall_metadata *)call->data)->name);
443 return -ENOSYS;
444 }
427 445
428 if (set_syscall_print_fmt(call) < 0) 446 if (set_syscall_print_fmt(call) < 0)
429 return -ENOMEM; 447 return -ENOMEM;
@@ -438,7 +456,7 @@ int init_syscall_trace(struct ftrace_event_call *call)
438 return id; 456 return id;
439} 457}
440 458
441unsigned long __init arch_syscall_addr(int nr) 459unsigned long __init __weak arch_syscall_addr(int nr)
442{ 460{
443 return (unsigned long)sys_call_table[nr]; 461 return (unsigned long)sys_call_table[nr];
444} 462}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 11869faa6819..5ca7ce9ce754 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -79,7 +79,9 @@ enum {
79 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ 79 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
80 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ 80 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
81 81
82 MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ 82 MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
83 /* call for help after 10ms
84 (min two ticks) */
83 MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ 85 MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
84 CREATE_COOLDOWN = HZ, /* time to breath after fail */ 86 CREATE_COOLDOWN = HZ, /* time to breath after fail */
85 TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ 87 TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */
@@ -249,10 +251,12 @@ struct workqueue_struct *system_wq __read_mostly;
249struct workqueue_struct *system_long_wq __read_mostly; 251struct workqueue_struct *system_long_wq __read_mostly;
250struct workqueue_struct *system_nrt_wq __read_mostly; 252struct workqueue_struct *system_nrt_wq __read_mostly;
251struct workqueue_struct *system_unbound_wq __read_mostly; 253struct workqueue_struct *system_unbound_wq __read_mostly;
254struct workqueue_struct *system_freezable_wq __read_mostly;
252EXPORT_SYMBOL_GPL(system_wq); 255EXPORT_SYMBOL_GPL(system_wq);
253EXPORT_SYMBOL_GPL(system_long_wq); 256EXPORT_SYMBOL_GPL(system_long_wq);
254EXPORT_SYMBOL_GPL(system_nrt_wq); 257EXPORT_SYMBOL_GPL(system_nrt_wq);
255EXPORT_SYMBOL_GPL(system_unbound_wq); 258EXPORT_SYMBOL_GPL(system_unbound_wq);
259EXPORT_SYMBOL_GPL(system_freezable_wq);
256 260
257#define CREATE_TRACE_POINTS 261#define CREATE_TRACE_POINTS
258#include <trace/events/workqueue.h> 262#include <trace/events/workqueue.h>
@@ -314,6 +318,11 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
314 318
315static struct debug_obj_descr work_debug_descr; 319static struct debug_obj_descr work_debug_descr;
316 320
321static void *work_debug_hint(void *addr)
322{
323 return ((struct work_struct *) addr)->func;
324}
325
317/* 326/*
318 * fixup_init is called when: 327 * fixup_init is called when:
319 * - an active object is initialized 328 * - an active object is initialized
@@ -385,6 +394,7 @@ static int work_fixup_free(void *addr, enum debug_obj_state state)
385 394
386static struct debug_obj_descr work_debug_descr = { 395static struct debug_obj_descr work_debug_descr = {
387 .name = "work_struct", 396 .name = "work_struct",
397 .debug_hint = work_debug_hint,
388 .fixup_init = work_fixup_init, 398 .fixup_init = work_fixup_init,
389 .fixup_activate = work_fixup_activate, 399 .fixup_activate = work_fixup_activate,
390 .fixup_free = work_fixup_free, 400 .fixup_free = work_fixup_free,
@@ -2047,6 +2057,15 @@ repeat:
2047 move_linked_works(work, scheduled, &n); 2057 move_linked_works(work, scheduled, &n);
2048 2058
2049 process_scheduled_works(rescuer); 2059 process_scheduled_works(rescuer);
2060
2061 /*
2062 * Leave this gcwq. If keep_working() is %true, notify a
2063 * regular worker; otherwise, we end up with 0 concurrency
2064 * and stalling the execution.
2065 */
2066 if (keep_working(gcwq))
2067 wake_up_worker(gcwq);
2068
2050 spin_unlock_irq(&gcwq->lock); 2069 spin_unlock_irq(&gcwq->lock);
2051 } 2070 }
2052 2071
@@ -2956,7 +2975,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
2956 */ 2975 */
2957 spin_lock(&workqueue_lock); 2976 spin_lock(&workqueue_lock);
2958 2977
2959 if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) 2978 if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
2960 for_each_cwq_cpu(cpu, wq) 2979 for_each_cwq_cpu(cpu, wq)
2961 get_cwq(cpu, wq)->max_active = 0; 2980 get_cwq(cpu, wq)->max_active = 0;
2962 2981
@@ -3068,7 +3087,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
3068 3087
3069 spin_lock_irq(&gcwq->lock); 3088 spin_lock_irq(&gcwq->lock);
3070 3089
3071 if (!(wq->flags & WQ_FREEZEABLE) || 3090 if (!(wq->flags & WQ_FREEZABLE) ||
3072 !(gcwq->flags & GCWQ_FREEZING)) 3091 !(gcwq->flags & GCWQ_FREEZING))
3073 get_cwq(gcwq->cpu, wq)->max_active = max_active; 3092 get_cwq(gcwq->cpu, wq)->max_active = max_active;
3074 3093
@@ -3318,7 +3337,7 @@ static int __cpuinit trustee_thread(void *__gcwq)
3318 * want to get it over with ASAP - spam rescuers, wake up as 3337 * want to get it over with ASAP - spam rescuers, wake up as
3319 * many idlers as necessary and create new ones till the 3338 * many idlers as necessary and create new ones till the
3320 * worklist is empty. Note that if the gcwq is frozen, there 3339 * worklist is empty. Note that if the gcwq is frozen, there
3321 * may be frozen works in freezeable cwqs. Don't declare 3340 * may be frozen works in freezable cwqs. Don't declare
3322 * completion while frozen. 3341 * completion while frozen.
3323 */ 3342 */
3324 while (gcwq->nr_workers != gcwq->nr_idle || 3343 while (gcwq->nr_workers != gcwq->nr_idle ||
@@ -3576,9 +3595,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
3576/** 3595/**
3577 * freeze_workqueues_begin - begin freezing workqueues 3596 * freeze_workqueues_begin - begin freezing workqueues
3578 * 3597 *
3579 * Start freezing workqueues. After this function returns, all 3598 * Start freezing workqueues. After this function returns, all freezable
3580 * freezeable workqueues will queue new works to their frozen_works 3599 * workqueues will queue new works to their frozen_works list instead of
3581 * list instead of gcwq->worklist. 3600 * gcwq->worklist.
3582 * 3601 *
3583 * CONTEXT: 3602 * CONTEXT:
3584 * Grabs and releases workqueue_lock and gcwq->lock's. 3603 * Grabs and releases workqueue_lock and gcwq->lock's.
@@ -3604,7 +3623,7 @@ void freeze_workqueues_begin(void)
3604 list_for_each_entry(wq, &workqueues, list) { 3623 list_for_each_entry(wq, &workqueues, list) {
3605 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3624 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3606 3625
3607 if (cwq && wq->flags & WQ_FREEZEABLE) 3626 if (cwq && wq->flags & WQ_FREEZABLE)
3608 cwq->max_active = 0; 3627 cwq->max_active = 0;
3609 } 3628 }
3610 3629
@@ -3615,7 +3634,7 @@ void freeze_workqueues_begin(void)
3615} 3634}
3616 3635
3617/** 3636/**
3618 * freeze_workqueues_busy - are freezeable workqueues still busy? 3637 * freeze_workqueues_busy - are freezable workqueues still busy?
3619 * 3638 *
3620 * Check whether freezing is complete. This function must be called 3639 * Check whether freezing is complete. This function must be called
3621 * between freeze_workqueues_begin() and thaw_workqueues(). 3640 * between freeze_workqueues_begin() and thaw_workqueues().
@@ -3624,8 +3643,8 @@ void freeze_workqueues_begin(void)
3624 * Grabs and releases workqueue_lock. 3643 * Grabs and releases workqueue_lock.
3625 * 3644 *
3626 * RETURNS: 3645 * RETURNS:
3627 * %true if some freezeable workqueues are still busy. %false if 3646 * %true if some freezable workqueues are still busy. %false if freezing
3628 * freezing is complete. 3647 * is complete.
3629 */ 3648 */
3630bool freeze_workqueues_busy(void) 3649bool freeze_workqueues_busy(void)
3631{ 3650{
@@ -3645,7 +3664,7 @@ bool freeze_workqueues_busy(void)
3645 list_for_each_entry(wq, &workqueues, list) { 3664 list_for_each_entry(wq, &workqueues, list) {
3646 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3665 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3647 3666
3648 if (!cwq || !(wq->flags & WQ_FREEZEABLE)) 3667 if (!cwq || !(wq->flags & WQ_FREEZABLE))
3649 continue; 3668 continue;
3650 3669
3651 BUG_ON(cwq->nr_active < 0); 3670 BUG_ON(cwq->nr_active < 0);
@@ -3690,7 +3709,7 @@ void thaw_workqueues(void)
3690 list_for_each_entry(wq, &workqueues, list) { 3709 list_for_each_entry(wq, &workqueues, list) {
3691 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3710 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3692 3711
3693 if (!cwq || !(wq->flags & WQ_FREEZEABLE)) 3712 if (!cwq || !(wq->flags & WQ_FREEZABLE))
3694 continue; 3713 continue;
3695 3714
3696 /* restore max_active and repopulate worklist */ 3715 /* restore max_active and repopulate worklist */
@@ -3764,8 +3783,10 @@ static int __init init_workqueues(void)
3764 system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); 3783 system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
3765 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, 3784 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
3766 WQ_UNBOUND_MAX_ACTIVE); 3785 WQ_UNBOUND_MAX_ACTIVE);
3786 system_freezable_wq = alloc_workqueue("events_freezable",
3787 WQ_FREEZABLE, 0);
3767 BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || 3788 BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
3768 !system_unbound_wq); 3789 !system_unbound_wq || !system_freezable_wq);
3769 return 0; 3790 return 0;
3770} 3791}
3771early_initcall(init_workqueues); 3792early_initcall(init_workqueues);
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index deebcc57d4e6..9d86e45086f5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr)
249 249
250static void debug_print_object(struct debug_obj *obj, char *msg) 250static void debug_print_object(struct debug_obj *obj, char *msg)
251{ 251{
252 struct debug_obj_descr *descr = obj->descr;
252 static int limit; 253 static int limit;
253 254
254 if (limit < 5 && obj->descr != descr_test) { 255 if (limit < 5 && descr != descr_test) {
256 void *hint = descr->debug_hint ?
257 descr->debug_hint(obj->object) : NULL;
255 limit++; 258 limit++;
256 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " 259 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
257 "object type: %s\n", 260 "object type: %s hint: %pS\n",
258 msg, obj_states[obj->state], obj->astate, 261 msg, obj_states[obj->state], obj->astate,
259 obj->descr->name); 262 descr->name, hint);
260 } 263 }
261 debug_objects_warnings++; 264 debug_objects_warnings++;
262} 265}
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 344c710d16ca..b8029a5583ff 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -35,6 +35,31 @@ void __list_add(struct list_head *new,
35} 35}
36EXPORT_SYMBOL(__list_add); 36EXPORT_SYMBOL(__list_add);
37 37
38void __list_del_entry(struct list_head *entry)
39{
40 struct list_head *prev, *next;
41
42 prev = entry->prev;
43 next = entry->next;
44
45 if (WARN(next == LIST_POISON1,
46 "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
47 entry, LIST_POISON1) ||
48 WARN(prev == LIST_POISON2,
49 "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
50 entry, LIST_POISON2) ||
51 WARN(prev->next != entry,
52 "list_del corruption. prev->next should be %p, "
53 "but was %p\n", entry, prev->next) ||
54 WARN(next->prev != entry,
55 "list_del corruption. next->prev should be %p, "
56 "but was %p\n", entry, next->prev))
57 return;
58
59 __list_del(prev, next);
60}
61EXPORT_SYMBOL(__list_del_entry);
62
38/** 63/**
39 * list_del - deletes entry from list. 64 * list_del - deletes entry from list.
40 * @entry: the element to delete from the list. 65 * @entry: the element to delete from the list.
@@ -43,19 +68,7 @@ EXPORT_SYMBOL(__list_add);
43 */ 68 */
44void list_del(struct list_head *entry) 69void list_del(struct list_head *entry)
45{ 70{
46 WARN(entry->next == LIST_POISON1, 71 __list_del_entry(entry);
47 "list_del corruption, next is LIST_POISON1 (%p)\n",
48 LIST_POISON1);
49 WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
50 "list_del corruption, prev is LIST_POISON2 (%p)\n",
51 LIST_POISON2);
52 WARN(entry->prev->next != entry,
53 "list_del corruption. prev->next should be %p, "
54 "but was %p\n", entry, entry->prev->next);
55 WARN(entry->next->prev != entry,
56 "list_del corruption. next->prev should be %p, "
57 "but was %p\n", entry, entry->next->prev);
58 __list_del(entry->prev, entry->next);
59 entry->next = LIST_POISON1; 72 entry->next = LIST_POISON1;
60 entry->prev = LIST_POISON2; 73 entry->prev = LIST_POISON2;
61} 74}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 5021cbc34411..ac09f2226dc7 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -148,7 +148,7 @@ nla_policy_len(const struct nla_policy *p, int n)
148{ 148{
149 int i, len = 0; 149 int i, len = 0;
150 150
151 for (i = 0; i < n; i++) { 151 for (i = 0; i < n; i++, p++) {
152 if (p->len) 152 if (p->len)
153 len += nla_total_size(p->len); 153 len += nla_total_size(p->len);
154 else if (nla_attr_minlen[p->type]) 154 else if (nla_attr_minlen[p->type])
diff --git a/lib/plist.c b/lib/plist.c
index 1471988d9190..0ae7e6431726 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -28,6 +28,8 @@
28 28
29#ifdef CONFIG_DEBUG_PI_LIST 29#ifdef CONFIG_DEBUG_PI_LIST
30 30
31static struct plist_head test_head;
32
31static void plist_check_prev_next(struct list_head *t, struct list_head *p, 33static void plist_check_prev_next(struct list_head *t, struct list_head *p,
32 struct list_head *n) 34 struct list_head *n)
33{ 35{
@@ -54,12 +56,13 @@ static void plist_check_list(struct list_head *top)
54 56
55static void plist_check_head(struct plist_head *head) 57static void plist_check_head(struct plist_head *head)
56{ 58{
57 WARN_ON(!head->rawlock && !head->spinlock); 59 WARN_ON(head != &test_head && !head->rawlock && !head->spinlock);
58 if (head->rawlock) 60 if (head->rawlock)
59 WARN_ON_SMP(!raw_spin_is_locked(head->rawlock)); 61 WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
60 if (head->spinlock) 62 if (head->spinlock)
61 WARN_ON_SMP(!spin_is_locked(head->spinlock)); 63 WARN_ON_SMP(!spin_is_locked(head->spinlock));
62 plist_check_list(&head->prio_list); 64 if (!plist_head_empty(head))
65 plist_check_list(&plist_first(head)->prio_list);
63 plist_check_list(&head->node_list); 66 plist_check_list(&head->node_list);
64} 67}
65 68
@@ -75,25 +78,33 @@ static void plist_check_head(struct plist_head *head)
75 */ 78 */
76void plist_add(struct plist_node *node, struct plist_head *head) 79void plist_add(struct plist_node *node, struct plist_head *head)
77{ 80{
78 struct plist_node *iter; 81 struct plist_node *first, *iter, *prev = NULL;
82 struct list_head *node_next = &head->node_list;
79 83
80 plist_check_head(head); 84 plist_check_head(head);
81 WARN_ON(!plist_node_empty(node)); 85 WARN_ON(!plist_node_empty(node));
86 WARN_ON(!list_empty(&node->prio_list));
87
88 if (plist_head_empty(head))
89 goto ins_node;
82 90
83 list_for_each_entry(iter, &head->prio_list, plist.prio_list) { 91 first = iter = plist_first(head);
84 if (node->prio < iter->prio) 92
85 goto lt_prio; 93 do {
86 else if (node->prio == iter->prio) { 94 if (node->prio < iter->prio) {
87 iter = list_entry(iter->plist.prio_list.next, 95 node_next = &iter->node_list;
88 struct plist_node, plist.prio_list); 96 break;
89 goto eq_prio;
90 } 97 }
91 }
92 98
93lt_prio: 99 prev = iter;
94 list_add_tail(&node->plist.prio_list, &iter->plist.prio_list); 100 iter = list_entry(iter->prio_list.next,
95eq_prio: 101 struct plist_node, prio_list);
96 list_add_tail(&node->plist.node_list, &iter->plist.node_list); 102 } while (iter != first);
103
104 if (!prev || prev->prio != node->prio)
105 list_add_tail(&node->prio_list, &iter->prio_list);
106ins_node:
107 list_add_tail(&node->node_list, node_next);
97 108
98 plist_check_head(head); 109 plist_check_head(head);
99} 110}
@@ -108,14 +119,98 @@ void plist_del(struct plist_node *node, struct plist_head *head)
108{ 119{
109 plist_check_head(head); 120 plist_check_head(head);
110 121
111 if (!list_empty(&node->plist.prio_list)) { 122 if (!list_empty(&node->prio_list)) {
112 struct plist_node *next = plist_first(&node->plist); 123 if (node->node_list.next != &head->node_list) {
124 struct plist_node *next;
125
126 next = list_entry(node->node_list.next,
127 struct plist_node, node_list);
113 128
114 list_move_tail(&next->plist.prio_list, &node->plist.prio_list); 129 /* add the next plist_node into prio_list */
115 list_del_init(&node->plist.prio_list); 130 if (list_empty(&next->prio_list))
131 list_add(&next->prio_list, &node->prio_list);
132 }
133 list_del_init(&node->prio_list);
116 } 134 }
117 135
118 list_del_init(&node->plist.node_list); 136 list_del_init(&node->node_list);
119 137
120 plist_check_head(head); 138 plist_check_head(head);
121} 139}
140
141#ifdef CONFIG_DEBUG_PI_LIST
142#include <linux/sched.h>
143#include <linux/module.h>
144#include <linux/init.h>
145
146static struct plist_node __initdata test_node[241];
147
148static void __init plist_test_check(int nr_expect)
149{
150 struct plist_node *first, *prio_pos, *node_pos;
151
152 if (plist_head_empty(&test_head)) {
153 BUG_ON(nr_expect != 0);
154 return;
155 }
156
157 prio_pos = first = plist_first(&test_head);
158 plist_for_each(node_pos, &test_head) {
159 if (nr_expect-- < 0)
160 break;
161 if (node_pos == first)
162 continue;
163 if (node_pos->prio == prio_pos->prio) {
164 BUG_ON(!list_empty(&node_pos->prio_list));
165 continue;
166 }
167
168 BUG_ON(prio_pos->prio > node_pos->prio);
169 BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list);
170 prio_pos = node_pos;
171 }
172
173 BUG_ON(nr_expect != 0);
174 BUG_ON(prio_pos->prio_list.next != &first->prio_list);
175}
176
177static int __init plist_test(void)
178{
179 int nr_expect = 0, i, loop;
180 unsigned int r = local_clock();
181
182 printk(KERN_INFO "start plist test\n");
183 plist_head_init(&test_head, NULL);
184 for (i = 0; i < ARRAY_SIZE(test_node); i++)
185 plist_node_init(test_node + i, 0);
186
187 for (loop = 0; loop < 1000; loop++) {
188 r = r * 193939 % 47629;
189 i = r % ARRAY_SIZE(test_node);
190 if (plist_node_empty(test_node + i)) {
191 r = r * 193939 % 47629;
192 test_node[i].prio = r % 99;
193 plist_add(test_node + i, &test_head);
194 nr_expect++;
195 } else {
196 plist_del(test_node + i, &test_head);
197 nr_expect--;
198 }
199 plist_test_check(nr_expect);
200 }
201
202 for (i = 0; i < ARRAY_SIZE(test_node); i++) {
203 if (plist_node_empty(test_node + i))
204 continue;
205 plist_del(test_node + i, &test_head);
206 nr_expect--;
207 plist_test_check(nr_expect);
208 }
209
210 printk(KERN_INFO "end plist test\n");
211 return 0;
212}
213
214module_init(plist_test);
215
216#endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index f236d7cd5cf3..aa7c3052261f 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
222/* 222/*
223 * wait for the read lock to be granted 223 * wait for the read lock to be granted
224 */ 224 */
225asmregparm struct rw_semaphore __sched * 225struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
226rwsem_down_read_failed(struct rw_semaphore *sem)
227{ 226{
228 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, 227 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
229 -RWSEM_ACTIVE_READ_BIAS); 228 -RWSEM_ACTIVE_READ_BIAS);
@@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
232/* 231/*
233 * wait for the write lock to be granted 232 * wait for the write lock to be granted
234 */ 233 */
235asmregparm struct rw_semaphore __sched * 234struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
236rwsem_down_write_failed(struct rw_semaphore *sem)
237{ 235{
238 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, 236 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
239 -RWSEM_ACTIVE_WRITE_BIAS); 237 -RWSEM_ACTIVE_WRITE_BIAS);
@@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
243 * handle waking up a waiter on the semaphore 241 * handle waking up a waiter on the semaphore
244 * - up_read/up_write has decremented the active part of count if we come here 242 * - up_read/up_write has decremented the active part of count if we come here
245 */ 243 */
246asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) 244struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
247{ 245{
248 unsigned long flags; 246 unsigned long flags;
249 247
@@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
263 * - caller incremented waiting part of count and discovered it still negative 261 * - caller incremented waiting part of count and discovered it still negative
264 * - just wake up any readers at the front of the queue 262 * - just wake up any readers at the front of the queue
265 */ 263 */
266asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) 264struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
267{ 265{
268 unsigned long flags; 266 unsigned long flags;
269 267
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c47bbe11b804..93ca08b8a451 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -686,8 +686,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
686 /* 686 /*
687 * Ensure that the address returned is DMA'ble 687 * Ensure that the address returned is DMA'ble
688 */ 688 */
689 if (!dma_capable(dev, dev_addr, size)) 689 if (!dma_capable(dev, dev_addr, size)) {
690 panic("map_single: bounce buffer is not DMA'ble"); 690 swiotlb_tbl_unmap_single(dev, map, size, dir);
691 dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
692 }
691 693
692 return dev_addr; 694 return dev_addr;
693} 695}
diff --git a/mm/Makefile b/mm/Makefile
index 2b1b575ae712..42a8326c3e3d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -7,7 +7,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ 7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
8 vmalloc.o pagewalk.o pgtable-generic.o 8 vmalloc.o pagewalk.o pgtable-generic.o
9 9
10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ 10obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
11 maccess.o page_alloc.o page-writeback.o \ 11 maccess.o page_alloc.o page-writeback.o \
12 readahead.o swap.o truncate.o vmscan.o shmem.o \ 12 readahead.o swap.o truncate.o vmscan.o shmem.o \
13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
@@ -15,6 +15,12 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
15 $(mmu-y) 15 $(mmu-y)
16obj-y += init-mm.o 16obj-y += init-mm.o
17 17
18ifdef CONFIG_NO_BOOTMEM
19 obj-y += nobootmem.o
20else
21 obj-y += bootmem.o
22endif
23
18obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 24obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
19 25
20obj-$(CONFIG_BOUNCE) += bounce.o 26obj-$(CONFIG_BOUNCE) += bounce.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 13b0caa9793c..07aeb89e396e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -23,6 +23,13 @@
23 23
24#include "internal.h" 24#include "internal.h"
25 25
26#ifndef CONFIG_NEED_MULTIPLE_NODES
27struct pglist_data __refdata contig_page_data = {
28 .bdata = &bootmem_node_data[0]
29};
30EXPORT_SYMBOL(contig_page_data);
31#endif
32
26unsigned long max_low_pfn; 33unsigned long max_low_pfn;
27unsigned long min_low_pfn; 34unsigned long min_low_pfn;
28unsigned long max_pfn; 35unsigned long max_pfn;
@@ -35,7 +42,6 @@ unsigned long max_pfn;
35unsigned long saved_max_pfn; 42unsigned long saved_max_pfn;
36#endif 43#endif
37 44
38#ifndef CONFIG_NO_BOOTMEM
39bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; 45bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
40 46
41static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); 47static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
@@ -146,7 +152,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
146 min_low_pfn = start; 152 min_low_pfn = start;
147 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); 153 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
148} 154}
149#endif 155
150/* 156/*
151 * free_bootmem_late - free bootmem pages directly to page allocator 157 * free_bootmem_late - free bootmem pages directly to page allocator
152 * @addr: starting address of the range 158 * @addr: starting address of the range
@@ -171,53 +177,6 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
171 } 177 }
172} 178}
173 179
174#ifdef CONFIG_NO_BOOTMEM
175static void __init __free_pages_memory(unsigned long start, unsigned long end)
176{
177 int i;
178 unsigned long start_aligned, end_aligned;
179 int order = ilog2(BITS_PER_LONG);
180
181 start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
182 end_aligned = end & ~(BITS_PER_LONG - 1);
183
184 if (end_aligned <= start_aligned) {
185 for (i = start; i < end; i++)
186 __free_pages_bootmem(pfn_to_page(i), 0);
187
188 return;
189 }
190
191 for (i = start; i < start_aligned; i++)
192 __free_pages_bootmem(pfn_to_page(i), 0);
193
194 for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
195 __free_pages_bootmem(pfn_to_page(i), order);
196
197 for (i = end_aligned; i < end; i++)
198 __free_pages_bootmem(pfn_to_page(i), 0);
199}
200
201unsigned long __init free_all_memory_core_early(int nodeid)
202{
203 int i;
204 u64 start, end;
205 unsigned long count = 0;
206 struct range *range = NULL;
207 int nr_range;
208
209 nr_range = get_free_all_memory_range(&range, nodeid);
210
211 for (i = 0; i < nr_range; i++) {
212 start = range[i].start;
213 end = range[i].end;
214 count += end - start;
215 __free_pages_memory(start, end);
216 }
217
218 return count;
219}
220#else
221static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) 180static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
222{ 181{
223 int aligned; 182 int aligned;
@@ -278,7 +237,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
278 237
279 return count; 238 return count;
280} 239}
281#endif
282 240
283/** 241/**
284 * free_all_bootmem_node - release a node's free pages to the buddy allocator 242 * free_all_bootmem_node - release a node's free pages to the buddy allocator
@@ -289,12 +247,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
289unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) 247unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
290{ 248{
291 register_page_bootmem_info_node(pgdat); 249 register_page_bootmem_info_node(pgdat);
292#ifdef CONFIG_NO_BOOTMEM
293 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
294 return 0;
295#else
296 return free_all_bootmem_core(pgdat->bdata); 250 return free_all_bootmem_core(pgdat->bdata);
297#endif
298} 251}
299 252
300/** 253/**
@@ -304,16 +257,6 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
304 */ 257 */
305unsigned long __init free_all_bootmem(void) 258unsigned long __init free_all_bootmem(void)
306{ 259{
307#ifdef CONFIG_NO_BOOTMEM
308 /*
309 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
310 * because in some case like Node0 doesnt have RAM installed
311 * low ram will be on Node1
312 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
313 * will be used instead of only Node0 related
314 */
315 return free_all_memory_core_early(MAX_NUMNODES);
316#else
317 unsigned long total_pages = 0; 260 unsigned long total_pages = 0;
318 bootmem_data_t *bdata; 261 bootmem_data_t *bdata;
319 262
@@ -321,10 +264,8 @@ unsigned long __init free_all_bootmem(void)
321 total_pages += free_all_bootmem_core(bdata); 264 total_pages += free_all_bootmem_core(bdata);
322 265
323 return total_pages; 266 return total_pages;
324#endif
325} 267}
326 268
327#ifndef CONFIG_NO_BOOTMEM
328static void __init __free(bootmem_data_t *bdata, 269static void __init __free(bootmem_data_t *bdata,
329 unsigned long sidx, unsigned long eidx) 270 unsigned long sidx, unsigned long eidx)
330{ 271{
@@ -419,7 +360,6 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
419 } 360 }
420 BUG(); 361 BUG();
421} 362}
422#endif
423 363
424/** 364/**
425 * free_bootmem_node - mark a page range as usable 365 * free_bootmem_node - mark a page range as usable
@@ -434,10 +374,6 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
434void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 374void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
435 unsigned long size) 375 unsigned long size)
436{ 376{
437#ifdef CONFIG_NO_BOOTMEM
438 kmemleak_free_part(__va(physaddr), size);
439 memblock_x86_free_range(physaddr, physaddr + size);
440#else
441 unsigned long start, end; 377 unsigned long start, end;
442 378
443 kmemleak_free_part(__va(physaddr), size); 379 kmemleak_free_part(__va(physaddr), size);
@@ -446,7 +382,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
446 end = PFN_DOWN(physaddr + size); 382 end = PFN_DOWN(physaddr + size);
447 383
448 mark_bootmem_node(pgdat->bdata, start, end, 0, 0); 384 mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
449#endif
450} 385}
451 386
452/** 387/**
@@ -460,10 +395,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
460 */ 395 */
461void __init free_bootmem(unsigned long addr, unsigned long size) 396void __init free_bootmem(unsigned long addr, unsigned long size)
462{ 397{
463#ifdef CONFIG_NO_BOOTMEM
464 kmemleak_free_part(__va(addr), size);
465 memblock_x86_free_range(addr, addr + size);
466#else
467 unsigned long start, end; 398 unsigned long start, end;
468 399
469 kmemleak_free_part(__va(addr), size); 400 kmemleak_free_part(__va(addr), size);
@@ -472,7 +403,6 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
472 end = PFN_DOWN(addr + size); 403 end = PFN_DOWN(addr + size);
473 404
474 mark_bootmem(start, end, 0, 0); 405 mark_bootmem(start, end, 0, 0);
475#endif
476} 406}
477 407
478/** 408/**
@@ -489,17 +419,12 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
489int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 419int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
490 unsigned long size, int flags) 420 unsigned long size, int flags)
491{ 421{
492#ifdef CONFIG_NO_BOOTMEM
493 panic("no bootmem");
494 return 0;
495#else
496 unsigned long start, end; 422 unsigned long start, end;
497 423
498 start = PFN_DOWN(physaddr); 424 start = PFN_DOWN(physaddr);
499 end = PFN_UP(physaddr + size); 425 end = PFN_UP(physaddr + size);
500 426
501 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); 427 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
502#endif
503} 428}
504 429
505/** 430/**
@@ -515,20 +440,14 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
515int __init reserve_bootmem(unsigned long addr, unsigned long size, 440int __init reserve_bootmem(unsigned long addr, unsigned long size,
516 int flags) 441 int flags)
517{ 442{
518#ifdef CONFIG_NO_BOOTMEM
519 panic("no bootmem");
520 return 0;
521#else
522 unsigned long start, end; 443 unsigned long start, end;
523 444
524 start = PFN_DOWN(addr); 445 start = PFN_DOWN(addr);
525 end = PFN_UP(addr + size); 446 end = PFN_UP(addr + size);
526 447
527 return mark_bootmem(start, end, 1, flags); 448 return mark_bootmem(start, end, 1, flags);
528#endif
529} 449}
530 450
531#ifndef CONFIG_NO_BOOTMEM
532int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len, 451int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
533 int flags) 452 int flags)
534{ 453{
@@ -685,33 +604,12 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
685#endif 604#endif
686 return NULL; 605 return NULL;
687} 606}
688#endif
689 607
690static void * __init ___alloc_bootmem_nopanic(unsigned long size, 608static void * __init ___alloc_bootmem_nopanic(unsigned long size,
691 unsigned long align, 609 unsigned long align,
692 unsigned long goal, 610 unsigned long goal,
693 unsigned long limit) 611 unsigned long limit)
694{ 612{
695#ifdef CONFIG_NO_BOOTMEM
696 void *ptr;
697
698 if (WARN_ON_ONCE(slab_is_available()))
699 return kzalloc(size, GFP_NOWAIT);
700
701restart:
702
703 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
704
705 if (ptr)
706 return ptr;
707
708 if (goal != 0) {
709 goal = 0;
710 goto restart;
711 }
712
713 return NULL;
714#else
715 bootmem_data_t *bdata; 613 bootmem_data_t *bdata;
716 void *region; 614 void *region;
717 615
@@ -737,7 +635,6 @@ restart:
737 } 635 }
738 636
739 return NULL; 637 return NULL;
740#endif
741} 638}
742 639
743/** 640/**
@@ -758,10 +655,6 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
758{ 655{
759 unsigned long limit = 0; 656 unsigned long limit = 0;
760 657
761#ifdef CONFIG_NO_BOOTMEM
762 limit = -1UL;
763#endif
764
765 return ___alloc_bootmem_nopanic(size, align, goal, limit); 658 return ___alloc_bootmem_nopanic(size, align, goal, limit);
766} 659}
767 660
@@ -798,14 +691,9 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
798{ 691{
799 unsigned long limit = 0; 692 unsigned long limit = 0;
800 693
801#ifdef CONFIG_NO_BOOTMEM
802 limit = -1UL;
803#endif
804
805 return ___alloc_bootmem(size, align, goal, limit); 694 return ___alloc_bootmem(size, align, goal, limit);
806} 695}
807 696
808#ifndef CONFIG_NO_BOOTMEM
809static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, 697static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
810 unsigned long size, unsigned long align, 698 unsigned long size, unsigned long align,
811 unsigned long goal, unsigned long limit) 699 unsigned long goal, unsigned long limit)
@@ -822,7 +710,6 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
822 710
823 return ___alloc_bootmem(size, align, goal, limit); 711 return ___alloc_bootmem(size, align, goal, limit);
824} 712}
825#endif
826 713
827/** 714/**
828 * __alloc_bootmem_node - allocate boot memory from a specific node 715 * __alloc_bootmem_node - allocate boot memory from a specific node
@@ -842,24 +729,10 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
842void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, 729void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
843 unsigned long align, unsigned long goal) 730 unsigned long align, unsigned long goal)
844{ 731{
845 void *ptr;
846
847 if (WARN_ON_ONCE(slab_is_available())) 732 if (WARN_ON_ONCE(slab_is_available()))
848 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 733 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
849 734
850#ifdef CONFIG_NO_BOOTMEM 735 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
851 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
852 goal, -1ULL);
853 if (ptr)
854 return ptr;
855
856 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
857 goal, -1ULL);
858#else
859 ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
860#endif
861
862 return ptr;
863} 736}
864 737
865void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, 738void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -880,13 +753,8 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
880 unsigned long new_goal; 753 unsigned long new_goal;
881 754
882 new_goal = MAX_DMA32_PFN << PAGE_SHIFT; 755 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
883#ifdef CONFIG_NO_BOOTMEM
884 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
885 new_goal, -1ULL);
886#else
887 ptr = alloc_bootmem_core(pgdat->bdata, size, align, 756 ptr = alloc_bootmem_core(pgdat->bdata, size, align,
888 new_goal, 0); 757 new_goal, 0);
889#endif
890 if (ptr) 758 if (ptr)
891 return ptr; 759 return ptr;
892 } 760 }
@@ -907,16 +775,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
907void * __init alloc_bootmem_section(unsigned long size, 775void * __init alloc_bootmem_section(unsigned long size,
908 unsigned long section_nr) 776 unsigned long section_nr)
909{ 777{
910#ifdef CONFIG_NO_BOOTMEM
911 unsigned long pfn, goal, limit;
912
913 pfn = section_nr_to_pfn(section_nr);
914 goal = pfn << PAGE_SHIFT;
915 limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
916
917 return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
918 SMP_CACHE_BYTES, goal, limit);
919#else
920 bootmem_data_t *bdata; 778 bootmem_data_t *bdata;
921 unsigned long pfn, goal, limit; 779 unsigned long pfn, goal, limit;
922 780
@@ -926,7 +784,6 @@ void * __init alloc_bootmem_section(unsigned long size,
926 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; 784 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
927 785
928 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); 786 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
929#endif
930} 787}
931#endif 788#endif
932 789
@@ -938,16 +795,11 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
938 if (WARN_ON_ONCE(slab_is_available())) 795 if (WARN_ON_ONCE(slab_is_available()))
939 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 796 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
940 797
941#ifdef CONFIG_NO_BOOTMEM
942 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
943 goal, -1ULL);
944#else
945 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); 798 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
946 if (ptr) 799 if (ptr)
947 return ptr; 800 return ptr;
948 801
949 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); 802 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
950#endif
951 if (ptr) 803 if (ptr)
952 return ptr; 804 return ptr;
953 805
@@ -995,21 +847,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
995void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 847void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
996 unsigned long align, unsigned long goal) 848 unsigned long align, unsigned long goal)
997{ 849{
998 void *ptr;
999
1000 if (WARN_ON_ONCE(slab_is_available())) 850 if (WARN_ON_ONCE(slab_is_available()))
1001 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 851 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
1002 852
1003#ifdef CONFIG_NO_BOOTMEM 853 return ___alloc_bootmem_node(pgdat->bdata, size, align,
1004 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
1005 goal, ARCH_LOW_ADDRESS_LIMIT); 854 goal, ARCH_LOW_ADDRESS_LIMIT);
1006 if (ptr)
1007 return ptr;
1008 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
1009 goal, ARCH_LOW_ADDRESS_LIMIT);
1010#else
1011 ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
1012 goal, ARCH_LOW_ADDRESS_LIMIT);
1013#endif
1014 return ptr;
1015} 855}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3e29781ee762..113e35c47502 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
650 650
651static inline struct page *alloc_hugepage_vma(int defrag, 651static inline struct page *alloc_hugepage_vma(int defrag,
652 struct vm_area_struct *vma, 652 struct vm_area_struct *vma,
653 unsigned long haddr) 653 unsigned long haddr, int nd)
654{ 654{
655 return alloc_pages_vma(alloc_hugepage_gfpmask(defrag), 655 return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
656 HPAGE_PMD_ORDER, vma, haddr); 656 HPAGE_PMD_ORDER, vma, haddr, nd);
657} 657}
658 658
659#ifndef CONFIG_NUMA 659#ifndef CONFIG_NUMA
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
678 if (unlikely(khugepaged_enter(vma))) 678 if (unlikely(khugepaged_enter(vma)))
679 return VM_FAULT_OOM; 679 return VM_FAULT_OOM;
680 page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), 680 page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
681 vma, haddr); 681 vma, haddr, numa_node_id());
682 if (unlikely(!page)) 682 if (unlikely(!page))
683 goto out; 683 goto out;
684 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { 684 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -799,8 +799,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
799 } 799 }
800 800
801 for (i = 0; i < HPAGE_PMD_NR; i++) { 801 for (i = 0; i < HPAGE_PMD_NR; i++) {
802 pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE, 802 pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE,
803 vma, address); 803 vma, address, page_to_nid(page));
804 if (unlikely(!pages[i] || 804 if (unlikely(!pages[i] ||
805 mem_cgroup_newpage_charge(pages[i], mm, 805 mem_cgroup_newpage_charge(pages[i], mm,
806 GFP_KERNEL))) { 806 GFP_KERNEL))) {
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
902 if (transparent_hugepage_enabled(vma) && 902 if (transparent_hugepage_enabled(vma) &&
903 !transparent_hugepage_debug_cow()) 903 !transparent_hugepage_debug_cow())
904 new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), 904 new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
905 vma, haddr); 905 vma, haddr, numa_node_id());
906 else 906 else
907 new_page = NULL; 907 new_page = NULL;
908 908
@@ -1745,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
1745static void collapse_huge_page(struct mm_struct *mm, 1745static void collapse_huge_page(struct mm_struct *mm,
1746 unsigned long address, 1746 unsigned long address,
1747 struct page **hpage, 1747 struct page **hpage,
1748 struct vm_area_struct *vma) 1748 struct vm_area_struct *vma,
1749 int node)
1749{ 1750{
1750 pgd_t *pgd; 1751 pgd_t *pgd;
1751 pud_t *pud; 1752 pud_t *pud;
@@ -1761,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm,
1761#ifndef CONFIG_NUMA 1762#ifndef CONFIG_NUMA
1762 VM_BUG_ON(!*hpage); 1763 VM_BUG_ON(!*hpage);
1763 new_page = *hpage; 1764 new_page = *hpage;
1765 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
1766 up_read(&mm->mmap_sem);
1767 return;
1768 }
1764#else 1769#else
1765 VM_BUG_ON(*hpage); 1770 VM_BUG_ON(*hpage);
1766 /* 1771 /*
@@ -1773,18 +1778,19 @@ static void collapse_huge_page(struct mm_struct *mm,
1773 * mmap_sem in read mode is good idea also to allow greater 1778 * mmap_sem in read mode is good idea also to allow greater
1774 * scalability. 1779 * scalability.
1775 */ 1780 */
1776 new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address); 1781 new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
1782 node);
1777 if (unlikely(!new_page)) { 1783 if (unlikely(!new_page)) {
1778 up_read(&mm->mmap_sem); 1784 up_read(&mm->mmap_sem);
1779 *hpage = ERR_PTR(-ENOMEM); 1785 *hpage = ERR_PTR(-ENOMEM);
1780 return; 1786 return;
1781 } 1787 }
1782#endif
1783 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { 1788 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
1784 up_read(&mm->mmap_sem); 1789 up_read(&mm->mmap_sem);
1785 put_page(new_page); 1790 put_page(new_page);
1786 return; 1791 return;
1787 } 1792 }
1793#endif
1788 1794
1789 /* after allocating the hugepage upgrade to mmap_sem write mode */ 1795 /* after allocating the hugepage upgrade to mmap_sem write mode */
1790 up_read(&mm->mmap_sem); 1796 up_read(&mm->mmap_sem);
@@ -1919,6 +1925,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1919 struct page *page; 1925 struct page *page;
1920 unsigned long _address; 1926 unsigned long _address;
1921 spinlock_t *ptl; 1927 spinlock_t *ptl;
1928 int node = -1;
1922 1929
1923 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 1930 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
1924 1931
@@ -1949,6 +1956,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1949 page = vm_normal_page(vma, _address, pteval); 1956 page = vm_normal_page(vma, _address, pteval);
1950 if (unlikely(!page)) 1957 if (unlikely(!page))
1951 goto out_unmap; 1958 goto out_unmap;
1959 /*
1960 * Chose the node of the first page. This could
1961 * be more sophisticated and look at more pages,
1962 * but isn't for now.
1963 */
1964 if (node == -1)
1965 node = page_to_nid(page);
1952 VM_BUG_ON(PageCompound(page)); 1966 VM_BUG_ON(PageCompound(page));
1953 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) 1967 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
1954 goto out_unmap; 1968 goto out_unmap;
@@ -1965,7 +1979,7 @@ out_unmap:
1965 pte_unmap_unlock(pte, ptl); 1979 pte_unmap_unlock(pte, ptl);
1966 if (ret) 1980 if (ret)
1967 /* collapse_huge_page will return with the mmap_sem released */ 1981 /* collapse_huge_page will return with the mmap_sem released */
1968 collapse_huge_page(mm, address, hpage, vma); 1982 collapse_huge_page(mm, address, hpage, vma, node);
1969out: 1983out:
1970 return ret; 1984 return ret;
1971} 1985}
diff --git a/mm/memory.c b/mm/memory.c
index 8e8c18324863..5823698c2b71 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2648,6 +2648,7 @@ void unmap_mapping_range(struct address_space *mapping,
2648 details.last_index = ULONG_MAX; 2648 details.last_index = ULONG_MAX;
2649 details.i_mmap_lock = &mapping->i_mmap_lock; 2649 details.i_mmap_lock = &mapping->i_mmap_lock;
2650 2650
2651 mutex_lock(&mapping->unmap_mutex);
2651 spin_lock(&mapping->i_mmap_lock); 2652 spin_lock(&mapping->i_mmap_lock);
2652 2653
2653 /* Protect against endless unmapping loops */ 2654 /* Protect against endless unmapping loops */
@@ -2664,6 +2665,7 @@ void unmap_mapping_range(struct address_space *mapping,
2664 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) 2665 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
2665 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); 2666 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
2666 spin_unlock(&mapping->i_mmap_lock); 2667 spin_unlock(&mapping->i_mmap_lock);
2668 mutex_unlock(&mapping->unmap_mutex);
2667} 2669}
2668EXPORT_SYMBOL(unmap_mapping_range); 2670EXPORT_SYMBOL(unmap_mapping_range);
2669 2671
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 368fc9d23610..b53ec99f1428 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1524,10 +1524,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
1524} 1524}
1525 1525
1526/* Return a zonelist indicated by gfp for node representing a mempolicy */ 1526/* Return a zonelist indicated by gfp for node representing a mempolicy */
1527static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy) 1527static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
1528 int nd)
1528{ 1529{
1529 int nd = numa_node_id();
1530
1531 switch (policy->mode) { 1530 switch (policy->mode) {
1532 case MPOL_PREFERRED: 1531 case MPOL_PREFERRED:
1533 if (!(policy->flags & MPOL_F_LOCAL)) 1532 if (!(policy->flags & MPOL_F_LOCAL))
@@ -1679,7 +1678,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
1679 zl = node_zonelist(interleave_nid(*mpol, vma, addr, 1678 zl = node_zonelist(interleave_nid(*mpol, vma, addr,
1680 huge_page_shift(hstate_vma(vma))), gfp_flags); 1679 huge_page_shift(hstate_vma(vma))), gfp_flags);
1681 } else { 1680 } else {
1682 zl = policy_zonelist(gfp_flags, *mpol); 1681 zl = policy_zonelist(gfp_flags, *mpol, numa_node_id());
1683 if ((*mpol)->mode == MPOL_BIND) 1682 if ((*mpol)->mode == MPOL_BIND)
1684 *nodemask = &(*mpol)->v.nodes; 1683 *nodemask = &(*mpol)->v.nodes;
1685 } 1684 }
@@ -1820,7 +1819,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
1820 */ 1819 */
1821struct page * 1820struct page *
1822alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, 1821alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
1823 unsigned long addr) 1822 unsigned long addr, int node)
1824{ 1823{
1825 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1824 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1826 struct zonelist *zl; 1825 struct zonelist *zl;
@@ -1830,13 +1829,13 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
1830 if (unlikely(pol->mode == MPOL_INTERLEAVE)) { 1829 if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
1831 unsigned nid; 1830 unsigned nid;
1832 1831
1833 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); 1832 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
1834 mpol_cond_put(pol); 1833 mpol_cond_put(pol);
1835 page = alloc_page_interleave(gfp, order, nid); 1834 page = alloc_page_interleave(gfp, order, nid);
1836 put_mems_allowed(); 1835 put_mems_allowed();
1837 return page; 1836 return page;
1838 } 1837 }
1839 zl = policy_zonelist(gfp, pol); 1838 zl = policy_zonelist(gfp, pol, node);
1840 if (unlikely(mpol_needs_cond_ref(pol))) { 1839 if (unlikely(mpol_needs_cond_ref(pol))) {
1841 /* 1840 /*
1842 * slow path: ref counted shared policy 1841 * slow path: ref counted shared policy
@@ -1892,7 +1891,8 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
1892 page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); 1891 page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
1893 else 1892 else
1894 page = __alloc_pages_nodemask(gfp, order, 1893 page = __alloc_pages_nodemask(gfp, order,
1895 policy_zonelist(gfp, pol), policy_nodemask(gfp, pol)); 1894 policy_zonelist(gfp, pol, numa_node_id()),
1895 policy_nodemask(gfp, pol));
1896 put_mems_allowed(); 1896 put_mems_allowed();
1897 return page; 1897 return page;
1898} 1898}
diff --git a/mm/migrate.c b/mm/migrate.c
index 766115253807..352de555626c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1287,14 +1287,14 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1287 return -EPERM; 1287 return -EPERM;
1288 1288
1289 /* Find the mm_struct */ 1289 /* Find the mm_struct */
1290 read_lock(&tasklist_lock); 1290 rcu_read_lock();
1291 task = pid ? find_task_by_vpid(pid) : current; 1291 task = pid ? find_task_by_vpid(pid) : current;
1292 if (!task) { 1292 if (!task) {
1293 read_unlock(&tasklist_lock); 1293 rcu_read_unlock();
1294 return -ESRCH; 1294 return -ESRCH;
1295 } 1295 }
1296 mm = get_task_mm(task); 1296 mm = get_task_mm(task);
1297 read_unlock(&tasklist_lock); 1297 rcu_read_unlock();
1298 1298
1299 if (!mm) 1299 if (!mm)
1300 return -EINVAL; 1300 return -EINVAL;
diff --git a/mm/mremap.c b/mm/mremap.c
index 9925b6391b80..1de98d492ddc 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -94,9 +94,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
94 */ 94 */
95 mapping = vma->vm_file->f_mapping; 95 mapping = vma->vm_file->f_mapping;
96 spin_lock(&mapping->i_mmap_lock); 96 spin_lock(&mapping->i_mmap_lock);
97 if (new_vma->vm_truncate_count && 97 new_vma->vm_truncate_count = 0;
98 new_vma->vm_truncate_count != vma->vm_truncate_count)
99 new_vma->vm_truncate_count = 0;
100 } 98 }
101 99
102 /* 100 /*
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
new file mode 100644
index 000000000000..e2bdb07079ce
--- /dev/null
+++ b/mm/nobootmem.c
@@ -0,0 +1,435 @@
1/*
2 * bootmem - A boot-time physical memory allocator and configurator
3 *
4 * Copyright (C) 1999 Ingo Molnar
5 * 1999 Kanoj Sarcar, SGI
6 * 2008 Johannes Weiner
7 *
8 * Access to this subsystem has to be serialized externally (which is true
9 * for the boot process anyway).
10 */
11#include <linux/init.h>
12#include <linux/pfn.h>
13#include <linux/slab.h>
14#include <linux/bootmem.h>
15#include <linux/module.h>
16#include <linux/kmemleak.h>
17#include <linux/range.h>
18#include <linux/memblock.h>
19
20#include <asm/bug.h>
21#include <asm/io.h>
22#include <asm/processor.h>
23
24#include "internal.h"
25
26#ifndef CONFIG_NEED_MULTIPLE_NODES
27struct pglist_data __refdata contig_page_data;
28EXPORT_SYMBOL(contig_page_data);
29#endif
30
31unsigned long max_low_pfn;
32unsigned long min_low_pfn;
33unsigned long max_pfn;
34
35#ifdef CONFIG_CRASH_DUMP
36/*
37 * If we have booted due to a crash, max_pfn will be a very low value. We need
38 * to know the amount of memory that the previous kernel used.
39 */
40unsigned long saved_max_pfn;
41#endif
42
43static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
44 u64 goal, u64 limit)
45{
46 void *ptr;
47 u64 addr;
48
49 if (limit > memblock.current_limit)
50 limit = memblock.current_limit;
51
52 addr = find_memory_core_early(nid, size, align, goal, limit);
53
54 if (addr == MEMBLOCK_ERROR)
55 return NULL;
56
57 ptr = phys_to_virt(addr);
58 memset(ptr, 0, size);
59 memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
60 /*
61 * The min_count is set to 0 so that bootmem allocated blocks
62 * are never reported as leaks.
63 */
64 kmemleak_alloc(ptr, size, 0, 0);
65 return ptr;
66}
67
68/*
69 * free_bootmem_late - free bootmem pages directly to page allocator
70 * @addr: starting address of the range
71 * @size: size of the range in bytes
72 *
73 * This is only useful when the bootmem allocator has already been torn
74 * down, but we are still initializing the system. Pages are given directly
75 * to the page allocator, no bootmem metadata is updated because it is gone.
76 */
77void __init free_bootmem_late(unsigned long addr, unsigned long size)
78{
79 unsigned long cursor, end;
80
81 kmemleak_free_part(__va(addr), size);
82
83 cursor = PFN_UP(addr);
84 end = PFN_DOWN(addr + size);
85
86 for (; cursor < end; cursor++) {
87 __free_pages_bootmem(pfn_to_page(cursor), 0);
88 totalram_pages++;
89 }
90}
91
92static void __init __free_pages_memory(unsigned long start, unsigned long end)
93{
94 int i;
95 unsigned long start_aligned, end_aligned;
96 int order = ilog2(BITS_PER_LONG);
97
98 start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
99 end_aligned = end & ~(BITS_PER_LONG - 1);
100
101 if (end_aligned <= start_aligned) {
102 for (i = start; i < end; i++)
103 __free_pages_bootmem(pfn_to_page(i), 0);
104
105 return;
106 }
107
108 for (i = start; i < start_aligned; i++)
109 __free_pages_bootmem(pfn_to_page(i), 0);
110
111 for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
112 __free_pages_bootmem(pfn_to_page(i), order);
113
114 for (i = end_aligned; i < end; i++)
115 __free_pages_bootmem(pfn_to_page(i), 0);
116}
117
118unsigned long __init free_all_memory_core_early(int nodeid)
119{
120 int i;
121 u64 start, end;
122 unsigned long count = 0;
123 struct range *range = NULL;
124 int nr_range;
125
126 nr_range = get_free_all_memory_range(&range, nodeid);
127
128 for (i = 0; i < nr_range; i++) {
129 start = range[i].start;
130 end = range[i].end;
131 count += end - start;
132 __free_pages_memory(start, end);
133 }
134
135 return count;
136}
137
138/**
139 * free_all_bootmem_node - release a node's free pages to the buddy allocator
140 * @pgdat: node to be released
141 *
142 * Returns the number of pages actually released.
143 */
144unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
145{
146 register_page_bootmem_info_node(pgdat);
147
148 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
149 return 0;
150}
151
152/**
153 * free_all_bootmem - release free pages to the buddy allocator
154 *
155 * Returns the number of pages actually released.
156 */
157unsigned long __init free_all_bootmem(void)
158{
159 /*
160 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
161 * because in some case like Node0 doesnt have RAM installed
162 * low ram will be on Node1
163 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
164 * will be used instead of only Node0 related
165 */
166 return free_all_memory_core_early(MAX_NUMNODES);
167}
168
169/**
170 * free_bootmem_node - mark a page range as usable
171 * @pgdat: node the range resides on
172 * @physaddr: starting address of the range
173 * @size: size of the range in bytes
174 *
175 * Partial pages will be considered reserved and left as they are.
176 *
177 * The range must reside completely on the specified node.
178 */
179void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
180 unsigned long size)
181{
182 kmemleak_free_part(__va(physaddr), size);
183 memblock_x86_free_range(physaddr, physaddr + size);
184}
185
186/**
187 * free_bootmem - mark a page range as usable
188 * @addr: starting address of the range
189 * @size: size of the range in bytes
190 *
191 * Partial pages will be considered reserved and left as they are.
192 *
193 * The range must be contiguous but may span node boundaries.
194 */
195void __init free_bootmem(unsigned long addr, unsigned long size)
196{
197 kmemleak_free_part(__va(addr), size);
198 memblock_x86_free_range(addr, addr + size);
199}
200
201static void * __init ___alloc_bootmem_nopanic(unsigned long size,
202 unsigned long align,
203 unsigned long goal,
204 unsigned long limit)
205{
206 void *ptr;
207
208 if (WARN_ON_ONCE(slab_is_available()))
209 return kzalloc(size, GFP_NOWAIT);
210
211restart:
212
213 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
214
215 if (ptr)
216 return ptr;
217
218 if (goal != 0) {
219 goal = 0;
220 goto restart;
221 }
222
223 return NULL;
224}
225
226/**
227 * __alloc_bootmem_nopanic - allocate boot memory without panicking
228 * @size: size of the request in bytes
229 * @align: alignment of the region
230 * @goal: preferred starting address of the region
231 *
232 * The goal is dropped if it can not be satisfied and the allocation will
233 * fall back to memory below @goal.
234 *
235 * Allocation may happen on any node in the system.
236 *
237 * Returns NULL on failure.
238 */
239void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
240 unsigned long goal)
241{
242 unsigned long limit = -1UL;
243
244 return ___alloc_bootmem_nopanic(size, align, goal, limit);
245}
246
247static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
248 unsigned long goal, unsigned long limit)
249{
250 void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
251
252 if (mem)
253 return mem;
254 /*
255 * Whoops, we cannot satisfy the allocation request.
256 */
257 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
258 panic("Out of memory");
259 return NULL;
260}
261
262/**
263 * __alloc_bootmem - allocate boot memory
264 * @size: size of the request in bytes
265 * @align: alignment of the region
266 * @goal: preferred starting address of the region
267 *
268 * The goal is dropped if it can not be satisfied and the allocation will
269 * fall back to memory below @goal.
270 *
271 * Allocation may happen on any node in the system.
272 *
273 * The function panics if the request can not be satisfied.
274 */
275void * __init __alloc_bootmem(unsigned long size, unsigned long align,
276 unsigned long goal)
277{
278 unsigned long limit = -1UL;
279
280 return ___alloc_bootmem(size, align, goal, limit);
281}
282
283/**
284 * __alloc_bootmem_node - allocate boot memory from a specific node
285 * @pgdat: node to allocate from
286 * @size: size of the request in bytes
287 * @align: alignment of the region
288 * @goal: preferred starting address of the region
289 *
290 * The goal is dropped if it can not be satisfied and the allocation will
291 * fall back to memory below @goal.
292 *
293 * Allocation may fall back to any node in the system if the specified node
294 * can not hold the requested memory.
295 *
296 * The function panics if the request can not be satisfied.
297 */
298void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
299 unsigned long align, unsigned long goal)
300{
301 void *ptr;
302
303 if (WARN_ON_ONCE(slab_is_available()))
304 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
305
306 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
307 goal, -1ULL);
308 if (ptr)
309 return ptr;
310
311 return __alloc_memory_core_early(MAX_NUMNODES, size, align,
312 goal, -1ULL);
313}
314
315void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
316 unsigned long align, unsigned long goal)
317{
318#ifdef MAX_DMA32_PFN
319 unsigned long end_pfn;
320
321 if (WARN_ON_ONCE(slab_is_available()))
322 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
323
324 /* update goal according ...MAX_DMA32_PFN */
325 end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
326
327 if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
328 (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
329 void *ptr;
330 unsigned long new_goal;
331
332 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
333 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
334 new_goal, -1ULL);
335 if (ptr)
336 return ptr;
337 }
338#endif
339
340 return __alloc_bootmem_node(pgdat, size, align, goal);
341
342}
343
344#ifdef CONFIG_SPARSEMEM
345/**
346 * alloc_bootmem_section - allocate boot memory from a specific section
347 * @size: size of the request in bytes
348 * @section_nr: sparse map section to allocate from
349 *
350 * Return NULL on failure.
351 */
352void * __init alloc_bootmem_section(unsigned long size,
353 unsigned long section_nr)
354{
355 unsigned long pfn, goal, limit;
356
357 pfn = section_nr_to_pfn(section_nr);
358 goal = pfn << PAGE_SHIFT;
359 limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
360
361 return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
362 SMP_CACHE_BYTES, goal, limit);
363}
364#endif
365
366void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
367 unsigned long align, unsigned long goal)
368{
369 void *ptr;
370
371 if (WARN_ON_ONCE(slab_is_available()))
372 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
373
374 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
375 goal, -1ULL);
376 if (ptr)
377 return ptr;
378
379 return __alloc_bootmem_nopanic(size, align, goal);
380}
381
382#ifndef ARCH_LOW_ADDRESS_LIMIT
383#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
384#endif
385
386/**
387 * __alloc_bootmem_low - allocate low boot memory
388 * @size: size of the request in bytes
389 * @align: alignment of the region
390 * @goal: preferred starting address of the region
391 *
392 * The goal is dropped if it can not be satisfied and the allocation will
393 * fall back to memory below @goal.
394 *
395 * Allocation may happen on any node in the system.
396 *
397 * The function panics if the request can not be satisfied.
398 */
399void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
400 unsigned long goal)
401{
402 return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
403}
404
405/**
406 * __alloc_bootmem_low_node - allocate low boot memory from a specific node
407 * @pgdat: node to allocate from
408 * @size: size of the request in bytes
409 * @align: alignment of the region
410 * @goal: preferred starting address of the region
411 *
412 * The goal is dropped if it can not be satisfied and the allocation will
413 * fall back to memory below @goal.
414 *
415 * Allocation may fall back to any node in the system if the specified node
416 * can not hold the requested memory.
417 *
418 * The function panics if the request can not be satisfied.
419 */
420void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
421 unsigned long align, unsigned long goal)
422{
423 void *ptr;
424
425 if (WARN_ON_ONCE(slab_is_available()))
426 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
427
428 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
429 goal, ARCH_LOW_ADDRESS_LIMIT);
430 if (ptr)
431 return ptr;
432
433 return __alloc_memory_core_early(MAX_NUMNODES, size, align,
434 goal, ARCH_LOW_ADDRESS_LIMIT);
435}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a873e61e312e..bd7625676a64 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3699,13 +3699,45 @@ void __init free_bootmem_with_active_regions(int nid,
3699} 3699}
3700 3700
3701#ifdef CONFIG_HAVE_MEMBLOCK 3701#ifdef CONFIG_HAVE_MEMBLOCK
3702/*
3703 * Basic iterator support. Return the last range of PFNs for a node
3704 * Note: nid == MAX_NUMNODES returns last region regardless of node
3705 */
3706static int __meminit last_active_region_index_in_nid(int nid)
3707{
3708 int i;
3709
3710 for (i = nr_nodemap_entries - 1; i >= 0; i--)
3711 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3712 return i;
3713
3714 return -1;
3715}
3716
3717/*
3718 * Basic iterator support. Return the previous active range of PFNs for a node
3719 * Note: nid == MAX_NUMNODES returns next region regardless of node
3720 */
3721static int __meminit previous_active_region_index_in_nid(int index, int nid)
3722{
3723 for (index = index - 1; index >= 0; index--)
3724 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3725 return index;
3726
3727 return -1;
3728}
3729
3730#define for_each_active_range_index_in_nid_reverse(i, nid) \
3731 for (i = last_active_region_index_in_nid(nid); i != -1; \
3732 i = previous_active_region_index_in_nid(i, nid))
3733
3702u64 __init find_memory_core_early(int nid, u64 size, u64 align, 3734u64 __init find_memory_core_early(int nid, u64 size, u64 align,
3703 u64 goal, u64 limit) 3735 u64 goal, u64 limit)
3704{ 3736{
3705 int i; 3737 int i;
3706 3738
3707 /* Need to go over early_node_map to find out good range for node */ 3739 /* Need to go over early_node_map to find out good range for node */
3708 for_each_active_range_index_in_nid(i, nid) { 3740 for_each_active_range_index_in_nid_reverse(i, nid) {
3709 u64 addr; 3741 u64 addr;
3710 u64 ei_start, ei_last; 3742 u64 ei_start, ei_last;
3711 u64 final_start, final_end; 3743 u64 final_start, final_end;
@@ -3748,34 +3780,6 @@ int __init add_from_early_node_map(struct range *range, int az,
3748 return nr_range; 3780 return nr_range;
3749} 3781}
3750 3782
3751#ifdef CONFIG_NO_BOOTMEM
3752void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3753 u64 goal, u64 limit)
3754{
3755 void *ptr;
3756 u64 addr;
3757
3758 if (limit > memblock.current_limit)
3759 limit = memblock.current_limit;
3760
3761 addr = find_memory_core_early(nid, size, align, goal, limit);
3762
3763 if (addr == MEMBLOCK_ERROR)
3764 return NULL;
3765
3766 ptr = phys_to_virt(addr);
3767 memset(ptr, 0, size);
3768 memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
3769 /*
3770 * The min_count is set to 0 so that bootmem allocated blocks
3771 * are never reported as leaks.
3772 */
3773 kmemleak_alloc(ptr, size, 0, 0);
3774 return ptr;
3775}
3776#endif
3777
3778
3779void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) 3783void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3780{ 3784{
3781 int i; 3785 int i;
@@ -4809,15 +4813,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
4809 dma_reserve = new_dma_reserve; 4813 dma_reserve = new_dma_reserve;
4810} 4814}
4811 4815
4812#ifndef CONFIG_NEED_MULTIPLE_NODES
4813struct pglist_data __refdata contig_page_data = {
4814#ifndef CONFIG_NO_BOOTMEM
4815 .bdata = &bootmem_node_data[0]
4816#endif
4817 };
4818EXPORT_SYMBOL(contig_page_data);
4819#endif
4820
4821void __init free_area_init(unsigned long *zones_size) 4816void __init free_area_init(unsigned long *zones_size)
4822{ 4817{
4823 free_area_init_node(0, zones_size, 4818 free_area_init_node(0, zones_size,
@@ -5376,10 +5371,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
5376 for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { 5371 for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
5377 unsigned long check = pfn + iter; 5372 unsigned long check = pfn + iter;
5378 5373
5379 if (!pfn_valid_within(check)) { 5374 if (!pfn_valid_within(check))
5380 iter++;
5381 continue; 5375 continue;
5382 } 5376
5383 page = pfn_to_page(check); 5377 page = pfn_to_page(check);
5384 if (!page_count(page)) { 5378 if (!page_count(page)) {
5385 if (PageBuddy(page)) 5379 if (PageBuddy(page))
diff --git a/mm/rmap.c b/mm/rmap.c
index f21f4a1d6a1c..941bf82e8961 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -497,41 +497,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
497 struct mm_struct *mm = vma->vm_mm; 497 struct mm_struct *mm = vma->vm_mm;
498 int referenced = 0; 498 int referenced = 0;
499 499
500 /*
501 * Don't want to elevate referenced for mlocked page that gets this far,
502 * in order that it progresses to try_to_unmap and is moved to the
503 * unevictable list.
504 */
505 if (vma->vm_flags & VM_LOCKED) {
506 *mapcount = 0; /* break early from loop */
507 *vm_flags |= VM_LOCKED;
508 goto out;
509 }
510
511 /* Pretend the page is referenced if the task has the
512 swap token and is in the middle of a page fault. */
513 if (mm != current->mm && has_swap_token(mm) &&
514 rwsem_is_locked(&mm->mmap_sem))
515 referenced++;
516
517 if (unlikely(PageTransHuge(page))) { 500 if (unlikely(PageTransHuge(page))) {
518 pmd_t *pmd; 501 pmd_t *pmd;
519 502
520 spin_lock(&mm->page_table_lock); 503 spin_lock(&mm->page_table_lock);
504 /*
505 * rmap might return false positives; we must filter
506 * these out using page_check_address_pmd().
507 */
521 pmd = page_check_address_pmd(page, mm, address, 508 pmd = page_check_address_pmd(page, mm, address,
522 PAGE_CHECK_ADDRESS_PMD_FLAG); 509 PAGE_CHECK_ADDRESS_PMD_FLAG);
523 if (pmd && !pmd_trans_splitting(*pmd) && 510 if (!pmd) {
524 pmdp_clear_flush_young_notify(vma, address, pmd)) 511 spin_unlock(&mm->page_table_lock);
512 goto out;
513 }
514
515 if (vma->vm_flags & VM_LOCKED) {
516 spin_unlock(&mm->page_table_lock);
517 *mapcount = 0; /* break early from loop */
518 *vm_flags |= VM_LOCKED;
519 goto out;
520 }
521
522 /* go ahead even if the pmd is pmd_trans_splitting() */
523 if (pmdp_clear_flush_young_notify(vma, address, pmd))
525 referenced++; 524 referenced++;
526 spin_unlock(&mm->page_table_lock); 525 spin_unlock(&mm->page_table_lock);
527 } else { 526 } else {
528 pte_t *pte; 527 pte_t *pte;
529 spinlock_t *ptl; 528 spinlock_t *ptl;
530 529
530 /*
531 * rmap might return false positives; we must filter
532 * these out using page_check_address().
533 */
531 pte = page_check_address(page, mm, address, &ptl, 0); 534 pte = page_check_address(page, mm, address, &ptl, 0);
532 if (!pte) 535 if (!pte)
533 goto out; 536 goto out;
534 537
538 if (vma->vm_flags & VM_LOCKED) {
539 pte_unmap_unlock(pte, ptl);
540 *mapcount = 0; /* break early from loop */
541 *vm_flags |= VM_LOCKED;
542 goto out;
543 }
544
535 if (ptep_clear_flush_young_notify(vma, address, pte)) { 545 if (ptep_clear_flush_young_notify(vma, address, pte)) {
536 /* 546 /*
537 * Don't treat a reference through a sequentially read 547 * Don't treat a reference through a sequentially read
@@ -546,6 +556,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
546 pte_unmap_unlock(pte, ptl); 556 pte_unmap_unlock(pte, ptl);
547 } 557 }
548 558
559 /* Pretend the page is referenced if the task has the
560 swap token and is in the middle of a page fault. */
561 if (mm != current->mm && has_swap_token(mm) &&
562 rwsem_is_locked(&mm->mmap_sem))
563 referenced++;
564
549 (*mapcount)--; 565 (*mapcount)--;
550 566
551 if (referenced) 567 if (referenced)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c990602..41f82bb59eec 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1843,8 +1843,9 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1843 1843
1844 inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE); 1844 inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
1845 if (inode) { 1845 if (inode) {
1846 error = security_inode_init_security(inode, dir, NULL, NULL, 1846 error = security_inode_init_security(inode, dir,
1847 NULL); 1847 &dentry->d_name, NULL,
1848 NULL, NULL);
1848 if (error) { 1849 if (error) {
1849 if (error != -EOPNOTSUPP) { 1850 if (error != -EOPNOTSUPP) {
1850 iput(inode); 1851 iput(inode);
@@ -1983,8 +1984,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
1983 if (!inode) 1984 if (!inode)
1984 return -ENOSPC; 1985 return -ENOSPC;
1985 1986
1986 error = security_inode_init_security(inode, dir, NULL, NULL, 1987 error = security_inode_init_security(inode, dir, &dentry->d_name, NULL,
1987 NULL); 1988 NULL, NULL);
1988 if (error) { 1989 if (error) {
1989 if (error != -EOPNOTSUPP) { 1990 if (error != -EOPNOTSUPP) {
1990 iput(inode); 1991 iput(inode);
@@ -2144,8 +2145,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
2144{ 2145{
2145 struct inode *inode = dentry->d_inode; 2146 struct inode *inode = dentry->d_inode;
2146 2147
2147 if (*len < 3) 2148 if (*len < 3) {
2149 *len = 3;
2148 return 255; 2150 return 255;
2151 }
2149 2152
2150 if (inode_unhashed(inode)) { 2153 if (inode_unhashed(inode)) {
2151 /* Unfortunately insert_inode_hash is not idempotent, 2154 /* Unfortunately insert_inode_hash is not idempotent,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 07a458d72fa8..0341c5700e34 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1940,7 +1940,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1940 1940
1941 error = -EINVAL; 1941 error = -EINVAL;
1942 if (S_ISBLK(inode->i_mode)) { 1942 if (S_ISBLK(inode->i_mode)) {
1943 bdev = I_BDEV(inode); 1943 bdev = bdgrab(I_BDEV(inode));
1944 error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, 1944 error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1945 sys_swapon); 1945 sys_swapon);
1946 if (error < 0) { 1946 if (error < 0) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 49feb46e77b8..d64296be00d3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -225,6 +225,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
225 next = start; 225 next = start;
226 while (next <= end && 226 while (next <= end &&
227 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 227 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
228 mem_cgroup_uncharge_start();
228 for (i = 0; i < pagevec_count(&pvec); i++) { 229 for (i = 0; i < pagevec_count(&pvec); i++) {
229 struct page *page = pvec.pages[i]; 230 struct page *page = pvec.pages[i];
230 pgoff_t page_index = page->index; 231 pgoff_t page_index = page->index;
@@ -247,6 +248,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
247 unlock_page(page); 248 unlock_page(page);
248 } 249 }
249 pagevec_release(&pvec); 250 pagevec_release(&pvec);
251 mem_cgroup_uncharge_end();
250 cond_resched(); 252 cond_resched();
251 } 253 }
252 254
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 17497d0cd8b9..6771ea70bfe7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1841,16 +1841,28 @@ static inline bool should_continue_reclaim(struct zone *zone,
1841 if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) 1841 if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
1842 return false; 1842 return false;
1843 1843
1844 /* 1844 /* Consider stopping depending on scan and reclaim activity */
1845 * If we failed to reclaim and have scanned the full list, stop. 1845 if (sc->gfp_mask & __GFP_REPEAT) {
1846 * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far 1846 /*
1847 * faster but obviously would be less likely to succeed 1847 * For __GFP_REPEAT allocations, stop reclaiming if the
1848 * allocation. If this is desirable, use GFP_REPEAT to decide 1848 * full LRU list has been scanned and we are still failing
1849 * if both reclaimed and scanned should be checked or just 1849 * to reclaim pages. This full LRU scan is potentially
1850 * reclaimed 1850 * expensive but a __GFP_REPEAT caller really wants to succeed
1851 */ 1851 */
1852 if (!nr_reclaimed && !nr_scanned) 1852 if (!nr_reclaimed && !nr_scanned)
1853 return false; 1853 return false;
1854 } else {
1855 /*
1856 * For non-__GFP_REPEAT allocations which can presumably
1857 * fail without consequence, stop if we failed to reclaim
1858 * any pages from the last SWAP_CLUSTER_MAX number of
1859 * pages that were scanned. This will return to the
1860 * caller faster at the risk reclaim/compaction and
1861 * the resulting allocation attempt fails
1862 */
1863 if (!nr_reclaimed)
1864 return false;
1865 }
1854 1866
1855 /* 1867 /*
1856 * If we have not reclaimed enough pages for compaction and the 1868 * If we have not reclaimed enough pages for compaction and the
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 198a640d53a6..a0874cc1f718 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
9 util.o \ 9 util.o \
10 protocol.o \ 10 protocol.o \
11 trans_fd.o \ 11 trans_fd.o \
12 trans_common.o \
12 13
139pnet_virtio-objs := \ 149pnet_virtio-objs := \
14 trans_virtio.o \ 15 trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index a848bca9fbff..347ec0cd2718 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
229 return ERR_PTR(-ENOMEM); 229 return ERR_PTR(-ENOMEM);
230 } 230 }
231 init_waitqueue_head(req->wq); 231 init_waitqueue_head(req->wq);
232 req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, 232 if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
233 GFP_KERNEL); 233 P9_TRANS_PREF_PAYLOAD_SEP) {
234 req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, 234 int alloc_msize = min(c->msize, 4096);
235 GFP_KERNEL); 235 req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
236 GFP_KERNEL);
237 req->tc->capacity = alloc_msize;
238 req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
239 GFP_KERNEL);
240 req->rc->capacity = alloc_msize;
241 } else {
242 req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
243 GFP_KERNEL);
244 req->tc->capacity = c->msize;
245 req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
246 GFP_KERNEL);
247 req->rc->capacity = c->msize;
248 }
236 if ((!req->tc) || (!req->rc)) { 249 if ((!req->tc) || (!req->rc)) {
237 printk(KERN_ERR "Couldn't grow tag array\n"); 250 printk(KERN_ERR "Couldn't grow tag array\n");
238 kfree(req->tc); 251 kfree(req->tc);
@@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
243 return ERR_PTR(-ENOMEM); 256 return ERR_PTR(-ENOMEM);
244 } 257 }
245 req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); 258 req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
246 req->tc->capacity = c->msize;
247 req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); 259 req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
248 req->rc->capacity = c->msize;
249 } 260 }
250 261
251 p9pdu_reset(req->tc); 262 p9pdu_reset(req->tc);
@@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
443{ 454{
444 int8_t type; 455 int8_t type;
445 int err; 456 int err;
457 int ecode;
446 458
447 err = p9_parse_header(req->rc, NULL, &type, NULL, 0); 459 err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
448 if (err) { 460 if (err) {
@@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
450 return err; 462 return err;
451 } 463 }
452 464
453 if (type == P9_RERROR || type == P9_RLERROR) { 465 if (type != P9_RERROR && type != P9_RLERROR)
454 int ecode; 466 return 0;
455
456 if (!p9_is_proto_dotl(c)) {
457 char *ename;
458 467
459 err = p9pdu_readf(req->rc, c->proto_version, "s?d", 468 if (!p9_is_proto_dotl(c)) {
460 &ename, &ecode); 469 char *ename;
461 if (err) 470
462 goto out_err; 471 if (req->tc->pbuf_size) {
472 /* Handle user buffers */
473 size_t len = req->rc->size - req->rc->offset;
474 if (req->tc->pubuf) {
475 /* User Buffer */
476 err = copy_from_user(
477 &req->rc->sdata[req->rc->offset],
478 req->tc->pubuf, len);
479 if (err) {
480 err = -EFAULT;
481 goto out_err;
482 }
483 } else {
484 /* Kernel Buffer */
485 memmove(&req->rc->sdata[req->rc->offset],
486 req->tc->pkbuf, len);
487 }
488 }
489 err = p9pdu_readf(req->rc, c->proto_version, "s?d",
490 &ename, &ecode);
491 if (err)
492 goto out_err;
463 493
464 if (p9_is_proto_dotu(c)) 494 if (p9_is_proto_dotu(c))
465 err = -ecode; 495 err = -ecode;
466 496
467 if (!err || !IS_ERR_VALUE(err)) { 497 if (!err || !IS_ERR_VALUE(err)) {
468 err = p9_errstr2errno(ename, strlen(ename)); 498 err = p9_errstr2errno(ename, strlen(ename));
469 499
470 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); 500 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
501 ename);
471 502
472 kfree(ename); 503 kfree(ename);
473 }
474 } else {
475 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
476 err = -ecode;
477
478 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
479 } 504 }
505 } else {
506 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
507 err = -ecode;
508
509 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
510 }
480 511
481 } else
482 err = 0;
483 512
484 return err; 513 return err;
485 514
@@ -1191,6 +1220,27 @@ error:
1191} 1220}
1192EXPORT_SYMBOL(p9_client_fsync); 1221EXPORT_SYMBOL(p9_client_fsync);
1193 1222
1223int p9_client_sync_fs(struct p9_fid *fid)
1224{
1225 int err = 0;
1226 struct p9_req_t *req;
1227 struct p9_client *clnt;
1228
1229 P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid);
1230
1231 clnt = fid->clnt;
1232 req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid);
1233 if (IS_ERR(req)) {
1234 err = PTR_ERR(req);
1235 goto error;
1236 }
1237 P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid);
1238 p9_free_req(clnt, req);
1239error:
1240 return err;
1241}
1242EXPORT_SYMBOL(p9_client_sync_fs);
1243
1194int p9_client_clunk(struct p9_fid *fid) 1244int p9_client_clunk(struct p9_fid *fid)
1195{ 1245{
1196 int err; 1246 int err;
@@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1270 if (count < rsize) 1320 if (count < rsize)
1271 rsize = count; 1321 rsize = count;
1272 1322
1273 req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); 1323 /* Don't bother zerocopy form small IO (< 1024) */
1324 if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
1325 P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
1326 req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
1327 rsize, data, udata);
1328 } else {
1329 req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
1330 rsize);
1331 }
1274 if (IS_ERR(req)) { 1332 if (IS_ERR(req)) {
1275 err = PTR_ERR(req); 1333 err = PTR_ERR(req);
1276 goto error; 1334 goto error;
@@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1284 1342
1285 P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); 1343 P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
1286 1344
1287 if (data) { 1345 if (!req->tc->pbuf_size) {
1288 memmove(data, dataptr, count); 1346 if (data) {
1289 } else { 1347 memmove(data, dataptr, count);
1290 err = copy_to_user(udata, dataptr, count); 1348 } else {
1291 if (err) { 1349 err = copy_to_user(udata, dataptr, count);
1292 err = -EFAULT; 1350 if (err) {
1293 goto free_and_error; 1351 err = -EFAULT;
1352 goto free_and_error;
1353 }
1294 } 1354 }
1295 } 1355 }
1296 p9_free_req(clnt, req); 1356 p9_free_req(clnt, req);
@@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
1323 1383
1324 if (count < rsize) 1384 if (count < rsize)
1325 rsize = count; 1385 rsize = count;
1326 if (data) 1386
1327 req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, 1387 /* Don't bother zerocopy form small IO (< 1024) */
1328 rsize, data); 1388 if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
1329 else 1389 P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
1330 req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, 1390 req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
1331 rsize, udata); 1391 rsize, data, udata);
1392 } else {
1393
1394 if (data)
1395 req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
1396 offset, rsize, data);
1397 else
1398 req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
1399 offset, rsize, udata);
1400 }
1332 if (IS_ERR(req)) { 1401 if (IS_ERR(req)) {
1333 err = PTR_ERR(req); 1402 err = PTR_ERR(req);
1334 goto error; 1403 goto error;
@@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1716 if (count < rsize) 1785 if (count < rsize)
1717 rsize = count; 1786 rsize = count;
1718 1787
1719 req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); 1788 if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
1789 P9_TRANS_PREF_PAYLOAD_SEP) {
1790 req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
1791 offset, rsize, data);
1792 } else {
1793 req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
1794 offset, rsize);
1795 }
1720 if (IS_ERR(req)) { 1796 if (IS_ERR(req)) {
1721 err = PTR_ERR(req); 1797 err = PTR_ERR(req);
1722 goto error; 1798 goto error;
@@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1730 1806
1731 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); 1807 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
1732 1808
1733 if (data) 1809 if (!req->tc->pbuf_size && data)
1734 memmove(data, dataptr, count); 1810 memmove(data, dataptr, count);
1735 1811
1736 p9_free_req(clnt, req); 1812 p9_free_req(clnt, req);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 1e308f210928..2ce515b859b3 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
114 return size - len; 114 return size - len;
115} 115}
116 116
117static size_t
118pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
119 size_t size)
120{
121 BUG_ON(pdu->size > P9_IOHDRSZ);
122 pdu->pubuf = (char __user *)udata;
123 pdu->pkbuf = (char *)kdata;
124 pdu->pbuf_size = size;
125 return 0;
126}
127
128static size_t
129pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
130{
131 BUG_ON(pdu->size > P9_READDIRHDRSZ);
132 pdu->pkbuf = (char *)kdata;
133 pdu->pbuf_size = size;
134 return 0;
135}
136
117/* 137/*
118 b - int8_t 138 b - int8_t
119 w - int16_t 139 w - int16_t
@@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
445 errcode = -EFAULT; 465 errcode = -EFAULT;
446 } 466 }
447 break; 467 break;
468 case 'E':{
469 int32_t cnt = va_arg(ap, int32_t);
470 const char *k = va_arg(ap, const void *);
471 const char *u = va_arg(ap, const void *);
472 errcode = p9pdu_writef(pdu, proto_version, "d",
473 cnt);
474 if (!errcode && pdu_write_urw(pdu, k, u, cnt))
475 errcode = -EFAULT;
476 }
477 break;
478 case 'F':{
479 int32_t cnt = va_arg(ap, int32_t);
480 const char *k = va_arg(ap, const void *);
481 errcode = p9pdu_writef(pdu, proto_version, "d",
482 cnt);
483 if (!errcode && pdu_write_readdir(pdu, k, cnt))
484 errcode = -EFAULT;
485 }
486 break;
448 case 'U':{ 487 case 'U':{
449 int32_t count = va_arg(ap, int32_t); 488 int32_t count = va_arg(ap, int32_t);
450 const char __user *udata = 489 const char __user *udata =
@@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read);
579 618
580int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) 619int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
581{ 620{
621 pdu->id = type;
582 return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); 622 return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
583} 623}
584 624
@@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
606{ 646{
607 pdu->offset = 0; 647 pdu->offset = 0;
608 pdu->size = 0; 648 pdu->size = 0;
649 pdu->private = NULL;
650 pdu->pubuf = NULL;
651 pdu->pkbuf = NULL;
652 pdu->pbuf_size = 0;
609} 653}
610 654
611int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, 655int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
new file mode 100644
index 000000000000..d62b9aa58df8
--- /dev/null
+++ b/net/9p/trans_common.c
@@ -0,0 +1,97 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15#include <linux/slab.h>
16#include <linux/module.h>
17#include <net/9p/9p.h>
18#include <net/9p/client.h>
19#include <linux/scatterlist.h>
20#include "trans_common.h"
21
22/**
23 * p9_release_req_pages - Release pages after the transaction.
24 * @*private: PDU's private page of struct trans_rpage_info
25 */
26void
27p9_release_req_pages(struct trans_rpage_info *rpinfo)
28{
29 int i = 0;
30
31 while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) {
32 put_page(rpinfo->rp_data[i]);
33 i++;
34 }
35}
36EXPORT_SYMBOL(p9_release_req_pages);
37
38/**
39 * p9_nr_pages - Return number of pages needed to accomodate the payload.
40 */
41int
42p9_nr_pages(struct p9_req_t *req)
43{
44 int start_page, end_page;
45 start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT;
46 end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size +
47 PAGE_SIZE - 1) >> PAGE_SHIFT;
48 return end_page - start_page;
49}
50EXPORT_SYMBOL(p9_nr_pages);
51
52/**
53 * payload_gup - Translates user buffer into kernel pages and
54 * pins them either for read/write through get_user_pages_fast().
55 * @req: Request to be sent to server.
56 * @pdata_off: data offset into the first page after translation (gup).
57 * @pdata_len: Total length of the IO. gup may not return requested # of pages.
58 * @nr_pages: number of pages to accomodate the payload
59 * @rw: Indicates if the pages are for read or write.
60 */
61int
62p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
63 int nr_pages, u8 rw)
64{
65 uint32_t first_page_bytes = 0;
66 uint32_t pdata_mapped_pages;
67 struct trans_rpage_info *rpinfo;
68
69 *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
70
71 if (*pdata_off)
72 first_page_bytes = min((PAGE_SIZE - *pdata_off),
73 req->tc->pbuf_size);
74
75 rpinfo = req->tc->private;
76 pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
77 nr_pages, rw, &rpinfo->rp_data[0]);
78
79 if (pdata_mapped_pages < 0) {
80 printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p"
81 "nr_pages:%d\n", pdata_mapped_pages,
82 req->tc->pubuf, nr_pages);
83 pdata_mapped_pages = 0;
84 return -EIO;
85 }
86 rpinfo->rp_nr_pages = pdata_mapped_pages;
87 if (*pdata_off) {
88 *pdata_len = first_page_bytes;
89 *pdata_len += min((req->tc->pbuf_size - *pdata_len),
90 ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
91 } else {
92 *pdata_len = min(req->tc->pbuf_size,
93 (size_t)pdata_mapped_pages << PAGE_SHIFT);
94 }
95 return 0;
96}
97EXPORT_SYMBOL(p9_payload_gup);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
new file mode 100644
index 000000000000..76309223bb02
--- /dev/null
+++ b/net/9p/trans_common.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15/* TRUE if it is user context */
16#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
17
18/**
19 * struct trans_rpage_info - To store mapped page information in PDU.
20 * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu.
21 * @rp_nr_pages: Number of mapped pages
22 * @rp_data: Array of page pointers
23 */
24struct trans_rpage_info {
25 u8 rp_alloc;
26 int rp_nr_pages;
27 struct page *rp_data[0];
28};
29
30void p9_release_req_pages(struct trans_rpage_info *);
31int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
32int p9_nr_pages(struct p9_req_t *);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 078eb162d9bf..a30471e51740 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -153,10 +153,11 @@ struct p9_conn {
153 unsigned long wsched; 153 unsigned long wsched;
154}; 154};
155 155
156static void p9_poll_workfn(struct work_struct *work);
157
156static DEFINE_SPINLOCK(p9_poll_lock); 158static DEFINE_SPINLOCK(p9_poll_lock);
157static LIST_HEAD(p9_poll_pending_list); 159static LIST_HEAD(p9_poll_pending_list);
158static struct workqueue_struct *p9_mux_wq; 160static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
159static struct task_struct *p9_poll_task;
160 161
161static void p9_mux_poll_stop(struct p9_conn *m) 162static void p9_mux_poll_stop(struct p9_conn *m)
162{ 163{
@@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work)
384 385
385 if (n & POLLIN) { 386 if (n & POLLIN) {
386 P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); 387 P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
387 queue_work(p9_mux_wq, &m->rq); 388 schedule_work(&m->rq);
388 } else 389 } else
389 clear_bit(Rworksched, &m->wsched); 390 clear_bit(Rworksched, &m->wsched);
390 } else 391 } else
@@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work)
497 498
498 if (n & POLLOUT) { 499 if (n & POLLOUT) {
499 P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); 500 P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
500 queue_work(p9_mux_wq, &m->wq); 501 schedule_work(&m->wq);
501 } else 502 } else
502 clear_bit(Wworksched, &m->wsched); 503 clear_bit(Wworksched, &m->wsched);
503 } else 504 } else
@@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
516 container_of(wait, struct p9_poll_wait, wait); 517 container_of(wait, struct p9_poll_wait, wait);
517 struct p9_conn *m = pwait->conn; 518 struct p9_conn *m = pwait->conn;
518 unsigned long flags; 519 unsigned long flags;
519 DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
520 520
521 spin_lock_irqsave(&p9_poll_lock, flags); 521 spin_lock_irqsave(&p9_poll_lock, flags);
522 if (list_empty(&m->poll_pending_link)) 522 if (list_empty(&m->poll_pending_link))
523 list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); 523 list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
524 spin_unlock_irqrestore(&p9_poll_lock, flags); 524 spin_unlock_irqrestore(&p9_poll_lock, flags);
525 525
526 /* perform the default wake up operation */ 526 schedule_work(&p9_poll_work);
527 return default_wake_function(&dummy_wait, mode, sync, key); 527 return 1;
528} 528}
529 529
530/** 530/**
@@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m)
629 P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); 629 P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
630 if (!test_and_set_bit(Rworksched, &m->wsched)) { 630 if (!test_and_set_bit(Rworksched, &m->wsched)) {
631 P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); 631 P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
632 queue_work(p9_mux_wq, &m->rq); 632 schedule_work(&m->rq);
633 } 633 }
634 } 634 }
635 635
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
639 if ((m->wsize || !list_empty(&m->unsent_req_list)) && 639 if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
640 !test_and_set_bit(Wworksched, &m->wsched)) { 640 !test_and_set_bit(Wworksched, &m->wsched)) {
641 P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); 641 P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
642 queue_work(p9_mux_wq, &m->wq); 642 schedule_work(&m->wq);
643 } 643 }
644 } 644 }
645} 645}
@@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
677 n = p9_fd_poll(m->client, NULL); 677 n = p9_fd_poll(m->client, NULL);
678 678
679 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) 679 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
680 queue_work(p9_mux_wq, &m->wq); 680 schedule_work(&m->wq);
681 681
682 return 0; 682 return 0;
683} 683}
@@ -1047,12 +1047,12 @@ static struct p9_trans_module p9_fd_trans = {
1047 * 1047 *
1048 */ 1048 */
1049 1049
1050static int p9_poll_proc(void *a) 1050static void p9_poll_workfn(struct work_struct *work)
1051{ 1051{
1052 unsigned long flags; 1052 unsigned long flags;
1053 1053
1054 P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); 1054 P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
1055 repeat: 1055
1056 spin_lock_irqsave(&p9_poll_lock, flags); 1056 spin_lock_irqsave(&p9_poll_lock, flags);
1057 while (!list_empty(&p9_poll_pending_list)) { 1057 while (!list_empty(&p9_poll_pending_list)) {
1058 struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, 1058 struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
@@ -1067,35 +1067,11 @@ static int p9_poll_proc(void *a)
1067 } 1067 }
1068 spin_unlock_irqrestore(&p9_poll_lock, flags); 1068 spin_unlock_irqrestore(&p9_poll_lock, flags);
1069 1069
1070 set_current_state(TASK_INTERRUPTIBLE);
1071 if (list_empty(&p9_poll_pending_list)) {
1072 P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
1073 schedule();
1074 }
1075 __set_current_state(TASK_RUNNING);
1076
1077 if (!kthread_should_stop())
1078 goto repeat;
1079
1080 P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); 1070 P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
1081 return 0;
1082} 1071}
1083 1072
1084int p9_trans_fd_init(void) 1073int p9_trans_fd_init(void)
1085{ 1074{
1086 p9_mux_wq = create_workqueue("v9fs");
1087 if (!p9_mux_wq) {
1088 printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
1089 return -ENOMEM;
1090 }
1091
1092 p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
1093 if (IS_ERR(p9_poll_task)) {
1094 destroy_workqueue(p9_mux_wq);
1095 printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
1096 return PTR_ERR(p9_poll_task);
1097 }
1098
1099 v9fs_register_trans(&p9_tcp_trans); 1075 v9fs_register_trans(&p9_tcp_trans);
1100 v9fs_register_trans(&p9_unix_trans); 1076 v9fs_register_trans(&p9_unix_trans);
1101 v9fs_register_trans(&p9_fd_trans); 1077 v9fs_register_trans(&p9_fd_trans);
@@ -1105,10 +1081,8 @@ int p9_trans_fd_init(void)
1105 1081
1106void p9_trans_fd_exit(void) 1082void p9_trans_fd_exit(void)
1107{ 1083{
1108 kthread_stop(p9_poll_task); 1084 flush_work_sync(&p9_poll_work);
1109 v9fs_unregister_trans(&p9_tcp_trans); 1085 v9fs_unregister_trans(&p9_tcp_trans);
1110 v9fs_unregister_trans(&p9_unix_trans); 1086 v9fs_unregister_trans(&p9_unix_trans);
1111 v9fs_unregister_trans(&p9_fd_trans); 1087 v9fs_unregister_trans(&p9_fd_trans);
1112
1113 destroy_workqueue(p9_mux_wq);
1114} 1088}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c8f3f72ab20e..9b550ed9c711 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -45,6 +45,7 @@
45#include <linux/scatterlist.h> 45#include <linux/scatterlist.h>
46#include <linux/virtio.h> 46#include <linux/virtio.h>
47#include <linux/virtio_9p.h> 47#include <linux/virtio_9p.h>
48#include "trans_common.h"
48 49
49#define VIRTQUEUE_NUM 128 50#define VIRTQUEUE_NUM 128
50 51
@@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq)
155 rc->tag); 156 rc->tag);
156 req = p9_tag_lookup(chan->client, rc->tag); 157 req = p9_tag_lookup(chan->client, rc->tag);
157 req->status = REQ_STATUS_RCVD; 158 req->status = REQ_STATUS_RCVD;
159 if (req->tc->private) {
160 struct trans_rpage_info *rp = req->tc->private;
161 /*Release pages */
162 p9_release_req_pages(rp);
163 if (rp->rp_alloc)
164 kfree(rp);
165 req->tc->private = NULL;
166 }
158 p9_client_cb(chan->client, req); 167 p9_client_cb(chan->client, req);
159 } else { 168 } else {
160 spin_unlock_irqrestore(&chan->lock, flags); 169 spin_unlock_irqrestore(&chan->lock, flags);
@@ -203,6 +212,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
203} 212}
204 213
205/** 214/**
215 * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
216 * this takes a list of pages.
217 * @sg: scatter/gather list to pack into
218 * @start: which segment of the sg_list to start at
219 * @pdata_off: Offset into the first page
220 * @**pdata: a list of pages to add into sg.
221 * @count: amount of data to pack into the scatter/gather list
222 */
223static int
224pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
225 struct page **pdata, int count)
226{
227 int s;
228 int i = 0;
229 int index = start;
230
231 if (pdata_off) {
232 s = min((int)(PAGE_SIZE - pdata_off), count);
233 sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
234 count -= s;
235 }
236
237 while (count) {
238 BUG_ON(index > limit);
239 s = min((int)PAGE_SIZE, count);
240 sg_set_page(&sg[index++], pdata[i++], s, 0);
241 count -= s;
242 }
243 return index-start;
244}
245
246/**
206 * p9_virtio_request - issue a request 247 * p9_virtio_request - issue a request
207 * @client: client instance issuing the request 248 * @client: client instance issuing the request
208 * @req: request to be issued 249 * @req: request to be issued
@@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
212static int 253static int
213p9_virtio_request(struct p9_client *client, struct p9_req_t *req) 254p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
214{ 255{
215 int in, out; 256 int in, out, inp, outp;
216 struct virtio_chan *chan = client->trans; 257 struct virtio_chan *chan = client->trans;
217 char *rdata = (char *)req->rc+sizeof(struct p9_fcall); 258 char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
218 unsigned long flags; 259 unsigned long flags;
219 int err; 260 size_t pdata_off = 0;
261 struct trans_rpage_info *rpinfo = NULL;
262 int err, pdata_len = 0;
220 263
221 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 264 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
222 265
223req_retry: 266req_retry:
224 req->status = REQ_STATUS_SENT; 267 req->status = REQ_STATUS_SENT;
225 268
269 if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
270 int nr_pages = p9_nr_pages(req);
271 int rpinfo_size = sizeof(struct trans_rpage_info) +
272 sizeof(struct page *) * nr_pages;
273
274 if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
275 /* We can use sdata */
276 req->tc->private = req->tc->sdata + req->tc->size;
277 rpinfo = (struct trans_rpage_info *)req->tc->private;
278 rpinfo->rp_alloc = 0;
279 } else {
280 req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
281 if (!req->tc->private) {
282 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
283 "private kmalloc returned NULL");
284 return -ENOMEM;
285 }
286 rpinfo = (struct trans_rpage_info *)req->tc->private;
287 rpinfo->rp_alloc = 1;
288 }
289
290 err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
291 req->tc->id == P9_TREAD ? 1 : 0);
292 if (err < 0) {
293 if (rpinfo->rp_alloc)
294 kfree(rpinfo);
295 return err;
296 }
297 }
298
226 spin_lock_irqsave(&chan->lock, flags); 299 spin_lock_irqsave(&chan->lock, flags);
300
301 /* Handle out VirtIO ring buffers */
227 out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, 302 out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
228 req->tc->size); 303 req->tc->size);
229 in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, 304
230 client->msize); 305 if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
306 /* We have additional write payload buffer to take care */
307 if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
308 outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
309 pdata_off, rpinfo->rp_data, pdata_len);
310 } else {
311 char *pbuf = req->tc->pubuf ? req->tc->pubuf :
312 req->tc->pkbuf;
313 outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
314 req->tc->pbuf_size);
315 }
316 out += outp;
317 }
318
319 /* Handle in VirtIO ring buffers */
320 if (req->tc->pbuf_size &&
321 ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
322 /*
323 * Take care of additional Read payload.
324 * 11 is the read/write header = PDU Header(7) + IO Size (4).
325 * Arrange in such a way that server places header in the
326 * alloced memory and payload onto the user buffer.
327 */
328 inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
329 /*
330 * Running executables in the filesystem may result in
331 * a read request with kernel buffer as opposed to user buffer.
332 */
333 if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
334 in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
335 pdata_off, rpinfo->rp_data, pdata_len);
336 } else {
337 char *pbuf = req->tc->pubuf ? req->tc->pubuf :
338 req->tc->pkbuf;
339 in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
340 pbuf, req->tc->pbuf_size);
341 }
342 in += inp;
343 } else {
344 in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
345 client->msize);
346 }
231 347
232 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); 348 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
233 if (err < 0) { 349 if (err < 0) {
@@ -246,6 +362,8 @@ req_retry:
246 P9_DPRINTK(P9_DEBUG_TRANS, 362 P9_DPRINTK(P9_DEBUG_TRANS,
247 "9p debug: " 363 "9p debug: "
248 "virtio rpc add_buf returned failure"); 364 "virtio rpc add_buf returned failure");
365 if (rpinfo && rpinfo->rp_alloc)
366 kfree(rpinfo);
249 return -EIO; 367 return -EIO;
250 } 368 }
251 } 369 }
@@ -448,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = {
448 .request = p9_virtio_request, 566 .request = p9_virtio_request,
449 .cancel = p9_virtio_cancel, 567 .cancel = p9_virtio_cancel,
450 .maxsize = PAGE_SIZE*16, 568 .maxsize = PAGE_SIZE*16,
569 .pref = P9_TRANS_PREF_PAYLOAD_SEP,
451 .def = 0, 570 .def = 0,
452 .owner = THIS_MODULE, 571 .owner = THIS_MODULE,
453}; 572};
diff --git a/net/Makefile b/net/Makefile
index a3330ebe2c53..a51d9465e628 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
19obj-$(CONFIG_INET) += ipv4/ 19obj-$(CONFIG_INET) += ipv4/
20obj-$(CONFIG_XFRM) += xfrm/ 20obj-$(CONFIG_XFRM) += xfrm/
21obj-$(CONFIG_UNIX) += unix/ 21obj-$(CONFIG_UNIX) += unix/
22ifneq ($(CONFIG_IPV6),) 22obj-$(CONFIG_NET) += ipv6/
23obj-y += ipv6/
24endif
25obj-$(CONFIG_PACKET) += packet/ 23obj-$(CONFIG_PACKET) += packet/
26obj-$(CONFIG_NET_KEY) += key/ 24obj-$(CONFIG_NET_KEY) += key/
27obj-$(CONFIG_BRIDGE) += bridge/ 25obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 7550abb0c96a..675614e38e14 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -859,6 +859,7 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
859 result = L2CAP_CR_SEC_BLOCK; 859 result = L2CAP_CR_SEC_BLOCK;
860 else 860 else
861 result = L2CAP_CR_BAD_PSM; 861 result = L2CAP_CR_BAD_PSM;
862 sk->sk_state = BT_DISCONN;
862 863
863 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); 864 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
864 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); 865 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 2575c2db6404..d7b9af4703d0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -727,7 +727,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
727 break; 727 break;
728 } 728 }
729 729
730 tty_unlock();
730 schedule(); 731 schedule();
732 tty_lock();
731 } 733 }
732 set_current_state(TASK_RUNNING); 734 set_current_state(TASK_RUNNING);
733 remove_wait_queue(&dev->wait, &wait); 735 remove_wait_queue(&dev->wait, &wait);
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 9190ae462cb4..6dee7bf648a9 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -6,6 +6,7 @@ config BRIDGE
6 tristate "802.1d Ethernet Bridging" 6 tristate "802.1d Ethernet Bridging"
7 select LLC 7 select LLC
8 select STP 8 select STP
9 depends on IPV6 || IPV6=n
9 ---help--- 10 ---help---
10 If you say Y here, then your Linux box will be able to act as an 11 If you say Y here, then your Linux box will be able to act as an
11 Ethernet bridge, which means that the different Ethernet segments it 12 Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 6f6d8e1b776f..88e4aa9cb1f9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
80 if (is_multicast_ether_addr(dest)) { 80 if (is_multicast_ether_addr(dest)) {
81 mdst = br_mdb_get(br, skb); 81 mdst = br_mdb_get(br, skb);
82 if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { 82 if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
83 if ((mdst && !hlist_unhashed(&mdst->mglist)) || 83 if ((mdst && mdst->mglist) ||
84 br_multicast_is_router(br)) 84 br_multicast_is_router(br))
85 skb2 = skb; 85 skb2 = skb;
86 br_multicast_forward(mdst, skb, skb2); 86 br_multicast_forward(mdst, skb, skb2);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f701a21acb34..030a002ff8ee 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,10 +37,9 @@
37 rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) 37 rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
38 38
39#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 39#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
40static inline int ipv6_is_local_multicast(const struct in6_addr *addr) 40static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
41{ 41{
42 if (ipv6_addr_is_multicast(addr) && 42 if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
43 IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL)
44 return 1; 43 return 1;
45 return 0; 44 return 0;
46} 45}
@@ -232,8 +231,7 @@ static void br_multicast_group_expired(unsigned long data)
232 if (!netif_running(br->dev) || timer_pending(&mp->timer)) 231 if (!netif_running(br->dev) || timer_pending(&mp->timer))
233 goto out; 232 goto out;
234 233
235 if (!hlist_unhashed(&mp->mglist)) 234 mp->mglist = false;
236 hlist_del_init(&mp->mglist);
237 235
238 if (mp->ports) 236 if (mp->ports)
239 goto out; 237 goto out;
@@ -276,7 +274,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
276 del_timer(&p->query_timer); 274 del_timer(&p->query_timer);
277 call_rcu_bh(&p->rcu, br_multicast_free_pg); 275 call_rcu_bh(&p->rcu, br_multicast_free_pg);
278 276
279 if (!mp->ports && hlist_unhashed(&mp->mglist) && 277 if (!mp->ports && !mp->mglist &&
280 netif_running(br->dev)) 278 netif_running(br->dev))
281 mod_timer(&mp->timer, jiffies); 279 mod_timer(&mp->timer, jiffies);
282 280
@@ -436,7 +434,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
436 eth = eth_hdr(skb); 434 eth = eth_hdr(skb);
437 435
438 memcpy(eth->h_source, br->dev->dev_addr, 6); 436 memcpy(eth->h_source, br->dev->dev_addr, 6);
439 ipv6_eth_mc_map(group, eth->h_dest);
440 eth->h_proto = htons(ETH_P_IPV6); 437 eth->h_proto = htons(ETH_P_IPV6);
441 skb_put(skb, sizeof(*eth)); 438 skb_put(skb, sizeof(*eth));
442 439
@@ -448,8 +445,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
448 ip6h->payload_len = htons(8 + sizeof(*mldq)); 445 ip6h->payload_len = htons(8 + sizeof(*mldq));
449 ip6h->nexthdr = IPPROTO_HOPOPTS; 446 ip6h->nexthdr = IPPROTO_HOPOPTS;
450 ip6h->hop_limit = 1; 447 ip6h->hop_limit = 1;
451 ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); 448 ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
449 &ip6h->saddr);
452 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); 450 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
451 ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
453 452
454 hopopt = (u8 *)(ip6h + 1); 453 hopopt = (u8 *)(ip6h + 1);
455 hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ 454 hopopt[0] = IPPROTO_ICMPV6; /* next hdr */
@@ -528,7 +527,7 @@ static void br_multicast_group_query_expired(unsigned long data)
528 struct net_bridge *br = mp->br; 527 struct net_bridge *br = mp->br;
529 528
530 spin_lock(&br->multicast_lock); 529 spin_lock(&br->multicast_lock);
531 if (!netif_running(br->dev) || hlist_unhashed(&mp->mglist) || 530 if (!netif_running(br->dev) || !mp->mglist ||
532 mp->queries_sent >= br->multicast_last_member_count) 531 mp->queries_sent >= br->multicast_last_member_count)
533 goto out; 532 goto out;
534 533
@@ -719,7 +718,7 @@ static int br_multicast_add_group(struct net_bridge *br,
719 goto err; 718 goto err;
720 719
721 if (!port) { 720 if (!port) {
722 hlist_add_head(&mp->mglist, &br->mglist); 721 mp->mglist = true;
723 mod_timer(&mp->timer, now + br->multicast_membership_interval); 722 mod_timer(&mp->timer, now + br->multicast_membership_interval);
724 goto out; 723 goto out;
725 } 724 }
@@ -781,11 +780,11 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
781{ 780{
782 struct br_ip br_group; 781 struct br_ip br_group;
783 782
784 if (ipv6_is_local_multicast(group)) 783 if (!ipv6_is_transient_multicast(group))
785 return 0; 784 return 0;
786 785
787 ipv6_addr_copy(&br_group.u.ip6, group); 786 ipv6_addr_copy(&br_group.u.ip6, group);
788 br_group.proto = htons(ETH_P_IP); 787 br_group.proto = htons(ETH_P_IPV6);
789 788
790 return br_multicast_add_group(br, port, &br_group); 789 return br_multicast_add_group(br, port, &br_group);
791} 790}
@@ -1014,18 +1013,19 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
1014 1013
1015 nsrcs = skb_header_pointer(skb, 1014 nsrcs = skb_header_pointer(skb,
1016 len + offsetof(struct mld2_grec, 1015 len + offsetof(struct mld2_grec,
1017 grec_mca), 1016 grec_nsrcs),
1018 sizeof(_nsrcs), &_nsrcs); 1017 sizeof(_nsrcs), &_nsrcs);
1019 if (!nsrcs) 1018 if (!nsrcs)
1020 return -EINVAL; 1019 return -EINVAL;
1021 1020
1022 if (!pskb_may_pull(skb, 1021 if (!pskb_may_pull(skb,
1023 len + sizeof(*grec) + 1022 len + sizeof(*grec) +
1024 sizeof(struct in6_addr) * (*nsrcs))) 1023 sizeof(struct in6_addr) * ntohs(*nsrcs)))
1025 return -EINVAL; 1024 return -EINVAL;
1026 1025
1027 grec = (struct mld2_grec *)(skb->data + len); 1026 grec = (struct mld2_grec *)(skb->data + len);
1028 len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs); 1027 len += sizeof(*grec) +
1028 sizeof(struct in6_addr) * ntohs(*nsrcs);
1029 1029
1030 /* We treat these as MLDv1 reports for now. */ 1030 /* We treat these as MLDv1 reports for now. */
1031 switch (grec->grec_type) { 1031 switch (grec->grec_type) {
@@ -1165,7 +1165,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1165 1165
1166 max_delay *= br->multicast_last_member_count; 1166 max_delay *= br->multicast_last_member_count;
1167 1167
1168 if (!hlist_unhashed(&mp->mglist) && 1168 if (mp->mglist &&
1169 (timer_pending(&mp->timer) ? 1169 (timer_pending(&mp->timer) ?
1170 time_after(mp->timer.expires, now + max_delay) : 1170 time_after(mp->timer.expires, now + max_delay) :
1171 try_to_del_timer_sync(&mp->timer) >= 0)) 1171 try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1177,7 +1177,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1177 if (timer_pending(&p->timer) ? 1177 if (timer_pending(&p->timer) ?
1178 time_after(p->timer.expires, now + max_delay) : 1178 time_after(p->timer.expires, now + max_delay) :
1179 try_to_del_timer_sync(&p->timer) >= 0) 1179 try_to_del_timer_sync(&p->timer) >= 0)
1180 mod_timer(&mp->timer, now + max_delay); 1180 mod_timer(&p->timer, now + max_delay);
1181 } 1181 }
1182 1182
1183out: 1183out:
@@ -1236,7 +1236,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1236 goto out; 1236 goto out;
1237 1237
1238 max_delay *= br->multicast_last_member_count; 1238 max_delay *= br->multicast_last_member_count;
1239 if (!hlist_unhashed(&mp->mglist) && 1239 if (mp->mglist &&
1240 (timer_pending(&mp->timer) ? 1240 (timer_pending(&mp->timer) ?
1241 time_after(mp->timer.expires, now + max_delay) : 1241 time_after(mp->timer.expires, now + max_delay) :
1242 try_to_del_timer_sync(&mp->timer) >= 0)) 1242 try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1248,7 +1248,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1248 if (timer_pending(&p->timer) ? 1248 if (timer_pending(&p->timer) ?
1249 time_after(p->timer.expires, now + max_delay) : 1249 time_after(p->timer.expires, now + max_delay) :
1250 try_to_del_timer_sync(&p->timer) >= 0) 1250 try_to_del_timer_sync(&p->timer) >= 0)
1251 mod_timer(&mp->timer, now + max_delay); 1251 mod_timer(&p->timer, now + max_delay);
1252 } 1252 }
1253 1253
1254out: 1254out:
@@ -1283,7 +1283,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
1283 br->multicast_last_member_interval; 1283 br->multicast_last_member_interval;
1284 1284
1285 if (!port) { 1285 if (!port) {
1286 if (!hlist_unhashed(&mp->mglist) && 1286 if (mp->mglist &&
1287 (timer_pending(&mp->timer) ? 1287 (timer_pending(&mp->timer) ?
1288 time_after(mp->timer.expires, time) : 1288 time_after(mp->timer.expires, time) :
1289 try_to_del_timer_sync(&mp->timer) >= 0)) { 1289 try_to_del_timer_sync(&mp->timer) >= 0)) {
@@ -1341,7 +1341,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
1341{ 1341{
1342 struct br_ip br_group; 1342 struct br_ip br_group;
1343 1343
1344 if (ipv6_is_local_multicast(group)) 1344 if (!ipv6_is_transient_multicast(group))
1345 return; 1345 return;
1346 1346
1347 ipv6_addr_copy(&br_group.u.ip6, group); 1347 ipv6_addr_copy(&br_group.u.ip6, group);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 84aac7734bfc..4e1b620b6be6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -84,13 +84,13 @@ struct net_bridge_port_group {
84struct net_bridge_mdb_entry 84struct net_bridge_mdb_entry
85{ 85{
86 struct hlist_node hlist[2]; 86 struct hlist_node hlist[2];
87 struct hlist_node mglist;
88 struct net_bridge *br; 87 struct net_bridge *br;
89 struct net_bridge_port_group __rcu *ports; 88 struct net_bridge_port_group __rcu *ports;
90 struct rcu_head rcu; 89 struct rcu_head rcu;
91 struct timer_list timer; 90 struct timer_list timer;
92 struct timer_list query_timer; 91 struct timer_list query_timer;
93 struct br_ip addr; 92 struct br_ip addr;
93 bool mglist;
94 u32 queries_sent; 94 u32 queries_sent;
95}; 95};
96 96
@@ -238,7 +238,6 @@ struct net_bridge
238 spinlock_t multicast_lock; 238 spinlock_t multicast_lock;
239 struct net_bridge_mdb_htable __rcu *mdb; 239 struct net_bridge_mdb_htable __rcu *mdb;
240 struct hlist_head router_list; 240 struct hlist_head router_list;
241 struct hlist_head mglist;
242 241
243 struct timer_list multicast_router_timer; 242 struct timer_list multicast_router_timer;
244 struct timer_list multicast_querier_timer; 243 struct timer_list multicast_querier_timer;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index dff633d62e5b..05f357828a2f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -252,8 +252,12 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
252{ 252{
253 struct kvec iov = {buf, len}; 253 struct kvec iov = {buf, len};
254 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 254 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
255 int r;
255 256
256 return kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); 257 r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
258 if (r == -EAGAIN)
259 r = 0;
260 return r;
257} 261}
258 262
259/* 263/*
@@ -264,13 +268,17 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
264 size_t kvlen, size_t len, int more) 268 size_t kvlen, size_t len, int more)
265{ 269{
266 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 270 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
271 int r;
267 272
268 if (more) 273 if (more)
269 msg.msg_flags |= MSG_MORE; 274 msg.msg_flags |= MSG_MORE;
270 else 275 else
271 msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ 276 msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
272 277
273 return kernel_sendmsg(sock, &msg, iov, kvlen, len); 278 r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
279 if (r == -EAGAIN)
280 r = 0;
281 return r;
274} 282}
275 283
276 284
@@ -328,7 +336,6 @@ static void reset_connection(struct ceph_connection *con)
328 ceph_msg_put(con->out_msg); 336 ceph_msg_put(con->out_msg);
329 con->out_msg = NULL; 337 con->out_msg = NULL;
330 } 338 }
331 con->out_keepalive_pending = false;
332 con->in_seq = 0; 339 con->in_seq = 0;
333 con->in_seq_acked = 0; 340 con->in_seq_acked = 0;
334} 341}
@@ -847,6 +854,8 @@ static int write_partial_msg_pages(struct ceph_connection *con)
847 (msg->pages || msg->pagelist || msg->bio || in_trail)) 854 (msg->pages || msg->pagelist || msg->bio || in_trail))
848 kunmap(page); 855 kunmap(page);
849 856
857 if (ret == -EAGAIN)
858 ret = 0;
850 if (ret <= 0) 859 if (ret <= 0)
851 goto out; 860 goto out;
852 861
@@ -1238,8 +1247,6 @@ static int process_connect(struct ceph_connection *con)
1238 con->auth_retry); 1247 con->auth_retry);
1239 if (con->auth_retry == 2) { 1248 if (con->auth_retry == 2) {
1240 con->error_msg = "connect authorization failure"; 1249 con->error_msg = "connect authorization failure";
1241 reset_connection(con);
1242 set_bit(CLOSED, &con->state);
1243 return -1; 1250 return -1;
1244 } 1251 }
1245 con->auth_retry = 1; 1252 con->auth_retry = 1;
@@ -1705,14 +1712,6 @@ more:
1705 1712
1706 /* open the socket first? */ 1713 /* open the socket first? */
1707 if (con->sock == NULL) { 1714 if (con->sock == NULL) {
1708 /*
1709 * if we were STANDBY and are reconnecting _this_
1710 * connection, bump connect_seq now. Always bump
1711 * global_seq.
1712 */
1713 if (test_and_clear_bit(STANDBY, &con->state))
1714 con->connect_seq++;
1715
1716 prepare_write_banner(msgr, con); 1715 prepare_write_banner(msgr, con);
1717 prepare_write_connect(msgr, con, 1); 1716 prepare_write_connect(msgr, con, 1);
1718 prepare_read_banner(con); 1717 prepare_read_banner(con);
@@ -1737,16 +1736,12 @@ more_kvec:
1737 if (con->out_skip) { 1736 if (con->out_skip) {
1738 ret = write_partial_skip(con); 1737 ret = write_partial_skip(con);
1739 if (ret <= 0) 1738 if (ret <= 0)
1740 goto done; 1739 goto out;
1741 if (ret < 0) {
1742 dout("try_write write_partial_skip err %d\n", ret);
1743 goto done;
1744 }
1745 } 1740 }
1746 if (con->out_kvec_left) { 1741 if (con->out_kvec_left) {
1747 ret = write_partial_kvec(con); 1742 ret = write_partial_kvec(con);
1748 if (ret <= 0) 1743 if (ret <= 0)
1749 goto done; 1744 goto out;
1750 } 1745 }
1751 1746
1752 /* msg pages? */ 1747 /* msg pages? */
@@ -1761,11 +1756,11 @@ more_kvec:
1761 if (ret == 1) 1756 if (ret == 1)
1762 goto more_kvec; /* we need to send the footer, too! */ 1757 goto more_kvec; /* we need to send the footer, too! */
1763 if (ret == 0) 1758 if (ret == 0)
1764 goto done; 1759 goto out;
1765 if (ret < 0) { 1760 if (ret < 0) {
1766 dout("try_write write_partial_msg_pages err %d\n", 1761 dout("try_write write_partial_msg_pages err %d\n",
1767 ret); 1762 ret);
1768 goto done; 1763 goto out;
1769 } 1764 }
1770 } 1765 }
1771 1766
@@ -1789,10 +1784,9 @@ do_next:
1789 /* Nothing to do! */ 1784 /* Nothing to do! */
1790 clear_bit(WRITE_PENDING, &con->state); 1785 clear_bit(WRITE_PENDING, &con->state);
1791 dout("try_write nothing else to write.\n"); 1786 dout("try_write nothing else to write.\n");
1792done:
1793 ret = 0; 1787 ret = 0;
1794out: 1788out:
1795 dout("try_write done on %p\n", con); 1789 dout("try_write done on %p ret %d\n", con, ret);
1796 return ret; 1790 return ret;
1797} 1791}
1798 1792
@@ -1821,19 +1815,17 @@ more:
1821 dout("try_read connecting\n"); 1815 dout("try_read connecting\n");
1822 ret = read_partial_banner(con); 1816 ret = read_partial_banner(con);
1823 if (ret <= 0) 1817 if (ret <= 0)
1824 goto done;
1825 if (process_banner(con) < 0) {
1826 ret = -1;
1827 goto out; 1818 goto out;
1828 } 1819 ret = process_banner(con);
1820 if (ret < 0)
1821 goto out;
1829 } 1822 }
1830 ret = read_partial_connect(con); 1823 ret = read_partial_connect(con);
1831 if (ret <= 0) 1824 if (ret <= 0)
1832 goto done;
1833 if (process_connect(con) < 0) {
1834 ret = -1;
1835 goto out; 1825 goto out;
1836 } 1826 ret = process_connect(con);
1827 if (ret < 0)
1828 goto out;
1837 goto more; 1829 goto more;
1838 } 1830 }
1839 1831
@@ -1848,7 +1840,7 @@ more:
1848 dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); 1840 dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
1849 ret = ceph_tcp_recvmsg(con->sock, buf, skip); 1841 ret = ceph_tcp_recvmsg(con->sock, buf, skip);
1850 if (ret <= 0) 1842 if (ret <= 0)
1851 goto done; 1843 goto out;
1852 con->in_base_pos += ret; 1844 con->in_base_pos += ret;
1853 if (con->in_base_pos) 1845 if (con->in_base_pos)
1854 goto more; 1846 goto more;
@@ -1859,7 +1851,7 @@ more:
1859 */ 1851 */
1860 ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1); 1852 ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
1861 if (ret <= 0) 1853 if (ret <= 0)
1862 goto done; 1854 goto out;
1863 dout("try_read got tag %d\n", (int)con->in_tag); 1855 dout("try_read got tag %d\n", (int)con->in_tag);
1864 switch (con->in_tag) { 1856 switch (con->in_tag) {
1865 case CEPH_MSGR_TAG_MSG: 1857 case CEPH_MSGR_TAG_MSG:
@@ -1870,7 +1862,7 @@ more:
1870 break; 1862 break;
1871 case CEPH_MSGR_TAG_CLOSE: 1863 case CEPH_MSGR_TAG_CLOSE:
1872 set_bit(CLOSED, &con->state); /* fixme */ 1864 set_bit(CLOSED, &con->state); /* fixme */
1873 goto done; 1865 goto out;
1874 default: 1866 default:
1875 goto bad_tag; 1867 goto bad_tag;
1876 } 1868 }
@@ -1882,13 +1874,12 @@ more:
1882 case -EBADMSG: 1874 case -EBADMSG:
1883 con->error_msg = "bad crc"; 1875 con->error_msg = "bad crc";
1884 ret = -EIO; 1876 ret = -EIO;
1885 goto out; 1877 break;
1886 case -EIO: 1878 case -EIO:
1887 con->error_msg = "io error"; 1879 con->error_msg = "io error";
1888 goto out; 1880 break;
1889 default:
1890 goto done;
1891 } 1881 }
1882 goto out;
1892 } 1883 }
1893 if (con->in_tag == CEPH_MSGR_TAG_READY) 1884 if (con->in_tag == CEPH_MSGR_TAG_READY)
1894 goto more; 1885 goto more;
@@ -1898,15 +1889,13 @@ more:
1898 if (con->in_tag == CEPH_MSGR_TAG_ACK) { 1889 if (con->in_tag == CEPH_MSGR_TAG_ACK) {
1899 ret = read_partial_ack(con); 1890 ret = read_partial_ack(con);
1900 if (ret <= 0) 1891 if (ret <= 0)
1901 goto done; 1892 goto out;
1902 process_ack(con); 1893 process_ack(con);
1903 goto more; 1894 goto more;
1904 } 1895 }
1905 1896
1906done:
1907 ret = 0;
1908out: 1897out:
1909 dout("try_read done on %p\n", con); 1898 dout("try_read done on %p ret %d\n", con, ret);
1910 return ret; 1899 return ret;
1911 1900
1912bad_tag: 1901bad_tag:
@@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work)
1951 work.work); 1940 work.work);
1952 1941
1953 mutex_lock(&con->mutex); 1942 mutex_lock(&con->mutex);
1943 if (test_and_clear_bit(BACKOFF, &con->state)) {
1944 dout("con_work %p backing off\n", con);
1945 if (queue_delayed_work(ceph_msgr_wq, &con->work,
1946 round_jiffies_relative(con->delay))) {
1947 dout("con_work %p backoff %lu\n", con, con->delay);
1948 mutex_unlock(&con->mutex);
1949 return;
1950 } else {
1951 con->ops->put(con);
1952 dout("con_work %p FAILED to back off %lu\n", con,
1953 con->delay);
1954 }
1955 }
1954 1956
1957 if (test_bit(STANDBY, &con->state)) {
1958 dout("con_work %p STANDBY\n", con);
1959 goto done;
1960 }
1955 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1961 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
1956 dout("con_work CLOSED\n"); 1962 dout("con_work CLOSED\n");
1957 con_close_socket(con); 1963 con_close_socket(con);
@@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con)
2008 /* Requeue anything that hasn't been acked */ 2014 /* Requeue anything that hasn't been acked */
2009 list_splice_init(&con->out_sent, &con->out_queue); 2015 list_splice_init(&con->out_sent, &con->out_queue);
2010 2016
2011 /* If there are no messages in the queue, place the connection 2017 /* If there are no messages queued or keepalive pending, place
2012 * in a STANDBY state (i.e., don't try to reconnect just yet). */ 2018 * the connection in a STANDBY state */
2013 if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { 2019 if (list_empty(&con->out_queue) &&
2014 dout("fault setting STANDBY\n"); 2020 !test_bit(KEEPALIVE_PENDING, &con->state)) {
2021 dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
2022 clear_bit(WRITE_PENDING, &con->state);
2015 set_bit(STANDBY, &con->state); 2023 set_bit(STANDBY, &con->state);
2016 } else { 2024 } else {
2017 /* retry after a delay. */ 2025 /* retry after a delay. */
@@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con)
2019 con->delay = BASE_DELAY_INTERVAL; 2027 con->delay = BASE_DELAY_INTERVAL;
2020 else if (con->delay < MAX_DELAY_INTERVAL) 2028 else if (con->delay < MAX_DELAY_INTERVAL)
2021 con->delay *= 2; 2029 con->delay *= 2;
2022 dout("fault queueing %p delay %lu\n", con, con->delay);
2023 con->ops->get(con); 2030 con->ops->get(con);
2024 if (queue_delayed_work(ceph_msgr_wq, &con->work, 2031 if (queue_delayed_work(ceph_msgr_wq, &con->work,
2025 round_jiffies_relative(con->delay)) == 0) 2032 round_jiffies_relative(con->delay))) {
2033 dout("fault queued %p delay %lu\n", con, con->delay);
2034 } else {
2026 con->ops->put(con); 2035 con->ops->put(con);
2036 dout("fault failed to queue %p delay %lu, backoff\n",
2037 con, con->delay);
2038 /*
2039 * In many cases we see a socket state change
2040 * while con_work is running and end up
2041 * queuing (non-delayed) work, such that we
2042 * can't backoff with a delay. Set a flag so
2043 * that when con_work restarts we schedule the
2044 * delay then.
2045 */
2046 set_bit(BACKOFF, &con->state);
2047 }
2027 } 2048 }
2028 2049
2029out_unlock: 2050out_unlock:
@@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
2094} 2115}
2095EXPORT_SYMBOL(ceph_messenger_destroy); 2116EXPORT_SYMBOL(ceph_messenger_destroy);
2096 2117
2118static void clear_standby(struct ceph_connection *con)
2119{
2120 /* come back from STANDBY? */
2121 if (test_and_clear_bit(STANDBY, &con->state)) {
2122 mutex_lock(&con->mutex);
2123 dout("clear_standby %p and ++connect_seq\n", con);
2124 con->connect_seq++;
2125 WARN_ON(test_bit(WRITE_PENDING, &con->state));
2126 WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
2127 mutex_unlock(&con->mutex);
2128 }
2129}
2130
2097/* 2131/*
2098 * Queue up an outgoing message on the given connection. 2132 * Queue up an outgoing message on the given connection.
2099 */ 2133 */
@@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
2126 2160
2127 /* if there wasn't anything waiting to send before, queue 2161 /* if there wasn't anything waiting to send before, queue
2128 * new work */ 2162 * new work */
2163 clear_standby(con);
2129 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2164 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2130 queue_con(con); 2165 queue_con(con);
2131} 2166}
@@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
2191 */ 2226 */
2192void ceph_con_keepalive(struct ceph_connection *con) 2227void ceph_con_keepalive(struct ceph_connection *con)
2193{ 2228{
2229 dout("con_keepalive %p\n", con);
2230 clear_standby(con);
2194 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2231 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
2195 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2232 test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2196 queue_con(con); 2233 queue_con(con);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a040e64c69f..cd9c21df87d1 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data,
16 int num_pages, bool write_page) 16 int num_pages, bool write_page)
17{ 17{
18 struct page **pages; 18 struct page **pages;
19 int rc; 19 int got = 0;
20 int rc = 0;
20 21
21 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); 22 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
22 if (!pages) 23 if (!pages)
23 return ERR_PTR(-ENOMEM); 24 return ERR_PTR(-ENOMEM);
24 25
25 down_read(&current->mm->mmap_sem); 26 down_read(&current->mm->mmap_sem);
26 rc = get_user_pages(current, current->mm, (unsigned long)data, 27 while (got < num_pages) {
27 num_pages, write_page, 0, pages, NULL); 28 rc = get_user_pages(current, current->mm,
29 (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
30 num_pages - got, write_page, 0, pages + got, NULL);
31 if (rc < 0)
32 break;
33 BUG_ON(rc == 0);
34 got += rc;
35 }
28 up_read(&current->mm->mmap_sem); 36 up_read(&current->mm->mmap_sem);
29 if (rc < num_pages) 37 if (rc < 0)
30 goto fail; 38 goto fail;
31 return pages; 39 return pages;
32 40
33fail: 41fail:
34 ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); 42 ceph_put_page_vector(pages, got, false);
35 return ERR_PTR(rc); 43 return ERR_PTR(rc);
36} 44}
37EXPORT_SYMBOL(ceph_get_direct_page_vector); 45EXPORT_SYMBOL(ceph_get_direct_page_vector);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e726cb47ed7..6561021d22d1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1114,13 +1114,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
1114void dev_load(struct net *net, const char *name) 1114void dev_load(struct net *net, const char *name)
1115{ 1115{
1116 struct net_device *dev; 1116 struct net_device *dev;
1117 int no_module;
1117 1118
1118 rcu_read_lock(); 1119 rcu_read_lock();
1119 dev = dev_get_by_name_rcu(net, name); 1120 dev = dev_get_by_name_rcu(net, name);
1120 rcu_read_unlock(); 1121 rcu_read_unlock();
1121 1122
1122 if (!dev && capable(CAP_NET_ADMIN)) 1123 no_module = !dev;
1123 request_module("%s", name); 1124 if (no_module && capable(CAP_NET_ADMIN))
1125 no_module = request_module("netdev-%s", name);
1126 if (no_module && capable(CAP_SYS_MODULE)) {
1127 if (!request_module("%s", name))
1128 pr_err("Loading kernel module for a network device "
1129"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
1130"instead\n", name);
1131 }
1124} 1132}
1125EXPORT_SYMBOL(dev_load); 1133EXPORT_SYMBOL(dev_load);
1126 1134
@@ -1280,10 +1288,13 @@ static int __dev_close_many(struct list_head *head)
1280 1288
1281static int __dev_close(struct net_device *dev) 1289static int __dev_close(struct net_device *dev)
1282{ 1290{
1291 int retval;
1283 LIST_HEAD(single); 1292 LIST_HEAD(single);
1284 1293
1285 list_add(&dev->unreg_list, &single); 1294 list_add(&dev->unreg_list, &single);
1286 return __dev_close_many(&single); 1295 retval = __dev_close_many(&single);
1296 list_del(&single);
1297 return retval;
1287} 1298}
1288 1299
1289int dev_close_many(struct list_head *head) 1300int dev_close_many(struct list_head *head)
@@ -1325,7 +1336,7 @@ int dev_close(struct net_device *dev)
1325 1336
1326 list_add(&dev->unreg_list, &single); 1337 list_add(&dev->unreg_list, &single);
1327 dev_close_many(&single); 1338 dev_close_many(&single);
1328 1339 list_del(&single);
1329 return 0; 1340 return 0;
1330} 1341}
1331EXPORT_SYMBOL(dev_close); 1342EXPORT_SYMBOL(dev_close);
@@ -5063,6 +5074,7 @@ static void rollback_registered(struct net_device *dev)
5063 5074
5064 list_add(&dev->unreg_list, &single); 5075 list_add(&dev->unreg_list, &single);
5065 rollback_registered_many(&single); 5076 rollback_registered_many(&single);
5077 list_del(&single);
5066} 5078}
5067 5079
5068unsigned long netdev_fix_features(unsigned long features, const char *name) 5080unsigned long netdev_fix_features(unsigned long features, const char *name)
@@ -6216,6 +6228,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
6216 } 6228 }
6217 } 6229 }
6218 unregister_netdevice_many(&dev_kill_list); 6230 unregister_netdevice_many(&dev_kill_list);
6231 list_del(&dev_kill_list);
6219 rtnl_unlock(); 6232 rtnl_unlock();
6220} 6233}
6221 6234
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 508f9c18992f..133fd22ea287 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
144 144
145 list_for_each_entry(ha, &from_list->list, list) { 145 list_for_each_entry(ha, &from_list->list, list) {
146 type = addr_type ? addr_type : ha->type; 146 type = addr_type ? addr_type : ha->type;
147 __hw_addr_del(to_list, ha->addr, addr_len, addr_type); 147 __hw_addr_del(to_list, ha->addr, addr_len, type);
148 } 148 }
149} 149}
150EXPORT_SYMBOL(__hw_addr_del_multiple); 150EXPORT_SYMBOL(__hw_addr_del_multiple);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461f..b5bada92f637 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3321,7 +3321,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
3321 pkt_dev->started_at); 3321 pkt_dev->started_at);
3322 ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); 3322 ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
3323 3323
3324 p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", 3324 p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
3325 (unsigned long long)ktime_to_us(elapsed), 3325 (unsigned long long)ktime_to_us(elapsed),
3326 (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), 3326 (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
3327 (unsigned long long)ktime_to_us(idle), 3327 (unsigned long long)ktime_to_us(idle),
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe454450801..4c1ef026d695 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
95 int fd = fdp[i]; 95 int fd = fdp[i];
96 struct file *file; 96 struct file *file;
97 97
98 if (fd < 0 || !(file = fget(fd))) 98 if (fd < 0 || !(file = fget_raw(fd)))
99 return -EBADF; 99 return -EBADF;
100 *fpp++ = file; 100 *fpp++ = file;
101 fpl->count++; 101 fpl->count++;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 6b03f561caec..c44348adba3b 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -626,6 +626,9 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb,
626 dcb->cmd = DCB_CMD_GAPP; 626 dcb->cmd = DCB_CMD_GAPP;
627 627
628 app_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_APP); 628 app_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_APP);
629 if (!app_nest)
630 goto out_cancel;
631
629 ret = nla_put_u8(dcbnl_skb, DCB_APP_ATTR_IDTYPE, idtype); 632 ret = nla_put_u8(dcbnl_skb, DCB_APP_ATTR_IDTYPE, idtype);
630 if (ret) 633 if (ret)
631 goto out_cancel; 634 goto out_cancel;
@@ -1190,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
1190 goto err; 1193 goto err;
1191 } 1194 }
1192 1195
1193 if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) { 1196 if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
1194 struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); 1197 struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
1195 err = ops->ieee_setpfc(netdev, pfc); 1198 err = ops->ieee_setpfc(netdev, pfc);
1196 if (err) 1199 if (err)
@@ -1613,6 +1616,10 @@ EXPORT_SYMBOL(dcb_getapp);
1613u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) 1616u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
1614{ 1617{
1615 struct dcb_app_type *itr; 1618 struct dcb_app_type *itr;
1619 struct dcb_app_type event;
1620
1621 memcpy(&event.name, dev->name, sizeof(event.name));
1622 memcpy(&event.app, new, sizeof(event.app));
1616 1623
1617 spin_lock(&dcb_lock); 1624 spin_lock(&dcb_lock);
1618 /* Search for existing match and replace */ 1625 /* Search for existing match and replace */
@@ -1644,7 +1651,7 @@ u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
1644 } 1651 }
1645out: 1652out:
1646 spin_unlock(&dcb_lock); 1653 spin_unlock(&dcb_lock);
1647 call_dcbevent_notifiers(DCB_APP_EVENT, new); 1654 call_dcbevent_notifiers(DCB_APP_EVENT, &event);
1648 return 0; 1655 return 0;
1649} 1656}
1650EXPORT_SYMBOL(dcb_setapp); 1657EXPORT_SYMBOL(dcb_setapp);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 8cde009e8b85..4222e7a654b0 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
614 /* Caller (dccp_v4_do_rcv) will send Reset */ 614 /* Caller (dccp_v4_do_rcv) will send Reset */
615 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; 615 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
616 return 1; 616 return 1;
617 } else if (sk->sk_state == DCCP_CLOSED) {
618 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
619 return 1;
617 } 620 }
618 621
619 if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { 622 if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
@@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
668 } 671 }
669 672
670 switch (sk->sk_state) { 673 switch (sk->sk_state) {
671 case DCCP_CLOSED:
672 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
673 return 1;
674
675 case DCCP_REQUESTING: 674 case DCCP_REQUESTING:
676 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); 675 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
677 if (queued >= 0) 676 if (queued >= 0)
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 739435a6af39..cfa7a5e1c5c9 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
67 size_t result_len = 0; 67 size_t result_len = 0;
68 const char *data = _data, *end, *opt; 68 const char *data = _data, *end, *opt;
69 69
70 kenter("%%%d,%s,'%s',%zu", 70 kenter("%%%d,%s,'%*.*s',%zu",
71 key->serial, key->description, data, datalen); 71 key->serial, key->description,
72 (int)datalen, (int)datalen, data, datalen);
72 73
73 if (datalen <= 1 || !data || data[datalen - 1] != '\0') 74 if (datalen <= 1 || !data || data[datalen - 1] != '\0')
74 return -EINVAL; 75 return -EINVAL;
@@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
217 seq_printf(m, ": %u", key->datalen); 218 seq_printf(m, ": %u", key->datalen);
218} 219}
219 220
221/*
222 * read the DNS data
223 * - the key's semaphore is read-locked
224 */
225static long dns_resolver_read(const struct key *key,
226 char __user *buffer, size_t buflen)
227{
228 if (key->type_data.x[0])
229 return key->type_data.x[0];
230
231 return user_read(key, buffer, buflen);
232}
233
220struct key_type key_type_dns_resolver = { 234struct key_type key_type_dns_resolver = {
221 .name = "dns_resolver", 235 .name = "dns_resolver",
222 .instantiate = dns_resolver_instantiate, 236 .instantiate = dns_resolver_instantiate,
@@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = {
224 .revoke = user_revoke, 238 .revoke = user_revoke,
225 .destroy = user_destroy, 239 .destroy = user_destroy,
226 .describe = dns_resolver_describe, 240 .describe = dns_resolver_describe,
227 .read = user_read, 241 .read = dns_resolver_read,
228}; 242};
229 243
230static int __init init_dns_resolver(void) 244static int __init init_dns_resolver(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 748cb5b337bd..036652c8166d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -670,7 +670,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
670 ifap = &ifa->ifa_next) { 670 ifap = &ifa->ifa_next) {
671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 sin_orig.sin_addr.s_addr == 672 sin_orig.sin_addr.s_addr ==
673 ifa->ifa_address) { 673 ifa->ifa_local) {
674 break; /* found */ 674 break; /* found */
675 } 675 }
676 } 676 }
@@ -1030,6 +1030,21 @@ static inline bool inetdev_valid_mtu(unsigned mtu)
1030 return mtu >= 68; 1030 return mtu >= 68;
1031} 1031}
1032 1032
1033static void inetdev_send_gratuitous_arp(struct net_device *dev,
1034 struct in_device *in_dev)
1035
1036{
1037 struct in_ifaddr *ifa = in_dev->ifa_list;
1038
1039 if (!ifa)
1040 return;
1041
1042 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1043 ifa->ifa_local, dev,
1044 ifa->ifa_local, NULL,
1045 dev->dev_addr, NULL);
1046}
1047
1033/* Called only under RTNL semaphore */ 1048/* Called only under RTNL semaphore */
1034 1049
1035static int inetdev_event(struct notifier_block *this, unsigned long event, 1050static int inetdev_event(struct notifier_block *this, unsigned long event,
@@ -1082,18 +1097,13 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1082 } 1097 }
1083 ip_mc_up(in_dev); 1098 ip_mc_up(in_dev);
1084 /* fall through */ 1099 /* fall through */
1085 case NETDEV_NOTIFY_PEERS:
1086 case NETDEV_CHANGEADDR: 1100 case NETDEV_CHANGEADDR:
1101 if (!IN_DEV_ARP_NOTIFY(in_dev))
1102 break;
1103 /* fall through */
1104 case NETDEV_NOTIFY_PEERS:
1087 /* Send gratuitous ARP to notify of link change */ 1105 /* Send gratuitous ARP to notify of link change */
1088 if (IN_DEV_ARP_NOTIFY(in_dev)) { 1106 inetdev_send_gratuitous_arp(dev, in_dev);
1089 struct in_ifaddr *ifa = in_dev->ifa_list;
1090
1091 if (ifa)
1092 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1093 ifa->ifa_address, dev,
1094 ifa->ifa_address, NULL,
1095 dev->dev_addr, NULL);
1096 }
1097 break; 1107 break;
1098 case NETDEV_DOWN: 1108 case NETDEV_DOWN:
1099 ip_mc_down(in_dev); 1109 ip_mc_down(in_dev);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c5af909cf701..3c8dfa16614d 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -505,7 +505,9 @@ restart:
505 } 505 }
506 506
507 rcu_read_unlock(); 507 rcu_read_unlock();
508 local_bh_disable();
508 inet_twsk_deschedule(tw, twdr); 509 inet_twsk_deschedule(tw, twdr);
510 local_bh_enable();
509 inet_twsk_put(tw); 511 inet_twsk_put(tw);
510 goto restart_rcu; 512 goto restart_rcu;
511 } 513 }
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index eb68a0e34e49..d1d0e2c256fc 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -775,6 +775,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
775 .fl4_dst = dst, 775 .fl4_dst = dst,
776 .fl4_src = tiph->saddr, 776 .fl4_src = tiph->saddr,
777 .fl4_tos = RT_TOS(tos), 777 .fl4_tos = RT_TOS(tos),
778 .proto = IPPROTO_GRE,
778 .fl_gre_key = tunnel->parms.o_key 779 .fl_gre_key = tunnel->parms.o_key
779 }; 780 };
780 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 781 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
@@ -1764,4 +1765,4 @@ module_exit(ipgre_fini);
1764MODULE_LICENSE("GPL"); 1765MODULE_LICENSE("GPL");
1765MODULE_ALIAS_RTNL_LINK("gre"); 1766MODULE_ALIAS_RTNL_LINK("gre");
1766MODULE_ALIAS_RTNL_LINK("gretap"); 1767MODULE_ALIAS_RTNL_LINK("gretap");
1767MODULE_ALIAS("gre0"); 1768MODULE_ALIAS_NETDEV("gre0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 988f52fba54a..a5f58e7cbb26 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -913,4 +913,4 @@ static void __exit ipip_fini(void)
913module_init(ipip_init); 913module_init(ipip_init);
914module_exit(ipip_fini); 914module_exit(ipip_fini);
915MODULE_LICENSE("GPL"); 915MODULE_LICENSE("GPL");
916MODULE_ALIAS("tunl0"); 916MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 788a3e74834e..6ed6603c2f6d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2722,6 +2722,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2722 .destroy = ipv4_dst_destroy, 2722 .destroy = ipv4_dst_destroy,
2723 .check = ipv4_blackhole_dst_check, 2723 .check = ipv4_blackhole_dst_check,
2724 .default_mtu = ipv4_blackhole_default_mtu, 2724 .default_mtu = ipv4_blackhole_default_mtu,
2725 .default_advmss = ipv4_default_advmss,
2725 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2726 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2726}; 2727};
2727 2728
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb7f82ebf4a3..65f6c0406245 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
1222 } 1222 }
1223 1223
1224 /* D-SACK for already forgotten data... Do dumb counting. */ 1224 /* D-SACK for already forgotten data... Do dumb counting. */
1225 if (dup_sack && 1225 if (dup_sack && tp->undo_marker && tp->undo_retrans &&
1226 !after(end_seq_0, prior_snd_una) && 1226 !after(end_seq_0, prior_snd_una) &&
1227 after(end_seq_0, tp->undo_marker)) 1227 after(end_seq_0, tp->undo_marker))
1228 tp->undo_retrans--; 1228 tp->undo_retrans--;
@@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
1299 1299
1300 /* Account D-SACK for retransmitted packet. */ 1300 /* Account D-SACK for retransmitted packet. */
1301 if (dup_sack && (sacked & TCPCB_RETRANS)) { 1301 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1302 if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) 1302 if (tp->undo_marker && tp->undo_retrans &&
1303 after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1303 tp->undo_retrans--; 1304 tp->undo_retrans--;
1304 if (sacked & TCPCB_SACKED_ACKED) 1305 if (sacked & TCPCB_SACKED_ACKED)
1305 state->reord = min(fack_count, state->reord); 1306 state->reord = min(fack_count, state->reord);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 406f320336e6..dfa5beb0c1c8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2162 if (!tp->retrans_stamp) 2162 if (!tp->retrans_stamp)
2163 tp->retrans_stamp = TCP_SKB_CB(skb)->when; 2163 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2164 2164
2165 tp->undo_retrans++; 2165 tp->undo_retrans += tcp_skb_pcount(skb);
2166 2166
2167 /* snd_nxt is stored to detect loss of retransmitted segment, 2167 /* snd_nxt is stored to detect loss of retransmitted segment,
2168 * see tcp_input.c tcp_sacktag_write_queue(). 2168 * see tcp_input.c tcp_sacktag_write_queue().
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4f4483e697bd..e528a42a52be 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
57MODULE_AUTHOR("Ville Nuorvala"); 57MODULE_AUTHOR("Ville Nuorvala");
58MODULE_DESCRIPTION("IPv6 tunneling device"); 58MODULE_DESCRIPTION("IPv6 tunneling device");
59MODULE_LICENSE("GPL"); 59MODULE_LICENSE("GPL");
60MODULE_ALIAS_NETDEV("ip6tnl0");
60 61
61#ifdef IP6_TNL_DEBUG 62#ifdef IP6_TNL_DEBUG
62#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) 63#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 09c88891a753..de338037a736 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -410,7 +410,7 @@ fallback:
410 if (p != NULL) { 410 if (p != NULL) {
411 sb_add(m, "%02x", *p++); 411 sb_add(m, "%02x", *p++);
412 for (i = 1; i < len; i++) 412 for (i = 1; i < len; i++)
413 sb_add(m, ":%02x", p[i]); 413 sb_add(m, ":%02x", *p++);
414 } 414 }
415 sb_add(m, " "); 415 sb_add(m, " ");
416 416
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1c29f95695de..e7db7014e89f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -128,6 +128,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
128 .destroy = ip6_dst_destroy, 128 .destroy = ip6_dst_destroy,
129 .check = ip6_dst_check, 129 .check = ip6_dst_check,
130 .default_mtu = ip6_blackhole_default_mtu, 130 .default_mtu = ip6_blackhole_default_mtu,
131 .default_advmss = ip6_default_advmss,
131 .update_pmtu = ip6_rt_blackhole_update_pmtu, 132 .update_pmtu = ip6_rt_blackhole_update_pmtu,
132}; 133};
133 134
@@ -738,8 +739,10 @@ restart:
738 739
739 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
740 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
741 else 742 else if (!(rt->dst.flags & DST_HOST))
742 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
744 else
745 goto out2;
743 746
744 dst_release(&rt->dst); 747 dst_release(&rt->dst);
745 rt = nrt ? : net->ipv6.ip6_null_entry; 748 rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -2556,14 +2559,16 @@ static
2556int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2559int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2557 void __user *buffer, size_t *lenp, loff_t *ppos) 2560 void __user *buffer, size_t *lenp, loff_t *ppos)
2558{ 2561{
2559 struct net *net = current->nsproxy->net_ns; 2562 struct net *net;
2560 int delay = net->ipv6.sysctl.flush_delay; 2563 int delay;
2561 if (write) { 2564 if (!write)
2562 proc_dointvec(ctl, write, buffer, lenp, ppos);
2563 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2564 return 0;
2565 } else
2566 return -EINVAL; 2565 return -EINVAL;
2566
2567 net = (struct net *)ctl->extra1;
2568 delay = net->ipv6.sysctl.flush_delay;
2569 proc_dointvec(ctl, write, buffer, lenp, ppos);
2570 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2571 return 0;
2567} 2572}
2568 2573
2569ctl_table ipv6_route_table_template[] = { 2574ctl_table ipv6_route_table_template[] = {
@@ -2650,6 +2655,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2650 2655
2651 if (table) { 2656 if (table) {
2652 table[0].data = &net->ipv6.sysctl.flush_delay; 2657 table[0].data = &net->ipv6.sysctl.flush_delay;
2658 table[0].extra1 = net;
2653 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2659 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2654 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2660 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2655 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2661 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a547..d2c16e10f650 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1290,4 +1290,4 @@ static int __init sit_init(void)
1290module_init(sit_init); 1290module_init(sit_init);
1291module_exit(sit_cleanup); 1291module_exit(sit_cleanup);
1292MODULE_LICENSE("GPL"); 1292MODULE_LICENSE("GPL");
1293MODULE_ALIAS("sit0"); 1293MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8acba456744e..7a10a8d1b2d0 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1229,6 +1229,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
1229 } 1229 }
1230 mutex_unlock(&local->iflist_mtx); 1230 mutex_unlock(&local->iflist_mtx);
1231 unregister_netdevice_many(&unreg_list); 1231 unregister_netdevice_many(&unreg_list);
1232 list_del(&unreg_list);
1232} 1233}
1233 1234
1234static u32 ieee80211_idle_off(struct ieee80211_local *local, 1235static u32 ieee80211_idle_off(struct ieee80211_local *local,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 45fbb9e33746..c9ceb4d57ab0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1033,6 +1033,12 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1033 if (is_multicast_ether_addr(hdr->addr1)) 1033 if (is_multicast_ether_addr(hdr->addr1))
1034 return; 1034 return;
1035 1035
1036 /*
1037 * In case we receive frames after disassociation.
1038 */
1039 if (!sdata->u.mgd.associated)
1040 return;
1041
1036 ieee80211_sta_reset_conn_monitor(sdata); 1042 ieee80211_sta_reset_conn_monitor(sdata);
1037} 1043}
1038 1044
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index cf68700abffa..d036597aabbe 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1210,7 +1210,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1210 switch (sdata->vif.type) { 1210 switch (sdata->vif.type) {
1211 case NL80211_IFTYPE_STATION: 1211 case NL80211_IFTYPE_STATION:
1212 changed |= BSS_CHANGED_ASSOC; 1212 changed |= BSS_CHANGED_ASSOC;
1213 mutex_lock(&sdata->u.mgd.mtx);
1213 ieee80211_bss_info_change_notify(sdata, changed); 1214 ieee80211_bss_info_change_notify(sdata, changed);
1215 mutex_unlock(&sdata->u.mgd.mtx);
1214 break; 1216 break;
1215 case NL80211_IFTYPE_ADHOC: 1217 case NL80211_IFTYPE_ADHOC:
1216 changed |= BSS_CHANGED_IBSS; 1218 changed |= BSS_CHANGED_IBSS;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 32fcbe290c04..4aa614b8a96a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -133,6 +133,7 @@ unsigned int nf_iterate(struct list_head *head,
133 133
134 /* Optimization: we don't need to hold module 134 /* Optimization: we don't need to hold module
135 reference here, since function can't sleep. --RR */ 135 reference here, since function can't sleep. --RR */
136repeat:
136 verdict = elem->hook(hook, skb, indev, outdev, okfn); 137 verdict = elem->hook(hook, skb, indev, outdev, okfn);
137 if (verdict != NF_ACCEPT) { 138 if (verdict != NF_ACCEPT) {
138#ifdef CONFIG_NETFILTER_DEBUG 139#ifdef CONFIG_NETFILTER_DEBUG
@@ -145,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head,
145#endif 146#endif
146 if (verdict != NF_REPEAT) 147 if (verdict != NF_REPEAT)
147 return verdict; 148 return verdict;
148 *i = (*i)->prev; 149 goto repeat;
149 } 150 }
150 } 151 }
151 return NF_ACCEPT; 152 return NF_ACCEPT;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5101ab..ba98e1308f3c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -808,9 +808,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
808 dest->u_threshold = udest->u_threshold; 808 dest->u_threshold = udest->u_threshold;
809 dest->l_threshold = udest->l_threshold; 809 dest->l_threshold = udest->l_threshold;
810 810
811 spin_lock(&dest->dst_lock); 811 spin_lock_bh(&dest->dst_lock);
812 ip_vs_dst_reset(dest); 812 ip_vs_dst_reset(dest);
813 spin_unlock(&dest->dst_lock); 813 spin_unlock_bh(&dest->dst_lock);
814 814
815 if (add) 815 if (add)
816 ip_vs_new_estimator(&dest->stats); 816 ip_vs_new_estimator(&dest->stats);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393eab88e..91816998ed86 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister);
85 85
86int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) 86int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
87{ 87{
88 if (pf >= ARRAY_SIZE(nf_loggers))
89 return -EINVAL;
88 mutex_lock(&nf_log_mutex); 90 mutex_lock(&nf_log_mutex);
89 if (__find_logger(pf, logger->name) == NULL) { 91 if (__find_logger(pf, logger->name) == NULL) {
90 mutex_unlock(&nf_log_mutex); 92 mutex_unlock(&nf_log_mutex);
@@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf);
98 100
99void nf_log_unbind_pf(u_int8_t pf) 101void nf_log_unbind_pf(u_int8_t pf)
100{ 102{
103 if (pf >= ARRAY_SIZE(nf_loggers))
104 return;
101 mutex_lock(&nf_log_mutex); 105 mutex_lock(&nf_log_mutex);
102 rcu_assign_pointer(nf_loggers[pf], NULL); 106 rcu_assign_pointer(nf_loggers[pf], NULL);
103 mutex_unlock(&nf_log_mutex); 107 mutex_unlock(&nf_log_mutex);
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index 4d87befb04c0..474d621cbc2e 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -28,26 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb)
28 skb->destructor = NULL; 28 skb->destructor = NULL;
29 29
30 if (sk) 30 if (sk)
31 nf_tproxy_put_sock(sk); 31 sock_put(sk);
32} 32}
33 33
34/* consumes sk */ 34/* consumes sk */
35int 35void
36nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 36nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
37{ 37{
38 bool transparent = (sk->sk_state == TCP_TIME_WAIT) ? 38 /* assigning tw sockets complicates things; most
39 inet_twsk(sk)->tw_transparent : 39 * skb->sk->X checks would have to test sk->sk_state first */
40 inet_sk(sk)->transparent; 40 if (sk->sk_state == TCP_TIME_WAIT) {
41 41 inet_twsk_put(inet_twsk(sk));
42 if (transparent) { 42 return;
43 skb_orphan(skb); 43 }
44 skb->sk = sk; 44
45 skb->destructor = nf_tproxy_destructor; 45 skb_orphan(skb);
46 return 1; 46 skb->sk = sk;
47 } else 47 skb->destructor = nf_tproxy_destructor;
48 nf_tproxy_put_sock(sk);
49
50 return 0;
51} 48}
52EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); 49EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
53 50
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 640678f47a2a..dcfd57eb9d02 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -33,6 +33,20 @@
33#include <net/netfilter/nf_tproxy_core.h> 33#include <net/netfilter/nf_tproxy_core.h>
34#include <linux/netfilter/xt_TPROXY.h> 34#include <linux/netfilter/xt_TPROXY.h>
35 35
36static bool tproxy_sk_is_transparent(struct sock *sk)
37{
38 if (sk->sk_state != TCP_TIME_WAIT) {
39 if (inet_sk(sk)->transparent)
40 return true;
41 sock_put(sk);
42 } else {
43 if (inet_twsk(sk)->tw_transparent)
44 return true;
45 inet_twsk_put(inet_twsk(sk));
46 }
47 return false;
48}
49
36static inline __be32 50static inline __be32
37tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) 51tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
38{ 52{
@@ -141,7 +155,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
141 skb->dev, NFT_LOOKUP_LISTENER); 155 skb->dev, NFT_LOOKUP_LISTENER);
142 156
143 /* NOTE: assign_sock consumes our sk reference */ 157 /* NOTE: assign_sock consumes our sk reference */
144 if (sk && nf_tproxy_assign_sock(skb, sk)) { 158 if (sk && tproxy_sk_is_transparent(sk)) {
145 /* This should be in a separate target, but we don't do multiple 159 /* This should be in a separate target, but we don't do multiple
146 targets on the same rule yet */ 160 targets on the same rule yet */
147 skb->mark = (skb->mark & ~mark_mask) ^ mark_value; 161 skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
@@ -149,6 +163,8 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
149 pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n", 163 pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
150 iph->protocol, &iph->daddr, ntohs(hp->dest), 164 iph->protocol, &iph->daddr, ntohs(hp->dest),
151 &laddr, ntohs(lport), skb->mark); 165 &laddr, ntohs(lport), skb->mark);
166
167 nf_tproxy_assign_sock(skb, sk);
152 return NF_ACCEPT; 168 return NF_ACCEPT;
153 } 169 }
154 170
@@ -306,7 +322,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
306 par->in, NFT_LOOKUP_LISTENER); 322 par->in, NFT_LOOKUP_LISTENER);
307 323
308 /* NOTE: assign_sock consumes our sk reference */ 324 /* NOTE: assign_sock consumes our sk reference */
309 if (sk && nf_tproxy_assign_sock(skb, sk)) { 325 if (sk && tproxy_sk_is_transparent(sk)) {
310 /* This should be in a separate target, but we don't do multiple 326 /* This should be in a separate target, but we don't do multiple
311 targets on the same rule yet */ 327 targets on the same rule yet */
312 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; 328 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
@@ -314,6 +330,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
314 pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", 330 pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
315 tproto, &iph->saddr, ntohs(hp->source), 331 tproto, &iph->saddr, ntohs(hp->source),
316 laddr, ntohs(lport), skb->mark); 332 laddr, ntohs(lport), skb->mark);
333
334 nf_tproxy_assign_sock(skb, sk);
317 return NF_ACCEPT; 335 return NF_ACCEPT;
318 } 336 }
319 337
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 00d6ae838303..9cc46356b577 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -35,6 +35,15 @@
35#include <net/netfilter/nf_conntrack.h> 35#include <net/netfilter/nf_conntrack.h>
36#endif 36#endif
37 37
38static void
39xt_socket_put_sk(struct sock *sk)
40{
41 if (sk->sk_state == TCP_TIME_WAIT)
42 inet_twsk_put(inet_twsk(sk));
43 else
44 sock_put(sk);
45}
46
38static int 47static int
39extract_icmp4_fields(const struct sk_buff *skb, 48extract_icmp4_fields(const struct sk_buff *skb,
40 u8 *protocol, 49 u8 *protocol,
@@ -164,7 +173,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
164 (sk->sk_state == TCP_TIME_WAIT && 173 (sk->sk_state == TCP_TIME_WAIT &&
165 inet_twsk(sk)->tw_transparent)); 174 inet_twsk(sk)->tw_transparent));
166 175
167 nf_tproxy_put_sock(sk); 176 xt_socket_put_sk(sk);
168 177
169 if (wildcard || !transparent) 178 if (wildcard || !transparent)
170 sk = NULL; 179 sk = NULL;
@@ -298,7 +307,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
298 (sk->sk_state == TCP_TIME_WAIT && 307 (sk->sk_state == TCP_TIME_WAIT &&
299 inet_twsk(sk)->tw_transparent)); 308 inet_twsk(sk)->tw_transparent));
300 309
301 nf_tproxy_put_sock(sk); 310 xt_socket_put_sk(sk);
302 311
303 if (wildcard || !transparent) 312 if (wildcard || !transparent)
304 sk = NULL; 313 sk = NULL;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 478181d53c55..1f924595bdef 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1407,7 +1407,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1407 int noblock = flags&MSG_DONTWAIT; 1407 int noblock = flags&MSG_DONTWAIT;
1408 size_t copied; 1408 size_t copied;
1409 struct sk_buff *skb, *data_skb; 1409 struct sk_buff *skb, *data_skb;
1410 int err; 1410 int err, ret;
1411 1411
1412 if (flags&MSG_OOB) 1412 if (flags&MSG_OOB)
1413 return -EOPNOTSUPP; 1413 return -EOPNOTSUPP;
@@ -1470,8 +1470,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1470 1470
1471 skb_free_datagram(sk, skb); 1471 skb_free_datagram(sk, skb);
1472 1472
1473 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) 1473 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1474 netlink_dump(sk); 1474 ret = netlink_dump(sk);
1475 if (ret) {
1476 sk->sk_err = ret;
1477 sk->sk_error_report(sk);
1478 }
1479 }
1475 1480
1476 scm_recv(sock, msg, siocb->scm, flags); 1481 scm_recv(sock, msg, siocb->scm, flags);
1477out: 1482out:
@@ -1736,6 +1741,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1736 struct netlink_callback *cb; 1741 struct netlink_callback *cb;
1737 struct sock *sk; 1742 struct sock *sk;
1738 struct netlink_sock *nlk; 1743 struct netlink_sock *nlk;
1744 int ret;
1739 1745
1740 cb = kzalloc(sizeof(*cb), GFP_KERNEL); 1746 cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1741 if (cb == NULL) 1747 if (cb == NULL)
@@ -1764,9 +1770,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1764 nlk->cb = cb; 1770 nlk->cb = cb;
1765 mutex_unlock(nlk->cb_mutex); 1771 mutex_unlock(nlk->cb_mutex);
1766 1772
1767 netlink_dump(sk); 1773 ret = netlink_dump(sk);
1774
1768 sock_put(sk); 1775 sock_put(sk);
1769 1776
1777 if (ret)
1778 return ret;
1779
1770 /* We successfully started a dump, by returning -EINTR we 1780 /* We successfully started a dump, by returning -EINTR we
1771 * signal not to send ACK even if it was requested. 1781 * signal not to send ACK even if it was requested.
1772 */ 1782 */
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 4123967d4d65..cce19f95c624 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -364,7 +364,6 @@ void rds_ib_exit(void)
364 rds_ib_sysctl_exit(); 364 rds_ib_sysctl_exit();
365 rds_ib_recv_exit(); 365 rds_ib_recv_exit();
366 rds_trans_unregister(&rds_ib_transport); 366 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit();
368} 367}
369 368
370struct rds_transport rds_ib_transport = { 369struct rds_transport rds_ib_transport = {
@@ -400,13 +399,9 @@ int rds_ib_init(void)
400 399
401 INIT_LIST_HEAD(&rds_ib_devices); 400 INIT_LIST_HEAD(&rds_ib_devices);
402 401
403 ret = rds_ib_fmr_init();
404 if (ret)
405 goto out;
406
407 ret = ib_register_client(&rds_ib_client); 402 ret = ib_register_client(&rds_ib_client);
408 if (ret) 403 if (ret)
409 goto out_fmr_exit; 404 goto out;
410 405
411 ret = rds_ib_sysctl_init(); 406 ret = rds_ib_sysctl_init();
412 if (ret) 407 if (ret)
@@ -430,8 +425,6 @@ out_sysctl:
430 rds_ib_sysctl_exit(); 425 rds_ib_sysctl_exit();
431out_ibreg: 426out_ibreg:
432 rds_ib_unregister_client(); 427 rds_ib_unregister_client();
433out_fmr_exit:
434 rds_ib_fmr_exit();
435out: 428out:
436 return ret; 429 return ret;
437} 430}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index e34ad032b66d..4297d92788dc 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
307void rds_ib_sync_mr(void *trans_private, int dir); 307void rds_ib_sync_mr(void *trans_private, int dir);
308void rds_ib_free_mr(void *trans_private, int invalidate); 308void rds_ib_free_mr(void *trans_private, int invalidate);
309void rds_ib_flush_mrs(void); 309void rds_ib_flush_mrs(void);
310int rds_ib_fmr_init(void);
311void rds_ib_fmr_exit(void);
312 310
313/* ib_recv.c */ 311/* ib_recv.c */
314int rds_ib_recv_init(void); 312int rds_ib_recv_init(void);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 18a833c450c8..819c35a0d9cb 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -38,8 +38,6 @@
38#include "ib.h" 38#include "ib.h"
39#include "xlist.h" 39#include "xlist.h"
40 40
41static struct workqueue_struct *rds_ib_fmr_wq;
42
43static DEFINE_PER_CPU(unsigned long, clean_list_grace); 41static DEFINE_PER_CPU(unsigned long, clean_list_grace);
44#define CLEAN_LIST_BUSY_BIT 0 42#define CLEAN_LIST_BUSY_BIT 0
45 43
@@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
307 int err = 0, iter = 0; 305 int err = 0, iter = 0;
308 306
309 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) 307 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
310 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); 308 schedule_delayed_work(&pool->flush_worker, 10);
311 309
312 while (1) { 310 while (1) {
313 ibmr = rds_ib_reuse_fmr(pool); 311 ibmr = rds_ib_reuse_fmr(pool);
@@ -696,24 +694,6 @@ out_nolock:
696 return ret; 694 return ret;
697} 695}
698 696
699int rds_ib_fmr_init(void)
700{
701 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
702 if (!rds_ib_fmr_wq)
703 return -ENOMEM;
704 return 0;
705}
706
707/*
708 * By the time this is called all the IB devices should have been torn down and
709 * had their pools freed. As each pool is freed its work struct is waited on,
710 * so the pool flushing work queue should be idle by the time we get here.
711 */
712void rds_ib_fmr_exit(void)
713{
714 destroy_workqueue(rds_ib_fmr_wq);
715}
716
717static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 697static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
718{ 698{
719 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); 699 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
741 /* If we've pinned too many pages, request a flush */ 721 /* If we've pinned too many pages, request a flush */
742 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 722 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
743 atomic_read(&pool->dirty_count) >= pool->max_items / 10) 723 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
744 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); 724 schedule_delayed_work(&pool->flush_worker, 10);
745 725
746 if (invalidate) { 726 if (invalidate) {
747 if (likely(!in_interrupt())) { 727 if (likely(!in_interrupt())) {
@@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
749 } else { 729 } else {
750 /* We get here if the user created a MR marked 730 /* We get here if the user created a MR marked
751 * as use_once and invalidate at the same time. */ 731 * as use_once and invalidate at the same time. */
752 queue_delayed_work(rds_ib_fmr_wq, 732 schedule_delayed_work(&pool->flush_worker, 10);
753 &pool->flush_worker, 10);
754 } 733 }
755 } 734 }
756 735
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 71f373c421bc..c47a511f203d 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
551 if (conn->c_loopback 551 if (conn->c_loopback
552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
554 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 554 scat = &rm->data.op_sg[sg];
555 ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
556 ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
557 return ret;
555 } 558 }
556 559
557 /* FIXME we may overallocate here */ 560 /* FIXME we may overallocate here */
diff --git a/net/rds/loop.c b/net/rds/loop.c
index aeec1d483b17..bca6761a3ca2 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 struct scatterlist *sgp = &rm->data.op_sg[sg];
65 int ret = sizeof(struct rds_header) +
66 be32_to_cpu(rm->m_inc.i_hdr.h_len);
67
64 /* Do not send cong updates to loopback */ 68 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 69 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 70 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 71 ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);
72 goto out;
68 } 73 }
69 74
70 BUG_ON(hdr_off || sg || off); 75 BUG_ON(hdr_off || sg || off);
@@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
80 NULL); 85 NULL);
81 86
82 rds_inc_put(&rm->m_inc); 87 rds_inc_put(&rm->m_inc);
83 88out:
84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 89 return ret;
85} 90}
86 91
87/* 92/*
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 89315009bab1..1a2b0633fece 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
423 goto protocol_error; 423 goto protocol_error;
424 } 424 }
425 425
426 case RXRPC_PACKET_TYPE_ACKALL:
426 case RXRPC_PACKET_TYPE_ACK: 427 case RXRPC_PACKET_TYPE_ACK:
427 /* ACK processing is done in process context */ 428 /* ACK processing is done in process context */
428 read_lock_bh(&call->state_lock); 429 read_lock_bh(&call->state_lock);
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 5ee16f0353fe..43ea7de2fc8e 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -25,6 +25,7 @@
25#include <keys/user-type.h> 25#include <keys/user-type.h>
26#include "ar-internal.h" 26#include "ar-internal.h"
27 27
28static int rxrpc_vet_description_s(const char *);
28static int rxrpc_instantiate(struct key *, const void *, size_t); 29static int rxrpc_instantiate(struct key *, const void *, size_t);
29static int rxrpc_instantiate_s(struct key *, const void *, size_t); 30static int rxrpc_instantiate_s(struct key *, const void *, size_t);
30static void rxrpc_destroy(struct key *); 31static void rxrpc_destroy(struct key *);
@@ -52,6 +53,7 @@ EXPORT_SYMBOL(key_type_rxrpc);
52 */ 53 */
53struct key_type key_type_rxrpc_s = { 54struct key_type key_type_rxrpc_s = {
54 .name = "rxrpc_s", 55 .name = "rxrpc_s",
56 .vet_description = rxrpc_vet_description_s,
55 .instantiate = rxrpc_instantiate_s, 57 .instantiate = rxrpc_instantiate_s,
56 .match = user_match, 58 .match = user_match,
57 .destroy = rxrpc_destroy_s, 59 .destroy = rxrpc_destroy_s,
@@ -59,6 +61,23 @@ struct key_type key_type_rxrpc_s = {
59}; 61};
60 62
61/* 63/*
64 * Vet the description for an RxRPC server key
65 */
66static int rxrpc_vet_description_s(const char *desc)
67{
68 unsigned long num;
69 char *p;
70
71 num = simple_strtoul(desc, &p, 10);
72 if (*p != ':' || num > 65535)
73 return -EINVAL;
74 num = simple_strtoul(p + 1, &p, 10);
75 if (*p || num < 1 || num > 255)
76 return -EINVAL;
77 return 0;
78}
79
80/*
62 * parse an RxKAD type XDR format token 81 * parse an RxKAD type XDR format token
63 * - the caller guarantees we have at least 4 words 82 * - the caller guarantees we have at least 4 words
64 */ 83 */
@@ -89,11 +108,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr,
89 return ret; 108 return ret;
90 109
91 plen -= sizeof(*token); 110 plen -= sizeof(*token);
92 token = kmalloc(sizeof(*token), GFP_KERNEL); 111 token = kzalloc(sizeof(*token), GFP_KERNEL);
93 if (!token) 112 if (!token)
94 return -ENOMEM; 113 return -ENOMEM;
95 114
96 token->kad = kmalloc(plen, GFP_KERNEL); 115 token->kad = kzalloc(plen, GFP_KERNEL);
97 if (!token->kad) { 116 if (!token->kad) {
98 kfree(token); 117 kfree(token);
99 return -ENOMEM; 118 return -ENOMEM;
@@ -731,10 +750,10 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
731 goto error; 750 goto error;
732 751
733 ret = -ENOMEM; 752 ret = -ENOMEM;
734 token = kmalloc(sizeof(*token), GFP_KERNEL); 753 token = kzalloc(sizeof(*token), GFP_KERNEL);
735 if (!token) 754 if (!token)
736 goto error; 755 goto error;
737 token->kad = kmalloc(plen, GFP_KERNEL); 756 token->kad = kzalloc(plen, GFP_KERNEL);
738 if (!token->kad) 757 if (!token->kad)
739 goto error_free; 758 goto error_free;
740 759
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598440a2..1bc698039ae2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -839,6 +839,7 @@ void dev_deactivate(struct net_device *dev)
839 839
840 list_add(&dev->unreg_list, &single); 840 list_add(&dev->unreg_list, &single);
841 dev_deactivate_many(&single); 841 dev_deactivate_many(&single);
842 list_del(&single);
842} 843}
843 844
844static void dev_init_scheduler_queue(struct net_device *dev, 845static void dev_init_scheduler_queue(struct net_device *dev,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 2cc46f0962ca..b23428f3c0dd 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2029,11 +2029,11 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
2029 *errp = sctp_make_op_error_fixed(asoc, chunk); 2029 *errp = sctp_make_op_error_fixed(asoc, chunk);
2030 2030
2031 if (*errp) { 2031 if (*errp) {
2032 sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, 2032 if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
2033 WORD_ROUND(ntohs(param.p->length))); 2033 WORD_ROUND(ntohs(param.p->length))))
2034 sctp_addto_chunk_fixed(*errp, 2034 sctp_addto_chunk_fixed(*errp,
2035 WORD_ROUND(ntohs(param.p->length)), 2035 WORD_ROUND(ntohs(param.p->length)),
2036 param.v); 2036 param.v);
2037 } else { 2037 } else {
2038 /* If there is no memory for generating the ERROR 2038 /* If there is no memory for generating the ERROR
2039 * report as specified, an ABORT will be triggered 2039 * report as specified, an ABORT will be triggered
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 243fc09b164e..3fc8624fcd17 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task)
252 252
253/* 253/*
254 * Mark an RPC call as having completed by clearing the 'active' bit 254 * Mark an RPC call as having completed by clearing the 'active' bit
255 * and then waking up all tasks that were sleeping.
255 */ 256 */
256static void rpc_mark_complete_task(struct rpc_task *task) 257static int rpc_complete_task(struct rpc_task *task)
257{ 258{
258 smp_mb__before_clear_bit(); 259 void *m = &task->tk_runstate;
260 wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
261 struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
262 unsigned long flags;
263 int ret;
264
265 spin_lock_irqsave(&wq->lock, flags);
259 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 266 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
260 smp_mb__after_clear_bit(); 267 ret = atomic_dec_and_test(&task->tk_count);
261 wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); 268 if (waitqueue_active(wq))
269 __wake_up_locked_key(wq, TASK_NORMAL, &k);
270 spin_unlock_irqrestore(&wq->lock, flags);
271 return ret;
262} 272}
263 273
264/* 274/*
265 * Allow callers to wait for completion of an RPC call 275 * Allow callers to wait for completion of an RPC call
276 *
277 * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
278 * to enforce taking of the wq->lock and hence avoid races with
279 * rpc_complete_task().
266 */ 280 */
267int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) 281int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
268{ 282{
269 if (action == NULL) 283 if (action == NULL)
270 action = rpc_wait_bit_killable; 284 action = rpc_wait_bit_killable;
271 return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, 285 return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
272 action, TASK_KILLABLE); 286 action, TASK_KILLABLE);
273} 287}
274EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); 288EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
@@ -857,34 +871,67 @@ static void rpc_async_release(struct work_struct *work)
857 rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); 871 rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
858} 872}
859 873
860void rpc_put_task(struct rpc_task *task) 874static void rpc_release_resources_task(struct rpc_task *task)
861{ 875{
862 if (!atomic_dec_and_test(&task->tk_count))
863 return;
864 /* Release resources */
865 if (task->tk_rqstp) 876 if (task->tk_rqstp)
866 xprt_release(task); 877 xprt_release(task);
867 if (task->tk_msg.rpc_cred) 878 if (task->tk_msg.rpc_cred)
868 put_rpccred(task->tk_msg.rpc_cred); 879 put_rpccred(task->tk_msg.rpc_cred);
869 rpc_task_release_client(task); 880 rpc_task_release_client(task);
870 if (task->tk_workqueue != NULL) { 881}
882
883static void rpc_final_put_task(struct rpc_task *task,
884 struct workqueue_struct *q)
885{
886 if (q != NULL) {
871 INIT_WORK(&task->u.tk_work, rpc_async_release); 887 INIT_WORK(&task->u.tk_work, rpc_async_release);
872 queue_work(task->tk_workqueue, &task->u.tk_work); 888 queue_work(q, &task->u.tk_work);
873 } else 889 } else
874 rpc_free_task(task); 890 rpc_free_task(task);
875} 891}
892
893static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
894{
895 if (atomic_dec_and_test(&task->tk_count)) {
896 rpc_release_resources_task(task);
897 rpc_final_put_task(task, q);
898 }
899}
900
901void rpc_put_task(struct rpc_task *task)
902{
903 rpc_do_put_task(task, NULL);
904}
876EXPORT_SYMBOL_GPL(rpc_put_task); 905EXPORT_SYMBOL_GPL(rpc_put_task);
877 906
907void rpc_put_task_async(struct rpc_task *task)
908{
909 rpc_do_put_task(task, task->tk_workqueue);
910}
911EXPORT_SYMBOL_GPL(rpc_put_task_async);
912
878static void rpc_release_task(struct rpc_task *task) 913static void rpc_release_task(struct rpc_task *task)
879{ 914{
880 dprintk("RPC: %5u release task\n", task->tk_pid); 915 dprintk("RPC: %5u release task\n", task->tk_pid);
881 916
882 BUG_ON (RPC_IS_QUEUED(task)); 917 BUG_ON (RPC_IS_QUEUED(task));
883 918
884 /* Wake up anyone who is waiting for task completion */ 919 rpc_release_resources_task(task);
885 rpc_mark_complete_task(task);
886 920
887 rpc_put_task(task); 921 /*
922 * Note: at this point we have been removed from rpc_clnt->cl_tasks,
923 * so it should be safe to use task->tk_count as a test for whether
924 * or not any other processes still hold references to our rpc_task.
925 */
926 if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
927 /* Wake up anyone who may be waiting for task completion */
928 if (!rpc_complete_task(task))
929 return;
930 } else {
931 if (!atomic_dec_and_test(&task->tk_count))
932 return;
933 }
934 rpc_final_put_task(task, task->tk_workqueue);
888} 935}
889 936
890int rpciod_up(void) 937int rpciod_up(void)
@@ -908,7 +955,7 @@ static int rpciod_start(void)
908 * Create the rpciod thread and wait for it to start. 955 * Create the rpciod thread and wait for it to start.
909 */ 956 */
910 dprintk("RPC: creating workqueue rpciod\n"); 957 dprintk("RPC: creating workqueue rpciod\n");
911 wq = alloc_workqueue("rpciod", WQ_RESCUER, 0); 958 wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
912 rpciod_workqueue = wq; 959 rpciod_workqueue = wq;
913 return rpciod_workqueue != NULL; 960 return rpciod_workqueue != NULL;
914} 961}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9df1eadc912a..1a10dcd999ea 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1335 p, 0, length, DMA_FROM_DEVICE); 1335 p, 0, length, DMA_FROM_DEVICE);
1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { 1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1337 put_page(p); 1337 put_page(p);
1338 svc_rdma_put_context(ctxt, 1);
1338 return; 1339 return;
1339 } 1340 }
1340 atomic_inc(&xprt->sc_dma_used); 1341 atomic_inc(&xprt->sc_dma_used);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c431f5a57960..be96d429b475 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1631 } 1631 }
1632 xs_reclassify_socket(family, sock); 1632 xs_reclassify_socket(family, sock);
1633 1633
1634 if (xs_bind(transport, sock)) { 1634 err = xs_bind(transport, sock);
1635 if (err) {
1635 sock_release(sock); 1636 sock_release(sock);
1636 goto out; 1637 goto out;
1637 } 1638 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d286204..ba5b8c208498 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
850 * Get the parent directory, calculate the hash for last 850 * Get the parent directory, calculate the hash for last
851 * component. 851 * component.
852 */ 852 */
853 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 853 err = kern_path_parent(sunaddr->sun_path, &nd);
854 if (err) 854 if (err)
855 goto out_mknod_parent; 855 goto out_mknod_parent;
856 856
@@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1724 1724
1725 msg->msg_namelen = 0; 1725 msg->msg_namelen = 0;
1726 1726
1727 mutex_lock(&u->readlock); 1727 err = mutex_lock_interruptible(&u->readlock);
1728 if (err) {
1729 err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1730 goto out;
1731 }
1728 1732
1729 skb = skb_recv_datagram(sk, flags, noblock, &err); 1733 skb = skb_recv_datagram(sk, flags, noblock, &err);
1730 if (!skb) { 1734 if (!skb) {
@@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1864 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1868 memset(&tmp_scm, 0, sizeof(tmp_scm));
1865 } 1869 }
1866 1870
1867 mutex_lock(&u->readlock); 1871 err = mutex_lock_interruptible(&u->readlock);
1872 if (err) {
1873 err = sock_intr_errno(timeo);
1874 goto out;
1875 }
1868 1876
1869 do { 1877 do {
1870 int chunk; 1878 int chunk;
@@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1895 1903
1896 timeo = unix_stream_data_wait(sk, timeo); 1904 timeo = unix_stream_data_wait(sk, timeo);
1897 1905
1898 if (signal_pending(current)) { 1906 if (signal_pending(current)
1907 || mutex_lock_interruptible(&u->readlock)) {
1899 err = sock_intr_errno(timeo); 1908 err = sock_intr_errno(timeo);
1900 goto out; 1909 goto out;
1901 } 1910 }
1902 mutex_lock(&u->readlock); 1911
1903 continue; 1912 continue;
1904 unlock: 1913 unlock:
1905 unix_state_unlock(sk); 1914 unix_state_unlock(sk);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828e..b6f4b994eb35 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
104 /* 104 /*
105 * Socket ? 105 * Socket ?
106 */ 106 */
107 if (S_ISSOCK(inode->i_mode)) { 107 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
108 struct socket *sock = SOCKET_I(inode); 108 struct socket *sock = SOCKET_I(inode);
109 struct sock *s = sock->sk; 109 struct sock *s = sock->sk;
110 110
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 3e5dbd4e4cd5..d112f038edf0 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -802,11 +802,11 @@ int cfg80211_wext_siwfreq(struct net_device *dev,
802 return freq; 802 return freq;
803 if (freq == 0) 803 if (freq == 0)
804 return -EINVAL; 804 return -EINVAL;
805 wdev_lock(wdev);
806 mutex_lock(&rdev->devlist_mtx); 805 mutex_lock(&rdev->devlist_mtx);
806 wdev_lock(wdev);
807 err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); 807 err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
808 mutex_unlock(&rdev->devlist_mtx);
809 wdev_unlock(wdev); 808 wdev_unlock(wdev);
809 mutex_unlock(&rdev->devlist_mtx);
810 return err; 810 return err;
811 default: 811 default:
812 return -EOPNOTSUPP; 812 return -EOPNOTSUPP;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8b3ef404c794..6459588befc3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1340,10 +1340,13 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1340 default: 1340 default:
1341 BUG(); 1341 BUG();
1342 } 1342 }
1343 xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); 1343 xdst = dst_alloc(dst_ops);
1344 xfrm_policy_put_afinfo(afinfo); 1344 xfrm_policy_put_afinfo(afinfo);
1345 1345
1346 xdst->flo.ops = &xfrm_bundle_fc_ops; 1346 if (likely(xdst))
1347 xdst->flo.ops = &xfrm_bundle_fc_ops;
1348 else
1349 xdst = ERR_PTR(-ENOBUFS);
1347 1350
1348 return xdst; 1351 return xdst;
1349} 1352}
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index c9a16abacab4..291228e25984 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -309,12 +309,18 @@ static void do_config_file(const char *filename)
309 close(fd); 309 close(fd);
310} 310}
311 311
312/*
313 * Important: The below generated source_foo.o and deps_foo.o variable
314 * assignments are parsed not only by make, but also by the rather simple
315 * parser in scripts/mod/sumversion.c.
316 */
312static void parse_dep_file(void *map, size_t len) 317static void parse_dep_file(void *map, size_t len)
313{ 318{
314 char *m = map; 319 char *m = map;
315 char *end = m + len; 320 char *end = m + len;
316 char *p; 321 char *p;
317 char s[PATH_MAX]; 322 char s[PATH_MAX];
323 int first;
318 324
319 p = strchr(m, ':'); 325 p = strchr(m, ':');
320 if (!p) { 326 if (!p) {
@@ -322,11 +328,11 @@ static void parse_dep_file(void *map, size_t len)
322 exit(1); 328 exit(1);
323 } 329 }
324 memcpy(s, m, p-m); s[p-m] = 0; 330 memcpy(s, m, p-m); s[p-m] = 0;
325 printf("deps_%s := \\\n", target);
326 m = p+1; 331 m = p+1;
327 332
328 clear_config(); 333 clear_config();
329 334
335 first = 1;
330 while (m < end) { 336 while (m < end) {
331 while (m < end && (*m == ' ' || *m == '\\' || *m == '\n')) 337 while (m < end && (*m == ' ' || *m == '\\' || *m == '\n'))
332 m++; 338 m++;
@@ -340,9 +346,20 @@ static void parse_dep_file(void *map, size_t len)
340 if (strrcmp(s, "include/generated/autoconf.h") && 346 if (strrcmp(s, "include/generated/autoconf.h") &&
341 strrcmp(s, "arch/um/include/uml-config.h") && 347 strrcmp(s, "arch/um/include/uml-config.h") &&
342 strrcmp(s, ".ver")) { 348 strrcmp(s, ".ver")) {
343 printf(" %s \\\n", s); 349 /*
350 * Do not list the source file as dependency, so that
351 * kbuild is not confused if a .c file is rewritten
352 * into .S or vice versa. Storing it in source_* is
353 * needed for modpost to compute srcversions.
354 */
355 if (first) {
356 printf("source_%s := %s\n\n", target, s);
357 printf("deps_%s := \\\n", target);
358 } else
359 printf(" %s \\\n", s);
344 do_config_file(s); 360 do_config_file(s);
345 } 361 }
362 first = 0;
346 m = p + 1; 363 m = p + 1;
347 } 364 }
348 printf("\n%s: $(deps_%s)\n\n", target, target); 365 printf("\n%s: $(deps_%s)\n\n", target, target);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4c0383da1c9a..58848e3e392c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2654,11 +2654,6 @@ sub process {
2654 WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr); 2654 WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
2655 } 2655 }
2656 2656
2657# SPIN_LOCK_UNLOCKED & RW_LOCK_UNLOCKED are deprecated
2658 if ($line =~ /\b(SPIN_LOCK_UNLOCKED|RW_LOCK_UNLOCKED)/) {
2659 ERROR("Use of $1 is deprecated: see Documentation/spinlocks.txt\n" . $herecurr);
2660 }
2661
2662# warn about #if 0 2657# warn about #if 0
2663 if ($line =~ /^.\s*\#\s*if\s+0\b/) { 2658 if ($line =~ /^.\s*\#\s*if\s+0\b/) {
2664 CHK("if this code is redundant consider removing it\n" . 2659 CHK("if this code is redundant consider removing it\n" .
diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index fd81fc33d633..a4fe923c0131 100644
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -1,6 +1,6 @@
1#!/usr/bin/perl -w 1#!/usr/bin/perl -w
2# 2#
3# Copywrite 2005-2009 - Steven Rostedt 3# Copyright 2005-2009 - Steven Rostedt
4# Licensed under the terms of the GNU GPL License version 2 4# Licensed under the terms of the GNU GPL License version 2
5# 5#
6# It's simple enough to figure out how this works. 6# It's simple enough to figure out how this works.
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index ecf9c7dc1825..9dfcd6d988da 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -300,8 +300,8 @@ static int is_static_library(const char *objfile)
300 return 0; 300 return 0;
301} 301}
302 302
303/* We have dir/file.o. Open dir/.file.o.cmd, look for deps_ line to 303/* We have dir/file.o. Open dir/.file.o.cmd, look for source_ and deps_ line
304 * figure out source file. */ 304 * to figure out source files. */
305static int parse_source_files(const char *objfile, struct md4_ctx *md) 305static int parse_source_files(const char *objfile, struct md4_ctx *md)
306{ 306{
307 char *cmd, *file, *line, *dir; 307 char *cmd, *file, *line, *dir;
@@ -340,6 +340,21 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
340 */ 340 */
341 while ((line = get_next_line(&pos, file, flen)) != NULL) { 341 while ((line = get_next_line(&pos, file, flen)) != NULL) {
342 char* p = line; 342 char* p = line;
343
344 if (strncmp(line, "source_", sizeof("source_")-1) == 0) {
345 p = strrchr(line, ' ');
346 if (!p) {
347 warn("malformed line: %s\n", line);
348 goto out_file;
349 }
350 p++;
351 if (!parse_file(p, md)) {
352 warn("could not open %s: %s\n",
353 p, strerror(errno));
354 goto out_file;
355 }
356 continue;
357 }
343 if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) { 358 if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) {
344 check_files = 1; 359 check_files = 1;
345 continue; 360 continue;
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 038b3d1e2981..f9f6f52db772 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -206,7 +206,8 @@ static uint32_t (*w2)(uint16_t);
206static int 206static int
207is_mcounted_section_name(char const *const txtname) 207is_mcounted_section_name(char const *const txtname)
208{ 208{
209 return 0 == strcmp(".text", txtname) || 209 return 0 == strcmp(".text", txtname) ||
210 0 == strcmp(".ref.text", txtname) ||
210 0 == strcmp(".sched.text", txtname) || 211 0 == strcmp(".sched.text", txtname) ||
211 0 == strcmp(".spinlock.text", txtname) || 212 0 == strcmp(".spinlock.text", txtname) ||
212 0 == strcmp(".irqentry.text", txtname) || 213 0 == strcmp(".irqentry.text", txtname) ||
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 1d7963f4ee79..4be0deea71ca 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -130,6 +130,7 @@ if ($inputfile =~ m,kernel/trace/ftrace\.o$,) {
130# Acceptable sections to record. 130# Acceptable sections to record.
131my %text_sections = ( 131my %text_sections = (
132 ".text" => 1, 132 ".text" => 1,
133 ".ref.text" => 1,
133 ".sched.text" => 1, 134 ".sched.text" => 1,
134 ".spinlock.text" => 1, 135 ".spinlock.text" => 1,
135 ".irqentry.text" => 1, 136 ".irqentry.text" => 1,
diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
index 44423b4dcb82..8c81d76959ee 100644
--- a/scripts/rt-tester/rt-tester.py
+++ b/scripts/rt-tester/rt-tester.py
@@ -33,8 +33,6 @@ cmd_opcodes = {
33 "lockintnowait" : "6", 33 "lockintnowait" : "6",
34 "lockcont" : "7", 34 "lockcont" : "7",
35 "unlock" : "8", 35 "unlock" : "8",
36 "lockbkl" : "9",
37 "unlockbkl" : "10",
38 "signal" : "11", 36 "signal" : "11",
39 "resetevent" : "98", 37 "resetevent" : "98",
40 "reset" : "99", 38 "reset" : "99",
diff --git a/scripts/rt-tester/t2-l1-2rt-sameprio.tst b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
index 8821f27cc8be..3710c8b2090d 100644
--- a/scripts/rt-tester/t2-l1-2rt-sameprio.tst
+++ b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-pi.tst b/scripts/rt-tester/t2-l1-pi.tst
index cde1f189a02b..b4cc95975adb 100644
--- a/scripts/rt-tester/t2-l1-pi.tst
+++ b/scripts/rt-tester/t2-l1-pi.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-signal.tst b/scripts/rt-tester/t2-l1-signal.tst
index 3ab0bfc49950..1b57376cc1f7 100644
--- a/scripts/rt-tester/t2-l1-signal.tst
+++ b/scripts/rt-tester/t2-l1-signal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l2-2rt-deadlock.tst b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
index f4b5d5d6215f..68b10629b6f4 100644
--- a/scripts/rt-tester/t2-l2-2rt-deadlock.tst
+++ b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-1rt.tst b/scripts/rt-tester/t3-l1-pi-1rt.tst
index 63440ca2cce9..8e6c8b11ae56 100644
--- a/scripts/rt-tester/t3-l1-pi-1rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-1rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-2rt.tst b/scripts/rt-tester/t3-l1-pi-2rt.tst
index e5816fe67df3..69c2212fc520 100644
--- a/scripts/rt-tester/t3-l1-pi-2rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-2rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-3rt.tst b/scripts/rt-tester/t3-l1-pi-3rt.tst
index 718b82b5d3bb..9b0f1eb26a88 100644
--- a/scripts/rt-tester/t3-l1-pi-3rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-3rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-signal.tst b/scripts/rt-tester/t3-l1-pi-signal.tst
index c6e213563498..39ec74ab06ee 100644
--- a/scripts/rt-tester/t3-l1-pi-signal.tst
+++ b/scripts/rt-tester/t3-l1-pi-signal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-steal.tst b/scripts/rt-tester/t3-l1-pi-steal.tst
index f53749d59d79..e03db7e010fa 100644
--- a/scripts/rt-tester/t3-l1-pi-steal.tst
+++ b/scripts/rt-tester/t3-l1-pi-steal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l2-pi.tst b/scripts/rt-tester/t3-l2-pi.tst
index cdc3e4fd7bac..7b59100d3e48 100644
--- a/scripts/rt-tester/t3-l2-pi.tst
+++ b/scripts/rt-tester/t3-l2-pi.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t4-l2-pi-deboost.tst b/scripts/rt-tester/t4-l2-pi-deboost.tst
index baa14137f473..2f0e049d6443 100644
--- a/scripts/rt-tester/t4-l2-pi-deboost.tst
+++ b/scripts/rt-tester/t4-l2-pi-deboost.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
index e6ec0c81b54d..04f4034ff895 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
index ca64f8bbf4bc..a48a6ee29ddc 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c
index 58a12c278706..539855ff31f9 100644
--- a/scripts/selinux/genheaders/genheaders.c
+++ b/scripts/selinux/genheaders/genheaders.c
@@ -43,6 +43,8 @@ int main(int argc, char *argv[])
43 int i, j, k; 43 int i, j, k;
44 int isids_len; 44 int isids_len;
45 FILE *fout; 45 FILE *fout;
46 const char *needle = "SOCKET";
47 char *substr;
46 48
47 progname = argv[0]; 49 progname = argv[0];
48 50
@@ -88,6 +90,24 @@ int main(int argc, char *argv[])
88 fprintf(fout, "%2d\n", i); 90 fprintf(fout, "%2d\n", i);
89 } 91 }
90 fprintf(fout, "\n#define SECINITSID_NUM %d\n", i-1); 92 fprintf(fout, "\n#define SECINITSID_NUM %d\n", i-1);
93 fprintf(fout, "\nstatic inline bool security_is_socket_class(u16 kern_tclass)\n");
94 fprintf(fout, "{\n");
95 fprintf(fout, "\tbool sock = false;\n\n");
96 fprintf(fout, "\tswitch (kern_tclass) {\n");
97 for (i = 0; secclass_map[i].name; i++) {
98 struct security_class_mapping *map = &secclass_map[i];
99 substr = strstr(map->name, needle);
100 if (substr && strcmp(substr, needle) == 0)
101 fprintf(fout, "\tcase SECCLASS_%s:\n", map->name);
102 }
103 fprintf(fout, "\t\tsock = true;\n");
104 fprintf(fout, "\t\tbreak;\n");
105 fprintf(fout, "\tdefault:\n");
106 fprintf(fout, "\t\tbreak;\n");
107 fprintf(fout, "\t}\n\n");
108 fprintf(fout, "\treturn sock;\n");
109 fprintf(fout, "}\n");
110
91 fprintf(fout, "\n#endif\n"); 111 fprintf(fout, "\n#endif\n");
92 fclose(fout); 112 fclose(fout);
93 113
diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index f204869399ea..2dafe50a2e25 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -6,19 +6,47 @@ apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
6 path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ 6 path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
7 resource.o sid.o file.o 7 resource.o sid.o file.o
8 8
9clean-files: capability_names.h af_names.h 9clean-files := capability_names.h rlim_names.h
10 10
11
12# Build a lower case string table of capability names
13# Transforms lines from
14# #define CAP_DAC_OVERRIDE 1
15# to
16# [1] = "dac_override",
11quiet_cmd_make-caps = GEN $@ 17quiet_cmd_make-caps = GEN $@
12cmd_make-caps = echo "static const char *capability_names[] = {" > $@ ; sed -n -e "/CAP_FS_MASK/d" -e "s/^\#define[ \\t]\\+CAP_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z >> $@ ; echo "};" >> $@ 18cmd_make-caps = echo "static const char *capability_names[] = {" > $@ ;\
19 sed $< >>$@ -r -n -e '/CAP_FS_MASK/d' \
20 -e 's/^\#define[ \t]+CAP_([A-Z0-9_]+)[ \t]+([0-9]+)/[\2] = "\L\1",/p';\
21 echo "};" >> $@
22
13 23
24# Build a lower case string table of rlimit names.
25# Transforms lines from
26# #define RLIMIT_STACK 3 /* max stack size */
27# to
28# [RLIMIT_STACK] = "stack",
29#
30# and build a second integer table (with the second sed cmd), that maps
31# RLIMIT defines to the order defined in asm-generic/resource.h Thi is
32# required by policy load to map policy ordering of RLIMITs to internal
33# ordering for architectures that redefine an RLIMIT.
34# Transforms lines from
35# #define RLIMIT_STACK 3 /* max stack size */
36# to
37# RLIMIT_STACK,
14quiet_cmd_make-rlim = GEN $@ 38quiet_cmd_make-rlim = GEN $@
15cmd_make-rlim = echo "static const char *rlim_names[] = {" > $@ ; sed -n --e "/AF_MAX/d" -e "s/^\# \\?define[ \\t]\\+RLIMIT_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\\(.*\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z >> $@ ; echo "};" >> $@ ; echo "static const int rlim_map[] = {" >> $@ ; sed -n -e "/AF_MAX/d" -e "s/^\# \\?define[ \\t]\\+\\(RLIMIT_[A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\\(.*\\)\$$/\\1,/p" $< >> $@ ; echo "};" >> $@ 39cmd_make-rlim = echo "static const char *rlim_names[] = {" > $@ ;\
40 sed $< >> $@ -r -n \
41 -e 's/^\# ?define[ \t]+(RLIMIT_([A-Z0-9_]+)).*/[\1] = "\L\2",/p';\
42 echo "};" >> $@ ;\
43 echo "static const int rlim_map[] = {" >> $@ ;\
44 sed -r -n "s/^\# ?define[ \t]+(RLIMIT_[A-Z0-9_]+).*/\1,/p" $< >> $@ ;\
45 echo "};" >> $@
16 46
17$(obj)/capability.o : $(obj)/capability_names.h 47$(obj)/capability.o : $(obj)/capability_names.h
18$(obj)/resource.o : $(obj)/rlim_names.h 48$(obj)/resource.o : $(obj)/rlim_names.h
19$(obj)/capability_names.h : $(srctree)/include/linux/capability.h 49$(obj)/capability_names.h : $(srctree)/include/linux/capability.h
20 $(call cmd,make-caps) 50 $(call cmd,make-caps)
21$(obj)/af_names.h : $(srctree)/include/linux/socket.h
22 $(call cmd,make-af)
23$(obj)/rlim_names.h : $(srctree)/include/asm-generic/resource.h 51$(obj)/rlim_names.h : $(srctree)/include/asm-generic/resource.h
24 $(call cmd,make-rlim) 52 $(call cmd,make-rlim)
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index b7106f192b75..d21a427a35ae 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -693,11 +693,9 @@ static struct kernel_param_ops param_ops_aalockpolicy = {
693 693
694static int param_set_audit(const char *val, struct kernel_param *kp); 694static int param_set_audit(const char *val, struct kernel_param *kp);
695static int param_get_audit(char *buffer, struct kernel_param *kp); 695static int param_get_audit(char *buffer, struct kernel_param *kp);
696#define param_check_audit(name, p) __param_check(name, p, int)
697 696
698static int param_set_mode(const char *val, struct kernel_param *kp); 697static int param_set_mode(const char *val, struct kernel_param *kp);
699static int param_get_mode(char *buffer, struct kernel_param *kp); 698static int param_get_mode(char *buffer, struct kernel_param *kp);
700#define param_check_mode(name, p) __param_check(name, p, int)
701 699
702/* Flag values, also controllable via /sys/module/apparmor/parameters 700/* Flag values, also controllable via /sys/module/apparmor/parameters
703 * We define special types as we want to do additional mediation. 701 * We define special types as we want to do additional mediation.
diff --git a/security/capability.c b/security/capability.c
index 2a5df2b7da83..ab3d807accc3 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -12,11 +12,6 @@
12 12
13#include <linux/security.h> 13#include <linux/security.h>
14 14
15static int cap_sysctl(ctl_table *table, int op)
16{
17 return 0;
18}
19
20static int cap_syslog(int type) 15static int cap_syslog(int type)
21{ 16{
22 return 0; 17 return 0;
@@ -59,6 +54,11 @@ static int cap_sb_copy_data(char *orig, char *copy)
59 return 0; 54 return 0;
60} 55}
61 56
57static int cap_sb_remount(struct super_block *sb, void *data)
58{
59 return 0;
60}
61
62static int cap_sb_kern_mount(struct super_block *sb, int flags, void *data) 62static int cap_sb_kern_mount(struct super_block *sb, int flags, void *data)
63{ 63{
64 return 0; 64 return 0;
@@ -118,7 +118,8 @@ static void cap_inode_free_security(struct inode *inode)
118} 118}
119 119
120static int cap_inode_init_security(struct inode *inode, struct inode *dir, 120static int cap_inode_init_security(struct inode *inode, struct inode *dir,
121 char **name, void **value, size_t *len) 121 const struct qstr *qstr, char **name,
122 void **value, size_t *len)
122{ 123{
123 return -EOPNOTSUPP; 124 return -EOPNOTSUPP;
124} 125}
@@ -880,7 +881,6 @@ void __init security_fixup_ops(struct security_operations *ops)
880 set_to_cap_if_null(ops, capable); 881 set_to_cap_if_null(ops, capable);
881 set_to_cap_if_null(ops, quotactl); 882 set_to_cap_if_null(ops, quotactl);
882 set_to_cap_if_null(ops, quota_on); 883 set_to_cap_if_null(ops, quota_on);
883 set_to_cap_if_null(ops, sysctl);
884 set_to_cap_if_null(ops, syslog); 884 set_to_cap_if_null(ops, syslog);
885 set_to_cap_if_null(ops, settime); 885 set_to_cap_if_null(ops, settime);
886 set_to_cap_if_null(ops, vm_enough_memory); 886 set_to_cap_if_null(ops, vm_enough_memory);
@@ -892,6 +892,7 @@ void __init security_fixup_ops(struct security_operations *ops)
892 set_to_cap_if_null(ops, sb_alloc_security); 892 set_to_cap_if_null(ops, sb_alloc_security);
893 set_to_cap_if_null(ops, sb_free_security); 893 set_to_cap_if_null(ops, sb_free_security);
894 set_to_cap_if_null(ops, sb_copy_data); 894 set_to_cap_if_null(ops, sb_copy_data);
895 set_to_cap_if_null(ops, sb_remount);
895 set_to_cap_if_null(ops, sb_kern_mount); 896 set_to_cap_if_null(ops, sb_kern_mount);
896 set_to_cap_if_null(ops, sb_show_options); 897 set_to_cap_if_null(ops, sb_show_options);
897 set_to_cap_if_null(ops, sb_statfs); 898 set_to_cap_if_null(ops, sb_statfs);
diff --git a/security/commoncap.c b/security/commoncap.c
index 64c2ed9c9015..dbfdaed4cc66 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -93,7 +93,7 @@ int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
93 * Determine whether the current process may set the system clock and timezone 93 * Determine whether the current process may set the system clock and timezone
94 * information, returning 0 if permission granted, -ve if denied. 94 * information, returning 0 if permission granted, -ve if denied.
95 */ 95 */
96int cap_settime(struct timespec *ts, struct timezone *tz) 96int cap_settime(const struct timespec *ts, const struct timezone *tz)
97{ 97{
98 if (!capable(CAP_SYS_TIME)) 98 if (!capable(CAP_SYS_TIME))
99 return -EPERM; 99 return -EPERM;
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index ac79032bdf23..08408bd71462 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -110,8 +110,7 @@ struct ima_iint_cache {
110}; 110};
111 111
112/* LIM API function definitions */ 112/* LIM API function definitions */
113int ima_must_measure(struct ima_iint_cache *iint, struct inode *inode, 113int ima_must_measure(struct inode *inode, int mask, int function);
114 int mask, int function);
115int ima_collect_measurement(struct ima_iint_cache *iint, struct file *file); 114int ima_collect_measurement(struct ima_iint_cache *iint, struct file *file);
116void ima_store_measurement(struct ima_iint_cache *iint, struct file *file, 115void ima_store_measurement(struct ima_iint_cache *iint, struct file *file,
117 const unsigned char *filename); 116 const unsigned char *filename);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index d3963de6003d..da36d2c085a4 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -105,20 +105,13 @@ err_out:
105 * mask: contains the permission mask 105 * mask: contains the permission mask
106 * fsmagic: hex value 106 * fsmagic: hex value
107 * 107 *
108 * Must be called with iint->mutex held. 108 * Return 0 to measure. For matching a DONT_MEASURE policy, no policy,
109 * 109 * or other error, return an error code.
110 * Return 0 to measure. Return 1 if already measured.
111 * For matching a DONT_MEASURE policy, no policy, or other
112 * error, return an error code.
113*/ 110*/
114int ima_must_measure(struct ima_iint_cache *iint, struct inode *inode, 111int ima_must_measure(struct inode *inode, int mask, int function)
115 int mask, int function)
116{ 112{
117 int must_measure; 113 int must_measure;
118 114
119 if (iint && iint->flags & IMA_MEASURED)
120 return 1;
121
122 must_measure = ima_match_policy(inode, function, mask); 115 must_measure = ima_match_policy(inode, function, mask);
123 return must_measure ? 0 : -EACCES; 116 return must_measure ? 0 : -EACCES;
124} 117}
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index c442e47b6785..4ae73040ab7b 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -137,11 +137,6 @@ void ima_inode_free(struct inode *inode)
137{ 137{
138 struct ima_iint_cache *iint; 138 struct ima_iint_cache *iint;
139 139
140 if (inode->i_readcount)
141 printk(KERN_INFO "%s: readcount: %u\n", __func__, inode->i_readcount);
142
143 inode->i_readcount = 0;
144
145 if (!IS_IMA(inode)) 140 if (!IS_IMA(inode))
146 return; 141 return;
147 142
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 203de979d305..39d66dc2b8e9 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -36,67 +36,17 @@ static int __init hash_setup(char *str)
36} 36}
37__setup("ima_hash=", hash_setup); 37__setup("ima_hash=", hash_setup);
38 38
39struct ima_imbalance {
40 struct hlist_node node;
41 unsigned long fsmagic;
42};
43
44/*
45 * ima_limit_imbalance - emit one imbalance message per filesystem type
46 *
47 * Maintain list of filesystem types that do not measure files properly.
48 * Return false if unknown, true if known.
49 */
50static bool ima_limit_imbalance(struct file *file)
51{
52 static DEFINE_SPINLOCK(ima_imbalance_lock);
53 static HLIST_HEAD(ima_imbalance_list);
54
55 struct super_block *sb = file->f_dentry->d_sb;
56 struct ima_imbalance *entry;
57 struct hlist_node *node;
58 bool found = false;
59
60 rcu_read_lock();
61 hlist_for_each_entry_rcu(entry, node, &ima_imbalance_list, node) {
62 if (entry->fsmagic == sb->s_magic) {
63 found = true;
64 break;
65 }
66 }
67 rcu_read_unlock();
68 if (found)
69 goto out;
70
71 entry = kmalloc(sizeof(*entry), GFP_NOFS);
72 if (!entry)
73 goto out;
74 entry->fsmagic = sb->s_magic;
75 spin_lock(&ima_imbalance_lock);
76 /*
77 * we could have raced and something else might have added this fs
78 * to the list, but we don't really care
79 */
80 hlist_add_head_rcu(&entry->node, &ima_imbalance_list);
81 spin_unlock(&ima_imbalance_lock);
82 printk(KERN_INFO "IMA: unmeasured files on fsmagic: %lX\n",
83 entry->fsmagic);
84out:
85 return found;
86}
87
88/* 39/*
89 * ima_counts_get - increment file counts 40 * ima_rdwr_violation_check
90 * 41 *
91 * Maintain read/write counters for all files, but only 42 * Only invalidate the PCR for measured files:
92 * invalidate the PCR for measured files:
93 * - Opening a file for write when already open for read, 43 * - Opening a file for write when already open for read,
94 * results in a time of measure, time of use (ToMToU) error. 44 * results in a time of measure, time of use (ToMToU) error.
95 * - Opening a file for read when already open for write, 45 * - Opening a file for read when already open for write,
96 * could result in a file measurement error. 46 * could result in a file measurement error.
97 * 47 *
98 */ 48 */
99void ima_counts_get(struct file *file) 49static void ima_rdwr_violation_check(struct file *file)
100{ 50{
101 struct dentry *dentry = file->f_path.dentry; 51 struct dentry *dentry = file->f_path.dentry;
102 struct inode *inode = dentry->d_inode; 52 struct inode *inode = dentry->d_inode;
@@ -104,32 +54,25 @@ void ima_counts_get(struct file *file)
104 int rc; 54 int rc;
105 bool send_tomtou = false, send_writers = false; 55 bool send_tomtou = false, send_writers = false;
106 56
107 if (!S_ISREG(inode->i_mode)) 57 if (!S_ISREG(inode->i_mode) || !ima_initialized)
108 return; 58 return;
109 59
110 spin_lock(&inode->i_lock); 60 mutex_lock(&inode->i_mutex); /* file metadata: permissions, xattr */
111
112 if (!ima_initialized)
113 goto out;
114 61
115 if (mode & FMODE_WRITE) { 62 if (mode & FMODE_WRITE) {
116 if (inode->i_readcount && IS_IMA(inode)) 63 if (atomic_read(&inode->i_readcount) && IS_IMA(inode))
117 send_tomtou = true; 64 send_tomtou = true;
118 goto out; 65 goto out;
119 } 66 }
120 67
121 rc = ima_must_measure(NULL, inode, MAY_READ, FILE_CHECK); 68 rc = ima_must_measure(inode, MAY_READ, FILE_CHECK);
122 if (rc < 0) 69 if (rc < 0)
123 goto out; 70 goto out;
124 71
125 if (atomic_read(&inode->i_writecount) > 0) 72 if (atomic_read(&inode->i_writecount) > 0)
126 send_writers = true; 73 send_writers = true;
127out: 74out:
128 /* remember the vfs deals with i_writecount */ 75 mutex_unlock(&inode->i_mutex);
129 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
130 inode->i_readcount++;
131
132 spin_unlock(&inode->i_lock);
133 76
134 if (send_tomtou) 77 if (send_tomtou)
135 ima_add_violation(inode, dentry->d_name.name, "invalid_pcr", 78 ima_add_violation(inode, dentry->d_name.name, "invalid_pcr",
@@ -139,71 +82,25 @@ out:
139 "open_writers"); 82 "open_writers");
140} 83}
141 84
142/*
143 * Decrement ima counts
144 */
145static void ima_dec_counts(struct inode *inode, struct file *file)
146{
147 mode_t mode = file->f_mode;
148
149 assert_spin_locked(&inode->i_lock);
150
151 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
152 if (unlikely(inode->i_readcount == 0)) {
153 if (!ima_limit_imbalance(file)) {
154 printk(KERN_INFO "%s: open/free imbalance (r:%u)\n",
155 __func__, inode->i_readcount);
156 dump_stack();
157 }
158 return;
159 }
160 inode->i_readcount--;
161 }
162}
163
164static void ima_check_last_writer(struct ima_iint_cache *iint, 85static void ima_check_last_writer(struct ima_iint_cache *iint,
165 struct inode *inode, 86 struct inode *inode,
166 struct file *file) 87 struct file *file)
167{ 88{
168 mode_t mode = file->f_mode; 89 mode_t mode = file->f_mode;
169 90
170 BUG_ON(!mutex_is_locked(&iint->mutex)); 91 mutex_lock(&iint->mutex);
171 assert_spin_locked(&inode->i_lock);
172
173 if (mode & FMODE_WRITE && 92 if (mode & FMODE_WRITE &&
174 atomic_read(&inode->i_writecount) == 1 && 93 atomic_read(&inode->i_writecount) == 1 &&
175 iint->version != inode->i_version) 94 iint->version != inode->i_version)
176 iint->flags &= ~IMA_MEASURED; 95 iint->flags &= ~IMA_MEASURED;
177}
178
179static void ima_file_free_iint(struct ima_iint_cache *iint, struct inode *inode,
180 struct file *file)
181{
182 mutex_lock(&iint->mutex);
183 spin_lock(&inode->i_lock);
184
185 ima_dec_counts(inode, file);
186 ima_check_last_writer(iint, inode, file);
187
188 spin_unlock(&inode->i_lock);
189 mutex_unlock(&iint->mutex); 96 mutex_unlock(&iint->mutex);
190} 97}
191 98
192static void ima_file_free_noiint(struct inode *inode, struct file *file)
193{
194 spin_lock(&inode->i_lock);
195
196 ima_dec_counts(inode, file);
197
198 spin_unlock(&inode->i_lock);
199}
200
201/** 99/**
202 * ima_file_free - called on __fput() 100 * ima_file_free - called on __fput()
203 * @file: pointer to file structure being freed 101 * @file: pointer to file structure being freed
204 * 102 *
205 * Flag files that changed, based on i_version; 103 * Flag files that changed, based on i_version
206 * and decrement the i_readcount.
207 */ 104 */
208void ima_file_free(struct file *file) 105void ima_file_free(struct file *file)
209{ 106{
@@ -214,12 +111,10 @@ void ima_file_free(struct file *file)
214 return; 111 return;
215 112
216 iint = ima_iint_find(inode); 113 iint = ima_iint_find(inode);
114 if (!iint)
115 return;
217 116
218 if (iint) 117 ima_check_last_writer(iint, inode, file);
219 ima_file_free_iint(iint, inode, file);
220 else
221 ima_file_free_noiint(inode, file);
222
223} 118}
224 119
225static int process_measurement(struct file *file, const unsigned char *filename, 120static int process_measurement(struct file *file, const unsigned char *filename,
@@ -232,7 +127,7 @@ static int process_measurement(struct file *file, const unsigned char *filename,
232 if (!ima_initialized || !S_ISREG(inode->i_mode)) 127 if (!ima_initialized || !S_ISREG(inode->i_mode))
233 return 0; 128 return 0;
234 129
235 rc = ima_must_measure(NULL, inode, mask, function); 130 rc = ima_must_measure(inode, mask, function);
236 if (rc != 0) 131 if (rc != 0)
237 return rc; 132 return rc;
238retry: 133retry:
@@ -246,7 +141,7 @@ retry:
246 141
247 mutex_lock(&iint->mutex); 142 mutex_lock(&iint->mutex);
248 143
249 rc = ima_must_measure(iint, inode, mask, function); 144 rc = iint->flags & IMA_MEASURED ? 1 : 0;
250 if (rc != 0) 145 if (rc != 0)
251 goto out; 146 goto out;
252 147
@@ -317,6 +212,7 @@ int ima_file_check(struct file *file, int mask)
317{ 212{
318 int rc; 213 int rc;
319 214
215 ima_rdwr_violation_check(file);
320 rc = process_measurement(file, file->f_dentry->d_name.name, 216 rc = process_measurement(file, file->f_dentry->d_name.name,
321 mask & (MAY_READ | MAY_WRITE | MAY_EXEC), 217 mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
322 FILE_CHECK); 218 FILE_CHECK);
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 07a5f35e3970..338b510e9027 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -12,9 +12,52 @@
12#include <linux/syscalls.h> 12#include <linux/syscalls.h>
13#include <linux/keyctl.h> 13#include <linux/keyctl.h>
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/slab.h>
15#include "internal.h" 16#include "internal.h"
16 17
17/* 18/*
19 * Instantiate a key with the specified compatibility multipart payload and
20 * link the key into the destination keyring if one is given.
21 *
22 * The caller must have the appropriate instantiation permit set for this to
23 * work (see keyctl_assume_authority). No other permissions are required.
24 *
25 * If successful, 0 will be returned.
26 */
27long compat_keyctl_instantiate_key_iov(
28 key_serial_t id,
29 const struct compat_iovec __user *_payload_iov,
30 unsigned ioc,
31 key_serial_t ringid)
32{
33 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
34 long ret;
35
36 if (_payload_iov == 0 || ioc == 0)
37 goto no_payload;
38
39 ret = compat_rw_copy_check_uvector(WRITE, _payload_iov, ioc,
40 ARRAY_SIZE(iovstack),
41 iovstack, &iov);
42 if (ret < 0)
43 return ret;
44 if (ret == 0)
45 goto no_payload_free;
46
47 ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
48
49 if (iov != iovstack)
50 kfree(iov);
51 return ret;
52
53no_payload_free:
54 if (iov != iovstack)
55 kfree(iov);
56no_payload:
57 return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
58}
59
60/*
18 * The key control system call, 32-bit compatibility version for 64-bit archs 61 * The key control system call, 32-bit compatibility version for 64-bit archs
19 * 62 *
20 * This should only be called if the 64-bit arch uses weird pointers in 32-bit 63 * This should only be called if the 64-bit arch uses weird pointers in 32-bit
@@ -85,6 +128,13 @@ asmlinkage long compat_sys_keyctl(u32 option,
85 case KEYCTL_SESSION_TO_PARENT: 128 case KEYCTL_SESSION_TO_PARENT:
86 return keyctl_session_to_parent(); 129 return keyctl_session_to_parent();
87 130
131 case KEYCTL_REJECT:
132 return keyctl_reject_key(arg2, arg3, arg4, arg5);
133
134 case KEYCTL_INSTANTIATE_IOV:
135 return compat_keyctl_instantiate_key_iov(
136 arg2, compat_ptr(arg3), arg4, arg5);
137
88 default: 138 default:
89 return -EOPNOTSUPP; 139 return -EOPNOTSUPP;
90 } 140 }
diff --git a/security/keys/encrypted.c b/security/keys/encrypted.c
index 9e7e4ce3fae8..69907a58a683 100644
--- a/security/keys/encrypted.c
+++ b/security/keys/encrypted.c
@@ -765,8 +765,7 @@ static long encrypted_read(const struct key *key, char __user *buffer,
765 size_t asciiblob_len; 765 size_t asciiblob_len;
766 int ret; 766 int ret;
767 767
768 epayload = rcu_dereference_protected(key->payload.data, 768 epayload = rcu_dereference_key(key);
769 rwsem_is_locked(&((struct key *)key)->sem));
770 769
771 /* returns the hex encoded iv, encrypted-data, and hmac as ascii */ 770 /* returns the hex encoded iv, encrypted-data, and hmac as ascii */
772 asciiblob_len = epayload->datablob_len + ivsize + 1 771 asciiblob_len = epayload->datablob_len + ivsize + 1
diff --git a/security/keys/internal.h b/security/keys/internal.h
index a52aa7c88b41..07a025f81902 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -214,6 +214,14 @@ extern long keyctl_assume_authority(key_serial_t);
214extern long keyctl_get_security(key_serial_t keyid, char __user *buffer, 214extern long keyctl_get_security(key_serial_t keyid, char __user *buffer,
215 size_t buflen); 215 size_t buflen);
216extern long keyctl_session_to_parent(void); 216extern long keyctl_session_to_parent(void);
217extern long keyctl_reject_key(key_serial_t, unsigned, unsigned, key_serial_t);
218extern long keyctl_instantiate_key_iov(key_serial_t,
219 const struct iovec __user *,
220 unsigned, key_serial_t);
221
222extern long keyctl_instantiate_key_common(key_serial_t,
223 const struct iovec __user *,
224 unsigned, size_t, key_serial_t);
217 225
218/* 226/*
219 * Debugging key validation 227 * Debugging key validation
diff --git a/security/keys/key.c b/security/keys/key.c
index 1c2d43dc5107..f7f9d93f08d9 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -249,6 +249,14 @@ struct key *key_alloc(struct key_type *type, const char *desc,
249 if (!desc || !*desc) 249 if (!desc || !*desc)
250 goto error; 250 goto error;
251 251
252 if (type->vet_description) {
253 ret = type->vet_description(desc);
254 if (ret < 0) {
255 key = ERR_PTR(ret);
256 goto error;
257 }
258 }
259
252 desclen = strlen(desc) + 1; 260 desclen = strlen(desc) + 1;
253 quotalen = desclen + type->def_datalen; 261 quotalen = desclen + type->def_datalen;
254 262
@@ -503,26 +511,29 @@ int key_instantiate_and_link(struct key *key,
503EXPORT_SYMBOL(key_instantiate_and_link); 511EXPORT_SYMBOL(key_instantiate_and_link);
504 512
505/** 513/**
506 * key_negate_and_link - Negatively instantiate a key and link it into the keyring. 514 * key_reject_and_link - Negatively instantiate a key and link it into the keyring.
507 * @key: The key to instantiate. 515 * @key: The key to instantiate.
508 * @timeout: The timeout on the negative key. 516 * @timeout: The timeout on the negative key.
517 * @error: The error to return when the key is hit.
509 * @keyring: Keyring to create a link in on success (or NULL). 518 * @keyring: Keyring to create a link in on success (or NULL).
510 * @authkey: The authorisation token permitting instantiation. 519 * @authkey: The authorisation token permitting instantiation.
511 * 520 *
512 * Negatively instantiate a key that's in the uninstantiated state and, if 521 * Negatively instantiate a key that's in the uninstantiated state and, if
513 * successful, set its timeout and link it in to the destination keyring if one 522 * successful, set its timeout and stored error and link it in to the
514 * is supplied. The key and any links to the key will be automatically garbage 523 * destination keyring if one is supplied. The key and any links to the key
515 * collected after the timeout expires. 524 * will be automatically garbage collected after the timeout expires.
516 * 525 *
517 * Negative keys are used to rate limit repeated request_key() calls by causing 526 * Negative keys are used to rate limit repeated request_key() calls by causing
518 * them to return -ENOKEY until the negative key expires. 527 * them to return the stored error code (typically ENOKEY) until the negative
528 * key expires.
519 * 529 *
520 * If successful, 0 is returned, the authorisation token is revoked and anyone 530 * If successful, 0 is returned, the authorisation token is revoked and anyone
521 * waiting for the key is woken up. If the key was already instantiated, 531 * waiting for the key is woken up. If the key was already instantiated,
522 * -EBUSY will be returned. 532 * -EBUSY will be returned.
523 */ 533 */
524int key_negate_and_link(struct key *key, 534int key_reject_and_link(struct key *key,
525 unsigned timeout, 535 unsigned timeout,
536 unsigned error,
526 struct key *keyring, 537 struct key *keyring,
527 struct key *authkey) 538 struct key *authkey)
528{ 539{
@@ -548,6 +559,7 @@ int key_negate_and_link(struct key *key,
548 atomic_inc(&key->user->nikeys); 559 atomic_inc(&key->user->nikeys);
549 set_bit(KEY_FLAG_NEGATIVE, &key->flags); 560 set_bit(KEY_FLAG_NEGATIVE, &key->flags);
550 set_bit(KEY_FLAG_INSTANTIATED, &key->flags); 561 set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
562 key->type_data.reject_error = -error;
551 now = current_kernel_time(); 563 now = current_kernel_time();
552 key->expiry = now.tv_sec + timeout; 564 key->expiry = now.tv_sec + timeout;
553 key_schedule_gc(key->expiry + key_gc_delay); 565 key_schedule_gc(key->expiry + key_gc_delay);
@@ -577,8 +589,7 @@ int key_negate_and_link(struct key *key,
577 589
578 return ret == 0 ? link_ret : ret; 590 return ret == 0 ? link_ret : ret;
579} 591}
580 592EXPORT_SYMBOL(key_reject_and_link);
581EXPORT_SYMBOL(key_negate_and_link);
582 593
583/* 594/*
584 * Garbage collect keys in process context so that we don't have to disable 595 * Garbage collect keys in process context so that we don't have to disable
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 31a0fd8189f1..427fddcaeb19 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -913,6 +913,21 @@ static int keyctl_change_reqkey_auth(struct key *key)
913} 913}
914 914
915/* 915/*
916 * Copy the iovec data from userspace
917 */
918static long copy_from_user_iovec(void *buffer, const struct iovec *iov,
919 unsigned ioc)
920{
921 for (; ioc > 0; ioc--) {
922 if (copy_from_user(buffer, iov->iov_base, iov->iov_len) != 0)
923 return -EFAULT;
924 buffer += iov->iov_len;
925 iov++;
926 }
927 return 0;
928}
929
930/*
916 * Instantiate a key with the specified payload and link the key into the 931 * Instantiate a key with the specified payload and link the key into the
917 * destination keyring if one is given. 932 * destination keyring if one is given.
918 * 933 *
@@ -921,10 +936,11 @@ static int keyctl_change_reqkey_auth(struct key *key)
921 * 936 *
922 * If successful, 0 will be returned. 937 * If successful, 0 will be returned.
923 */ 938 */
924long keyctl_instantiate_key(key_serial_t id, 939long keyctl_instantiate_key_common(key_serial_t id,
925 const void __user *_payload, 940 const struct iovec *payload_iov,
926 size_t plen, 941 unsigned ioc,
927 key_serial_t ringid) 942 size_t plen,
943 key_serial_t ringid)
928{ 944{
929 const struct cred *cred = current_cred(); 945 const struct cred *cred = current_cred();
930 struct request_key_auth *rka; 946 struct request_key_auth *rka;
@@ -953,7 +969,7 @@ long keyctl_instantiate_key(key_serial_t id,
953 /* pull the payload in if one was supplied */ 969 /* pull the payload in if one was supplied */
954 payload = NULL; 970 payload = NULL;
955 971
956 if (_payload) { 972 if (payload_iov) {
957 ret = -ENOMEM; 973 ret = -ENOMEM;
958 payload = kmalloc(plen, GFP_KERNEL); 974 payload = kmalloc(plen, GFP_KERNEL);
959 if (!payload) { 975 if (!payload) {
@@ -965,8 +981,8 @@ long keyctl_instantiate_key(key_serial_t id,
965 goto error; 981 goto error;
966 } 982 }
967 983
968 ret = -EFAULT; 984 ret = copy_from_user_iovec(payload, payload_iov, ioc);
969 if (copy_from_user(payload, _payload, plen) != 0) 985 if (ret < 0)
970 goto error2; 986 goto error2;
971 } 987 }
972 988
@@ -997,6 +1013,72 @@ error:
997} 1013}
998 1014
999/* 1015/*
1016 * Instantiate a key with the specified payload and link the key into the
1017 * destination keyring if one is given.
1018 *
1019 * The caller must have the appropriate instantiation permit set for this to
1020 * work (see keyctl_assume_authority). No other permissions are required.
1021 *
1022 * If successful, 0 will be returned.
1023 */
1024long keyctl_instantiate_key(key_serial_t id,
1025 const void __user *_payload,
1026 size_t plen,
1027 key_serial_t ringid)
1028{
1029 if (_payload && plen) {
1030 struct iovec iov[1] = {
1031 [0].iov_base = (void __user *)_payload,
1032 [0].iov_len = plen
1033 };
1034
1035 return keyctl_instantiate_key_common(id, iov, 1, plen, ringid);
1036 }
1037
1038 return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
1039}
1040
1041/*
1042 * Instantiate a key with the specified multipart payload and link the key into
1043 * the destination keyring if one is given.
1044 *
1045 * The caller must have the appropriate instantiation permit set for this to
1046 * work (see keyctl_assume_authority). No other permissions are required.
1047 *
1048 * If successful, 0 will be returned.
1049 */
1050long keyctl_instantiate_key_iov(key_serial_t id,
1051 const struct iovec __user *_payload_iov,
1052 unsigned ioc,
1053 key_serial_t ringid)
1054{
1055 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1056 long ret;
1057
1058 if (_payload_iov == 0 || ioc == 0)
1059 goto no_payload;
1060
1061 ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc,
1062 ARRAY_SIZE(iovstack), iovstack, &iov);
1063 if (ret < 0)
1064 return ret;
1065 if (ret == 0)
1066 goto no_payload_free;
1067
1068 ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
1069
1070 if (iov != iovstack)
1071 kfree(iov);
1072 return ret;
1073
1074no_payload_free:
1075 if (iov != iovstack)
1076 kfree(iov);
1077no_payload:
1078 return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
1079}
1080
1081/*
1000 * Negatively instantiate the key with the given timeout (in seconds) and link 1082 * Negatively instantiate the key with the given timeout (in seconds) and link
1001 * the key into the destination keyring if one is given. 1083 * the key into the destination keyring if one is given.
1002 * 1084 *
@@ -1013,12 +1095,42 @@ error:
1013 */ 1095 */
1014long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) 1096long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
1015{ 1097{
1098 return keyctl_reject_key(id, timeout, ENOKEY, ringid);
1099}
1100
1101/*
1102 * Negatively instantiate the key with the given timeout (in seconds) and error
1103 * code and link the key into the destination keyring if one is given.
1104 *
1105 * The caller must have the appropriate instantiation permit set for this to
1106 * work (see keyctl_assume_authority). No other permissions are required.
1107 *
1108 * The key and any links to the key will be automatically garbage collected
1109 * after the timeout expires.
1110 *
1111 * Negative keys are used to rate limit repeated request_key() calls by causing
1112 * them to return the specified error code until the negative key expires.
1113 *
1114 * If successful, 0 will be returned.
1115 */
1116long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error,
1117 key_serial_t ringid)
1118{
1016 const struct cred *cred = current_cred(); 1119 const struct cred *cred = current_cred();
1017 struct request_key_auth *rka; 1120 struct request_key_auth *rka;
1018 struct key *instkey, *dest_keyring; 1121 struct key *instkey, *dest_keyring;
1019 long ret; 1122 long ret;
1020 1123
1021 kenter("%d,%u,%d", id, timeout, ringid); 1124 kenter("%d,%u,%u,%d", id, timeout, error, ringid);
1125
1126 /* must be a valid error code and mustn't be a kernel special */
1127 if (error <= 0 ||
1128 error >= MAX_ERRNO ||
1129 error == ERESTARTSYS ||
1130 error == ERESTARTNOINTR ||
1131 error == ERESTARTNOHAND ||
1132 error == ERESTART_RESTARTBLOCK)
1133 return -EINVAL;
1022 1134
1023 /* the appropriate instantiation authorisation key must have been 1135 /* the appropriate instantiation authorisation key must have been
1024 * assumed before calling this */ 1136 * assumed before calling this */
@@ -1038,7 +1150,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
1038 goto error; 1150 goto error;
1039 1151
1040 /* instantiate the key and link it into a keyring */ 1152 /* instantiate the key and link it into a keyring */
1041 ret = key_negate_and_link(rka->target_key, timeout, 1153 ret = key_reject_and_link(rka->target_key, timeout, error,
1042 dest_keyring, instkey); 1154 dest_keyring, instkey);
1043 1155
1044 key_put(dest_keyring); 1156 key_put(dest_keyring);
@@ -1492,6 +1604,19 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
1492 case KEYCTL_SESSION_TO_PARENT: 1604 case KEYCTL_SESSION_TO_PARENT:
1493 return keyctl_session_to_parent(); 1605 return keyctl_session_to_parent();
1494 1606
1607 case KEYCTL_REJECT:
1608 return keyctl_reject_key((key_serial_t) arg2,
1609 (unsigned) arg3,
1610 (unsigned) arg4,
1611 (key_serial_t) arg5);
1612
1613 case KEYCTL_INSTANTIATE_IOV:
1614 return keyctl_instantiate_key_iov(
1615 (key_serial_t) arg2,
1616 (const struct iovec __user *) arg3,
1617 (unsigned) arg4,
1618 (key_serial_t) arg5);
1619
1495 default: 1620 default:
1496 return -EOPNOTSUPP; 1621 return -EOPNOTSUPP;
1497 } 1622 }
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 5620f084dede..cdd2f3f88c88 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -352,7 +352,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
352 goto error_2; 352 goto error_2;
353 if (key->expiry && now.tv_sec >= key->expiry) 353 if (key->expiry && now.tv_sec >= key->expiry)
354 goto error_2; 354 goto error_2;
355 key_ref = ERR_PTR(-ENOKEY); 355 key_ref = ERR_PTR(key->type_data.reject_error);
356 if (kflags & (1 << KEY_FLAG_NEGATIVE)) 356 if (kflags & (1 << KEY_FLAG_NEGATIVE))
357 goto error_2; 357 goto error_2;
358 goto found; 358 goto found;
@@ -401,7 +401,7 @@ descend:
401 401
402 /* we set a different error code if we pass a negative key */ 402 /* we set a different error code if we pass a negative key */
403 if (kflags & (1 << KEY_FLAG_NEGATIVE)) { 403 if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
404 err = -ENOKEY; 404 err = key->type_data.reject_error;
405 continue; 405 continue;
406 } 406 }
407 407
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index a3dc0d460def..df3c0417ee40 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -585,7 +585,7 @@ int wait_for_key_construction(struct key *key, bool intr)
585 if (ret < 0) 585 if (ret < 0)
586 return ret; 586 return ret;
587 if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) 587 if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
588 return -ENOKEY; 588 return key->type_data.reject_error;
589 return key_validate(key); 589 return key_validate(key);
590} 590}
591EXPORT_SYMBOL(wait_for_key_construction); 591EXPORT_SYMBOL(wait_for_key_construction);
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 83fc92e297cd..c99b9368368c 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -1076,8 +1076,7 @@ static long trusted_read(const struct key *key, char __user *buffer,
1076 char *bufp; 1076 char *bufp;
1077 int i; 1077 int i;
1078 1078
1079 p = rcu_dereference_protected(key->payload.data, 1079 p = rcu_dereference_key(key);
1080 rwsem_is_locked(&((struct key *)key)->sem));
1081 if (!p) 1080 if (!p)
1082 return -EINVAL; 1081 return -EINVAL;
1083 if (!buffer || buflen <= 0) 1082 if (!buffer || buflen <= 0)
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 02807fb16340..c6ca8662a468 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -184,8 +184,7 @@ long user_read(const struct key *key, char __user *buffer, size_t buflen)
184 struct user_key_payload *upayload; 184 struct user_key_payload *upayload;
185 long ret; 185 long ret;
186 186
187 upayload = rcu_dereference_protected( 187 upayload = rcu_dereference_key(key);
188 key->payload.data, rwsem_is_locked(&((struct key *)key)->sem));
189 ret = upayload->datalen; 188 ret = upayload->datalen;
190 189
191 /* we can return the data as is */ 190 /* we can return the data as is */
diff --git a/security/security.c b/security/security.c
index 7b7308ace8c5..bab9b23c3ff4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -181,11 +181,6 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap)
181 return ret; 181 return ret;
182} 182}
183 183
184int security_sysctl(struct ctl_table *table, int op)
185{
186 return security_ops->sysctl(table, op);
187}
188
189int security_quotactl(int cmds, int type, int id, struct super_block *sb) 184int security_quotactl(int cmds, int type, int id, struct super_block *sb)
190{ 185{
191 return security_ops->quotactl(cmds, type, id, sb); 186 return security_ops->quotactl(cmds, type, id, sb);
@@ -201,7 +196,7 @@ int security_syslog(int type)
201 return security_ops->syslog(type); 196 return security_ops->syslog(type);
202} 197}
203 198
204int security_settime(struct timespec *ts, struct timezone *tz) 199int security_settime(const struct timespec *ts, const struct timezone *tz)
205{ 200{
206 return security_ops->settime(ts, tz); 201 return security_ops->settime(ts, tz);
207} 202}
@@ -271,6 +266,11 @@ int security_sb_copy_data(char *orig, char *copy)
271} 266}
272EXPORT_SYMBOL(security_sb_copy_data); 267EXPORT_SYMBOL(security_sb_copy_data);
273 268
269int security_sb_remount(struct super_block *sb, void *data)
270{
271 return security_ops->sb_remount(sb, data);
272}
273
274int security_sb_kern_mount(struct super_block *sb, int flags, void *data) 274int security_sb_kern_mount(struct super_block *sb, int flags, void *data)
275{ 275{
276 return security_ops->sb_kern_mount(sb, flags, data); 276 return security_ops->sb_kern_mount(sb, flags, data);
@@ -335,11 +335,13 @@ void security_inode_free(struct inode *inode)
335} 335}
336 336
337int security_inode_init_security(struct inode *inode, struct inode *dir, 337int security_inode_init_security(struct inode *inode, struct inode *dir,
338 char **name, void **value, size_t *len) 338 const struct qstr *qstr, char **name,
339 void **value, size_t *len)
339{ 340{
340 if (unlikely(IS_PRIVATE(inode))) 341 if (unlikely(IS_PRIVATE(inode)))
341 return -EOPNOTSUPP; 342 return -EOPNOTSUPP;
342 return security_ops->inode_init_security(inode, dir, name, value, len); 343 return security_ops->inode_init_security(inode, dir, qstr, name, value,
344 len);
343} 345}
344EXPORT_SYMBOL(security_inode_init_security); 346EXPORT_SYMBOL(security_inode_init_security);
345 347
@@ -359,6 +361,7 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
359 return 0; 361 return 0;
360 return security_ops->path_mkdir(dir, dentry, mode); 362 return security_ops->path_mkdir(dir, dentry, mode);
361} 363}
364EXPORT_SYMBOL(security_path_mkdir);
362 365
363int security_path_rmdir(struct path *dir, struct dentry *dentry) 366int security_path_rmdir(struct path *dir, struct dentry *dentry)
364{ 367{
@@ -373,6 +376,7 @@ int security_path_unlink(struct path *dir, struct dentry *dentry)
373 return 0; 376 return 0;
374 return security_ops->path_unlink(dir, dentry); 377 return security_ops->path_unlink(dir, dentry);
375} 378}
379EXPORT_SYMBOL(security_path_unlink);
376 380
377int security_path_symlink(struct path *dir, struct dentry *dentry, 381int security_path_symlink(struct path *dir, struct dentry *dentry,
378 const char *old_name) 382 const char *old_name)
@@ -399,6 +403,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
399 return security_ops->path_rename(old_dir, old_dentry, new_dir, 403 return security_ops->path_rename(old_dir, old_dentry, new_dir,
400 new_dentry); 404 new_dentry);
401} 405}
406EXPORT_SYMBOL(security_path_rename);
402 407
403int security_path_truncate(struct path *path) 408int security_path_truncate(struct path *path)
404{ 409{
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c8d699270687..d52a92507412 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -24,9 +24,11 @@
24 */ 24 */
25 25
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/kd.h>
27#include <linux/kernel.h> 28#include <linux/kernel.h>
28#include <linux/tracehook.h> 29#include <linux/tracehook.h>
29#include <linux/errno.h> 30#include <linux/errno.h>
31#include <linux/ext2_fs.h>
30#include <linux/sched.h> 32#include <linux/sched.h>
31#include <linux/security.h> 33#include <linux/security.h>
32#include <linux/xattr.h> 34#include <linux/xattr.h>
@@ -36,14 +38,15 @@
36#include <linux/mman.h> 38#include <linux/mman.h>
37#include <linux/slab.h> 39#include <linux/slab.h>
38#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
39#include <linux/swap.h> 42#include <linux/swap.h>
40#include <linux/spinlock.h> 43#include <linux/spinlock.h>
41#include <linux/syscalls.h> 44#include <linux/syscalls.h>
45#include <linux/dcache.h>
42#include <linux/file.h> 46#include <linux/file.h>
43#include <linux/fdtable.h> 47#include <linux/fdtable.h>
44#include <linux/namei.h> 48#include <linux/namei.h>
45#include <linux/mount.h> 49#include <linux/mount.h>
46#include <linux/proc_fs.h>
47#include <linux/netfilter_ipv4.h> 50#include <linux/netfilter_ipv4.h>
48#include <linux/netfilter_ipv6.h> 51#include <linux/netfilter_ipv6.h>
49#include <linux/tty.h> 52#include <linux/tty.h>
@@ -70,7 +73,6 @@
70#include <net/ipv6.h> 73#include <net/ipv6.h>
71#include <linux/hugetlb.h> 74#include <linux/hugetlb.h>
72#include <linux/personality.h> 75#include <linux/personality.h>
73#include <linux/sysctl.h>
74#include <linux/audit.h> 76#include <linux/audit.h>
75#include <linux/string.h> 77#include <linux/string.h>
76#include <linux/selinux.h> 78#include <linux/selinux.h>
@@ -1120,39 +1122,35 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
1120} 1122}
1121 1123
1122#ifdef CONFIG_PROC_FS 1124#ifdef CONFIG_PROC_FS
1123static int selinux_proc_get_sid(struct proc_dir_entry *de, 1125static int selinux_proc_get_sid(struct dentry *dentry,
1124 u16 tclass, 1126 u16 tclass,
1125 u32 *sid) 1127 u32 *sid)
1126{ 1128{
1127 int buflen, rc; 1129 int rc;
1128 char *buffer, *path, *end; 1130 char *buffer, *path;
1129 1131
1130 buffer = (char *)__get_free_page(GFP_KERNEL); 1132 buffer = (char *)__get_free_page(GFP_KERNEL);
1131 if (!buffer) 1133 if (!buffer)
1132 return -ENOMEM; 1134 return -ENOMEM;
1133 1135
1134 buflen = PAGE_SIZE; 1136 path = dentry_path_raw(dentry, buffer, PAGE_SIZE);
1135 end = buffer+buflen; 1137 if (IS_ERR(path))
1136 *--end = '\0'; 1138 rc = PTR_ERR(path);
1137 buflen--; 1139 else {
1138 path = end-1; 1140 /* each process gets a /proc/PID/ entry. Strip off the
1139 *path = '/'; 1141 * PID part to get a valid selinux labeling.
1140 while (de && de != de->parent) { 1142 * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */
1141 buflen -= de->namelen + 1; 1143 while (path[1] >= '0' && path[1] <= '9') {
1142 if (buflen < 0) 1144 path[1] = '/';
1143 break; 1145 path++;
1144 end -= de->namelen; 1146 }
1145 memcpy(end, de->name, de->namelen); 1147 rc = security_genfs_sid("proc", path, tclass, sid);
1146 *--end = '/';
1147 path = end;
1148 de = de->parent;
1149 } 1148 }
1150 rc = security_genfs_sid("proc", path, tclass, sid);
1151 free_page((unsigned long)buffer); 1149 free_page((unsigned long)buffer);
1152 return rc; 1150 return rc;
1153} 1151}
1154#else 1152#else
1155static int selinux_proc_get_sid(struct proc_dir_entry *de, 1153static int selinux_proc_get_sid(struct dentry *dentry,
1156 u16 tclass, 1154 u16 tclass,
1157 u32 *sid) 1155 u32 *sid)
1158{ 1156{
@@ -1300,10 +1298,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
1300 1298
1301 /* Try to obtain a transition SID. */ 1299 /* Try to obtain a transition SID. */
1302 isec->sclass = inode_mode_to_security_class(inode->i_mode); 1300 isec->sclass = inode_mode_to_security_class(inode->i_mode);
1303 rc = security_transition_sid(isec->task_sid, 1301 rc = security_transition_sid(isec->task_sid, sbsec->sid,
1304 sbsec->sid, 1302 isec->sclass, NULL, &sid);
1305 isec->sclass,
1306 &sid);
1307 if (rc) 1303 if (rc)
1308 goto out_unlock; 1304 goto out_unlock;
1309 isec->sid = sid; 1305 isec->sid = sid;
@@ -1316,10 +1312,9 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
1316 isec->sid = sbsec->sid; 1312 isec->sid = sbsec->sid;
1317 1313
1318 if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) { 1314 if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) {
1319 struct proc_inode *proci = PROC_I(inode); 1315 if (opt_dentry) {
1320 if (proci->pde) {
1321 isec->sclass = inode_mode_to_security_class(inode->i_mode); 1316 isec->sclass = inode_mode_to_security_class(inode->i_mode);
1322 rc = selinux_proc_get_sid(proci->pde, 1317 rc = selinux_proc_get_sid(opt_dentry,
1323 isec->sclass, 1318 isec->sclass,
1324 &sid); 1319 &sid);
1325 if (rc) 1320 if (rc)
@@ -1578,7 +1573,7 @@ static int may_create(struct inode *dir,
1578 return rc; 1573 return rc;
1579 1574
1580 if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) { 1575 if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
1581 rc = security_transition_sid(sid, dsec->sid, tclass, &newsid); 1576 rc = security_transition_sid(sid, dsec->sid, tclass, NULL, &newsid);
1582 if (rc) 1577 if (rc)
1583 return rc; 1578 return rc;
1584 } 1579 }
@@ -1862,82 +1857,6 @@ static int selinux_capable(struct task_struct *tsk, const struct cred *cred,
1862 return task_has_capability(tsk, cred, cap, audit); 1857 return task_has_capability(tsk, cred, cap, audit);
1863} 1858}
1864 1859
1865static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid)
1866{
1867 int buflen, rc;
1868 char *buffer, *path, *end;
1869
1870 rc = -ENOMEM;
1871 buffer = (char *)__get_free_page(GFP_KERNEL);
1872 if (!buffer)
1873 goto out;
1874
1875 buflen = PAGE_SIZE;
1876 end = buffer+buflen;
1877 *--end = '\0';
1878 buflen--;
1879 path = end-1;
1880 *path = '/';
1881 while (table) {
1882 const char *name = table->procname;
1883 size_t namelen = strlen(name);
1884 buflen -= namelen + 1;
1885 if (buflen < 0)
1886 goto out_free;
1887 end -= namelen;
1888 memcpy(end, name, namelen);
1889 *--end = '/';
1890 path = end;
1891 table = table->parent;
1892 }
1893 buflen -= 4;
1894 if (buflen < 0)
1895 goto out_free;
1896 end -= 4;
1897 memcpy(end, "/sys", 4);
1898 path = end;
1899 rc = security_genfs_sid("proc", path, tclass, sid);
1900out_free:
1901 free_page((unsigned long)buffer);
1902out:
1903 return rc;
1904}
1905
1906static int selinux_sysctl(ctl_table *table, int op)
1907{
1908 int error = 0;
1909 u32 av;
1910 u32 tsid, sid;
1911 int rc;
1912
1913 sid = current_sid();
1914
1915 rc = selinux_sysctl_get_sid(table, (op == 0001) ?
1916 SECCLASS_DIR : SECCLASS_FILE, &tsid);
1917 if (rc) {
1918 /* Default to the well-defined sysctl SID. */
1919 tsid = SECINITSID_SYSCTL;
1920 }
1921
1922 /* The op values are "defined" in sysctl.c, thereby creating
1923 * a bad coupling between this module and sysctl.c */
1924 if (op == 001) {
1925 error = avc_has_perm(sid, tsid,
1926 SECCLASS_DIR, DIR__SEARCH, NULL);
1927 } else {
1928 av = 0;
1929 if (op & 004)
1930 av |= FILE__READ;
1931 if (op & 002)
1932 av |= FILE__WRITE;
1933 if (av)
1934 error = avc_has_perm(sid, tsid,
1935 SECCLASS_FILE, av, NULL);
1936 }
1937
1938 return error;
1939}
1940
1941static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb) 1860static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
1942{ 1861{
1943 const struct cred *cred = current_cred(); 1862 const struct cred *cred = current_cred();
@@ -2060,7 +1979,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
2060 } else { 1979 } else {
2061 /* Check for a default transition on this program. */ 1980 /* Check for a default transition on this program. */
2062 rc = security_transition_sid(old_tsec->sid, isec->sid, 1981 rc = security_transition_sid(old_tsec->sid, isec->sid,
2063 SECCLASS_PROCESS, &new_tsec->sid); 1982 SECCLASS_PROCESS, NULL,
1983 &new_tsec->sid);
2064 if (rc) 1984 if (rc)
2065 return rc; 1985 return rc;
2066 } 1986 }
@@ -2443,6 +2363,91 @@ out:
2443 return rc; 2363 return rc;
2444} 2364}
2445 2365
2366static int selinux_sb_remount(struct super_block *sb, void *data)
2367{
2368 int rc, i, *flags;
2369 struct security_mnt_opts opts;
2370 char *secdata, **mount_options;
2371 struct superblock_security_struct *sbsec = sb->s_security;
2372
2373 if (!(sbsec->flags & SE_SBINITIALIZED))
2374 return 0;
2375
2376 if (!data)
2377 return 0;
2378
2379 if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)
2380 return 0;
2381
2382 security_init_mnt_opts(&opts);
2383 secdata = alloc_secdata();
2384 if (!secdata)
2385 return -ENOMEM;
2386 rc = selinux_sb_copy_data(data, secdata);
2387 if (rc)
2388 goto out_free_secdata;
2389
2390 rc = selinux_parse_opts_str(secdata, &opts);
2391 if (rc)
2392 goto out_free_secdata;
2393
2394 mount_options = opts.mnt_opts;
2395 flags = opts.mnt_opts_flags;
2396
2397 for (i = 0; i < opts.num_mnt_opts; i++) {
2398 u32 sid;
2399 size_t len;
2400
2401 if (flags[i] == SE_SBLABELSUPP)
2402 continue;
2403 len = strlen(mount_options[i]);
2404 rc = security_context_to_sid(mount_options[i], len, &sid);
2405 if (rc) {
2406 printk(KERN_WARNING "SELinux: security_context_to_sid"
2407 "(%s) failed for (dev %s, type %s) errno=%d\n",
2408 mount_options[i], sb->s_id, sb->s_type->name, rc);
2409 goto out_free_opts;
2410 }
2411 rc = -EINVAL;
2412 switch (flags[i]) {
2413 case FSCONTEXT_MNT:
2414 if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid))
2415 goto out_bad_option;
2416 break;
2417 case CONTEXT_MNT:
2418 if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid))
2419 goto out_bad_option;
2420 break;
2421 case ROOTCONTEXT_MNT: {
2422 struct inode_security_struct *root_isec;
2423 root_isec = sb->s_root->d_inode->i_security;
2424
2425 if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
2426 goto out_bad_option;
2427 break;
2428 }
2429 case DEFCONTEXT_MNT:
2430 if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid))
2431 goto out_bad_option;
2432 break;
2433 default:
2434 goto out_free_opts;
2435 }
2436 }
2437
2438 rc = 0;
2439out_free_opts:
2440 security_free_mnt_opts(&opts);
2441out_free_secdata:
2442 free_secdata(secdata);
2443 return rc;
2444out_bad_option:
2445 printk(KERN_WARNING "SELinux: unable to change security options "
2446 "during remount (dev %s, type=%s)\n", sb->s_id,
2447 sb->s_type->name);
2448 goto out_free_opts;
2449}
2450
2446static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) 2451static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
2447{ 2452{
2448 const struct cred *cred = current_cred(); 2453 const struct cred *cred = current_cred();
@@ -2509,8 +2514,8 @@ static void selinux_inode_free_security(struct inode *inode)
2509} 2514}
2510 2515
2511static int selinux_inode_init_security(struct inode *inode, struct inode *dir, 2516static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
2512 char **name, void **value, 2517 const struct qstr *qstr, char **name,
2513 size_t *len) 2518 void **value, size_t *len)
2514{ 2519{
2515 const struct task_security_struct *tsec = current_security(); 2520 const struct task_security_struct *tsec = current_security();
2516 struct inode_security_struct *dsec; 2521 struct inode_security_struct *dsec;
@@ -2531,7 +2536,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
2531 else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) { 2536 else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
2532 rc = security_transition_sid(sid, dsec->sid, 2537 rc = security_transition_sid(sid, dsec->sid,
2533 inode_mode_to_security_class(inode->i_mode), 2538 inode_mode_to_security_class(inode->i_mode),
2534 &newsid); 2539 qstr, &newsid);
2535 if (rc) { 2540 if (rc) {
2536 printk(KERN_WARNING "%s: " 2541 printk(KERN_WARNING "%s: "
2537 "security_transition_sid failed, rc=%d (dev=%s " 2542 "security_transition_sid failed, rc=%d (dev=%s "
@@ -2932,16 +2937,47 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
2932 unsigned long arg) 2937 unsigned long arg)
2933{ 2938{
2934 const struct cred *cred = current_cred(); 2939 const struct cred *cred = current_cred();
2935 u32 av = 0; 2940 int error = 0;
2936 2941
2937 if (_IOC_DIR(cmd) & _IOC_WRITE) 2942 switch (cmd) {
2938 av |= FILE__WRITE; 2943 case FIONREAD:
2939 if (_IOC_DIR(cmd) & _IOC_READ) 2944 /* fall through */
2940 av |= FILE__READ; 2945 case FIBMAP:
2941 if (!av) 2946 /* fall through */
2942 av = FILE__IOCTL; 2947 case FIGETBSZ:
2948 /* fall through */
2949 case EXT2_IOC_GETFLAGS:
2950 /* fall through */
2951 case EXT2_IOC_GETVERSION:
2952 error = file_has_perm(cred, file, FILE__GETATTR);
2953 break;
2954
2955 case EXT2_IOC_SETFLAGS:
2956 /* fall through */
2957 case EXT2_IOC_SETVERSION:
2958 error = file_has_perm(cred, file, FILE__SETATTR);
2959 break;
2960
2961 /* sys_ioctl() checks */
2962 case FIONBIO:
2963 /* fall through */
2964 case FIOASYNC:
2965 error = file_has_perm(cred, file, 0);
2966 break;
2943 2967
2944 return file_has_perm(cred, file, av); 2968 case KDSKBENT:
2969 case KDSKBSENT:
2970 error = task_has_capability(current, cred, CAP_SYS_TTY_CONFIG,
2971 SECURITY_CAP_AUDIT);
2972 break;
2973
2974 /* default case assumes that the command will go
2975 * to the file's ioctl() function.
2976 */
2977 default:
2978 error = file_has_perm(cred, file, FILE__IOCTL);
2979 }
2980 return error;
2945} 2981}
2946 2982
2947static int default_noexec; 2983static int default_noexec;
@@ -3644,9 +3680,16 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid)
3644 3680
3645/* socket security operations */ 3681/* socket security operations */
3646 3682
3647static u32 socket_sockcreate_sid(const struct task_security_struct *tsec) 3683static int socket_sockcreate_sid(const struct task_security_struct *tsec,
3684 u16 secclass, u32 *socksid)
3648{ 3685{
3649 return tsec->sockcreate_sid ? : tsec->sid; 3686 if (tsec->sockcreate_sid > SECSID_NULL) {
3687 *socksid = tsec->sockcreate_sid;
3688 return 0;
3689 }
3690
3691 return security_transition_sid(tsec->sid, tsec->sid, secclass, NULL,
3692 socksid);
3650} 3693}
3651 3694
3652static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms) 3695static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms)
@@ -3670,12 +3713,16 @@ static int selinux_socket_create(int family, int type,
3670 const struct task_security_struct *tsec = current_security(); 3713 const struct task_security_struct *tsec = current_security();
3671 u32 newsid; 3714 u32 newsid;
3672 u16 secclass; 3715 u16 secclass;
3716 int rc;
3673 3717
3674 if (kern) 3718 if (kern)
3675 return 0; 3719 return 0;
3676 3720
3677 newsid = socket_sockcreate_sid(tsec);
3678 secclass = socket_type_to_security_class(family, type, protocol); 3721 secclass = socket_type_to_security_class(family, type, protocol);
3722 rc = socket_sockcreate_sid(tsec, secclass, &newsid);
3723 if (rc)
3724 return rc;
3725
3679 return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL); 3726 return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL);
3680} 3727}
3681 3728
@@ -3687,12 +3734,16 @@ static int selinux_socket_post_create(struct socket *sock, int family,
3687 struct sk_security_struct *sksec; 3734 struct sk_security_struct *sksec;
3688 int err = 0; 3735 int err = 0;
3689 3736
3737 isec->sclass = socket_type_to_security_class(family, type, protocol);
3738
3690 if (kern) 3739 if (kern)
3691 isec->sid = SECINITSID_KERNEL; 3740 isec->sid = SECINITSID_KERNEL;
3692 else 3741 else {
3693 isec->sid = socket_sockcreate_sid(tsec); 3742 err = socket_sockcreate_sid(tsec, isec->sclass, &(isec->sid));
3743 if (err)
3744 return err;
3745 }
3694 3746
3695 isec->sclass = socket_type_to_security_class(family, type, protocol);
3696 isec->initialized = 1; 3747 isec->initialized = 1;
3697 3748
3698 if (sock->sk) { 3749 if (sock->sk) {
@@ -4002,7 +4053,6 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
4002{ 4053{
4003 int err = 0; 4054 int err = 0;
4004 struct sk_security_struct *sksec = sk->sk_security; 4055 struct sk_security_struct *sksec = sk->sk_security;
4005 u32 peer_sid;
4006 u32 sk_sid = sksec->sid; 4056 u32 sk_sid = sksec->sid;
4007 struct common_audit_data ad; 4057 struct common_audit_data ad;
4008 char *addrp; 4058 char *addrp;
@@ -4021,20 +4071,10 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
4021 return err; 4071 return err;
4022 } 4072 }
4023 4073
4024 if (selinux_policycap_netpeer) { 4074 err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad);
4025 err = selinux_skb_peerlbl_sid(skb, family, &peer_sid); 4075 if (err)
4026 if (err) 4076 return err;
4027 return err; 4077 err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
4028 err = avc_has_perm(sk_sid, peer_sid,
4029 SECCLASS_PEER, PEER__RECV, &ad);
4030 if (err)
4031 selinux_netlbl_err(skb, err, 0);
4032 } else {
4033 err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad);
4034 if (err)
4035 return err;
4036 err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
4037 }
4038 4078
4039 return err; 4079 return err;
4040} 4080}
@@ -4529,9 +4569,8 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
4529 SECCLASS_PACKET, PACKET__SEND, &ad)) 4569 SECCLASS_PACKET, PACKET__SEND, &ad))
4530 return NF_DROP_ERR(-ECONNREFUSED); 4570 return NF_DROP_ERR(-ECONNREFUSED);
4531 4571
4532 if (selinux_policycap_netpeer) 4572 if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto))
4533 if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto)) 4573 return NF_DROP_ERR(-ECONNREFUSED);
4534 return NF_DROP_ERR(-ECONNREFUSED);
4535 4574
4536 return NF_ACCEPT; 4575 return NF_ACCEPT;
4537} 4576}
@@ -4574,27 +4613,14 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
4574 * from the sending socket, otherwise use the kernel's sid */ 4613 * from the sending socket, otherwise use the kernel's sid */
4575 sk = skb->sk; 4614 sk = skb->sk;
4576 if (sk == NULL) { 4615 if (sk == NULL) {
4577 switch (family) { 4616 if (skb->skb_iif) {
4578 case PF_INET: 4617 secmark_perm = PACKET__FORWARD_OUT;
4579 if (IPCB(skb)->flags & IPSKB_FORWARDED)
4580 secmark_perm = PACKET__FORWARD_OUT;
4581 else
4582 secmark_perm = PACKET__SEND;
4583 break;
4584 case PF_INET6:
4585 if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
4586 secmark_perm = PACKET__FORWARD_OUT;
4587 else
4588 secmark_perm = PACKET__SEND;
4589 break;
4590 default:
4591 return NF_DROP_ERR(-ECONNREFUSED);
4592 }
4593 if (secmark_perm == PACKET__FORWARD_OUT) {
4594 if (selinux_skb_peerlbl_sid(skb, family, &peer_sid)) 4618 if (selinux_skb_peerlbl_sid(skb, family, &peer_sid))
4595 return NF_DROP; 4619 return NF_DROP;
4596 } else 4620 } else {
4621 secmark_perm = PACKET__SEND;
4597 peer_sid = SECINITSID_KERNEL; 4622 peer_sid = SECINITSID_KERNEL;
4623 }
4598 } else { 4624 } else {
4599 struct sk_security_struct *sksec = sk->sk_security; 4625 struct sk_security_struct *sksec = sk->sk_security;
4600 peer_sid = sksec->sid; 4626 peer_sid = sksec->sid;
@@ -4848,7 +4874,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
4848 * message queue this message will be stored in 4874 * message queue this message will be stored in
4849 */ 4875 */
4850 rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG, 4876 rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG,
4851 &msec->sid); 4877 NULL, &msec->sid);
4852 if (rc) 4878 if (rc)
4853 return rc; 4879 return rc;
4854 } 4880 }
@@ -5402,7 +5428,6 @@ static struct security_operations selinux_ops = {
5402 .ptrace_traceme = selinux_ptrace_traceme, 5428 .ptrace_traceme = selinux_ptrace_traceme,
5403 .capget = selinux_capget, 5429 .capget = selinux_capget,
5404 .capset = selinux_capset, 5430 .capset = selinux_capset,
5405 .sysctl = selinux_sysctl,
5406 .capable = selinux_capable, 5431 .capable = selinux_capable,
5407 .quotactl = selinux_quotactl, 5432 .quotactl = selinux_quotactl,
5408 .quota_on = selinux_quota_on, 5433 .quota_on = selinux_quota_on,
@@ -5420,6 +5445,7 @@ static struct security_operations selinux_ops = {
5420 .sb_alloc_security = selinux_sb_alloc_security, 5445 .sb_alloc_security = selinux_sb_alloc_security,
5421 .sb_free_security = selinux_sb_free_security, 5446 .sb_free_security = selinux_sb_free_security,
5422 .sb_copy_data = selinux_sb_copy_data, 5447 .sb_copy_data = selinux_sb_copy_data,
5448 .sb_remount = selinux_sb_remount,
5423 .sb_kern_mount = selinux_sb_kern_mount, 5449 .sb_kern_mount = selinux_sb_kern_mount,
5424 .sb_show_options = selinux_sb_show_options, 5450 .sb_show_options = selinux_sb_show_options,
5425 .sb_statfs = selinux_sb_statfs, 5451 .sb_statfs = selinux_sb_statfs,
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 7ed3663332ec..b8c53723e09b 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -12,6 +12,10 @@
12#define COMMON_IPC_PERMS "create", "destroy", "getattr", "setattr", "read", \ 12#define COMMON_IPC_PERMS "create", "destroy", "getattr", "setattr", "read", \
13 "write", "associate", "unix_read", "unix_write" 13 "write", "associate", "unix_read", "unix_write"
14 14
15/*
16 * Note: The name for any socket class should be suffixed by "socket",
17 * and doesn't contain more than one substr of "socket".
18 */
15struct security_class_mapping secclass_map[] = { 19struct security_class_mapping secclass_map[] = {
16 { "security", 20 { "security",
17 { "compute_av", "compute_create", "compute_member", 21 { "compute_av", "compute_create", "compute_member",
@@ -132,8 +136,7 @@ struct security_class_mapping secclass_map[] = {
132 { "appletalk_socket", 136 { "appletalk_socket",
133 { COMMON_SOCK_PERMS, NULL } }, 137 { COMMON_SOCK_PERMS, NULL } },
134 { "packet", 138 { "packet",
135 { "send", "recv", "relabelto", "flow_in", "flow_out", 139 { "send", "recv", "relabelto", "forward_in", "forward_out", NULL } },
136 "forward_in", "forward_out", NULL } },
137 { "key", 140 { "key",
138 { "view", "read", "write", "search", "link", "setattr", "create", 141 { "view", "read", "write", "search", "link", "setattr", "create",
139 NULL } }, 142 NULL } },
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 671273eb1115..348eb00cb668 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -8,6 +8,7 @@
8#ifndef _SELINUX_SECURITY_H_ 8#ifndef _SELINUX_SECURITY_H_
9#define _SELINUX_SECURITY_H_ 9#define _SELINUX_SECURITY_H_
10 10
11#include <linux/dcache.h>
11#include <linux/magic.h> 12#include <linux/magic.h>
12#include <linux/types.h> 13#include <linux/types.h>
13#include "flask.h" 14#include "flask.h"
@@ -28,13 +29,14 @@
28#define POLICYDB_VERSION_POLCAP 22 29#define POLICYDB_VERSION_POLCAP 22
29#define POLICYDB_VERSION_PERMISSIVE 23 30#define POLICYDB_VERSION_PERMISSIVE 23
30#define POLICYDB_VERSION_BOUNDARY 24 31#define POLICYDB_VERSION_BOUNDARY 24
32#define POLICYDB_VERSION_FILENAME_TRANS 25
31 33
32/* Range of policy versions we understand*/ 34/* Range of policy versions we understand*/
33#define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE 35#define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE
34#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX 36#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
35#define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE 37#define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
36#else 38#else
37#define POLICYDB_VERSION_MAX POLICYDB_VERSION_BOUNDARY 39#define POLICYDB_VERSION_MAX POLICYDB_VERSION_FILENAME_TRANS
38#endif 40#endif
39 41
40/* Mask for just the mount related flags */ 42/* Mask for just the mount related flags */
@@ -106,8 +108,8 @@ void security_compute_av(u32 ssid, u32 tsid,
106void security_compute_av_user(u32 ssid, u32 tsid, 108void security_compute_av_user(u32 ssid, u32 tsid,
107 u16 tclass, struct av_decision *avd); 109 u16 tclass, struct av_decision *avd);
108 110
109int security_transition_sid(u32 ssid, u32 tsid, 111int security_transition_sid(u32 ssid, u32 tsid, u16 tclass,
110 u16 tclass, u32 *out_sid); 112 const struct qstr *qstr, u32 *out_sid);
111 113
112int security_transition_sid_user(u32 ssid, u32 tsid, 114int security_transition_sid_user(u32 ssid, u32 tsid,
113 u16 tclass, u32 *out_sid); 115 u16 tclass, u32 *out_sid);
diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h
index dff0c75345c1..63ce2f9e441d 100644
--- a/security/selinux/ss/avtab.h
+++ b/security/selinux/ss/avtab.h
@@ -14,7 +14,7 @@
14 * 14 *
15 * Copyright (C) 2003 Tresys Technology, LLC 15 * Copyright (C) 2003 Tresys Technology, LLC
16 * This program is free software; you can redistribute it and/or modify 16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by 17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation, version 2. 18 * the Free Software Foundation, version 2.
19 * 19 *
20 * Updated: Yuichi Nakamura <ynakam@hitachisoft.jp> 20 * Updated: Yuichi Nakamura <ynakam@hitachisoft.jp>
@@ -27,16 +27,16 @@ struct avtab_key {
27 u16 source_type; /* source type */ 27 u16 source_type; /* source type */
28 u16 target_type; /* target type */ 28 u16 target_type; /* target type */
29 u16 target_class; /* target object class */ 29 u16 target_class; /* target object class */
30#define AVTAB_ALLOWED 1 30#define AVTAB_ALLOWED 0x0001
31#define AVTAB_AUDITALLOW 2 31#define AVTAB_AUDITALLOW 0x0002
32#define AVTAB_AUDITDENY 4 32#define AVTAB_AUDITDENY 0x0004
33#define AVTAB_AV (AVTAB_ALLOWED | AVTAB_AUDITALLOW | AVTAB_AUDITDENY) 33#define AVTAB_AV (AVTAB_ALLOWED | AVTAB_AUDITALLOW | AVTAB_AUDITDENY)
34#define AVTAB_TRANSITION 16 34#define AVTAB_TRANSITION 0x0010
35#define AVTAB_MEMBER 32 35#define AVTAB_MEMBER 0x0020
36#define AVTAB_CHANGE 64 36#define AVTAB_CHANGE 0x0040
37#define AVTAB_TYPE (AVTAB_TRANSITION | AVTAB_MEMBER | AVTAB_CHANGE) 37#define AVTAB_TYPE (AVTAB_TRANSITION | AVTAB_MEMBER | AVTAB_CHANGE)
38#define AVTAB_ENABLED_OLD 0x80000000 /* reserved for used in cond_avtab */ 38#define AVTAB_ENABLED_OLD 0x80000000 /* reserved for used in cond_avtab */
39#define AVTAB_ENABLED 0x8000 /* reserved for used in cond_avtab */ 39#define AVTAB_ENABLED 0x8000 /* reserved for used in cond_avtab */
40 u16 specified; /* what field is specified */ 40 u16 specified; /* what field is specified */
41}; 41};
42 42
@@ -86,7 +86,6 @@ void avtab_cache_destroy(void);
86 86
87#define MAX_AVTAB_HASH_BITS 11 87#define MAX_AVTAB_HASH_BITS 11
88#define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS) 88#define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS)
89#define MAX_AVTAB_HASH_MASK (MAX_AVTAB_HASH_BUCKETS-1)
90 89
91#endif /* _SS_AVTAB_H_ */ 90#endif /* _SS_AVTAB_H_ */
92 91
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index 1f4e93c2ae86..922f8afa89dd 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -36,7 +36,6 @@ struct ebitmap {
36}; 36};
37 37
38#define ebitmap_length(e) ((e)->highbit) 38#define ebitmap_length(e) ((e)->highbit)
39#define ebitmap_startbit(e) ((e)->node ? (e)->node->startbit : 0)
40 39
41static inline unsigned int ebitmap_start_positive(struct ebitmap *e, 40static inline unsigned int ebitmap_start_positive(struct ebitmap *e,
42 struct ebitmap_node **n) 41 struct ebitmap_node **n)
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 1ef8e4e89880..e96174216bc9 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -512,7 +512,8 @@ int mls_compute_sid(struct context *scontext,
512 struct context *tcontext, 512 struct context *tcontext,
513 u16 tclass, 513 u16 tclass,
514 u32 specified, 514 u32 specified,
515 struct context *newcontext) 515 struct context *newcontext,
516 bool sock)
516{ 517{
517 struct range_trans rtr; 518 struct range_trans rtr;
518 struct mls_range *r; 519 struct mls_range *r;
@@ -531,7 +532,7 @@ int mls_compute_sid(struct context *scontext,
531 return mls_range_set(newcontext, r); 532 return mls_range_set(newcontext, r);
532 /* Fallthrough */ 533 /* Fallthrough */
533 case AVTAB_CHANGE: 534 case AVTAB_CHANGE:
534 if (tclass == policydb.process_class) 535 if ((tclass == policydb.process_class) || (sock == true))
535 /* Use the process MLS attributes. */ 536 /* Use the process MLS attributes. */
536 return mls_context_cpy(newcontext, scontext); 537 return mls_context_cpy(newcontext, scontext);
537 else 538 else
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index cd9152632e54..037bf9d82d41 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -49,7 +49,8 @@ int mls_compute_sid(struct context *scontext,
49 struct context *tcontext, 49 struct context *tcontext,
50 u16 tclass, 50 u16 tclass,
51 u32 specified, 51 u32 specified,
52 struct context *newcontext); 52 struct context *newcontext,
53 bool sock);
53 54
54int mls_setup_user_range(struct context *fromcon, struct user_datum *user, 55int mls_setup_user_range(struct context *fromcon, struct user_datum *user,
55 struct context *usercon); 56 struct context *usercon);
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 57363562f0f8..e7b850ad57ee 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -123,6 +123,11 @@ static struct policydb_compat_info policydb_compat[] = {
123 .sym_num = SYM_NUM, 123 .sym_num = SYM_NUM,
124 .ocon_num = OCON_NUM, 124 .ocon_num = OCON_NUM,
125 }, 125 },
126 {
127 .version = POLICYDB_VERSION_FILENAME_TRANS,
128 .sym_num = SYM_NUM,
129 .ocon_num = OCON_NUM,
130 },
126}; 131};
127 132
128static struct policydb_compat_info *policydb_lookup_compat(int version) 133static struct policydb_compat_info *policydb_lookup_compat(int version)
@@ -704,6 +709,7 @@ void policydb_destroy(struct policydb *p)
704 int i; 709 int i;
705 struct role_allow *ra, *lra = NULL; 710 struct role_allow *ra, *lra = NULL;
706 struct role_trans *tr, *ltr = NULL; 711 struct role_trans *tr, *ltr = NULL;
712 struct filename_trans *ft, *nft;
707 713
708 for (i = 0; i < SYM_NUM; i++) { 714 for (i = 0; i < SYM_NUM; i++) {
709 cond_resched(); 715 cond_resched();
@@ -781,6 +787,15 @@ void policydb_destroy(struct policydb *p)
781 } 787 }
782 flex_array_free(p->type_attr_map_array); 788 flex_array_free(p->type_attr_map_array);
783 } 789 }
790
791 ft = p->filename_trans;
792 while (ft) {
793 nft = ft->next;
794 kfree(ft->name);
795 kfree(ft);
796 ft = nft;
797 }
798
784 ebitmap_destroy(&p->policycaps); 799 ebitmap_destroy(&p->policycaps);
785 ebitmap_destroy(&p->permissive_map); 800 ebitmap_destroy(&p->permissive_map);
786 801
@@ -1788,6 +1803,76 @@ out:
1788 return rc; 1803 return rc;
1789} 1804}
1790 1805
1806static int filename_trans_read(struct policydb *p, void *fp)
1807{
1808 struct filename_trans *ft, *last;
1809 u32 nel, len;
1810 char *name;
1811 __le32 buf[4];
1812 int rc, i;
1813
1814 if (p->policyvers < POLICYDB_VERSION_FILENAME_TRANS)
1815 return 0;
1816
1817 rc = next_entry(buf, fp, sizeof(u32));
1818 if (rc)
1819 goto out;
1820 nel = le32_to_cpu(buf[0]);
1821
1822 printk(KERN_ERR "%s: nel=%d\n", __func__, nel);
1823
1824 last = p->filename_trans;
1825 while (last && last->next)
1826 last = last->next;
1827
1828 for (i = 0; i < nel; i++) {
1829 rc = -ENOMEM;
1830 ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1831 if (!ft)
1832 goto out;
1833
1834 /* add it to the tail of the list */
1835 if (!last)
1836 p->filename_trans = ft;
1837 else
1838 last->next = ft;
1839 last = ft;
1840
1841 /* length of the path component string */
1842 rc = next_entry(buf, fp, sizeof(u32));
1843 if (rc)
1844 goto out;
1845 len = le32_to_cpu(buf[0]);
1846
1847 rc = -ENOMEM;
1848 name = kmalloc(len + 1, GFP_KERNEL);
1849 if (!name)
1850 goto out;
1851
1852 ft->name = name;
1853
1854 /* path component string */
1855 rc = next_entry(name, fp, len);
1856 if (rc)
1857 goto out;
1858 name[len] = 0;
1859
1860 printk(KERN_ERR "%s: ft=%p ft->name=%p ft->name=%s\n", __func__, ft, ft->name, ft->name);
1861
1862 rc = next_entry(buf, fp, sizeof(u32) * 4);
1863 if (rc)
1864 goto out;
1865
1866 ft->stype = le32_to_cpu(buf[0]);
1867 ft->ttype = le32_to_cpu(buf[1]);
1868 ft->tclass = le32_to_cpu(buf[2]);
1869 ft->otype = le32_to_cpu(buf[3]);
1870 }
1871 rc = 0;
1872out:
1873 return rc;
1874}
1875
1791static int genfs_read(struct policydb *p, void *fp) 1876static int genfs_read(struct policydb *p, void *fp)
1792{ 1877{
1793 int i, j, rc; 1878 int i, j, rc;
@@ -2251,6 +2336,10 @@ int policydb_read(struct policydb *p, void *fp)
2251 lra = ra; 2336 lra = ra;
2252 } 2337 }
2253 2338
2339 rc = filename_trans_read(p, fp);
2340 if (rc)
2341 goto bad;
2342
2254 rc = policydb_index(p); 2343 rc = policydb_index(p);
2255 if (rc) 2344 if (rc)
2256 goto bad; 2345 goto bad;
@@ -3025,6 +3114,43 @@ static int range_write(struct policydb *p, void *fp)
3025 return 0; 3114 return 0;
3026} 3115}
3027 3116
3117static int filename_trans_write(struct policydb *p, void *fp)
3118{
3119 struct filename_trans *ft;
3120 u32 len, nel = 0;
3121 __le32 buf[4];
3122 int rc;
3123
3124 for (ft = p->filename_trans; ft; ft = ft->next)
3125 nel++;
3126
3127 buf[0] = cpu_to_le32(nel);
3128 rc = put_entry(buf, sizeof(u32), 1, fp);
3129 if (rc)
3130 return rc;
3131
3132 for (ft = p->filename_trans; ft; ft = ft->next) {
3133 len = strlen(ft->name);
3134 buf[0] = cpu_to_le32(len);
3135 rc = put_entry(buf, sizeof(u32), 1, fp);
3136 if (rc)
3137 return rc;
3138
3139 rc = put_entry(ft->name, sizeof(char), len, fp);
3140 if (rc)
3141 return rc;
3142
3143 buf[0] = ft->stype;
3144 buf[1] = ft->ttype;
3145 buf[2] = ft->tclass;
3146 buf[3] = ft->otype;
3147
3148 rc = put_entry(buf, sizeof(u32), 4, fp);
3149 if (rc)
3150 return rc;
3151 }
3152 return 0;
3153}
3028/* 3154/*
3029 * Write the configuration data in a policy database 3155 * Write the configuration data in a policy database
3030 * structure to a policy database binary representation 3156 * structure to a policy database binary representation
@@ -3135,6 +3261,10 @@ int policydb_write(struct policydb *p, void *fp)
3135 if (rc) 3261 if (rc)
3136 return rc; 3262 return rc;
3137 3263
3264 rc = filename_trans_write(p, fp);
3265 if (rc)
3266 return rc;
3267
3138 rc = ocontext_write(p, info, fp); 3268 rc = ocontext_write(p, info, fp);
3139 if (rc) 3269 if (rc)
3140 return rc; 3270 return rc;
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index 4e3ab9d0b315..732ea4a68682 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -77,6 +77,15 @@ struct role_trans {
77 struct role_trans *next; 77 struct role_trans *next;
78}; 78};
79 79
80struct filename_trans {
81 struct filename_trans *next;
82 u32 stype; /* current process */
83 u32 ttype; /* parent dir context */
84 u16 tclass; /* class of new object */
85 const char *name; /* last path component */
86 u32 otype; /* expected of new object */
87};
88
80struct role_allow { 89struct role_allow {
81 u32 role; /* current role */ 90 u32 role; /* current role */
82 u32 new_role; /* new role */ 91 u32 new_role; /* new role */
@@ -217,6 +226,9 @@ struct policydb {
217 /* role transitions */ 226 /* role transitions */
218 struct role_trans *role_tr; 227 struct role_trans *role_tr;
219 228
229 /* file transitions with the last path component */
230 struct filename_trans *filename_trans;
231
220 /* bools indexed by (value - 1) */ 232 /* bools indexed by (value - 1) */
221 struct cond_bool_datum **bool_val_to_struct; 233 struct cond_bool_datum **bool_val_to_struct;
222 /* type enforcement conditional access vectors and transitions */ 234 /* type enforcement conditional access vectors and transitions */
@@ -302,7 +314,7 @@ static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes)
302 return 0; 314 return 0;
303} 315}
304 316
305static inline int put_entry(void *buf, size_t bytes, int num, struct policy_file *fp) 317static inline int put_entry(const void *buf, size_t bytes, int num, struct policy_file *fp)
306{ 318{
307 size_t len = bytes * num; 319 size_t len = bytes * num;
308 320
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index a03cfaf0ee07..3e7544d2a07b 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -201,6 +201,21 @@ static u16 unmap_class(u16 tclass)
201 return tclass; 201 return tclass;
202} 202}
203 203
204/*
205 * Get kernel value for class from its policy value
206 */
207static u16 map_class(u16 pol_value)
208{
209 u16 i;
210
211 for (i = 1; i < current_mapping_size; i++) {
212 if (current_mapping[i].value == pol_value)
213 return i;
214 }
215
216 return pol_value;
217}
218
204static void map_decision(u16 tclass, struct av_decision *avd, 219static void map_decision(u16 tclass, struct av_decision *avd,
205 int allow_unknown) 220 int allow_unknown)
206{ 221{
@@ -1343,10 +1358,27 @@ out:
1343 return -EACCES; 1358 return -EACCES;
1344} 1359}
1345 1360
1361static void filename_compute_type(struct policydb *p, struct context *newcontext,
1362 u32 scon, u32 tcon, u16 tclass,
1363 const struct qstr *qstr)
1364{
1365 struct filename_trans *ft;
1366 for (ft = p->filename_trans; ft; ft = ft->next) {
1367 if (ft->stype == scon &&
1368 ft->ttype == tcon &&
1369 ft->tclass == tclass &&
1370 !strcmp(ft->name, qstr->name)) {
1371 newcontext->type = ft->otype;
1372 return;
1373 }
1374 }
1375}
1376
1346static int security_compute_sid(u32 ssid, 1377static int security_compute_sid(u32 ssid,
1347 u32 tsid, 1378 u32 tsid,
1348 u16 orig_tclass, 1379 u16 orig_tclass,
1349 u32 specified, 1380 u32 specified,
1381 const struct qstr *qstr,
1350 u32 *out_sid, 1382 u32 *out_sid,
1351 bool kern) 1383 bool kern)
1352{ 1384{
@@ -1357,6 +1389,7 @@ static int security_compute_sid(u32 ssid,
1357 struct avtab_node *node; 1389 struct avtab_node *node;
1358 u16 tclass; 1390 u16 tclass;
1359 int rc = 0; 1391 int rc = 0;
1392 bool sock;
1360 1393
1361 if (!ss_initialized) { 1394 if (!ss_initialized) {
1362 switch (orig_tclass) { 1395 switch (orig_tclass) {
@@ -1374,10 +1407,13 @@ static int security_compute_sid(u32 ssid,
1374 1407
1375 read_lock(&policy_rwlock); 1408 read_lock(&policy_rwlock);
1376 1409
1377 if (kern) 1410 if (kern) {
1378 tclass = unmap_class(orig_tclass); 1411 tclass = unmap_class(orig_tclass);
1379 else 1412 sock = security_is_socket_class(orig_tclass);
1413 } else {
1380 tclass = orig_tclass; 1414 tclass = orig_tclass;
1415 sock = security_is_socket_class(map_class(tclass));
1416 }
1381 1417
1382 scontext = sidtab_search(&sidtab, ssid); 1418 scontext = sidtab_search(&sidtab, ssid);
1383 if (!scontext) { 1419 if (!scontext) {
@@ -1408,7 +1444,7 @@ static int security_compute_sid(u32 ssid,
1408 } 1444 }
1409 1445
1410 /* Set the role and type to default values. */ 1446 /* Set the role and type to default values. */
1411 if (tclass == policydb.process_class) { 1447 if ((tclass == policydb.process_class) || (sock == true)) {
1412 /* Use the current role and type of process. */ 1448 /* Use the current role and type of process. */
1413 newcontext.role = scontext->role; 1449 newcontext.role = scontext->role;
1414 newcontext.type = scontext->type; 1450 newcontext.type = scontext->type;
@@ -1442,6 +1478,11 @@ static int security_compute_sid(u32 ssid,
1442 newcontext.type = avdatum->data; 1478 newcontext.type = avdatum->data;
1443 } 1479 }
1444 1480
1481 /* if we have a qstr this is a file trans check so check those rules */
1482 if (qstr)
1483 filename_compute_type(&policydb, &newcontext, scontext->type,
1484 tcontext->type, tclass, qstr);
1485
1445 /* Check for class-specific changes. */ 1486 /* Check for class-specific changes. */
1446 if (tclass == policydb.process_class) { 1487 if (tclass == policydb.process_class) {
1447 if (specified & AVTAB_TRANSITION) { 1488 if (specified & AVTAB_TRANSITION) {
@@ -1460,7 +1501,8 @@ static int security_compute_sid(u32 ssid,
1460 1501
1461 /* Set the MLS attributes. 1502 /* Set the MLS attributes.
1462 This is done last because it may allocate memory. */ 1503 This is done last because it may allocate memory. */
1463 rc = mls_compute_sid(scontext, tcontext, tclass, specified, &newcontext); 1504 rc = mls_compute_sid(scontext, tcontext, tclass, specified,
1505 &newcontext, sock);
1464 if (rc) 1506 if (rc)
1465 goto out_unlock; 1507 goto out_unlock;
1466 1508
@@ -1495,22 +1537,17 @@ out:
1495 * if insufficient memory is available, or %0 if the new SID was 1537 * if insufficient memory is available, or %0 if the new SID was
1496 * computed successfully. 1538 * computed successfully.
1497 */ 1539 */
1498int security_transition_sid(u32 ssid, 1540int security_transition_sid(u32 ssid, u32 tsid, u16 tclass,
1499 u32 tsid, 1541 const struct qstr *qstr, u32 *out_sid)
1500 u16 tclass,
1501 u32 *out_sid)
1502{ 1542{
1503 return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, 1543 return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION,
1504 out_sid, true); 1544 qstr, out_sid, true);
1505} 1545}
1506 1546
1507int security_transition_sid_user(u32 ssid, 1547int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, u32 *out_sid)
1508 u32 tsid,
1509 u16 tclass,
1510 u32 *out_sid)
1511{ 1548{
1512 return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, 1549 return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION,
1513 out_sid, false); 1550 NULL, out_sid, false);
1514} 1551}
1515 1552
1516/** 1553/**
@@ -1531,8 +1568,8 @@ int security_member_sid(u32 ssid,
1531 u16 tclass, 1568 u16 tclass,
1532 u32 *out_sid) 1569 u32 *out_sid)
1533{ 1570{
1534 return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, out_sid, 1571 return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, NULL,
1535 false); 1572 out_sid, false);
1536} 1573}
1537 1574
1538/** 1575/**
@@ -1553,8 +1590,8 @@ int security_change_sid(u32 ssid,
1553 u16 tclass, 1590 u16 tclass,
1554 u32 *out_sid) 1591 u32 *out_sid)
1555{ 1592{
1556 return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, out_sid, 1593 return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, NULL,
1557 false); 1594 out_sid, false);
1558} 1595}
1559 1596
1560/* Clone the SID into the new SID table. */ 1597/* Clone the SID into the new SID table. */
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index fff78d3b51a2..728c57e3d65d 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -208,7 +208,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
208 if (!uctx) 208 if (!uctx)
209 goto not_from_user; 209 goto not_from_user;
210 210
211 if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) 211 if (uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
212 return -EINVAL; 212 return -EINVAL;
213 213
214 str_len = uctx->ctx_len; 214 str_len = uctx->ctx_len;
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 129c4eb8ffb1..b449cfdad21c 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -52,13 +52,16 @@ struct socket_smack {
52struct inode_smack { 52struct inode_smack {
53 char *smk_inode; /* label of the fso */ 53 char *smk_inode; /* label of the fso */
54 char *smk_task; /* label of the task */ 54 char *smk_task; /* label of the task */
55 char *smk_mmap; /* label of the mmap domain */
55 struct mutex smk_lock; /* initialization lock */ 56 struct mutex smk_lock; /* initialization lock */
56 int smk_flags; /* smack inode flags */ 57 int smk_flags; /* smack inode flags */
57}; 58};
58 59
59struct task_smack { 60struct task_smack {
60 char *smk_task; /* label used for access control */ 61 char *smk_task; /* label for access control */
61 char *smk_forked; /* label when forked */ 62 char *smk_forked; /* label when forked */
63 struct list_head smk_rules; /* per task access rules */
64 struct mutex smk_rules_lock; /* lock for the rules */
62}; 65};
63 66
64#define SMK_INODE_INSTANT 0x01 /* inode is instantiated */ 67#define SMK_INODE_INSTANT 0x01 /* inode is instantiated */
@@ -152,12 +155,6 @@ struct smack_known {
152#define SMACK_MAGIC 0x43415d53 /* "SMAC" */ 155#define SMACK_MAGIC 0x43415d53 /* "SMAC" */
153 156
154/* 157/*
155 * A limit on the number of entries in the lists
156 * makes some of the list administration easier.
157 */
158#define SMACK_LIST_MAX 10000
159
160/*
161 * CIPSO defaults. 158 * CIPSO defaults.
162 */ 159 */
163#define SMACK_CIPSO_DOI_DEFAULT 3 /* Historical */ 160#define SMACK_CIPSO_DOI_DEFAULT 3 /* Historical */
@@ -174,9 +171,7 @@ struct smack_known {
174/* 171/*
175 * Just to make the common cases easier to deal with 172 * Just to make the common cases easier to deal with
176 */ 173 */
177#define MAY_ANY (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC)
178#define MAY_ANYREAD (MAY_READ | MAY_EXEC) 174#define MAY_ANYREAD (MAY_READ | MAY_EXEC)
179#define MAY_ANYWRITE (MAY_WRITE | MAY_APPEND)
180#define MAY_READWRITE (MAY_READ | MAY_WRITE) 175#define MAY_READWRITE (MAY_READ | MAY_WRITE)
181#define MAY_NOT 0 176#define MAY_NOT 0
182 177
@@ -202,7 +197,7 @@ struct inode_smack *new_inode_smack(char *);
202/* 197/*
203 * These functions are in smack_access.c 198 * These functions are in smack_access.c
204 */ 199 */
205int smk_access_entry(char *, char *); 200int smk_access_entry(char *, char *, struct list_head *);
206int smk_access(char *, char *, int, struct smk_audit_info *); 201int smk_access(char *, char *, int, struct smk_audit_info *);
207int smk_curacc(char *, u32, struct smk_audit_info *); 202int smk_curacc(char *, u32, struct smk_audit_info *);
208int smack_to_cipso(const char *, struct smack_cipso *); 203int smack_to_cipso(const char *, struct smack_cipso *);
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 7ba8478f599e..86453db4333d 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -70,10 +70,11 @@ int log_policy = SMACK_AUDIT_DENIED;
70 * smk_access_entry - look up matching access rule 70 * smk_access_entry - look up matching access rule
71 * @subject_label: a pointer to the subject's Smack label 71 * @subject_label: a pointer to the subject's Smack label
72 * @object_label: a pointer to the object's Smack label 72 * @object_label: a pointer to the object's Smack label
73 * @rule_list: the list of rules to search
73 * 74 *
74 * This function looks up the subject/object pair in the 75 * This function looks up the subject/object pair in the
75 * access rule list and returns pointer to the matching rule if found, 76 * access rule list and returns the access mode. If no
76 * NULL otherwise. 77 * entry is found returns -ENOENT.
77 * 78 *
78 * NOTE: 79 * NOTE:
79 * Even though Smack labels are usually shared on smack_list 80 * Even though Smack labels are usually shared on smack_list
@@ -85,13 +86,13 @@ int log_policy = SMACK_AUDIT_DENIED;
85 * will be on the list, so checking the pointers may be a worthwhile 86 * will be on the list, so checking the pointers may be a worthwhile
86 * optimization. 87 * optimization.
87 */ 88 */
88int smk_access_entry(char *subject_label, char *object_label) 89int smk_access_entry(char *subject_label, char *object_label,
90 struct list_head *rule_list)
89{ 91{
90 u32 may = MAY_NOT; 92 int may = -ENOENT;
91 struct smack_rule *srp; 93 struct smack_rule *srp;
92 94
93 rcu_read_lock(); 95 list_for_each_entry_rcu(srp, rule_list, list) {
94 list_for_each_entry_rcu(srp, &smack_rule_list, list) {
95 if (srp->smk_subject == subject_label || 96 if (srp->smk_subject == subject_label ||
96 strcmp(srp->smk_subject, subject_label) == 0) { 97 strcmp(srp->smk_subject, subject_label) == 0) {
97 if (srp->smk_object == object_label || 98 if (srp->smk_object == object_label ||
@@ -101,7 +102,6 @@ int smk_access_entry(char *subject_label, char *object_label)
101 } 102 }
102 } 103 }
103 } 104 }
104 rcu_read_unlock();
105 105
106 return may; 106 return may;
107} 107}
@@ -129,7 +129,7 @@ int smk_access_entry(char *subject_label, char *object_label)
129int smk_access(char *subject_label, char *object_label, int request, 129int smk_access(char *subject_label, char *object_label, int request,
130 struct smk_audit_info *a) 130 struct smk_audit_info *a)
131{ 131{
132 u32 may = MAY_NOT; 132 int may = MAY_NOT;
133 int rc = 0; 133 int rc = 0;
134 134
135 /* 135 /*
@@ -181,13 +181,14 @@ int smk_access(char *subject_label, char *object_label, int request,
181 * Beyond here an explicit relationship is required. 181 * Beyond here an explicit relationship is required.
182 * If the requested access is contained in the available 182 * If the requested access is contained in the available
183 * access (e.g. read is included in readwrite) it's 183 * access (e.g. read is included in readwrite) it's
184 * good. 184 * good. A negative response from smk_access_entry()
185 */ 185 * indicates there is no entry for this pair.
186 may = smk_access_entry(subject_label, object_label);
187 /*
188 * This is a bit map operation.
189 */ 186 */
190 if ((request & may) == request) 187 rcu_read_lock();
188 may = smk_access_entry(subject_label, object_label, &smack_rule_list);
189 rcu_read_unlock();
190
191 if (may > 0 && (request & may) == request)
191 goto out_audit; 192 goto out_audit;
192 193
193 rc = -EACCES; 194 rc = -EACCES;
@@ -212,12 +213,27 @@ out_audit:
212 */ 213 */
213int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a) 214int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
214{ 215{
216 struct task_smack *tsp = current_security();
217 char *sp = smk_of_task(tsp);
218 int may;
215 int rc; 219 int rc;
216 char *sp = smk_of_current();
217 220
221 /*
222 * Check the global rule list
223 */
218 rc = smk_access(sp, obj_label, mode, NULL); 224 rc = smk_access(sp, obj_label, mode, NULL);
219 if (rc == 0) 225 if (rc == 0) {
220 goto out_audit; 226 /*
227 * If there is an entry in the task's rule list
228 * it can further restrict access.
229 */
230 may = smk_access_entry(sp, obj_label, &tsp->smk_rules);
231 if (may < 0)
232 goto out_audit;
233 if ((mode & may) == mode)
234 goto out_audit;
235 rc = -EACCES;
236 }
221 237
222 /* 238 /*
223 * Return if a specific label has been designated as the 239 * Return if a specific label has been designated as the
@@ -228,7 +244,7 @@ int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
228 goto out_audit; 244 goto out_audit;
229 245
230 if (capable(CAP_MAC_OVERRIDE)) 246 if (capable(CAP_MAC_OVERRIDE))
231 return 0; 247 rc = 0;
232 248
233out_audit: 249out_audit:
234#ifdef CONFIG_AUDIT 250#ifdef CONFIG_AUDIT
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 533bf3255d7f..23c7a6d0c80c 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -33,6 +33,7 @@
33#include <net/cipso_ipv4.h> 33#include <net/cipso_ipv4.h>
34#include <linux/audit.h> 34#include <linux/audit.h>
35#include <linux/magic.h> 35#include <linux/magic.h>
36#include <linux/dcache.h>
36#include "smack.h" 37#include "smack.h"
37 38
38#define task_security(task) (task_cred_xxx((task), security)) 39#define task_security(task) (task_cred_xxx((task), security))
@@ -84,6 +85,56 @@ struct inode_smack *new_inode_smack(char *smack)
84 return isp; 85 return isp;
85} 86}
86 87
88/**
89 * new_task_smack - allocate a task security blob
90 * @smack: a pointer to the Smack label to use in the blob
91 *
92 * Returns the new blob or NULL if there's no memory available
93 */
94static struct task_smack *new_task_smack(char *task, char *forked, gfp_t gfp)
95{
96 struct task_smack *tsp;
97
98 tsp = kzalloc(sizeof(struct task_smack), gfp);
99 if (tsp == NULL)
100 return NULL;
101
102 tsp->smk_task = task;
103 tsp->smk_forked = forked;
104 INIT_LIST_HEAD(&tsp->smk_rules);
105 mutex_init(&tsp->smk_rules_lock);
106
107 return tsp;
108}
109
110/**
111 * smk_copy_rules - copy a rule set
112 * @nhead - new rules header pointer
113 * @ohead - old rules header pointer
114 *
115 * Returns 0 on success, -ENOMEM on error
116 */
117static int smk_copy_rules(struct list_head *nhead, struct list_head *ohead,
118 gfp_t gfp)
119{
120 struct smack_rule *nrp;
121 struct smack_rule *orp;
122 int rc = 0;
123
124 INIT_LIST_HEAD(nhead);
125
126 list_for_each_entry_rcu(orp, ohead, list) {
127 nrp = kzalloc(sizeof(struct smack_rule), gfp);
128 if (nrp == NULL) {
129 rc = -ENOMEM;
130 break;
131 }
132 *nrp = *orp;
133 list_add_rcu(&nrp->list, nhead);
134 }
135 return rc;
136}
137
87/* 138/*
88 * LSM hooks. 139 * LSM hooks.
89 * We he, that is fun! 140 * We he, that is fun!
@@ -102,23 +153,17 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
102{ 153{
103 int rc; 154 int rc;
104 struct smk_audit_info ad; 155 struct smk_audit_info ad;
105 char *sp, *tsp; 156 char *tsp;
106 157
107 rc = cap_ptrace_access_check(ctp, mode); 158 rc = cap_ptrace_access_check(ctp, mode);
108 if (rc != 0) 159 if (rc != 0)
109 return rc; 160 return rc;
110 161
111 sp = smk_of_current();
112 tsp = smk_of_task(task_security(ctp)); 162 tsp = smk_of_task(task_security(ctp));
113 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); 163 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
114 smk_ad_setfield_u_tsk(&ad, ctp); 164 smk_ad_setfield_u_tsk(&ad, ctp);
115 165
116 /* we won't log here, because rc can be overriden */ 166 rc = smk_curacc(tsp, MAY_READWRITE, &ad);
117 rc = smk_access(sp, tsp, MAY_READWRITE, NULL);
118 if (rc != 0 && capable(CAP_MAC_OVERRIDE))
119 rc = 0;
120
121 smack_log(sp, tsp, MAY_READWRITE, rc, &ad);
122 return rc; 167 return rc;
123} 168}
124 169
@@ -134,23 +179,17 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
134{ 179{
135 int rc; 180 int rc;
136 struct smk_audit_info ad; 181 struct smk_audit_info ad;
137 char *sp, *tsp; 182 char *tsp;
138 183
139 rc = cap_ptrace_traceme(ptp); 184 rc = cap_ptrace_traceme(ptp);
140 if (rc != 0) 185 if (rc != 0)
141 return rc; 186 return rc;
142 187
188 tsp = smk_of_task(task_security(ptp));
143 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); 189 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
144 smk_ad_setfield_u_tsk(&ad, ptp); 190 smk_ad_setfield_u_tsk(&ad, ptp);
145 191
146 sp = smk_of_current(); 192 rc = smk_curacc(tsp, MAY_READWRITE, &ad);
147 tsp = smk_of_task(task_security(ptp));
148 /* we won't log here, because rc can be overriden */
149 rc = smk_access(tsp, sp, MAY_READWRITE, NULL);
150 if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE))
151 rc = 0;
152
153 smack_log(tsp, sp, MAY_READWRITE, rc, &ad);
154 return rc; 193 return rc;
155} 194}
156 195
@@ -463,6 +502,7 @@ static void smack_inode_free_security(struct inode *inode)
463 * smack_inode_init_security - copy out the smack from an inode 502 * smack_inode_init_security - copy out the smack from an inode
464 * @inode: the inode 503 * @inode: the inode
465 * @dir: unused 504 * @dir: unused
505 * @qstr: unused
466 * @name: where to put the attribute name 506 * @name: where to put the attribute name
467 * @value: where to put the attribute value 507 * @value: where to put the attribute value
468 * @len: where to put the length of the attribute 508 * @len: where to put the length of the attribute
@@ -470,11 +510,12 @@ static void smack_inode_free_security(struct inode *inode)
470 * Returns 0 if it all works out, -ENOMEM if there's no memory 510 * Returns 0 if it all works out, -ENOMEM if there's no memory
471 */ 511 */
472static int smack_inode_init_security(struct inode *inode, struct inode *dir, 512static int smack_inode_init_security(struct inode *inode, struct inode *dir,
473 char **name, void **value, size_t *len) 513 const struct qstr *qstr, char **name,
514 void **value, size_t *len)
474{ 515{
475 char *isp = smk_of_inode(inode); 516 char *isp = smk_of_inode(inode);
476 char *dsp = smk_of_inode(dir); 517 char *dsp = smk_of_inode(dir);
477 u32 may; 518 int may;
478 519
479 if (name) { 520 if (name) {
480 *name = kstrdup(XATTR_SMACK_SUFFIX, GFP_KERNEL); 521 *name = kstrdup(XATTR_SMACK_SUFFIX, GFP_KERNEL);
@@ -483,14 +524,17 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir,
483 } 524 }
484 525
485 if (value) { 526 if (value) {
486 may = smk_access_entry(smk_of_current(), dsp); 527 rcu_read_lock();
528 may = smk_access_entry(smk_of_current(), dsp, &smack_rule_list);
529 rcu_read_unlock();
487 530
488 /* 531 /*
489 * If the access rule allows transmutation and 532 * If the access rule allows transmutation and
490 * the directory requests transmutation then 533 * the directory requests transmutation then
491 * by all means transmute. 534 * by all means transmute.
492 */ 535 */
493 if (((may & MAY_TRANSMUTE) != 0) && smk_inode_transmutable(dir)) 536 if (may > 0 && ((may & MAY_TRANSMUTE) != 0) &&
537 smk_inode_transmutable(dir))
494 isp = dsp; 538 isp = dsp;
495 539
496 *value = kstrdup(isp, GFP_KERNEL); 540 *value = kstrdup(isp, GFP_KERNEL);
@@ -716,7 +760,8 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
716 if (strcmp(name, XATTR_NAME_SMACK) == 0 || 760 if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
717 strcmp(name, XATTR_NAME_SMACKIPIN) == 0 || 761 strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
718 strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 || 762 strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 ||
719 strcmp(name, XATTR_NAME_SMACKEXEC) == 0) { 763 strcmp(name, XATTR_NAME_SMACKEXEC) == 0 ||
764 strcmp(name, XATTR_NAME_SMACKMMAP) == 0) {
720 if (!capable(CAP_MAC_ADMIN)) 765 if (!capable(CAP_MAC_ADMIN))
721 rc = -EPERM; 766 rc = -EPERM;
722 /* 767 /*
@@ -773,6 +818,12 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
773 isp->smk_task = nsp; 818 isp->smk_task = nsp;
774 else 819 else
775 isp->smk_task = smack_known_invalid.smk_known; 820 isp->smk_task = smack_known_invalid.smk_known;
821 } else if (strcmp(name, XATTR_NAME_SMACKMMAP) == 0) {
822 nsp = smk_import(value, size);
823 if (nsp != NULL)
824 isp->smk_mmap = nsp;
825 else
826 isp->smk_mmap = smack_known_invalid.smk_known;
776 } else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) 827 } else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0)
777 isp->smk_flags |= SMK_INODE_TRANSMUTE; 828 isp->smk_flags |= SMK_INODE_TRANSMUTE;
778 829
@@ -815,7 +866,8 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
815 strcmp(name, XATTR_NAME_SMACKIPIN) == 0 || 866 strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
816 strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 || 867 strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 ||
817 strcmp(name, XATTR_NAME_SMACKEXEC) == 0 || 868 strcmp(name, XATTR_NAME_SMACKEXEC) == 0 ||
818 strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) { 869 strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0 ||
870 strcmp(name, XATTR_NAME_SMACKMMAP)) {
819 if (!capable(CAP_MAC_ADMIN)) 871 if (!capable(CAP_MAC_ADMIN))
820 rc = -EPERM; 872 rc = -EPERM;
821 } else 873 } else
@@ -829,6 +881,7 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
829 if (rc == 0) { 881 if (rc == 0) {
830 isp = dentry->d_inode->i_security; 882 isp = dentry->d_inode->i_security;
831 isp->smk_task = NULL; 883 isp->smk_task = NULL;
884 isp->smk_mmap = NULL;
832 } 885 }
833 886
834 return rc; 887 return rc;
@@ -1060,6 +1113,126 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
1060} 1113}
1061 1114
1062/** 1115/**
1116 * smack_file_mmap :
1117 * Check permissions for a mmap operation. The @file may be NULL, e.g.
1118 * if mapping anonymous memory.
1119 * @file contains the file structure for file to map (may be NULL).
1120 * @reqprot contains the protection requested by the application.
1121 * @prot contains the protection that will be applied by the kernel.
1122 * @flags contains the operational flags.
1123 * Return 0 if permission is granted.
1124 */
1125static int smack_file_mmap(struct file *file,
1126 unsigned long reqprot, unsigned long prot,
1127 unsigned long flags, unsigned long addr,
1128 unsigned long addr_only)
1129{
1130 struct smack_rule *srp;
1131 struct task_smack *tsp;
1132 char *sp;
1133 char *msmack;
1134 char *osmack;
1135 struct inode_smack *isp;
1136 struct dentry *dp;
1137 int may;
1138 int mmay;
1139 int tmay;
1140 int rc;
1141
1142 /* do DAC check on address space usage */
1143 rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only);
1144 if (rc || addr_only)
1145 return rc;
1146
1147 if (file == NULL || file->f_dentry == NULL)
1148 return 0;
1149
1150 dp = file->f_dentry;
1151
1152 if (dp->d_inode == NULL)
1153 return 0;
1154
1155 isp = dp->d_inode->i_security;
1156 if (isp->smk_mmap == NULL)
1157 return 0;
1158 msmack = isp->smk_mmap;
1159
1160 tsp = current_security();
1161 sp = smk_of_current();
1162 rc = 0;
1163
1164 rcu_read_lock();
1165 /*
1166 * For each Smack rule associated with the subject
1167 * label verify that the SMACK64MMAP also has access
1168 * to that rule's object label.
1169 *
1170 * Because neither of the labels comes
1171 * from the networking code it is sufficient
1172 * to compare pointers.
1173 */
1174 list_for_each_entry_rcu(srp, &smack_rule_list, list) {
1175 if (srp->smk_subject != sp)
1176 continue;
1177
1178 osmack = srp->smk_object;
1179 /*
1180 * Matching labels always allows access.
1181 */
1182 if (msmack == osmack)
1183 continue;
1184 /*
1185 * If there is a matching local rule take
1186 * that into account as well.
1187 */
1188 may = smk_access_entry(srp->smk_subject, osmack,
1189 &tsp->smk_rules);
1190 if (may == -ENOENT)
1191 may = srp->smk_access;
1192 else
1193 may &= srp->smk_access;
1194 /*
1195 * If may is zero the SMACK64MMAP subject can't
1196 * possibly have less access.
1197 */
1198 if (may == 0)
1199 continue;
1200
1201 /*
1202 * Fetch the global list entry.
1203 * If there isn't one a SMACK64MMAP subject
1204 * can't have as much access as current.
1205 */
1206 mmay = smk_access_entry(msmack, osmack, &smack_rule_list);
1207 if (mmay == -ENOENT) {
1208 rc = -EACCES;
1209 break;
1210 }
1211 /*
1212 * If there is a local entry it modifies the
1213 * potential access, too.
1214 */
1215 tmay = smk_access_entry(msmack, osmack, &tsp->smk_rules);
1216 if (tmay != -ENOENT)
1217 mmay &= tmay;
1218
1219 /*
1220 * If there is any access available to current that is
1221 * not available to a SMACK64MMAP subject
1222 * deny access.
1223 */
1224 if ((may | mmay) != mmay) {
1225 rc = -EACCES;
1226 break;
1227 }
1228 }
1229
1230 rcu_read_unlock();
1231
1232 return rc;
1233}
1234
1235/**
1063 * smack_file_set_fowner - set the file security blob value 1236 * smack_file_set_fowner - set the file security blob value
1064 * @file: object in question 1237 * @file: object in question
1065 * 1238 *
@@ -1095,6 +1268,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
1095 * struct fown_struct is never outside the context of a struct file 1268 * struct fown_struct is never outside the context of a struct file
1096 */ 1269 */
1097 file = container_of(fown, struct file, f_owner); 1270 file = container_of(fown, struct file, f_owner);
1271
1098 /* we don't log here as rc can be overriden */ 1272 /* we don't log here as rc can be overriden */
1099 rc = smk_access(file->f_security, tsp, MAY_WRITE, NULL); 1273 rc = smk_access(file->f_security, tsp, MAY_WRITE, NULL);
1100 if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE)) 1274 if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE))
@@ -1145,9 +1319,14 @@ static int smack_file_receive(struct file *file)
1145 */ 1319 */
1146static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp) 1320static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
1147{ 1321{
1148 cred->security = kzalloc(sizeof(struct task_smack), gfp); 1322 struct task_smack *tsp;
1149 if (cred->security == NULL) 1323
1324 tsp = new_task_smack(NULL, NULL, gfp);
1325 if (tsp == NULL)
1150 return -ENOMEM; 1326 return -ENOMEM;
1327
1328 cred->security = tsp;
1329
1151 return 0; 1330 return 0;
1152} 1331}
1153 1332
@@ -1156,13 +1335,24 @@ static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
1156 * smack_cred_free - "free" task-level security credentials 1335 * smack_cred_free - "free" task-level security credentials
1157 * @cred: the credentials in question 1336 * @cred: the credentials in question
1158 * 1337 *
1159 * Smack isn't using copies of blobs. Everyone
1160 * points to an immutable list. The blobs never go away.
1161 * There is no leak here.
1162 */ 1338 */
1163static void smack_cred_free(struct cred *cred) 1339static void smack_cred_free(struct cred *cred)
1164{ 1340{
1165 kfree(cred->security); 1341 struct task_smack *tsp = cred->security;
1342 struct smack_rule *rp;
1343 struct list_head *l;
1344 struct list_head *n;
1345
1346 if (tsp == NULL)
1347 return;
1348 cred->security = NULL;
1349
1350 list_for_each_safe(l, n, &tsp->smk_rules) {
1351 rp = list_entry(l, struct smack_rule, list);
1352 list_del(&rp->list);
1353 kfree(rp);
1354 }
1355 kfree(tsp);
1166} 1356}
1167 1357
1168/** 1358/**
@@ -1178,13 +1368,16 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old,
1178{ 1368{
1179 struct task_smack *old_tsp = old->security; 1369 struct task_smack *old_tsp = old->security;
1180 struct task_smack *new_tsp; 1370 struct task_smack *new_tsp;
1371 int rc;
1181 1372
1182 new_tsp = kzalloc(sizeof(struct task_smack), gfp); 1373 new_tsp = new_task_smack(old_tsp->smk_task, old_tsp->smk_task, gfp);
1183 if (new_tsp == NULL) 1374 if (new_tsp == NULL)
1184 return -ENOMEM; 1375 return -ENOMEM;
1185 1376
1186 new_tsp->smk_task = old_tsp->smk_task; 1377 rc = smk_copy_rules(&new_tsp->smk_rules, &old_tsp->smk_rules, gfp);
1187 new_tsp->smk_forked = old_tsp->smk_task; 1378 if (rc != 0)
1379 return rc;
1380
1188 new->security = new_tsp; 1381 new->security = new_tsp;
1189 return 0; 1382 return 0;
1190} 1383}
@@ -1203,6 +1396,11 @@ static void smack_cred_transfer(struct cred *new, const struct cred *old)
1203 1396
1204 new_tsp->smk_task = old_tsp->smk_task; 1397 new_tsp->smk_task = old_tsp->smk_task;
1205 new_tsp->smk_forked = old_tsp->smk_task; 1398 new_tsp->smk_forked = old_tsp->smk_task;
1399 mutex_init(&new_tsp->smk_rules_lock);
1400 INIT_LIST_HEAD(&new_tsp->smk_rules);
1401
1402
1403 /* cbs copy rule list */
1206} 1404}
1207 1405
1208/** 1406/**
@@ -2419,6 +2617,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
2419 } 2617 }
2420 } 2618 }
2421 isp->smk_task = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp); 2619 isp->smk_task = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp);
2620 isp->smk_mmap = smk_fetch(XATTR_NAME_SMACKMMAP, inode, dp);
2422 2621
2423 dput(dp); 2622 dput(dp);
2424 break; 2623 break;
@@ -2478,6 +2677,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value)
2478static int smack_setprocattr(struct task_struct *p, char *name, 2677static int smack_setprocattr(struct task_struct *p, char *name,
2479 void *value, size_t size) 2678 void *value, size_t size)
2480{ 2679{
2680 int rc;
2481 struct task_smack *tsp; 2681 struct task_smack *tsp;
2482 struct task_smack *oldtsp; 2682 struct task_smack *oldtsp;
2483 struct cred *new; 2683 struct cred *new;
@@ -2513,13 +2713,16 @@ static int smack_setprocattr(struct task_struct *p, char *name,
2513 new = prepare_creds(); 2713 new = prepare_creds();
2514 if (new == NULL) 2714 if (new == NULL)
2515 return -ENOMEM; 2715 return -ENOMEM;
2516 tsp = kzalloc(sizeof(struct task_smack), GFP_KERNEL); 2716
2717 tsp = new_task_smack(newsmack, oldtsp->smk_forked, GFP_KERNEL);
2517 if (tsp == NULL) { 2718 if (tsp == NULL) {
2518 kfree(new); 2719 kfree(new);
2519 return -ENOMEM; 2720 return -ENOMEM;
2520 } 2721 }
2521 tsp->smk_task = newsmack; 2722 rc = smk_copy_rules(&tsp->smk_rules, &oldtsp->smk_rules, GFP_KERNEL);
2522 tsp->smk_forked = oldtsp->smk_forked; 2723 if (rc != 0)
2724 return rc;
2725
2523 new->security = tsp; 2726 new->security = tsp;
2524 commit_creds(new); 2727 commit_creds(new);
2525 return size; 2728 return size;
@@ -3221,6 +3424,7 @@ struct security_operations smack_ops = {
3221 .file_ioctl = smack_file_ioctl, 3424 .file_ioctl = smack_file_ioctl,
3222 .file_lock = smack_file_lock, 3425 .file_lock = smack_file_lock,
3223 .file_fcntl = smack_file_fcntl, 3426 .file_fcntl = smack_file_fcntl,
3427 .file_mmap = smack_file_mmap,
3224 .file_set_fowner = smack_file_set_fowner, 3428 .file_set_fowner = smack_file_set_fowner,
3225 .file_send_sigiotask = smack_file_send_sigiotask, 3429 .file_send_sigiotask = smack_file_send_sigiotask,
3226 .file_receive = smack_file_receive, 3430 .file_receive = smack_file_receive,
@@ -3334,23 +3538,20 @@ static __init int smack_init(void)
3334 struct cred *cred; 3538 struct cred *cred;
3335 struct task_smack *tsp; 3539 struct task_smack *tsp;
3336 3540
3337 tsp = kzalloc(sizeof(struct task_smack), GFP_KERNEL); 3541 if (!security_module_enable(&smack_ops))
3542 return 0;
3543
3544 tsp = new_task_smack(smack_known_floor.smk_known,
3545 smack_known_floor.smk_known, GFP_KERNEL);
3338 if (tsp == NULL) 3546 if (tsp == NULL)
3339 return -ENOMEM; 3547 return -ENOMEM;
3340 3548
3341 if (!security_module_enable(&smack_ops)) {
3342 kfree(tsp);
3343 return 0;
3344 }
3345
3346 printk(KERN_INFO "Smack: Initializing.\n"); 3549 printk(KERN_INFO "Smack: Initializing.\n");
3347 3550
3348 /* 3551 /*
3349 * Set the security state for the initial task. 3552 * Set the security state for the initial task.
3350 */ 3553 */
3351 cred = (struct cred *) current->cred; 3554 cred = (struct cred *) current->cred;
3352 tsp->smk_forked = smack_known_floor.smk_known;
3353 tsp->smk_task = smack_known_floor.smk_known;
3354 cred->security = tsp; 3555 cred->security = tsp;
3355 3556
3356 /* initialize the smack_know_list */ 3557 /* initialize the smack_know_list */
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 362d5eda948b..90d1bbaaa6f3 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -43,6 +43,7 @@ enum smk_inos {
43 SMK_NETLBLADDR = 8, /* single label hosts */ 43 SMK_NETLBLADDR = 8, /* single label hosts */
44 SMK_ONLYCAP = 9, /* the only "capable" label */ 44 SMK_ONLYCAP = 9, /* the only "capable" label */
45 SMK_LOGGING = 10, /* logging */ 45 SMK_LOGGING = 10, /* logging */
46 SMK_LOAD_SELF = 11, /* task specific rules */
46}; 47};
47 48
48/* 49/*
@@ -135,104 +136,30 @@ static void smk_netlabel_audit_set(struct netlbl_audit *nap)
135#define SMK_NETLBLADDRMIN 9 136#define SMK_NETLBLADDRMIN 9
136#define SMK_NETLBLADDRMAX 42 137#define SMK_NETLBLADDRMAX 42
137 138
138/*
139 * Seq_file read operations for /smack/load
140 */
141
142static void *load_seq_start(struct seq_file *s, loff_t *pos)
143{
144 if (*pos == SEQ_READ_FINISHED)
145 return NULL;
146 if (list_empty(&smack_rule_list))
147 return NULL;
148 return smack_rule_list.next;
149}
150
151static void *load_seq_next(struct seq_file *s, void *v, loff_t *pos)
152{
153 struct list_head *list = v;
154
155 if (list_is_last(list, &smack_rule_list)) {
156 *pos = SEQ_READ_FINISHED;
157 return NULL;
158 }
159 return list->next;
160}
161
162static int load_seq_show(struct seq_file *s, void *v)
163{
164 struct list_head *list = v;
165 struct smack_rule *srp =
166 list_entry(list, struct smack_rule, list);
167
168 seq_printf(s, "%s %s", (char *)srp->smk_subject,
169 (char *)srp->smk_object);
170
171 seq_putc(s, ' ');
172
173 if (srp->smk_access & MAY_READ)
174 seq_putc(s, 'r');
175 if (srp->smk_access & MAY_WRITE)
176 seq_putc(s, 'w');
177 if (srp->smk_access & MAY_EXEC)
178 seq_putc(s, 'x');
179 if (srp->smk_access & MAY_APPEND)
180 seq_putc(s, 'a');
181 if (srp->smk_access & MAY_TRANSMUTE)
182 seq_putc(s, 't');
183 if (srp->smk_access == 0)
184 seq_putc(s, '-');
185
186 seq_putc(s, '\n');
187
188 return 0;
189}
190
191static void load_seq_stop(struct seq_file *s, void *v)
192{
193 /* No-op */
194}
195
196static const struct seq_operations load_seq_ops = {
197 .start = load_seq_start,
198 .next = load_seq_next,
199 .show = load_seq_show,
200 .stop = load_seq_stop,
201};
202
203/**
204 * smk_open_load - open() for /smack/load
205 * @inode: inode structure representing file
206 * @file: "load" file pointer
207 *
208 * For reading, use load_seq_* seq_file reading operations.
209 */
210static int smk_open_load(struct inode *inode, struct file *file)
211{
212 return seq_open(file, &load_seq_ops);
213}
214
215/** 139/**
216 * smk_set_access - add a rule to the rule list 140 * smk_set_access - add a rule to the rule list
217 * @srp: the new rule to add 141 * @srp: the new rule to add
142 * @rule_list: the list of rules
143 * @rule_lock: the rule list lock
218 * 144 *
219 * Looks through the current subject/object/access list for 145 * Looks through the current subject/object/access list for
220 * the subject/object pair and replaces the access that was 146 * the subject/object pair and replaces the access that was
221 * there. If the pair isn't found add it with the specified 147 * there. If the pair isn't found add it with the specified
222 * access. 148 * access.
223 * 149 *
150 * Returns 1 if a rule was found to exist already, 0 if it is new
224 * Returns 0 if nothing goes wrong or -ENOMEM if it fails 151 * Returns 0 if nothing goes wrong or -ENOMEM if it fails
225 * during the allocation of the new pair to add. 152 * during the allocation of the new pair to add.
226 */ 153 */
227static int smk_set_access(struct smack_rule *srp) 154static int smk_set_access(struct smack_rule *srp, struct list_head *rule_list,
155 struct mutex *rule_lock)
228{ 156{
229 struct smack_rule *sp; 157 struct smack_rule *sp;
230 int ret = 0; 158 int found = 0;
231 int found;
232 mutex_lock(&smack_list_lock);
233 159
234 found = 0; 160 mutex_lock(rule_lock);
235 list_for_each_entry_rcu(sp, &smack_rule_list, list) { 161
162 list_for_each_entry_rcu(sp, rule_list, list) {
236 if (sp->smk_subject == srp->smk_subject && 163 if (sp->smk_subject == srp->smk_subject &&
237 sp->smk_object == srp->smk_object) { 164 sp->smk_object == srp->smk_object) {
238 found = 1; 165 found = 1;
@@ -241,19 +168,21 @@ static int smk_set_access(struct smack_rule *srp)
241 } 168 }
242 } 169 }
243 if (found == 0) 170 if (found == 0)
244 list_add_rcu(&srp->list, &smack_rule_list); 171 list_add_rcu(&srp->list, rule_list);
245 172
246 mutex_unlock(&smack_list_lock); 173 mutex_unlock(rule_lock);
247 174
248 return ret; 175 return found;
249} 176}
250 177
251/** 178/**
252 * smk_write_load - write() for /smack/load 179 * smk_write_load_list - write() for any /smack/load
253 * @file: file pointer, not actually used 180 * @file: file pointer, not actually used
254 * @buf: where to get the data from 181 * @buf: where to get the data from
255 * @count: bytes sent 182 * @count: bytes sent
256 * @ppos: where to start - must be 0 183 * @ppos: where to start - must be 0
184 * @rule_list: the list of rules to write to
185 * @rule_lock: lock for the rule list
257 * 186 *
258 * Get one smack access rule from above. 187 * Get one smack access rule from above.
259 * The format is exactly: 188 * The format is exactly:
@@ -263,21 +192,19 @@ static int smk_set_access(struct smack_rule *srp)
263 * 192 *
264 * writes must be SMK_LABELLEN+SMK_LABELLEN+SMK_ACCESSLEN bytes. 193 * writes must be SMK_LABELLEN+SMK_LABELLEN+SMK_ACCESSLEN bytes.
265 */ 194 */
266static ssize_t smk_write_load(struct file *file, const char __user *buf, 195static ssize_t smk_write_load_list(struct file *file, const char __user *buf,
267 size_t count, loff_t *ppos) 196 size_t count, loff_t *ppos,
197 struct list_head *rule_list,
198 struct mutex *rule_lock)
268{ 199{
269 struct smack_rule *rule; 200 struct smack_rule *rule;
270 char *data; 201 char *data;
271 int rc = -EINVAL; 202 int rc = -EINVAL;
272 203
273 /* 204 /*
274 * Must have privilege.
275 * No partial writes. 205 * No partial writes.
276 * Enough data must be present. 206 * Enough data must be present.
277 */ 207 */
278 if (!capable(CAP_MAC_ADMIN))
279 return -EPERM;
280
281 if (*ppos != 0) 208 if (*ppos != 0)
282 return -EINVAL; 209 return -EINVAL;
283 /* 210 /*
@@ -372,11 +299,13 @@ static ssize_t smk_write_load(struct file *file, const char __user *buf,
372 goto out_free_rule; 299 goto out_free_rule;
373 } 300 }
374 301
375 rc = smk_set_access(rule); 302 rc = count;
376 303 /*
377 if (!rc) 304 * smk_set_access returns true if there was already a rule
378 rc = count; 305 * for the subject/object pair, and false if it was new.
379 goto out; 306 */
307 if (!smk_set_access(rule, rule_list, rule_lock))
308 goto out;
380 309
381out_free_rule: 310out_free_rule:
382 kfree(rule); 311 kfree(rule);
@@ -385,6 +314,108 @@ out:
385 return rc; 314 return rc;
386} 315}
387 316
317
318/*
319 * Seq_file read operations for /smack/load
320 */
321
322static void *load_seq_start(struct seq_file *s, loff_t *pos)
323{
324 if (*pos == SEQ_READ_FINISHED)
325 return NULL;
326 if (list_empty(&smack_rule_list))
327 return NULL;
328 return smack_rule_list.next;
329}
330
331static void *load_seq_next(struct seq_file *s, void *v, loff_t *pos)
332{
333 struct list_head *list = v;
334
335 if (list_is_last(list, &smack_rule_list)) {
336 *pos = SEQ_READ_FINISHED;
337 return NULL;
338 }
339 return list->next;
340}
341
342static int load_seq_show(struct seq_file *s, void *v)
343{
344 struct list_head *list = v;
345 struct smack_rule *srp =
346 list_entry(list, struct smack_rule, list);
347
348 seq_printf(s, "%s %s", (char *)srp->smk_subject,
349 (char *)srp->smk_object);
350
351 seq_putc(s, ' ');
352
353 if (srp->smk_access & MAY_READ)
354 seq_putc(s, 'r');
355 if (srp->smk_access & MAY_WRITE)
356 seq_putc(s, 'w');
357 if (srp->smk_access & MAY_EXEC)
358 seq_putc(s, 'x');
359 if (srp->smk_access & MAY_APPEND)
360 seq_putc(s, 'a');
361 if (srp->smk_access & MAY_TRANSMUTE)
362 seq_putc(s, 't');
363 if (srp->smk_access == 0)
364 seq_putc(s, '-');
365
366 seq_putc(s, '\n');
367
368 return 0;
369}
370
371static void load_seq_stop(struct seq_file *s, void *v)
372{
373 /* No-op */
374}
375
376static const struct seq_operations load_seq_ops = {
377 .start = load_seq_start,
378 .next = load_seq_next,
379 .show = load_seq_show,
380 .stop = load_seq_stop,
381};
382
383/**
384 * smk_open_load - open() for /smack/load
385 * @inode: inode structure representing file
386 * @file: "load" file pointer
387 *
388 * For reading, use load_seq_* seq_file reading operations.
389 */
390static int smk_open_load(struct inode *inode, struct file *file)
391{
392 return seq_open(file, &load_seq_ops);
393}
394
395/**
396 * smk_write_load - write() for /smack/load
397 * @file: file pointer, not actually used
398 * @buf: where to get the data from
399 * @count: bytes sent
400 * @ppos: where to start - must be 0
401 *
402 */
403static ssize_t smk_write_load(struct file *file, const char __user *buf,
404 size_t count, loff_t *ppos)
405{
406
407 /*
408 * Must have privilege.
409 * No partial writes.
410 * Enough data must be present.
411 */
412 if (!capable(CAP_MAC_ADMIN))
413 return -EPERM;
414
415 return smk_write_load_list(file, buf, count, ppos, &smack_rule_list,
416 &smack_list_lock);
417}
418
388static const struct file_operations smk_load_ops = { 419static const struct file_operations smk_load_ops = {
389 .open = smk_open_load, 420 .open = smk_open_load,
390 .read = seq_read, 421 .read = seq_read,
@@ -1288,6 +1319,112 @@ static const struct file_operations smk_logging_ops = {
1288 .write = smk_write_logging, 1319 .write = smk_write_logging,
1289 .llseek = default_llseek, 1320 .llseek = default_llseek,
1290}; 1321};
1322
1323/*
1324 * Seq_file read operations for /smack/load-self
1325 */
1326
1327static void *load_self_seq_start(struct seq_file *s, loff_t *pos)
1328{
1329 struct task_smack *tsp = current_security();
1330
1331 if (*pos == SEQ_READ_FINISHED)
1332 return NULL;
1333 if (list_empty(&tsp->smk_rules))
1334 return NULL;
1335 return tsp->smk_rules.next;
1336}
1337
1338static void *load_self_seq_next(struct seq_file *s, void *v, loff_t *pos)
1339{
1340 struct task_smack *tsp = current_security();
1341 struct list_head *list = v;
1342
1343 if (list_is_last(list, &tsp->smk_rules)) {
1344 *pos = SEQ_READ_FINISHED;
1345 return NULL;
1346 }
1347 return list->next;
1348}
1349
1350static int load_self_seq_show(struct seq_file *s, void *v)
1351{
1352 struct list_head *list = v;
1353 struct smack_rule *srp =
1354 list_entry(list, struct smack_rule, list);
1355
1356 seq_printf(s, "%s %s", (char *)srp->smk_subject,
1357 (char *)srp->smk_object);
1358
1359 seq_putc(s, ' ');
1360
1361 if (srp->smk_access & MAY_READ)
1362 seq_putc(s, 'r');
1363 if (srp->smk_access & MAY_WRITE)
1364 seq_putc(s, 'w');
1365 if (srp->smk_access & MAY_EXEC)
1366 seq_putc(s, 'x');
1367 if (srp->smk_access & MAY_APPEND)
1368 seq_putc(s, 'a');
1369 if (srp->smk_access & MAY_TRANSMUTE)
1370 seq_putc(s, 't');
1371 if (srp->smk_access == 0)
1372 seq_putc(s, '-');
1373
1374 seq_putc(s, '\n');
1375
1376 return 0;
1377}
1378
1379static void load_self_seq_stop(struct seq_file *s, void *v)
1380{
1381 /* No-op */
1382}
1383
1384static const struct seq_operations load_self_seq_ops = {
1385 .start = load_self_seq_start,
1386 .next = load_self_seq_next,
1387 .show = load_self_seq_show,
1388 .stop = load_self_seq_stop,
1389};
1390
1391
1392/**
1393 * smk_open_load_self - open() for /smack/load-self
1394 * @inode: inode structure representing file
1395 * @file: "load" file pointer
1396 *
1397 * For reading, use load_seq_* seq_file reading operations.
1398 */
1399static int smk_open_load_self(struct inode *inode, struct file *file)
1400{
1401 return seq_open(file, &load_self_seq_ops);
1402}
1403
1404/**
1405 * smk_write_load_self - write() for /smack/load-self
1406 * @file: file pointer, not actually used
1407 * @buf: where to get the data from
1408 * @count: bytes sent
1409 * @ppos: where to start - must be 0
1410 *
1411 */
1412static ssize_t smk_write_load_self(struct file *file, const char __user *buf,
1413 size_t count, loff_t *ppos)
1414{
1415 struct task_smack *tsp = current_security();
1416
1417 return smk_write_load_list(file, buf, count, ppos, &tsp->smk_rules,
1418 &tsp->smk_rules_lock);
1419}
1420
1421static const struct file_operations smk_load_self_ops = {
1422 .open = smk_open_load_self,
1423 .read = seq_read,
1424 .llseek = seq_lseek,
1425 .write = smk_write_load_self,
1426 .release = seq_release,
1427};
1291/** 1428/**
1292 * smk_fill_super - fill the /smackfs superblock 1429 * smk_fill_super - fill the /smackfs superblock
1293 * @sb: the empty superblock 1430 * @sb: the empty superblock
@@ -1304,23 +1441,26 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
1304 struct inode *root_inode; 1441 struct inode *root_inode;
1305 1442
1306 static struct tree_descr smack_files[] = { 1443 static struct tree_descr smack_files[] = {
1307 [SMK_LOAD] = 1444 [SMK_LOAD] = {
1308 {"load", &smk_load_ops, S_IRUGO|S_IWUSR}, 1445 "load", &smk_load_ops, S_IRUGO|S_IWUSR},
1309 [SMK_CIPSO] = 1446 [SMK_CIPSO] = {
1310 {"cipso", &smk_cipso_ops, S_IRUGO|S_IWUSR}, 1447 "cipso", &smk_cipso_ops, S_IRUGO|S_IWUSR},
1311 [SMK_DOI] = 1448 [SMK_DOI] = {
1312 {"doi", &smk_doi_ops, S_IRUGO|S_IWUSR}, 1449 "doi", &smk_doi_ops, S_IRUGO|S_IWUSR},
1313 [SMK_DIRECT] = 1450 [SMK_DIRECT] = {
1314 {"direct", &smk_direct_ops, S_IRUGO|S_IWUSR}, 1451 "direct", &smk_direct_ops, S_IRUGO|S_IWUSR},
1315 [SMK_AMBIENT] = 1452 [SMK_AMBIENT] = {
1316 {"ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR}, 1453 "ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR},
1317 [SMK_NETLBLADDR] = 1454 [SMK_NETLBLADDR] = {
1318 {"netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR}, 1455 "netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR},
1319 [SMK_ONLYCAP] = 1456 [SMK_ONLYCAP] = {
1320 {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR}, 1457 "onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR},
1321 [SMK_LOGGING] = 1458 [SMK_LOGGING] = {
1322 {"logging", &smk_logging_ops, S_IRUGO|S_IWUSR}, 1459 "logging", &smk_logging_ops, S_IRUGO|S_IWUSR},
1323 /* last one */ {""} 1460 [SMK_LOAD_SELF] = {
1461 "load-self", &smk_load_self_ops, S_IRUGO|S_IWUGO},
1462 /* last one */
1463 {""}
1324 }; 1464 };
1325 1465
1326 rc = simple_fill_super(sb, SMACK_MAGIC, smack_files); 1466 rc = simple_fill_super(sb, SMACK_MAGIC, smack_files);
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index 9d32f182301e..cb09f1fce910 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -927,7 +927,7 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
927 struct path *path, const int flag) 927 struct path *path, const int flag)
928{ 928{
929 const u8 acc_mode = ACC_MODE(flag); 929 const u8 acc_mode = ACC_MODE(flag);
930 int error = -ENOMEM; 930 int error = 0;
931 struct tomoyo_path_info buf; 931 struct tomoyo_path_info buf;
932 struct tomoyo_request_info r; 932 struct tomoyo_request_info r;
933 int idx; 933 int idx;
@@ -938,9 +938,6 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
938 buf.name = NULL; 938 buf.name = NULL;
939 r.mode = TOMOYO_CONFIG_DISABLED; 939 r.mode = TOMOYO_CONFIG_DISABLED;
940 idx = tomoyo_read_lock(); 940 idx = tomoyo_read_lock();
941 if (!tomoyo_get_realpath(&buf, path))
942 goto out;
943 error = 0;
944 /* 941 /*
945 * If the filename is specified by "deny_rewrite" keyword, 942 * If the filename is specified by "deny_rewrite" keyword,
946 * we need to check "allow_rewrite" permission when the filename is not 943 * we need to check "allow_rewrite" permission when the filename is not
diff --git a/sound/core/jack.c b/sound/core/jack.c
index 4902ae568730..53b53e97c896 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -141,6 +141,7 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
141 141
142fail_input: 142fail_input:
143 input_free_device(jack->input_dev); 143 input_free_device(jack->input_dev);
144 kfree(jack->id);
144 kfree(jack); 145 kfree(jack);
145 return err; 146 return err;
146} 147}
diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
index 23f49f356e0f..16c0bdfbb164 100644
--- a/sound/pci/au88x0/au88x0_core.c
+++ b/sound/pci/au88x0/au88x0_core.c
@@ -1252,11 +1252,19 @@ static void vortex_adbdma_resetup(vortex_t *vortex, int adbdma) {
1252static int inline vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma) 1252static int inline vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma)
1253{ 1253{
1254 stream_t *dma = &vortex->dma_adb[adbdma]; 1254 stream_t *dma = &vortex->dma_adb[adbdma];
1255 int temp; 1255 int temp, page, delta;
1256 1256
1257 temp = hwread(vortex->mmio, VORTEX_ADBDMA_STAT + (adbdma << 2)); 1257 temp = hwread(vortex->mmio, VORTEX_ADBDMA_STAT + (adbdma << 2));
1258 temp = (dma->period_virt * dma->period_bytes) + (temp & (dma->period_bytes - 1)); 1258 page = (temp & ADB_SUBBUF_MASK) >> ADB_SUBBUF_SHIFT;
1259 return temp; 1259 if (dma->nr_periods >= 4)
1260 delta = (page - dma->period_real) & 3;
1261 else {
1262 delta = (page - dma->period_real);
1263 if (delta < 0)
1264 delta += dma->nr_periods;
1265 }
1266 return (dma->period_virt + delta) * dma->period_bytes
1267 + (temp & (dma->period_bytes - 1));
1260} 1268}
1261 1269
1262static void vortex_adbdma_startfifo(vortex_t * vortex, int adbdma) 1270static void vortex_adbdma_startfifo(vortex_t * vortex, int adbdma)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 0baffcdee8f9..fcedad9a5fef 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2308,6 +2308,7 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = {
2308 SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB), 2308 SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB),
2309 SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB), 2309 SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB),
2310 SND_PCI_QUIRK(0x1043, 0x81e7, "ASUS M2V", POS_FIX_LPIB), 2310 SND_PCI_QUIRK(0x1043, 0x81e7, "ASUS M2V", POS_FIX_LPIB),
2311 SND_PCI_QUIRK(0x1043, 0x8410, "ASUS", POS_FIX_LPIB),
2311 SND_PCI_QUIRK(0x104d, 0x9069, "Sony VPCS11V9E", POS_FIX_LPIB), 2312 SND_PCI_QUIRK(0x104d, 0x9069, "Sony VPCS11V9E", POS_FIX_LPIB),
2312 SND_PCI_QUIRK(0x1106, 0x3288, "ASUS M2V-MX SE", POS_FIX_LPIB), 2313 SND_PCI_QUIRK(0x1106, 0x3288, "ASUS M2V-MX SE", POS_FIX_LPIB),
2313 SND_PCI_QUIRK(0x1179, 0xff10, "Toshiba A100-259", POS_FIX_LPIB), 2314 SND_PCI_QUIRK(0x1179, 0xff10, "Toshiba A100-259", POS_FIX_LPIB),
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index a07b031090d8..067982f4f182 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -1039,9 +1039,11 @@ static struct hda_verb cs_errata_init_verbs[] = {
1039 {0x11, AC_VERB_SET_PROC_COEF, 0x0008}, 1039 {0x11, AC_VERB_SET_PROC_COEF, 0x0008},
1040 {0x11, AC_VERB_SET_PROC_STATE, 0x00}, 1040 {0x11, AC_VERB_SET_PROC_STATE, 0x00},
1041 1041
1042#if 0 /* Don't to set to D3 as we are in power-up sequence */
1042 {0x07, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Rx: D3 */ 1043 {0x07, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Rx: D3 */
1043 {0x08, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Tx: D3 */ 1044 {0x08, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Tx: D3 */
1044 /*{0x01, AC_VERB_SET_POWER_STATE, 0x03},*/ /* AFG: D3 This is already handled */ 1045 /*{0x01, AC_VERB_SET_POWER_STATE, 0x03},*/ /* AFG: D3 This is already handled */
1046#endif
1045 1047
1046 {} /* terminator */ 1048 {} /* terminator */
1047}; 1049};
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index fbe97d32140d..4d5004e693f0 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -3114,6 +3114,8 @@ static struct snd_pci_quirk cxt5066_cfg_tbl[] = {
3114 SND_PCI_QUIRK(0x1028, 0x0401, "Dell Vostro 1014", CXT5066_DELL_VOSTRO), 3114 SND_PCI_QUIRK(0x1028, 0x0401, "Dell Vostro 1014", CXT5066_DELL_VOSTRO),
3115 SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTRO), 3115 SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTRO),
3116 SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD), 3116 SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD),
3117 SND_PCI_QUIRK(0x1028, 0x050f, "Dell Inspiron", CXT5066_IDEAPAD),
3118 SND_PCI_QUIRK(0x1028, 0x0510, "Dell Vostro", CXT5066_IDEAPAD),
3117 SND_PCI_QUIRK(0x103c, 0x360b, "HP G60", CXT5066_HP_LAPTOP), 3119 SND_PCI_QUIRK(0x103c, 0x360b, "HP G60", CXT5066_HP_LAPTOP),
3118 SND_PCI_QUIRK(0x1043, 0x13f3, "Asus A52J", CXT5066_ASUS), 3120 SND_PCI_QUIRK(0x1043, 0x13f3, "Asus A52J", CXT5066_ASUS),
3119 SND_PCI_QUIRK(0x1043, 0x1643, "Asus K52JU", CXT5066_ASUS), 3121 SND_PCI_QUIRK(0x1043, 0x1643, "Asus K52JU", CXT5066_ASUS),
@@ -3410,7 +3412,7 @@ static void cx_auto_parse_output(struct hda_codec *codec)
3410 } 3412 }
3411 } 3413 }
3412 spec->multiout.dac_nids = spec->private_dac_nids; 3414 spec->multiout.dac_nids = spec->private_dac_nids;
3413 spec->multiout.max_channels = nums * 2; 3415 spec->multiout.max_channels = spec->multiout.num_dacs * 2;
3414 3416
3415 if (cfg->hp_outs > 0) 3417 if (cfg->hp_outs > 0)
3416 spec->auto_mute = 1; 3418 spec->auto_mute = 1;
@@ -3729,9 +3731,9 @@ static int cx_auto_init(struct hda_codec *codec)
3729 return 0; 3731 return 0;
3730} 3732}
3731 3733
3732static int cx_auto_add_volume(struct hda_codec *codec, const char *basename, 3734static int cx_auto_add_volume_idx(struct hda_codec *codec, const char *basename,
3733 const char *dir, int cidx, 3735 const char *dir, int cidx,
3734 hda_nid_t nid, int hda_dir) 3736 hda_nid_t nid, int hda_dir, int amp_idx)
3735{ 3737{
3736 static char name[32]; 3738 static char name[32];
3737 static struct snd_kcontrol_new knew[] = { 3739 static struct snd_kcontrol_new knew[] = {
@@ -3743,7 +3745,8 @@ static int cx_auto_add_volume(struct hda_codec *codec, const char *basename,
3743 3745
3744 for (i = 0; i < 2; i++) { 3746 for (i = 0; i < 2; i++) {
3745 struct snd_kcontrol *kctl; 3747 struct snd_kcontrol *kctl;
3746 knew[i].private_value = HDA_COMPOSE_AMP_VAL(nid, 3, 0, hda_dir); 3748 knew[i].private_value = HDA_COMPOSE_AMP_VAL(nid, 3, amp_idx,
3749 hda_dir);
3747 knew[i].subdevice = HDA_SUBDEV_AMP_FLAG; 3750 knew[i].subdevice = HDA_SUBDEV_AMP_FLAG;
3748 knew[i].index = cidx; 3751 knew[i].index = cidx;
3749 snprintf(name, sizeof(name), "%s%s %s", basename, dir, sfx[i]); 3752 snprintf(name, sizeof(name), "%s%s %s", basename, dir, sfx[i]);
@@ -3759,6 +3762,9 @@ static int cx_auto_add_volume(struct hda_codec *codec, const char *basename,
3759 return 0; 3762 return 0;
3760} 3763}
3761 3764
3765#define cx_auto_add_volume(codec, str, dir, cidx, nid, hda_dir) \
3766 cx_auto_add_volume_idx(codec, str, dir, cidx, nid, hda_dir, 0)
3767
3762#define cx_auto_add_pb_volume(codec, nid, str, idx) \ 3768#define cx_auto_add_pb_volume(codec, nid, str, idx) \
3763 cx_auto_add_volume(codec, str, " Playback", idx, nid, HDA_OUTPUT) 3769 cx_auto_add_volume(codec, str, " Playback", idx, nid, HDA_OUTPUT)
3764 3770
@@ -3808,29 +3814,60 @@ static int cx_auto_build_input_controls(struct hda_codec *codec)
3808 struct conexant_spec *spec = codec->spec; 3814 struct conexant_spec *spec = codec->spec;
3809 struct auto_pin_cfg *cfg = &spec->autocfg; 3815 struct auto_pin_cfg *cfg = &spec->autocfg;
3810 static const char *prev_label; 3816 static const char *prev_label;
3811 int i, err, cidx; 3817 int i, err, cidx, conn_len;
3818 hda_nid_t conn[HDA_MAX_CONNECTIONS];
3819
3820 int multi_adc_volume = 0; /* If the ADC nid has several input volumes */
3821 int adc_nid = spec->adc_nids[0];
3822
3823 conn_len = snd_hda_get_connections(codec, adc_nid, conn,
3824 HDA_MAX_CONNECTIONS);
3825 if (conn_len < 0)
3826 return conn_len;
3827
3828 multi_adc_volume = cfg->num_inputs > 1 && conn_len > 1;
3829 if (!multi_adc_volume) {
3830 err = cx_auto_add_volume(codec, "Capture", "", 0, adc_nid,
3831 HDA_INPUT);
3832 if (err < 0)
3833 return err;
3834 }
3812 3835
3813 err = cx_auto_add_volume(codec, "Capture", "", 0, spec->adc_nids[0],
3814 HDA_INPUT);
3815 if (err < 0)
3816 return err;
3817 prev_label = NULL; 3836 prev_label = NULL;
3818 cidx = 0; 3837 cidx = 0;
3819 for (i = 0; i < cfg->num_inputs; i++) { 3838 for (i = 0; i < cfg->num_inputs; i++) {
3820 hda_nid_t nid = cfg->inputs[i].pin; 3839 hda_nid_t nid = cfg->inputs[i].pin;
3821 const char *label; 3840 const char *label;
3822 if (!(get_wcaps(codec, nid) & AC_WCAP_IN_AMP)) 3841 int j;
3842 int pin_amp = get_wcaps(codec, nid) & AC_WCAP_IN_AMP;
3843 if (!pin_amp && !multi_adc_volume)
3823 continue; 3844 continue;
3845
3824 label = hda_get_autocfg_input_label(codec, cfg, i); 3846 label = hda_get_autocfg_input_label(codec, cfg, i);
3825 if (label == prev_label) 3847 if (label == prev_label)
3826 cidx++; 3848 cidx++;
3827 else 3849 else
3828 cidx = 0; 3850 cidx = 0;
3829 prev_label = label; 3851 prev_label = label;
3830 err = cx_auto_add_volume(codec, label, " Capture", cidx, 3852
3831 nid, HDA_INPUT); 3853 if (pin_amp) {
3832 if (err < 0) 3854 err = cx_auto_add_volume(codec, label, " Boost", cidx,
3833 return err; 3855 nid, HDA_INPUT);
3856 if (err < 0)
3857 return err;
3858 }
3859
3860 if (!multi_adc_volume)
3861 continue;
3862 for (j = 0; j < conn_len; j++) {
3863 if (conn[j] == nid) {
3864 err = cx_auto_add_volume_idx(codec, label,
3865 " Capture", cidx, adc_nid, HDA_INPUT, j);
3866 if (err < 0)
3867 return err;
3868 break;
3869 }
3870 }
3834 } 3871 }
3835 return 0; 3872 return 0;
3836} 3873}
@@ -3902,6 +3939,8 @@ static struct hda_codec_preset snd_hda_preset_conexant[] = {
3902 .patch = patch_cxt5066 }, 3939 .patch = patch_cxt5066 },
3903 { .id = 0x14f15069, .name = "CX20585", 3940 { .id = 0x14f15069, .name = "CX20585",
3904 .patch = patch_cxt5066 }, 3941 .patch = patch_cxt5066 },
3942 { .id = 0x14f1506e, .name = "CX20590",
3943 .patch = patch_cxt5066 },
3905 { .id = 0x14f15097, .name = "CX20631", 3944 { .id = 0x14f15097, .name = "CX20631",
3906 .patch = patch_conexant_auto }, 3945 .patch = patch_conexant_auto },
3907 { .id = 0x14f15098, .name = "CX20632", 3946 { .id = 0x14f15098, .name = "CX20632",
@@ -3928,6 +3967,7 @@ MODULE_ALIAS("snd-hda-codec-id:14f15066");
3928MODULE_ALIAS("snd-hda-codec-id:14f15067"); 3967MODULE_ALIAS("snd-hda-codec-id:14f15067");
3929MODULE_ALIAS("snd-hda-codec-id:14f15068"); 3968MODULE_ALIAS("snd-hda-codec-id:14f15068");
3930MODULE_ALIAS("snd-hda-codec-id:14f15069"); 3969MODULE_ALIAS("snd-hda-codec-id:14f15069");
3970MODULE_ALIAS("snd-hda-codec-id:14f1506e");
3931MODULE_ALIAS("snd-hda-codec-id:14f15097"); 3971MODULE_ALIAS("snd-hda-codec-id:14f15097");
3932MODULE_ALIAS("snd-hda-codec-id:14f15098"); 3972MODULE_ALIAS("snd-hda-codec-id:14f15098");
3933MODULE_ALIAS("snd-hda-codec-id:14f150a1"); 3973MODULE_ALIAS("snd-hda-codec-id:14f150a1");
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index a58767736727..ec0fa2dd0a27 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1634,6 +1634,9 @@ static struct hda_codec_preset snd_hda_preset_hdmi[] = {
1634{ .id = 0x10de0012, .name = "GPU 12 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1634{ .id = 0x10de0012, .name = "GPU 12 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1635{ .id = 0x10de0013, .name = "GPU 13 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1635{ .id = 0x10de0013, .name = "GPU 13 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1636{ .id = 0x10de0014, .name = "GPU 14 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1636{ .id = 0x10de0014, .name = "GPU 14 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1637{ .id = 0x10de0015, .name = "GPU 15 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1638{ .id = 0x10de0016, .name = "GPU 16 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1639/* 17 is known to be absent */
1637{ .id = 0x10de0018, .name = "GPU 18 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1640{ .id = 0x10de0018, .name = "GPU 18 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1638{ .id = 0x10de0019, .name = "GPU 19 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1641{ .id = 0x10de0019, .name = "GPU 19 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1639{ .id = 0x10de001a, .name = "GPU 1a HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1642{ .id = 0x10de001a, .name = "GPU 1a HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
@@ -1676,6 +1679,8 @@ MODULE_ALIAS("snd-hda-codec-id:10de0011");
1676MODULE_ALIAS("snd-hda-codec-id:10de0012"); 1679MODULE_ALIAS("snd-hda-codec-id:10de0012");
1677MODULE_ALIAS("snd-hda-codec-id:10de0013"); 1680MODULE_ALIAS("snd-hda-codec-id:10de0013");
1678MODULE_ALIAS("snd-hda-codec-id:10de0014"); 1681MODULE_ALIAS("snd-hda-codec-id:10de0014");
1682MODULE_ALIAS("snd-hda-codec-id:10de0015");
1683MODULE_ALIAS("snd-hda-codec-id:10de0016");
1679MODULE_ALIAS("snd-hda-codec-id:10de0018"); 1684MODULE_ALIAS("snd-hda-codec-id:10de0018");
1680MODULE_ALIAS("snd-hda-codec-id:10de0019"); 1685MODULE_ALIAS("snd-hda-codec-id:10de0019");
1681MODULE_ALIAS("snd-hda-codec-id:10de001a"); 1686MODULE_ALIAS("snd-hda-codec-id:10de001a");
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3328a259a242..4261bb8eec1d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1133,11 +1133,8 @@ static void alc_automute_speaker(struct hda_codec *codec, int pinctl)
1133 nid = spec->autocfg.hp_pins[i]; 1133 nid = spec->autocfg.hp_pins[i];
1134 if (!nid) 1134 if (!nid)
1135 break; 1135 break;
1136 if (snd_hda_jack_detect(codec, nid)) { 1136 alc_report_jack(codec, nid);
1137 spec->jack_present = 1; 1137 spec->jack_present |= snd_hda_jack_detect(codec, nid);
1138 break;
1139 }
1140 alc_report_jack(codec, spec->autocfg.hp_pins[i]);
1141 } 1138 }
1142 1139
1143 mute = spec->jack_present ? HDA_AMP_MUTE : 0; 1140 mute = spec->jack_present ? HDA_AMP_MUTE : 0;
@@ -15015,7 +15012,7 @@ static struct snd_pci_quirk alc269_cfg_tbl[] = {
15015 SND_PCI_QUIRK(0x1043, 0x11e3, "ASUS U33Jc", ALC269VB_AMIC), 15012 SND_PCI_QUIRK(0x1043, 0x11e3, "ASUS U33Jc", ALC269VB_AMIC),
15016 SND_PCI_QUIRK(0x1043, 0x1273, "ASUS UL80Jt", ALC269VB_AMIC), 15013 SND_PCI_QUIRK(0x1043, 0x1273, "ASUS UL80Jt", ALC269VB_AMIC),
15017 SND_PCI_QUIRK(0x1043, 0x1283, "ASUS U53Jc", ALC269_AMIC), 15014 SND_PCI_QUIRK(0x1043, 0x1283, "ASUS U53Jc", ALC269_AMIC),
15018 SND_PCI_QUIRK(0x1043, 0x12b3, "ASUS N82Jv", ALC269_AMIC), 15015 SND_PCI_QUIRK(0x1043, 0x12b3, "ASUS N82JV", ALC269VB_AMIC),
15019 SND_PCI_QUIRK(0x1043, 0x12d3, "ASUS N61Jv", ALC269_AMIC), 15016 SND_PCI_QUIRK(0x1043, 0x12d3, "ASUS N61Jv", ALC269_AMIC),
15020 SND_PCI_QUIRK(0x1043, 0x13a3, "ASUS UL30Vt", ALC269_AMIC), 15017 SND_PCI_QUIRK(0x1043, 0x13a3, "ASUS UL30Vt", ALC269_AMIC),
15021 SND_PCI_QUIRK(0x1043, 0x1373, "ASUS G73JX", ALC269_AMIC), 15018 SND_PCI_QUIRK(0x1043, 0x1373, "ASUS G73JX", ALC269_AMIC),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 9ea48b425d0b..bd7b123f6440 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -586,7 +586,12 @@ static hda_nid_t stac92hd83xxx_pin_nids[10] = {
586 0x0f, 0x10, 0x11, 0x1f, 0x20, 586 0x0f, 0x10, 0x11, 0x1f, 0x20,
587}; 587};
588 588
589static hda_nid_t stac92hd88xxx_pin_nids[10] = { 589static hda_nid_t stac92hd87xxx_pin_nids[6] = {
590 0x0a, 0x0b, 0x0c, 0x0d,
591 0x0f, 0x11,
592};
593
594static hda_nid_t stac92hd88xxx_pin_nids[8] = {
590 0x0a, 0x0b, 0x0c, 0x0d, 595 0x0a, 0x0b, 0x0c, 0x0d,
591 0x0f, 0x11, 0x1f, 0x20, 596 0x0f, 0x11, 0x1f, 0x20,
592}; 597};
@@ -5430,12 +5435,13 @@ again:
5430 switch (codec->vendor_id) { 5435 switch (codec->vendor_id) {
5431 case 0x111d76d1: 5436 case 0x111d76d1:
5432 case 0x111d76d9: 5437 case 0x111d76d9:
5438 case 0x111d76e5:
5433 spec->dmic_nids = stac92hd87b_dmic_nids; 5439 spec->dmic_nids = stac92hd87b_dmic_nids;
5434 spec->num_dmics = stac92xx_connected_ports(codec, 5440 spec->num_dmics = stac92xx_connected_ports(codec,
5435 stac92hd87b_dmic_nids, 5441 stac92hd87b_dmic_nids,
5436 STAC92HD87B_NUM_DMICS); 5442 STAC92HD87B_NUM_DMICS);
5437 spec->num_pins = ARRAY_SIZE(stac92hd88xxx_pin_nids); 5443 spec->num_pins = ARRAY_SIZE(stac92hd87xxx_pin_nids);
5438 spec->pin_nids = stac92hd88xxx_pin_nids; 5444 spec->pin_nids = stac92hd87xxx_pin_nids;
5439 spec->mono_nid = 0; 5445 spec->mono_nid = 0;
5440 spec->num_pwrs = 0; 5446 spec->num_pwrs = 0;
5441 break; 5447 break;
@@ -5443,6 +5449,7 @@ again:
5443 case 0x111d7667: 5449 case 0x111d7667:
5444 case 0x111d7668: 5450 case 0x111d7668:
5445 case 0x111d7669: 5451 case 0x111d7669:
5452 case 0x111d76e3:
5446 spec->num_dmics = stac92xx_connected_ports(codec, 5453 spec->num_dmics = stac92xx_connected_ports(codec,
5447 stac92hd88xxx_dmic_nids, 5454 stac92hd88xxx_dmic_nids,
5448 STAC92HD88XXX_NUM_DMICS); 5455 STAC92HD88XXX_NUM_DMICS);
@@ -6387,6 +6394,8 @@ static struct hda_codec_preset snd_hda_preset_sigmatel[] = {
6387 { .id = 0x111d76cd, .name = "92HD89F2", .patch = patch_stac92hd73xx }, 6394 { .id = 0x111d76cd, .name = "92HD89F2", .patch = patch_stac92hd73xx },
6388 { .id = 0x111d76ce, .name = "92HD89F1", .patch = patch_stac92hd73xx }, 6395 { .id = 0x111d76ce, .name = "92HD89F1", .patch = patch_stac92hd73xx },
6389 { .id = 0x111d76e0, .name = "92HD91BXX", .patch = patch_stac92hd83xxx}, 6396 { .id = 0x111d76e0, .name = "92HD91BXX", .patch = patch_stac92hd83xxx},
6397 { .id = 0x111d76e3, .name = "92HD98BXX", .patch = patch_stac92hd83xxx},
6398 { .id = 0x111d76e5, .name = "92HD99BXX", .patch = patch_stac92hd83xxx},
6390 { .id = 0x111d76e7, .name = "92HD90BXX", .patch = patch_stac92hd83xxx}, 6399 { .id = 0x111d76e7, .name = "92HD90BXX", .patch = patch_stac92hd83xxx},
6391 {} /* terminator */ 6400 {} /* terminator */
6392}; 6401};
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index a76c3260d941..63b0054200a8 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -567,7 +567,7 @@ static void via_auto_init_analog_input(struct hda_codec *codec)
567 hda_nid_t nid = cfg->inputs[i].pin; 567 hda_nid_t nid = cfg->inputs[i].pin;
568 if (spec->smart51_enabled && is_smart51_pins(spec, nid)) 568 if (spec->smart51_enabled && is_smart51_pins(spec, nid))
569 ctl = PIN_OUT; 569 ctl = PIN_OUT;
570 else if (i == AUTO_PIN_MIC) 570 else if (cfg->inputs[i].type == AUTO_PIN_MIC)
571 ctl = PIN_VREF50; 571 ctl = PIN_VREF50;
572 else 572 else
573 ctl = PIN_IN; 573 ctl = PIN_IN;
diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c
index bb4bf65b9e7e..0bb424af956f 100644
--- a/sound/soc/codecs/cx20442.c
+++ b/sound/soc/codecs/cx20442.c
@@ -367,7 +367,7 @@ static int cx20442_codec_remove(struct snd_soc_codec *codec)
367 return 0; 367 return 0;
368} 368}
369 369
370static const u8 cx20442_reg = CX20442_TELOUT | CX20442_MIC; 370static const u8 cx20442_reg;
371 371
372static struct snd_soc_codec_driver cx20442_codec_dev = { 372static struct snd_soc_codec_driver cx20442_codec_dev = {
373 .probe = cx20442_codec_probe, 373 .probe = cx20442_codec_probe,
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index 987476a5895f..017d99ceb42e 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -1482,7 +1482,7 @@ int wm8903_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
1482 WM8903_MICDET_EINT | WM8903_MICSHRT_EINT, 1482 WM8903_MICDET_EINT | WM8903_MICSHRT_EINT,
1483 irq_mask); 1483 irq_mask);
1484 1484
1485 if (det && shrt) { 1485 if (det || shrt) {
1486 /* Enable mic detection, this may not have been set through 1486 /* Enable mic detection, this may not have been set through
1487 * platform data (eg, if the defaults are OK). */ 1487 * platform data (eg, if the defaults are OK). */
1488 snd_soc_update_bits(codec, WM8903_WRITE_SEQUENCER_0, 1488 snd_soc_update_bits(codec, WM8903_WRITE_SEQUENCER_0,
diff --git a/sound/soc/codecs/wm8903.h b/sound/soc/codecs/wm8903.h
index e8490f3edd03..e3ec2433b215 100644
--- a/sound/soc/codecs/wm8903.h
+++ b/sound/soc/codecs/wm8903.h
@@ -165,7 +165,7 @@ extern int wm8903_mic_detect(struct snd_soc_codec *codec,
165 165
166#define WM8903_VMID_RES_50K 2 166#define WM8903_VMID_RES_50K 2
167#define WM8903_VMID_RES_250K 3 167#define WM8903_VMID_RES_250K 3
168#define WM8903_VMID_RES_5K 4 168#define WM8903_VMID_RES_5K 6
169 169
170/* 170/*
171 * R8 (0x08) - Analogue DAC 0 171 * R8 (0x08) - Analogue DAC 0
diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
index 4bbc3442703f..8dfb0a0da673 100644
--- a/sound/soc/codecs/wm8978.c
+++ b/sound/soc/codecs/wm8978.c
@@ -145,18 +145,18 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
145 SOC_SINGLE("DAC Playback Limiter Threshold", 145 SOC_SINGLE("DAC Playback Limiter Threshold",
146 WM8978_DAC_LIMITER_2, 4, 7, 0), 146 WM8978_DAC_LIMITER_2, 4, 7, 0),
147 SOC_SINGLE("DAC Playback Limiter Boost", 147 SOC_SINGLE("DAC Playback Limiter Boost",
148 WM8978_DAC_LIMITER_2, 0, 15, 0), 148 WM8978_DAC_LIMITER_2, 0, 12, 0),
149 149
150 SOC_ENUM("ALC Enable Switch", alc1), 150 SOC_ENUM("ALC Enable Switch", alc1),
151 SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0), 151 SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0),
152 SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0), 152 SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0),
153 153
154 SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 7, 0), 154 SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 10, 0),
155 SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0), 155 SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0),
156 156
157 SOC_ENUM("ALC Capture Mode", alc3), 157 SOC_ENUM("ALC Capture Mode", alc3),
158 SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 15, 0), 158 SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 10, 0),
159 SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 15, 0), 159 SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 10, 0),
160 160
161 SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0), 161 SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0),
162 SOC_SINGLE("ALC Capture Noise Gate Threshold", 162 SOC_SINGLE("ALC Capture Noise Gate Threshold",
@@ -211,8 +211,10 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
211 WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1), 211 WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1),
212 212
213 /* DAC / ADC oversampling */ 213 /* DAC / ADC oversampling */
214 SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL, 8, 1, 0), 214 SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL,
215 SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL, 8, 1, 0), 215 5, 1, 0),
216 SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL,
217 5, 1, 0),
216}; 218};
217 219
218/* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */ 220/* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 37b8aa8a680f..c6c958ee5d59 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -107,6 +107,12 @@ struct wm8994_priv {
107 107
108 int revision; 108 int revision;
109 struct wm8994_pdata *pdata; 109 struct wm8994_pdata *pdata;
110
111 unsigned int aif1clk_enable:1;
112 unsigned int aif2clk_enable:1;
113
114 unsigned int aif1clk_disable:1;
115 unsigned int aif2clk_disable:1;
110}; 116};
111 117
112static int wm8994_readable(unsigned int reg) 118static int wm8994_readable(unsigned int reg)
@@ -1004,6 +1010,110 @@ static void wm8994_update_class_w(struct snd_soc_codec *codec)
1004 } 1010 }
1005} 1011}
1006 1012
1013static int late_enable_ev(struct snd_soc_dapm_widget *w,
1014 struct snd_kcontrol *kcontrol, int event)
1015{
1016 struct snd_soc_codec *codec = w->codec;
1017 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
1018
1019 switch (event) {
1020 case SND_SOC_DAPM_PRE_PMU:
1021 if (wm8994->aif1clk_enable) {
1022 snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1,
1023 WM8994_AIF1CLK_ENA_MASK,
1024 WM8994_AIF1CLK_ENA);
1025 wm8994->aif1clk_enable = 0;
1026 }
1027 if (wm8994->aif2clk_enable) {
1028 snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1,
1029 WM8994_AIF2CLK_ENA_MASK,
1030 WM8994_AIF2CLK_ENA);
1031 wm8994->aif2clk_enable = 0;
1032 }
1033 break;
1034 }
1035
1036 return 0;
1037}
1038
1039static int late_disable_ev(struct snd_soc_dapm_widget *w,
1040 struct snd_kcontrol *kcontrol, int event)
1041{
1042 struct snd_soc_codec *codec = w->codec;
1043 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
1044
1045 switch (event) {
1046 case SND_SOC_DAPM_POST_PMD:
1047 if (wm8994->aif1clk_disable) {
1048 snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1,
1049 WM8994_AIF1CLK_ENA_MASK, 0);
1050 wm8994->aif1clk_disable = 0;
1051 }
1052 if (wm8994->aif2clk_disable) {
1053 snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1,
1054 WM8994_AIF2CLK_ENA_MASK, 0);
1055 wm8994->aif2clk_disable = 0;
1056 }
1057 break;
1058 }
1059
1060 return 0;
1061}
1062
1063static int aif1clk_ev(struct snd_soc_dapm_widget *w,
1064 struct snd_kcontrol *kcontrol, int event)
1065{
1066 struct snd_soc_codec *codec = w->codec;
1067 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
1068
1069 switch (event) {
1070 case SND_SOC_DAPM_PRE_PMU:
1071 wm8994->aif1clk_enable = 1;
1072 break;
1073 case SND_SOC_DAPM_POST_PMD:
1074 wm8994->aif1clk_disable = 1;
1075 break;
1076 }
1077
1078 return 0;
1079}
1080
1081static int aif2clk_ev(struct snd_soc_dapm_widget *w,
1082 struct snd_kcontrol *kcontrol, int event)
1083{
1084 struct snd_soc_codec *codec = w->codec;
1085 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
1086
1087 switch (event) {
1088 case SND_SOC_DAPM_PRE_PMU:
1089 wm8994->aif2clk_enable = 1;
1090 break;
1091 case SND_SOC_DAPM_POST_PMD:
1092 wm8994->aif2clk_disable = 1;
1093 break;
1094 }
1095
1096 return 0;
1097}
1098
1099static int adc_mux_ev(struct snd_soc_dapm_widget *w,
1100 struct snd_kcontrol *kcontrol, int event)
1101{
1102 late_enable_ev(w, kcontrol, event);
1103 return 0;
1104}
1105
1106static int dac_ev(struct snd_soc_dapm_widget *w,
1107 struct snd_kcontrol *kcontrol, int event)
1108{
1109 struct snd_soc_codec *codec = w->codec;
1110 unsigned int mask = 1 << w->shift;
1111
1112 snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5,
1113 mask, mask);
1114 return 0;
1115}
1116
1007static const char *hp_mux_text[] = { 1117static const char *hp_mux_text[] = {
1008 "Mixer", 1118 "Mixer",
1009 "DAC", 1119 "DAC",
@@ -1272,6 +1382,59 @@ static const struct soc_enum aif2dacr_src_enum =
1272static const struct snd_kcontrol_new aif2dacr_src_mux = 1382static const struct snd_kcontrol_new aif2dacr_src_mux =
1273 SOC_DAPM_ENUM("AIF2DACR Mux", aif2dacr_src_enum); 1383 SOC_DAPM_ENUM("AIF2DACR Mux", aif2dacr_src_enum);
1274 1384
1385static const struct snd_soc_dapm_widget wm8994_lateclk_revd_widgets[] = {
1386SND_SOC_DAPM_SUPPLY("AIF1CLK", SND_SOC_NOPM, 0, 0, aif1clk_ev,
1387 SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
1388SND_SOC_DAPM_SUPPLY("AIF2CLK", SND_SOC_NOPM, 0, 0, aif2clk_ev,
1389 SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
1390
1391SND_SOC_DAPM_PGA_E("Late DAC1L Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
1392 late_enable_ev, SND_SOC_DAPM_PRE_PMU),
1393SND_SOC_DAPM_PGA_E("Late DAC1R Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
1394 late_enable_ev, SND_SOC_DAPM_PRE_PMU),
1395SND_SOC_DAPM_PGA_E("Late DAC2L Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
1396 late_enable_ev, SND_SOC_DAPM_PRE_PMU),
1397SND_SOC_DAPM_PGA_E("Late DAC2R Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
1398 late_enable_ev, SND_SOC_DAPM_PRE_PMU),
1399
1400SND_SOC_DAPM_POST("Late Disable PGA", late_disable_ev)
1401};
1402
1403static const struct snd_soc_dapm_widget wm8994_lateclk_widgets[] = {
1404SND_SOC_DAPM_SUPPLY("AIF1CLK", WM8994_AIF1_CLOCKING_1, 0, 0, NULL, 0),
1405SND_SOC_DAPM_SUPPLY("AIF2CLK", WM8994_AIF2_CLOCKING_1, 0, 0, NULL, 0)
1406};
1407
1408static const struct snd_soc_dapm_widget wm8994_dac_revd_widgets[] = {
1409SND_SOC_DAPM_DAC_E("DAC2L", NULL, SND_SOC_NOPM, 3, 0,
1410 dac_ev, SND_SOC_DAPM_PRE_PMU),
1411SND_SOC_DAPM_DAC_E("DAC2R", NULL, SND_SOC_NOPM, 2, 0,
1412 dac_ev, SND_SOC_DAPM_PRE_PMU),
1413SND_SOC_DAPM_DAC_E("DAC1L", NULL, SND_SOC_NOPM, 1, 0,
1414 dac_ev, SND_SOC_DAPM_PRE_PMU),
1415SND_SOC_DAPM_DAC_E("DAC1R", NULL, SND_SOC_NOPM, 0, 0,
1416 dac_ev, SND_SOC_DAPM_PRE_PMU),
1417};
1418
1419static const struct snd_soc_dapm_widget wm8994_dac_widgets[] = {
1420SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0),
1421SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
1422SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0),
1423SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0),
1424};
1425
1426static const struct snd_soc_dapm_widget wm8994_adc_revd_widgets[] = {
1427SND_SOC_DAPM_MUX_E("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux,
1428 adc_mux_ev, SND_SOC_DAPM_PRE_PMU),
1429SND_SOC_DAPM_MUX_E("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux,
1430 adc_mux_ev, SND_SOC_DAPM_PRE_PMU),
1431};
1432
1433static const struct snd_soc_dapm_widget wm8994_adc_widgets[] = {
1434SND_SOC_DAPM_MUX("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux),
1435SND_SOC_DAPM_MUX("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux),
1436};
1437
1275static const struct snd_soc_dapm_widget wm8994_dapm_widgets[] = { 1438static const struct snd_soc_dapm_widget wm8994_dapm_widgets[] = {
1276SND_SOC_DAPM_INPUT("DMIC1DAT"), 1439SND_SOC_DAPM_INPUT("DMIC1DAT"),
1277SND_SOC_DAPM_INPUT("DMIC2DAT"), 1440SND_SOC_DAPM_INPUT("DMIC2DAT"),
@@ -1284,9 +1447,6 @@ SND_SOC_DAPM_SUPPLY("DSP1CLK", WM8994_CLOCKING_1, 3, 0, NULL, 0),
1284SND_SOC_DAPM_SUPPLY("DSP2CLK", WM8994_CLOCKING_1, 2, 0, NULL, 0), 1447SND_SOC_DAPM_SUPPLY("DSP2CLK", WM8994_CLOCKING_1, 2, 0, NULL, 0),
1285SND_SOC_DAPM_SUPPLY("DSPINTCLK", WM8994_CLOCKING_1, 1, 0, NULL, 0), 1448SND_SOC_DAPM_SUPPLY("DSPINTCLK", WM8994_CLOCKING_1, 1, 0, NULL, 0),
1286 1449
1287SND_SOC_DAPM_SUPPLY("AIF1CLK", WM8994_AIF1_CLOCKING_1, 0, 0, NULL, 0),
1288SND_SOC_DAPM_SUPPLY("AIF2CLK", WM8994_AIF2_CLOCKING_1, 0, 0, NULL, 0),
1289
1290SND_SOC_DAPM_AIF_OUT("AIF1ADC1L", NULL, 1450SND_SOC_DAPM_AIF_OUT("AIF1ADC1L", NULL,
1291 0, WM8994_POWER_MANAGEMENT_4, 9, 0), 1451 0, WM8994_POWER_MANAGEMENT_4, 9, 0),
1292SND_SOC_DAPM_AIF_OUT("AIF1ADC1R", NULL, 1452SND_SOC_DAPM_AIF_OUT("AIF1ADC1R", NULL,
@@ -1369,14 +1529,6 @@ SND_SOC_DAPM_ADC("DMIC1R", NULL, WM8994_POWER_MANAGEMENT_4, 2, 0),
1369SND_SOC_DAPM_ADC("ADCL", NULL, SND_SOC_NOPM, 1, 0), 1529SND_SOC_DAPM_ADC("ADCL", NULL, SND_SOC_NOPM, 1, 0),
1370SND_SOC_DAPM_ADC("ADCR", NULL, SND_SOC_NOPM, 0, 0), 1530SND_SOC_DAPM_ADC("ADCR", NULL, SND_SOC_NOPM, 0, 0),
1371 1531
1372SND_SOC_DAPM_MUX("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux),
1373SND_SOC_DAPM_MUX("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux),
1374
1375SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0),
1376SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
1377SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0),
1378SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0),
1379
1380SND_SOC_DAPM_MUX("Left Headphone Mux", SND_SOC_NOPM, 0, 0, &hpl_mux), 1532SND_SOC_DAPM_MUX("Left Headphone Mux", SND_SOC_NOPM, 0, 0, &hpl_mux),
1381SND_SOC_DAPM_MUX("Right Headphone Mux", SND_SOC_NOPM, 0, 0, &hpr_mux), 1533SND_SOC_DAPM_MUX("Right Headphone Mux", SND_SOC_NOPM, 0, 0, &hpr_mux),
1382 1534
@@ -1516,14 +1668,12 @@ static const struct snd_soc_dapm_route intercon[] = {
1516 { "AIF2ADC Mux", "AIF3DACDAT", "AIF3ADCDAT" }, 1668 { "AIF2ADC Mux", "AIF3DACDAT", "AIF3ADCDAT" },
1517 1669
1518 /* DAC1 inputs */ 1670 /* DAC1 inputs */
1519 { "DAC1L", NULL, "DAC1L Mixer" },
1520 { "DAC1L Mixer", "AIF2 Switch", "AIF2DACL" }, 1671 { "DAC1L Mixer", "AIF2 Switch", "AIF2DACL" },
1521 { "DAC1L Mixer", "AIF1.2 Switch", "AIF1DAC2L" }, 1672 { "DAC1L Mixer", "AIF1.2 Switch", "AIF1DAC2L" },
1522 { "DAC1L Mixer", "AIF1.1 Switch", "AIF1DAC1L" }, 1673 { "DAC1L Mixer", "AIF1.1 Switch", "AIF1DAC1L" },
1523 { "DAC1L Mixer", "Left Sidetone Switch", "Left Sidetone" }, 1674 { "DAC1L Mixer", "Left Sidetone Switch", "Left Sidetone" },
1524 { "DAC1L Mixer", "Right Sidetone Switch", "Right Sidetone" }, 1675 { "DAC1L Mixer", "Right Sidetone Switch", "Right Sidetone" },
1525 1676
1526 { "DAC1R", NULL, "DAC1R Mixer" },
1527 { "DAC1R Mixer", "AIF2 Switch", "AIF2DACR" }, 1677 { "DAC1R Mixer", "AIF2 Switch", "AIF2DACR" },
1528 { "DAC1R Mixer", "AIF1.2 Switch", "AIF1DAC2R" }, 1678 { "DAC1R Mixer", "AIF1.2 Switch", "AIF1DAC2R" },
1529 { "DAC1R Mixer", "AIF1.1 Switch", "AIF1DAC1R" }, 1679 { "DAC1R Mixer", "AIF1.1 Switch", "AIF1DAC1R" },
@@ -1532,7 +1682,6 @@ static const struct snd_soc_dapm_route intercon[] = {
1532 1682
1533 /* DAC2/AIF2 outputs */ 1683 /* DAC2/AIF2 outputs */
1534 { "AIF2ADCL", NULL, "AIF2DAC2L Mixer" }, 1684 { "AIF2ADCL", NULL, "AIF2DAC2L Mixer" },
1535 { "DAC2L", NULL, "AIF2DAC2L Mixer" },
1536 { "AIF2DAC2L Mixer", "AIF2 Switch", "AIF2DACL" }, 1685 { "AIF2DAC2L Mixer", "AIF2 Switch", "AIF2DACL" },
1537 { "AIF2DAC2L Mixer", "AIF1.2 Switch", "AIF1DAC2L" }, 1686 { "AIF2DAC2L Mixer", "AIF1.2 Switch", "AIF1DAC2L" },
1538 { "AIF2DAC2L Mixer", "AIF1.1 Switch", "AIF1DAC1L" }, 1687 { "AIF2DAC2L Mixer", "AIF1.1 Switch", "AIF1DAC1L" },
@@ -1540,7 +1689,6 @@ static const struct snd_soc_dapm_route intercon[] = {
1540 { "AIF2DAC2L Mixer", "Right Sidetone Switch", "Right Sidetone" }, 1689 { "AIF2DAC2L Mixer", "Right Sidetone Switch", "Right Sidetone" },
1541 1690
1542 { "AIF2ADCR", NULL, "AIF2DAC2R Mixer" }, 1691 { "AIF2ADCR", NULL, "AIF2DAC2R Mixer" },
1543 { "DAC2R", NULL, "AIF2DAC2R Mixer" },
1544 { "AIF2DAC2R Mixer", "AIF2 Switch", "AIF2DACR" }, 1692 { "AIF2DAC2R Mixer", "AIF2 Switch", "AIF2DACR" },
1545 { "AIF2DAC2R Mixer", "AIF1.2 Switch", "AIF1DAC2R" }, 1693 { "AIF2DAC2R Mixer", "AIF1.2 Switch", "AIF1DAC2R" },
1546 { "AIF2DAC2R Mixer", "AIF1.1 Switch", "AIF1DAC1R" }, 1694 { "AIF2DAC2R Mixer", "AIF1.1 Switch", "AIF1DAC1R" },
@@ -1584,6 +1732,24 @@ static const struct snd_soc_dapm_route intercon[] = {
1584 { "Right Headphone Mux", "DAC", "DAC1R" }, 1732 { "Right Headphone Mux", "DAC", "DAC1R" },
1585}; 1733};
1586 1734
1735static const struct snd_soc_dapm_route wm8994_lateclk_revd_intercon[] = {
1736 { "DAC1L", NULL, "Late DAC1L Enable PGA" },
1737 { "Late DAC1L Enable PGA", NULL, "DAC1L Mixer" },
1738 { "DAC1R", NULL, "Late DAC1R Enable PGA" },
1739 { "Late DAC1R Enable PGA", NULL, "DAC1R Mixer" },
1740 { "DAC2L", NULL, "Late DAC2L Enable PGA" },
1741 { "Late DAC2L Enable PGA", NULL, "AIF2DAC2L Mixer" },
1742 { "DAC2R", NULL, "Late DAC2R Enable PGA" },
1743 { "Late DAC2R Enable PGA", NULL, "AIF2DAC2R Mixer" }
1744};
1745
1746static const struct snd_soc_dapm_route wm8994_lateclk_intercon[] = {
1747 { "DAC1L", NULL, "DAC1L Mixer" },
1748 { "DAC1R", NULL, "DAC1R Mixer" },
1749 { "DAC2L", NULL, "AIF2DAC2L Mixer" },
1750 { "DAC2R", NULL, "AIF2DAC2R Mixer" },
1751};
1752
1587static const struct snd_soc_dapm_route wm8994_revd_intercon[] = { 1753static const struct snd_soc_dapm_route wm8994_revd_intercon[] = {
1588 { "AIF1DACDAT", NULL, "AIF2DACDAT" }, 1754 { "AIF1DACDAT", NULL, "AIF2DACDAT" },
1589 { "AIF2DACDAT", NULL, "AIF1DACDAT" }, 1755 { "AIF2DACDAT", NULL, "AIF1DACDAT" },
@@ -2514,6 +2680,22 @@ static int wm8994_resume(struct snd_soc_codec *codec)
2514{ 2680{
2515 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); 2681 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
2516 int i, ret; 2682 int i, ret;
2683 unsigned int val, mask;
2684
2685 if (wm8994->revision < 4) {
2686 /* force a HW read */
2687 val = wm8994_reg_read(codec->control_data,
2688 WM8994_POWER_MANAGEMENT_5);
2689
2690 /* modify the cache only */
2691 codec->cache_only = 1;
2692 mask = WM8994_DAC1R_ENA | WM8994_DAC1L_ENA |
2693 WM8994_DAC2R_ENA | WM8994_DAC2L_ENA;
2694 val &= mask;
2695 snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5,
2696 mask, val);
2697 codec->cache_only = 0;
2698 }
2517 2699
2518 /* Restore the registers */ 2700 /* Restore the registers */
2519 ret = snd_soc_cache_sync(codec); 2701 ret = snd_soc_cache_sync(codec);
@@ -2847,11 +3029,10 @@ static void wm8958_default_micdet(u16 status, void *data)
2847 report |= SND_JACK_BTN_5; 3029 report |= SND_JACK_BTN_5;
2848 3030
2849done: 3031done:
2850 snd_soc_jack_report(wm8994->micdet[0].jack, 3032 snd_soc_jack_report(wm8994->micdet[0].jack, report,
2851 SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | 3033 SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 |
2852 SND_JACK_BTN_3 | SND_JACK_BTN_4 | SND_JACK_BTN_5 | 3034 SND_JACK_BTN_3 | SND_JACK_BTN_4 | SND_JACK_BTN_5 |
2853 SND_JACK_MICROPHONE | SND_JACK_VIDEOOUT, 3035 SND_JACK_MICROPHONE | SND_JACK_VIDEOOUT);
2854 report);
2855} 3036}
2856 3037
2857/** 3038/**
@@ -3125,10 +3306,31 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
3125 case WM8994: 3306 case WM8994:
3126 snd_soc_dapm_new_controls(dapm, wm8994_specific_dapm_widgets, 3307 snd_soc_dapm_new_controls(dapm, wm8994_specific_dapm_widgets,
3127 ARRAY_SIZE(wm8994_specific_dapm_widgets)); 3308 ARRAY_SIZE(wm8994_specific_dapm_widgets));
3309 if (wm8994->revision < 4) {
3310 snd_soc_dapm_new_controls(dapm, wm8994_lateclk_revd_widgets,
3311 ARRAY_SIZE(wm8994_lateclk_revd_widgets));
3312 snd_soc_dapm_new_controls(dapm, wm8994_adc_revd_widgets,
3313 ARRAY_SIZE(wm8994_adc_revd_widgets));
3314 snd_soc_dapm_new_controls(dapm, wm8994_dac_revd_widgets,
3315 ARRAY_SIZE(wm8994_dac_revd_widgets));
3316 } else {
3317 snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets,
3318 ARRAY_SIZE(wm8994_lateclk_widgets));
3319 snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets,
3320 ARRAY_SIZE(wm8994_adc_widgets));
3321 snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets,
3322 ARRAY_SIZE(wm8994_dac_widgets));
3323 }
3128 break; 3324 break;
3129 case WM8958: 3325 case WM8958:
3130 snd_soc_add_controls(codec, wm8958_snd_controls, 3326 snd_soc_add_controls(codec, wm8958_snd_controls,
3131 ARRAY_SIZE(wm8958_snd_controls)); 3327 ARRAY_SIZE(wm8958_snd_controls));
3328 snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets,
3329 ARRAY_SIZE(wm8994_lateclk_widgets));
3330 snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets,
3331 ARRAY_SIZE(wm8994_adc_widgets));
3332 snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets,
3333 ARRAY_SIZE(wm8994_dac_widgets));
3132 snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets, 3334 snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets,
3133 ARRAY_SIZE(wm8958_dapm_widgets)); 3335 ARRAY_SIZE(wm8958_dapm_widgets));
3134 break; 3336 break;
@@ -3143,12 +3345,19 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
3143 snd_soc_dapm_add_routes(dapm, wm8994_intercon, 3345 snd_soc_dapm_add_routes(dapm, wm8994_intercon,
3144 ARRAY_SIZE(wm8994_intercon)); 3346 ARRAY_SIZE(wm8994_intercon));
3145 3347
3146 if (wm8994->revision < 4) 3348 if (wm8994->revision < 4) {
3147 snd_soc_dapm_add_routes(dapm, wm8994_revd_intercon, 3349 snd_soc_dapm_add_routes(dapm, wm8994_revd_intercon,
3148 ARRAY_SIZE(wm8994_revd_intercon)); 3350 ARRAY_SIZE(wm8994_revd_intercon));
3149 3351 snd_soc_dapm_add_routes(dapm, wm8994_lateclk_revd_intercon,
3352 ARRAY_SIZE(wm8994_lateclk_revd_intercon));
3353 } else {
3354 snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon,
3355 ARRAY_SIZE(wm8994_lateclk_intercon));
3356 }
3150 break; 3357 break;
3151 case WM8958: 3358 case WM8958:
3359 snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon,
3360 ARRAY_SIZE(wm8994_lateclk_intercon));
3152 snd_soc_dapm_add_routes(dapm, wm8958_intercon, 3361 snd_soc_dapm_add_routes(dapm, wm8958_intercon,
3153 ARRAY_SIZE(wm8958_intercon)); 3362 ARRAY_SIZE(wm8958_intercon));
3154 break; 3363 break;
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index 43825b2102a5..cce704c275c6 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c
@@ -15,6 +15,7 @@
15#include <linux/moduleparam.h> 15#include <linux/moduleparam.h>
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/device.h>
18#include <linux/pm.h> 19#include <linux/pm.h>
19#include <linux/i2c.h> 20#include <linux/i2c.h>
20#include <linux/platform_device.h> 21#include <linux/platform_device.h>
@@ -1341,6 +1342,10 @@ static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
1341 wm9081->control_type = SND_SOC_I2C; 1342 wm9081->control_type = SND_SOC_I2C;
1342 wm9081->control_data = i2c; 1343 wm9081->control_data = i2c;
1343 1344
1345 if (dev_get_platdata(&i2c->dev))
1346 memcpy(&wm9081->retune, dev_get_platdata(&i2c->dev),
1347 sizeof(wm9081->retune));
1348
1344 ret = snd_soc_register_codec(&i2c->dev, 1349 ret = snd_soc_register_codec(&i2c->dev,
1345 &soc_codec_dev_wm9081, &wm9081_dai, 1); 1350 &soc_codec_dev_wm9081, &wm9081_dai, 1);
1346 if (ret < 0) 1351 if (ret < 0)
diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c
index 613df5db0b32..516892706063 100644
--- a/sound/soc/codecs/wm_hubs.c
+++ b/sound/soc/codecs/wm_hubs.c
@@ -674,6 +674,9 @@ SND_SOC_DAPM_OUTPUT("LINEOUT2N"),
674}; 674};
675 675
676static const struct snd_soc_dapm_route analogue_routes[] = { 676static const struct snd_soc_dapm_route analogue_routes[] = {
677 { "MICBIAS1", NULL, "CLK_SYS" },
678 { "MICBIAS2", NULL, "CLK_SYS" },
679
677 { "IN1L PGA", "IN1LP Switch", "IN1LP" }, 680 { "IN1L PGA", "IN1LP Switch", "IN1LP" },
678 { "IN1L PGA", "IN1LN Switch", "IN1LN" }, 681 { "IN1L PGA", "IN1LN Switch", "IN1LN" },
679 682
diff --git a/sound/soc/imx/eukrea-tlv320.c b/sound/soc/imx/eukrea-tlv320.c
index e20c9e1457c0..1e9bccae4e80 100644
--- a/sound/soc/imx/eukrea-tlv320.c
+++ b/sound/soc/imx/eukrea-tlv320.c
@@ -79,7 +79,7 @@ static struct snd_soc_dai_link eukrea_tlv320_dai = {
79 .name = "tlv320aic23", 79 .name = "tlv320aic23",
80 .stream_name = "TLV320AIC23", 80 .stream_name = "TLV320AIC23",
81 .codec_dai_name = "tlv320aic23-hifi", 81 .codec_dai_name = "tlv320aic23-hifi",
82 .platform_name = "imx-pcm-audio.0", 82 .platform_name = "imx-fiq-pcm-audio.0",
83 .codec_name = "tlv320aic23-codec.0-001a", 83 .codec_name = "tlv320aic23-codec.0-001a",
84 .cpu_dai_name = "imx-ssi.0", 84 .cpu_dai_name = "imx-ssi.0",
85 .ops = &eukrea_tlv320_snd_ops, 85 .ops = &eukrea_tlv320_snd_ops,
diff --git a/sound/soc/omap/am3517evm.c b/sound/soc/omap/am3517evm.c
index 161750443ebc..73dde4a1adc3 100644
--- a/sound/soc/omap/am3517evm.c
+++ b/sound/soc/omap/am3517evm.c
@@ -139,7 +139,7 @@ static struct snd_soc_dai_link am3517evm_dai = {
139 .cpu_dai_name ="omap-mcbsp-dai.0", 139 .cpu_dai_name ="omap-mcbsp-dai.0",
140 .codec_dai_name = "tlv320aic23-hifi", 140 .codec_dai_name = "tlv320aic23-hifi",
141 .platform_name = "omap-pcm-audio", 141 .platform_name = "omap-pcm-audio",
142 .codec_name = "tlv320aic23-codec", 142 .codec_name = "tlv320aic23-codec.2-001a",
143 .init = am3517evm_aic23_init, 143 .init = am3517evm_aic23_init,
144 .ops = &am3517evm_ops, 144 .ops = &am3517evm_ops,
145}; 145};
diff --git a/sound/soc/pxa/e740_wm9705.c b/sound/soc/pxa/e740_wm9705.c
index 28333e7d9c50..dc65650a6fa1 100644
--- a/sound/soc/pxa/e740_wm9705.c
+++ b/sound/soc/pxa/e740_wm9705.c
@@ -117,7 +117,7 @@ static struct snd_soc_dai_link e740_dai[] = {
117 { 117 {
118 .name = "AC97", 118 .name = "AC97",
119 .stream_name = "AC97 HiFi", 119 .stream_name = "AC97 HiFi",
120 .cpu_dai_name = "pxa-ac97.0", 120 .cpu_dai_name = "pxa2xx-ac97",
121 .codec_dai_name = "wm9705-hifi", 121 .codec_dai_name = "wm9705-hifi",
122 .platform_name = "pxa-pcm-audio", 122 .platform_name = "pxa-pcm-audio",
123 .codec_name = "wm9705-codec", 123 .codec_name = "wm9705-codec",
@@ -126,7 +126,7 @@ static struct snd_soc_dai_link e740_dai[] = {
126 { 126 {
127 .name = "AC97 Aux", 127 .name = "AC97 Aux",
128 .stream_name = "AC97 Aux", 128 .stream_name = "AC97 Aux",
129 .cpu_dai_name = "pxa-ac97.1", 129 .cpu_dai_name = "pxa2xx-ac97-aux",
130 .codec_dai_name = "wm9705-aux", 130 .codec_dai_name = "wm9705-aux",
131 .platform_name = "pxa-pcm-audio", 131 .platform_name = "pxa-pcm-audio",
132 .codec_name = "wm9705-codec", 132 .codec_name = "wm9705-codec",
diff --git a/sound/soc/pxa/e750_wm9705.c b/sound/soc/pxa/e750_wm9705.c
index 01bf31675c55..51897fcd911b 100644
--- a/sound/soc/pxa/e750_wm9705.c
+++ b/sound/soc/pxa/e750_wm9705.c
@@ -99,7 +99,7 @@ static struct snd_soc_dai_link e750_dai[] = {
99 { 99 {
100 .name = "AC97", 100 .name = "AC97",
101 .stream_name = "AC97 HiFi", 101 .stream_name = "AC97 HiFi",
102 .cpu_dai_name = "pxa-ac97.0", 102 .cpu_dai_name = "pxa2xx-ac97",
103 .codec_dai_name = "wm9705-hifi", 103 .codec_dai_name = "wm9705-hifi",
104 .platform_name = "pxa-pcm-audio", 104 .platform_name = "pxa-pcm-audio",
105 .codec_name = "wm9705-codec", 105 .codec_name = "wm9705-codec",
@@ -109,7 +109,7 @@ static struct snd_soc_dai_link e750_dai[] = {
109 { 109 {
110 .name = "AC97 Aux", 110 .name = "AC97 Aux",
111 .stream_name = "AC97 Aux", 111 .stream_name = "AC97 Aux",
112 .cpu_dai_name = "pxa-ac97.1", 112 .cpu_dai_name = "pxa2xx-ac97-aux",
113 .codec_dai_name ="wm9705-aux", 113 .codec_dai_name ="wm9705-aux",
114 .platform_name = "pxa-pcm-audio", 114 .platform_name = "pxa-pcm-audio",
115 .codec_name = "wm9705-codec", 115 .codec_name = "wm9705-codec",
diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c
index c6a37c6ef23b..053ed208e59f 100644
--- a/sound/soc/pxa/e800_wm9712.c
+++ b/sound/soc/pxa/e800_wm9712.c
@@ -89,7 +89,7 @@ static struct snd_soc_dai_link e800_dai[] = {
89 { 89 {
90 .name = "AC97", 90 .name = "AC97",
91 .stream_name = "AC97 HiFi", 91 .stream_name = "AC97 HiFi",
92 .cpu_dai_name = "pxa-ac97.0", 92 .cpu_dai_name = "pxa2xx-ac97",
93 .codec_dai_name = "wm9712-hifi", 93 .codec_dai_name = "wm9712-hifi",
94 .platform_name = "pxa-pcm-audio", 94 .platform_name = "pxa-pcm-audio",
95 .codec_name = "wm9712-codec", 95 .codec_name = "wm9712-codec",
@@ -98,7 +98,7 @@ static struct snd_soc_dai_link e800_dai[] = {
98 { 98 {
99 .name = "AC97 Aux", 99 .name = "AC97 Aux",
100 .stream_name = "AC97 Aux", 100 .stream_name = "AC97 Aux",
101 .cpu_dai_name = "pxa-ac97.1", 101 .cpu_dai_name = "pxa2xx-ac97-aux",
102 .codec_dai_name ="wm9712-aux", 102 .codec_dai_name ="wm9712-aux",
103 .platform_name = "pxa-pcm-audio", 103 .platform_name = "pxa-pcm-audio",
104 .codec_name = "wm9712-codec", 104 .codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c
index fc22e6eefc98..b13a4252812d 100644
--- a/sound/soc/pxa/em-x270.c
+++ b/sound/soc/pxa/em-x270.c
@@ -37,7 +37,7 @@ static struct snd_soc_dai_link em_x270_dai[] = {
37 { 37 {
38 .name = "AC97", 38 .name = "AC97",
39 .stream_name = "AC97 HiFi", 39 .stream_name = "AC97 HiFi",
40 .cpu_dai_name = "pxa-ac97.0", 40 .cpu_dai_name = "pxa2xx-ac97",
41 .codec_dai_name = "wm9712-hifi", 41 .codec_dai_name = "wm9712-hifi",
42 .platform_name = "pxa-pcm-audio", 42 .platform_name = "pxa-pcm-audio",
43 .codec_name = "wm9712-codec", 43 .codec_name = "wm9712-codec",
@@ -45,7 +45,7 @@ static struct snd_soc_dai_link em_x270_dai[] = {
45 { 45 {
46 .name = "AC97 Aux", 46 .name = "AC97 Aux",
47 .stream_name = "AC97 Aux", 47 .stream_name = "AC97 Aux",
48 .cpu_dai_name = "pxa-ac97.1", 48 .cpu_dai_name = "pxa2xx-ac97-aux",
49 .codec_dai_name ="wm9712-aux", 49 .codec_dai_name ="wm9712-aux",
50 .platform_name = "pxa-pcm-audio", 50 .platform_name = "pxa-pcm-audio",
51 .codec_name = "wm9712-codec", 51 .codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/mioa701_wm9713.c b/sound/soc/pxa/mioa701_wm9713.c
index 0d70fc8c12bd..38ca6759907e 100644
--- a/sound/soc/pxa/mioa701_wm9713.c
+++ b/sound/soc/pxa/mioa701_wm9713.c
@@ -162,7 +162,7 @@ static struct snd_soc_dai_link mioa701_dai[] = {
162 { 162 {
163 .name = "AC97", 163 .name = "AC97",
164 .stream_name = "AC97 HiFi", 164 .stream_name = "AC97 HiFi",
165 .cpu_dai_name = "pxa-ac97.0", 165 .cpu_dai_name = "pxa2xx-ac97",
166 .codec_dai_name = "wm9713-hifi", 166 .codec_dai_name = "wm9713-hifi",
167 .codec_name = "wm9713-codec", 167 .codec_name = "wm9713-codec",
168 .init = mioa701_wm9713_init, 168 .init = mioa701_wm9713_init,
@@ -172,7 +172,7 @@ static struct snd_soc_dai_link mioa701_dai[] = {
172 { 172 {
173 .name = "AC97 Aux", 173 .name = "AC97 Aux",
174 .stream_name = "AC97 Aux", 174 .stream_name = "AC97 Aux",
175 .cpu_dai_name = "pxa-ac97.1", 175 .cpu_dai_name = "pxa2xx-ac97-aux",
176 .codec_dai_name ="wm9713-aux", 176 .codec_dai_name ="wm9713-aux",
177 .codec_name = "wm9713-codec", 177 .codec_name = "wm9713-codec",
178 .platform_name = "pxa-pcm-audio", 178 .platform_name = "pxa-pcm-audio",
diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c
index 857db96d4a4f..504e4004f004 100644
--- a/sound/soc/pxa/palm27x.c
+++ b/sound/soc/pxa/palm27x.c
@@ -132,7 +132,7 @@ static struct snd_soc_dai_link palm27x_dai[] = {
132{ 132{
133 .name = "AC97 HiFi", 133 .name = "AC97 HiFi",
134 .stream_name = "AC97 HiFi", 134 .stream_name = "AC97 HiFi",
135 .cpu_dai_name = "pxa-ac97.0", 135 .cpu_dai_name = "pxa2xx-ac97",
136 .codec_dai_name = "wm9712-hifi", 136 .codec_dai_name = "wm9712-hifi",
137 .codec_name = "wm9712-codec", 137 .codec_name = "wm9712-codec",
138 .platform_name = "pxa-pcm-audio", 138 .platform_name = "pxa-pcm-audio",
@@ -141,7 +141,7 @@ static struct snd_soc_dai_link palm27x_dai[] = {
141{ 141{
142 .name = "AC97 Aux", 142 .name = "AC97 Aux",
143 .stream_name = "AC97 Aux", 143 .stream_name = "AC97 Aux",
144 .cpu_dai_name = "pxa-ac97.1", 144 .cpu_dai_name = "pxa2xx-ac97-aux",
145 .codec_dai_name = "wm9712-aux", 145 .codec_dai_name = "wm9712-aux",
146 .codec_name = "wm9712-codec", 146 .codec_name = "wm9712-codec",
147 .platform_name = "pxa-pcm-audio", 147 .platform_name = "pxa-pcm-audio",
diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c
index f75804ef0897..4b6e5d608b42 100644
--- a/sound/soc/pxa/tosa.c
+++ b/sound/soc/pxa/tosa.c
@@ -219,7 +219,7 @@ static struct snd_soc_dai_link tosa_dai[] = {
219{ 219{
220 .name = "AC97", 220 .name = "AC97",
221 .stream_name = "AC97 HiFi", 221 .stream_name = "AC97 HiFi",
222 .cpu_dai_name = "pxa-ac97.0", 222 .cpu_dai_name = "pxa2xx-ac97",
223 .codec_dai_name = "wm9712-hifi", 223 .codec_dai_name = "wm9712-hifi",
224 .platform_name = "pxa-pcm-audio", 224 .platform_name = "pxa-pcm-audio",
225 .codec_name = "wm9712-codec", 225 .codec_name = "wm9712-codec",
@@ -229,7 +229,7 @@ static struct snd_soc_dai_link tosa_dai[] = {
229{ 229{
230 .name = "AC97 Aux", 230 .name = "AC97 Aux",
231 .stream_name = "AC97 Aux", 231 .stream_name = "AC97 Aux",
232 .cpu_dai_name = "pxa-ac97.1", 232 .cpu_dai_name = "pxa2xx-ac97-aux",
233 .codec_dai_name = "wm9712-aux", 233 .codec_dai_name = "wm9712-aux",
234 .platform_name = "pxa-pcm-audio", 234 .platform_name = "pxa-pcm-audio",
235 .codec_name = "wm9712-codec", 235 .codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c
index b222a7d72027..25bba108fea3 100644
--- a/sound/soc/pxa/zylonite.c
+++ b/sound/soc/pxa/zylonite.c
@@ -166,7 +166,7 @@ static struct snd_soc_dai_link zylonite_dai[] = {
166 .stream_name = "AC97 HiFi", 166 .stream_name = "AC97 HiFi",
167 .codec_name = "wm9713-codec", 167 .codec_name = "wm9713-codec",
168 .platform_name = "pxa-pcm-audio", 168 .platform_name = "pxa-pcm-audio",
169 .cpu_dai_name = "pxa-ac97.0", 169 .cpu_dai_name = "pxa2xx-ac97",
170 .codec_name = "wm9713-hifi", 170 .codec_name = "wm9713-hifi",
171 .init = zylonite_wm9713_init, 171 .init = zylonite_wm9713_init,
172}, 172},
@@ -175,7 +175,7 @@ static struct snd_soc_dai_link zylonite_dai[] = {
175 .stream_name = "AC97 Aux", 175 .stream_name = "AC97 Aux",
176 .codec_name = "wm9713-codec", 176 .codec_name = "wm9713-codec",
177 .platform_name = "pxa-pcm-audio", 177 .platform_name = "pxa-pcm-audio",
178 .cpu_dai_name = "pxa-ac97.1", 178 .cpu_dai_name = "pxa2xx-ac97-aux",
179 .codec_name = "wm9713-aux", 179 .codec_name = "wm9713-aux",
180}, 180},
181{ 181{
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 8194f150bab7..1790f83ee665 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -712,7 +712,15 @@ static int dapm_supply_check_power(struct snd_soc_dapm_widget *w)
712 !path->connected(path->source, path->sink)) 712 !path->connected(path->source, path->sink))
713 continue; 713 continue;
714 714
715 if (path->sink && path->sink->power_check && 715 if (!path->sink)
716 continue;
717
718 if (path->sink->force) {
719 power = 1;
720 break;
721 }
722
723 if (path->sink->power_check &&
716 path->sink->power_check(path->sink)) { 724 path->sink->power_check(path->sink)) {
717 power = 1; 725 power = 1;
718 break; 726 break;
@@ -933,7 +941,7 @@ static void dapm_seq_run(struct snd_soc_dapm_context *dapm,
933 } 941 }
934 942
935 if (!list_empty(&pending)) 943 if (!list_empty(&pending))
936 dapm_seq_run_coalesced(dapm, &pending); 944 dapm_seq_run_coalesced(cur_dapm, &pending);
937} 945}
938 946
939static void dapm_widget_update(struct snd_soc_dapm_context *dapm) 947static void dapm_widget_update(struct snd_soc_dapm_context *dapm)
@@ -1627,6 +1635,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_add_routes);
1627int snd_soc_dapm_new_widgets(struct snd_soc_dapm_context *dapm) 1635int snd_soc_dapm_new_widgets(struct snd_soc_dapm_context *dapm)
1628{ 1636{
1629 struct snd_soc_dapm_widget *w; 1637 struct snd_soc_dapm_widget *w;
1638 unsigned int val;
1630 1639
1631 list_for_each_entry(w, &dapm->card->widgets, list) 1640 list_for_each_entry(w, &dapm->card->widgets, list)
1632 { 1641 {
@@ -1675,6 +1684,18 @@ int snd_soc_dapm_new_widgets(struct snd_soc_dapm_context *dapm)
1675 case snd_soc_dapm_post: 1684 case snd_soc_dapm_post:
1676 break; 1685 break;
1677 } 1686 }
1687
1688 /* Read the initial power state from the device */
1689 if (w->reg >= 0) {
1690 val = snd_soc_read(w->codec, w->reg);
1691 val &= 1 << w->shift;
1692 if (w->invert)
1693 val = !val;
1694
1695 if (val)
1696 w->power = 1;
1697 }
1698
1678 w->new = 1; 1699 w->new = 1;
1679 } 1700 }
1680 1701
diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c
index 68b97477577b..66eabafb1c24 100644
--- a/sound/usb/caiaq/audio.c
+++ b/sound/usb/caiaq/audio.c
@@ -785,7 +785,7 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
785 } 785 }
786 786
787 dev->pcm->private_data = dev; 787 dev->pcm->private_data = dev;
788 strcpy(dev->pcm->name, dev->product_name); 788 strlcpy(dev->pcm->name, dev->product_name, sizeof(dev->pcm->name));
789 789
790 memset(dev->sub_playback, 0, sizeof(dev->sub_playback)); 790 memset(dev->sub_playback, 0, sizeof(dev->sub_playback));
791 memset(dev->sub_capture, 0, sizeof(dev->sub_capture)); 791 memset(dev->sub_capture, 0, sizeof(dev->sub_capture));
diff --git a/sound/usb/caiaq/midi.c b/sound/usb/caiaq/midi.c
index 2f218c77fff2..a1a47088fd0c 100644
--- a/sound/usb/caiaq/midi.c
+++ b/sound/usb/caiaq/midi.c
@@ -136,7 +136,7 @@ int snd_usb_caiaq_midi_init(struct snd_usb_caiaqdev *device)
136 if (ret < 0) 136 if (ret < 0)
137 return ret; 137 return ret;
138 138
139 strcpy(rmidi->name, device->product_name); 139 strlcpy(rmidi->name, device->product_name, sizeof(rmidi->name));
140 140
141 rmidi->info_flags = SNDRV_RAWMIDI_INFO_DUPLEX; 141 rmidi->info_flags = SNDRV_RAWMIDI_INFO_DUPLEX;
142 rmidi->private_data = device; 142 rmidi->private_data = device;
diff --git a/sound/usb/card.c b/sound/usb/card.c
index 800f7cb4f251..c0f8270bc199 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -323,6 +323,7 @@ static int snd_usb_audio_create(struct usb_device *dev, int idx,
323 return -ENOMEM; 323 return -ENOMEM;
324 } 324 }
325 325
326 mutex_init(&chip->shutdown_mutex);
326 chip->index = idx; 327 chip->index = idx;
327 chip->dev = dev; 328 chip->dev = dev;
328 chip->card = card; 329 chip->card = card;
@@ -531,6 +532,7 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, void *ptr)
531 chip = ptr; 532 chip = ptr;
532 card = chip->card; 533 card = chip->card;
533 mutex_lock(&register_mutex); 534 mutex_lock(&register_mutex);
535 mutex_lock(&chip->shutdown_mutex);
534 chip->shutdown = 1; 536 chip->shutdown = 1;
535 chip->num_interfaces--; 537 chip->num_interfaces--;
536 if (chip->num_interfaces <= 0) { 538 if (chip->num_interfaces <= 0) {
@@ -548,9 +550,11 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, void *ptr)
548 snd_usb_mixer_disconnect(p); 550 snd_usb_mixer_disconnect(p);
549 } 551 }
550 usb_chip[chip->index] = NULL; 552 usb_chip[chip->index] = NULL;
553 mutex_unlock(&chip->shutdown_mutex);
551 mutex_unlock(&register_mutex); 554 mutex_unlock(&register_mutex);
552 snd_card_free_when_closed(card); 555 snd_card_free_when_closed(card);
553 } else { 556 } else {
557 mutex_unlock(&chip->shutdown_mutex);
554 mutex_unlock(&register_mutex); 558 mutex_unlock(&register_mutex);
555 } 559 }
556} 560}
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 4132522ac90f..e3f680526cb5 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -361,6 +361,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
361 } 361 }
362 362
363 if (changed) { 363 if (changed) {
364 mutex_lock(&subs->stream->chip->shutdown_mutex);
364 /* format changed */ 365 /* format changed */
365 snd_usb_release_substream_urbs(subs, 0); 366 snd_usb_release_substream_urbs(subs, 0);
366 /* influenced: period_bytes, channels, rate, format, */ 367 /* influenced: period_bytes, channels, rate, format, */
@@ -368,6 +369,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
368 params_rate(hw_params), 369 params_rate(hw_params),
369 snd_pcm_format_physical_width(params_format(hw_params)) * 370 snd_pcm_format_physical_width(params_format(hw_params)) *
370 params_channels(hw_params)); 371 params_channels(hw_params));
372 mutex_unlock(&subs->stream->chip->shutdown_mutex);
371 } 373 }
372 374
373 return ret; 375 return ret;
@@ -385,8 +387,9 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream)
385 subs->cur_audiofmt = NULL; 387 subs->cur_audiofmt = NULL;
386 subs->cur_rate = 0; 388 subs->cur_rate = 0;
387 subs->period_bytes = 0; 389 subs->period_bytes = 0;
388 if (!subs->stream->chip->shutdown) 390 mutex_lock(&subs->stream->chip->shutdown_mutex);
389 snd_usb_release_substream_urbs(subs, 0); 391 snd_usb_release_substream_urbs(subs, 0);
392 mutex_unlock(&subs->stream->chip->shutdown_mutex);
390 return snd_pcm_lib_free_vmalloc_buffer(substream); 393 return snd_pcm_lib_free_vmalloc_buffer(substream);
391} 394}
392 395
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index db3eb21627ee..6e66fffe87f5 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -36,6 +36,7 @@ struct snd_usb_audio {
36 struct snd_card *card; 36 struct snd_card *card;
37 u32 usb_id; 37 u32 usb_id;
38 int shutdown; 38 int shutdown;
39 struct mutex shutdown_mutex;
39 unsigned int txfr_quirk:1; /* Subframe boundaries on transfers */ 40 unsigned int txfr_quirk:1; /* Subframe boundaries on transfers */
40 int num_interfaces; 41 int num_interfaces;
41 int num_suspended_intf; 42 int num_suspended_intf;
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index cb43289e447f..416684be0ad3 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -1,4 +1,3 @@
1PERF-BUILD-OPTIONS
2PERF-CFLAGS 1PERF-CFLAGS
3PERF-GUI-VARS 2PERF-GUI-VARS
4PERF-VERSION-FILE 3PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index bd498d496952..4626a398836a 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -178,8 +178,8 @@ install-pdf: pdf
178 $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) 178 $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
179 $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) 179 $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
180 180
181install-html: html 181#install-html: html
182 '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) 182# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
183 183
184../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE 184../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
185 $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE 185 $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE
@@ -288,15 +288,16 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
288 sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ 288 sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
289 mv $@+ $@ 289 mv $@+ $@
290 290
291install-webdoc : html 291# UNIMPLEMENTED
292 '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) 292#install-webdoc : html
293# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
293 294
294quick-install: quick-install-man 295# quick-install: quick-install-man
295 296
296quick-install-man: 297# quick-install-man:
297 '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir) 298# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
298 299
299quick-install-html: 300#quick-install-html:
300 '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) 301# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
301 302
302.PHONY: .FORCE-PERF-VERSION-FILE 303.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 399751befeed..7a527f7e9da9 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf list' 11'perf list' [hw|sw|cache|tracepoint|event_glob]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
@@ -63,7 +63,26 @@ details. Some of them are referenced in the SEE ALSO section below.
63 63
64OPTIONS 64OPTIONS
65------- 65-------
66None 66
67Without options all known events will be listed.
68
69To limit the list use:
70
71. 'hw' or 'hardware' to list hardware events such as cache-misses, etc.
72
73. 'sw' or 'software' to list software events such as context switches, etc.
74
75. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc.
76
77. 'tracepoint' to list all tracepoint events, alternatively use
78 'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
79 block, etc.
80
81. If none of the above is matched, it will apply the supplied glob to all
82 events, printing the ones that match.
83
84One or more types can be used at the same time, listing the events for the
85types specified.
67 86
68SEE ALSO 87SEE ALSO
69-------- 88--------
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 921de259ea10..4a26a2f3a6a3 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -24,8 +24,8 @@ and statistics with this 'perf lock' command.
24 24
25 'perf lock report' reports statistical data. 25 'perf lock report' reports statistical data.
26 26
27OPTIONS 27COMMON OPTIONS
28------- 28--------------
29 29
30-i:: 30-i::
31--input=<file>:: 31--input=<file>::
@@ -39,6 +39,14 @@ OPTIONS
39--dump-raw-trace:: 39--dump-raw-trace::
40 Dump raw trace in ASCII. 40 Dump raw trace in ASCII.
41 41
42REPORT OPTIONS
43--------------
44
45-k::
46--key=<value>::
47 Sorting key. Possible values: acquired (default), contended,
48 wait_total, wait_max, wait_min.
49
42SEE ALSO 50SEE ALSO
43-------- 51--------
44linkperf:perf[1] 52linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 86b797a35aa6..02bafce4b341 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -16,7 +16,7 @@ or
16or 16or
17'perf probe' --list 17'perf probe' --list
18or 18or
19'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' 19'perf probe' [options] --line='LINE'
20or 20or
21'perf probe' [options] --vars='PROBEPOINT' 21'perf probe' [options] --vars='PROBEPOINT'
22 22
@@ -73,6 +73,17 @@ OPTIONS
73 (Only for --vars) Show external defined variables in addition to local 73 (Only for --vars) Show external defined variables in addition to local
74 variables. 74 variables.
75 75
76-F::
77--funcs::
78 Show available functions in given module or kernel.
79
80--filter=FILTER::
81 (Only for --vars and --funcs) Set filter. FILTER is a combination of glob
82 pattern, see FILTER PATTERN for detail.
83 Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*"
84 for --funcs.
85 If several filters are specified, only the last filter is used.
86
76-f:: 87-f::
77--force:: 88--force::
78 Forcibly add events with existing name. 89 Forcibly add events with existing name.
@@ -117,13 +128,14 @@ LINE SYNTAX
117----------- 128-----------
118Line range is described by following syntax. 129Line range is described by following syntax.
119 130
120 "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]" 131 "FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
121 132
122FUNC specifies the function name of showing lines. 'RLN' is the start line 133FUNC specifies the function name of showing lines. 'RLN' is the start line
123number from function entry line, and 'RLN2' is the end line number. As same as 134number from function entry line, and 'RLN2' is the end line number. As same as
124probe syntax, 'SRC' means the source file path, 'ALN' is start line number, 135probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
125and 'ALN2' is end line number in the file. It is also possible to specify how 136and 'ALN2' is end line number in the file. It is also possible to specify how
126many lines to show by using 'NUM'. 137many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good
138for searching a specific function when several functions share same name.
127So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. 139So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
128 140
129LAZY MATCHING 141LAZY MATCHING
@@ -135,6 +147,14 @@ e.g.
135 147
136This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.) 148This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.)
137 149
150FILTER PATTERN
151--------------
152 The filter pattern is a glob matching pattern(s) to filter variables.
153 In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")".
154
155e.g.
156 With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
157 With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out.
138 158
139EXAMPLES 159EXAMPLES
140-------- 160--------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e032716c839b..5a520f825295 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -137,6 +137,17 @@ Do not update the builid cache. This saves some overhead in situations
137where the information in the perf.data file (which includes buildids) 137where the information in the perf.data file (which includes buildids)
138is sufficient. 138is sufficient.
139 139
140-G name,...::
141--cgroup name,...::
142monitor only in the container (cgroup) called "name". This option is available only
143in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
144container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
145can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
146to first event, second cgroup to second event and so on. It is possible to provide
147an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
148corresponding events, i.e., they always refer to events defined earlier on the command
149line.
150
140SEE ALSO 151SEE ALSO
141-------- 152--------
142linkperf:perf-stat[1], linkperf:perf-list[1] 153linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b6da7affbbee..918cc38ee6d1 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -83,6 +83,17 @@ This option is only valid in system-wide mode.
83print counts using a CSV-style output to make it easy to import directly into 83print counts using a CSV-style output to make it easy to import directly into
84spreadsheets. Columns are separated by the string specified in SEP. 84spreadsheets. Columns are separated by the string specified in SEP.
85 85
86-G name::
87--cgroup name::
88monitor only in the container (cgroup) called "name". This option is available only
89in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
90container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
91can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
92to first event, second cgroup to second event and so on. It is possible to provide
93an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
94corresponding events, i.e., they always refer to events defined earlier on the command
95line.
96
86EXAMPLES 97EXAMPLES
87-------- 98--------
88 99
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7141c42e1469..9b8421805c5c 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -3,7 +3,7 @@ ifeq ("$(origin O)", "command line")
3endif 3endif
4 4
5# The default target of this Makefile is... 5# The default target of this Makefile is...
6all:: 6all:
7 7
8ifneq ($(OUTPUT),) 8ifneq ($(OUTPUT),)
9# check that the output directory actually exists 9# check that the output directory actually exists
@@ -11,152 +11,12 @@ OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
11$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) 11$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
12endif 12endif
13 13
14# Define V=1 to have a more verbose compile. 14# Define V to have a more verbose compile.
15# Define V=2 to have an even more verbose compile.
16#
17# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
18# or vsnprintf() return -1 instead of number of characters which would
19# have been written to the final string if enough space had been available.
20#
21# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds
22# when attempting to read from an fopen'ed directory.
23#
24# Define NO_OPENSSL environment variable if you do not have OpenSSL.
25# This also implies MOZILLA_SHA1.
26#
27# Define CURLDIR=/foo/bar if your curl header and library files are in
28# /foo/bar/include and /foo/bar/lib directories.
29#
30# Define EXPATDIR=/foo/bar if your expat header and library files are in
31# /foo/bar/include and /foo/bar/lib directories.
32#
33# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent.
34#
35# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks
36# d_type in struct dirent (latest Cygwin -- will be fixed soonish).
37#
38# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.)
39# do not support the 'size specifiers' introduced by C99, namely ll, hh,
40# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t).
41# some C compilers supported these specifiers prior to C99 as an extension.
42#
43# Define NO_STRCASESTR if you don't have strcasestr.
44#
45# Define NO_MEMMEM if you don't have memmem.
46#
47# Define NO_STRTOUMAX if you don't have strtoumax in the C library.
48# If your compiler also does not support long long or does not have
49# strtoull, define NO_STRTOULL.
50#
51# Define NO_SETENV if you don't have setenv in the C library.
52#
53# Define NO_UNSETENV if you don't have unsetenv in the C library.
54#
55# Define NO_MKDTEMP if you don't have mkdtemp in the C library.
56#
57# Define NO_SYS_SELECT_H if you don't have sys/select.h.
58#
59# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link.
60# Enable it on Windows. By default, symrefs are still used.
61#
62# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
63# tests. These tests take up a significant amount of the total test time
64# but are not needed unless you plan to talk to SVN repos.
65#
66# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
67# installed in /sw, but don't want PERF to link against any libraries
68# installed there. If defined you may specify your own (or Fink's)
69# include directories and library directories by defining CFLAGS
70# and LDFLAGS appropriately.
71#
72# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
73# have DarwinPorts installed in /opt/local, but don't want PERF to
74# link against any libraries installed there. If defined you may
75# specify your own (or DarwinPort's) include directories and
76# library directories by defining CFLAGS and LDFLAGS appropriately.
77#
78# Define PPC_SHA1 environment variable when running make to make use of
79# a bundled SHA1 routine optimized for PowerPC.
80#
81# Define ARM_SHA1 environment variable when running make to make use of
82# a bundled SHA1 routine optimized for ARM.
83#
84# Define MOZILLA_SHA1 environment variable when running make to make use of
85# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast
86# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
87# choice) has very fast version optimized for i586.
88#
89# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
90#
91# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
92#
93# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
94# Patrick Mauritz).
95#
96# Define NO_MMAP if you want to avoid mmap.
97#
98# Define NO_PTHREADS if you do not have or do not want to use Pthreads.
99#
100# Define NO_PREAD if you have a problem with pread() system call (e.g.
101# cygwin.dll before v1.5.22).
102#
103# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
104# generally faster on your platform than accessing the working directory.
105#
106# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support
107# the executable mode bit, but doesn't really do so.
108#
109# Define NO_IPV6 if you lack IPv6 support and getaddrinfo().
110#
111# Define NO_SOCKADDR_STORAGE if your platform does not have struct
112# sockaddr_storage.
113#
114# Define NO_ICONV if your libc does not properly support iconv.
115#
116# Define OLD_ICONV if your library has an old iconv(), where the second
117# (input buffer pointer) parameter is declared with type (const char **).
118#
119# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound.
120#
121# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib"
122# that tells runtime paths to dynamic libraries;
123# "-Wl,-rpath=/path/lib" is used instead.
124#
125# Define USE_NSEC below if you want perf to care about sub-second file mtimes
126# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
127# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
128# randomly break unless your underlying filesystem supports those sub-second
129# times (my ext3 doesn't).
130#
131# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of
132# "st_ctim"
133#
134# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
135# available. This automatically turns USE_NSEC off.
136#
137# Define USE_STDEV below if you want perf to care about the underlying device
138# change being considered an inode change from the update-index perspective.
139#
140# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
141# field that counts the on-disk footprint in 512-byte blocks.
142# 15#
143# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 16# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
144# 17#
145# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. 18# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
146# 19#
147# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's
148# MakeMaker (e.g. using ActiveState under Cygwin).
149#
150# Define NO_PERL if you do not want Perl scripts or libraries at all.
151#
152# Define INTERNAL_QSORT to use Git's implementation of qsort(), which
153# is a simplified version of the merge sort used in glibc. This is
154# recommended if Git triggers O(n^2) behavior in your platform's qsort().
155#
156# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
157# your external grep (e.g., if your system lacks grep, if its grep is
158# broken, or spawning external process is slower than built-in grep perf has).
159#
160# Define LDFLAGS=-static to build a static binary. 20# Define LDFLAGS=-static to build a static binary.
161# 21#
162# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. 22# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
@@ -167,12 +27,7 @@ $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
167 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) 27 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
168-include $(OUTPUT)PERF-VERSION-FILE 28-include $(OUTPUT)PERF-VERSION-FILE
169 29
170uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') 30uname_M := $(shell uname -m 2>/dev/null || echo not)
171uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
172uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
173uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
174uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
175uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
176 31
177ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ 32ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
178 -e s/arm.*/arm/ -e s/sa110/arm/ \ 33 -e s/arm.*/arm/ -e s/sa110/arm/ \
@@ -191,8 +46,6 @@ ifeq ($(ARCH),x86_64)
191 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S 46 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
192endif 47endif
193 48
194# CFLAGS and LDFLAGS are for the users to override from the command line.
195
196# 49#
197# Include saner warnings here, which can catch bugs: 50# Include saner warnings here, which can catch bugs:
198# 51#
@@ -270,22 +123,13 @@ CC = $(CROSS_COMPILE)gcc
270AR = $(CROSS_COMPILE)ar 123AR = $(CROSS_COMPILE)ar
271RM = rm -f 124RM = rm -f
272MKDIR = mkdir 125MKDIR = mkdir
273TAR = tar
274FIND = find 126FIND = find
275INSTALL = install 127INSTALL = install
276RPMBUILD = rpmbuild
277PTHREAD_LIBS = -lpthread
278 128
279# sparse is architecture-neutral, which means that we need to tell it 129# sparse is architecture-neutral, which means that we need to tell it
280# explicitly what architecture to check for. Fix this up for yours.. 130# explicitly what architecture to check for. Fix this up for yours..
281SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ 131SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
282 132
283ifeq ($(V), 2)
284 QUIET_STDERR = ">/dev/null"
285else
286 QUIET_STDERR = ">/dev/null 2>&1"
287endif
288
289-include feature-tests.mak 133-include feature-tests.mak
290 134
291ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y) 135ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
@@ -310,49 +154,37 @@ BASIC_LDFLAGS =
310 154
311# Guard against environment variables 155# Guard against environment variables
312BUILTIN_OBJS = 156BUILTIN_OBJS =
313BUILT_INS =
314COMPAT_CFLAGS =
315COMPAT_OBJS =
316LIB_H = 157LIB_H =
317LIB_OBJS = 158LIB_OBJS =
318SCRIPT_PERL = 159PYRF_OBJS =
319SCRIPT_SH = 160SCRIPT_SH =
320TEST_PROGRAMS =
321 161
322SCRIPT_SH += perf-archive.sh 162SCRIPT_SH += perf-archive.sh
323 163
324grep-libs = $(filter -l%,$(1)) 164grep-libs = $(filter -l%,$(1))
325strip-libs = $(filter-out -l%,$(1)) 165strip-libs = $(filter-out -l%,$(1))
326 166
167$(OUTPUT)python/perf.so: $(PYRF_OBJS)
168 $(QUIET_GEN)python util/setup.py --quiet build_ext --build-lib='$(OUTPUT)python' \
169 --build-temp='$(OUTPUT)python/temp'
327# 170#
328# No Perl scripts right now: 171# No Perl scripts right now:
329# 172#
330 173
331# SCRIPT_PERL += perf-add--interactive.perl 174SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
332
333SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
334 $(patsubst %.perl,%,$(SCRIPT_PERL))
335
336# Empty...
337EXTRA_PROGRAMS =
338
339# ... and all the rest that could be moved out of bindir to perfexecdir
340PROGRAMS += $(EXTRA_PROGRAMS)
341 175
342# 176#
343# Single 'perf' binary right now: 177# Single 'perf' binary right now:
344# 178#
345PROGRAMS += $(OUTPUT)perf 179PROGRAMS += $(OUTPUT)perf
346 180
347# List built-in command $C whose implementation cmd_$C() is not in 181LANG_BINDINGS =
348# builtin-$C.o but is linked in as part of some other command.
349#
350 182
351# what 'all' will build and 'install' will install, in perfexecdir 183# what 'all' will build and 'install' will install, in perfexecdir
352ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) 184ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
353 185
354# what 'all' will build but not install in perfexecdir 186# what 'all' will build but not install in perfexecdir
355OTHER_PROGRAMS = $(OUTPUT)perf$X 187OTHER_PROGRAMS = $(OUTPUT)perf
356 188
357# Set paths to tools early so that they can be used for version tests. 189# Set paths to tools early so that they can be used for version tests.
358ifndef SHELL_PATH 190ifndef SHELL_PATH
@@ -395,6 +227,7 @@ LIB_H += util/include/dwarf-regs.h
395LIB_H += util/include/asm/dwarf2.h 227LIB_H += util/include/asm/dwarf2.h
396LIB_H += util/include/asm/cpufeature.h 228LIB_H += util/include/asm/cpufeature.h
397LIB_H += perf.h 229LIB_H += perf.h
230LIB_H += util/annotate.h
398LIB_H += util/cache.h 231LIB_H += util/cache.h
399LIB_H += util/callchain.h 232LIB_H += util/callchain.h
400LIB_H += util/build-id.h 233LIB_H += util/build-id.h
@@ -402,6 +235,7 @@ LIB_H += util/debug.h
402LIB_H += util/debugfs.h 235LIB_H += util/debugfs.h
403LIB_H += util/event.h 236LIB_H += util/event.h
404LIB_H += util/evsel.h 237LIB_H += util/evsel.h
238LIB_H += util/evlist.h
405LIB_H += util/exec_cmd.h 239LIB_H += util/exec_cmd.h
406LIB_H += util/types.h 240LIB_H += util/types.h
407LIB_H += util/levenshtein.h 241LIB_H += util/levenshtein.h
@@ -416,6 +250,7 @@ LIB_H += util/help.h
416LIB_H += util/session.h 250LIB_H += util/session.h
417LIB_H += util/strbuf.h 251LIB_H += util/strbuf.h
418LIB_H += util/strlist.h 252LIB_H += util/strlist.h
253LIB_H += util/strfilter.h
419LIB_H += util/svghelper.h 254LIB_H += util/svghelper.h
420LIB_H += util/run-command.h 255LIB_H += util/run-command.h
421LIB_H += util/sigchain.h 256LIB_H += util/sigchain.h
@@ -425,21 +260,26 @@ LIB_H += util/values.h
425LIB_H += util/sort.h 260LIB_H += util/sort.h
426LIB_H += util/hist.h 261LIB_H += util/hist.h
427LIB_H += util/thread.h 262LIB_H += util/thread.h
263LIB_H += util/thread_map.h
428LIB_H += util/trace-event.h 264LIB_H += util/trace-event.h
429LIB_H += util/probe-finder.h 265LIB_H += util/probe-finder.h
430LIB_H += util/probe-event.h 266LIB_H += util/probe-event.h
431LIB_H += util/pstack.h 267LIB_H += util/pstack.h
432LIB_H += util/cpumap.h 268LIB_H += util/cpumap.h
269LIB_H += util/top.h
433LIB_H += $(ARCH_INCLUDE) 270LIB_H += $(ARCH_INCLUDE)
271LIB_H += util/cgroup.h
434 272
435LIB_OBJS += $(OUTPUT)util/abspath.o 273LIB_OBJS += $(OUTPUT)util/abspath.o
436LIB_OBJS += $(OUTPUT)util/alias.o 274LIB_OBJS += $(OUTPUT)util/alias.o
275LIB_OBJS += $(OUTPUT)util/annotate.o
437LIB_OBJS += $(OUTPUT)util/build-id.o 276LIB_OBJS += $(OUTPUT)util/build-id.o
438LIB_OBJS += $(OUTPUT)util/config.o 277LIB_OBJS += $(OUTPUT)util/config.o
439LIB_OBJS += $(OUTPUT)util/ctype.o 278LIB_OBJS += $(OUTPUT)util/ctype.o
440LIB_OBJS += $(OUTPUT)util/debugfs.o 279LIB_OBJS += $(OUTPUT)util/debugfs.o
441LIB_OBJS += $(OUTPUT)util/environment.o 280LIB_OBJS += $(OUTPUT)util/environment.o
442LIB_OBJS += $(OUTPUT)util/event.o 281LIB_OBJS += $(OUTPUT)util/event.o
282LIB_OBJS += $(OUTPUT)util/evlist.o
443LIB_OBJS += $(OUTPUT)util/evsel.o 283LIB_OBJS += $(OUTPUT)util/evsel.o
444LIB_OBJS += $(OUTPUT)util/exec_cmd.o 284LIB_OBJS += $(OUTPUT)util/exec_cmd.o
445LIB_OBJS += $(OUTPUT)util/help.o 285LIB_OBJS += $(OUTPUT)util/help.o
@@ -455,6 +295,8 @@ LIB_OBJS += $(OUTPUT)util/quote.o
455LIB_OBJS += $(OUTPUT)util/strbuf.o 295LIB_OBJS += $(OUTPUT)util/strbuf.o
456LIB_OBJS += $(OUTPUT)util/string.o 296LIB_OBJS += $(OUTPUT)util/string.o
457LIB_OBJS += $(OUTPUT)util/strlist.o 297LIB_OBJS += $(OUTPUT)util/strlist.o
298LIB_OBJS += $(OUTPUT)util/strfilter.o
299LIB_OBJS += $(OUTPUT)util/top.o
458LIB_OBJS += $(OUTPUT)util/usage.o 300LIB_OBJS += $(OUTPUT)util/usage.o
459LIB_OBJS += $(OUTPUT)util/wrapper.o 301LIB_OBJS += $(OUTPUT)util/wrapper.o
460LIB_OBJS += $(OUTPUT)util/sigchain.o 302LIB_OBJS += $(OUTPUT)util/sigchain.o
@@ -469,6 +311,7 @@ LIB_OBJS += $(OUTPUT)util/map.o
469LIB_OBJS += $(OUTPUT)util/pstack.o 311LIB_OBJS += $(OUTPUT)util/pstack.o
470LIB_OBJS += $(OUTPUT)util/session.o 312LIB_OBJS += $(OUTPUT)util/session.o
471LIB_OBJS += $(OUTPUT)util/thread.o 313LIB_OBJS += $(OUTPUT)util/thread.o
314LIB_OBJS += $(OUTPUT)util/thread_map.o
472LIB_OBJS += $(OUTPUT)util/trace-event-parse.o 315LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
473LIB_OBJS += $(OUTPUT)util/trace-event-read.o 316LIB_OBJS += $(OUTPUT)util/trace-event-read.o
474LIB_OBJS += $(OUTPUT)util/trace-event-info.o 317LIB_OBJS += $(OUTPUT)util/trace-event-info.o
@@ -480,6 +323,7 @@ LIB_OBJS += $(OUTPUT)util/probe-event.o
480LIB_OBJS += $(OUTPUT)util/util.o 323LIB_OBJS += $(OUTPUT)util/util.o
481LIB_OBJS += $(OUTPUT)util/xyarray.o 324LIB_OBJS += $(OUTPUT)util/xyarray.o
482LIB_OBJS += $(OUTPUT)util/cpumap.o 325LIB_OBJS += $(OUTPUT)util/cpumap.o
326LIB_OBJS += $(OUTPUT)util/cgroup.o
483 327
484BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o 328BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
485 329
@@ -514,6 +358,20 @@ BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
514 358
515PERFLIBS = $(LIB_FILE) 359PERFLIBS = $(LIB_FILE)
516 360
361# Files needed for the python binding, perf.so
362# pyrf is just an internal name needed for all those wrappers.
363# This has to be in sync with what is in the 'sources' variable in
364# tools/perf/util/setup.py
365
366PYRF_OBJS += $(OUTPUT)util/cpumap.o
367PYRF_OBJS += $(OUTPUT)util/ctype.o
368PYRF_OBJS += $(OUTPUT)util/evlist.o
369PYRF_OBJS += $(OUTPUT)util/evsel.o
370PYRF_OBJS += $(OUTPUT)util/python.o
371PYRF_OBJS += $(OUTPUT)util/thread_map.o
372PYRF_OBJS += $(OUTPUT)util/util.o
373PYRF_OBJS += $(OUTPUT)util/xyarray.o
374
517# 375#
518# Platform specific tweaks 376# Platform specific tweaks
519# 377#
@@ -535,22 +393,6 @@ endif # NO_DWARF
535 393
536-include arch/$(ARCH)/Makefile 394-include arch/$(ARCH)/Makefile
537 395
538ifeq ($(uname_S),Darwin)
539 ifndef NO_FINK
540 ifeq ($(shell test -d /sw/lib && echo y),y)
541 BASIC_CFLAGS += -I/sw/include
542 BASIC_LDFLAGS += -L/sw/lib
543 endif
544 endif
545 ifndef NO_DARWIN_PORTS
546 ifeq ($(shell test -d /opt/local/lib && echo y),y)
547 BASIC_CFLAGS += -I/opt/local/include
548 BASIC_LDFLAGS += -L/opt/local/lib
549 endif
550 endif
551 PTHREAD_LIBS =
552endif
553
554ifneq ($(OUTPUT),) 396ifneq ($(OUTPUT),)
555 BASIC_CFLAGS += -I$(OUTPUT) 397 BASIC_CFLAGS += -I$(OUTPUT)
556endif 398endif
@@ -595,6 +437,7 @@ else
595 LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o 437 LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o
596 LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o 438 LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o
597 LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o 439 LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o
440 LIB_OBJS += $(OUTPUT)util/ui/browsers/top.o
598 LIB_OBJS += $(OUTPUT)util/ui/helpline.o 441 LIB_OBJS += $(OUTPUT)util/ui/helpline.o
599 LIB_OBJS += $(OUTPUT)util/ui/progress.o 442 LIB_OBJS += $(OUTPUT)util/ui/progress.o
600 LIB_OBJS += $(OUTPUT)util/ui/util.o 443 LIB_OBJS += $(OUTPUT)util/ui/util.o
@@ -604,6 +447,7 @@ else
604 LIB_H += util/ui/libslang.h 447 LIB_H += util/ui/libslang.h
605 LIB_H += util/ui/progress.h 448 LIB_H += util/ui/progress.h
606 LIB_H += util/ui/util.h 449 LIB_H += util/ui/util.h
450 LIB_H += util/ui/ui.h
607 endif 451 endif
608endif 452endif
609 453
@@ -635,12 +479,14 @@ else
635 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 479 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
636 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) 480 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
637 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) 481 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
482 msg := $(warning No Python.h found, install python-dev[el] to have python support in 'perf script' and to build the python bindings)
638 BASIC_CFLAGS += -DNO_LIBPYTHON 483 BASIC_CFLAGS += -DNO_LIBPYTHON
639 else 484 else
640 ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS) 485 ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
641 EXTLIBS += $(PYTHON_EMBED_LIBADD) 486 EXTLIBS += $(PYTHON_EMBED_LIBADD)
642 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o 487 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
643 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o 488 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
489 LANG_BINDINGS += $(OUTPUT)python/perf.so
644 endif 490 endif
645endif 491endif
646 492
@@ -690,201 +536,13 @@ else
690 endif 536 endif
691endif 537endif
692 538
693ifndef CC_LD_DYNPATH
694 ifdef NO_R_TO_GCC_LINKER
695 # Some gcc does not accept and pass -R to the linker to specify
696 # the runtime dynamic library path.
697 CC_LD_DYNPATH = -Wl,-rpath,
698 else
699 CC_LD_DYNPATH = -R
700 endif
701endif
702
703ifdef NEEDS_SOCKET
704 EXTLIBS += -lsocket
705endif
706ifdef NEEDS_NSL
707 EXTLIBS += -lnsl
708endif
709ifdef NO_D_TYPE_IN_DIRENT
710 BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT
711endif
712ifdef NO_D_INO_IN_DIRENT
713 BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT
714endif
715ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
716 BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT
717endif
718ifdef USE_NSEC
719 BASIC_CFLAGS += -DUSE_NSEC
720endif
721ifdef USE_ST_TIMESPEC
722 BASIC_CFLAGS += -DUSE_ST_TIMESPEC
723endif
724ifdef NO_NSEC
725 BASIC_CFLAGS += -DNO_NSEC
726endif
727ifdef NO_C99_FORMAT
728 BASIC_CFLAGS += -DNO_C99_FORMAT
729endif
730ifdef SNPRINTF_RETURNS_BOGUS
731 COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
732 COMPAT_OBJS += $(OUTPUT)compat/snprintf.o
733endif
734ifdef FREAD_READS_DIRECTORIES
735 COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
736 COMPAT_OBJS += $(OUTPUT)compat/fopen.o
737endif
738ifdef NO_SYMLINK_HEAD
739 BASIC_CFLAGS += -DNO_SYMLINK_HEAD
740endif
741ifdef NO_STRCASESTR
742 COMPAT_CFLAGS += -DNO_STRCASESTR
743 COMPAT_OBJS += $(OUTPUT)compat/strcasestr.o
744endif
745ifdef NO_STRTOUMAX
746 COMPAT_CFLAGS += -DNO_STRTOUMAX
747 COMPAT_OBJS += $(OUTPUT)compat/strtoumax.o
748endif
749ifdef NO_STRTOULL
750 COMPAT_CFLAGS += -DNO_STRTOULL
751endif
752ifdef NO_SETENV
753 COMPAT_CFLAGS += -DNO_SETENV
754 COMPAT_OBJS += $(OUTPUT)compat/setenv.o
755endif
756ifdef NO_MKDTEMP
757 COMPAT_CFLAGS += -DNO_MKDTEMP
758 COMPAT_OBJS += $(OUTPUT)compat/mkdtemp.o
759endif
760ifdef NO_UNSETENV
761 COMPAT_CFLAGS += -DNO_UNSETENV
762 COMPAT_OBJS += $(OUTPUT)compat/unsetenv.o
763endif
764ifdef NO_SYS_SELECT_H
765 BASIC_CFLAGS += -DNO_SYS_SELECT_H
766endif
767ifdef NO_MMAP
768 COMPAT_CFLAGS += -DNO_MMAP
769 COMPAT_OBJS += $(OUTPUT)compat/mmap.o
770else
771 ifdef USE_WIN32_MMAP
772 COMPAT_CFLAGS += -DUSE_WIN32_MMAP
773 COMPAT_OBJS += $(OUTPUT)compat/win32mmap.o
774 endif
775endif
776ifdef NO_PREAD
777 COMPAT_CFLAGS += -DNO_PREAD
778 COMPAT_OBJS += $(OUTPUT)compat/pread.o
779endif
780ifdef NO_FAST_WORKING_DIRECTORY
781 BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
782endif
783ifdef NO_TRUSTABLE_FILEMODE
784 BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE
785endif
786ifdef NO_IPV6
787 BASIC_CFLAGS += -DNO_IPV6
788endif
789ifdef NO_UINTMAX_T
790 BASIC_CFLAGS += -Duintmax_t=uint32_t
791endif
792ifdef NO_SOCKADDR_STORAGE
793ifdef NO_IPV6
794 BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
795else
796 BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
797endif
798endif
799ifdef NO_INET_NTOP
800 LIB_OBJS += $(OUTPUT)compat/inet_ntop.o
801endif
802ifdef NO_INET_PTON
803 LIB_OBJS += $(OUTPUT)compat/inet_pton.o
804endif
805
806ifdef NO_ICONV
807 BASIC_CFLAGS += -DNO_ICONV
808endif
809
810ifdef OLD_ICONV
811 BASIC_CFLAGS += -DOLD_ICONV
812endif
813
814ifdef NO_DEFLATE_BOUND
815 BASIC_CFLAGS += -DNO_DEFLATE_BOUND
816endif
817
818ifdef PPC_SHA1
819 SHA1_HEADER = "ppc/sha1.h"
820 LIB_OBJS += $(OUTPUT)ppc/sha1.o ppc/sha1ppc.o
821else
822ifdef ARM_SHA1
823 SHA1_HEADER = "arm/sha1.h"
824 LIB_OBJS += $(OUTPUT)arm/sha1.o $(OUTPUT)arm/sha1_arm.o
825else
826ifdef MOZILLA_SHA1
827 SHA1_HEADER = "mozilla-sha1/sha1.h"
828 LIB_OBJS += $(OUTPUT)mozilla-sha1/sha1.o
829else
830 SHA1_HEADER = <openssl/sha.h>
831 EXTLIBS += $(LIB_4_CRYPTO)
832endif
833endif
834endif
835ifdef NO_PERL_MAKEMAKER
836 export NO_PERL_MAKEMAKER
837endif
838ifdef NO_HSTRERROR
839 COMPAT_CFLAGS += -DNO_HSTRERROR
840 COMPAT_OBJS += $(OUTPUT)compat/hstrerror.o
841endif
842ifdef NO_MEMMEM
843 COMPAT_CFLAGS += -DNO_MEMMEM
844 COMPAT_OBJS += $(OUTPUT)compat/memmem.o
845endif
846ifdef INTERNAL_QSORT
847 COMPAT_CFLAGS += -DINTERNAL_QSORT
848 COMPAT_OBJS += $(OUTPUT)compat/qsort.o
849endif
850ifdef RUNTIME_PREFIX
851 COMPAT_CFLAGS += -DRUNTIME_PREFIX
852endif
853
854ifdef DIR_HAS_BSD_GROUP_SEMANTICS
855 COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
856endif
857ifdef NO_EXTERNAL_GREP
858 BASIC_CFLAGS += -DNO_EXTERNAL_GREP
859endif
860
861ifeq ($(PERL_PATH),)
862NO_PERL=NoThanks
863endif
864
865QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
866QUIET_SUBDIR1 =
867
868ifneq ($(findstring $(MAKEFLAGS),w),w)
869PRINT_DIR = --no-print-directory
870else # "make -w"
871NO_SUBDIR = :
872endif
873
874ifneq ($(findstring $(MAKEFLAGS),s),s) 539ifneq ($(findstring $(MAKEFLAGS),s),s)
875ifndef V 540ifndef V
876 QUIET_CC = @echo ' ' CC $@; 541 QUIET_CC = @echo ' ' CC $@;
877 QUIET_AR = @echo ' ' AR $@; 542 QUIET_AR = @echo ' ' AR $@;
878 QUIET_LINK = @echo ' ' LINK $@; 543 QUIET_LINK = @echo ' ' LINK $@;
879 QUIET_MKDIR = @echo ' ' MKDIR $@; 544 QUIET_MKDIR = @echo ' ' MKDIR $@;
880 QUIET_BUILT_IN = @echo ' ' BUILTIN $@;
881 QUIET_GEN = @echo ' ' GEN $@; 545 QUIET_GEN = @echo ' ' GEN $@;
882 QUIET_SUBDIR0 = +@subdir=
883 QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
884 $(MAKE) $(PRINT_DIR) -C $$subdir
885 export V
886 export QUIET_GEN
887 export QUIET_BUILT_IN
888endif 546endif
889endif 547endif
890 548
@@ -894,7 +552,6 @@ endif
894 552
895# Shell quote (do not use $(call) to accommodate ancient setups); 553# Shell quote (do not use $(call) to accommodate ancient setups);
896 554
897SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
898ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) 555ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
899 556
900DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) 557DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
@@ -908,46 +565,36 @@ htmldir_SQ = $(subst ','\'',$(htmldir))
908prefix_SQ = $(subst ','\'',$(prefix)) 565prefix_SQ = $(subst ','\'',$(prefix))
909 566
910SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) 567SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
911PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
912 568
913LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS) 569LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
914 570
915BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
916 $(COMPAT_CFLAGS)
917LIB_OBJS += $(COMPAT_OBJS)
918
919ALL_CFLAGS += $(BASIC_CFLAGS) 571ALL_CFLAGS += $(BASIC_CFLAGS)
920ALL_CFLAGS += $(ARCH_CFLAGS) 572ALL_CFLAGS += $(ARCH_CFLAGS)
921ALL_LDFLAGS += $(BASIC_LDFLAGS) 573ALL_LDFLAGS += $(BASIC_LDFLAGS)
922 574
923export TAR INSTALL DESTDIR SHELL_PATH 575export INSTALL SHELL_PATH
924 576
925 577
926### Build rules 578### Build rules
927 579
928SHELL = $(SHELL_PATH) 580SHELL = $(SHELL_PATH)
929 581
930all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS 582all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
931ifneq (,$X)
932 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
933endif
934
935all::
936 583
937please_set_SHELL_PATH_to_a_more_modern_shell: 584please_set_SHELL_PATH_to_a_more_modern_shell:
938 @$$(:) 585 @$$(:)
939 586
940shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell 587shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
941 588
942strip: $(PROGRAMS) $(OUTPUT)perf$X 589strip: $(PROGRAMS) $(OUTPUT)perf
943 $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf$X 590 $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
944 591
945$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 592$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
946 $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ 593 $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
947 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 594 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
948 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ 595 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
949 596
950$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) 597$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
951 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \ 598 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \
952 $(BUILTIN_OBJS) $(LIBS) -o $@ 599 $(BUILTIN_OBJS) $(LIBS) -o $@
953 600
@@ -963,39 +610,17 @@ $(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPU
963 '-DPERF_MAN_PATH="$(mandir_SQ)"' \ 610 '-DPERF_MAN_PATH="$(mandir_SQ)"' \
964 '-DPERF_INFO_PATH="$(infodir_SQ)"' $< 611 '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
965 612
966$(BUILT_INS): $(OUTPUT)perf$X
967 $(QUIET_BUILT_IN)$(RM) $@ && \
968 ln perf$X $@ 2>/dev/null || \
969 ln -s perf$X $@ 2>/dev/null || \
970 cp perf$X $@
971
972$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt 613$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
973 614
974$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) 615$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
975 $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ 616 $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
976 617
977$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh 618$(SCRIPTS) : % : %.sh
978 $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \ 619 $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
979 sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
980 -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
981 -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
982 -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
983 -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
984 $@.sh > $(OUTPUT)$@+ && \
985 chmod +x $(OUTPUT)$@+ && \
986 mv $(OUTPUT)$@+ $(OUTPUT)$@
987
988configure: configure.ac
989 $(QUIET_GEN)$(RM) $@ $<+ && \
990 sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
991 $< > $<+ && \
992 autoconf -o $@ $<+ && \
993 $(RM) $<+
994 620
995# These can record PERF_VERSION 621# These can record PERF_VERSION
996$(OUTPUT)perf.o perf.spec \ 622$(OUTPUT)perf.o perf.spec \
997 $(patsubst %.sh,%,$(SCRIPT_SH)) \ 623 $(SCRIPTS) \
998 $(patsubst %.perl,%,$(SCRIPT_PERL)) \
999 : $(OUTPUT)PERF-VERSION-FILE 624 : $(OUTPUT)PERF-VERSION-FILE
1000 625
1001$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS 626$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
@@ -1012,9 +637,6 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
1012 '-DPREFIX="$(prefix_SQ)"' \ 637 '-DPREFIX="$(prefix_SQ)"' \
1013 $< 638 $<
1014 639
1015$(OUTPUT)builtin-init-db.o: builtin-init-db.c $(OUTPUT)PERF-CFLAGS
1016 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
1017
1018$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS 640$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
1019 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 641 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
1020 642
@@ -1024,6 +646,9 @@ $(OUTPUT)util/ui/browser.o: util/ui/browser.c $(OUTPUT)PERF-CFLAGS
1024$(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS 646$(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
1025 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 647 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
1026 648
649$(OUTPUT)util/ui/browsers/top.o: util/ui/browsers/top.c $(OUTPUT)PERF-CFLAGS
650 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
651
1027$(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS 652$(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
1028 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 653 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
1029 654
@@ -1045,12 +670,11 @@ $(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/tra
1045$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS 670$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
1046 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< 671 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
1047 672
1048$(OUTPUT)perf-%$X: %.o $(PERFLIBS) 673$(OUTPUT)perf-%: %.o $(PERFLIBS)
1049 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) 674 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
1050 675
1051$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) 676$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
1052$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) 677$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
1053builtin-revert.o wt-status.o: wt-status.h
1054 678
1055# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So 679# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
1056# we depend the various files onto their directories. 680# we depend the various files onto their directories.
@@ -1063,6 +687,36 @@ $(sort $(dir $(DIRECTORY_DEPS))):
1063$(LIB_FILE): $(LIB_OBJS) 687$(LIB_FILE): $(LIB_OBJS)
1064 $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) 688 $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
1065 689
690help:
691 @echo 'Perf make targets:'
692 @echo ' doc - make *all* documentation (see below)'
693 @echo ' man - make manpage documentation (access with man <foo>)'
694 @echo ' html - make html documentation'
695 @echo ' info - make GNU info documentation (access with info <foo>)'
696 @echo ' pdf - make pdf documentation'
697 @echo ' TAGS - use etags to make tag information for source browsing'
698 @echo ' tags - use ctags to make tag information for source browsing'
699 @echo ' cscope - use cscope to make interactive browsing database'
700 @echo ''
701 @echo 'Perf install targets:'
702 @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
703 @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular'
704 @echo ' path like make prefix=/usr/local install install-doc'
705 @echo ' install - install compiled binaries'
706 @echo ' install-doc - install *all* documentation'
707 @echo ' install-man - install manpage documentation'
708 @echo ' install-html - install html documentation'
709 @echo ' install-info - install GNU info documentation'
710 @echo ' install-pdf - install pdf documentation'
711 @echo ''
712 @echo ' quick-install-doc - alias for quick-install-man'
713 @echo ' quick-install-man - install the documentation quickly'
714 @echo ' quick-install-html - install the html documentation quickly'
715 @echo ''
716 @echo 'Perf maintainer targets:'
717 @echo ' distclean - alias to clean'
718 @echo ' clean - clean all binary objects and build output'
719
1066doc: 720doc:
1067 $(MAKE) -C Documentation all 721 $(MAKE) -C Documentation all
1068 722
@@ -1101,30 +755,12 @@ $(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
1101 echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ 755 echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
1102 fi 756 fi
1103 757
1104# We need to apply sq twice, once to protect from the shell
1105# that runs $(OUTPUT)PERF-BUILD-OPTIONS, and then again to protect it
1106# and the first level quoting from the shell that runs "echo".
1107$(OUTPUT)PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
1108 @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
1109 @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
1110 @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
1111 @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
1112
1113### Testing rules 758### Testing rules
1114 759
1115#
1116# None right now:
1117#
1118# TEST_PROGRAMS += test-something$X
1119
1120all:: $(TEST_PROGRAMS)
1121
1122# GNU make supports exporting all variables by "export" without parameters. 760# GNU make supports exporting all variables by "export" without parameters.
1123# However, the environment gets quite big, and some programs have problems 761# However, the environment gets quite big, and some programs have problems
1124# with that. 762# with that.
1125 763
1126export NO_SVN_TESTS
1127
1128check: $(OUTPUT)common-cmds.h 764check: $(OUTPUT)common-cmds.h
1129 if sparse; \ 765 if sparse; \
1130 then \ 766 then \
@@ -1133,33 +769,21 @@ check: $(OUTPUT)common-cmds.h
1133 sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ 769 sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
1134 done; \ 770 done; \
1135 else \ 771 else \
1136 echo 2>&1 "Did you mean 'make test'?"; \
1137 exit 1; \ 772 exit 1; \
1138 fi 773 fi
1139 774
1140remove-dashes:
1141 ./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
1142
1143### Installation rules 775### Installation rules
1144 776
1145ifneq ($(filter /%,$(firstword $(template_dir))),)
1146template_instdir = $(template_dir)
1147else
1148template_instdir = $(prefix)/$(template_dir)
1149endif
1150export template_instdir
1151
1152ifneq ($(filter /%,$(firstword $(perfexecdir))),) 777ifneq ($(filter /%,$(firstword $(perfexecdir))),)
1153perfexec_instdir = $(perfexecdir) 778perfexec_instdir = $(perfexecdir)
1154else 779else
1155perfexec_instdir = $(prefix)/$(perfexecdir) 780perfexec_instdir = $(prefix)/$(perfexecdir)
1156endif 781endif
1157perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) 782perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
1158export perfexec_instdir
1159 783
1160install: all 784install: all
1161 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' 785 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
1162 $(INSTALL) $(OUTPUT)perf$X '$(DESTDIR_SQ)$(bindir_SQ)' 786 $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
1163 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' 787 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
1164 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' 788 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
1165 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 789 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -1172,14 +796,6 @@ install: all
1172 $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' 796 $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
1173 $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' 797 $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
1174 798
1175ifdef BUILT_INS
1176 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1177 $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1178ifneq (,$X)
1179 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) $(OUTPUT)perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
1180endif
1181endif
1182
1183install-doc: 799install-doc:
1184 $(MAKE) -C Documentation install 800 $(MAKE) -C Documentation install
1185 801
@@ -1204,104 +820,17 @@ quick-install-man:
1204quick-install-html: 820quick-install-html:
1205 $(MAKE) -C Documentation quick-install-html 821 $(MAKE) -C Documentation quick-install-html
1206 822
1207
1208### Maintainer's dist rules
1209#
1210# None right now
1211#
1212#
1213# perf.spec: perf.spec.in
1214# sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
1215# mv $@+ $@
1216#
1217# PERF_TARNAME=perf-$(PERF_VERSION)
1218# dist: perf.spec perf-archive$(X) configure
1219# ./perf-archive --format=tar \
1220# --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
1221# @mkdir -p $(PERF_TARNAME)
1222# @cp perf.spec configure $(PERF_TARNAME)
1223# @echo $(PERF_VERSION) > $(PERF_TARNAME)/version
1224# $(TAR) rf $(PERF_TARNAME).tar \
1225# $(PERF_TARNAME)/perf.spec \
1226# $(PERF_TARNAME)/configure \
1227# $(PERF_TARNAME)/version
1228# @$(RM) -r $(PERF_TARNAME)
1229# gzip -f -9 $(PERF_TARNAME).tar
1230#
1231# htmldocs = perf-htmldocs-$(PERF_VERSION)
1232# manpages = perf-manpages-$(PERF_VERSION)
1233# dist-doc:
1234# $(RM) -r .doc-tmp-dir
1235# mkdir .doc-tmp-dir
1236# $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
1237# cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
1238# gzip -n -9 -f $(htmldocs).tar
1239# :
1240# $(RM) -r .doc-tmp-dir
1241# mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
1242# $(MAKE) -C Documentation DESTDIR=./ \
1243# man1dir=../.doc-tmp-dir/man1 \
1244# man5dir=../.doc-tmp-dir/man5 \
1245# man7dir=../.doc-tmp-dir/man7 \
1246# install
1247# cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
1248# gzip -n -9 -f $(manpages).tar
1249# $(RM) -r .doc-tmp-dir
1250#
1251# rpm: dist
1252# $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
1253
1254### Cleaning rules 823### Cleaning rules
1255 824
1256distclean: clean
1257# $(RM) configure
1258
1259clean: 825clean:
1260 $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE) 826 $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive}
1261 $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X 827 $(RM) $(ALL_PROGRAMS) perf
1262 $(RM) $(TEST_PROGRAMS)
1263 $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* 828 $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
1264 $(RM) -r autom4te.cache
1265 $(RM) config.log config.mak.autogen config.mak.append config.status config.cache
1266 $(RM) -r $(PERF_TARNAME) .doc-tmp-dir
1267 $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
1268 $(RM) $(htmldocs).tar.gz $(manpages).tar.gz
1269 $(MAKE) -C Documentation/ clean 829 $(MAKE) -C Documentation/ clean
1270 $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-BUILD-OPTIONS 830 $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS
831 @python util/setup.py clean --build-lib='$(OUTPUT)python' \
832 --build-temp='$(OUTPUT)python/temp'
1271 833
1272.PHONY: all install clean strip 834.PHONY: all install clean strip
1273.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell 835.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
1274.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS 836.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
1275.PHONY: .FORCE-PERF-BUILD-OPTIONS
1276
1277### Make sure built-ins do not have dups and listed in perf.c
1278#
1279check-builtins::
1280 ./check-builtins.sh
1281
1282### Test suite coverage testing
1283#
1284# None right now
1285#
1286# .PHONY: coverage coverage-clean coverage-build coverage-report
1287#
1288# coverage:
1289# $(MAKE) coverage-build
1290# $(MAKE) coverage-report
1291#
1292# coverage-clean:
1293# rm -f *.gcda *.gcno
1294#
1295# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
1296# COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov
1297#
1298# coverage-build: coverage-clean
1299# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
1300# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
1301# -j1 test
1302#
1303# coverage-report:
1304# gcov -b *.c */*.c
1305# grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
1306# | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
1307# | tee coverage-untested-functions
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index d9ab3ce446ac..0c7454f8b8a9 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -55,7 +55,7 @@ int bench_sched_pipe(int argc, const char **argv,
55 * discarding returned value of read(), write() 55 * discarding returned value of read(), write()
56 * causes error in building environment for perf 56 * causes error in building environment for perf
57 */ 57 */
58 int ret, wait_stat; 58 int __used ret, wait_stat;
59 pid_t pid, retpid; 59 pid_t pid, retpid;
60 60
61 argc = parse_options(argc, argv, options, 61 argc = parse_options(argc, argv, options,
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 8879463807e4..695de4b5ae63 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -9,6 +9,7 @@
9 9
10#include "util/util.h" 10#include "util/util.h"
11 11
12#include "util/util.h"
12#include "util/color.h" 13#include "util/color.h"
13#include <linux/list.h> 14#include <linux/list.h>
14#include "util/cache.h" 15#include "util/cache.h"
@@ -18,6 +19,9 @@
18#include "perf.h" 19#include "perf.h"
19#include "util/debug.h" 20#include "util/debug.h"
20 21
22#include "util/evlist.h"
23#include "util/evsel.h"
24#include "util/annotate.h"
21#include "util/event.h" 25#include "util/event.h"
22#include "util/parse-options.h" 26#include "util/parse-options.h"
23#include "util/parse-events.h" 27#include "util/parse-events.h"
@@ -36,9 +40,13 @@ static bool print_line;
36 40
37static const char *sym_hist_filter; 41static const char *sym_hist_filter;
38 42
39static int hists__add_entry(struct hists *self, struct addr_location *al) 43static int perf_evlist__add_sample(struct perf_evlist *evlist,
44 struct perf_sample *sample,
45 struct addr_location *al)
40{ 46{
47 struct perf_evsel *evsel;
41 struct hist_entry *he; 48 struct hist_entry *he;
49 int ret;
42 50
43 if (sym_hist_filter != NULL && 51 if (sym_hist_filter != NULL &&
44 (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) { 52 (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
@@ -51,25 +59,51 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
51 return 0; 59 return 0;
52 } 60 }
53 61
54 he = __hists__add_entry(self, al, NULL, 1); 62 evsel = perf_evlist__id2evsel(evlist, sample->id);
63 if (evsel == NULL) {
64 /*
65 * FIXME: Propagate this back, but at least we're in a builtin,
66 * where exit() is allowed. ;-)
67 */
68 ui__warning("Invalid %s file, contains samples with id not in "
69 "its header!\n", input_name);
70 exit_browser(0);
71 exit(1);
72 }
73
74 he = __hists__add_entry(&evsel->hists, al, NULL, 1);
55 if (he == NULL) 75 if (he == NULL)
56 return -ENOMEM; 76 return -ENOMEM;
57 77
58 return hist_entry__inc_addr_samples(he, al->addr); 78 ret = 0;
79 if (he->ms.sym != NULL) {
80 struct annotation *notes = symbol__annotation(he->ms.sym);
81 if (notes->src == NULL &&
82 symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0)
83 return -ENOMEM;
84
85 ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
86 }
87
88 evsel->hists.stats.total_period += sample->period;
89 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
90 return ret;
59} 91}
60 92
61static int process_sample_event(event_t *event, struct sample_data *sample, 93static int process_sample_event(union perf_event *event,
94 struct perf_sample *sample,
62 struct perf_session *session) 95 struct perf_session *session)
63{ 96{
64 struct addr_location al; 97 struct addr_location al;
65 98
66 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 99 if (perf_event__preprocess_sample(event, session, &al, sample,
100 symbol__annotate_init) < 0) {
67 pr_warning("problem processing %d event, skipping it.\n", 101 pr_warning("problem processing %d event, skipping it.\n",
68 event->header.type); 102 event->header.type);
69 return -1; 103 return -1;
70 } 104 }
71 105
72 if (!al.filtered && hists__add_entry(&session->hists, &al)) { 106 if (!al.filtered && perf_evlist__add_sample(session->evlist, sample, &al)) {
73 pr_warning("problem incrementing symbol count, " 107 pr_warning("problem incrementing symbol count, "
74 "skipping event\n"); 108 "skipping event\n");
75 return -1; 109 return -1;
@@ -78,261 +112,26 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
78 return 0; 112 return 0;
79} 113}
80 114
81static int objdump_line__print(struct objdump_line *self, 115static int hist_entry__tty_annotate(struct hist_entry *he, int evidx)
82 struct list_head *head,
83 struct hist_entry *he, u64 len)
84{
85 struct symbol *sym = he->ms.sym;
86 static const char *prev_line;
87 static const char *prev_color;
88
89 if (self->offset != -1) {
90 const char *path = NULL;
91 unsigned int hits = 0;
92 double percent = 0.0;
93 const char *color;
94 struct sym_priv *priv = symbol__priv(sym);
95 struct sym_ext *sym_ext = priv->ext;
96 struct sym_hist *h = priv->hist;
97 s64 offset = self->offset;
98 struct objdump_line *next = objdump__get_next_ip_line(head, self);
99
100 while (offset < (s64)len &&
101 (next == NULL || offset < next->offset)) {
102 if (sym_ext) {
103 if (path == NULL)
104 path = sym_ext[offset].path;
105 percent += sym_ext[offset].percent;
106 } else
107 hits += h->ip[offset];
108
109 ++offset;
110 }
111
112 if (sym_ext == NULL && h->sum)
113 percent = 100.0 * hits / h->sum;
114
115 color = get_percent_color(percent);
116
117 /*
118 * Also color the filename and line if needed, with
119 * the same color than the percentage. Don't print it
120 * twice for close colored ip with the same filename:line
121 */
122 if (path) {
123 if (!prev_line || strcmp(prev_line, path)
124 || color != prev_color) {
125 color_fprintf(stdout, color, " %s", path);
126 prev_line = path;
127 prev_color = color;
128 }
129 }
130
131 color_fprintf(stdout, color, " %7.2f", percent);
132 printf(" : ");
133 color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line);
134 } else {
135 if (!*self->line)
136 printf(" :\n");
137 else
138 printf(" : %s\n", self->line);
139 }
140
141 return 0;
142}
143
144static struct rb_root root_sym_ext;
145
146static void insert_source_line(struct sym_ext *sym_ext)
147{
148 struct sym_ext *iter;
149 struct rb_node **p = &root_sym_ext.rb_node;
150 struct rb_node *parent = NULL;
151
152 while (*p != NULL) {
153 parent = *p;
154 iter = rb_entry(parent, struct sym_ext, node);
155
156 if (sym_ext->percent > iter->percent)
157 p = &(*p)->rb_left;
158 else
159 p = &(*p)->rb_right;
160 }
161
162 rb_link_node(&sym_ext->node, parent, p);
163 rb_insert_color(&sym_ext->node, &root_sym_ext);
164}
165
166static void free_source_line(struct hist_entry *he, int len)
167{
168 struct sym_priv *priv = symbol__priv(he->ms.sym);
169 struct sym_ext *sym_ext = priv->ext;
170 int i;
171
172 if (!sym_ext)
173 return;
174
175 for (i = 0; i < len; i++)
176 free(sym_ext[i].path);
177 free(sym_ext);
178
179 priv->ext = NULL;
180 root_sym_ext = RB_ROOT;
181}
182
183/* Get the filename:line for the colored entries */
184static void
185get_source_line(struct hist_entry *he, int len, const char *filename)
186{
187 struct symbol *sym = he->ms.sym;
188 u64 start;
189 int i;
190 char cmd[PATH_MAX * 2];
191 struct sym_ext *sym_ext;
192 struct sym_priv *priv = symbol__priv(sym);
193 struct sym_hist *h = priv->hist;
194
195 if (!h->sum)
196 return;
197
198 sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
199 if (!priv->ext)
200 return;
201
202 start = he->ms.map->unmap_ip(he->ms.map, sym->start);
203
204 for (i = 0; i < len; i++) {
205 char *path = NULL;
206 size_t line_len;
207 u64 offset;
208 FILE *fp;
209
210 sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
211 if (sym_ext[i].percent <= 0.5)
212 continue;
213
214 offset = start + i;
215 sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
216 fp = popen(cmd, "r");
217 if (!fp)
218 continue;
219
220 if (getline(&path, &line_len, fp) < 0 || !line_len)
221 goto next;
222
223 sym_ext[i].path = malloc(sizeof(char) * line_len + 1);
224 if (!sym_ext[i].path)
225 goto next;
226
227 strcpy(sym_ext[i].path, path);
228 insert_source_line(&sym_ext[i]);
229
230 next:
231 pclose(fp);
232 }
233}
234
235static void print_summary(const char *filename)
236{
237 struct sym_ext *sym_ext;
238 struct rb_node *node;
239
240 printf("\nSorted summary for file %s\n", filename);
241 printf("----------------------------------------------\n\n");
242
243 if (RB_EMPTY_ROOT(&root_sym_ext)) {
244 printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
245 return;
246 }
247
248 node = rb_first(&root_sym_ext);
249 while (node) {
250 double percent;
251 const char *color;
252 char *path;
253
254 sym_ext = rb_entry(node, struct sym_ext, node);
255 percent = sym_ext->percent;
256 color = get_percent_color(percent);
257 path = sym_ext->path;
258
259 color_fprintf(stdout, color, " %7.2f %s", percent, path);
260 node = rb_next(node);
261 }
262}
263
264static void hist_entry__print_hits(struct hist_entry *self)
265{
266 struct symbol *sym = self->ms.sym;
267 struct sym_priv *priv = symbol__priv(sym);
268 struct sym_hist *h = priv->hist;
269 u64 len = sym->end - sym->start, offset;
270
271 for (offset = 0; offset < len; ++offset)
272 if (h->ip[offset] != 0)
273 printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
274 sym->start + offset, h->ip[offset]);
275 printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
276}
277
278static int hist_entry__tty_annotate(struct hist_entry *he)
279{ 116{
280 struct map *map = he->ms.map; 117 return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx,
281 struct dso *dso = map->dso; 118 print_line, full_paths, 0, 0);
282 struct symbol *sym = he->ms.sym;
283 const char *filename = dso->long_name, *d_filename;
284 u64 len;
285 LIST_HEAD(head);
286 struct objdump_line *pos, *n;
287
288 if (hist_entry__annotate(he, &head, 0) < 0)
289 return -1;
290
291 if (full_paths)
292 d_filename = filename;
293 else
294 d_filename = basename(filename);
295
296 len = sym->end - sym->start;
297
298 if (print_line) {
299 get_source_line(he, len, filename);
300 print_summary(filename);
301 }
302
303 printf("\n\n------------------------------------------------\n");
304 printf(" Percent | Source code & Disassembly of %s\n", d_filename);
305 printf("------------------------------------------------\n");
306
307 if (verbose)
308 hist_entry__print_hits(he);
309
310 list_for_each_entry_safe(pos, n, &head, node) {
311 objdump_line__print(pos, &head, he, len);
312 list_del(&pos->node);
313 objdump_line__free(pos);
314 }
315
316 if (print_line)
317 free_source_line(he, len);
318
319 return 0;
320} 119}
321 120
322static void hists__find_annotations(struct hists *self) 121static void hists__find_annotations(struct hists *self, int evidx)
323{ 122{
324 struct rb_node *nd = rb_first(&self->entries), *next; 123 struct rb_node *nd = rb_first(&self->entries), *next;
325 int key = KEY_RIGHT; 124 int key = KEY_RIGHT;
326 125
327 while (nd) { 126 while (nd) {
328 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 127 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
329 struct sym_priv *priv; 128 struct annotation *notes;
330 129
331 if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned) 130 if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
332 goto find_next; 131 goto find_next;
333 132
334 priv = symbol__priv(he->ms.sym); 133 notes = symbol__annotation(he->ms.sym);
335 if (priv->hist == NULL) { 134 if (notes->src == NULL) {
336find_next: 135find_next:
337 if (key == KEY_LEFT) 136 if (key == KEY_LEFT)
338 nd = rb_prev(nd); 137 nd = rb_prev(nd);
@@ -342,7 +141,7 @@ find_next:
342 } 141 }
343 142
344 if (use_browser > 0) { 143 if (use_browser > 0) {
345 key = hist_entry__tui_annotate(he); 144 key = hist_entry__tui_annotate(he, evidx);
346 switch (key) { 145 switch (key) {
347 case KEY_RIGHT: 146 case KEY_RIGHT:
348 next = rb_next(nd); 147 next = rb_next(nd);
@@ -357,24 +156,24 @@ find_next:
357 if (next != NULL) 156 if (next != NULL)
358 nd = next; 157 nd = next;
359 } else { 158 } else {
360 hist_entry__tty_annotate(he); 159 hist_entry__tty_annotate(he, evidx);
361 nd = rb_next(nd); 160 nd = rb_next(nd);
362 /* 161 /*
363 * Since we have a hist_entry per IP for the same 162 * Since we have a hist_entry per IP for the same
364 * symbol, free he->ms.sym->hist to signal we already 163 * symbol, free he->ms.sym->src to signal we already
365 * processed this symbol. 164 * processed this symbol.
366 */ 165 */
367 free(priv->hist); 166 free(notes->src);
368 priv->hist = NULL; 167 notes->src = NULL;
369 } 168 }
370 } 169 }
371} 170}
372 171
373static struct perf_event_ops event_ops = { 172static struct perf_event_ops event_ops = {
374 .sample = process_sample_event, 173 .sample = process_sample_event,
375 .mmap = event__process_mmap, 174 .mmap = perf_event__process_mmap,
376 .comm = event__process_comm, 175 .comm = perf_event__process_comm,
377 .fork = event__process_task, 176 .fork = perf_event__process_task,
378 .ordered_samples = true, 177 .ordered_samples = true,
379 .ordering_requires_timestamps = true, 178 .ordering_requires_timestamps = true,
380}; 179};
@@ -383,6 +182,8 @@ static int __cmd_annotate(void)
383{ 182{
384 int ret; 183 int ret;
385 struct perf_session *session; 184 struct perf_session *session;
185 struct perf_evsel *pos;
186 u64 total_nr_samples;
386 187
387 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); 188 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
388 if (session == NULL) 189 if (session == NULL)
@@ -403,12 +204,36 @@ static int __cmd_annotate(void)
403 if (verbose > 2) 204 if (verbose > 2)
404 perf_session__fprintf_dsos(session, stdout); 205 perf_session__fprintf_dsos(session, stdout);
405 206
406 hists__collapse_resort(&session->hists); 207 total_nr_samples = 0;
407 hists__output_resort(&session->hists); 208 list_for_each_entry(pos, &session->evlist->entries, node) {
408 hists__find_annotations(&session->hists); 209 struct hists *hists = &pos->hists;
409out_delete: 210 u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
410 perf_session__delete(session); 211
212 if (nr_samples > 0) {
213 total_nr_samples += nr_samples;
214 hists__collapse_resort(hists);
215 hists__output_resort(hists);
216 hists__find_annotations(hists, pos->idx);
217 }
218 }
411 219
220 if (total_nr_samples == 0) {
221 ui__warning("The %s file has no samples!\n", input_name);
222 goto out_delete;
223 }
224out_delete:
225 /*
226 * Speed up the exit process, for large files this can
227 * take quite a while.
228 *
229 * XXX Enable this when using valgrind or if we ever
230 * librarize this command.
231 *
232 * Also experiment with obstacks to see how much speed
233 * up we'll get here.
234 *
235 * perf_session__delete(session);
236 */
412 return ret; 237 return ret;
413} 238}
414 239
@@ -451,9 +276,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
451 else if (use_tui) 276 else if (use_tui)
452 use_browser = 1; 277 use_browser = 1;
453 278
454 setup_browser(); 279 setup_browser(true);
455 280
456 symbol_conf.priv_size = sizeof(struct sym_priv); 281 symbol_conf.priv_size = sizeof(struct annotation);
457 symbol_conf.try_vmlinux_path = true; 282 symbol_conf.try_vmlinux_path = true;
458 283
459 if (symbol__init() < 0) 284 if (symbol__init() < 0)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 3153e492dbcc..6b7d91160ecb 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,13 +30,13 @@ static int hists__add_entry(struct hists *self,
30 return -ENOMEM; 30 return -ENOMEM;
31} 31}
32 32
33static int diff__process_sample_event(event_t *event, 33static int diff__process_sample_event(union perf_event *event,
34 struct sample_data *sample, 34 struct perf_sample *sample,
35 struct perf_session *session) 35 struct perf_session *session)
36{ 36{
37 struct addr_location al; 37 struct addr_location al;
38 38
39 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 39 if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
40 pr_warning("problem processing %d event, skipping it.\n", 40 pr_warning("problem processing %d event, skipping it.\n",
41 event->header.type); 41 event->header.type);
42 return -1; 42 return -1;
@@ -56,11 +56,11 @@ static int diff__process_sample_event(event_t *event,
56 56
57static struct perf_event_ops event_ops = { 57static struct perf_event_ops event_ops = {
58 .sample = diff__process_sample_event, 58 .sample = diff__process_sample_event,
59 .mmap = event__process_mmap, 59 .mmap = perf_event__process_mmap,
60 .comm = event__process_comm, 60 .comm = perf_event__process_comm,
61 .exit = event__process_task, 61 .exit = perf_event__process_task,
62 .fork = event__process_task, 62 .fork = perf_event__process_task,
63 .lost = event__process_lost, 63 .lost = perf_event__process_lost,
64 .ordered_samples = true, 64 .ordered_samples = true,
65 .ordering_requires_timestamps = true, 65 .ordering_requires_timestamps = true,
66}; 66};
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0c78ffa7bf67..e29f04ed3396 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
16static char const *input_name = "-"; 16static char const *input_name = "-";
17static bool inject_build_ids; 17static bool inject_build_ids;
18 18
19static int event__repipe_synth(event_t *event, 19static int perf_event__repipe_synth(union perf_event *event,
20 struct perf_session *session __used) 20 struct perf_session *session __used)
21{ 21{
22 uint32_t size; 22 uint32_t size;
23 void *buf = event; 23 void *buf = event;
@@ -36,41 +36,44 @@ static int event__repipe_synth(event_t *event,
36 return 0; 36 return 0;
37} 37}
38 38
39static int event__repipe(event_t *event, struct sample_data *sample __used, 39static int perf_event__repipe(union perf_event *event,
40 struct perf_session *session) 40 struct perf_sample *sample __used,
41 struct perf_session *session)
41{ 42{
42 return event__repipe_synth(event, session); 43 return perf_event__repipe_synth(event, session);
43} 44}
44 45
45static int event__repipe_mmap(event_t *self, struct sample_data *sample, 46static int perf_event__repipe_mmap(union perf_event *event,
46 struct perf_session *session) 47 struct perf_sample *sample,
48 struct perf_session *session)
47{ 49{
48 int err; 50 int err;
49 51
50 err = event__process_mmap(self, sample, session); 52 err = perf_event__process_mmap(event, sample, session);
51 event__repipe(self, sample, session); 53 perf_event__repipe(event, sample, session);
52 54
53 return err; 55 return err;
54} 56}
55 57
56static int event__repipe_task(event_t *self, struct sample_data *sample, 58static int perf_event__repipe_task(union perf_event *event,
57 struct perf_session *session) 59 struct perf_sample *sample,
60 struct perf_session *session)
58{ 61{
59 int err; 62 int err;
60 63
61 err = event__process_task(self, sample, session); 64 err = perf_event__process_task(event, sample, session);
62 event__repipe(self, sample, session); 65 perf_event__repipe(event, sample, session);
63 66
64 return err; 67 return err;
65} 68}
66 69
67static int event__repipe_tracing_data(event_t *self, 70static int perf_event__repipe_tracing_data(union perf_event *event,
68 struct perf_session *session) 71 struct perf_session *session)
69{ 72{
70 int err; 73 int err;
71 74
72 event__repipe_synth(self, session); 75 perf_event__repipe_synth(event, session);
73 err = event__process_tracing_data(self, session); 76 err = perf_event__process_tracing_data(event, session);
74 77
75 return err; 78 return err;
76} 79}
@@ -109,8 +112,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
109 if (self->kernel) 112 if (self->kernel)
110 misc = PERF_RECORD_MISC_KERNEL; 113 misc = PERF_RECORD_MISC_KERNEL;
111 114
112 err = event__synthesize_build_id(self, misc, event__repipe, 115 err = perf_event__synthesize_build_id(self, misc, perf_event__repipe,
113 machine, session); 116 machine, session);
114 if (err) { 117 if (err) {
115 pr_err("Can't synthesize build_id event for %s\n", self->long_name); 118 pr_err("Can't synthesize build_id event for %s\n", self->long_name);
116 return -1; 119 return -1;
@@ -119,8 +122,9 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
119 return 0; 122 return 0;
120} 123}
121 124
122static int event__inject_buildid(event_t *event, struct sample_data *sample, 125static int perf_event__inject_buildid(union perf_event *event,
123 struct perf_session *session) 126 struct perf_sample *sample,
127 struct perf_session *session)
124{ 128{
125 struct addr_location al; 129 struct addr_location al;
126 struct thread *thread; 130 struct thread *thread;
@@ -155,24 +159,24 @@ static int event__inject_buildid(event_t *event, struct sample_data *sample,
155 } 159 }
156 160
157repipe: 161repipe:
158 event__repipe(event, sample, session); 162 perf_event__repipe(event, sample, session);
159 return 0; 163 return 0;
160} 164}
161 165
162struct perf_event_ops inject_ops = { 166struct perf_event_ops inject_ops = {
163 .sample = event__repipe, 167 .sample = perf_event__repipe,
164 .mmap = event__repipe, 168 .mmap = perf_event__repipe,
165 .comm = event__repipe, 169 .comm = perf_event__repipe,
166 .fork = event__repipe, 170 .fork = perf_event__repipe,
167 .exit = event__repipe, 171 .exit = perf_event__repipe,
168 .lost = event__repipe, 172 .lost = perf_event__repipe,
169 .read = event__repipe, 173 .read = perf_event__repipe,
170 .throttle = event__repipe, 174 .throttle = perf_event__repipe,
171 .unthrottle = event__repipe, 175 .unthrottle = perf_event__repipe,
172 .attr = event__repipe_synth, 176 .attr = perf_event__repipe_synth,
173 .event_type = event__repipe_synth, 177 .event_type = perf_event__repipe_synth,
174 .tracing_data = event__repipe_synth, 178 .tracing_data = perf_event__repipe_synth,
175 .build_id = event__repipe_synth, 179 .build_id = perf_event__repipe_synth,
176}; 180};
177 181
178extern volatile int session_done; 182extern volatile int session_done;
@@ -190,10 +194,10 @@ static int __cmd_inject(void)
190 signal(SIGINT, sig_handler); 194 signal(SIGINT, sig_handler);
191 195
192 if (inject_build_ids) { 196 if (inject_build_ids) {
193 inject_ops.sample = event__inject_buildid; 197 inject_ops.sample = perf_event__inject_buildid;
194 inject_ops.mmap = event__repipe_mmap; 198 inject_ops.mmap = perf_event__repipe_mmap;
195 inject_ops.fork = event__repipe_task; 199 inject_ops.fork = perf_event__repipe_task;
196 inject_ops.tracing_data = event__repipe_tracing_data; 200 inject_ops.tracing_data = perf_event__repipe_tracing_data;
197 } 201 }
198 202
199 session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops); 203 session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index d97256d65980..7f618f4e7b79 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -275,9 +275,8 @@ static void process_free_event(void *data,
275 s_alloc->alloc_cpu = -1; 275 s_alloc->alloc_cpu = -1;
276} 276}
277 277
278static void 278static void process_raw_event(union perf_event *raw_event __used, void *data,
279process_raw_event(event_t *raw_event __used, void *data, 279 int cpu, u64 timestamp, struct thread *thread)
280 int cpu, u64 timestamp, struct thread *thread)
281{ 280{
282 struct event *event; 281 struct event *event;
283 int type; 282 int type;
@@ -304,7 +303,8 @@ process_raw_event(event_t *raw_event __used, void *data,
304 } 303 }
305} 304}
306 305
307static int process_sample_event(event_t *event, struct sample_data *sample, 306static int process_sample_event(union perf_event *event,
307 struct perf_sample *sample,
308 struct perf_session *session) 308 struct perf_session *session)
309{ 309{
310 struct thread *thread = perf_session__findnew(session, event->ip.pid); 310 struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -325,7 +325,7 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
325 325
326static struct perf_event_ops event_ops = { 326static struct perf_event_ops event_ops = {
327 .sample = process_sample_event, 327 .sample = process_sample_event,
328 .comm = event__process_comm, 328 .comm = perf_event__process_comm,
329 .ordered_samples = true, 329 .ordered_samples = true,
330}; 330};
331 331
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index d88c6961274c..6313b6eb3ebb 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -5,6 +5,7 @@
5 * 5 *
6 * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de> 6 * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
7 * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 7 * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
8 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
8 */ 9 */
9#include "builtin.h" 10#include "builtin.h"
10 11
@@ -13,9 +14,47 @@
13#include "util/parse-events.h" 14#include "util/parse-events.h"
14#include "util/cache.h" 15#include "util/cache.h"
15 16
16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) 17int cmd_list(int argc, const char **argv, const char *prefix __used)
17{ 18{
18 setup_pager(); 19 setup_pager();
19 print_events(); 20
21 if (argc == 1)
22 print_events(NULL);
23 else {
24 int i;
25
26 for (i = 1; i < argc; ++i) {
27 if (i > 1)
28 putchar('\n');
29 if (strncmp(argv[i], "tracepoint", 10) == 0)
30 print_tracepoint_events(NULL, NULL);
31 else if (strcmp(argv[i], "hw") == 0 ||
32 strcmp(argv[i], "hardware") == 0)
33 print_events_type(PERF_TYPE_HARDWARE);
34 else if (strcmp(argv[i], "sw") == 0 ||
35 strcmp(argv[i], "software") == 0)
36 print_events_type(PERF_TYPE_SOFTWARE);
37 else if (strcmp(argv[i], "cache") == 0 ||
38 strcmp(argv[i], "hwcache") == 0)
39 print_hwcache_events(NULL);
40 else {
41 char *sep = strchr(argv[i], ':'), *s;
42 int sep_idx;
43
44 if (sep == NULL) {
45 print_events(argv[i]);
46 continue;
47 }
48 sep_idx = sep - argv[i];
49 s = strdup(argv[i]);
50 if (s == NULL)
51 return -1;
52
53 s[sep_idx] = '\0';
54 print_tracepoint_events(s, s + sep_idx + 1);
55 free(s);
56 }
57 }
58 }
20 return 0; 59 return 0;
21} 60}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2b36defc5d73..2e93f99b1480 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,14 +834,14 @@ static void dump_info(void)
834 die("Unknown type of information\n"); 834 die("Unknown type of information\n");
835} 835}
836 836
837static int process_sample_event(event_t *self, struct sample_data *sample, 837static int process_sample_event(union perf_event *event, struct perf_sample *sample,
838 struct perf_session *s) 838 struct perf_session *s)
839{ 839{
840 struct thread *thread = perf_session__findnew(s, sample->tid); 840 struct thread *thread = perf_session__findnew(s, sample->tid);
841 841
842 if (thread == NULL) { 842 if (thread == NULL) {
843 pr_debug("problem processing %d event, skipping it.\n", 843 pr_debug("problem processing %d event, skipping it.\n",
844 self->header.type); 844 event->header.type);
845 return -1; 845 return -1;
846 } 846 }
847 847
@@ -852,7 +852,7 @@ static int process_sample_event(event_t *self, struct sample_data *sample,
852 852
853static struct perf_event_ops eops = { 853static struct perf_event_ops eops = {
854 .sample = process_sample_event, 854 .sample = process_sample_event,
855 .comm = event__process_comm, 855 .comm = perf_event__process_comm,
856 .ordered_samples = true, 856 .ordered_samples = true,
857}; 857};
858 858
@@ -893,7 +893,7 @@ static const char * const report_usage[] = {
893 893
894static const struct option report_options[] = { 894static const struct option report_options[] = {
895 OPT_STRING('k', "key", &sort_key, "acquired", 895 OPT_STRING('k', "key", &sort_key, "acquired",
896 "key for sorting"), 896 "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"),
897 /* TODO: type */ 897 /* TODO: type */
898 OPT_END() 898 OPT_END()
899}; 899};
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index add163c9f0e7..2c0e64d0b4aa 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -36,6 +36,7 @@
36#include "builtin.h" 36#include "builtin.h"
37#include "util/util.h" 37#include "util/util.h"
38#include "util/strlist.h" 38#include "util/strlist.h"
39#include "util/strfilter.h"
39#include "util/symbol.h" 40#include "util/symbol.h"
40#include "util/debug.h" 41#include "util/debug.h"
41#include "util/debugfs.h" 42#include "util/debugfs.h"
@@ -43,6 +44,8 @@
43#include "util/probe-finder.h" 44#include "util/probe-finder.h"
44#include "util/probe-event.h" 45#include "util/probe-event.h"
45 46
47#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
48#define DEFAULT_FUNC_FILTER "!_*"
46#define MAX_PATH_LEN 256 49#define MAX_PATH_LEN 256
47 50
48/* Session management structure */ 51/* Session management structure */
@@ -52,6 +55,7 @@ static struct {
52 bool show_lines; 55 bool show_lines;
53 bool show_vars; 56 bool show_vars;
54 bool show_ext_vars; 57 bool show_ext_vars;
58 bool show_funcs;
55 bool mod_events; 59 bool mod_events;
56 int nevents; 60 int nevents;
57 struct perf_probe_event events[MAX_PROBES]; 61 struct perf_probe_event events[MAX_PROBES];
@@ -59,6 +63,7 @@ static struct {
59 struct line_range line_range; 63 struct line_range line_range;
60 const char *target_module; 64 const char *target_module;
61 int max_probe_points; 65 int max_probe_points;
66 struct strfilter *filter;
62} params; 67} params;
63 68
64/* Parse an event definition. Note that any error must die. */ 69/* Parse an event definition. Note that any error must die. */
@@ -157,6 +162,27 @@ static int opt_show_vars(const struct option *opt __used,
157} 162}
158#endif 163#endif
159 164
165static int opt_set_filter(const struct option *opt __used,
166 const char *str, int unset __used)
167{
168 const char *err;
169
170 if (str) {
171 pr_debug2("Set filter: %s\n", str);
172 if (params.filter)
173 strfilter__delete(params.filter);
174 params.filter = strfilter__new(str, &err);
175 if (!params.filter) {
176 pr_err("Filter parse error at %td.\n", err - str + 1);
177 pr_err("Source: \"%s\"\n", str);
178 pr_err(" %*c\n", (int)(err - str + 1), '^');
179 return -EINVAL;
180 }
181 }
182
183 return 0;
184}
185
160static const char * const probe_usage[] = { 186static const char * const probe_usage[] = {
161 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]", 187 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
162 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", 188 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
@@ -221,6 +247,13 @@ static const struct option options[] = {
221 OPT__DRY_RUN(&probe_event_dry_run), 247 OPT__DRY_RUN(&probe_event_dry_run),
222 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 248 OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
223 "Set how many probe points can be found for a probe."), 249 "Set how many probe points can be found for a probe."),
250 OPT_BOOLEAN('F', "funcs", &params.show_funcs,
251 "Show potential probe-able functions."),
252 OPT_CALLBACK('\0', "filter", NULL,
253 "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
254 "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
255 "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)",
256 opt_set_filter),
224 OPT_END() 257 OPT_END()
225}; 258};
226 259
@@ -246,7 +279,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
246 params.max_probe_points = MAX_PROBES; 279 params.max_probe_points = MAX_PROBES;
247 280
248 if ((!params.nevents && !params.dellist && !params.list_events && 281 if ((!params.nevents && !params.dellist && !params.list_events &&
249 !params.show_lines)) 282 !params.show_lines && !params.show_funcs))
250 usage_with_options(probe_usage, options); 283 usage_with_options(probe_usage, options);
251 284
252 /* 285 /*
@@ -267,12 +300,41 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
267 pr_err(" Error: Don't use --list with --vars.\n"); 300 pr_err(" Error: Don't use --list with --vars.\n");
268 usage_with_options(probe_usage, options); 301 usage_with_options(probe_usage, options);
269 } 302 }
303 if (params.show_funcs) {
304 pr_err(" Error: Don't use --list with --funcs.\n");
305 usage_with_options(probe_usage, options);
306 }
270 ret = show_perf_probe_events(); 307 ret = show_perf_probe_events();
271 if (ret < 0) 308 if (ret < 0)
272 pr_err(" Error: Failed to show event list. (%d)\n", 309 pr_err(" Error: Failed to show event list. (%d)\n",
273 ret); 310 ret);
274 return ret; 311 return ret;
275 } 312 }
313 if (params.show_funcs) {
314 if (params.nevents != 0 || params.dellist) {
315 pr_err(" Error: Don't use --funcs with"
316 " --add/--del.\n");
317 usage_with_options(probe_usage, options);
318 }
319 if (params.show_lines) {
320 pr_err(" Error: Don't use --funcs with --line.\n");
321 usage_with_options(probe_usage, options);
322 }
323 if (params.show_vars) {
324 pr_err(" Error: Don't use --funcs with --vars.\n");
325 usage_with_options(probe_usage, options);
326 }
327 if (!params.filter)
328 params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
329 NULL);
330 ret = show_available_funcs(params.target_module,
331 params.filter);
332 strfilter__delete(params.filter);
333 if (ret < 0)
334 pr_err(" Error: Failed to show functions."
335 " (%d)\n", ret);
336 return ret;
337 }
276 338
277#ifdef DWARF_SUPPORT 339#ifdef DWARF_SUPPORT
278 if (params.show_lines) { 340 if (params.show_lines) {
@@ -297,10 +359,16 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
297 " --add/--del.\n"); 359 " --add/--del.\n");
298 usage_with_options(probe_usage, options); 360 usage_with_options(probe_usage, options);
299 } 361 }
362 if (!params.filter)
363 params.filter = strfilter__new(DEFAULT_VAR_FILTER,
364 NULL);
365
300 ret = show_available_vars(params.events, params.nevents, 366 ret = show_available_vars(params.events, params.nevents,
301 params.max_probe_points, 367 params.max_probe_points,
302 params.target_module, 368 params.target_module,
369 params.filter,
303 params.show_ext_vars); 370 params.show_ext_vars);
371 strfilter__delete(params.filter);
304 if (ret < 0) 372 if (ret < 0)
305 pr_err(" Error: Failed to show vars. (%d)\n", ret); 373 pr_err(" Error: Failed to show vars. (%d)\n", ret);
306 return ret; 374 return ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 60cac6f92e8b..6febcc168a8c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -18,11 +18,13 @@
18 18
19#include "util/header.h" 19#include "util/header.h"
20#include "util/event.h" 20#include "util/event.h"
21#include "util/evlist.h"
21#include "util/evsel.h" 22#include "util/evsel.h"
22#include "util/debug.h" 23#include "util/debug.h"
23#include "util/session.h" 24#include "util/session.h"
24#include "util/symbol.h" 25#include "util/symbol.h"
25#include "util/cpumap.h" 26#include "util/cpumap.h"
27#include "util/thread_map.h"
26 28
27#include <unistd.h> 29#include <unistd.h>
28#include <sched.h> 30#include <sched.h>
@@ -37,16 +39,14 @@ enum write_mode_t {
37 39
38static u64 user_interval = ULLONG_MAX; 40static u64 user_interval = ULLONG_MAX;
39static u64 default_interval = 0; 41static u64 default_interval = 0;
40static u64 sample_type;
41 42
42static struct cpu_map *cpus;
43static unsigned int page_size; 43static unsigned int page_size;
44static unsigned int mmap_pages = 128; 44static unsigned int mmap_pages = 128;
45static unsigned int user_freq = UINT_MAX; 45static unsigned int user_freq = UINT_MAX;
46static int freq = 1000; 46static int freq = 1000;
47static int output; 47static int output;
48static int pipe_output = 0; 48static int pipe_output = 0;
49static const char *output_name = "perf.data"; 49static const char *output_name = NULL;
50static int group = 0; 50static int group = 0;
51static int realtime_prio = 0; 51static int realtime_prio = 0;
52static bool nodelay = false; 52static bool nodelay = false;
@@ -55,7 +55,6 @@ static bool sample_id_all_avail = true;
55static bool system_wide = false; 55static bool system_wide = false;
56static pid_t target_pid = -1; 56static pid_t target_pid = -1;
57static pid_t target_tid = -1; 57static pid_t target_tid = -1;
58static struct thread_map *threads;
59static pid_t child_pid = -1; 58static pid_t child_pid = -1;
60static bool no_inherit = false; 59static bool no_inherit = false;
61static enum write_mode_t write_mode = WRITE_FORCE; 60static enum write_mode_t write_mode = WRITE_FORCE;
@@ -66,51 +65,17 @@ static bool sample_address = false;
66static bool sample_time = false; 65static bool sample_time = false;
67static bool no_buildid = false; 66static bool no_buildid = false;
68static bool no_buildid_cache = false; 67static bool no_buildid_cache = false;
68static struct perf_evlist *evsel_list;
69 69
70static long samples = 0; 70static long samples = 0;
71static u64 bytes_written = 0; 71static u64 bytes_written = 0;
72 72
73static struct pollfd *event_array;
74
75static int nr_poll = 0;
76static int nr_cpu = 0;
77
78static int file_new = 1; 73static int file_new = 1;
79static off_t post_processing_offset; 74static off_t post_processing_offset;
80 75
81static struct perf_session *session; 76static struct perf_session *session;
82static const char *cpu_list; 77static const char *cpu_list;
83 78
84struct mmap_data {
85 void *base;
86 unsigned int mask;
87 unsigned int prev;
88};
89
90static struct mmap_data mmap_array[MAX_NR_CPUS];
91
92static unsigned long mmap_read_head(struct mmap_data *md)
93{
94 struct perf_event_mmap_page *pc = md->base;
95 long head;
96
97 head = pc->data_head;
98 rmb();
99
100 return head;
101}
102
103static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
104{
105 struct perf_event_mmap_page *pc = md->base;
106
107 /*
108 * ensure all reads are done before we write the tail out.
109 */
110 /* mb(); */
111 pc->data_tail = tail;
112}
113
114static void advance_output(size_t size) 79static void advance_output(size_t size)
115{ 80{
116 bytes_written += size; 81 bytes_written += size;
@@ -131,42 +96,26 @@ static void write_output(void *buf, size_t size)
131 } 96 }
132} 97}
133 98
134static int process_synthesized_event(event_t *event, 99static int process_synthesized_event(union perf_event *event,
135 struct sample_data *sample __used, 100 struct perf_sample *sample __used,
136 struct perf_session *self __used) 101 struct perf_session *self __used)
137{ 102{
138 write_output(event, event->header.size); 103 write_output(event, event->header.size);
139 return 0; 104 return 0;
140} 105}
141 106
142static void mmap_read(struct mmap_data *md) 107static void mmap_read(struct perf_mmap *md)
143{ 108{
144 unsigned int head = mmap_read_head(md); 109 unsigned int head = perf_mmap__read_head(md);
145 unsigned int old = md->prev; 110 unsigned int old = md->prev;
146 unsigned char *data = md->base + page_size; 111 unsigned char *data = md->base + page_size;
147 unsigned long size; 112 unsigned long size;
148 void *buf; 113 void *buf;
149 int diff;
150 114
151 /* 115 if (old == head)
152 * If we're further behind than half the buffer, there's a chance 116 return;
153 * the writer will bite our tail and mess up the samples under us.
154 *
155 * If we somehow ended up ahead of the head, we got messed up.
156 *
157 * In either case, truncate and restart at head.
158 */
159 diff = head - old;
160 if (diff < 0) {
161 fprintf(stderr, "WARNING: failed to keep up with mmap data\n");
162 /*
163 * head points to a known good entry, start there.
164 */
165 old = head;
166 }
167 117
168 if (old != head) 118 samples++;
169 samples++;
170 119
171 size = head - old; 120 size = head - old;
172 121
@@ -185,7 +134,7 @@ static void mmap_read(struct mmap_data *md)
185 write_output(buf, size); 134 write_output(buf, size);
186 135
187 md->prev = old; 136 md->prev = old;
188 mmap_write_tail(md, old); 137 perf_mmap__write_tail(md, old);
189} 138}
190 139
191static volatile int done = 0; 140static volatile int done = 0;
@@ -209,53 +158,10 @@ static void sig_atexit(void)
209 kill(getpid(), signr); 158 kill(getpid(), signr);
210} 159}
211 160
212static int group_fd; 161static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
213
214static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
215{
216 struct perf_header_attr *h_attr;
217
218 if (nr < session->header.attrs) {
219 h_attr = session->header.attr[nr];
220 } else {
221 h_attr = perf_header_attr__new(a);
222 if (h_attr != NULL)
223 if (perf_header__add_attr(&session->header, h_attr) < 0) {
224 perf_header_attr__delete(h_attr);
225 h_attr = NULL;
226 }
227 }
228
229 return h_attr;
230}
231
232static void create_counter(struct perf_evsel *evsel, int cpu)
233{ 162{
234 char *filter = evsel->filter;
235 struct perf_event_attr *attr = &evsel->attr; 163 struct perf_event_attr *attr = &evsel->attr;
236 struct perf_header_attr *h_attr;
237 int track = !evsel->idx; /* only the first counter needs these */ 164 int track = !evsel->idx; /* only the first counter needs these */
238 int thread_index;
239 int ret;
240 struct {
241 u64 count;
242 u64 time_enabled;
243 u64 time_running;
244 u64 id;
245 } read_data;
246 /*
247 * Check if parse_single_tracepoint_event has already asked for
248 * PERF_SAMPLE_TIME.
249 *
250 * XXX this is kludgy but short term fix for problems introduced by
251 * eac23d1c that broke 'perf script' by having different sample_types
252 * when using multiple tracepoint events when we use a perf binary
253 * that tries to use sample_id_all on an older kernel.
254 *
255 * We need to move counter creation to perf_session, support
256 * different sample_types, etc.
257 */
258 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
259 165
260 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 166 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
261 PERF_FORMAT_TOTAL_TIME_RUNNING | 167 PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -263,7 +169,7 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
263 169
264 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; 170 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
265 171
266 if (nr_counters > 1) 172 if (evlist->nr_entries > 1)
267 attr->sample_type |= PERF_SAMPLE_ID; 173 attr->sample_type |= PERF_SAMPLE_ID;
268 174
269 /* 175 /*
@@ -315,19 +221,58 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
315 221
316 attr->mmap = track; 222 attr->mmap = track;
317 attr->comm = track; 223 attr->comm = track;
318 attr->inherit = !no_inherit; 224
319 if (target_pid == -1 && target_tid == -1 && !system_wide) { 225 if (target_pid == -1 && target_tid == -1 && !system_wide) {
320 attr->disabled = 1; 226 attr->disabled = 1;
321 attr->enable_on_exec = 1; 227 attr->enable_on_exec = 1;
322 } 228 }
323retry_sample_id: 229}
324 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
325 230
326 for (thread_index = 0; thread_index < threads->nr; thread_index++) { 231static bool perf_evlist__equal(struct perf_evlist *evlist,
327try_again: 232 struct perf_evlist *other)
328 FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0); 233{
234 struct perf_evsel *pos, *pair;
235
236 if (evlist->nr_entries != other->nr_entries)
237 return false;
238
239 pair = list_entry(other->entries.next, struct perf_evsel, node);
329 240
330 if (FD(evsel, nr_cpu, thread_index) < 0) { 241 list_for_each_entry(pos, &evlist->entries, node) {
242 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
243 return false;
244 pair = list_entry(pair->node.next, struct perf_evsel, node);
245 }
246
247 return true;
248}
249
250static void open_counters(struct perf_evlist *evlist)
251{
252 struct perf_evsel *pos;
253
254 list_for_each_entry(pos, &evlist->entries, node) {
255 struct perf_event_attr *attr = &pos->attr;
256 /*
257 * Check if parse_single_tracepoint_event has already asked for
258 * PERF_SAMPLE_TIME.
259 *
260 * XXX this is kludgy but short term fix for problems introduced by
261 * eac23d1c that broke 'perf script' by having different sample_types
262 * when using multiple tracepoint events when we use a perf binary
263 * that tries to use sample_id_all on an older kernel.
264 *
265 * We need to move counter creation to perf_session, support
266 * different sample_types, etc.
267 */
268 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
269
270 config_attr(pos, evlist);
271retry_sample_id:
272 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
273try_again:
274 if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
275 !no_inherit) < 0) {
331 int err = errno; 276 int err = errno;
332 277
333 if (err == EPERM || err == EACCES) 278 if (err == EPERM || err == EACCES)
@@ -364,7 +309,7 @@ try_again:
364 } 309 }
365 printf("\n"); 310 printf("\n");
366 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", 311 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
367 FD(evsel, nr_cpu, thread_index), strerror(err)); 312 err, strerror(err));
368 313
369#if defined(__i386__) || defined(__x86_64__) 314#if defined(__i386__) || defined(__x86_64__)
370 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) 315 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
@@ -375,90 +320,28 @@ try_again:
375#endif 320#endif
376 321
377 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 322 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
378 exit(-1);
379 } 323 }
324 }
380 325
381 h_attr = get_header_attr(attr, evsel->idx); 326 if (perf_evlist__set_filters(evlist)) {
382 if (h_attr == NULL) 327 error("failed to set filter with %d (%s)\n", errno,
383 die("nomem\n"); 328 strerror(errno));
329 exit(-1);
330 }
384 331
385 if (!file_new) { 332 if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
386 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { 333 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
387 fprintf(stderr, "incompatible append\n");
388 exit(-1);
389 }
390 }
391 334
392 if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) { 335 if (file_new)
393 perror("Unable to read perf file descriptor"); 336 session->evlist = evlist;
337 else {
338 if (!perf_evlist__equal(session->evlist, evlist)) {
339 fprintf(stderr, "incompatible append\n");
394 exit(-1); 340 exit(-1);
395 } 341 }
342 }
396 343
397 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { 344 perf_session__update_sample_type(session);
398 pr_warning("Not enough memory to add id\n");
399 exit(-1);
400 }
401
402 assert(FD(evsel, nr_cpu, thread_index) >= 0);
403 fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
404
405 /*
406 * First counter acts as the group leader:
407 */
408 if (group && group_fd == -1)
409 group_fd = FD(evsel, nr_cpu, thread_index);
410
411 if (evsel->idx || thread_index) {
412 struct perf_evsel *first;
413 first = list_entry(evsel_list.next, struct perf_evsel, node);
414 ret = ioctl(FD(evsel, nr_cpu, thread_index),
415 PERF_EVENT_IOC_SET_OUTPUT,
416 FD(first, nr_cpu, 0));
417 if (ret) {
418 error("failed to set output: %d (%s)\n", errno,
419 strerror(errno));
420 exit(-1);
421 }
422 } else {
423 mmap_array[nr_cpu].prev = 0;
424 mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
425 mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
426 PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
427 if (mmap_array[nr_cpu].base == MAP_FAILED) {
428 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
429 exit(-1);
430 }
431
432 event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
433 event_array[nr_poll].events = POLLIN;
434 nr_poll++;
435 }
436
437 if (filter != NULL) {
438 ret = ioctl(FD(evsel, nr_cpu, thread_index),
439 PERF_EVENT_IOC_SET_FILTER, filter);
440 if (ret) {
441 error("failed to set filter with %d (%s)\n", errno,
442 strerror(errno));
443 exit(-1);
444 }
445 }
446 }
447
448 if (!sample_type)
449 sample_type = attr->sample_type;
450}
451
452static void open_counters(int cpu)
453{
454 struct perf_evsel *pos;
455
456 group_fd = -1;
457
458 list_for_each_entry(pos, &evsel_list, node)
459 create_counter(pos, cpu);
460
461 nr_cpu++;
462} 345}
463 346
464static int process_buildids(void) 347static int process_buildids(void)
@@ -481,14 +364,14 @@ static void atexit_header(void)
481 364
482 if (!no_buildid) 365 if (!no_buildid)
483 process_buildids(); 366 process_buildids();
484 perf_header__write(&session->header, output, true); 367 perf_session__write_header(session, evsel_list, output, true);
485 perf_session__delete(session); 368 perf_session__delete(session);
486 perf_evsel_list__delete(); 369 perf_evlist__delete(evsel_list);
487 symbol__exit(); 370 symbol__exit();
488 } 371 }
489} 372}
490 373
491static void event__synthesize_guest_os(struct machine *machine, void *data) 374static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
492{ 375{
493 int err; 376 int err;
494 struct perf_session *psession = data; 377 struct perf_session *psession = data;
@@ -504,8 +387,8 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
504 *method is used to avoid symbol missing when the first addr is 387 *method is used to avoid symbol missing when the first addr is
505 *in module instead of in guest kernel. 388 *in module instead of in guest kernel.
506 */ 389 */
507 err = event__synthesize_modules(process_synthesized_event, 390 err = perf_event__synthesize_modules(process_synthesized_event,
508 psession, machine); 391 psession, machine);
509 if (err < 0) 392 if (err < 0)
510 pr_err("Couldn't record guest kernel [%d]'s reference" 393 pr_err("Couldn't record guest kernel [%d]'s reference"
511 " relocation symbol.\n", machine->pid); 394 " relocation symbol.\n", machine->pid);
@@ -514,11 +397,12 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
514 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 397 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
515 * have no _text sometimes. 398 * have no _text sometimes.
516 */ 399 */
517 err = event__synthesize_kernel_mmap(process_synthesized_event, 400 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
518 psession, machine, "_text"); 401 psession, machine, "_text");
519 if (err < 0) 402 if (err < 0)
520 err = event__synthesize_kernel_mmap(process_synthesized_event, 403 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
521 psession, machine, "_stext"); 404 psession, machine,
405 "_stext");
522 if (err < 0) 406 if (err < 0)
523 pr_err("Couldn't record guest kernel [%d]'s reference" 407 pr_err("Couldn't record guest kernel [%d]'s reference"
524 " relocation symbol.\n", machine->pid); 408 " relocation symbol.\n", machine->pid);
@@ -533,9 +417,9 @@ static void mmap_read_all(void)
533{ 417{
534 int i; 418 int i;
535 419
536 for (i = 0; i < nr_cpu; i++) { 420 for (i = 0; i < evsel_list->cpus->nr; i++) {
537 if (mmap_array[i].base) 421 if (evsel_list->mmap[i].base)
538 mmap_read(&mmap_array[i]); 422 mmap_read(&evsel_list->mmap[i]);
539 } 423 }
540 424
541 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO)) 425 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
@@ -566,18 +450,26 @@ static int __cmd_record(int argc, const char **argv)
566 exit(-1); 450 exit(-1);
567 } 451 }
568 452
569 if (!strcmp(output_name, "-")) 453 if (!output_name) {
570 pipe_output = 1; 454 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
571 else if (!stat(output_name, &st) && st.st_size) { 455 pipe_output = 1;
572 if (write_mode == WRITE_FORCE) { 456 else
573 char oldname[PATH_MAX]; 457 output_name = "perf.data";
574 snprintf(oldname, sizeof(oldname), "%s.old", 458 }
575 output_name); 459 if (output_name) {
576 unlink(oldname); 460 if (!strcmp(output_name, "-"))
577 rename(output_name, oldname); 461 pipe_output = 1;
462 else if (!stat(output_name, &st) && st.st_size) {
463 if (write_mode == WRITE_FORCE) {
464 char oldname[PATH_MAX];
465 snprintf(oldname, sizeof(oldname), "%s.old",
466 output_name);
467 unlink(oldname);
468 rename(output_name, oldname);
469 }
470 } else if (write_mode == WRITE_APPEND) {
471 write_mode = WRITE_FORCE;
578 } 472 }
579 } else if (write_mode == WRITE_APPEND) {
580 write_mode = WRITE_FORCE;
581 } 473 }
582 474
583 flags = O_CREAT|O_RDWR; 475 flags = O_CREAT|O_RDWR;
@@ -606,19 +498,14 @@ static int __cmd_record(int argc, const char **argv)
606 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 498 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
607 499
608 if (!file_new) { 500 if (!file_new) {
609 err = perf_header__read(session, output); 501 err = perf_session__read_header(session, output);
610 if (err < 0) 502 if (err < 0)
611 goto out_delete_session; 503 goto out_delete_session;
612 } 504 }
613 505
614 if (have_tracepoints(&evsel_list)) 506 if (have_tracepoints(&evsel_list->entries))
615 perf_header__set_feat(&session->header, HEADER_TRACE_INFO); 507 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
616 508
617 /*
618 * perf_session__delete(session) will be called at atexit_header()
619 */
620 atexit(atexit_header);
621
622 if (forks) { 509 if (forks) {
623 child_pid = fork(); 510 child_pid = fork();
624 if (child_pid < 0) { 511 if (child_pid < 0) {
@@ -659,7 +546,7 @@ static int __cmd_record(int argc, const char **argv)
659 } 546 }
660 547
661 if (!system_wide && target_tid == -1 && target_pid == -1) 548 if (!system_wide && target_tid == -1 && target_pid == -1)
662 threads->map[0] = child_pid; 549 evsel_list->threads->map[0] = child_pid;
663 550
664 close(child_ready_pipe[1]); 551 close(child_ready_pipe[1]);
665 close(go_pipe[0]); 552 close(go_pipe[0]);
@@ -673,46 +560,42 @@ static int __cmd_record(int argc, const char **argv)
673 close(child_ready_pipe[0]); 560 close(child_ready_pipe[0]);
674 } 561 }
675 562
676 if (!system_wide && no_inherit && !cpu_list) { 563 open_counters(evsel_list);
677 open_counters(-1);
678 } else {
679 for (i = 0; i < cpus->nr; i++)
680 open_counters(cpus->map[i]);
681 }
682 564
683 perf_session__set_sample_type(session, sample_type); 565 /*
566 * perf_session__delete(session) will be called at atexit_header()
567 */
568 atexit(atexit_header);
684 569
685 if (pipe_output) { 570 if (pipe_output) {
686 err = perf_header__write_pipe(output); 571 err = perf_header__write_pipe(output);
687 if (err < 0) 572 if (err < 0)
688 return err; 573 return err;
689 } else if (file_new) { 574 } else if (file_new) {
690 err = perf_header__write(&session->header, output, false); 575 err = perf_session__write_header(session, evsel_list,
576 output, false);
691 if (err < 0) 577 if (err < 0)
692 return err; 578 return err;
693 } 579 }
694 580
695 post_processing_offset = lseek(output, 0, SEEK_CUR); 581 post_processing_offset = lseek(output, 0, SEEK_CUR);
696 582
697 perf_session__set_sample_id_all(session, sample_id_all_avail);
698
699 if (pipe_output) { 583 if (pipe_output) {
700 err = event__synthesize_attrs(&session->header, 584 err = perf_session__synthesize_attrs(session,
701 process_synthesized_event, 585 process_synthesized_event);
702 session);
703 if (err < 0) { 586 if (err < 0) {
704 pr_err("Couldn't synthesize attrs.\n"); 587 pr_err("Couldn't synthesize attrs.\n");
705 return err; 588 return err;
706 } 589 }
707 590
708 err = event__synthesize_event_types(process_synthesized_event, 591 err = perf_event__synthesize_event_types(process_synthesized_event,
709 session); 592 session);
710 if (err < 0) { 593 if (err < 0) {
711 pr_err("Couldn't synthesize event_types.\n"); 594 pr_err("Couldn't synthesize event_types.\n");
712 return err; 595 return err;
713 } 596 }
714 597
715 if (have_tracepoints(&evsel_list)) { 598 if (have_tracepoints(&evsel_list->entries)) {
716 /* 599 /*
717 * FIXME err <= 0 here actually means that 600 * FIXME err <= 0 here actually means that
718 * there were no tracepoints so its not really 601 * there were no tracepoints so its not really
@@ -721,9 +604,9 @@ static int __cmd_record(int argc, const char **argv)
721 * return this more properly and also 604 * return this more properly and also
722 * propagate errors that now are calling die() 605 * propagate errors that now are calling die()
723 */ 606 */
724 err = event__synthesize_tracing_data(output, &evsel_list, 607 err = perf_event__synthesize_tracing_data(output, evsel_list,
725 process_synthesized_event, 608 process_synthesized_event,
726 session); 609 session);
727 if (err <= 0) { 610 if (err <= 0) {
728 pr_err("Couldn't record tracing data.\n"); 611 pr_err("Couldn't record tracing data.\n");
729 return err; 612 return err;
@@ -738,31 +621,34 @@ static int __cmd_record(int argc, const char **argv)
738 return -1; 621 return -1;
739 } 622 }
740 623
741 err = event__synthesize_kernel_mmap(process_synthesized_event, 624 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
742 session, machine, "_text"); 625 session, machine, "_text");
743 if (err < 0) 626 if (err < 0)
744 err = event__synthesize_kernel_mmap(process_synthesized_event, 627 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
745 session, machine, "_stext"); 628 session, machine, "_stext");
746 if (err < 0) 629 if (err < 0)
747 pr_err("Couldn't record kernel reference relocation symbol\n" 630 pr_err("Couldn't record kernel reference relocation symbol\n"
748 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 631 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
749 "Check /proc/kallsyms permission or run as root.\n"); 632 "Check /proc/kallsyms permission or run as root.\n");
750 633
751 err = event__synthesize_modules(process_synthesized_event, 634 err = perf_event__synthesize_modules(process_synthesized_event,
752 session, machine); 635 session, machine);
753 if (err < 0) 636 if (err < 0)
754 pr_err("Couldn't record kernel module information.\n" 637 pr_err("Couldn't record kernel module information.\n"
755 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 638 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
756 "Check /proc/modules permission or run as root.\n"); 639 "Check /proc/modules permission or run as root.\n");
757 640
758 if (perf_guest) 641 if (perf_guest)
759 perf_session__process_machines(session, event__synthesize_guest_os); 642 perf_session__process_machines(session,
643 perf_event__synthesize_guest_os);
760 644
761 if (!system_wide) 645 if (!system_wide)
762 event__synthesize_thread_map(threads, process_synthesized_event, 646 perf_event__synthesize_thread_map(evsel_list->threads,
763 session); 647 process_synthesized_event,
648 session);
764 else 649 else
765 event__synthesize_threads(process_synthesized_event, session); 650 perf_event__synthesize_threads(process_synthesized_event,
651 session);
766 652
767 if (realtime_prio) { 653 if (realtime_prio) {
768 struct sched_param param; 654 struct sched_param param;
@@ -789,17 +675,17 @@ static int __cmd_record(int argc, const char **argv)
789 if (hits == samples) { 675 if (hits == samples) {
790 if (done) 676 if (done)
791 break; 677 break;
792 err = poll(event_array, nr_poll, -1); 678 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
793 waking++; 679 waking++;
794 } 680 }
795 681
796 if (done) { 682 if (done) {
797 for (i = 0; i < nr_cpu; i++) { 683 for (i = 0; i < evsel_list->cpus->nr; i++) {
798 struct perf_evsel *pos; 684 struct perf_evsel *pos;
799 685
800 list_for_each_entry(pos, &evsel_list, node) { 686 list_for_each_entry(pos, &evsel_list->entries, node) {
801 for (thread = 0; 687 for (thread = 0;
802 thread < threads->nr; 688 thread < evsel_list->threads->nr;
803 thread++) 689 thread++)
804 ioctl(FD(pos, i, thread), 690 ioctl(FD(pos, i, thread),
805 PERF_EVENT_IOC_DISABLE); 691 PERF_EVENT_IOC_DISABLE);
@@ -838,10 +724,10 @@ static const char * const record_usage[] = {
838static bool force, append_file; 724static bool force, append_file;
839 725
840const struct option record_options[] = { 726const struct option record_options[] = {
841 OPT_CALLBACK('e', "event", NULL, "event", 727 OPT_CALLBACK('e', "event", &evsel_list, "event",
842 "event selector. use 'perf list' to list available events", 728 "event selector. use 'perf list' to list available events",
843 parse_events), 729 parse_events),
844 OPT_CALLBACK(0, "filter", NULL, "filter", 730 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
845 "event filter", parse_filter), 731 "event filter", parse_filter),
846 OPT_INTEGER('p', "pid", &target_pid, 732 OPT_INTEGER('p', "pid", &target_pid,
847 "record events on existing process id"), 733 "record events on existing process id"),
@@ -884,6 +770,9 @@ const struct option record_options[] = {
884 "do not update the buildid cache"), 770 "do not update the buildid cache"),
885 OPT_BOOLEAN('B', "no-buildid", &no_buildid, 771 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
886 "do not collect buildids in perf.data"), 772 "do not collect buildids in perf.data"),
773 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
774 "monitor event in cgroup name only",
775 parse_cgroups),
887 OPT_END() 776 OPT_END()
888}; 777};
889 778
@@ -892,6 +781,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
892 int err = -ENOMEM; 781 int err = -ENOMEM;
893 struct perf_evsel *pos; 782 struct perf_evsel *pos;
894 783
784 evsel_list = perf_evlist__new(NULL, NULL);
785 if (evsel_list == NULL)
786 return -ENOMEM;
787
895 argc = parse_options(argc, argv, record_options, record_usage, 788 argc = parse_options(argc, argv, record_options, record_usage,
896 PARSE_OPT_STOP_AT_NON_OPTION); 789 PARSE_OPT_STOP_AT_NON_OPTION);
897 if (!argc && target_pid == -1 && target_tid == -1 && 790 if (!argc && target_pid == -1 && target_tid == -1 &&
@@ -908,12 +801,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
908 write_mode = WRITE_FORCE; 801 write_mode = WRITE_FORCE;
909 } 802 }
910 803
804 if (nr_cgroups && !system_wide) {
805 fprintf(stderr, "cgroup monitoring only available in"
806 " system-wide mode\n");
807 usage_with_options(record_usage, record_options);
808 }
809
911 symbol__init(); 810 symbol__init();
912 811
913 if (no_buildid_cache || no_buildid) 812 if (no_buildid_cache || no_buildid)
914 disable_buildid_cache(); 813 disable_buildid_cache();
915 814
916 if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) { 815 if (evsel_list->nr_entries == 0 &&
816 perf_evlist__add_default(evsel_list) < 0) {
917 pr_err("Not enough memory for event selector list\n"); 817 pr_err("Not enough memory for event selector list\n");
918 goto out_symbol_exit; 818 goto out_symbol_exit;
919 } 819 }
@@ -921,27 +821,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
921 if (target_pid != -1) 821 if (target_pid != -1)
922 target_tid = target_pid; 822 target_tid = target_pid;
923 823
924 threads = thread_map__new(target_pid, target_tid); 824 if (perf_evlist__create_maps(evsel_list, target_pid,
925 if (threads == NULL) { 825 target_tid, cpu_list) < 0)
926 pr_err("Problems finding threads of monitor\n");
927 usage_with_options(record_usage, record_options); 826 usage_with_options(record_usage, record_options);
928 }
929 827
930 cpus = cpu_map__new(cpu_list); 828 list_for_each_entry(pos, &evsel_list->entries, node) {
931 if (cpus == NULL) { 829 if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
932 perror("failed to parse CPUs map"); 830 evsel_list->threads->nr) < 0)
933 return -1;
934 }
935
936 list_for_each_entry(pos, &evsel_list, node) {
937 if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
938 goto out_free_fd; 831 goto out_free_fd;
939 if (perf_header__push_event(pos->attr.config, event_name(pos))) 832 if (perf_header__push_event(pos->attr.config, event_name(pos)))
940 goto out_free_fd; 833 goto out_free_fd;
941 } 834 }
942 event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS * 835
943 MAX_COUNTERS * threads->nr)); 836 if (perf_evlist__alloc_pollfd(evsel_list) < 0)
944 if (!event_array)
945 goto out_free_fd; 837 goto out_free_fd;
946 838
947 if (user_interval != ULLONG_MAX) 839 if (user_interval != ULLONG_MAX)
@@ -959,16 +851,12 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
959 } else { 851 } else {
960 fprintf(stderr, "frequency and count are zero, aborting\n"); 852 fprintf(stderr, "frequency and count are zero, aborting\n");
961 err = -EINVAL; 853 err = -EINVAL;
962 goto out_free_event_array; 854 goto out_free_fd;
963 } 855 }
964 856
965 err = __cmd_record(argc, argv); 857 err = __cmd_record(argc, argv);
966
967out_free_event_array:
968 free(event_array);
969out_free_fd: 858out_free_fd:
970 thread_map__delete(threads); 859 perf_evlist__delete_maps(evsel_list);
971 threads = NULL;
972out_symbol_exit: 860out_symbol_exit:
973 symbol__exit(); 861 symbol__exit();
974 return err; 862 return err;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c27e31f289e6..b1b82009ab9b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -9,6 +9,7 @@
9 9
10#include "util/util.h" 10#include "util/util.h"
11 11
12#include "util/annotate.h"
12#include "util/color.h" 13#include "util/color.h"
13#include <linux/list.h> 14#include <linux/list.h>
14#include "util/cache.h" 15#include "util/cache.h"
@@ -20,6 +21,8 @@
20 21
21#include "perf.h" 22#include "perf.h"
22#include "util/debug.h" 23#include "util/debug.h"
24#include "util/evlist.h"
25#include "util/evsel.h"
23#include "util/header.h" 26#include "util/header.h"
24#include "util/session.h" 27#include "util/session.h"
25 28
@@ -43,120 +46,79 @@ static const char default_pretty_printing_style[] = "normal";
43static const char *pretty_printing_style = default_pretty_printing_style; 46static const char *pretty_printing_style = default_pretty_printing_style;
44 47
45static char callchain_default_opt[] = "fractal,0.5"; 48static char callchain_default_opt[] = "fractal,0.5";
49static symbol_filter_t annotate_init;
46 50
47static struct hists *perf_session__hists_findnew(struct perf_session *self, 51static int perf_session__add_hist_entry(struct perf_session *session,
48 u64 event_stream, u32 type,
49 u64 config)
50{
51 struct rb_node **p = &self->hists_tree.rb_node;
52 struct rb_node *parent = NULL;
53 struct hists *iter, *new;
54
55 while (*p != NULL) {
56 parent = *p;
57 iter = rb_entry(parent, struct hists, rb_node);
58 if (iter->config == config)
59 return iter;
60
61
62 if (config > iter->config)
63 p = &(*p)->rb_right;
64 else
65 p = &(*p)->rb_left;
66 }
67
68 new = malloc(sizeof(struct hists));
69 if (new == NULL)
70 return NULL;
71 memset(new, 0, sizeof(struct hists));
72 new->event_stream = event_stream;
73 new->config = config;
74 new->type = type;
75 rb_link_node(&new->rb_node, parent, p);
76 rb_insert_color(&new->rb_node, &self->hists_tree);
77 return new;
78}
79
80static int perf_session__add_hist_entry(struct perf_session *self,
81 struct addr_location *al, 52 struct addr_location *al,
82 struct sample_data *data) 53 struct perf_sample *sample)
83{ 54{
84 struct map_symbol *syms = NULL;
85 struct symbol *parent = NULL; 55 struct symbol *parent = NULL;
86 int err = -ENOMEM; 56 int err = 0;
87 struct hist_entry *he; 57 struct hist_entry *he;
88 struct hists *hists; 58 struct perf_evsel *evsel;
89 struct perf_event_attr *attr; 59
90 60 if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
91 if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) { 61 err = perf_session__resolve_callchain(session, al->thread,
92 syms = perf_session__resolve_callchain(self, al->thread, 62 sample->callchain, &parent);
93 data->callchain, &parent); 63 if (err)
94 if (syms == NULL) 64 return err;
95 return -ENOMEM;
96 } 65 }
97 66
98 attr = perf_header__find_attr(data->id, &self->header); 67 evsel = perf_evlist__id2evsel(session->evlist, sample->id);
99 if (attr) 68 if (evsel == NULL) {
100 hists = perf_session__hists_findnew(self, data->id, attr->type, attr->config); 69 /*
101 else 70 * FIXME: Propagate this back, but at least we're in a builtin,
102 hists = perf_session__hists_findnew(self, data->id, 0, 0); 71 * where exit() is allowed. ;-)
103 if (hists == NULL) 72 */
104 goto out_free_syms; 73 ui__warning("Invalid %s file, contains samples with id %" PRIu64 " not in "
105 he = __hists__add_entry(hists, al, parent, data->period); 74 "its header!\n", input_name, sample->id);
75 exit_browser(0);
76 exit(1);
77 }
78
79 he = __hists__add_entry(&evsel->hists, al, parent, sample->period);
106 if (he == NULL) 80 if (he == NULL)
107 goto out_free_syms; 81 return -ENOMEM;
108 err = 0; 82
109 if (symbol_conf.use_callchain) { 83 if (symbol_conf.use_callchain) {
110 err = callchain_append(he->callchain, data->callchain, syms, 84 err = callchain_append(he->callchain, &session->callchain_cursor,
111 data->period); 85 sample->period);
112 if (err) 86 if (err)
113 goto out_free_syms; 87 return err;
114 } 88 }
115 /* 89 /*
116 * Only in the newt browser we are doing integrated annotation, 90 * Only in the newt browser we are doing integrated annotation,
117 * so we don't allocated the extra space needed because the stdio 91 * so we don't allocated the extra space needed because the stdio
118 * code will not use it. 92 * code will not use it.
119 */ 93 */
120 if (use_browser > 0) 94 if (al->sym != NULL && use_browser > 0) {
121 err = hist_entry__inc_addr_samples(he, al->addr); 95 struct annotation *notes = symbol__annotation(he->ms.sym);
122out_free_syms:
123 free(syms);
124 return err;
125}
126 96
127static int add_event_total(struct perf_session *session, 97 assert(evsel != NULL);
128 struct sample_data *data,
129 struct perf_event_attr *attr)
130{
131 struct hists *hists;
132 98
133 if (attr) 99 err = -ENOMEM;
134 hists = perf_session__hists_findnew(session, data->id, 100 if (notes->src == NULL &&
135 attr->type, attr->config); 101 symbol__alloc_hist(he->ms.sym, session->evlist->nr_entries) < 0)
136 else 102 goto out;
137 hists = perf_session__hists_findnew(session, data->id, 0, 0);
138 103
139 if (!hists) 104 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
140 return -ENOMEM; 105 }
141 106
142 hists->stats.total_period += data->period; 107 evsel->hists.stats.total_period += sample->period;
143 /* 108 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
144 * FIXME: add_event_total should be moved from here to 109out:
145 * perf_session__process_event so that the proper hist is passed to 110 return err;
146 * the event_op methods.
147 */
148 hists__inc_nr_events(hists, PERF_RECORD_SAMPLE);
149 session->hists.stats.total_period += data->period;
150 return 0;
151} 111}
152 112
153static int process_sample_event(event_t *event, struct sample_data *sample, 113
114static int process_sample_event(union perf_event *event,
115 struct perf_sample *sample,
154 struct perf_session *session) 116 struct perf_session *session)
155{ 117{
156 struct addr_location al; 118 struct addr_location al;
157 struct perf_event_attr *attr;
158 119
159 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 120 if (perf_event__preprocess_sample(event, session, &al, sample,
121 annotate_init) < 0) {
160 fprintf(stderr, "problem processing %d event, skipping it.\n", 122 fprintf(stderr, "problem processing %d event, skipping it.\n",
161 event->header.type); 123 event->header.type);
162 return -1; 124 return -1;
@@ -170,26 +132,17 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
170 return -1; 132 return -1;
171 } 133 }
172 134
173 attr = perf_header__find_attr(sample->id, &session->header);
174
175 if (add_event_total(session, sample, attr)) {
176 pr_debug("problem adding event period\n");
177 return -1;
178 }
179
180 return 0; 135 return 0;
181} 136}
182 137
183static int process_read_event(event_t *event, struct sample_data *sample __used, 138static int process_read_event(union perf_event *event,
184 struct perf_session *session __used) 139 struct perf_sample *sample __used,
140 struct perf_session *session)
185{ 141{
186 struct perf_event_attr *attr; 142 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist,
187 143 event->read.id);
188 attr = perf_header__find_attr(event->read.id, &session->header);
189
190 if (show_threads) { 144 if (show_threads) {
191 const char *name = attr ? __event_name(attr->type, attr->config) 145 const char *name = evsel ? event_name(evsel) : "unknown";
192 : "unknown";
193 perf_read_values_add_value(&show_threads_values, 146 perf_read_values_add_value(&show_threads_values,
194 event->read.pid, event->read.tid, 147 event->read.pid, event->read.tid,
195 event->read.id, 148 event->read.id,
@@ -198,7 +151,7 @@ static int process_read_event(event_t *event, struct sample_data *sample __used,
198 } 151 }
199 152
200 dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, 153 dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
201 attr ? __event_name(attr->type, attr->config) : "FAIL", 154 evsel ? event_name(evsel) : "FAIL",
202 event->read.value); 155 event->read.value);
203 156
204 return 0; 157 return 0;
@@ -222,7 +175,7 @@ static int perf_session__setup_sample_type(struct perf_session *self)
222 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE && 175 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
223 !symbol_conf.use_callchain) { 176 !symbol_conf.use_callchain) {
224 symbol_conf.use_callchain = true; 177 symbol_conf.use_callchain = true;
225 if (register_callchain_param(&callchain_param) < 0) { 178 if (callchain_register_param(&callchain_param) < 0) {
226 fprintf(stderr, "Can't register callchain" 179 fprintf(stderr, "Can't register callchain"
227 " params\n"); 180 " params\n");
228 return -EINVAL; 181 return -EINVAL;
@@ -233,17 +186,17 @@ static int perf_session__setup_sample_type(struct perf_session *self)
233} 186}
234 187
235static struct perf_event_ops event_ops = { 188static struct perf_event_ops event_ops = {
236 .sample = process_sample_event, 189 .sample = process_sample_event,
237 .mmap = event__process_mmap, 190 .mmap = perf_event__process_mmap,
238 .comm = event__process_comm, 191 .comm = perf_event__process_comm,
239 .exit = event__process_task, 192 .exit = perf_event__process_task,
240 .fork = event__process_task, 193 .fork = perf_event__process_task,
241 .lost = event__process_lost, 194 .lost = perf_event__process_lost,
242 .read = process_read_event, 195 .read = process_read_event,
243 .attr = event__process_attr, 196 .attr = perf_event__process_attr,
244 .event_type = event__process_event_type, 197 .event_type = perf_event__process_event_type,
245 .tracing_data = event__process_tracing_data, 198 .tracing_data = perf_event__process_tracing_data,
246 .build_id = event__process_build_id, 199 .build_id = perf_event__process_build_id,
247 .ordered_samples = true, 200 .ordered_samples = true,
248 .ordering_requires_timestamps = true, 201 .ordering_requires_timestamps = true,
249}; 202};
@@ -269,21 +222,21 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
269 return ret + fprintf(fp, "\n#\n"); 222 return ret + fprintf(fp, "\n#\n");
270} 223}
271 224
272static int hists__tty_browse_tree(struct rb_root *tree, const char *help) 225static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
226 const char *help)
273{ 227{
274 struct rb_node *next = rb_first(tree); 228 struct perf_evsel *pos;
275 229
276 while (next) { 230 list_for_each_entry(pos, &evlist->entries, node) {
277 struct hists *hists = rb_entry(next, struct hists, rb_node); 231 struct hists *hists = &pos->hists;
278 const char *evname = NULL; 232 const char *evname = NULL;
279 233
280 if (rb_first(&hists->entries) != rb_last(&hists->entries)) 234 if (rb_first(&hists->entries) != rb_last(&hists->entries))
281 evname = __event_name(hists->type, hists->config); 235 evname = event_name(pos);
282 236
283 hists__fprintf_nr_sample_events(hists, evname, stdout); 237 hists__fprintf_nr_sample_events(hists, evname, stdout);
284 hists__fprintf(hists, NULL, false, stdout); 238 hists__fprintf(hists, NULL, false, stdout);
285 fprintf(stdout, "\n\n"); 239 fprintf(stdout, "\n\n");
286 next = rb_next(&hists->rb_node);
287 } 240 }
288 241
289 if (sort_order == default_sort_order && 242 if (sort_order == default_sort_order &&
@@ -304,8 +257,9 @@ static int hists__tty_browse_tree(struct rb_root *tree, const char *help)
304static int __cmd_report(void) 257static int __cmd_report(void)
305{ 258{
306 int ret = -EINVAL; 259 int ret = -EINVAL;
260 u64 nr_samples;
307 struct perf_session *session; 261 struct perf_session *session;
308 struct rb_node *next; 262 struct perf_evsel *pos;
309 const char *help = "For a higher level overview, try: perf report --sort comm,dso"; 263 const char *help = "For a higher level overview, try: perf report --sort comm,dso";
310 264
311 signal(SIGINT, sig_handler); 265 signal(SIGINT, sig_handler);
@@ -336,20 +290,24 @@ static int __cmd_report(void)
336 if (verbose > 2) 290 if (verbose > 2)
337 perf_session__fprintf_dsos(session, stdout); 291 perf_session__fprintf_dsos(session, stdout);
338 292
339 next = rb_first(&session->hists_tree); 293 nr_samples = 0;
340 while (next) { 294 list_for_each_entry(pos, &session->evlist->entries, node) {
341 struct hists *hists; 295 struct hists *hists = &pos->hists;
342 296
343 hists = rb_entry(next, struct hists, rb_node);
344 hists__collapse_resort(hists); 297 hists__collapse_resort(hists);
345 hists__output_resort(hists); 298 hists__output_resort(hists);
346 next = rb_next(&hists->rb_node); 299 nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
300 }
301
302 if (nr_samples == 0) {
303 ui__warning("The %s file has no samples!\n", input_name);
304 goto out_delete;
347 } 305 }
348 306
349 if (use_browser > 0) 307 if (use_browser > 0)
350 hists__tui_browse_tree(&session->hists_tree, help); 308 perf_evlist__tui_browse_hists(session->evlist, help);
351 else 309 else
352 hists__tty_browse_tree(&session->hists_tree, help); 310 perf_evlist__tty_browse_hists(session->evlist, help);
353 311
354out_delete: 312out_delete:
355 /* 313 /*
@@ -424,7 +382,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
424 if (tok2) 382 if (tok2)
425 callchain_param.print_limit = strtod(tok2, &endptr); 383 callchain_param.print_limit = strtod(tok2, &endptr);
426setup: 384setup:
427 if (register_callchain_param(&callchain_param) < 0) { 385 if (callchain_register_param(&callchain_param) < 0) {
428 fprintf(stderr, "Can't register callchain params\n"); 386 fprintf(stderr, "Can't register callchain params\n");
429 return -1; 387 return -1;
430 } 388 }
@@ -498,7 +456,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
498 use_browser = 1; 456 use_browser = 1;
499 457
500 if (strcmp(input_name, "-") != 0) 458 if (strcmp(input_name, "-") != 0)
501 setup_browser(); 459 setup_browser(true);
502 else 460 else
503 use_browser = 0; 461 use_browser = 0;
504 /* 462 /*
@@ -507,7 +465,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
507 * implementation. 465 * implementation.
508 */ 466 */
509 if (use_browser > 0) { 467 if (use_browser > 0) {
510 symbol_conf.priv_size = sizeof(struct sym_priv); 468 symbol_conf.priv_size = sizeof(struct annotation);
469 annotate_init = symbol__annotate_init;
511 /* 470 /*
512 * For searching by name on the "Browse map details". 471 * For searching by name on the "Browse map details".
513 * providing it only in verbose mode not to bloat too 472 * providing it only in verbose mode not to bloat too
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 29acb894e035..a32f411faeac 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -369,11 +369,6 @@ static void
369process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) 369process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
370{ 370{
371 int ret = 0; 371 int ret = 0;
372 u64 now;
373 long long delta;
374
375 now = get_nsecs();
376 delta = start_time + atom->timestamp - now;
377 372
378 switch (atom->type) { 373 switch (atom->type) {
379 case SCHED_EVENT_RUN: 374 case SCHED_EVENT_RUN:
@@ -562,7 +557,7 @@ static void wait_for_tasks(void)
562 557
563static void run_one_test(void) 558static void run_one_test(void)
564{ 559{
565 u64 T0, T1, delta, avg_delta, fluct, std_dev; 560 u64 T0, T1, delta, avg_delta, fluct;
566 561
567 T0 = get_nsecs(); 562 T0 = get_nsecs();
568 wait_for_tasks(); 563 wait_for_tasks();
@@ -578,7 +573,6 @@ static void run_one_test(void)
578 else 573 else
579 fluct = delta - avg_delta; 574 fluct = delta - avg_delta;
580 sum_fluct += fluct; 575 sum_fluct += fluct;
581 std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
582 if (!run_avg) 576 if (!run_avg)
583 run_avg = delta; 577 run_avg = delta;
584 run_avg = (run_avg*9 + delta)/10; 578 run_avg = (run_avg*9 + delta)/10;
@@ -799,7 +793,7 @@ replay_switch_event(struct trace_switch_event *switch_event,
799 u64 timestamp, 793 u64 timestamp,
800 struct thread *thread __used) 794 struct thread *thread __used)
801{ 795{
802 struct task_desc *prev, *next; 796 struct task_desc *prev, __used *next;
803 u64 timestamp0; 797 u64 timestamp0;
804 s64 delta; 798 s64 delta;
805 799
@@ -1404,7 +1398,7 @@ map_switch_event(struct trace_switch_event *switch_event,
1404 u64 timestamp, 1398 u64 timestamp,
1405 struct thread *thread __used) 1399 struct thread *thread __used)
1406{ 1400{
1407 struct thread *sched_out, *sched_in; 1401 struct thread *sched_out __used, *sched_in;
1408 int new_shortname; 1402 int new_shortname;
1409 u64 timestamp0; 1403 u64 timestamp0;
1410 s64 delta; 1404 s64 delta;
@@ -1580,9 +1574,9 @@ process_sched_migrate_task_event(void *data, struct perf_session *session,
1580 event, cpu, timestamp, thread); 1574 event, cpu, timestamp, thread);
1581} 1575}
1582 1576
1583static void 1577static void process_raw_event(union perf_event *raw_event __used,
1584process_raw_event(event_t *raw_event __used, struct perf_session *session, 1578 struct perf_session *session, void *data, int cpu,
1585 void *data, int cpu, u64 timestamp, struct thread *thread) 1579 u64 timestamp, struct thread *thread)
1586{ 1580{
1587 struct event *event; 1581 struct event *event;
1588 int type; 1582 int type;
@@ -1607,7 +1601,8 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
1607 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); 1601 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
1608} 1602}
1609 1603
1610static int process_sample_event(event_t *event, struct sample_data *sample, 1604static int process_sample_event(union perf_event *event,
1605 struct perf_sample *sample,
1611 struct perf_session *session) 1606 struct perf_session *session)
1612{ 1607{
1613 struct thread *thread; 1608 struct thread *thread;
@@ -1635,9 +1630,9 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
1635 1630
1636static struct perf_event_ops event_ops = { 1631static struct perf_event_ops event_ops = {
1637 .sample = process_sample_event, 1632 .sample = process_sample_event,
1638 .comm = event__process_comm, 1633 .comm = perf_event__process_comm,
1639 .lost = event__process_lost, 1634 .lost = perf_event__process_lost,
1640 .fork = event__process_task, 1635 .fork = perf_event__process_task,
1641 .ordered_samples = true, 1636 .ordered_samples = true,
1642}; 1637};
1643 1638
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index b766c2a9ac97..5f40df635dcb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,7 +63,8 @@ static int cleanup_scripting(void)
63 63
64static char const *input_name = "perf.data"; 64static char const *input_name = "perf.data";
65 65
66static int process_sample_event(event_t *event, struct sample_data *sample, 66static int process_sample_event(union perf_event *event,
67 struct perf_sample *sample,
67 struct perf_session *session) 68 struct perf_session *session)
68{ 69{
69 struct thread *thread = perf_session__findnew(session, event->ip.pid); 70 struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -100,14 +101,14 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
100} 101}
101 102
102static struct perf_event_ops event_ops = { 103static struct perf_event_ops event_ops = {
103 .sample = process_sample_event, 104 .sample = process_sample_event,
104 .comm = event__process_comm, 105 .comm = perf_event__process_comm,
105 .attr = event__process_attr, 106 .attr = perf_event__process_attr,
106 .event_type = event__process_event_type, 107 .event_type = perf_event__process_event_type,
107 .tracing_data = event__process_tracing_data, 108 .tracing_data = perf_event__process_tracing_data,
108 .build_id = event__process_build_id, 109 .build_id = perf_event__process_build_id,
109 .ordering_requires_timestamps = true,
110 .ordered_samples = true, 110 .ordered_samples = true,
111 .ordering_requires_timestamps = true,
111}; 112};
112 113
113extern volatile int session_done; 114extern volatile int session_done;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a482a191a0ca..21c025222496 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,11 +43,13 @@
43#include "util/parse-options.h" 43#include "util/parse-options.h"
44#include "util/parse-events.h" 44#include "util/parse-events.h"
45#include "util/event.h" 45#include "util/event.h"
46#include "util/evlist.h"
46#include "util/evsel.h" 47#include "util/evsel.h"
47#include "util/debug.h" 48#include "util/debug.h"
48#include "util/header.h" 49#include "util/header.h"
49#include "util/cpumap.h" 50#include "util/cpumap.h"
50#include "util/thread.h" 51#include "util/thread.h"
52#include "util/thread_map.h"
51 53
52#include <sys/prctl.h> 54#include <sys/prctl.h>
53#include <math.h> 55#include <math.h>
@@ -71,8 +73,9 @@ static struct perf_event_attr default_attrs[] = {
71 73
72}; 74};
73 75
76struct perf_evlist *evsel_list;
77
74static bool system_wide = false; 78static bool system_wide = false;
75static struct cpu_map *cpus;
76static int run_idx = 0; 79static int run_idx = 0;
77 80
78static int run_count = 1; 81static int run_count = 1;
@@ -81,7 +84,6 @@ static bool scale = true;
81static bool no_aggr = false; 84static bool no_aggr = false;
82static pid_t target_pid = -1; 85static pid_t target_pid = -1;
83static pid_t target_tid = -1; 86static pid_t target_tid = -1;
84static struct thread_map *threads;
85static pid_t child_pid = -1; 87static pid_t child_pid = -1;
86static bool null_run = false; 88static bool null_run = false;
87static bool big_num = true; 89static bool big_num = true;
@@ -166,7 +168,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
166 PERF_FORMAT_TOTAL_TIME_RUNNING; 168 PERF_FORMAT_TOTAL_TIME_RUNNING;
167 169
168 if (system_wide) 170 if (system_wide)
169 return perf_evsel__open_per_cpu(evsel, cpus); 171 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false, false);
170 172
171 attr->inherit = !no_inherit; 173 attr->inherit = !no_inherit;
172 if (target_pid == -1 && target_tid == -1) { 174 if (target_pid == -1 && target_tid == -1) {
@@ -174,7 +176,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
174 attr->enable_on_exec = 1; 176 attr->enable_on_exec = 1;
175 } 177 }
176 178
177 return perf_evsel__open_per_thread(evsel, threads); 179 return perf_evsel__open_per_thread(evsel, evsel_list->threads, false, false);
178} 180}
179 181
180/* 182/*
@@ -199,7 +201,8 @@ static int read_counter_aggr(struct perf_evsel *counter)
199 u64 *count = counter->counts->aggr.values; 201 u64 *count = counter->counts->aggr.values;
200 int i; 202 int i;
201 203
202 if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0) 204 if (__perf_evsel__read(counter, evsel_list->cpus->nr,
205 evsel_list->threads->nr, scale) < 0)
203 return -1; 206 return -1;
204 207
205 for (i = 0; i < 3; i++) 208 for (i = 0; i < 3; i++)
@@ -232,7 +235,7 @@ static int read_counter(struct perf_evsel *counter)
232 u64 *count; 235 u64 *count;
233 int cpu; 236 int cpu;
234 237
235 for (cpu = 0; cpu < cpus->nr; cpu++) { 238 for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
236 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) 239 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
237 return -1; 240 return -1;
238 241
@@ -297,7 +300,7 @@ static int run_perf_stat(int argc __used, const char **argv)
297 } 300 }
298 301
299 if (target_tid == -1 && target_pid == -1 && !system_wide) 302 if (target_tid == -1 && target_pid == -1 && !system_wide)
300 threads->map[0] = child_pid; 303 evsel_list->threads->map[0] = child_pid;
301 304
302 /* 305 /*
303 * Wait for the child to be ready to exec. 306 * Wait for the child to be ready to exec.
@@ -309,7 +312,7 @@ static int run_perf_stat(int argc __used, const char **argv)
309 close(child_ready_pipe[0]); 312 close(child_ready_pipe[0]);
310 } 313 }
311 314
312 list_for_each_entry(counter, &evsel_list, node) { 315 list_for_each_entry(counter, &evsel_list->entries, node) {
313 if (create_perf_stat_counter(counter) < 0) { 316 if (create_perf_stat_counter(counter) < 0) {
314 if (errno == -EPERM || errno == -EACCES) { 317 if (errno == -EPERM || errno == -EACCES) {
315 error("You may not have permission to collect %sstats.\n" 318 error("You may not have permission to collect %sstats.\n"
@@ -347,14 +350,15 @@ static int run_perf_stat(int argc __used, const char **argv)
347 update_stats(&walltime_nsecs_stats, t1 - t0); 350 update_stats(&walltime_nsecs_stats, t1 - t0);
348 351
349 if (no_aggr) { 352 if (no_aggr) {
350 list_for_each_entry(counter, &evsel_list, node) { 353 list_for_each_entry(counter, &evsel_list->entries, node) {
351 read_counter(counter); 354 read_counter(counter);
352 perf_evsel__close_fd(counter, cpus->nr, 1); 355 perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
353 } 356 }
354 } else { 357 } else {
355 list_for_each_entry(counter, &evsel_list, node) { 358 list_for_each_entry(counter, &evsel_list->entries, node) {
356 read_counter_aggr(counter); 359 read_counter_aggr(counter);
357 perf_evsel__close_fd(counter, cpus->nr, threads->nr); 360 perf_evsel__close_fd(counter, evsel_list->cpus->nr,
361 evsel_list->threads->nr);
358 } 362 }
359 } 363 }
360 364
@@ -382,10 +386,13 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
382 if (no_aggr) 386 if (no_aggr)
383 sprintf(cpustr, "CPU%*d%s", 387 sprintf(cpustr, "CPU%*d%s",
384 csv_output ? 0 : -4, 388 csv_output ? 0 : -4,
385 cpus->map[cpu], csv_sep); 389 evsel_list->cpus->map[cpu], csv_sep);
386 390
387 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); 391 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
388 392
393 if (evsel->cgrp)
394 fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
395
389 if (csv_output) 396 if (csv_output)
390 return; 397 return;
391 398
@@ -410,12 +417,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
410 if (no_aggr) 417 if (no_aggr)
411 sprintf(cpustr, "CPU%*d%s", 418 sprintf(cpustr, "CPU%*d%s",
412 csv_output ? 0 : -4, 419 csv_output ? 0 : -4,
413 cpus->map[cpu], csv_sep); 420 evsel_list->cpus->map[cpu], csv_sep);
414 else 421 else
415 cpu = 0; 422 cpu = 0;
416 423
417 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel)); 424 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
418 425
426 if (evsel->cgrp)
427 fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
428
419 if (csv_output) 429 if (csv_output)
420 return; 430 return;
421 431
@@ -456,9 +466,17 @@ static void print_counter_aggr(struct perf_evsel *counter)
456 int scaled = counter->counts->scaled; 466 int scaled = counter->counts->scaled;
457 467
458 if (scaled == -1) { 468 if (scaled == -1) {
459 fprintf(stderr, "%*s%s%-24s\n", 469 fprintf(stderr, "%*s%s%*s",
460 csv_output ? 0 : 18, 470 csv_output ? 0 : 18,
461 "<not counted>", csv_sep, event_name(counter)); 471 "<not counted>",
472 csv_sep,
473 csv_output ? 0 : -24,
474 event_name(counter));
475
476 if (counter->cgrp)
477 fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
478
479 fputc('\n', stderr);
462 return; 480 return;
463 } 481 }
464 482
@@ -483,7 +501,6 @@ static void print_counter_aggr(struct perf_evsel *counter)
483 fprintf(stderr, " (scaled from %.2f%%)", 501 fprintf(stderr, " (scaled from %.2f%%)",
484 100 * avg_running / avg_enabled); 502 100 * avg_running / avg_enabled);
485 } 503 }
486
487 fprintf(stderr, "\n"); 504 fprintf(stderr, "\n");
488} 505}
489 506
@@ -496,19 +513,23 @@ static void print_counter(struct perf_evsel *counter)
496 u64 ena, run, val; 513 u64 ena, run, val;
497 int cpu; 514 int cpu;
498 515
499 for (cpu = 0; cpu < cpus->nr; cpu++) { 516 for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
500 val = counter->counts->cpu[cpu].val; 517 val = counter->counts->cpu[cpu].val;
501 ena = counter->counts->cpu[cpu].ena; 518 ena = counter->counts->cpu[cpu].ena;
502 run = counter->counts->cpu[cpu].run; 519 run = counter->counts->cpu[cpu].run;
503 if (run == 0 || ena == 0) { 520 if (run == 0 || ena == 0) {
504 fprintf(stderr, "CPU%*d%s%*s%s%-24s", 521 fprintf(stderr, "CPU%*d%s%*s%s%*s",
505 csv_output ? 0 : -4, 522 csv_output ? 0 : -4,
506 cpus->map[cpu], csv_sep, 523 evsel_list->cpus->map[cpu], csv_sep,
507 csv_output ? 0 : 18, 524 csv_output ? 0 : 18,
508 "<not counted>", csv_sep, 525 "<not counted>", csv_sep,
526 csv_output ? 0 : -24,
509 event_name(counter)); 527 event_name(counter));
510 528
511 fprintf(stderr, "\n"); 529 if (counter->cgrp)
530 fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
531
532 fputc('\n', stderr);
512 continue; 533 continue;
513 } 534 }
514 535
@@ -525,7 +546,7 @@ static void print_counter(struct perf_evsel *counter)
525 100.0 * run / ena); 546 100.0 * run / ena);
526 } 547 }
527 } 548 }
528 fprintf(stderr, "\n"); 549 fputc('\n', stderr);
529 } 550 }
530} 551}
531 552
@@ -555,10 +576,10 @@ static void print_stat(int argc, const char **argv)
555 } 576 }
556 577
557 if (no_aggr) { 578 if (no_aggr) {
558 list_for_each_entry(counter, &evsel_list, node) 579 list_for_each_entry(counter, &evsel_list->entries, node)
559 print_counter(counter); 580 print_counter(counter);
560 } else { 581 } else {
561 list_for_each_entry(counter, &evsel_list, node) 582 list_for_each_entry(counter, &evsel_list->entries, node)
562 print_counter_aggr(counter); 583 print_counter_aggr(counter);
563 } 584 }
564 585
@@ -610,7 +631,7 @@ static int stat__set_big_num(const struct option *opt __used,
610} 631}
611 632
612static const struct option options[] = { 633static const struct option options[] = {
613 OPT_CALLBACK('e', "event", NULL, "event", 634 OPT_CALLBACK('e', "event", &evsel_list, "event",
614 "event selector. use 'perf list' to list available events", 635 "event selector. use 'perf list' to list available events",
615 parse_events), 636 parse_events),
616 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 637 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
@@ -638,6 +659,9 @@ static const struct option options[] = {
638 "disable CPU count aggregation"), 659 "disable CPU count aggregation"),
639 OPT_STRING('x', "field-separator", &csv_sep, "separator", 660 OPT_STRING('x', "field-separator", &csv_sep, "separator",
640 "print counts with custom separator"), 661 "print counts with custom separator"),
662 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
663 "monitor event in cgroup name only",
664 parse_cgroups),
641 OPT_END() 665 OPT_END()
642}; 666};
643 667
@@ -648,6 +672,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
648 672
649 setlocale(LC_ALL, ""); 673 setlocale(LC_ALL, "");
650 674
675 evsel_list = perf_evlist__new(NULL, NULL);
676 if (evsel_list == NULL)
677 return -ENOMEM;
678
651 argc = parse_options(argc, argv, options, stat_usage, 679 argc = parse_options(argc, argv, options, stat_usage,
652 PARSE_OPT_STOP_AT_NON_OPTION); 680 PARSE_OPT_STOP_AT_NON_OPTION);
653 681
@@ -674,49 +702,50 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
674 if (run_count <= 0) 702 if (run_count <= 0)
675 usage_with_options(stat_usage, options); 703 usage_with_options(stat_usage, options);
676 704
677 /* no_aggr is for system-wide only */ 705 /* no_aggr, cgroup are for system-wide only */
678 if (no_aggr && !system_wide) 706 if ((no_aggr || nr_cgroups) && !system_wide) {
707 fprintf(stderr, "both cgroup and no-aggregation "
708 "modes only available in system-wide mode\n");
709
679 usage_with_options(stat_usage, options); 710 usage_with_options(stat_usage, options);
711 }
680 712
681 /* Set attrs and nr_counters if no event is selected and !null_run */ 713 /* Set attrs and nr_counters if no event is selected and !null_run */
682 if (!null_run && !nr_counters) { 714 if (!null_run && !evsel_list->nr_entries) {
683 size_t c; 715 size_t c;
684 716
685 nr_counters = ARRAY_SIZE(default_attrs);
686
687 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { 717 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
688 pos = perf_evsel__new(&default_attrs[c], 718 pos = perf_evsel__new(&default_attrs[c], c);
689 nr_counters);
690 if (pos == NULL) 719 if (pos == NULL)
691 goto out; 720 goto out;
692 list_add(&pos->node, &evsel_list); 721 perf_evlist__add(evsel_list, pos);
693 } 722 }
694 } 723 }
695 724
696 if (target_pid != -1) 725 if (target_pid != -1)
697 target_tid = target_pid; 726 target_tid = target_pid;
698 727
699 threads = thread_map__new(target_pid, target_tid); 728 evsel_list->threads = thread_map__new(target_pid, target_tid);
700 if (threads == NULL) { 729 if (evsel_list->threads == NULL) {
701 pr_err("Problems finding threads of monitor\n"); 730 pr_err("Problems finding threads of monitor\n");
702 usage_with_options(stat_usage, options); 731 usage_with_options(stat_usage, options);
703 } 732 }
704 733
705 if (system_wide) 734 if (system_wide)
706 cpus = cpu_map__new(cpu_list); 735 evsel_list->cpus = cpu_map__new(cpu_list);
707 else 736 else
708 cpus = cpu_map__dummy_new(); 737 evsel_list->cpus = cpu_map__dummy_new();
709 738
710 if (cpus == NULL) { 739 if (evsel_list->cpus == NULL) {
711 perror("failed to parse CPUs map"); 740 perror("failed to parse CPUs map");
712 usage_with_options(stat_usage, options); 741 usage_with_options(stat_usage, options);
713 return -1; 742 return -1;
714 } 743 }
715 744
716 list_for_each_entry(pos, &evsel_list, node) { 745 list_for_each_entry(pos, &evsel_list->entries, node) {
717 if (perf_evsel__alloc_stat_priv(pos) < 0 || 746 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
718 perf_evsel__alloc_counts(pos, cpus->nr) < 0 || 747 perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 ||
719 perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) 748 perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
720 goto out_free_fd; 749 goto out_free_fd;
721 } 750 }
722 751
@@ -741,11 +770,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
741 if (status != -1) 770 if (status != -1)
742 print_stat(argc, argv); 771 print_stat(argc, argv);
743out_free_fd: 772out_free_fd:
744 list_for_each_entry(pos, &evsel_list, node) 773 list_for_each_entry(pos, &evsel_list->entries, node)
745 perf_evsel__free_stat_priv(pos); 774 perf_evsel__free_stat_priv(pos);
746 perf_evsel_list__delete(); 775 perf_evlist__delete_maps(evsel_list);
747out: 776out:
748 thread_map__delete(threads); 777 perf_evlist__delete(evsel_list);
749 threads = NULL;
750 return status; 778 return status;
751} 779}
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 5dcdba653d70..1b2106c58f66 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -7,10 +7,11 @@
7 7
8#include "util/cache.h" 8#include "util/cache.h"
9#include "util/debug.h" 9#include "util/debug.h"
10#include "util/evlist.h"
10#include "util/parse-options.h" 11#include "util/parse-options.h"
11#include "util/session.h" 12#include "util/parse-events.h"
12#include "util/symbol.h" 13#include "util/symbol.h"
13#include "util/thread.h" 14#include "util/thread_map.h"
14 15
15static long page_size; 16static long page_size;
16 17
@@ -238,14 +239,14 @@ out:
238#include "util/evsel.h" 239#include "util/evsel.h"
239#include <sys/types.h> 240#include <sys/types.h>
240 241
241static int trace_event__id(const char *event_name) 242static int trace_event__id(const char *evname)
242{ 243{
243 char *filename; 244 char *filename;
244 int err = -1, fd; 245 int err = -1, fd;
245 246
246 if (asprintf(&filename, 247 if (asprintf(&filename,
247 "/sys/kernel/debug/tracing/events/syscalls/%s/id", 248 "/sys/kernel/debug/tracing/events/syscalls/%s/id",
248 event_name) < 0) 249 evname) < 0)
249 return -1; 250 return -1;
250 251
251 fd = open(filename, O_RDONLY); 252 fd = open(filename, O_RDONLY);
@@ -289,7 +290,7 @@ static int test__open_syscall_event(void)
289 goto out_thread_map_delete; 290 goto out_thread_map_delete;
290 } 291 }
291 292
292 if (perf_evsel__open_per_thread(evsel, threads) < 0) { 293 if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) {
293 pr_debug("failed to open counter: %s, " 294 pr_debug("failed to open counter: %s, "
294 "tweak /proc/sys/kernel/perf_event_paranoid?\n", 295 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
295 strerror(errno)); 296 strerror(errno));
@@ -347,9 +348,9 @@ static int test__open_syscall_event_on_all_cpus(void)
347 } 348 }
348 349
349 cpus = cpu_map__new(NULL); 350 cpus = cpu_map__new(NULL);
350 if (threads == NULL) { 351 if (cpus == NULL) {
351 pr_debug("thread_map__new\n"); 352 pr_debug("cpu_map__new\n");
352 return -1; 353 goto out_thread_map_delete;
353 } 354 }
354 355
355 356
@@ -364,7 +365,7 @@ static int test__open_syscall_event_on_all_cpus(void)
364 goto out_thread_map_delete; 365 goto out_thread_map_delete;
365 } 366 }
366 367
367 if (perf_evsel__open(evsel, cpus, threads) < 0) { 368 if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) {
368 pr_debug("failed to open counter: %s, " 369 pr_debug("failed to open counter: %s, "
369 "tweak /proc/sys/kernel/perf_event_paranoid?\n", 370 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
370 strerror(errno)); 371 strerror(errno));
@@ -408,6 +409,8 @@ static int test__open_syscall_event_on_all_cpus(void)
408 goto out_close_fd; 409 goto out_close_fd;
409 } 410 }
410 411
412 err = 0;
413
411 for (cpu = 0; cpu < cpus->nr; ++cpu) { 414 for (cpu = 0; cpu < cpus->nr; ++cpu) {
412 unsigned int expected; 415 unsigned int expected;
413 416
@@ -416,18 +419,18 @@ static int test__open_syscall_event_on_all_cpus(void)
416 419
417 if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { 420 if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
418 pr_debug("perf_evsel__open_read_on_cpu\n"); 421 pr_debug("perf_evsel__open_read_on_cpu\n");
419 goto out_close_fd; 422 err = -1;
423 break;
420 } 424 }
421 425
422 expected = nr_open_calls + cpu; 426 expected = nr_open_calls + cpu;
423 if (evsel->counts->cpu[cpu].val != expected) { 427 if (evsel->counts->cpu[cpu].val != expected) {
424 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", 428 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
425 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); 429 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
426 goto out_close_fd; 430 err = -1;
427 } 431 }
428 } 432 }
429 433
430 err = 0;
431out_close_fd: 434out_close_fd:
432 perf_evsel__close_fd(evsel, 1, threads->nr); 435 perf_evsel__close_fd(evsel, 1, threads->nr);
433out_evsel_delete: 436out_evsel_delete:
@@ -437,6 +440,159 @@ out_thread_map_delete:
437 return err; 440 return err;
438} 441}
439 442
443/*
444 * This test will generate random numbers of calls to some getpid syscalls,
445 * then establish an mmap for a group of events that are created to monitor
446 * the syscalls.
447 *
448 * It will receive the events, using mmap, use its PERF_SAMPLE_ID generated
449 * sample.id field to map back to its respective perf_evsel instance.
450 *
451 * Then it checks if the number of syscalls reported as perf events by
452 * the kernel corresponds to the number of syscalls made.
453 */
454static int test__basic_mmap(void)
455{
456 int err = -1;
457 union perf_event *event;
458 struct thread_map *threads;
459 struct cpu_map *cpus;
460 struct perf_evlist *evlist;
461 struct perf_event_attr attr = {
462 .type = PERF_TYPE_TRACEPOINT,
463 .read_format = PERF_FORMAT_ID,
464 .sample_type = PERF_SAMPLE_ID,
465 .watermark = 0,
466 };
467 cpu_set_t cpu_set;
468 const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
469 "getpgid", };
470 pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
471 (void*)getpgid };
472#define nsyscalls ARRAY_SIZE(syscall_names)
473 int ids[nsyscalls];
474 unsigned int nr_events[nsyscalls],
475 expected_nr_events[nsyscalls], i, j;
476 struct perf_evsel *evsels[nsyscalls], *evsel;
477
478 for (i = 0; i < nsyscalls; ++i) {
479 char name[64];
480
481 snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
482 ids[i] = trace_event__id(name);
483 if (ids[i] < 0) {
484 pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
485 return -1;
486 }
487 nr_events[i] = 0;
488 expected_nr_events[i] = random() % 257;
489 }
490
491 threads = thread_map__new(-1, getpid());
492 if (threads == NULL) {
493 pr_debug("thread_map__new\n");
494 return -1;
495 }
496
497 cpus = cpu_map__new(NULL);
498 if (cpus == NULL) {
499 pr_debug("cpu_map__new\n");
500 goto out_free_threads;
501 }
502
503 CPU_ZERO(&cpu_set);
504 CPU_SET(cpus->map[0], &cpu_set);
505 sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
506 if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
507 pr_debug("sched_setaffinity() failed on CPU %d: %s ",
508 cpus->map[0], strerror(errno));
509 goto out_free_cpus;
510 }
511
512 evlist = perf_evlist__new(cpus, threads);
513 if (evlist == NULL) {
514 pr_debug("perf_evlist__new\n");
515 goto out_free_cpus;
516 }
517
518 /* anonymous union fields, can't be initialized above */
519 attr.wakeup_events = 1;
520 attr.sample_period = 1;
521
522 for (i = 0; i < nsyscalls; ++i) {
523 attr.config = ids[i];
524 evsels[i] = perf_evsel__new(&attr, i);
525 if (evsels[i] == NULL) {
526 pr_debug("perf_evsel__new\n");
527 goto out_free_evlist;
528 }
529
530 perf_evlist__add(evlist, evsels[i]);
531
532 if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) {
533 pr_debug("failed to open counter: %s, "
534 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
535 strerror(errno));
536 goto out_close_fd;
537 }
538 }
539
540 if (perf_evlist__mmap(evlist, 128, true) < 0) {
541 pr_debug("failed to mmap events: %d (%s)\n", errno,
542 strerror(errno));
543 goto out_close_fd;
544 }
545
546 for (i = 0; i < nsyscalls; ++i)
547 for (j = 0; j < expected_nr_events[i]; ++j) {
548 int foo = syscalls[i]();
549 ++foo;
550 }
551
552 while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) {
553 struct perf_sample sample;
554
555 if (event->header.type != PERF_RECORD_SAMPLE) {
556 pr_debug("unexpected %s event\n",
557 perf_event__name(event->header.type));
558 goto out_munmap;
559 }
560
561 perf_event__parse_sample(event, attr.sample_type, false, &sample);
562 evsel = perf_evlist__id2evsel(evlist, sample.id);
563 if (evsel == NULL) {
564 pr_debug("event with id %" PRIu64
565 " doesn't map to an evsel\n", sample.id);
566 goto out_munmap;
567 }
568 nr_events[evsel->idx]++;
569 }
570
571 list_for_each_entry(evsel, &evlist->entries, node) {
572 if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
573 pr_debug("expected %d %s events, got %d\n",
574 expected_nr_events[evsel->idx],
575 event_name(evsel), nr_events[evsel->idx]);
576 goto out_munmap;
577 }
578 }
579
580 err = 0;
581out_munmap:
582 perf_evlist__munmap(evlist);
583out_close_fd:
584 for (i = 0; i < nsyscalls; ++i)
585 perf_evsel__close_fd(evsels[i], 1, threads->nr);
586out_free_evlist:
587 perf_evlist__delete(evlist);
588out_free_cpus:
589 cpu_map__delete(cpus);
590out_free_threads:
591 thread_map__delete(threads);
592 return err;
593#undef nsyscalls
594}
595
440static struct test { 596static struct test {
441 const char *desc; 597 const char *desc;
442 int (*func)(void); 598 int (*func)(void);
@@ -454,6 +610,10 @@ static struct test {
454 .func = test__open_syscall_event_on_all_cpus, 610 .func = test__open_syscall_event_on_all_cpus,
455 }, 611 },
456 { 612 {
613 .desc = "read samples using the mmap interface",
614 .func = test__basic_mmap,
615 },
616 {
457 .func = NULL, 617 .func = NULL,
458 }, 618 },
459}; 619};
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 746cf03cb05d..67c0459dc325 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -264,9 +264,6 @@ pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end)
264 c->start_time = start; 264 c->start_time = start;
265 if (p->start_time == 0 || p->start_time > start) 265 if (p->start_time == 0 || p->start_time > start)
266 p->start_time = start; 266 p->start_time = start;
267
268 if (cpu > numcpus)
269 numcpus = cpu;
270} 267}
271 268
272#define MAX_CPUS 4096 269#define MAX_CPUS 4096
@@ -276,21 +273,24 @@ static int cpus_cstate_state[MAX_CPUS];
276static u64 cpus_pstate_start_times[MAX_CPUS]; 273static u64 cpus_pstate_start_times[MAX_CPUS];
277static u64 cpus_pstate_state[MAX_CPUS]; 274static u64 cpus_pstate_state[MAX_CPUS];
278 275
279static int process_comm_event(event_t *event, struct sample_data *sample __used, 276static int process_comm_event(union perf_event *event,
277 struct perf_sample *sample __used,
280 struct perf_session *session __used) 278 struct perf_session *session __used)
281{ 279{
282 pid_set_comm(event->comm.tid, event->comm.comm); 280 pid_set_comm(event->comm.tid, event->comm.comm);
283 return 0; 281 return 0;
284} 282}
285 283
286static int process_fork_event(event_t *event, struct sample_data *sample __used, 284static int process_fork_event(union perf_event *event,
285 struct perf_sample *sample __used,
287 struct perf_session *session __used) 286 struct perf_session *session __used)
288{ 287{
289 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); 288 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
290 return 0; 289 return 0;
291} 290}
292 291
293static int process_exit_event(event_t *event, struct sample_data *sample __used, 292static int process_exit_event(union perf_event *event,
293 struct perf_sample *sample __used,
294 struct perf_session *session __used) 294 struct perf_session *session __used)
295{ 295{
296 pid_exit(event->fork.pid, event->fork.time); 296 pid_exit(event->fork.pid, event->fork.time);
@@ -486,8 +486,8 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
486} 486}
487 487
488 488
489static int process_sample_event(event_t *event __used, 489static int process_sample_event(union perf_event *event __used,
490 struct sample_data *sample, 490 struct perf_sample *sample,
491 struct perf_session *session) 491 struct perf_session *session)
492{ 492{
493 struct trace_entry *te; 493 struct trace_entry *te;
@@ -511,6 +511,9 @@ static int process_sample_event(event_t *event __used,
511 if (!event_str) 511 if (!event_str)
512 return 0; 512 return 0;
513 513
514 if (sample->cpu > numcpus)
515 numcpus = sample->cpu;
516
514 if (strcmp(event_str, "power:cpu_idle") == 0) { 517 if (strcmp(event_str, "power:cpu_idle") == 0) {
515 struct power_processor_entry *ppe = (void *)te; 518 struct power_processor_entry *ppe = (void *)te;
516 if (ppe->state == (u32)PWR_EVENT_EXIT) 519 if (ppe->state == (u32)PWR_EVENT_EXIT)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5a29d9cd9486..80c9e062bd5b 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -20,11 +20,16 @@
20 20
21#include "perf.h" 21#include "perf.h"
22 22
23#include "util/annotate.h"
24#include "util/cache.h"
23#include "util/color.h" 25#include "util/color.h"
26#include "util/evlist.h"
24#include "util/evsel.h" 27#include "util/evsel.h"
25#include "util/session.h" 28#include "util/session.h"
26#include "util/symbol.h" 29#include "util/symbol.h"
27#include "util/thread.h" 30#include "util/thread.h"
31#include "util/thread_map.h"
32#include "util/top.h"
28#include "util/util.h" 33#include "util/util.h"
29#include <linux/rbtree.h> 34#include <linux/rbtree.h>
30#include "util/parse-options.h" 35#include "util/parse-options.h"
@@ -45,7 +50,6 @@
45#include <errno.h> 50#include <errno.h>
46#include <time.h> 51#include <time.h>
47#include <sched.h> 52#include <sched.h>
48#include <pthread.h>
49 53
50#include <sys/syscall.h> 54#include <sys/syscall.h>
51#include <sys/ioctl.h> 55#include <sys/ioctl.h>
@@ -60,85 +64,42 @@
60 64
61#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 65#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
62 66
67static struct perf_top top = {
68 .count_filter = 5,
69 .delay_secs = 2,
70 .display_weighted = -1,
71 .target_pid = -1,
72 .target_tid = -1,
73 .active_symbols = LIST_HEAD_INIT(top.active_symbols),
74 .active_symbols_lock = PTHREAD_MUTEX_INITIALIZER,
75 .active_symbols_cond = PTHREAD_COND_INITIALIZER,
76 .freq = 1000, /* 1 KHz */
77};
78
63static bool system_wide = false; 79static bool system_wide = false;
64 80
65static int default_interval = 0; 81static bool use_tui, use_stdio;
66 82
67static int count_filter = 5; 83static int default_interval = 0;
68static int print_entries;
69 84
70static int target_pid = -1;
71static int target_tid = -1;
72static struct thread_map *threads;
73static bool inherit = false; 85static bool inherit = false;
74static struct cpu_map *cpus;
75static int realtime_prio = 0; 86static int realtime_prio = 0;
76static bool group = false; 87static bool group = false;
77static unsigned int page_size; 88static unsigned int page_size;
78static unsigned int mmap_pages = 16; 89static unsigned int mmap_pages = 128;
79static int freq = 1000; /* 1 KHz */
80 90
81static int delay_secs = 2;
82static bool zero = false;
83static bool dump_symtab = false; 91static bool dump_symtab = false;
84 92
85static bool hide_kernel_symbols = false;
86static bool hide_user_symbols = false;
87static struct winsize winsize; 93static struct winsize winsize;
88 94
89/*
90 * Source
91 */
92
93struct source_line {
94 u64 eip;
95 unsigned long count[MAX_COUNTERS];
96 char *line;
97 struct source_line *next;
98};
99
100static const char *sym_filter = NULL; 95static const char *sym_filter = NULL;
101struct sym_entry *sym_filter_entry = NULL;
102struct sym_entry *sym_filter_entry_sched = NULL; 96struct sym_entry *sym_filter_entry_sched = NULL;
103static int sym_pcnt_filter = 5; 97static int sym_pcnt_filter = 5;
104static int sym_counter = 0;
105static struct perf_evsel *sym_evsel = NULL;
106static int display_weighted = -1;
107static const char *cpu_list;
108
109/*
110 * Symbols
111 */
112
113struct sym_entry_source {
114 struct source_line *source;
115 struct source_line *lines;
116 struct source_line **lines_tail;
117 pthread_mutex_t lock;
118};
119
120struct sym_entry {
121 struct rb_node rb_node;
122 struct list_head node;
123 unsigned long snap_count;
124 double weight;
125 int skip;
126 u16 name_len;
127 u8 origin;
128 struct map *map;
129 struct sym_entry_source *src;
130 unsigned long count[0];
131};
132 98
133/* 99/*
134 * Source functions 100 * Source functions
135 */ 101 */
136 102
137static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
138{
139 return ((void *)self) + symbol_conf.priv_size;
140}
141
142void get_term_dimensions(struct winsize *ws) 103void get_term_dimensions(struct winsize *ws)
143{ 104{
144 char *s = getenv("LINES"); 105 char *s = getenv("LINES");
@@ -163,10 +124,10 @@ void get_term_dimensions(struct winsize *ws)
163 124
164static void update_print_entries(struct winsize *ws) 125static void update_print_entries(struct winsize *ws)
165{ 126{
166 print_entries = ws->ws_row; 127 top.print_entries = ws->ws_row;
167 128
168 if (print_entries > 9) 129 if (top.print_entries > 9)
169 print_entries -= 9; 130 top.print_entries -= 9;
170} 131}
171 132
172static void sig_winch_handler(int sig __used) 133static void sig_winch_handler(int sig __used)
@@ -178,12 +139,9 @@ static void sig_winch_handler(int sig __used)
178static int parse_source(struct sym_entry *syme) 139static int parse_source(struct sym_entry *syme)
179{ 140{
180 struct symbol *sym; 141 struct symbol *sym;
181 struct sym_entry_source *source; 142 struct annotation *notes;
182 struct map *map; 143 struct map *map;
183 FILE *file; 144 int err = -1;
184 char command[PATH_MAX*2];
185 const char *path;
186 u64 len;
187 145
188 if (!syme) 146 if (!syme)
189 return -1; 147 return -1;
@@ -194,411 +152,137 @@ static int parse_source(struct sym_entry *syme)
194 /* 152 /*
195 * We can't annotate with just /proc/kallsyms 153 * We can't annotate with just /proc/kallsyms
196 */ 154 */
197 if (map->dso->origin == DSO__ORIG_KERNEL) 155 if (map->dso->origin == DSO__ORIG_KERNEL) {
156 pr_err("Can't annotate %s: No vmlinux file was found in the "
157 "path\n", sym->name);
158 sleep(1);
198 return -1; 159 return -1;
199
200 if (syme->src == NULL) {
201 syme->src = zalloc(sizeof(*source));
202 if (syme->src == NULL)
203 return -1;
204 pthread_mutex_init(&syme->src->lock, NULL);
205 } 160 }
206 161
207 source = syme->src; 162 notes = symbol__annotation(sym);
208 163 if (notes->src != NULL) {
209 if (source->lines) { 164 pthread_mutex_lock(&notes->lock);
210 pthread_mutex_lock(&source->lock);
211 goto out_assign; 165 goto out_assign;
212 } 166 }
213 path = map->dso->long_name;
214
215 len = sym->end - sym->start;
216
217 sprintf(command,
218 "objdump --start-address=%#0*" PRIx64 " --stop-address=%#0*" PRIx64 " -dS %s",
219 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
220 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
221
222 file = popen(command, "r");
223 if (!file)
224 return -1;
225
226 pthread_mutex_lock(&source->lock);
227 source->lines_tail = &source->lines;
228 while (!feof(file)) {
229 struct source_line *src;
230 size_t dummy = 0;
231 char *c, *sep;
232 167
233 src = malloc(sizeof(struct source_line)); 168 pthread_mutex_lock(&notes->lock);
234 assert(src != NULL);
235 memset(src, 0, sizeof(struct source_line));
236 169
237 if (getline(&src->line, &dummy, file) < 0) 170 if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
238 break; 171 pthread_mutex_unlock(&notes->lock);
239 if (!src->line) 172 pr_err("Not enough memory for annotating '%s' symbol!\n",
240 break; 173 sym->name);
241 174 sleep(1);
242 c = strchr(src->line, '\n'); 175 return err;
243 if (c)
244 *c = 0;
245
246 src->next = NULL;
247 *source->lines_tail = src;
248 source->lines_tail = &src->next;
249
250 src->eip = strtoull(src->line, &sep, 16);
251 if (*sep == ':')
252 src->eip = map__objdump_2ip(map, src->eip);
253 else /* this line has no ip info (e.g. source line) */
254 src->eip = 0;
255 } 176 }
256 pclose(file); 177
178 err = symbol__annotate(sym, syme->map, 0);
179 if (err == 0) {
257out_assign: 180out_assign:
258 sym_filter_entry = syme; 181 top.sym_filter_entry = syme;
259 pthread_mutex_unlock(&source->lock); 182 }
260 return 0; 183
184 pthread_mutex_unlock(&notes->lock);
185 return err;
261} 186}
262 187
263static void __zero_source_counters(struct sym_entry *syme) 188static void __zero_source_counters(struct sym_entry *syme)
264{ 189{
265 int i; 190 struct symbol *sym = sym_entry__symbol(syme);
266 struct source_line *line; 191 symbol__annotate_zero_histograms(sym);
267
268 line = syme->src->lines;
269 while (line) {
270 for (i = 0; i < nr_counters; i++)
271 line->count[i] = 0;
272 line = line->next;
273 }
274} 192}
275 193
276static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 194static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
277{ 195{
278 struct source_line *line; 196 struct annotation *notes;
279 197 struct symbol *sym;
280 if (syme != sym_filter_entry)
281 return;
282 198
283 if (pthread_mutex_trylock(&syme->src->lock)) 199 if (syme != top.sym_filter_entry)
284 return; 200 return;
285 201
286 if (syme->src == NULL || syme->src->source == NULL) 202 sym = sym_entry__symbol(syme);
287 goto out_unlock; 203 notes = symbol__annotation(sym);
288
289 for (line = syme->src->lines; line; line = line->next) {
290 /* skip lines without IP info */
291 if (line->eip == 0)
292 continue;
293 if (line->eip == ip) {
294 line->count[counter]++;
295 break;
296 }
297 if (line->eip > ip)
298 break;
299 }
300out_unlock:
301 pthread_mutex_unlock(&syme->src->lock);
302}
303
304#define PATTERN_LEN (BITS_PER_LONG / 4 + 2)
305
306static void lookup_sym_source(struct sym_entry *syme)
307{
308 struct symbol *symbol = sym_entry__symbol(syme);
309 struct source_line *line;
310 char pattern[PATTERN_LEN + 1];
311
312 sprintf(pattern, "%0*" PRIx64 " <", BITS_PER_LONG / 4,
313 map__rip_2objdump(syme->map, symbol->start));
314
315 pthread_mutex_lock(&syme->src->lock);
316 for (line = syme->src->lines; line; line = line->next) {
317 if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
318 syme->src->source = line;
319 break;
320 }
321 }
322 pthread_mutex_unlock(&syme->src->lock);
323}
324 204
325static void show_lines(struct source_line *queue, int count, int total) 205 if (pthread_mutex_trylock(&notes->lock))
326{ 206 return;
327 int i;
328 struct source_line *line;
329 207
330 line = queue; 208 ip = syme->map->map_ip(syme->map, ip);
331 for (i = 0; i < count; i++) { 209 symbol__inc_addr_samples(sym, syme->map, counter, ip);
332 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
333 210
334 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 211 pthread_mutex_unlock(&notes->lock);
335 line = line->next;
336 }
337} 212}
338 213
339#define TRACE_COUNT 3
340
341static void show_details(struct sym_entry *syme) 214static void show_details(struct sym_entry *syme)
342{ 215{
216 struct annotation *notes;
343 struct symbol *symbol; 217 struct symbol *symbol;
344 struct source_line *line; 218 int more;
345 struct source_line *line_queue = NULL;
346 int displayed = 0;
347 int line_queue_count = 0, total = 0, more = 0;
348 219
349 if (!syme) 220 if (!syme)
350 return; 221 return;
351 222
352 if (!syme->src->source)
353 lookup_sym_source(syme);
354
355 if (!syme->src->source)
356 return;
357
358 symbol = sym_entry__symbol(syme); 223 symbol = sym_entry__symbol(syme);
359 printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name); 224 notes = symbol__annotation(symbol);
360 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
361
362 pthread_mutex_lock(&syme->src->lock);
363 line = syme->src->source;
364 while (line) {
365 total += line->count[sym_counter];
366 line = line->next;
367 }
368
369 line = syme->src->source;
370 while (line) {
371 float pcnt = 0.0;
372
373 if (!line_queue_count)
374 line_queue = line;
375 line_queue_count++;
376
377 if (line->count[sym_counter])
378 pcnt = 100.0 * line->count[sym_counter] / (float)total;
379 if (pcnt >= (float)sym_pcnt_filter) {
380 if (displayed <= print_entries)
381 show_lines(line_queue, line_queue_count, total);
382 else more++;
383 displayed += line_queue_count;
384 line_queue_count = 0;
385 line_queue = NULL;
386 } else if (line_queue_count > TRACE_COUNT) {
387 line_queue = line_queue->next;
388 line_queue_count--;
389 }
390
391 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
392 line = line->next;
393 }
394 pthread_mutex_unlock(&syme->src->lock);
395 if (more)
396 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
397}
398 225
399/* 226 pthread_mutex_lock(&notes->lock);
400 * Symbols will be added here in event__process_sample and will get out
401 * after decayed.
402 */
403static LIST_HEAD(active_symbols);
404static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
405
406/*
407 * Ordering weight: count-1 * count-2 * ... / count-n
408 */
409static double sym_weight(const struct sym_entry *sym)
410{
411 double weight = sym->snap_count;
412 int counter;
413
414 if (!display_weighted)
415 return weight;
416 227
417 for (counter = 1; counter < nr_counters-1; counter++) 228 if (notes->src == NULL)
418 weight *= sym->count[counter]; 229 goto out_unlock;
419 230
420 weight /= (sym->count[counter] + 1); 231 printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
232 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
421 233
422 return weight; 234 more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
235 0, sym_pcnt_filter, top.print_entries, 4);
236 if (top.zero)
237 symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
238 else
239 symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
240 if (more != 0)
241 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
242out_unlock:
243 pthread_mutex_unlock(&notes->lock);
423} 244}
424 245
425static long samples;
426static long kernel_samples, us_samples;
427static long exact_samples;
428static long guest_us_samples, guest_kernel_samples;
429static const char CONSOLE_CLEAR[] = ""; 246static const char CONSOLE_CLEAR[] = "";
430 247
431static void __list_insert_active_sym(struct sym_entry *syme) 248static void __list_insert_active_sym(struct sym_entry *syme)
432{ 249{
433 list_add(&syme->node, &active_symbols); 250 list_add(&syme->node, &top.active_symbols);
434}
435
436static void list_remove_active_sym(struct sym_entry *syme)
437{
438 pthread_mutex_lock(&active_symbols_lock);
439 list_del_init(&syme->node);
440 pthread_mutex_unlock(&active_symbols_lock);
441} 251}
442 252
443static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) 253static void print_sym_table(struct perf_session *session)
444{ 254{
445 struct rb_node **p = &tree->rb_node; 255 char bf[160];
446 struct rb_node *parent = NULL; 256 int printed = 0;
447 struct sym_entry *iter;
448
449 while (*p != NULL) {
450 parent = *p;
451 iter = rb_entry(parent, struct sym_entry, rb_node);
452
453 if (se->weight > iter->weight)
454 p = &(*p)->rb_left;
455 else
456 p = &(*p)->rb_right;
457 }
458
459 rb_link_node(&se->rb_node, parent, p);
460 rb_insert_color(&se->rb_node, tree);
461}
462
463static void print_sym_table(void)
464{
465 int printed = 0, j;
466 struct perf_evsel *counter;
467 int snap = !display_weighted ? sym_counter : 0;
468 float samples_per_sec = samples/delay_secs;
469 float ksamples_per_sec = kernel_samples/delay_secs;
470 float us_samples_per_sec = (us_samples)/delay_secs;
471 float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs;
472 float guest_us_samples_per_sec = (guest_us_samples)/delay_secs;
473 float esamples_percent = (100.0*exact_samples)/samples;
474 float sum_ksamples = 0.0;
475 struct sym_entry *syme, *n;
476 struct rb_root tmp = RB_ROOT;
477 struct rb_node *nd; 257 struct rb_node *nd;
478 int sym_width = 0, dso_width = 0, dso_short_width = 0; 258 struct sym_entry *syme;
259 struct rb_root tmp = RB_ROOT;
479 const int win_width = winsize.ws_col - 1; 260 const int win_width = winsize.ws_col - 1;
480 261 int sym_width, dso_width, dso_short_width;
481 samples = us_samples = kernel_samples = exact_samples = 0; 262 float sum_ksamples = perf_top__decay_samples(&top, &tmp);
482 guest_kernel_samples = guest_us_samples = 0;
483
484 /* Sort the active symbols */
485 pthread_mutex_lock(&active_symbols_lock);
486 syme = list_entry(active_symbols.next, struct sym_entry, node);
487 pthread_mutex_unlock(&active_symbols_lock);
488
489 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
490 syme->snap_count = syme->count[snap];
491 if (syme->snap_count != 0) {
492
493 if ((hide_user_symbols &&
494 syme->origin == PERF_RECORD_MISC_USER) ||
495 (hide_kernel_symbols &&
496 syme->origin == PERF_RECORD_MISC_KERNEL)) {
497 list_remove_active_sym(syme);
498 continue;
499 }
500 syme->weight = sym_weight(syme);
501 rb_insert_active_sym(&tmp, syme);
502 sum_ksamples += syme->snap_count;
503
504 for (j = 0; j < nr_counters; j++)
505 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
506 } else
507 list_remove_active_sym(syme);
508 }
509 263
510 puts(CONSOLE_CLEAR); 264 puts(CONSOLE_CLEAR);
511 265
512 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 266 perf_top__header_snprintf(&top, bf, sizeof(bf));
513 if (!perf_guest) { 267 printf("%s\n", bf);
514 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
515 " exact: %4.1f%% [",
516 samples_per_sec,
517 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
518 samples_per_sec)),
519 esamples_percent);
520 } else {
521 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
522 " guest kernel:%4.1f%% guest us:%4.1f%%"
523 " exact: %4.1f%% [",
524 samples_per_sec,
525 100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) /
526 samples_per_sec)),
527 100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) /
528 samples_per_sec)),
529 100.0 - (100.0 * ((samples_per_sec -
530 guest_kernel_samples_per_sec) /
531 samples_per_sec)),
532 100.0 - (100.0 * ((samples_per_sec -
533 guest_us_samples_per_sec) /
534 samples_per_sec)),
535 esamples_percent);
536 }
537
538 if (nr_counters == 1 || !display_weighted) {
539 struct perf_evsel *first;
540 first = list_entry(evsel_list.next, struct perf_evsel, node);
541 printf("%" PRIu64, (uint64_t)first->attr.sample_period);
542 if (freq)
543 printf("Hz ");
544 else
545 printf(" ");
546 }
547
548 if (!display_weighted)
549 printf("%s", event_name(sym_evsel));
550 else list_for_each_entry(counter, &evsel_list, node) {
551 if (counter->idx)
552 printf("/");
553
554 printf("%s", event_name(counter));
555 }
556 268
557 printf( "], "); 269 perf_top__reset_sample_counters(&top);
558
559 if (target_pid != -1)
560 printf(" (target_pid: %d", target_pid);
561 else if (target_tid != -1)
562 printf(" (target_tid: %d", target_tid);
563 else
564 printf(" (all");
565
566 if (cpu_list)
567 printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
568 else {
569 if (target_tid != -1)
570 printf(")\n");
571 else
572 printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
573 }
574 270
575 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 271 printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
576 272
577 if (sym_filter_entry) { 273 if (session->hists.stats.total_lost != 0) {
578 show_details(sym_filter_entry); 274 color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
579 return; 275 printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
276 session->hists.stats.total_lost);
580 } 277 }
581 278
582 /* 279 if (top.sym_filter_entry) {
583 * Find the longest symbol name that will be displayed 280 show_details(top.sym_filter_entry);
584 */ 281 return;
585 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
586 syme = rb_entry(nd, struct sym_entry, rb_node);
587 if (++printed > print_entries ||
588 (int)syme->snap_count < count_filter)
589 continue;
590
591 if (syme->map->dso->long_name_len > dso_width)
592 dso_width = syme->map->dso->long_name_len;
593
594 if (syme->map->dso->short_name_len > dso_short_width)
595 dso_short_width = syme->map->dso->short_name_len;
596
597 if (syme->name_len > sym_width)
598 sym_width = syme->name_len;
599 } 282 }
600 283
601 printed = 0; 284 perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
285 &sym_width);
602 286
603 if (sym_width + dso_width > winsize.ws_col - 29) { 287 if (sym_width + dso_width > winsize.ws_col - 29) {
604 dso_width = dso_short_width; 288 dso_width = dso_short_width;
@@ -606,7 +290,7 @@ static void print_sym_table(void)
606 sym_width = winsize.ws_col - dso_width - 29; 290 sym_width = winsize.ws_col - dso_width - 29;
607 } 291 }
608 putchar('\n'); 292 putchar('\n');
609 if (nr_counters == 1) 293 if (top.evlist->nr_entries == 1)
610 printf(" samples pcnt"); 294 printf(" samples pcnt");
611 else 295 else
612 printf(" weight samples pcnt"); 296 printf(" weight samples pcnt");
@@ -615,7 +299,7 @@ static void print_sym_table(void)
615 printf(" RIP "); 299 printf(" RIP ");
616 printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); 300 printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
617 printf(" %s _______ _____", 301 printf(" %s _______ _____",
618 nr_counters == 1 ? " " : "______"); 302 top.evlist->nr_entries == 1 ? " " : "______");
619 if (verbose) 303 if (verbose)
620 printf(" ________________"); 304 printf(" ________________");
621 printf(" %-*.*s", sym_width, sym_width, graph_line); 305 printf(" %-*.*s", sym_width, sym_width, graph_line);
@@ -628,13 +312,14 @@ static void print_sym_table(void)
628 312
629 syme = rb_entry(nd, struct sym_entry, rb_node); 313 syme = rb_entry(nd, struct sym_entry, rb_node);
630 sym = sym_entry__symbol(syme); 314 sym = sym_entry__symbol(syme);
631 if (++printed > print_entries || (int)syme->snap_count < count_filter) 315 if (++printed > top.print_entries ||
316 (int)syme->snap_count < top.count_filter)
632 continue; 317 continue;
633 318
634 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 319 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
635 sum_ksamples)); 320 sum_ksamples));
636 321
637 if (nr_counters == 1 || !display_weighted) 322 if (top.evlist->nr_entries == 1 || !top.display_weighted)
638 printf("%20.2f ", syme->weight); 323 printf("%20.2f ", syme->weight);
639 else 324 else
640 printf("%9.1f %10ld ", syme->weight, syme->snap_count); 325 printf("%9.1f %10ld ", syme->weight, syme->snap_count);
@@ -693,10 +378,8 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
693 378
694 /* zero counters of active symbol */ 379 /* zero counters of active symbol */
695 if (syme) { 380 if (syme) {
696 pthread_mutex_lock(&syme->src->lock);
697 __zero_source_counters(syme); 381 __zero_source_counters(syme);
698 *target = NULL; 382 *target = NULL;
699 pthread_mutex_unlock(&syme->src->lock);
700 } 383 }
701 384
702 fprintf(stdout, "\n%s: ", msg); 385 fprintf(stdout, "\n%s: ", msg);
@@ -707,11 +390,11 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
707 if (p) 390 if (p)
708 *p = 0; 391 *p = 0;
709 392
710 pthread_mutex_lock(&active_symbols_lock); 393 pthread_mutex_lock(&top.active_symbols_lock);
711 syme = list_entry(active_symbols.next, struct sym_entry, node); 394 syme = list_entry(top.active_symbols.next, struct sym_entry, node);
712 pthread_mutex_unlock(&active_symbols_lock); 395 pthread_mutex_unlock(&top.active_symbols_lock);
713 396
714 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 397 list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
715 struct symbol *sym = sym_entry__symbol(syme); 398 struct symbol *sym = sym_entry__symbol(syme);
716 399
717 if (!strcmp(buf, sym->name)) { 400 if (!strcmp(buf, sym->name)) {
@@ -735,34 +418,34 @@ static void print_mapped_keys(void)
735{ 418{
736 char *name = NULL; 419 char *name = NULL;
737 420
738 if (sym_filter_entry) { 421 if (top.sym_filter_entry) {
739 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 422 struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
740 name = sym->name; 423 name = sym->name;
741 } 424 }
742 425
743 fprintf(stdout, "\nMapped keys:\n"); 426 fprintf(stdout, "\nMapped keys:\n");
744 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 427 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top.delay_secs);
745 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 428 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries);
746 429
747 if (nr_counters > 1) 430 if (top.evlist->nr_entries > 1)
748 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel)); 431 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top.sym_evsel));
749 432
750 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 433 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter);
751 434
752 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 435 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
753 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 436 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
754 fprintf(stdout, "\t[S] stop annotation.\n"); 437 fprintf(stdout, "\t[S] stop annotation.\n");
755 438
756 if (nr_counters > 1) 439 if (top.evlist->nr_entries > 1)
757 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 440 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);
758 441
759 fprintf(stdout, 442 fprintf(stdout,
760 "\t[K] hide kernel_symbols symbols. \t(%s)\n", 443 "\t[K] hide kernel_symbols symbols. \t(%s)\n",
761 hide_kernel_symbols ? "yes" : "no"); 444 top.hide_kernel_symbols ? "yes" : "no");
762 fprintf(stdout, 445 fprintf(stdout,
763 "\t[U] hide user symbols. \t(%s)\n", 446 "\t[U] hide user symbols. \t(%s)\n",
764 hide_user_symbols ? "yes" : "no"); 447 top.hide_user_symbols ? "yes" : "no");
765 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 448 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0);
766 fprintf(stdout, "\t[qQ] quit.\n"); 449 fprintf(stdout, "\t[qQ] quit.\n");
767} 450}
768 451
@@ -783,7 +466,7 @@ static int key_mapped(int c)
783 return 1; 466 return 1;
784 case 'E': 467 case 'E':
785 case 'w': 468 case 'w':
786 return nr_counters > 1 ? 1 : 0; 469 return top.evlist->nr_entries > 1 ? 1 : 0;
787 default: 470 default:
788 break; 471 break;
789 } 472 }
@@ -818,47 +501,47 @@ static void handle_keypress(struct perf_session *session, int c)
818 501
819 switch (c) { 502 switch (c) {
820 case 'd': 503 case 'd':
821 prompt_integer(&delay_secs, "Enter display delay"); 504 prompt_integer(&top.delay_secs, "Enter display delay");
822 if (delay_secs < 1) 505 if (top.delay_secs < 1)
823 delay_secs = 1; 506 top.delay_secs = 1;
824 break; 507 break;
825 case 'e': 508 case 'e':
826 prompt_integer(&print_entries, "Enter display entries (lines)"); 509 prompt_integer(&top.print_entries, "Enter display entries (lines)");
827 if (print_entries == 0) { 510 if (top.print_entries == 0) {
828 sig_winch_handler(SIGWINCH); 511 sig_winch_handler(SIGWINCH);
829 signal(SIGWINCH, sig_winch_handler); 512 signal(SIGWINCH, sig_winch_handler);
830 } else 513 } else
831 signal(SIGWINCH, SIG_DFL); 514 signal(SIGWINCH, SIG_DFL);
832 break; 515 break;
833 case 'E': 516 case 'E':
834 if (nr_counters > 1) { 517 if (top.evlist->nr_entries > 1) {
835 fprintf(stderr, "\nAvailable events:"); 518 fprintf(stderr, "\nAvailable events:");
836 519
837 list_for_each_entry(sym_evsel, &evsel_list, node) 520 list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
838 fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel)); 521 fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
839 522
840 prompt_integer(&sym_counter, "Enter details event counter"); 523 prompt_integer(&top.sym_counter, "Enter details event counter");
841 524
842 if (sym_counter >= nr_counters) { 525 if (top.sym_counter >= top.evlist->nr_entries) {
843 sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node); 526 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
844 sym_counter = 0; 527 top.sym_counter = 0;
845 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel)); 528 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
846 sleep(1); 529 sleep(1);
847 break; 530 break;
848 } 531 }
849 list_for_each_entry(sym_evsel, &evsel_list, node) 532 list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
850 if (sym_evsel->idx == sym_counter) 533 if (top.sym_evsel->idx == top.sym_counter)
851 break; 534 break;
852 } else sym_counter = 0; 535 } else top.sym_counter = 0;
853 break; 536 break;
854 case 'f': 537 case 'f':
855 prompt_integer(&count_filter, "Enter display event count filter"); 538 prompt_integer(&top.count_filter, "Enter display event count filter");
856 break; 539 break;
857 case 'F': 540 case 'F':
858 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 541 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
859 break; 542 break;
860 case 'K': 543 case 'K':
861 hide_kernel_symbols = !hide_kernel_symbols; 544 top.hide_kernel_symbols = !top.hide_kernel_symbols;
862 break; 545 break;
863 case 'q': 546 case 'q':
864 case 'Q': 547 case 'Q':
@@ -867,34 +550,50 @@ static void handle_keypress(struct perf_session *session, int c)
867 perf_session__fprintf_dsos(session, stderr); 550 perf_session__fprintf_dsos(session, stderr);
868 exit(0); 551 exit(0);
869 case 's': 552 case 's':
870 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 553 prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
871 break; 554 break;
872 case 'S': 555 case 'S':
873 if (!sym_filter_entry) 556 if (!top.sym_filter_entry)
874 break; 557 break;
875 else { 558 else {
876 struct sym_entry *syme = sym_filter_entry; 559 struct sym_entry *syme = top.sym_filter_entry;
877 560
878 pthread_mutex_lock(&syme->src->lock); 561 top.sym_filter_entry = NULL;
879 sym_filter_entry = NULL;
880 __zero_source_counters(syme); 562 __zero_source_counters(syme);
881 pthread_mutex_unlock(&syme->src->lock);
882 } 563 }
883 break; 564 break;
884 case 'U': 565 case 'U':
885 hide_user_symbols = !hide_user_symbols; 566 top.hide_user_symbols = !top.hide_user_symbols;
886 break; 567 break;
887 case 'w': 568 case 'w':
888 display_weighted = ~display_weighted; 569 top.display_weighted = ~top.display_weighted;
889 break; 570 break;
890 case 'z': 571 case 'z':
891 zero = !zero; 572 top.zero = !top.zero;
892 break; 573 break;
893 default: 574 default:
894 break; 575 break;
895 } 576 }
896} 577}
897 578
579static void *display_thread_tui(void *arg __used)
580{
581 int err = 0;
582 pthread_mutex_lock(&top.active_symbols_lock);
583 while (list_empty(&top.active_symbols)) {
584 err = pthread_cond_wait(&top.active_symbols_cond,
585 &top.active_symbols_lock);
586 if (err)
587 break;
588 }
589 pthread_mutex_unlock(&top.active_symbols_lock);
590 if (!err)
591 perf_top__tui_browser(&top);
592 exit_browser(0);
593 exit(0);
594 return NULL;
595}
596
898static void *display_thread(void *arg __used) 597static void *display_thread(void *arg __used)
899{ 598{
900 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 599 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
@@ -909,13 +608,13 @@ static void *display_thread(void *arg __used)
909 tc.c_cc[VTIME] = 0; 608 tc.c_cc[VTIME] = 0;
910 609
911repeat: 610repeat:
912 delay_msecs = delay_secs * 1000; 611 delay_msecs = top.delay_secs * 1000;
913 tcsetattr(0, TCSANOW, &tc); 612 tcsetattr(0, TCSANOW, &tc);
914 /* trash return*/ 613 /* trash return*/
915 getc(stdin); 614 getc(stdin);
916 615
917 do { 616 do {
918 print_sym_table(); 617 print_sym_table(session);
919 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 618 } while (!poll(&stdin_poll, 1, delay_msecs) == 1);
920 619
921 c = getc(stdin); 620 c = getc(stdin);
@@ -930,6 +629,7 @@ repeat:
930/* Tag samples to be skipped. */ 629/* Tag samples to be skipped. */
931static const char *skip_symbols[] = { 630static const char *skip_symbols[] = {
932 "default_idle", 631 "default_idle",
632 "native_safe_halt",
933 "cpu_idle", 633 "cpu_idle",
934 "enter_idle", 634 "enter_idle",
935 "exit_idle", 635 "exit_idle",
@@ -965,9 +665,9 @@ static int symbol_filter(struct map *map, struct symbol *sym)
965 665
966 syme = symbol__priv(sym); 666 syme = symbol__priv(sym);
967 syme->map = map; 667 syme->map = map;
968 syme->src = NULL; 668 symbol__annotate_init(map, sym);
969 669
970 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { 670 if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
971 /* schedule initial sym_filter_entry setup */ 671 /* schedule initial sym_filter_entry setup */
972 sym_filter_entry_sched = syme; 672 sym_filter_entry_sched = syme;
973 sym_filter = NULL; 673 sym_filter = NULL;
@@ -980,44 +680,40 @@ static int symbol_filter(struct map *map, struct symbol *sym)
980 } 680 }
981 } 681 }
982 682
983 if (!syme->skip)
984 syme->name_len = strlen(sym->name);
985
986 return 0; 683 return 0;
987} 684}
988 685
989static void event__process_sample(const event_t *self, 686static void perf_event__process_sample(const union perf_event *event,
990 struct sample_data *sample, 687 struct perf_sample *sample,
991 struct perf_session *session, 688 struct perf_session *session)
992 struct perf_evsel *evsel)
993{ 689{
994 u64 ip = self->ip.ip; 690 u64 ip = event->ip.ip;
995 struct sym_entry *syme; 691 struct sym_entry *syme;
996 struct addr_location al; 692 struct addr_location al;
997 struct machine *machine; 693 struct machine *machine;
998 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 694 u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
999 695
1000 ++samples; 696 ++top.samples;
1001 697
1002 switch (origin) { 698 switch (origin) {
1003 case PERF_RECORD_MISC_USER: 699 case PERF_RECORD_MISC_USER:
1004 ++us_samples; 700 ++top.us_samples;
1005 if (hide_user_symbols) 701 if (top.hide_user_symbols)
1006 return; 702 return;
1007 machine = perf_session__find_host_machine(session); 703 machine = perf_session__find_host_machine(session);
1008 break; 704 break;
1009 case PERF_RECORD_MISC_KERNEL: 705 case PERF_RECORD_MISC_KERNEL:
1010 ++kernel_samples; 706 ++top.kernel_samples;
1011 if (hide_kernel_symbols) 707 if (top.hide_kernel_symbols)
1012 return; 708 return;
1013 machine = perf_session__find_host_machine(session); 709 machine = perf_session__find_host_machine(session);
1014 break; 710 break;
1015 case PERF_RECORD_MISC_GUEST_KERNEL: 711 case PERF_RECORD_MISC_GUEST_KERNEL:
1016 ++guest_kernel_samples; 712 ++top.guest_kernel_samples;
1017 machine = perf_session__find_machine(session, self->ip.pid); 713 machine = perf_session__find_machine(session, event->ip.pid);
1018 break; 714 break;
1019 case PERF_RECORD_MISC_GUEST_USER: 715 case PERF_RECORD_MISC_GUEST_USER:
1020 ++guest_us_samples; 716 ++top.guest_us_samples;
1021 /* 717 /*
1022 * TODO: we don't process guest user from host side 718 * TODO: we don't process guest user from host side
1023 * except simple counting. 719 * except simple counting.
@@ -1029,15 +725,15 @@ static void event__process_sample(const event_t *self,
1029 725
1030 if (!machine && perf_guest) { 726 if (!machine && perf_guest) {
1031 pr_err("Can't find guest [%d]'s kernel information\n", 727 pr_err("Can't find guest [%d]'s kernel information\n",
1032 self->ip.pid); 728 event->ip.pid);
1033 return; 729 return;
1034 } 730 }
1035 731
1036 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 732 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
1037 exact_samples++; 733 top.exact_samples++;
1038 734
1039 if (event__preprocess_sample(self, session, &al, sample, 735 if (perf_event__preprocess_sample(event, session, &al, sample,
1040 symbol_filter) < 0 || 736 symbol_filter) < 0 ||
1041 al.filtered) 737 al.filtered)
1042 return; 738 return;
1043 739
@@ -1055,8 +751,9 @@ static void event__process_sample(const event_t *self,
1055 */ 751 */
1056 if (al.map == machine->vmlinux_maps[MAP__FUNCTION] && 752 if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
1057 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { 753 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
1058 pr_err("The %s file can't be used\n", 754 ui__warning("The %s file can't be used\n",
1059 symbol_conf.vmlinux_name); 755 symbol_conf.vmlinux_name);
756 exit_browser(0);
1060 exit(1); 757 exit(1);
1061 } 758 }
1062 759
@@ -1065,13 +762,13 @@ static void event__process_sample(const event_t *self,
1065 762
1066 /* let's see, whether we need to install initial sym_filter_entry */ 763 /* let's see, whether we need to install initial sym_filter_entry */
1067 if (sym_filter_entry_sched) { 764 if (sym_filter_entry_sched) {
1068 sym_filter_entry = sym_filter_entry_sched; 765 top.sym_filter_entry = sym_filter_entry_sched;
1069 sym_filter_entry_sched = NULL; 766 sym_filter_entry_sched = NULL;
1070 if (parse_source(sym_filter_entry) < 0) { 767 if (parse_source(top.sym_filter_entry) < 0) {
1071 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 768 struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
1072 769
1073 pr_err("Can't annotate %s", sym->name); 770 pr_err("Can't annotate %s", sym->name);
1074 if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { 771 if (top.sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
1075 pr_err(": No vmlinux file was found in the path:\n"); 772 pr_err(": No vmlinux file was found in the path:\n");
1076 machine__fprintf_vmlinux_path(machine, stderr); 773 machine__fprintf_vmlinux_path(machine, stderr);
1077 } else 774 } else
@@ -1082,166 +779,73 @@ static void event__process_sample(const event_t *self,
1082 779
1083 syme = symbol__priv(al.sym); 780 syme = symbol__priv(al.sym);
1084 if (!syme->skip) { 781 if (!syme->skip) {
1085 syme->count[evsel->idx]++; 782 struct perf_evsel *evsel;
783
1086 syme->origin = origin; 784 syme->origin = origin;
785 evsel = perf_evlist__id2evsel(top.evlist, sample->id);
786 assert(evsel != NULL);
787 syme->count[evsel->idx]++;
1087 record_precise_ip(syme, evsel->idx, ip); 788 record_precise_ip(syme, evsel->idx, ip);
1088 pthread_mutex_lock(&active_symbols_lock); 789 pthread_mutex_lock(&top.active_symbols_lock);
1089 if (list_empty(&syme->node) || !syme->node.next) 790 if (list_empty(&syme->node) || !syme->node.next) {
791 static bool first = true;
1090 __list_insert_active_sym(syme); 792 __list_insert_active_sym(syme);
1091 pthread_mutex_unlock(&active_symbols_lock); 793 if (first) {
794 pthread_cond_broadcast(&top.active_symbols_cond);
795 first = false;
796 }
797 }
798 pthread_mutex_unlock(&top.active_symbols_lock);
1092 } 799 }
1093} 800}
1094 801
1095struct mmap_data { 802static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
1096 void *base;
1097 int mask;
1098 unsigned int prev;
1099};
1100
1101static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
1102 int ncpus, int nthreads)
1103{
1104 evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
1105 return evsel->priv != NULL ? 0 : -ENOMEM;
1106}
1107
1108static void perf_evsel__free_mmap(struct perf_evsel *evsel)
1109{
1110 xyarray__delete(evsel->priv);
1111 evsel->priv = NULL;
1112}
1113
1114static unsigned int mmap_read_head(struct mmap_data *md)
1115{
1116 struct perf_event_mmap_page *pc = md->base;
1117 int head;
1118
1119 head = pc->data_head;
1120 rmb();
1121
1122 return head;
1123}
1124
1125static void perf_session__mmap_read_counter(struct perf_session *self,
1126 struct perf_evsel *evsel,
1127 int cpu, int thread_idx)
1128{ 803{
1129 struct xyarray *mmap_array = evsel->priv; 804 struct perf_sample sample;
1130 struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx); 805 union perf_event *event;
1131 unsigned int head = mmap_read_head(md);
1132 unsigned int old = md->prev;
1133 unsigned char *data = md->base + page_size;
1134 struct sample_data sample;
1135 int diff;
1136
1137 /*
1138 * If we're further behind than half the buffer, there's a chance
1139 * the writer will bite our tail and mess up the samples under us.
1140 *
1141 * If we somehow ended up ahead of the head, we got messed up.
1142 *
1143 * In either case, truncate and restart at head.
1144 */
1145 diff = head - old;
1146 if (diff > md->mask / 2 || diff < 0) {
1147 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
1148
1149 /*
1150 * head points to a known good entry, start there.
1151 */
1152 old = head;
1153 }
1154
1155 for (; old != head;) {
1156 event_t *event = (event_t *)&data[old & md->mask];
1157
1158 event_t event_copy;
1159 806
1160 size_t size = event->header.size; 807 while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) {
808 perf_session__parse_sample(self, event, &sample);
1161 809
1162 /*
1163 * Event straddles the mmap boundary -- header should always
1164 * be inside due to u64 alignment of output.
1165 */
1166 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1167 unsigned int offset = old;
1168 unsigned int len = min(sizeof(*event), size), cpy;
1169 void *dst = &event_copy;
1170
1171 do {
1172 cpy = min(md->mask + 1 - (offset & md->mask), len);
1173 memcpy(dst, &data[offset & md->mask], cpy);
1174 offset += cpy;
1175 dst += cpy;
1176 len -= cpy;
1177 } while (len);
1178
1179 event = &event_copy;
1180 }
1181
1182 event__parse_sample(event, self, &sample);
1183 if (event->header.type == PERF_RECORD_SAMPLE) 810 if (event->header.type == PERF_RECORD_SAMPLE)
1184 event__process_sample(event, &sample, self, evsel); 811 perf_event__process_sample(event, &sample, self);
1185 else 812 else
1186 event__process(event, &sample, self); 813 perf_event__process(event, &sample, self);
1187 old += size;
1188 } 814 }
1189
1190 md->prev = old;
1191} 815}
1192 816
1193static struct pollfd *event_array;
1194
1195static void perf_session__mmap_read(struct perf_session *self) 817static void perf_session__mmap_read(struct perf_session *self)
1196{ 818{
1197 struct perf_evsel *counter; 819 int i;
1198 int i, thread_index;
1199
1200 for (i = 0; i < cpus->nr; i++) {
1201 list_for_each_entry(counter, &evsel_list, node) {
1202 for (thread_index = 0;
1203 thread_index < threads->nr;
1204 thread_index++) {
1205 perf_session__mmap_read_counter(self,
1206 counter, i, thread_index);
1207 }
1208 }
1209 }
1210}
1211 820
1212int nr_poll; 821 for (i = 0; i < top.evlist->cpus->nr; i++)
1213int group_fd; 822 perf_session__mmap_read_cpu(self, i);
823}
1214 824
1215static void start_counter(int i, struct perf_evsel *evsel) 825static void start_counters(struct perf_evlist *evlist)
1216{ 826{
1217 struct xyarray *mmap_array = evsel->priv; 827 struct perf_evsel *counter;
1218 struct mmap_data *mm;
1219 struct perf_event_attr *attr;
1220 int cpu = -1;
1221 int thread_index;
1222
1223 if (target_tid == -1)
1224 cpu = cpus->map[i];
1225 828
1226 attr = &evsel->attr; 829 list_for_each_entry(counter, &evlist->entries, node) {
830 struct perf_event_attr *attr = &counter->attr;
1227 831
1228 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 832 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
1229 833
1230 if (freq) { 834 if (top.freq) {
1231 attr->sample_type |= PERF_SAMPLE_PERIOD; 835 attr->sample_type |= PERF_SAMPLE_PERIOD;
1232 attr->freq = 1; 836 attr->freq = 1;
1233 attr->sample_freq = freq; 837 attr->sample_freq = top.freq;
1234 } 838 }
1235 839
1236 attr->inherit = (cpu < 0) && inherit; 840 if (evlist->nr_entries > 1) {
1237 attr->mmap = 1; 841 attr->sample_type |= PERF_SAMPLE_ID;
842 attr->read_format |= PERF_FORMAT_ID;
843 }
1238 844
1239 for (thread_index = 0; thread_index < threads->nr; thread_index++) { 845 attr->mmap = 1;
1240try_again: 846try_again:
1241 FD(evsel, i, thread_index) = sys_perf_event_open(attr, 847 if (perf_evsel__open(counter, top.evlist->cpus,
1242 threads->map[thread_index], cpu, group_fd, 0); 848 top.evlist->threads, group, inherit) < 0) {
1243
1244 if (FD(evsel, i, thread_index) < 0) {
1245 int err = errno; 849 int err = errno;
1246 850
1247 if (err == EPERM || err == EACCES) 851 if (err == EPERM || err == EACCES)
@@ -1253,8 +857,8 @@ try_again:
1253 * based cpu-clock-tick sw counter, which 857 * based cpu-clock-tick sw counter, which
1254 * is always available even if no PMU support: 858 * is always available even if no PMU support:
1255 */ 859 */
1256 if (attr->type == PERF_TYPE_HARDWARE 860 if (attr->type == PERF_TYPE_HARDWARE &&
1257 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 861 attr->config == PERF_COUNT_HW_CPU_CYCLES) {
1258 862
1259 if (verbose) 863 if (verbose)
1260 warning(" ... trying to fall back to cpu-clock-ticks\n"); 864 warning(" ... trying to fall back to cpu-clock-ticks\n");
@@ -1264,39 +868,22 @@ try_again:
1264 goto try_again; 868 goto try_again;
1265 } 869 }
1266 printf("\n"); 870 printf("\n");
1267 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", 871 error("sys_perf_event_open() syscall returned with %d "
1268 FD(evsel, i, thread_index), strerror(err)); 872 "(%s). /bin/dmesg may provide additional information.\n",
873 err, strerror(err));
1269 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 874 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1270 exit(-1); 875 exit(-1);
1271 } 876 }
1272 assert(FD(evsel, i, thread_index) >= 0);
1273 fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
1274
1275 /*
1276 * First counter acts as the group leader:
1277 */
1278 if (group && group_fd == -1)
1279 group_fd = FD(evsel, i, thread_index);
1280
1281 event_array[nr_poll].fd = FD(evsel, i, thread_index);
1282 event_array[nr_poll].events = POLLIN;
1283 nr_poll++;
1284
1285 mm = xyarray__entry(mmap_array, i, thread_index);
1286 mm->prev = 0;
1287 mm->mask = mmap_pages*page_size - 1;
1288 mm->base = mmap(NULL, (mmap_pages+1)*page_size,
1289 PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
1290 if (mm->base == MAP_FAILED)
1291 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1292 } 877 }
878
879 if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
880 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1293} 881}
1294 882
1295static int __cmd_top(void) 883static int __cmd_top(void)
1296{ 884{
1297 pthread_t thread; 885 pthread_t thread;
1298 struct perf_evsel *counter; 886 int ret __used;
1299 int i, ret;
1300 /* 887 /*
1301 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this 888 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
1302 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. 889 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
@@ -1305,23 +892,23 @@ static int __cmd_top(void)
1305 if (session == NULL) 892 if (session == NULL)
1306 return -ENOMEM; 893 return -ENOMEM;
1307 894
1308 if (target_tid != -1) 895 if (top.target_tid != -1)
1309 event__synthesize_thread_map(threads, event__process, session); 896 perf_event__synthesize_thread_map(top.evlist->threads,
897 perf_event__process, session);
1310 else 898 else
1311 event__synthesize_threads(event__process, session); 899 perf_event__synthesize_threads(perf_event__process, session);
1312 900
1313 for (i = 0; i < cpus->nr; i++) { 901 start_counters(top.evlist);
1314 group_fd = -1; 902 session->evlist = top.evlist;
1315 list_for_each_entry(counter, &evsel_list, node) 903 perf_session__update_sample_type(session);
1316 start_counter(i, counter);
1317 }
1318 904
1319 /* Wait for a minimal set of events before starting the snapshot */ 905 /* Wait for a minimal set of events before starting the snapshot */
1320 poll(&event_array[0], nr_poll, 100); 906 poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
1321 907
1322 perf_session__mmap_read(session); 908 perf_session__mmap_read(session);
1323 909
1324 if (pthread_create(&thread, NULL, display_thread, session)) { 910 if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
911 display_thread), session)) {
1325 printf("Could not create display thread.\n"); 912 printf("Could not create display thread.\n");
1326 exit(-1); 913 exit(-1);
1327 } 914 }
@@ -1337,12 +924,12 @@ static int __cmd_top(void)
1337 } 924 }
1338 925
1339 while (1) { 926 while (1) {
1340 int hits = samples; 927 u64 hits = top.samples;
1341 928
1342 perf_session__mmap_read(session); 929 perf_session__mmap_read(session);
1343 930
1344 if (hits == samples) 931 if (hits == top.samples)
1345 ret = poll(event_array, nr_poll, 100); 932 ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
1346 } 933 }
1347 934
1348 return 0; 935 return 0;
@@ -1354,31 +941,31 @@ static const char * const top_usage[] = {
1354}; 941};
1355 942
1356static const struct option options[] = { 943static const struct option options[] = {
1357 OPT_CALLBACK('e', "event", NULL, "event", 944 OPT_CALLBACK('e', "event", &top.evlist, "event",
1358 "event selector. use 'perf list' to list available events", 945 "event selector. use 'perf list' to list available events",
1359 parse_events), 946 parse_events),
1360 OPT_INTEGER('c', "count", &default_interval, 947 OPT_INTEGER('c', "count", &default_interval,
1361 "event period to sample"), 948 "event period to sample"),
1362 OPT_INTEGER('p', "pid", &target_pid, 949 OPT_INTEGER('p', "pid", &top.target_pid,
1363 "profile events on existing process id"), 950 "profile events on existing process id"),
1364 OPT_INTEGER('t', "tid", &target_tid, 951 OPT_INTEGER('t', "tid", &top.target_tid,
1365 "profile events on existing thread id"), 952 "profile events on existing thread id"),
1366 OPT_BOOLEAN('a', "all-cpus", &system_wide, 953 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1367 "system-wide collection from all CPUs"), 954 "system-wide collection from all CPUs"),
1368 OPT_STRING('C', "cpu", &cpu_list, "cpu", 955 OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
1369 "list of cpus to monitor"), 956 "list of cpus to monitor"),
1370 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 957 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1371 "file", "vmlinux pathname"), 958 "file", "vmlinux pathname"),
1372 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, 959 OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1373 "hide kernel symbols"), 960 "hide kernel symbols"),
1374 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 961 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
1375 OPT_INTEGER('r', "realtime", &realtime_prio, 962 OPT_INTEGER('r', "realtime", &realtime_prio,
1376 "collect data with this RT SCHED_FIFO priority"), 963 "collect data with this RT SCHED_FIFO priority"),
1377 OPT_INTEGER('d', "delay", &delay_secs, 964 OPT_INTEGER('d', "delay", &top.delay_secs,
1378 "number of seconds to delay between refreshes"), 965 "number of seconds to delay between refreshes"),
1379 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 966 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
1380 "dump the symbol table used for profiling"), 967 "dump the symbol table used for profiling"),
1381 OPT_INTEGER('f', "count-filter", &count_filter, 968 OPT_INTEGER('f', "count-filter", &top.count_filter,
1382 "only display functions with more events than this"), 969 "only display functions with more events than this"),
1383 OPT_BOOLEAN('g', "group", &group, 970 OPT_BOOLEAN('g', "group", &group,
1384 "put the counters into a counter group"), 971 "put the counters into a counter group"),
@@ -1386,14 +973,16 @@ static const struct option options[] = {
1386 "child tasks inherit counters"), 973 "child tasks inherit counters"),
1387 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 974 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1388 "symbol to annotate"), 975 "symbol to annotate"),
1389 OPT_BOOLEAN('z', "zero", &zero, 976 OPT_BOOLEAN('z', "zero", &top.zero,
1390 "zero history across updates"), 977 "zero history across updates"),
1391 OPT_INTEGER('F', "freq", &freq, 978 OPT_INTEGER('F', "freq", &top.freq,
1392 "profile at this frequency"), 979 "profile at this frequency"),
1393 OPT_INTEGER('E', "entries", &print_entries, 980 OPT_INTEGER('E', "entries", &top.print_entries,
1394 "display this many functions"), 981 "display this many functions"),
1395 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, 982 OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1396 "hide user symbols"), 983 "hide user symbols"),
984 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
985 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
1397 OPT_INCR('v', "verbose", &verbose, 986 OPT_INCR('v', "verbose", &verbose,
1398 "be more verbose (show counter open errors, etc)"), 987 "be more verbose (show counter open errors, etc)"),
1399 OPT_END() 988 OPT_END()
@@ -1404,64 +993,68 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1404 struct perf_evsel *pos; 993 struct perf_evsel *pos;
1405 int status = -ENOMEM; 994 int status = -ENOMEM;
1406 995
996 top.evlist = perf_evlist__new(NULL, NULL);
997 if (top.evlist == NULL)
998 return -ENOMEM;
999
1407 page_size = sysconf(_SC_PAGE_SIZE); 1000 page_size = sysconf(_SC_PAGE_SIZE);
1408 1001
1409 argc = parse_options(argc, argv, options, top_usage, 0); 1002 argc = parse_options(argc, argv, options, top_usage, 0);
1410 if (argc) 1003 if (argc)
1411 usage_with_options(top_usage, options); 1004 usage_with_options(top_usage, options);
1412 1005
1413 if (target_pid != -1) 1006 /*
1414 target_tid = target_pid; 1007 * XXX For now start disabled, only using TUI if explicitely asked for.
1008 * Change that when handle_keys equivalent gets written, live annotation
1009 * done, etc.
1010 */
1011 use_browser = 0;
1415 1012
1416 threads = thread_map__new(target_pid, target_tid); 1013 if (use_stdio)
1417 if (threads == NULL) { 1014 use_browser = 0;
1418 pr_err("Problems finding threads of monitor\n"); 1015 else if (use_tui)
1419 usage_with_options(top_usage, options); 1016 use_browser = 1;
1420 }
1421 1017
1422 event_array = malloc((sizeof(struct pollfd) * 1018 setup_browser(false);
1423 MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
1424 if (!event_array)
1425 return -ENOMEM;
1426 1019
1427 /* CPU and PID are mutually exclusive */ 1020 /* CPU and PID are mutually exclusive */
1428 if (target_tid > 0 && cpu_list) { 1021 if (top.target_tid > 0 && top.cpu_list) {
1429 printf("WARNING: PID switch overriding CPU\n"); 1022 printf("WARNING: PID switch overriding CPU\n");
1430 sleep(1); 1023 sleep(1);
1431 cpu_list = NULL; 1024 top.cpu_list = NULL;
1432 } 1025 }
1433 1026
1434 if (!nr_counters && perf_evsel_list__create_default() < 0) { 1027 if (top.target_pid != -1)
1028 top.target_tid = top.target_pid;
1029
1030 if (perf_evlist__create_maps(top.evlist, top.target_pid,
1031 top.target_tid, top.cpu_list) < 0)
1032 usage_with_options(top_usage, options);
1033
1034 if (!top.evlist->nr_entries &&
1035 perf_evlist__add_default(top.evlist) < 0) {
1435 pr_err("Not enough memory for event selector list\n"); 1036 pr_err("Not enough memory for event selector list\n");
1436 return -ENOMEM; 1037 return -ENOMEM;
1437 } 1038 }
1438 1039
1439 if (delay_secs < 1) 1040 if (top.delay_secs < 1)
1440 delay_secs = 1; 1041 top.delay_secs = 1;
1441 1042
1442 /* 1043 /*
1443 * User specified count overrides default frequency. 1044 * User specified count overrides default frequency.
1444 */ 1045 */
1445 if (default_interval) 1046 if (default_interval)
1446 freq = 0; 1047 top.freq = 0;
1447 else if (freq) { 1048 else if (top.freq) {
1448 default_interval = freq; 1049 default_interval = top.freq;
1449 } else { 1050 } else {
1450 fprintf(stderr, "frequency and count are zero, aborting\n"); 1051 fprintf(stderr, "frequency and count are zero, aborting\n");
1451 exit(EXIT_FAILURE); 1052 exit(EXIT_FAILURE);
1452 } 1053 }
1453 1054
1454 if (target_tid != -1) 1055 list_for_each_entry(pos, &top.evlist->entries, node) {
1455 cpus = cpu_map__dummy_new(); 1056 if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
1456 else 1057 top.evlist->threads->nr) < 0)
1457 cpus = cpu_map__new(cpu_list);
1458
1459 if (cpus == NULL)
1460 usage_with_options(top_usage, options);
1461
1462 list_for_each_entry(pos, &evsel_list, node) {
1463 if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
1464 perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
1465 goto out_free_fd; 1058 goto out_free_fd;
1466 /* 1059 /*
1467 * Fill in the ones not specifically initialized via -c: 1060 * Fill in the ones not specifically initialized via -c:
@@ -1472,26 +1065,28 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1472 pos->attr.sample_period = default_interval; 1065 pos->attr.sample_period = default_interval;
1473 } 1066 }
1474 1067
1475 sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node); 1068 if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
1069 perf_evlist__alloc_mmap(top.evlist) < 0)
1070 goto out_free_fd;
1071
1072 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
1476 1073
1477 symbol_conf.priv_size = (sizeof(struct sym_entry) + 1074 symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
1478 (nr_counters + 1) * sizeof(unsigned long)); 1075 (top.evlist->nr_entries + 1) * sizeof(unsigned long));
1479 1076
1480 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); 1077 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
1481 if (symbol__init() < 0) 1078 if (symbol__init() < 0)
1482 return -1; 1079 return -1;
1483 1080
1484 get_term_dimensions(&winsize); 1081 get_term_dimensions(&winsize);
1485 if (print_entries == 0) { 1082 if (top.print_entries == 0) {
1486 update_print_entries(&winsize); 1083 update_print_entries(&winsize);
1487 signal(SIGWINCH, sig_winch_handler); 1084 signal(SIGWINCH, sig_winch_handler);
1488 } 1085 }
1489 1086
1490 status = __cmd_top(); 1087 status = __cmd_top();
1491out_free_fd: 1088out_free_fd:
1492 list_for_each_entry(pos, &evsel_list, node) 1089 perf_evlist__delete(top.evlist);
1493 perf_evsel__free_mmap(pos);
1494 perf_evsel_list__delete();
1495 1090
1496 return status; 1091 return status;
1497} 1092}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 95aaf565c704..a5fc660c1f12 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -94,6 +94,32 @@ void get_term_dimensions(struct winsize *ws);
94#include "util/types.h" 94#include "util/types.h"
95#include <stdbool.h> 95#include <stdbool.h>
96 96
97struct perf_mmap {
98 void *base;
99 int mask;
100 unsigned int prev;
101};
102
103static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
104{
105 struct perf_event_mmap_page *pc = mm->base;
106 int head = pc->data_head;
107 rmb();
108 return head;
109}
110
111static inline void perf_mmap__write_tail(struct perf_mmap *md,
112 unsigned long tail)
113{
114 struct perf_event_mmap_page *pc = md->base;
115
116 /*
117 * ensure all reads are done before we write the tail out.
118 */
119 /* mb(); */
120 pc->data_tail = tail;
121}
122
97/* 123/*
98 * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all 124 * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
99 * counters in the current task. 125 * counters in the current task.
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
new file mode 100755
index 000000000000..df638c438a9f
--- /dev/null
+++ b/tools/perf/python/twatch.py
@@ -0,0 +1,41 @@
1#! /usr/bin/python
2# -*- python -*-
3# -*- coding: utf-8 -*-
4# twatch - Experimental use of the perf python interface
5# Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com>
6#
7# This application is free software; you can redistribute it and/or
8# modify it under the terms of the GNU General Public License
9# as published by the Free Software Foundation; version 2.
10#
11# This application is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14# General Public License for more details.
15
16import perf
17
18def main():
19 cpus = perf.cpu_map()
20 threads = perf.thread_map()
21 evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
22 wakeup_events = 1, sample_period = 1,
23 sample_id_all = 1,
24 sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
25 evsel.open(cpus = cpus, threads = threads);
26 evlist = perf.evlist(cpus, threads)
27 evlist.add(evsel)
28 evlist.mmap()
29 while True:
30 evlist.poll(timeout = -1)
31 for cpu in cpus:
32 event = evlist.read_on_cpu(cpu)
33 if not event:
34 continue
35 print "cpu: %2d, pid: %4d, tid: %4d" % (event.sample_cpu,
36 event.sample_pid,
37 event.sample_tid),
38 print event
39
40if __name__ == '__main__':
41 main()
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
new file mode 100644
index 000000000000..0d0830c98cd7
--- /dev/null
+++ b/tools/perf/util/annotate.c
@@ -0,0 +1,605 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-annotate.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9
10#include "util.h"
11#include "build-id.h"
12#include "color.h"
13#include "cache.h"
14#include "symbol.h"
15#include "debug.h"
16#include "annotate.h"
17#include <pthread.h>
18
19int symbol__annotate_init(struct map *map __used, struct symbol *sym)
20{
21 struct annotation *notes = symbol__annotation(sym);
22 pthread_mutex_init(&notes->lock, NULL);
23 return 0;
24}
25
26int symbol__alloc_hist(struct symbol *sym, int nevents)
27{
28 struct annotation *notes = symbol__annotation(sym);
29 size_t sizeof_sym_hist = (sizeof(struct sym_hist) +
30 (sym->end - sym->start) * sizeof(u64));
31
32 notes->src = zalloc(sizeof(*notes->src) + nevents * sizeof_sym_hist);
33 if (notes->src == NULL)
34 return -1;
35 notes->src->sizeof_sym_hist = sizeof_sym_hist;
36 notes->src->nr_histograms = nevents;
37 INIT_LIST_HEAD(&notes->src->source);
38 return 0;
39}
40
41void symbol__annotate_zero_histograms(struct symbol *sym)
42{
43 struct annotation *notes = symbol__annotation(sym);
44
45 pthread_mutex_lock(&notes->lock);
46 if (notes->src != NULL)
47 memset(notes->src->histograms, 0,
48 notes->src->nr_histograms * notes->src->sizeof_sym_hist);
49 pthread_mutex_unlock(&notes->lock);
50}
51
52int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
53 int evidx, u64 addr)
54{
55 unsigned offset;
56 struct annotation *notes;
57 struct sym_hist *h;
58
59 notes = symbol__annotation(sym);
60 if (notes->src == NULL)
61 return -ENOMEM;
62
63 pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
64
65 if (addr >= sym->end)
66 return 0;
67
68 offset = addr - sym->start;
69 h = annotation__histogram(notes, evidx);
70 h->sum++;
71 h->addr[offset]++;
72
73 pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
74 ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name,
75 addr, addr - sym->start, evidx, h->addr[offset]);
76 return 0;
77}
78
79static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
80{
81 struct objdump_line *self = malloc(sizeof(*self) + privsize);
82
83 if (self != NULL) {
84 self->offset = offset;
85 self->line = line;
86 }
87
88 return self;
89}
90
91void objdump_line__free(struct objdump_line *self)
92{
93 free(self->line);
94 free(self);
95}
96
97static void objdump__add_line(struct list_head *head, struct objdump_line *line)
98{
99 list_add_tail(&line->node, head);
100}
101
102struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
103 struct objdump_line *pos)
104{
105 list_for_each_entry_continue(pos, head, node)
106 if (pos->offset >= 0)
107 return pos;
108
109 return NULL;
110}
111
112static int objdump_line__print(struct objdump_line *oline, struct symbol *sym,
113 int evidx, u64 len, int min_pcnt,
114 int printed, int max_lines,
115 struct objdump_line *queue)
116{
117 static const char *prev_line;
118 static const char *prev_color;
119
120 if (oline->offset != -1) {
121 const char *path = NULL;
122 unsigned int hits = 0;
123 double percent = 0.0;
124 const char *color;
125 struct annotation *notes = symbol__annotation(sym);
126 struct source_line *src_line = notes->src->lines;
127 struct sym_hist *h = annotation__histogram(notes, evidx);
128 s64 offset = oline->offset;
129 struct objdump_line *next;
130
131 next = objdump__get_next_ip_line(&notes->src->source, oline);
132
133 while (offset < (s64)len &&
134 (next == NULL || offset < next->offset)) {
135 if (src_line) {
136 if (path == NULL)
137 path = src_line[offset].path;
138 percent += src_line[offset].percent;
139 } else
140 hits += h->addr[offset];
141
142 ++offset;
143 }
144
145 if (src_line == NULL && h->sum)
146 percent = 100.0 * hits / h->sum;
147
148 if (percent < min_pcnt)
149 return -1;
150
151 if (max_lines && printed >= max_lines)
152 return 1;
153
154 if (queue != NULL) {
155 list_for_each_entry_from(queue, &notes->src->source, node) {
156 if (queue == oline)
157 break;
158 objdump_line__print(queue, sym, evidx, len,
159 0, 0, 1, NULL);
160 }
161 }
162
163 color = get_percent_color(percent);
164
165 /*
166 * Also color the filename and line if needed, with
167 * the same color than the percentage. Don't print it
168 * twice for close colored addr with the same filename:line
169 */
170 if (path) {
171 if (!prev_line || strcmp(prev_line, path)
172 || color != prev_color) {
173 color_fprintf(stdout, color, " %s", path);
174 prev_line = path;
175 prev_color = color;
176 }
177 }
178
179 color_fprintf(stdout, color, " %7.2f", percent);
180 printf(" : ");
181 color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line);
182 } else if (max_lines && printed >= max_lines)
183 return 1;
184 else {
185 if (queue)
186 return -1;
187
188 if (!*oline->line)
189 printf(" :\n");
190 else
191 printf(" : %s\n", oline->line);
192 }
193
194 return 0;
195}
196
197static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
198 FILE *file, size_t privsize)
199{
200 struct annotation *notes = symbol__annotation(sym);
201 struct objdump_line *objdump_line;
202 char *line = NULL, *tmp, *tmp2, *c;
203 size_t line_len;
204 s64 line_ip, offset = -1;
205
206 if (getline(&line, &line_len, file) < 0)
207 return -1;
208
209 if (!line)
210 return -1;
211
212 while (line_len != 0 && isspace(line[line_len - 1]))
213 line[--line_len] = '\0';
214
215 c = strchr(line, '\n');
216 if (c)
217 *c = 0;
218
219 line_ip = -1;
220
221 /*
222 * Strip leading spaces:
223 */
224 tmp = line;
225 while (*tmp) {
226 if (*tmp != ' ')
227 break;
228 tmp++;
229 }
230
231 if (*tmp) {
232 /*
233 * Parse hexa addresses followed by ':'
234 */
235 line_ip = strtoull(tmp, &tmp2, 16);
236 if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
237 line_ip = -1;
238 }
239
240 if (line_ip != -1) {
241 u64 start = map__rip_2objdump(map, sym->start),
242 end = map__rip_2objdump(map, sym->end);
243
244 offset = line_ip - start;
245 if (offset < 0 || (u64)line_ip > end)
246 offset = -1;
247 }
248
249 objdump_line = objdump_line__new(offset, line, privsize);
250 if (objdump_line == NULL) {
251 free(line);
252 return -1;
253 }
254 objdump__add_line(&notes->src->source, objdump_line);
255
256 return 0;
257}
258
259int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
260{
261 struct dso *dso = map->dso;
262 char *filename = dso__build_id_filename(dso, NULL, 0);
263 bool free_filename = true;
264 char command[PATH_MAX * 2];
265 FILE *file;
266 int err = 0;
267 char symfs_filename[PATH_MAX];
268
269 if (filename) {
270 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
271 symbol_conf.symfs, filename);
272 }
273
274 if (filename == NULL) {
275 if (dso->has_build_id) {
276 pr_err("Can't annotate %s: not enough memory\n",
277 sym->name);
278 return -ENOMEM;
279 }
280 goto fallback;
281 } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
282 strstr(command, "[kernel.kallsyms]") ||
283 access(symfs_filename, R_OK)) {
284 free(filename);
285fallback:
286 /*
287 * If we don't have build-ids or the build-id file isn't in the
288 * cache, or is just a kallsyms file, well, lets hope that this
289 * DSO is the same as when 'perf record' ran.
290 */
291 filename = dso->long_name;
292 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
293 symbol_conf.symfs, filename);
294 free_filename = false;
295 }
296
297 if (dso->origin == DSO__ORIG_KERNEL) {
298 char bf[BUILD_ID_SIZE * 2 + 16] = " with build id ";
299 char *build_id_msg = NULL;
300
301 if (dso->annotate_warned)
302 goto out_free_filename;
303
304 if (dso->has_build_id) {
305 build_id__sprintf(dso->build_id,
306 sizeof(dso->build_id), bf + 15);
307 build_id_msg = bf;
308 }
309 err = -ENOENT;
310 dso->annotate_warned = 1;
311 pr_err("Can't annotate %s: No vmlinux file%s was found in the "
312 "path.\nPlease use 'perf buildid-cache -av vmlinux' or "
313 "--vmlinux vmlinux.\n",
314 sym->name, build_id_msg ?: "");
315 goto out_free_filename;
316 }
317
318 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
319 filename, sym->name, map->unmap_ip(map, sym->start),
320 map->unmap_ip(map, sym->end));
321
322 pr_debug("annotating [%p] %30s : [%p] %30s\n",
323 dso, dso->long_name, sym, sym->name);
324
325 snprintf(command, sizeof(command),
326 "objdump --start-address=0x%016" PRIx64
327 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
328 map__rip_2objdump(map, sym->start),
329 map__rip_2objdump(map, sym->end),
330 symfs_filename, filename);
331
332 pr_debug("Executing: %s\n", command);
333
334 file = popen(command, "r");
335 if (!file)
336 goto out_free_filename;
337
338 while (!feof(file))
339 if (symbol__parse_objdump_line(sym, map, file, privsize) < 0)
340 break;
341
342 pclose(file);
343out_free_filename:
344 if (free_filename)
345 free(filename);
346 return err;
347}
348
349static void insert_source_line(struct rb_root *root, struct source_line *src_line)
350{
351 struct source_line *iter;
352 struct rb_node **p = &root->rb_node;
353 struct rb_node *parent = NULL;
354
355 while (*p != NULL) {
356 parent = *p;
357 iter = rb_entry(parent, struct source_line, node);
358
359 if (src_line->percent > iter->percent)
360 p = &(*p)->rb_left;
361 else
362 p = &(*p)->rb_right;
363 }
364
365 rb_link_node(&src_line->node, parent, p);
366 rb_insert_color(&src_line->node, root);
367}
368
369static void symbol__free_source_line(struct symbol *sym, int len)
370{
371 struct annotation *notes = symbol__annotation(sym);
372 struct source_line *src_line = notes->src->lines;
373 int i;
374
375 for (i = 0; i < len; i++)
376 free(src_line[i].path);
377
378 free(src_line);
379 notes->src->lines = NULL;
380}
381
382/* Get the filename:line for the colored entries */
383static int symbol__get_source_line(struct symbol *sym, struct map *map,
384 int evidx, struct rb_root *root, int len,
385 const char *filename)
386{
387 u64 start;
388 int i;
389 char cmd[PATH_MAX * 2];
390 struct source_line *src_line;
391 struct annotation *notes = symbol__annotation(sym);
392 struct sym_hist *h = annotation__histogram(notes, evidx);
393
394 if (!h->sum)
395 return 0;
396
397 src_line = notes->src->lines = calloc(len, sizeof(struct source_line));
398 if (!notes->src->lines)
399 return -1;
400
401 start = map->unmap_ip(map, sym->start);
402
403 for (i = 0; i < len; i++) {
404 char *path = NULL;
405 size_t line_len;
406 u64 offset;
407 FILE *fp;
408
409 src_line[i].percent = 100.0 * h->addr[i] / h->sum;
410 if (src_line[i].percent <= 0.5)
411 continue;
412
413 offset = start + i;
414 sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
415 fp = popen(cmd, "r");
416 if (!fp)
417 continue;
418
419 if (getline(&path, &line_len, fp) < 0 || !line_len)
420 goto next;
421
422 src_line[i].path = malloc(sizeof(char) * line_len + 1);
423 if (!src_line[i].path)
424 goto next;
425
426 strcpy(src_line[i].path, path);
427 insert_source_line(root, &src_line[i]);
428
429 next:
430 pclose(fp);
431 }
432
433 return 0;
434}
435
436static void print_summary(struct rb_root *root, const char *filename)
437{
438 struct source_line *src_line;
439 struct rb_node *node;
440
441 printf("\nSorted summary for file %s\n", filename);
442 printf("----------------------------------------------\n\n");
443
444 if (RB_EMPTY_ROOT(root)) {
445 printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
446 return;
447 }
448
449 node = rb_first(root);
450 while (node) {
451 double percent;
452 const char *color;
453 char *path;
454
455 src_line = rb_entry(node, struct source_line, node);
456 percent = src_line->percent;
457 color = get_percent_color(percent);
458 path = src_line->path;
459
460 color_fprintf(stdout, color, " %7.2f %s", percent, path);
461 node = rb_next(node);
462 }
463}
464
465static void symbol__annotate_hits(struct symbol *sym, int evidx)
466{
467 struct annotation *notes = symbol__annotation(sym);
468 struct sym_hist *h = annotation__histogram(notes, evidx);
469 u64 len = sym->end - sym->start, offset;
470
471 for (offset = 0; offset < len; ++offset)
472 if (h->addr[offset] != 0)
473 printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
474 sym->start + offset, h->addr[offset]);
475 printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
476}
477
478int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
479 bool full_paths, int min_pcnt, int max_lines,
480 int context)
481{
482 struct dso *dso = map->dso;
483 const char *filename = dso->long_name, *d_filename;
484 struct annotation *notes = symbol__annotation(sym);
485 struct objdump_line *pos, *queue = NULL;
486 int printed = 2, queue_len = 0;
487 int more = 0;
488 u64 len;
489
490 if (full_paths)
491 d_filename = filename;
492 else
493 d_filename = basename(filename);
494
495 len = sym->end - sym->start;
496
497 printf(" Percent | Source code & Disassembly of %s\n", d_filename);
498 printf("------------------------------------------------\n");
499
500 if (verbose)
501 symbol__annotate_hits(sym, evidx);
502
503 list_for_each_entry(pos, &notes->src->source, node) {
504 if (context && queue == NULL) {
505 queue = pos;
506 queue_len = 0;
507 }
508
509 switch (objdump_line__print(pos, sym, evidx, len, min_pcnt,
510 printed, max_lines, queue)) {
511 case 0:
512 ++printed;
513 if (context) {
514 printed += queue_len;
515 queue = NULL;
516 queue_len = 0;
517 }
518 break;
519 case 1:
520 /* filtered by max_lines */
521 ++more;
522 break;
523 case -1:
524 default:
525 /*
526 * Filtered by min_pcnt or non IP lines when
527 * context != 0
528 */
529 if (!context)
530 break;
531 if (queue_len == context)
532 queue = list_entry(queue->node.next, typeof(*queue), node);
533 else
534 ++queue_len;
535 break;
536 }
537 }
538
539 return more;
540}
541
542void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
543{
544 struct annotation *notes = symbol__annotation(sym);
545 struct sym_hist *h = annotation__histogram(notes, evidx);
546
547 memset(h, 0, notes->src->sizeof_sym_hist);
548}
549
550void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
551{
552 struct annotation *notes = symbol__annotation(sym);
553 struct sym_hist *h = annotation__histogram(notes, evidx);
554 struct objdump_line *pos;
555 int len = sym->end - sym->start;
556
557 h->sum = 0;
558
559 list_for_each_entry(pos, &notes->src->source, node) {
560 if (pos->offset != -1 && pos->offset < len) {
561 h->addr[pos->offset] = h->addr[pos->offset] * 7 / 8;
562 h->sum += h->addr[pos->offset];
563 }
564 }
565}
566
567void objdump_line_list__purge(struct list_head *head)
568{
569 struct objdump_line *pos, *n;
570
571 list_for_each_entry_safe(pos, n, head, node) {
572 list_del(&pos->node);
573 objdump_line__free(pos);
574 }
575}
576
577int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
578 bool print_lines, bool full_paths, int min_pcnt,
579 int max_lines)
580{
581 struct dso *dso = map->dso;
582 const char *filename = dso->long_name;
583 struct rb_root source_line = RB_ROOT;
584 u64 len;
585
586 if (symbol__annotate(sym, map, 0) < 0)
587 return -1;
588
589 len = sym->end - sym->start;
590
591 if (print_lines) {
592 symbol__get_source_line(sym, map, evidx, &source_line,
593 len, filename);
594 print_summary(&source_line, filename);
595 }
596
597 symbol__annotate_printf(sym, map, evidx, full_paths,
598 min_pcnt, max_lines, 0);
599 if (print_lines)
600 symbol__free_source_line(sym, len);
601
602 objdump_line_list__purge(&symbol__annotation(sym)->src->source);
603
604 return 0;
605}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
new file mode 100644
index 000000000000..c2c286896801
--- /dev/null
+++ b/tools/perf/util/annotate.h
@@ -0,0 +1,103 @@
1#ifndef __PERF_ANNOTATE_H
2#define __PERF_ANNOTATE_H
3
4#include <stdbool.h>
5#include "types.h"
6#include "symbol.h"
7#include <linux/list.h>
8#include <linux/rbtree.h>
9
10struct objdump_line {
11 struct list_head node;
12 s64 offset;
13 char *line;
14};
15
16void objdump_line__free(struct objdump_line *self);
17struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
18 struct objdump_line *pos);
19
20struct sym_hist {
21 u64 sum;
22 u64 addr[0];
23};
24
25struct source_line {
26 struct rb_node node;
27 double percent;
28 char *path;
29};
30
31/** struct annotated_source - symbols with hits have this attached as in sannotation
32 *
33 * @histogram: Array of addr hit histograms per event being monitored
34 * @lines: If 'print_lines' is specified, per source code line percentages
35 * @source: source parsed from objdump -dS
36 *
37 * lines is allocated, percentages calculated and all sorted by percentage
38 * when the annotation is about to be presented, so the percentages are for
39 * one of the entries in the histogram array, i.e. for the event/counter being
40 * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
41 * returns.
42 */
43struct annotated_source {
44 struct list_head source;
45 struct source_line *lines;
46 int nr_histograms;
47 int sizeof_sym_hist;
48 struct sym_hist histograms[0];
49};
50
51struct annotation {
52 pthread_mutex_t lock;
53 struct annotated_source *src;
54};
55
56struct sannotation {
57 struct annotation annotation;
58 struct symbol symbol;
59};
60
61static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
62{
63 return (((void *)&notes->src->histograms) +
64 (notes->src->sizeof_sym_hist * idx));
65}
66
67static inline struct annotation *symbol__annotation(struct symbol *sym)
68{
69 struct sannotation *a = container_of(sym, struct sannotation, symbol);
70 return &a->annotation;
71}
72
73int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
74 int evidx, u64 addr);
75int symbol__alloc_hist(struct symbol *sym, int nevents);
76void symbol__annotate_zero_histograms(struct symbol *sym);
77
78int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
79int symbol__annotate_init(struct map *map __used, struct symbol *sym);
80int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
81 bool full_paths, int min_pcnt, int max_lines,
82 int context);
83void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
84void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
85void objdump_line_list__purge(struct list_head *head);
86
87int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
88 bool print_lines, bool full_paths, int min_pcnt,
89 int max_lines);
90
91#ifdef NO_NEWT_SUPPORT
92static inline int symbol__tui_annotate(struct symbol *sym __used,
93 struct map *map __used,
94 int evidx __used, int refresh __used)
95{
96 return 0;
97}
98#else
99int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
100 int refresh);
101#endif
102
103#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index deffb8c96071..31f934af9861 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,8 +14,8 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include "debug.h" 15#include "debug.h"
16 16
17static int build_id__mark_dso_hit(event_t *event, 17static int build_id__mark_dso_hit(union perf_event *event,
18 struct sample_data *sample __used, 18 struct perf_sample *sample __used,
19 struct perf_session *session) 19 struct perf_session *session)
20{ 20{
21 struct addr_location al; 21 struct addr_location al;
@@ -37,13 +37,14 @@ static int build_id__mark_dso_hit(event_t *event,
37 return 0; 37 return 0;
38} 38}
39 39
40static int event__exit_del_thread(event_t *self, struct sample_data *sample __used, 40static int perf_event__exit_del_thread(union perf_event *event,
41 struct perf_session *session) 41 struct perf_sample *sample __used,
42 struct perf_session *session)
42{ 43{
43 struct thread *thread = perf_session__findnew(session, self->fork.tid); 44 struct thread *thread = perf_session__findnew(session, event->fork.tid);
44 45
45 dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid, 46 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
46 self->fork.ppid, self->fork.ptid); 47 event->fork.ppid, event->fork.ptid);
47 48
48 if (thread) { 49 if (thread) {
49 rb_erase(&thread->rb_node, &session->threads); 50 rb_erase(&thread->rb_node, &session->threads);
@@ -56,9 +57,9 @@ static int event__exit_del_thread(event_t *self, struct sample_data *sample __us
56 57
57struct perf_event_ops build_id__mark_dso_hit_ops = { 58struct perf_event_ops build_id__mark_dso_hit_ops = {
58 .sample = build_id__mark_dso_hit, 59 .sample = build_id__mark_dso_hit,
59 .mmap = event__process_mmap, 60 .mmap = perf_event__process_mmap,
60 .fork = event__process_task, 61 .fork = perf_event__process_task,
61 .exit = event__exit_del_thread, 62 .exit = perf_event__exit_del_thread,
62}; 63};
63 64
64char *dso__build_id_filename(struct dso *self, char *bf, size_t size) 65char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index a7729797fd96..fc5e5a09d5b9 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -34,13 +34,14 @@ extern int pager_use_color;
34extern int use_browser; 34extern int use_browser;
35 35
36#ifdef NO_NEWT_SUPPORT 36#ifdef NO_NEWT_SUPPORT
37static inline void setup_browser(void) 37static inline void setup_browser(bool fallback_to_pager)
38{ 38{
39 setup_pager(); 39 if (fallback_to_pager)
40 setup_pager();
40} 41}
41static inline void exit_browser(bool wait_for_ok __used) {} 42static inline void exit_browser(bool wait_for_ok __used) {}
42#else 43#else
43void setup_browser(void); 44void setup_browser(bool fallback_to_pager);
44void exit_browser(bool wait_for_ok); 45void exit_browser(bool wait_for_ok);
45#endif 46#endif
46 47
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index e12d539417b2..9f7106a8d9a4 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010, Frederic Weisbecker <fweisbec@gmail.com> 2 * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
3 * 3 *
4 * Handle the callchains from the stream in an ad-hoc radix tree and then 4 * Handle the callchains from the stream in an ad-hoc radix tree and then
5 * sort them in an rbtree. 5 * sort them in an rbtree.
@@ -18,7 +18,8 @@
18#include "util.h" 18#include "util.h"
19#include "callchain.h" 19#include "callchain.h"
20 20
21bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event) 21bool ip_callchain__valid(struct ip_callchain *chain,
22 const union perf_event *event)
22{ 23{
23 unsigned int chain_size = event->header.size; 24 unsigned int chain_size = event->header.size;
24 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; 25 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
@@ -26,10 +27,10 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
26} 27}
27 28
28#define chain_for_each_child(child, parent) \ 29#define chain_for_each_child(child, parent) \
29 list_for_each_entry(child, &parent->children, brothers) 30 list_for_each_entry(child, &parent->children, siblings)
30 31
31#define chain_for_each_child_safe(child, next, parent) \ 32#define chain_for_each_child_safe(child, next, parent) \
32 list_for_each_entry_safe(child, next, &parent->children, brothers) 33 list_for_each_entry_safe(child, next, &parent->children, siblings)
33 34
34static void 35static void
35rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, 36rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
@@ -38,14 +39,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
38 struct rb_node **p = &root->rb_node; 39 struct rb_node **p = &root->rb_node;
39 struct rb_node *parent = NULL; 40 struct rb_node *parent = NULL;
40 struct callchain_node *rnode; 41 struct callchain_node *rnode;
41 u64 chain_cumul = cumul_hits(chain); 42 u64 chain_cumul = callchain_cumul_hits(chain);
42 43
43 while (*p) { 44 while (*p) {
44 u64 rnode_cumul; 45 u64 rnode_cumul;
45 46
46 parent = *p; 47 parent = *p;
47 rnode = rb_entry(parent, struct callchain_node, rb_node); 48 rnode = rb_entry(parent, struct callchain_node, rb_node);
48 rnode_cumul = cumul_hits(rnode); 49 rnode_cumul = callchain_cumul_hits(rnode);
49 50
50 switch (mode) { 51 switch (mode) {
51 case CHAIN_FLAT: 52 case CHAIN_FLAT:
@@ -104,7 +105,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
104 105
105 chain_for_each_child(child, node) { 106 chain_for_each_child(child, node) {
106 __sort_chain_graph_abs(child, min_hit); 107 __sort_chain_graph_abs(child, min_hit);
107 if (cumul_hits(child) >= min_hit) 108 if (callchain_cumul_hits(child) >= min_hit)
108 rb_insert_callchain(&node->rb_root, child, 109 rb_insert_callchain(&node->rb_root, child,
109 CHAIN_GRAPH_ABS); 110 CHAIN_GRAPH_ABS);
110 } 111 }
@@ -129,7 +130,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
129 130
130 chain_for_each_child(child, node) { 131 chain_for_each_child(child, node) {
131 __sort_chain_graph_rel(child, min_percent); 132 __sort_chain_graph_rel(child, min_percent);
132 if (cumul_hits(child) >= min_hit) 133 if (callchain_cumul_hits(child) >= min_hit)
133 rb_insert_callchain(&node->rb_root, child, 134 rb_insert_callchain(&node->rb_root, child,
134 CHAIN_GRAPH_REL); 135 CHAIN_GRAPH_REL);
135 } 136 }
@@ -143,7 +144,7 @@ sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
143 rb_root->rb_node = chain_root->node.rb_root.rb_node; 144 rb_root->rb_node = chain_root->node.rb_root.rb_node;
144} 145}
145 146
146int register_callchain_param(struct callchain_param *param) 147int callchain_register_param(struct callchain_param *param)
147{ 148{
148 switch (param->mode) { 149 switch (param->mode) {
149 case CHAIN_GRAPH_ABS: 150 case CHAIN_GRAPH_ABS:
@@ -189,32 +190,27 @@ create_child(struct callchain_node *parent, bool inherit_children)
189 chain_for_each_child(next, new) 190 chain_for_each_child(next, new)
190 next->parent = new; 191 next->parent = new;
191 } 192 }
192 list_add_tail(&new->brothers, &parent->children); 193 list_add_tail(&new->siblings, &parent->children);
193 194
194 return new; 195 return new;
195} 196}
196 197
197 198
198struct resolved_ip {
199 u64 ip;
200 struct map_symbol ms;
201};
202
203struct resolved_chain {
204 u64 nr;
205 struct resolved_ip ips[0];
206};
207
208
209/* 199/*
210 * Fill the node with callchain values 200 * Fill the node with callchain values
211 */ 201 */
212static void 202static void
213fill_node(struct callchain_node *node, struct resolved_chain *chain, int start) 203fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
214{ 204{
215 unsigned int i; 205 struct callchain_cursor_node *cursor_node;
206
207 node->val_nr = cursor->nr - cursor->pos;
208 if (!node->val_nr)
209 pr_warning("Warning: empty node in callchain tree\n");
216 210
217 for (i = start; i < chain->nr; i++) { 211 cursor_node = callchain_cursor_current(cursor);
212
213 while (cursor_node) {
218 struct callchain_list *call; 214 struct callchain_list *call;
219 215
220 call = zalloc(sizeof(*call)); 216 call = zalloc(sizeof(*call));
@@ -222,23 +218,25 @@ fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
222 perror("not enough memory for the code path tree"); 218 perror("not enough memory for the code path tree");
223 return; 219 return;
224 } 220 }
225 call->ip = chain->ips[i].ip; 221 call->ip = cursor_node->ip;
226 call->ms = chain->ips[i].ms; 222 call->ms.sym = cursor_node->sym;
223 call->ms.map = cursor_node->map;
227 list_add_tail(&call->list, &node->val); 224 list_add_tail(&call->list, &node->val);
225
226 callchain_cursor_advance(cursor);
227 cursor_node = callchain_cursor_current(cursor);
228 } 228 }
229 node->val_nr = chain->nr - start;
230 if (!node->val_nr)
231 pr_warning("Warning: empty node in callchain tree\n");
232} 229}
233 230
234static void 231static void
235add_child(struct callchain_node *parent, struct resolved_chain *chain, 232add_child(struct callchain_node *parent,
236 int start, u64 period) 233 struct callchain_cursor *cursor,
234 u64 period)
237{ 235{
238 struct callchain_node *new; 236 struct callchain_node *new;
239 237
240 new = create_child(parent, false); 238 new = create_child(parent, false);
241 fill_node(new, chain, start); 239 fill_node(new, cursor);
242 240
243 new->children_hit = 0; 241 new->children_hit = 0;
244 new->hit = period; 242 new->hit = period;
@@ -250,9 +248,10 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain,
250 * Then create another child to host the given callchain of new branch 248 * Then create another child to host the given callchain of new branch
251 */ 249 */
252static void 250static void
253split_add_child(struct callchain_node *parent, struct resolved_chain *chain, 251split_add_child(struct callchain_node *parent,
254 struct callchain_list *to_split, int idx_parents, int idx_local, 252 struct callchain_cursor *cursor,
255 u64 period) 253 struct callchain_list *to_split,
254 u64 idx_parents, u64 idx_local, u64 period)
256{ 255{
257 struct callchain_node *new; 256 struct callchain_node *new;
258 struct list_head *old_tail; 257 struct list_head *old_tail;
@@ -272,14 +271,14 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
272 /* split the hits */ 271 /* split the hits */
273 new->hit = parent->hit; 272 new->hit = parent->hit;
274 new->children_hit = parent->children_hit; 273 new->children_hit = parent->children_hit;
275 parent->children_hit = cumul_hits(new); 274 parent->children_hit = callchain_cumul_hits(new);
276 new->val_nr = parent->val_nr - idx_local; 275 new->val_nr = parent->val_nr - idx_local;
277 parent->val_nr = idx_local; 276 parent->val_nr = idx_local;
278 277
279 /* create a new child for the new branch if any */ 278 /* create a new child for the new branch if any */
280 if (idx_total < chain->nr) { 279 if (idx_total < cursor->nr) {
281 parent->hit = 0; 280 parent->hit = 0;
282 add_child(parent, chain, idx_total, period); 281 add_child(parent, cursor, period);
283 parent->children_hit += period; 282 parent->children_hit += period;
284 } else { 283 } else {
285 parent->hit = period; 284 parent->hit = period;
@@ -287,36 +286,41 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
287} 286}
288 287
289static int 288static int
290append_chain(struct callchain_node *root, struct resolved_chain *chain, 289append_chain(struct callchain_node *root,
291 unsigned int start, u64 period); 290 struct callchain_cursor *cursor,
291 u64 period);
292 292
293static void 293static void
294append_chain_children(struct callchain_node *root, struct resolved_chain *chain, 294append_chain_children(struct callchain_node *root,
295 unsigned int start, u64 period) 295 struct callchain_cursor *cursor,
296 u64 period)
296{ 297{
297 struct callchain_node *rnode; 298 struct callchain_node *rnode;
298 299
299 /* lookup in childrens */ 300 /* lookup in childrens */
300 chain_for_each_child(rnode, root) { 301 chain_for_each_child(rnode, root) {
301 unsigned int ret = append_chain(rnode, chain, start, period); 302 unsigned int ret = append_chain(rnode, cursor, period);
302 303
303 if (!ret) 304 if (!ret)
304 goto inc_children_hit; 305 goto inc_children_hit;
305 } 306 }
306 /* nothing in children, add to the current node */ 307 /* nothing in children, add to the current node */
307 add_child(root, chain, start, period); 308 add_child(root, cursor, period);
308 309
309inc_children_hit: 310inc_children_hit:
310 root->children_hit += period; 311 root->children_hit += period;
311} 312}
312 313
313static int 314static int
314append_chain(struct callchain_node *root, struct resolved_chain *chain, 315append_chain(struct callchain_node *root,
315 unsigned int start, u64 period) 316 struct callchain_cursor *cursor,
317 u64 period)
316{ 318{
319 struct callchain_cursor_node *curr_snap = cursor->curr;
317 struct callchain_list *cnode; 320 struct callchain_list *cnode;
318 unsigned int i = start; 321 u64 start = cursor->pos;
319 bool found = false; 322 bool found = false;
323 u64 matches;
320 324
321 /* 325 /*
322 * Lookup in the current node 326 * Lookup in the current node
@@ -324,141 +328,134 @@ append_chain(struct callchain_node *root, struct resolved_chain *chain,
324 * anywhere inside a function. 328 * anywhere inside a function.
325 */ 329 */
326 list_for_each_entry(cnode, &root->val, list) { 330 list_for_each_entry(cnode, &root->val, list) {
331 struct callchain_cursor_node *node;
327 struct symbol *sym; 332 struct symbol *sym;
328 333
329 if (i == chain->nr) 334 node = callchain_cursor_current(cursor);
335 if (!node)
330 break; 336 break;
331 337
332 sym = chain->ips[i].ms.sym; 338 sym = node->sym;
333 339
334 if (cnode->ms.sym && sym) { 340 if (cnode->ms.sym && sym) {
335 if (cnode->ms.sym->start != sym->start) 341 if (cnode->ms.sym->start != sym->start)
336 break; 342 break;
337 } else if (cnode->ip != chain->ips[i].ip) 343 } else if (cnode->ip != node->ip)
338 break; 344 break;
339 345
340 if (!found) 346 if (!found)
341 found = true; 347 found = true;
342 i++; 348
349 callchain_cursor_advance(cursor);
343 } 350 }
344 351
345 /* matches not, relay on the parent */ 352 /* matches not, relay on the parent */
346 if (!found) 353 if (!found) {
354 cursor->curr = curr_snap;
355 cursor->pos = start;
347 return -1; 356 return -1;
357 }
358
359 matches = cursor->pos - start;
348 360
349 /* we match only a part of the node. Split it and add the new chain */ 361 /* we match only a part of the node. Split it and add the new chain */
350 if (i - start < root->val_nr) { 362 if (matches < root->val_nr) {
351 split_add_child(root, chain, cnode, start, i - start, period); 363 split_add_child(root, cursor, cnode, start, matches, period);
352 return 0; 364 return 0;
353 } 365 }
354 366
355 /* we match 100% of the path, increment the hit */ 367 /* we match 100% of the path, increment the hit */
356 if (i - start == root->val_nr && i == chain->nr) { 368 if (matches == root->val_nr && cursor->pos == cursor->nr) {
357 root->hit += period; 369 root->hit += period;
358 return 0; 370 return 0;
359 } 371 }
360 372
361 /* We match the node and still have a part remaining */ 373 /* We match the node and still have a part remaining */
362 append_chain_children(root, chain, i, period); 374 append_chain_children(root, cursor, period);
363 375
364 return 0; 376 return 0;
365} 377}
366 378
367static void filter_context(struct ip_callchain *old, struct resolved_chain *new, 379int callchain_append(struct callchain_root *root,
368 struct map_symbol *syms) 380 struct callchain_cursor *cursor,
369{ 381 u64 period)
370 int i, j = 0;
371
372 for (i = 0; i < (int)old->nr; i++) {
373 if (old->ips[i] >= PERF_CONTEXT_MAX)
374 continue;
375
376 new->ips[j].ip = old->ips[i];
377 new->ips[j].ms = syms[i];
378 j++;
379 }
380
381 new->nr = j;
382}
383
384
385int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
386 struct map_symbol *syms, u64 period)
387{ 382{
388 struct resolved_chain *filtered; 383 if (!cursor->nr)
389
390 if (!chain->nr)
391 return 0; 384 return 0;
392 385
393 filtered = zalloc(sizeof(*filtered) + 386 callchain_cursor_commit(cursor);
394 chain->nr * sizeof(struct resolved_ip));
395 if (!filtered)
396 return -ENOMEM;
397
398 filter_context(chain, filtered, syms);
399
400 if (!filtered->nr)
401 goto end;
402 387
403 append_chain_children(&root->node, filtered, 0, period); 388 append_chain_children(&root->node, cursor, period);
404 389
405 if (filtered->nr > root->max_depth) 390 if (cursor->nr > root->max_depth)
406 root->max_depth = filtered->nr; 391 root->max_depth = cursor->nr;
407end:
408 free(filtered);
409 392
410 return 0; 393 return 0;
411} 394}
412 395
413static int 396static int
414merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, 397merge_chain_branch(struct callchain_cursor *cursor,
415 struct resolved_chain *chain) 398 struct callchain_node *dst, struct callchain_node *src)
416{ 399{
400 struct callchain_cursor_node **old_last = cursor->last;
417 struct callchain_node *child, *next_child; 401 struct callchain_node *child, *next_child;
418 struct callchain_list *list, *next_list; 402 struct callchain_list *list, *next_list;
419 int old_pos = chain->nr; 403 int old_pos = cursor->nr;
420 int err = 0; 404 int err = 0;
421 405
422 list_for_each_entry_safe(list, next_list, &src->val, list) { 406 list_for_each_entry_safe(list, next_list, &src->val, list) {
423 chain->ips[chain->nr].ip = list->ip; 407 callchain_cursor_append(cursor, list->ip,
424 chain->ips[chain->nr].ms = list->ms; 408 list->ms.map, list->ms.sym);
425 chain->nr++;
426 list_del(&list->list); 409 list_del(&list->list);
427 free(list); 410 free(list);
428 } 411 }
429 412
430 if (src->hit) 413 if (src->hit) {
431 append_chain_children(dst, chain, 0, src->hit); 414 callchain_cursor_commit(cursor);
415 append_chain_children(dst, cursor, src->hit);
416 }
432 417
433 chain_for_each_child_safe(child, next_child, src) { 418 chain_for_each_child_safe(child, next_child, src) {
434 err = merge_chain_branch(dst, child, chain); 419 err = merge_chain_branch(cursor, dst, child);
435 if (err) 420 if (err)
436 break; 421 break;
437 422
438 list_del(&child->brothers); 423 list_del(&child->siblings);
439 free(child); 424 free(child);
440 } 425 }
441 426
442 chain->nr = old_pos; 427 cursor->nr = old_pos;
428 cursor->last = old_last;
443 429
444 return err; 430 return err;
445} 431}
446 432
447int callchain_merge(struct callchain_root *dst, struct callchain_root *src) 433int callchain_merge(struct callchain_cursor *cursor,
434 struct callchain_root *dst, struct callchain_root *src)
435{
436 return merge_chain_branch(cursor, &dst->node, &src->node);
437}
438
439int callchain_cursor_append(struct callchain_cursor *cursor,
440 u64 ip, struct map *map, struct symbol *sym)
448{ 441{
449 struct resolved_chain *chain; 442 struct callchain_cursor_node *node = *cursor->last;
450 int err;
451 443
452 chain = malloc(sizeof(*chain) + 444 if (!node) {
453 src->max_depth * sizeof(struct resolved_ip)); 445 node = calloc(sizeof(*node), 1);
454 if (!chain) 446 if (!node)
455 return -ENOMEM; 447 return -ENOMEM;
456 448
457 chain->nr = 0; 449 *cursor->last = node;
450 }
458 451
459 err = merge_chain_branch(&dst->node, &src->node, chain); 452 node->ip = ip;
453 node->map = map;
454 node->sym = sym;
460 455
461 free(chain); 456 cursor->nr++;
462 457
463 return err; 458 cursor->last = &node->next;
459
460 return 0;
464} 461}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index c15fb8c24ad2..1a79df9f739f 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -16,7 +16,7 @@ enum chain_mode {
16 16
17struct callchain_node { 17struct callchain_node {
18 struct callchain_node *parent; 18 struct callchain_node *parent;
19 struct list_head brothers; 19 struct list_head siblings;
20 struct list_head children; 20 struct list_head children;
21 struct list_head val; 21 struct list_head val;
22 struct rb_node rb_node; /* to sort nodes in an rbtree */ 22 struct rb_node rb_node; /* to sort nodes in an rbtree */
@@ -49,9 +49,30 @@ struct callchain_list {
49 struct list_head list; 49 struct list_head list;
50}; 50};
51 51
52/*
53 * A callchain cursor is a single linked list that
54 * let one feed a callchain progressively.
55 * It keeps persitent allocated entries to minimize
56 * allocations.
57 */
58struct callchain_cursor_node {
59 u64 ip;
60 struct map *map;
61 struct symbol *sym;
62 struct callchain_cursor_node *next;
63};
64
65struct callchain_cursor {
66 u64 nr;
67 struct callchain_cursor_node *first;
68 struct callchain_cursor_node **last;
69 u64 pos;
70 struct callchain_cursor_node *curr;
71};
72
52static inline void callchain_init(struct callchain_root *root) 73static inline void callchain_init(struct callchain_root *root)
53{ 74{
54 INIT_LIST_HEAD(&root->node.brothers); 75 INIT_LIST_HEAD(&root->node.siblings);
55 INIT_LIST_HEAD(&root->node.children); 76 INIT_LIST_HEAD(&root->node.children);
56 INIT_LIST_HEAD(&root->node.val); 77 INIT_LIST_HEAD(&root->node.val);
57 78
@@ -61,15 +82,54 @@ static inline void callchain_init(struct callchain_root *root)
61 root->max_depth = 0; 82 root->max_depth = 0;
62} 83}
63 84
64static inline u64 cumul_hits(struct callchain_node *node) 85static inline u64 callchain_cumul_hits(struct callchain_node *node)
65{ 86{
66 return node->hit + node->children_hit; 87 return node->hit + node->children_hit;
67} 88}
68 89
69int register_callchain_param(struct callchain_param *param); 90int callchain_register_param(struct callchain_param *param);
70int callchain_append(struct callchain_root *root, struct ip_callchain *chain, 91int callchain_append(struct callchain_root *root,
71 struct map_symbol *syms, u64 period); 92 struct callchain_cursor *cursor,
72int callchain_merge(struct callchain_root *dst, struct callchain_root *src); 93 u64 period);
94
95int callchain_merge(struct callchain_cursor *cursor,
96 struct callchain_root *dst, struct callchain_root *src);
97
98bool ip_callchain__valid(struct ip_callchain *chain,
99 const union perf_event *event);
100/*
101 * Initialize a cursor before adding entries inside, but keep
102 * the previously allocated entries as a cache.
103 */
104static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
105{
106 cursor->nr = 0;
107 cursor->last = &cursor->first;
108}
109
110int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
111 struct map *map, struct symbol *sym);
73 112
74bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); 113/* Close a cursor writing session. Initialize for the reader */
114static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
115{
116 cursor->curr = cursor->first;
117 cursor->pos = 0;
118}
119
120/* Cursor reading iteration helpers */
121static inline struct callchain_cursor_node *
122callchain_cursor_current(struct callchain_cursor *cursor)
123{
124 if (cursor->pos == cursor->nr)
125 return NULL;
126
127 return cursor->curr;
128}
129
130static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
131{
132 cursor->curr = cursor->curr->next;
133 cursor->pos++;
134}
75#endif /* __PERF_CALLCHAIN_H */ 135#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
new file mode 100644
index 000000000000..9fea75535221
--- /dev/null
+++ b/tools/perf/util/cgroup.c
@@ -0,0 +1,178 @@
1#include "util.h"
2#include "../perf.h"
3#include "parse-options.h"
4#include "evsel.h"
5#include "cgroup.h"
6#include "debugfs.h" /* MAX_PATH, STR() */
7#include "evlist.h"
8
9int nr_cgroups;
10
11static int
12cgroupfs_find_mountpoint(char *buf, size_t maxlen)
13{
14 FILE *fp;
15 char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1];
16 char *token, *saved_ptr;
17 int found = 0;
18
19 fp = fopen("/proc/mounts", "r");
20 if (!fp)
21 return -1;
22
23 /*
24 * in order to handle split hierarchy, we need to scan /proc/mounts
25 * and inspect every cgroupfs mount point to find one that has
26 * perf_event subsystem
27 */
28 while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %"
29 STR(MAX_PATH)"s %*d %*d\n",
30 mountpoint, type, tokens) == 3) {
31
32 if (!strcmp(type, "cgroup")) {
33
34 token = strtok_r(tokens, ",", &saved_ptr);
35
36 while (token != NULL) {
37 if (!strcmp(token, "perf_event")) {
38 found = 1;
39 break;
40 }
41 token = strtok_r(NULL, ",", &saved_ptr);
42 }
43 }
44 if (found)
45 break;
46 }
47 fclose(fp);
48 if (!found)
49 return -1;
50
51 if (strlen(mountpoint) < maxlen) {
52 strcpy(buf, mountpoint);
53 return 0;
54 }
55 return -1;
56}
57
58static int open_cgroup(char *name)
59{
60 char path[MAX_PATH+1];
61 char mnt[MAX_PATH+1];
62 int fd;
63
64
65 if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1))
66 return -1;
67
68 snprintf(path, MAX_PATH, "%s/%s", mnt, name);
69
70 fd = open(path, O_RDONLY);
71 if (fd == -1)
72 fprintf(stderr, "no access to cgroup %s\n", path);
73
74 return fd;
75}
76
77static int add_cgroup(struct perf_evlist *evlist, char *str)
78{
79 struct perf_evsel *counter;
80 struct cgroup_sel *cgrp = NULL;
81 int n;
82 /*
83 * check if cgrp is already defined, if so we reuse it
84 */
85 list_for_each_entry(counter, &evlist->entries, node) {
86 cgrp = counter->cgrp;
87 if (!cgrp)
88 continue;
89 if (!strcmp(cgrp->name, str))
90 break;
91
92 cgrp = NULL;
93 }
94
95 if (!cgrp) {
96 cgrp = zalloc(sizeof(*cgrp));
97 if (!cgrp)
98 return -1;
99
100 cgrp->name = str;
101
102 cgrp->fd = open_cgroup(str);
103 if (cgrp->fd == -1) {
104 free(cgrp);
105 return -1;
106 }
107 }
108
109 /*
110 * find corresponding event
111 * if add cgroup N, then need to find event N
112 */
113 n = 0;
114 list_for_each_entry(counter, &evlist->entries, node) {
115 if (n == nr_cgroups)
116 goto found;
117 n++;
118 }
119 if (cgrp->refcnt == 0)
120 free(cgrp);
121
122 return -1;
123found:
124 cgrp->refcnt++;
125 counter->cgrp = cgrp;
126 return 0;
127}
128
129void close_cgroup(struct cgroup_sel *cgrp)
130{
131 if (!cgrp)
132 return;
133
134 /* XXX: not reentrant */
135 if (--cgrp->refcnt == 0) {
136 close(cgrp->fd);
137 free(cgrp->name);
138 free(cgrp);
139 }
140}
141
142int parse_cgroups(const struct option *opt __used, const char *str,
143 int unset __used)
144{
145 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
146 const char *p, *e, *eos = str + strlen(str);
147 char *s;
148 int ret;
149
150 if (list_empty(&evlist->entries)) {
151 fprintf(stderr, "must define events before cgroups\n");
152 return -1;
153 }
154
155 for (;;) {
156 p = strchr(str, ',');
157 e = p ? p : eos;
158
159 /* allow empty cgroups, i.e., skip */
160 if (e - str) {
161 /* termination added */
162 s = strndup(str, e - str);
163 if (!s)
164 return -1;
165 ret = add_cgroup(evlist, s);
166 if (ret) {
167 free(s);
168 return -1;
169 }
170 }
171 /* nr_cgroups is increased een for empty cgroups */
172 nr_cgroups++;
173 if (!p)
174 break;
175 str = p+1;
176 }
177 return 0;
178}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
new file mode 100644
index 000000000000..89acd6debdc5
--- /dev/null
+++ b/tools/perf/util/cgroup.h
@@ -0,0 +1,17 @@
1#ifndef __CGROUP_H__
2#define __CGROUP_H__
3
4struct option;
5
6struct cgroup_sel {
7 char *name;
8 int fd;
9 int refcnt;
10};
11
12
13extern int nr_cgroups; /* number of explicit cgroups defined */
14extern void close_cgroup(struct cgroup_sel *cgrp);
15extern int parse_cgroups(const struct option *opt, const char *str, int unset);
16
17#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 3ccaa1043383..6893eec693ab 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -177,3 +177,8 @@ struct cpu_map *cpu_map__dummy_new(void)
177 177
178 return cpus; 178 return cpus;
179} 179}
180
181void cpu_map__delete(struct cpu_map *map)
182{
183 free(map);
184}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f7a4f42f6307..072c0a374794 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -8,6 +8,6 @@ struct cpu_map {
8 8
9struct cpu_map *cpu_map__new(const char *cpu_list); 9struct cpu_map *cpu_map__new(const char *cpu_list);
10struct cpu_map *cpu_map__dummy_new(void); 10struct cpu_map *cpu_map__dummy_new(void);
11void *cpu_map__delete(struct cpu_map *map); 11void cpu_map__delete(struct cpu_map *map);
12 12
13#endif /* __PERF_CPUMAP_H */ 13#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 01bbe8ecec3f..d4536a9e0d8c 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -57,7 +57,7 @@ void ui__warning(const char *format, ...)
57} 57}
58#endif 58#endif
59 59
60void trace_event(event_t *event) 60void trace_event(union perf_event *event)
61{ 61{
62 unsigned char *raw_event = (void *)event; 62 unsigned char *raw_event = (void *)event;
63 const char *color = PERF_COLOR_BLUE; 63 const char *color = PERF_COLOR_BLUE;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index ca35fd66b5df..93516cf4682c 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -9,7 +9,7 @@ extern int verbose;
9extern bool quiet, dump_trace; 9extern bool quiet, dump_trace;
10 10
11int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); 11int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
12void trace_event(event_t *event); 12void trace_event(union perf_event *event);
13 13
14struct ui_progress; 14struct ui_progress;
15 15
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 50d0a931497a..2b15c362ef56 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -6,8 +6,9 @@
6#include "string.h" 6#include "string.h"
7#include "strlist.h" 7#include "strlist.h"
8#include "thread.h" 8#include "thread.h"
9#include "thread_map.h"
9 10
10static const char *event__name[] = { 11static const char *perf_event__names[] = {
11 [0] = "TOTAL", 12 [0] = "TOTAL",
12 [PERF_RECORD_MMAP] = "MMAP", 13 [PERF_RECORD_MMAP] = "MMAP",
13 [PERF_RECORD_LOST] = "LOST", 14 [PERF_RECORD_LOST] = "LOST",
@@ -25,16 +26,16 @@ static const char *event__name[] = {
25 [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", 26 [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
26}; 27};
27 28
28const char *event__get_event_name(unsigned int id) 29const char *perf_event__name(unsigned int id)
29{ 30{
30 if (id >= ARRAY_SIZE(event__name)) 31 if (id >= ARRAY_SIZE(perf_event__names))
31 return "INVALID"; 32 return "INVALID";
32 if (!event__name[id]) 33 if (!perf_event__names[id])
33 return "UNKNOWN"; 34 return "UNKNOWN";
34 return event__name[id]; 35 return perf_event__names[id];
35} 36}
36 37
37static struct sample_data synth_sample = { 38static struct perf_sample synth_sample = {
38 .pid = -1, 39 .pid = -1,
39 .tid = -1, 40 .tid = -1,
40 .time = -1, 41 .time = -1,
@@ -43,9 +44,9 @@ static struct sample_data synth_sample = {
43 .period = 1, 44 .period = 1,
44}; 45};
45 46
46static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full, 47static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid,
47 event__handler_t process, 48 int full, perf_event__handler_t process,
48 struct perf_session *session) 49 struct perf_session *session)
49{ 50{
50 char filename[PATH_MAX]; 51 char filename[PATH_MAX];
51 char bf[BUFSIZ]; 52 char bf[BUFSIZ];
@@ -126,9 +127,10 @@ out:
126 return tgid; 127 return tgid;
127} 128}
128 129
129static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid, 130static int perf_event__synthesize_mmap_events(union perf_event *event,
130 event__handler_t process, 131 pid_t pid, pid_t tgid,
131 struct perf_session *session) 132 perf_event__handler_t process,
133 struct perf_session *session)
132{ 134{
133 char filename[PATH_MAX]; 135 char filename[PATH_MAX];
134 FILE *fp; 136 FILE *fp;
@@ -199,14 +201,14 @@ static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
199 return 0; 201 return 0;
200} 202}
201 203
202int event__synthesize_modules(event__handler_t process, 204int perf_event__synthesize_modules(perf_event__handler_t process,
203 struct perf_session *session, 205 struct perf_session *session,
204 struct machine *machine) 206 struct machine *machine)
205{ 207{
206 struct rb_node *nd; 208 struct rb_node *nd;
207 struct map_groups *kmaps = &machine->kmaps; 209 struct map_groups *kmaps = &machine->kmaps;
208 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); 210 union perf_event *event = zalloc((sizeof(event->mmap) +
209 211 session->id_hdr_size));
210 if (event == NULL) { 212 if (event == NULL) {
211 pr_debug("Not enough memory synthesizing mmap event " 213 pr_debug("Not enough memory synthesizing mmap event "
212 "for kernel modules\n"); 214 "for kernel modules\n");
@@ -251,23 +253,24 @@ int event__synthesize_modules(event__handler_t process,
251 return 0; 253 return 0;
252} 254}
253 255
254static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event, 256static int __event__synthesize_thread(union perf_event *comm_event,
255 pid_t pid, event__handler_t process, 257 union perf_event *mmap_event,
258 pid_t pid, perf_event__handler_t process,
256 struct perf_session *session) 259 struct perf_session *session)
257{ 260{
258 pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process, 261 pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process,
259 session); 262 session);
260 if (tgid == -1) 263 if (tgid == -1)
261 return -1; 264 return -1;
262 return event__synthesize_mmap_events(mmap_event, pid, tgid, 265 return perf_event__synthesize_mmap_events(mmap_event, pid, tgid,
263 process, session); 266 process, session);
264} 267}
265 268
266int event__synthesize_thread_map(struct thread_map *threads, 269int perf_event__synthesize_thread_map(struct thread_map *threads,
267 event__handler_t process, 270 perf_event__handler_t process,
268 struct perf_session *session) 271 struct perf_session *session)
269{ 272{
270 event_t *comm_event, *mmap_event; 273 union perf_event *comm_event, *mmap_event;
271 int err = -1, thread; 274 int err = -1, thread;
272 275
273 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); 276 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
@@ -294,12 +297,12 @@ out:
294 return err; 297 return err;
295} 298}
296 299
297int event__synthesize_threads(event__handler_t process, 300int perf_event__synthesize_threads(perf_event__handler_t process,
298 struct perf_session *session) 301 struct perf_session *session)
299{ 302{
300 DIR *proc; 303 DIR *proc;
301 struct dirent dirent, *next; 304 struct dirent dirent, *next;
302 event_t *comm_event, *mmap_event; 305 union perf_event *comm_event, *mmap_event;
303 int err = -1; 306 int err = -1;
304 307
305 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); 308 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
@@ -357,10 +360,10 @@ static int find_symbol_cb(void *arg, const char *name, char type,
357 return 1; 360 return 1;
358} 361}
359 362
360int event__synthesize_kernel_mmap(event__handler_t process, 363int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
361 struct perf_session *session, 364 struct perf_session *session,
362 struct machine *machine, 365 struct machine *machine,
363 const char *symbol_name) 366 const char *symbol_name)
364{ 367{
365 size_t size; 368 size_t size;
366 const char *filename, *mmap_name; 369 const char *filename, *mmap_name;
@@ -374,8 +377,8 @@ int event__synthesize_kernel_mmap(event__handler_t process,
374 * kernels. 377 * kernels.
375 */ 378 */
376 struct process_symbol_args args = { .name = symbol_name, }; 379 struct process_symbol_args args = { .name = symbol_name, };
377 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); 380 union perf_event *event = zalloc((sizeof(event->mmap) +
378 381 session->id_hdr_size));
379 if (event == NULL) { 382 if (event == NULL) {
380 pr_debug("Not enough memory synthesizing mmap event " 383 pr_debug("Not enough memory synthesizing mmap event "
381 "for kernel modules\n"); 384 "for kernel modules\n");
@@ -421,42 +424,15 @@ int event__synthesize_kernel_mmap(event__handler_t process,
421 return err; 424 return err;
422} 425}
423 426
424static void thread__comm_adjust(struct thread *self, struct hists *hists) 427int perf_event__process_comm(union perf_event *event,
425{ 428 struct perf_sample *sample __used,
426 char *comm = self->comm; 429 struct perf_session *session)
427
428 if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
429 (!symbol_conf.comm_list ||
430 strlist__has_entry(symbol_conf.comm_list, comm))) {
431 u16 slen = strlen(comm);
432
433 if (hists__new_col_len(hists, HISTC_COMM, slen))
434 hists__set_col_len(hists, HISTC_THREAD, slen + 6);
435 }
436}
437
438static int thread__set_comm_adjust(struct thread *self, const char *comm,
439 struct hists *hists)
440{ 430{
441 int ret = thread__set_comm(self, comm); 431 struct thread *thread = perf_session__findnew(session, event->comm.tid);
442
443 if (ret)
444 return ret;
445
446 thread__comm_adjust(self, hists);
447 432
448 return 0; 433 dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid);
449}
450 434
451int event__process_comm(event_t *self, struct sample_data *sample __used, 435 if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
452 struct perf_session *session)
453{
454 struct thread *thread = perf_session__findnew(session, self->comm.tid);
455
456 dump_printf(": %s:%d\n", self->comm.comm, self->comm.tid);
457
458 if (thread == NULL || thread__set_comm_adjust(thread, self->comm.comm,
459 &session->hists)) {
460 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); 436 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
461 return -1; 437 return -1;
462 } 438 }
@@ -464,19 +440,21 @@ int event__process_comm(event_t *self, struct sample_data *sample __used,
464 return 0; 440 return 0;
465} 441}
466 442
467int event__process_lost(event_t *self, struct sample_data *sample __used, 443int perf_event__process_lost(union perf_event *event,
468 struct perf_session *session) 444 struct perf_sample *sample __used,
445 struct perf_session *session)
469{ 446{
470 dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", 447 dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
471 self->lost.id, self->lost.lost); 448 event->lost.id, event->lost.lost);
472 session->hists.stats.total_lost += self->lost.lost; 449 session->hists.stats.total_lost += event->lost.lost;
473 return 0; 450 return 0;
474} 451}
475 452
476static void event_set_kernel_mmap_len(struct map **maps, event_t *self) 453static void perf_event__set_kernel_mmap_len(union perf_event *event,
454 struct map **maps)
477{ 455{
478 maps[MAP__FUNCTION]->start = self->mmap.start; 456 maps[MAP__FUNCTION]->start = event->mmap.start;
479 maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len; 457 maps[MAP__FUNCTION]->end = event->mmap.start + event->mmap.len;
480 /* 458 /*
481 * Be a bit paranoid here, some perf.data file came with 459 * Be a bit paranoid here, some perf.data file came with
482 * a zero sized synthesized MMAP event for the kernel. 460 * a zero sized synthesized MMAP event for the kernel.
@@ -485,8 +463,8 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
485 maps[MAP__FUNCTION]->end = ~0ULL; 463 maps[MAP__FUNCTION]->end = ~0ULL;
486} 464}
487 465
488static int event__process_kernel_mmap(event_t *self, 466static int perf_event__process_kernel_mmap(union perf_event *event,
489 struct perf_session *session) 467 struct perf_session *session)
490{ 468{
491 struct map *map; 469 struct map *map;
492 char kmmap_prefix[PATH_MAX]; 470 char kmmap_prefix[PATH_MAX];
@@ -494,9 +472,9 @@ static int event__process_kernel_mmap(event_t *self,
494 enum dso_kernel_type kernel_type; 472 enum dso_kernel_type kernel_type;
495 bool is_kernel_mmap; 473 bool is_kernel_mmap;
496 474
497 machine = perf_session__findnew_machine(session, self->mmap.pid); 475 machine = perf_session__findnew_machine(session, event->mmap.pid);
498 if (!machine) { 476 if (!machine) {
499 pr_err("Can't find id %d's machine\n", self->mmap.pid); 477 pr_err("Can't find id %d's machine\n", event->mmap.pid);
500 goto out_problem; 478 goto out_problem;
501 } 479 }
502 480
@@ -506,17 +484,17 @@ static int event__process_kernel_mmap(event_t *self,
506 else 484 else
507 kernel_type = DSO_TYPE_GUEST_KERNEL; 485 kernel_type = DSO_TYPE_GUEST_KERNEL;
508 486
509 is_kernel_mmap = memcmp(self->mmap.filename, 487 is_kernel_mmap = memcmp(event->mmap.filename,
510 kmmap_prefix, 488 kmmap_prefix,
511 strlen(kmmap_prefix)) == 0; 489 strlen(kmmap_prefix)) == 0;
512 if (self->mmap.filename[0] == '/' || 490 if (event->mmap.filename[0] == '/' ||
513 (!is_kernel_mmap && self->mmap.filename[0] == '[')) { 491 (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
514 492
515 char short_module_name[1024]; 493 char short_module_name[1024];
516 char *name, *dot; 494 char *name, *dot;
517 495
518 if (self->mmap.filename[0] == '/') { 496 if (event->mmap.filename[0] == '/') {
519 name = strrchr(self->mmap.filename, '/'); 497 name = strrchr(event->mmap.filename, '/');
520 if (name == NULL) 498 if (name == NULL)
521 goto out_problem; 499 goto out_problem;
522 500
@@ -528,10 +506,10 @@ static int event__process_kernel_mmap(event_t *self,
528 "[%.*s]", (int)(dot - name), name); 506 "[%.*s]", (int)(dot - name), name);
529 strxfrchar(short_module_name, '-', '_'); 507 strxfrchar(short_module_name, '-', '_');
530 } else 508 } else
531 strcpy(short_module_name, self->mmap.filename); 509 strcpy(short_module_name, event->mmap.filename);
532 510
533 map = machine__new_module(machine, self->mmap.start, 511 map = machine__new_module(machine, event->mmap.start,
534 self->mmap.filename); 512 event->mmap.filename);
535 if (map == NULL) 513 if (map == NULL)
536 goto out_problem; 514 goto out_problem;
537 515
@@ -541,9 +519,9 @@ static int event__process_kernel_mmap(event_t *self,
541 519
542 map->dso->short_name = name; 520 map->dso->short_name = name;
543 map->dso->sname_alloc = 1; 521 map->dso->sname_alloc = 1;
544 map->end = map->start + self->mmap.len; 522 map->end = map->start + event->mmap.len;
545 } else if (is_kernel_mmap) { 523 } else if (is_kernel_mmap) {
546 const char *symbol_name = (self->mmap.filename + 524 const char *symbol_name = (event->mmap.filename +
547 strlen(kmmap_prefix)); 525 strlen(kmmap_prefix));
548 /* 526 /*
549 * Should be there already, from the build-id table in 527 * Should be there already, from the build-id table in
@@ -558,10 +536,10 @@ static int event__process_kernel_mmap(event_t *self,
558 if (__machine__create_kernel_maps(machine, kernel) < 0) 536 if (__machine__create_kernel_maps(machine, kernel) < 0)
559 goto out_problem; 537 goto out_problem;
560 538
561 event_set_kernel_mmap_len(machine->vmlinux_maps, self); 539 perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps);
562 perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, 540 perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
563 symbol_name, 541 symbol_name,
564 self->mmap.pgoff); 542 event->mmap.pgoff);
565 if (machine__is_default_guest(machine)) { 543 if (machine__is_default_guest(machine)) {
566 /* 544 /*
567 * preload dso of guest kernel and modules 545 * preload dso of guest kernel and modules
@@ -575,22 +553,23 @@ out_problem:
575 return -1; 553 return -1;
576} 554}
577 555
578int event__process_mmap(event_t *self, struct sample_data *sample __used, 556int perf_event__process_mmap(union perf_event *event,
579 struct perf_session *session) 557 struct perf_sample *sample __used,
558 struct perf_session *session)
580{ 559{
581 struct machine *machine; 560 struct machine *machine;
582 struct thread *thread; 561 struct thread *thread;
583 struct map *map; 562 struct map *map;
584 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 563 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
585 int ret = 0; 564 int ret = 0;
586 565
587 dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n", 566 dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n",
588 self->mmap.pid, self->mmap.tid, self->mmap.start, 567 event->mmap.pid, event->mmap.tid, event->mmap.start,
589 self->mmap.len, self->mmap.pgoff, self->mmap.filename); 568 event->mmap.len, event->mmap.pgoff, event->mmap.filename);
590 569
591 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || 570 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
592 cpumode == PERF_RECORD_MISC_KERNEL) { 571 cpumode == PERF_RECORD_MISC_KERNEL) {
593 ret = event__process_kernel_mmap(self, session); 572 ret = perf_event__process_kernel_mmap(event, session);
594 if (ret < 0) 573 if (ret < 0)
595 goto out_problem; 574 goto out_problem;
596 return 0; 575 return 0;
@@ -599,12 +578,12 @@ int event__process_mmap(event_t *self, struct sample_data *sample __used,
599 machine = perf_session__find_host_machine(session); 578 machine = perf_session__find_host_machine(session);
600 if (machine == NULL) 579 if (machine == NULL)
601 goto out_problem; 580 goto out_problem;
602 thread = perf_session__findnew(session, self->mmap.pid); 581 thread = perf_session__findnew(session, event->mmap.pid);
603 if (thread == NULL) 582 if (thread == NULL)
604 goto out_problem; 583 goto out_problem;
605 map = map__new(&machine->user_dsos, self->mmap.start, 584 map = map__new(&machine->user_dsos, event->mmap.start,
606 self->mmap.len, self->mmap.pgoff, 585 event->mmap.len, event->mmap.pgoff,
607 self->mmap.pid, self->mmap.filename, 586 event->mmap.pid, event->mmap.filename,
608 MAP__FUNCTION); 587 MAP__FUNCTION);
609 if (map == NULL) 588 if (map == NULL)
610 goto out_problem; 589 goto out_problem;
@@ -617,16 +596,17 @@ out_problem:
617 return 0; 596 return 0;
618} 597}
619 598
620int event__process_task(event_t *self, struct sample_data *sample __used, 599int perf_event__process_task(union perf_event *event,
621 struct perf_session *session) 600 struct perf_sample *sample __used,
601 struct perf_session *session)
622{ 602{
623 struct thread *thread = perf_session__findnew(session, self->fork.tid); 603 struct thread *thread = perf_session__findnew(session, event->fork.tid);
624 struct thread *parent = perf_session__findnew(session, self->fork.ptid); 604 struct thread *parent = perf_session__findnew(session, event->fork.ptid);
625 605
626 dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid, 606 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
627 self->fork.ppid, self->fork.ptid); 607 event->fork.ppid, event->fork.ptid);
628 608
629 if (self->header.type == PERF_RECORD_EXIT) { 609 if (event->header.type == PERF_RECORD_EXIT) {
630 perf_session__remove_thread(session, thread); 610 perf_session__remove_thread(session, thread);
631 return 0; 611 return 0;
632 } 612 }
@@ -640,20 +620,22 @@ int event__process_task(event_t *self, struct sample_data *sample __used,
640 return 0; 620 return 0;
641} 621}
642 622
643int event__process(event_t *event, struct sample_data *sample, 623int perf_event__process(union perf_event *event, struct perf_sample *sample,
644 struct perf_session *session) 624 struct perf_session *session)
645{ 625{
646 switch (event->header.type) { 626 switch (event->header.type) {
647 case PERF_RECORD_COMM: 627 case PERF_RECORD_COMM:
648 event__process_comm(event, sample, session); 628 perf_event__process_comm(event, sample, session);
649 break; 629 break;
650 case PERF_RECORD_MMAP: 630 case PERF_RECORD_MMAP:
651 event__process_mmap(event, sample, session); 631 perf_event__process_mmap(event, sample, session);
652 break; 632 break;
653 case PERF_RECORD_FORK: 633 case PERF_RECORD_FORK:
654 case PERF_RECORD_EXIT: 634 case PERF_RECORD_EXIT:
655 event__process_task(event, sample, session); 635 perf_event__process_task(event, sample, session);
656 break; 636 break;
637 case PERF_RECORD_LOST:
638 perf_event__process_lost(event, sample, session);
657 default: 639 default:
658 break; 640 break;
659 } 641 }
@@ -750,24 +732,14 @@ void thread__find_addr_location(struct thread *self,
750 al->sym = NULL; 732 al->sym = NULL;
751} 733}
752 734
753static void dso__calc_col_width(struct dso *self, struct hists *hists) 735int perf_event__preprocess_sample(const union perf_event *event,
754{ 736 struct perf_session *session,
755 if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep && 737 struct addr_location *al,
756 (!symbol_conf.dso_list || 738 struct perf_sample *sample,
757 strlist__has_entry(symbol_conf.dso_list, self->name))) { 739 symbol_filter_t filter)
758 u16 slen = dso__name_len(self);
759 hists__new_col_len(hists, HISTC_DSO, slen);
760 }
761
762 self->slen_calculated = 1;
763}
764
765int event__preprocess_sample(const event_t *self, struct perf_session *session,
766 struct addr_location *al, struct sample_data *data,
767 symbol_filter_t filter)
768{ 740{
769 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 741 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
770 struct thread *thread = perf_session__findnew(session, self->ip.pid); 742 struct thread *thread = perf_session__findnew(session, event->ip.pid);
771 743
772 if (thread == NULL) 744 if (thread == NULL)
773 return -1; 745 return -1;
@@ -789,12 +761,12 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
789 machine__create_kernel_maps(&session->host_machine); 761 machine__create_kernel_maps(&session->host_machine);
790 762
791 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 763 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
792 self->ip.pid, self->ip.ip, al); 764 event->ip.pid, event->ip.ip, al);
793 dump_printf(" ...... dso: %s\n", 765 dump_printf(" ...... dso: %s\n",
794 al->map ? al->map->dso->long_name : 766 al->map ? al->map->dso->long_name :
795 al->level == 'H' ? "[hypervisor]" : "<not found>"); 767 al->level == 'H' ? "[hypervisor]" : "<not found>");
796 al->sym = NULL; 768 al->sym = NULL;
797 al->cpu = data->cpu; 769 al->cpu = sample->cpu;
798 770
799 if (al->map) { 771 if (al->map) {
800 if (symbol_conf.dso_list && 772 if (symbol_conf.dso_list &&
@@ -805,23 +777,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
805 strlist__has_entry(symbol_conf.dso_list, 777 strlist__has_entry(symbol_conf.dso_list,
806 al->map->dso->long_name))))) 778 al->map->dso->long_name)))))
807 goto out_filtered; 779 goto out_filtered;
808 /*
809 * We have to do this here as we may have a dso with no symbol
810 * hit that has a name longer than the ones with symbols
811 * sampled.
812 */
813 if (!sort_dso.elide && !al->map->dso->slen_calculated)
814 dso__calc_col_width(al->map->dso, &session->hists);
815 780
816 al->sym = map__find_symbol(al->map, al->addr, filter); 781 al->sym = map__find_symbol(al->map, al->addr, filter);
817 } else {
818 const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
819
820 if (hists__col_len(&session->hists, HISTC_DSO) < unresolved_col_width &&
821 !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
822 !symbol_conf.dso_list)
823 hists__set_col_len(&session->hists, HISTC_DSO,
824 unresolved_col_width);
825 } 782 }
826 783
827 if (symbol_conf.sym_list && al->sym && 784 if (symbol_conf.sym_list && al->sym &&
@@ -834,128 +791,3 @@ out_filtered:
834 al->filtered = true; 791 al->filtered = true;
835 return 0; 792 return 0;
836} 793}
837
838static int event__parse_id_sample(const event_t *event,
839 struct perf_session *session,
840 struct sample_data *sample)
841{
842 const u64 *array;
843 u64 type;
844
845 sample->cpu = sample->pid = sample->tid = -1;
846 sample->stream_id = sample->id = sample->time = -1ULL;
847
848 if (!session->sample_id_all)
849 return 0;
850
851 array = event->sample.array;
852 array += ((event->header.size -
853 sizeof(event->header)) / sizeof(u64)) - 1;
854 type = session->sample_type;
855
856 if (type & PERF_SAMPLE_CPU) {
857 u32 *p = (u32 *)array;
858 sample->cpu = *p;
859 array--;
860 }
861
862 if (type & PERF_SAMPLE_STREAM_ID) {
863 sample->stream_id = *array;
864 array--;
865 }
866
867 if (type & PERF_SAMPLE_ID) {
868 sample->id = *array;
869 array--;
870 }
871
872 if (type & PERF_SAMPLE_TIME) {
873 sample->time = *array;
874 array--;
875 }
876
877 if (type & PERF_SAMPLE_TID) {
878 u32 *p = (u32 *)array;
879 sample->pid = p[0];
880 sample->tid = p[1];
881 }
882
883 return 0;
884}
885
886int event__parse_sample(const event_t *event, struct perf_session *session,
887 struct sample_data *data)
888{
889 const u64 *array;
890 u64 type;
891
892 if (event->header.type != PERF_RECORD_SAMPLE)
893 return event__parse_id_sample(event, session, data);
894
895 array = event->sample.array;
896 type = session->sample_type;
897
898 if (type & PERF_SAMPLE_IP) {
899 data->ip = event->ip.ip;
900 array++;
901 }
902
903 if (type & PERF_SAMPLE_TID) {
904 u32 *p = (u32 *)array;
905 data->pid = p[0];
906 data->tid = p[1];
907 array++;
908 }
909
910 if (type & PERF_SAMPLE_TIME) {
911 data->time = *array;
912 array++;
913 }
914
915 if (type & PERF_SAMPLE_ADDR) {
916 data->addr = *array;
917 array++;
918 }
919
920 data->id = -1ULL;
921 if (type & PERF_SAMPLE_ID) {
922 data->id = *array;
923 array++;
924 }
925
926 if (type & PERF_SAMPLE_STREAM_ID) {
927 data->stream_id = *array;
928 array++;
929 }
930
931 if (type & PERF_SAMPLE_CPU) {
932 u32 *p = (u32 *)array;
933 data->cpu = *p;
934 array++;
935 } else
936 data->cpu = -1;
937
938 if (type & PERF_SAMPLE_PERIOD) {
939 data->period = *array;
940 array++;
941 }
942
943 if (type & PERF_SAMPLE_READ) {
944 pr_debug("PERF_SAMPLE_READ is unsuported for now\n");
945 return -1;
946 }
947
948 if (type & PERF_SAMPLE_CALLCHAIN) {
949 data->callchain = (struct ip_callchain *)array;
950 array += 1 + data->callchain->nr;
951 }
952
953 if (type & PERF_SAMPLE_RAW) {
954 u32 *p = (u32 *)array;
955 data->raw_size = *p;
956 p++;
957 data->raw_data = p;
958 }
959
960 return 0;
961}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index cc7b52f9b492..9c35170fb379 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -61,7 +61,7 @@ struct sample_event {
61 u64 array[]; 61 u64 array[];
62}; 62};
63 63
64struct sample_data { 64struct perf_sample {
65 u64 ip; 65 u64 ip;
66 u32 pid, tid; 66 u32 pid, tid;
67 u64 time; 67 u64 time;
@@ -117,7 +117,7 @@ struct tracing_data_event {
117 u32 size; 117 u32 size;
118}; 118};
119 119
120typedef union event_union { 120union perf_event {
121 struct perf_event_header header; 121 struct perf_event_header header;
122 struct ip_event ip; 122 struct ip_event ip;
123 struct mmap_event mmap; 123 struct mmap_event mmap;
@@ -130,50 +130,54 @@ typedef union event_union {
130 struct event_type_event event_type; 130 struct event_type_event event_type;
131 struct tracing_data_event tracing_data; 131 struct tracing_data_event tracing_data;
132 struct build_id_event build_id; 132 struct build_id_event build_id;
133} event_t; 133};
134 134
135void event__print_totals(void); 135void perf_event__print_totals(void);
136 136
137struct perf_session; 137struct perf_session;
138struct thread_map; 138struct thread_map;
139 139
140typedef int (*event__handler_synth_t)(event_t *event, 140typedef int (*perf_event__handler_synth_t)(union perf_event *event,
141 struct perf_session *session);
142typedef int (*perf_event__handler_t)(union perf_event *event,
143 struct perf_sample *sample,
141 struct perf_session *session); 144 struct perf_session *session);
142typedef int (*event__handler_t)(event_t *event, struct sample_data *sample, 145
143 struct perf_session *session); 146int perf_event__synthesize_thread_map(struct thread_map *threads,
144 147 perf_event__handler_t process,
145int event__synthesize_thread_map(struct thread_map *threads, 148 struct perf_session *session);
146 event__handler_t process, 149int perf_event__synthesize_threads(perf_event__handler_t process,
147 struct perf_session *session); 150 struct perf_session *session);
148int event__synthesize_threads(event__handler_t process, 151int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
149 struct perf_session *session); 152 struct perf_session *session,
150int event__synthesize_kernel_mmap(event__handler_t process, 153 struct machine *machine,
151 struct perf_session *session, 154 const char *symbol_name);
152 struct machine *machine, 155
153 const char *symbol_name); 156int perf_event__synthesize_modules(perf_event__handler_t process,
154 157 struct perf_session *session,
155int event__synthesize_modules(event__handler_t process, 158 struct machine *machine);
156 struct perf_session *session, 159
157 struct machine *machine); 160int perf_event__process_comm(union perf_event *event, struct perf_sample *sample,
158 161 struct perf_session *session);
159int event__process_comm(event_t *self, struct sample_data *sample, 162int perf_event__process_lost(union perf_event *event, struct perf_sample *sample,
160 struct perf_session *session); 163 struct perf_session *session);
161int event__process_lost(event_t *self, struct sample_data *sample, 164int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample,
162 struct perf_session *session); 165 struct perf_session *session);
163int event__process_mmap(event_t *self, struct sample_data *sample, 166int perf_event__process_task(union perf_event *event, struct perf_sample *sample,
164 struct perf_session *session); 167 struct perf_session *session);
165int event__process_task(event_t *self, struct sample_data *sample, 168int perf_event__process(union perf_event *event, struct perf_sample *sample,
166 struct perf_session *session); 169 struct perf_session *session);
167int event__process(event_t *event, struct sample_data *sample,
168 struct perf_session *session);
169 170
170struct addr_location; 171struct addr_location;
171int event__preprocess_sample(const event_t *self, struct perf_session *session, 172int perf_event__preprocess_sample(const union perf_event *self,
172 struct addr_location *al, struct sample_data *data, 173 struct perf_session *session,
173 symbol_filter_t filter); 174 struct addr_location *al,
174int event__parse_sample(const event_t *event, struct perf_session *session, 175 struct perf_sample *sample,
175 struct sample_data *sample); 176 symbol_filter_t filter);
177
178const char *perf_event__name(unsigned int id);
176 179
177const char *event__get_event_name(unsigned int id); 180int perf_event__parse_sample(const union perf_event *event, u64 type,
181 bool sample_id_all, struct perf_sample *sample);
178 182
179#endif /* __PERF_RECORD_H */ 183#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
new file mode 100644
index 000000000000..d852cefa20de
--- /dev/null
+++ b/tools/perf/util/evlist.c
@@ -0,0 +1,394 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9#include <poll.h>
10#include "cpumap.h"
11#include "thread_map.h"
12#include "evlist.h"
13#include "evsel.h"
14#include "util.h"
15
16#include <sys/mman.h>
17
18#include <linux/bitops.h>
19#include <linux/hash.h>
20
21#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
22#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
23
24void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
25 struct thread_map *threads)
26{
27 int i;
28
29 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
30 INIT_HLIST_HEAD(&evlist->heads[i]);
31 INIT_LIST_HEAD(&evlist->entries);
32 perf_evlist__set_maps(evlist, cpus, threads);
33}
34
35struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
36 struct thread_map *threads)
37{
38 struct perf_evlist *evlist = zalloc(sizeof(*evlist));
39
40 if (evlist != NULL)
41 perf_evlist__init(evlist, cpus, threads);
42
43 return evlist;
44}
45
46static void perf_evlist__purge(struct perf_evlist *evlist)
47{
48 struct perf_evsel *pos, *n;
49
50 list_for_each_entry_safe(pos, n, &evlist->entries, node) {
51 list_del_init(&pos->node);
52 perf_evsel__delete(pos);
53 }
54
55 evlist->nr_entries = 0;
56}
57
58void perf_evlist__exit(struct perf_evlist *evlist)
59{
60 free(evlist->mmap);
61 free(evlist->pollfd);
62 evlist->mmap = NULL;
63 evlist->pollfd = NULL;
64}
65
66void perf_evlist__delete(struct perf_evlist *evlist)
67{
68 perf_evlist__purge(evlist);
69 perf_evlist__exit(evlist);
70 free(evlist);
71}
72
73void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
74{
75 list_add_tail(&entry->node, &evlist->entries);
76 ++evlist->nr_entries;
77}
78
79int perf_evlist__add_default(struct perf_evlist *evlist)
80{
81 struct perf_event_attr attr = {
82 .type = PERF_TYPE_HARDWARE,
83 .config = PERF_COUNT_HW_CPU_CYCLES,
84 };
85 struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
86
87 if (evsel == NULL)
88 return -ENOMEM;
89
90 perf_evlist__add(evlist, evsel);
91 return 0;
92}
93
94int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
95{
96 int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
97 evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
98 return evlist->pollfd != NULL ? 0 : -ENOMEM;
99}
100
101void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
102{
103 fcntl(fd, F_SETFL, O_NONBLOCK);
104 evlist->pollfd[evlist->nr_fds].fd = fd;
105 evlist->pollfd[evlist->nr_fds].events = POLLIN;
106 evlist->nr_fds++;
107}
108
109static void perf_evlist__id_hash(struct perf_evlist *evlist,
110 struct perf_evsel *evsel,
111 int cpu, int thread, u64 id)
112{
113 int hash;
114 struct perf_sample_id *sid = SID(evsel, cpu, thread);
115
116 sid->id = id;
117 sid->evsel = evsel;
118 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
119 hlist_add_head(&sid->node, &evlist->heads[hash]);
120}
121
122void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
123 int cpu, int thread, u64 id)
124{
125 perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
126 evsel->id[evsel->ids++] = id;
127}
128
129static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
130 struct perf_evsel *evsel,
131 int cpu, int thread, int fd)
132{
133 u64 read_data[4] = { 0, };
134 int id_idx = 1; /* The first entry is the counter value */
135
136 if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
137 read(fd, &read_data, sizeof(read_data)) == -1)
138 return -1;
139
140 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
141 ++id_idx;
142 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
143 ++id_idx;
144
145 perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]);
146 return 0;
147}
148
149struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
150{
151 struct hlist_head *head;
152 struct hlist_node *pos;
153 struct perf_sample_id *sid;
154 int hash;
155
156 if (evlist->nr_entries == 1)
157 return list_entry(evlist->entries.next, struct perf_evsel, node);
158
159 hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
160 head = &evlist->heads[hash];
161
162 hlist_for_each_entry(sid, pos, head, node)
163 if (sid->id == id)
164 return sid->evsel;
165 return NULL;
166}
167
168union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu)
169{
170 /* XXX Move this to perf.c, making it generally available */
171 unsigned int page_size = sysconf(_SC_PAGE_SIZE);
172 struct perf_mmap *md = &evlist->mmap[cpu];
173 unsigned int head = perf_mmap__read_head(md);
174 unsigned int old = md->prev;
175 unsigned char *data = md->base + page_size;
176 union perf_event *event = NULL;
177
178 if (evlist->overwrite) {
179 /*
180 * If we're further behind than half the buffer, there's a chance
181 * the writer will bite our tail and mess up the samples under us.
182 *
183 * If we somehow ended up ahead of the head, we got messed up.
184 *
185 * In either case, truncate and restart at head.
186 */
187 int diff = head - old;
188 if (diff > md->mask / 2 || diff < 0) {
189 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
190
191 /*
192 * head points to a known good entry, start there.
193 */
194 old = head;
195 }
196 }
197
198 if (old != head) {
199 size_t size;
200
201 event = (union perf_event *)&data[old & md->mask];
202 size = event->header.size;
203
204 /*
205 * Event straddles the mmap boundary -- header should always
206 * be inside due to u64 alignment of output.
207 */
208 if ((old & md->mask) + size != ((old + size) & md->mask)) {
209 unsigned int offset = old;
210 unsigned int len = min(sizeof(*event), size), cpy;
211 void *dst = &evlist->event_copy;
212
213 do {
214 cpy = min(md->mask + 1 - (offset & md->mask), len);
215 memcpy(dst, &data[offset & md->mask], cpy);
216 offset += cpy;
217 dst += cpy;
218 len -= cpy;
219 } while (len);
220
221 event = &evlist->event_copy;
222 }
223
224 old += size;
225 }
226
227 md->prev = old;
228
229 if (!evlist->overwrite)
230 perf_mmap__write_tail(md, old);
231
232 return event;
233}
234
235void perf_evlist__munmap(struct perf_evlist *evlist)
236{
237 int cpu;
238
239 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
240 if (evlist->mmap[cpu].base != NULL) {
241 munmap(evlist->mmap[cpu].base, evlist->mmap_len);
242 evlist->mmap[cpu].base = NULL;
243 }
244 }
245}
246
247int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
248{
249 evlist->mmap = zalloc(evlist->cpus->nr * sizeof(struct perf_mmap));
250 return evlist->mmap != NULL ? 0 : -ENOMEM;
251}
252
253static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot,
254 int mask, int fd)
255{
256 evlist->mmap[cpu].prev = 0;
257 evlist->mmap[cpu].mask = mask;
258 evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot,
259 MAP_SHARED, fd, 0);
260 if (evlist->mmap[cpu].base == MAP_FAILED)
261 return -1;
262
263 perf_evlist__add_pollfd(evlist, fd);
264 return 0;
265}
266
267/** perf_evlist__mmap - Create per cpu maps to receive events
268 *
269 * @evlist - list of events
270 * @pages - map length in pages
271 * @overwrite - overwrite older events?
272 *
273 * If overwrite is false the user needs to signal event consuption using:
274 *
275 * struct perf_mmap *m = &evlist->mmap[cpu];
276 * unsigned int head = perf_mmap__read_head(m);
277 *
278 * perf_mmap__write_tail(m, head)
279 *
280 * Using perf_evlist__read_on_cpu does this automatically.
281 */
282int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite)
283{
284 unsigned int page_size = sysconf(_SC_PAGE_SIZE);
285 int mask = pages * page_size - 1, cpu;
286 struct perf_evsel *first_evsel, *evsel;
287 const struct cpu_map *cpus = evlist->cpus;
288 const struct thread_map *threads = evlist->threads;
289 int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE);
290
291 if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
292 return -ENOMEM;
293
294 if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
295 return -ENOMEM;
296
297 evlist->overwrite = overwrite;
298 evlist->mmap_len = (pages + 1) * page_size;
299 first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
300
301 list_for_each_entry(evsel, &evlist->entries, node) {
302 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
303 evsel->sample_id == NULL &&
304 perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
305 return -ENOMEM;
306
307 for (cpu = 0; cpu < cpus->nr; cpu++) {
308 for (thread = 0; thread < threads->nr; thread++) {
309 int fd = FD(evsel, cpu, thread);
310
311 if (evsel->idx || thread) {
312 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
313 FD(first_evsel, cpu, 0)) != 0)
314 goto out_unmap;
315 } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0)
316 goto out_unmap;
317
318 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
319 perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
320 goto out_unmap;
321 }
322 }
323 }
324
325 return 0;
326
327out_unmap:
328 for (cpu = 0; cpu < cpus->nr; cpu++) {
329 if (evlist->mmap[cpu].base != NULL) {
330 munmap(evlist->mmap[cpu].base, evlist->mmap_len);
331 evlist->mmap[cpu].base = NULL;
332 }
333 }
334 return -1;
335}
336
337int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
338 pid_t target_tid, const char *cpu_list)
339{
340 evlist->threads = thread_map__new(target_pid, target_tid);
341
342 if (evlist->threads == NULL)
343 return -1;
344
345 if (target_tid != -1)
346 evlist->cpus = cpu_map__dummy_new();
347 else
348 evlist->cpus = cpu_map__new(cpu_list);
349
350 if (evlist->cpus == NULL)
351 goto out_delete_threads;
352
353 return 0;
354
355out_delete_threads:
356 thread_map__delete(evlist->threads);
357 return -1;
358}
359
360void perf_evlist__delete_maps(struct perf_evlist *evlist)
361{
362 cpu_map__delete(evlist->cpus);
363 thread_map__delete(evlist->threads);
364 evlist->cpus = NULL;
365 evlist->threads = NULL;
366}
367
368int perf_evlist__set_filters(struct perf_evlist *evlist)
369{
370 const struct thread_map *threads = evlist->threads;
371 const struct cpu_map *cpus = evlist->cpus;
372 struct perf_evsel *evsel;
373 char *filter;
374 int thread;
375 int cpu;
376 int err;
377 int fd;
378
379 list_for_each_entry(evsel, &evlist->entries, node) {
380 filter = evsel->filter;
381 if (!filter)
382 continue;
383 for (cpu = 0; cpu < cpus->nr; cpu++) {
384 for (thread = 0; thread < threads->nr; thread++) {
385 fd = FD(evsel, cpu, thread);
386 err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);
387 if (err)
388 return err;
389 }
390 }
391 }
392
393 return 0;
394}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
new file mode 100644
index 000000000000..8b1cb7a4c5f1
--- /dev/null
+++ b/tools/perf/util/evlist.h
@@ -0,0 +1,68 @@
1#ifndef __PERF_EVLIST_H
2#define __PERF_EVLIST_H 1
3
4#include <linux/list.h>
5#include "../perf.h"
6#include "event.h"
7
8struct pollfd;
9struct thread_map;
10struct cpu_map;
11
12#define PERF_EVLIST__HLIST_BITS 8
13#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
14
15struct perf_evlist {
16 struct list_head entries;
17 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
18 int nr_entries;
19 int nr_fds;
20 int mmap_len;
21 bool overwrite;
22 union perf_event event_copy;
23 struct perf_mmap *mmap;
24 struct pollfd *pollfd;
25 struct thread_map *threads;
26 struct cpu_map *cpus;
27};
28
29struct perf_evsel;
30
31struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
32 struct thread_map *threads);
33void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
34 struct thread_map *threads);
35void perf_evlist__exit(struct perf_evlist *evlist);
36void perf_evlist__delete(struct perf_evlist *evlist);
37
38void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
39int perf_evlist__add_default(struct perf_evlist *evlist);
40
41void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
42 int cpu, int thread, u64 id);
43
44int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
45void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
46
47struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
48
49union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu);
50
51int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
52int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
53void perf_evlist__munmap(struct perf_evlist *evlist);
54
55static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
56 struct cpu_map *cpus,
57 struct thread_map *threads)
58{
59 evlist->cpus = cpus;
60 evlist->threads = threads;
61}
62
63int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
64 pid_t target_tid, const char *cpu_list);
65void perf_evlist__delete_maps(struct perf_evlist *evlist);
66int perf_evlist__set_filters(struct perf_evlist *evlist);
67
68#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d8575d31ee6c..662596afd7f1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1,20 +1,34 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9
1#include "evsel.h" 10#include "evsel.h"
2#include "../perf.h" 11#include "evlist.h"
3#include "util.h" 12#include "util.h"
4#include "cpumap.h" 13#include "cpumap.h"
5#include "thread.h" 14#include "thread_map.h"
6 15
7#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 16#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
8 17
18void perf_evsel__init(struct perf_evsel *evsel,
19 struct perf_event_attr *attr, int idx)
20{
21 evsel->idx = idx;
22 evsel->attr = *attr;
23 INIT_LIST_HEAD(&evsel->node);
24}
25
9struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) 26struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
10{ 27{
11 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 28 struct perf_evsel *evsel = zalloc(sizeof(*evsel));
12 29
13 if (evsel != NULL) { 30 if (evsel != NULL)
14 evsel->idx = idx; 31 perf_evsel__init(evsel, attr, idx);
15 evsel->attr = *attr;
16 INIT_LIST_HEAD(&evsel->node);
17 }
18 32
19 return evsel; 33 return evsel;
20} 34}
@@ -25,6 +39,22 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
25 return evsel->fd != NULL ? 0 : -ENOMEM; 39 return evsel->fd != NULL ? 0 : -ENOMEM;
26} 40}
27 41
42int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
43{
44 evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
45 if (evsel->sample_id == NULL)
46 return -ENOMEM;
47
48 evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
49 if (evsel->id == NULL) {
50 xyarray__delete(evsel->sample_id);
51 evsel->sample_id = NULL;
52 return -ENOMEM;
53 }
54
55 return 0;
56}
57
28int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) 58int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
29{ 59{
30 evsel->counts = zalloc((sizeof(*evsel->counts) + 60 evsel->counts = zalloc((sizeof(*evsel->counts) +
@@ -38,6 +68,14 @@ void perf_evsel__free_fd(struct perf_evsel *evsel)
38 evsel->fd = NULL; 68 evsel->fd = NULL;
39} 69}
40 70
71void perf_evsel__free_id(struct perf_evsel *evsel)
72{
73 xyarray__delete(evsel->sample_id);
74 evsel->sample_id = NULL;
75 free(evsel->id);
76 evsel->id = NULL;
77}
78
41void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 79void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
42{ 80{
43 int cpu, thread; 81 int cpu, thread;
@@ -49,10 +87,19 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
49 } 87 }
50} 88}
51 89
52void perf_evsel__delete(struct perf_evsel *evsel) 90void perf_evsel__exit(struct perf_evsel *evsel)
53{ 91{
54 assert(list_empty(&evsel->node)); 92 assert(list_empty(&evsel->node));
55 xyarray__delete(evsel->fd); 93 xyarray__delete(evsel->fd);
94 xyarray__delete(evsel->sample_id);
95 free(evsel->id);
96}
97
98void perf_evsel__delete(struct perf_evsel *evsel)
99{
100 perf_evsel__exit(evsel);
101 close_cgroup(evsel->cgrp);
102 free(evsel->name);
56 free(evsel); 103 free(evsel);
57} 104}
58 105
@@ -128,21 +175,51 @@ int __perf_evsel__read(struct perf_evsel *evsel,
128} 175}
129 176
130static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 177static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
131 struct thread_map *threads) 178 struct thread_map *threads, bool group, bool inherit)
132{ 179{
133 int cpu, thread; 180 int cpu, thread;
181 unsigned long flags = 0;
182 int pid = -1;
134 183
135 if (evsel->fd == NULL && 184 if (evsel->fd == NULL &&
136 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) 185 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
137 return -1; 186 return -1;
138 187
188 if (evsel->cgrp) {
189 flags = PERF_FLAG_PID_CGROUP;
190 pid = evsel->cgrp->fd;
191 }
192
139 for (cpu = 0; cpu < cpus->nr; cpu++) { 193 for (cpu = 0; cpu < cpus->nr; cpu++) {
194 int group_fd = -1;
195 /*
196 * Don't allow mmap() of inherited per-task counters. This
197 * would create a performance issue due to all children writing
198 * to the same buffer.
199 *
200 * FIXME:
201 * Proper fix is not to pass 'inherit' to perf_evsel__open*,
202 * but a 'flags' parameter, with 'group' folded there as well,
203 * then introduce a PERF_O_{MMAP,GROUP,INHERIT} enum, and if
204 * O_MMAP is set, emit a warning if cpu < 0 and O_INHERIT is
205 * set. Lets go for the minimal fix first tho.
206 */
207 evsel->attr.inherit = (cpus->map[cpu] >= 0) && inherit;
208
140 for (thread = 0; thread < threads->nr; thread++) { 209 for (thread = 0; thread < threads->nr; thread++) {
210
211 if (!evsel->cgrp)
212 pid = threads->map[thread];
213
141 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 214 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
142 threads->map[thread], 215 pid,
143 cpus->map[cpu], -1, 0); 216 cpus->map[cpu],
217 group_fd, flags);
144 if (FD(evsel, cpu, thread) < 0) 218 if (FD(evsel, cpu, thread) < 0)
145 goto out_close; 219 goto out_close;
220
221 if (group && group_fd == -1)
222 group_fd = FD(evsel, cpu, thread);
146 } 223 }
147 } 224 }
148 225
@@ -175,10 +252,9 @@ static struct {
175 .threads = { -1, }, 252 .threads = { -1, },
176}; 253};
177 254
178int perf_evsel__open(struct perf_evsel *evsel, 255int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
179 struct cpu_map *cpus, struct thread_map *threads) 256 struct thread_map *threads, bool group, bool inherit)
180{ 257{
181
182 if (cpus == NULL) { 258 if (cpus == NULL) {
183 /* Work around old compiler warnings about strict aliasing */ 259 /* Work around old compiler warnings about strict aliasing */
184 cpus = &empty_cpu_map.map; 260 cpus = &empty_cpu_map.map;
@@ -187,15 +263,135 @@ int perf_evsel__open(struct perf_evsel *evsel,
187 if (threads == NULL) 263 if (threads == NULL)
188 threads = &empty_thread_map.map; 264 threads = &empty_thread_map.map;
189 265
190 return __perf_evsel__open(evsel, cpus, threads); 266 return __perf_evsel__open(evsel, cpus, threads, group, inherit);
191} 267}
192 268
193int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus) 269int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
270 struct cpu_map *cpus, bool group, bool inherit)
194{ 271{
195 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map); 272 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, inherit);
273}
274
275int perf_evsel__open_per_thread(struct perf_evsel *evsel,
276 struct thread_map *threads, bool group, bool inherit)
277{
278 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit);
279}
280
281static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
282 struct perf_sample *sample)
283{
284 const u64 *array = event->sample.array;
285
286 array += ((event->header.size -
287 sizeof(event->header)) / sizeof(u64)) - 1;
288
289 if (type & PERF_SAMPLE_CPU) {
290 u32 *p = (u32 *)array;
291 sample->cpu = *p;
292 array--;
293 }
294
295 if (type & PERF_SAMPLE_STREAM_ID) {
296 sample->stream_id = *array;
297 array--;
298 }
299
300 if (type & PERF_SAMPLE_ID) {
301 sample->id = *array;
302 array--;
303 }
304
305 if (type & PERF_SAMPLE_TIME) {
306 sample->time = *array;
307 array--;
308 }
309
310 if (type & PERF_SAMPLE_TID) {
311 u32 *p = (u32 *)array;
312 sample->pid = p[0];
313 sample->tid = p[1];
314 }
315
316 return 0;
196} 317}
197 318
198int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads) 319int perf_event__parse_sample(const union perf_event *event, u64 type,
320 bool sample_id_all, struct perf_sample *data)
199{ 321{
200 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); 322 const u64 *array;
323
324 data->cpu = data->pid = data->tid = -1;
325 data->stream_id = data->id = data->time = -1ULL;
326
327 if (event->header.type != PERF_RECORD_SAMPLE) {
328 if (!sample_id_all)
329 return 0;
330 return perf_event__parse_id_sample(event, type, data);
331 }
332
333 array = event->sample.array;
334
335 if (type & PERF_SAMPLE_IP) {
336 data->ip = event->ip.ip;
337 array++;
338 }
339
340 if (type & PERF_SAMPLE_TID) {
341 u32 *p = (u32 *)array;
342 data->pid = p[0];
343 data->tid = p[1];
344 array++;
345 }
346
347 if (type & PERF_SAMPLE_TIME) {
348 data->time = *array;
349 array++;
350 }
351
352 if (type & PERF_SAMPLE_ADDR) {
353 data->addr = *array;
354 array++;
355 }
356
357 data->id = -1ULL;
358 if (type & PERF_SAMPLE_ID) {
359 data->id = *array;
360 array++;
361 }
362
363 if (type & PERF_SAMPLE_STREAM_ID) {
364 data->stream_id = *array;
365 array++;
366 }
367
368 if (type & PERF_SAMPLE_CPU) {
369 u32 *p = (u32 *)array;
370 data->cpu = *p;
371 array++;
372 }
373
374 if (type & PERF_SAMPLE_PERIOD) {
375 data->period = *array;
376 array++;
377 }
378
379 if (type & PERF_SAMPLE_READ) {
380 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
381 return -1;
382 }
383
384 if (type & PERF_SAMPLE_CALLCHAIN) {
385 data->callchain = (struct ip_callchain *)array;
386 array += 1 + data->callchain->nr;
387 }
388
389 if (type & PERF_SAMPLE_RAW) {
390 u32 *p = (u32 *)array;
391 data->raw_size = *p;
392 p++;
393 data->raw_data = p;
394 }
395
396 return 0;
201} 397}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b2d755fe88a5..6710ab538342 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -6,6 +6,8 @@
6#include "../../../include/linux/perf_event.h" 6#include "../../../include/linux/perf_event.h"
7#include "types.h" 7#include "types.h"
8#include "xyarray.h" 8#include "xyarray.h"
9#include "cgroup.h"
10#include "hist.h"
9 11
10struct perf_counts_values { 12struct perf_counts_values {
11 union { 13 union {
@@ -24,31 +26,66 @@ struct perf_counts {
24 struct perf_counts_values cpu[]; 26 struct perf_counts_values cpu[];
25}; 27};
26 28
29struct perf_evsel;
30
31/*
32 * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
33 * more than one entry in the evlist.
34 */
35struct perf_sample_id {
36 struct hlist_node node;
37 u64 id;
38 struct perf_evsel *evsel;
39};
40
41/** struct perf_evsel - event selector
42 *
43 * @name - Can be set to retain the original event name passed by the user,
44 * so that when showing results in tools such as 'perf stat', we
45 * show the name used, not some alias.
46 */
27struct perf_evsel { 47struct perf_evsel {
28 struct list_head node; 48 struct list_head node;
29 struct perf_event_attr attr; 49 struct perf_event_attr attr;
30 char *filter; 50 char *filter;
31 struct xyarray *fd; 51 struct xyarray *fd;
52 struct xyarray *sample_id;
53 u64 *id;
32 struct perf_counts *counts; 54 struct perf_counts *counts;
33 int idx; 55 int idx;
34 void *priv; 56 int ids;
57 struct hists hists;
58 char *name;
59 union {
60 void *priv;
61 off_t id_offset;
62 };
63 struct cgroup_sel *cgrp;
35}; 64};
36 65
37struct cpu_map; 66struct cpu_map;
38struct thread_map; 67struct thread_map;
68struct perf_evlist;
39 69
40struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); 70struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
71void perf_evsel__init(struct perf_evsel *evsel,
72 struct perf_event_attr *attr, int idx);
73void perf_evsel__exit(struct perf_evsel *evsel);
41void perf_evsel__delete(struct perf_evsel *evsel); 74void perf_evsel__delete(struct perf_evsel *evsel);
42 75
43int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 76int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
77int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
44int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); 78int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
45void perf_evsel__free_fd(struct perf_evsel *evsel); 79void perf_evsel__free_fd(struct perf_evsel *evsel);
80void perf_evsel__free_id(struct perf_evsel *evsel);
46void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 81void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
47 82
48int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus); 83int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
49int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads); 84 struct cpu_map *cpus, bool group, bool inherit);
50int perf_evsel__open(struct perf_evsel *evsel, 85int perf_evsel__open_per_thread(struct perf_evsel *evsel,
51 struct cpu_map *cpus, struct thread_map *threads); 86 struct thread_map *threads, bool group, bool inherit);
87int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
88 struct thread_map *threads, bool group, bool inherit);
52 89
53#define perf_evsel__match(evsel, t, c) \ 90#define perf_evsel__match(evsel, t, c) \
54 (evsel->attr.type == PERF_TYPE_##t && \ 91 (evsel->attr.type == PERF_TYPE_##t && \
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c
index 67eeff571568..7adf4ad15d8f 100644
--- a/tools/perf/util/exec_cmd.c
+++ b/tools/perf/util/exec_cmd.c
@@ -11,31 +11,12 @@ static const char *argv0_path;
11 11
12const char *system_path(const char *path) 12const char *system_path(const char *path)
13{ 13{
14#ifdef RUNTIME_PREFIX
15 static const char *prefix;
16#else
17 static const char *prefix = PREFIX; 14 static const char *prefix = PREFIX;
18#endif
19 struct strbuf d = STRBUF_INIT; 15 struct strbuf d = STRBUF_INIT;
20 16
21 if (is_absolute_path(path)) 17 if (is_absolute_path(path))
22 return path; 18 return path;
23 19
24#ifdef RUNTIME_PREFIX
25 assert(argv0_path);
26 assert(is_absolute_path(argv0_path));
27
28 if (!prefix &&
29 !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
30 !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
31 !(prefix = strip_path_suffix(argv0_path, "perf"))) {
32 prefix = PREFIX;
33 fprintf(stderr, "RUNTIME_PREFIX requested, "
34 "but prefix computation failed. "
35 "Using static fallback '%s'.\n", prefix);
36 }
37#endif
38
39 strbuf_addf(&d, "%s/%s", prefix, path); 20 strbuf_addf(&d, "%s/%s", prefix, path);
40 path = strbuf_detach(&d, NULL); 21 path = strbuf_detach(&d, NULL);
41 return path; 22 return path;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f6a929e74981..e5230c0ef95b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,8 @@
8#include <linux/list.h> 8#include <linux/list.h>
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10 10
11#include "evlist.h"
12#include "evsel.h"
11#include "util.h" 13#include "util.h"
12#include "header.h" 14#include "header.h"
13#include "../perf.h" 15#include "../perf.h"
@@ -18,89 +20,6 @@
18 20
19static bool no_buildid_cache = false; 21static bool no_buildid_cache = false;
20 22
21/*
22 * Create new perf.data header attribute:
23 */
24struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr)
25{
26 struct perf_header_attr *self = malloc(sizeof(*self));
27
28 if (self != NULL) {
29 self->attr = *attr;
30 self->ids = 0;
31 self->size = 1;
32 self->id = malloc(sizeof(u64));
33 if (self->id == NULL) {
34 free(self);
35 self = NULL;
36 }
37 }
38
39 return self;
40}
41
42void perf_header_attr__delete(struct perf_header_attr *self)
43{
44 free(self->id);
45 free(self);
46}
47
48int perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
49{
50 int pos = self->ids;
51
52 self->ids++;
53 if (self->ids > self->size) {
54 int nsize = self->size * 2;
55 u64 *nid = realloc(self->id, nsize * sizeof(u64));
56
57 if (nid == NULL)
58 return -1;
59
60 self->size = nsize;
61 self->id = nid;
62 }
63 self->id[pos] = id;
64 return 0;
65}
66
67int perf_header__init(struct perf_header *self)
68{
69 self->size = 1;
70 self->attr = malloc(sizeof(void *));
71 return self->attr == NULL ? -ENOMEM : 0;
72}
73
74void perf_header__exit(struct perf_header *self)
75{
76 int i;
77 for (i = 0; i < self->attrs; ++i)
78 perf_header_attr__delete(self->attr[i]);
79 free(self->attr);
80}
81
82int perf_header__add_attr(struct perf_header *self,
83 struct perf_header_attr *attr)
84{
85 if (self->frozen)
86 return -1;
87
88 if (self->attrs == self->size) {
89 int nsize = self->size * 2;
90 struct perf_header_attr **nattr;
91
92 nattr = realloc(self->attr, nsize * sizeof(void *));
93 if (nattr == NULL)
94 return -1;
95
96 self->size = nsize;
97 self->attr = nattr;
98 }
99
100 self->attr[self->attrs++] = attr;
101 return 0;
102}
103
104static int event_count; 23static int event_count;
105static struct perf_trace_event_type *events; 24static struct perf_trace_event_type *events;
106 25
@@ -147,19 +66,19 @@ struct perf_file_attr {
147 struct perf_file_section ids; 66 struct perf_file_section ids;
148}; 67};
149 68
150void perf_header__set_feat(struct perf_header *self, int feat) 69void perf_header__set_feat(struct perf_header *header, int feat)
151{ 70{
152 set_bit(feat, self->adds_features); 71 set_bit(feat, header->adds_features);
153} 72}
154 73
155void perf_header__clear_feat(struct perf_header *self, int feat) 74void perf_header__clear_feat(struct perf_header *header, int feat)
156{ 75{
157 clear_bit(feat, self->adds_features); 76 clear_bit(feat, header->adds_features);
158} 77}
159 78
160bool perf_header__has_feat(const struct perf_header *self, int feat) 79bool perf_header__has_feat(const struct perf_header *header, int feat)
161{ 80{
162 return test_bit(feat, self->adds_features); 81 return test_bit(feat, header->adds_features);
163} 82}
164 83
165static int do_write(int fd, const void *buf, size_t size) 84static int do_write(int fd, const void *buf, size_t size)
@@ -228,22 +147,22 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
228 return 0; 147 return 0;
229} 148}
230 149
231static int machine__write_buildid_table(struct machine *self, int fd) 150static int machine__write_buildid_table(struct machine *machine, int fd)
232{ 151{
233 int err; 152 int err;
234 u16 kmisc = PERF_RECORD_MISC_KERNEL, 153 u16 kmisc = PERF_RECORD_MISC_KERNEL,
235 umisc = PERF_RECORD_MISC_USER; 154 umisc = PERF_RECORD_MISC_USER;
236 155
237 if (!machine__is_host(self)) { 156 if (!machine__is_host(machine)) {
238 kmisc = PERF_RECORD_MISC_GUEST_KERNEL; 157 kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
239 umisc = PERF_RECORD_MISC_GUEST_USER; 158 umisc = PERF_RECORD_MISC_GUEST_USER;
240 } 159 }
241 160
242 err = __dsos__write_buildid_table(&self->kernel_dsos, self->pid, 161 err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
243 kmisc, fd); 162 kmisc, fd);
244 if (err == 0) 163 if (err == 0)
245 err = __dsos__write_buildid_table(&self->user_dsos, 164 err = __dsos__write_buildid_table(&machine->user_dsos,
246 self->pid, umisc, fd); 165 machine->pid, umisc, fd);
247 return err; 166 return err;
248} 167}
249 168
@@ -270,11 +189,15 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
270 const char *name, bool is_kallsyms) 189 const char *name, bool is_kallsyms)
271{ 190{
272 const size_t size = PATH_MAX; 191 const size_t size = PATH_MAX;
273 char *realname = realpath(name, NULL), 192 char *realname, *filename = malloc(size),
274 *filename = malloc(size),
275 *linkname = malloc(size), *targetname; 193 *linkname = malloc(size), *targetname;
276 int len, err = -1; 194 int len, err = -1;
277 195
196 if (is_kallsyms)
197 realname = (char *)name;
198 else
199 realname = realpath(name, NULL);
200
278 if (realname == NULL || filename == NULL || linkname == NULL) 201 if (realname == NULL || filename == NULL || linkname == NULL)
279 goto out_free; 202 goto out_free;
280 203
@@ -306,7 +229,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
306 if (symlink(targetname, linkname) == 0) 229 if (symlink(targetname, linkname) == 0)
307 err = 0; 230 err = 0;
308out_free: 231out_free:
309 free(realname); 232 if (!is_kallsyms)
233 free(realname);
310 free(filename); 234 free(filename);
311 free(linkname); 235 free(linkname);
312 return err; 236 return err;
@@ -361,12 +285,12 @@ out_free:
361 return err; 285 return err;
362} 286}
363 287
364static int dso__cache_build_id(struct dso *self, const char *debugdir) 288static int dso__cache_build_id(struct dso *dso, const char *debugdir)
365{ 289{
366 bool is_kallsyms = self->kernel && self->long_name[0] != '/'; 290 bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
367 291
368 return build_id_cache__add_b(self->build_id, sizeof(self->build_id), 292 return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
369 self->long_name, debugdir, is_kallsyms); 293 dso->long_name, debugdir, is_kallsyms);
370} 294}
371 295
372static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) 296static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
@@ -381,14 +305,14 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
381 return err; 305 return err;
382} 306}
383 307
384static int machine__cache_build_ids(struct machine *self, const char *debugdir) 308static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
385{ 309{
386 int ret = __dsos__cache_build_ids(&self->kernel_dsos, debugdir); 310 int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
387 ret |= __dsos__cache_build_ids(&self->user_dsos, debugdir); 311 ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
388 return ret; 312 return ret;
389} 313}
390 314
391static int perf_session__cache_build_ids(struct perf_session *self) 315static int perf_session__cache_build_ids(struct perf_session *session)
392{ 316{
393 struct rb_node *nd; 317 struct rb_node *nd;
394 int ret; 318 int ret;
@@ -399,28 +323,28 @@ static int perf_session__cache_build_ids(struct perf_session *self)
399 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) 323 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
400 return -1; 324 return -1;
401 325
402 ret = machine__cache_build_ids(&self->host_machine, debugdir); 326 ret = machine__cache_build_ids(&session->host_machine, debugdir);
403 327
404 for (nd = rb_first(&self->machines); nd; nd = rb_next(nd)) { 328 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
405 struct machine *pos = rb_entry(nd, struct machine, rb_node); 329 struct machine *pos = rb_entry(nd, struct machine, rb_node);
406 ret |= machine__cache_build_ids(pos, debugdir); 330 ret |= machine__cache_build_ids(pos, debugdir);
407 } 331 }
408 return ret ? -1 : 0; 332 return ret ? -1 : 0;
409} 333}
410 334
411static bool machine__read_build_ids(struct machine *self, bool with_hits) 335static bool machine__read_build_ids(struct machine *machine, bool with_hits)
412{ 336{
413 bool ret = __dsos__read_build_ids(&self->kernel_dsos, with_hits); 337 bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
414 ret |= __dsos__read_build_ids(&self->user_dsos, with_hits); 338 ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
415 return ret; 339 return ret;
416} 340}
417 341
418static bool perf_session__read_build_ids(struct perf_session *self, bool with_hits) 342static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
419{ 343{
420 struct rb_node *nd; 344 struct rb_node *nd;
421 bool ret = machine__read_build_ids(&self->host_machine, with_hits); 345 bool ret = machine__read_build_ids(&session->host_machine, with_hits);
422 346
423 for (nd = rb_first(&self->machines); nd; nd = rb_next(nd)) { 347 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
424 struct machine *pos = rb_entry(nd, struct machine, rb_node); 348 struct machine *pos = rb_entry(nd, struct machine, rb_node);
425 ret |= machine__read_build_ids(pos, with_hits); 349 ret |= machine__read_build_ids(pos, with_hits);
426 } 350 }
@@ -428,7 +352,8 @@ static bool perf_session__read_build_ids(struct perf_session *self, bool with_hi
428 return ret; 352 return ret;
429} 353}
430 354
431static int perf_header__adds_write(struct perf_header *self, int fd) 355static int perf_header__adds_write(struct perf_header *header,
356 struct perf_evlist *evlist, int fd)
432{ 357{
433 int nr_sections; 358 int nr_sections;
434 struct perf_session *session; 359 struct perf_session *session;
@@ -437,13 +362,13 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
437 u64 sec_start; 362 u64 sec_start;
438 int idx = 0, err; 363 int idx = 0, err;
439 364
440 session = container_of(self, struct perf_session, header); 365 session = container_of(header, struct perf_session, header);
441 366
442 if (perf_header__has_feat(self, HEADER_BUILD_ID && 367 if (perf_header__has_feat(header, HEADER_BUILD_ID &&
443 !perf_session__read_build_ids(session, true))) 368 !perf_session__read_build_ids(session, true)))
444 perf_header__clear_feat(self, HEADER_BUILD_ID); 369 perf_header__clear_feat(header, HEADER_BUILD_ID);
445 370
446 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 371 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
447 if (!nr_sections) 372 if (!nr_sections)
448 return 0; 373 return 0;
449 374
@@ -453,28 +378,28 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
453 378
454 sec_size = sizeof(*feat_sec) * nr_sections; 379 sec_size = sizeof(*feat_sec) * nr_sections;
455 380
456 sec_start = self->data_offset + self->data_size; 381 sec_start = header->data_offset + header->data_size;
457 lseek(fd, sec_start + sec_size, SEEK_SET); 382 lseek(fd, sec_start + sec_size, SEEK_SET);
458 383
459 if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { 384 if (perf_header__has_feat(header, HEADER_TRACE_INFO)) {
460 struct perf_file_section *trace_sec; 385 struct perf_file_section *trace_sec;
461 386
462 trace_sec = &feat_sec[idx++]; 387 trace_sec = &feat_sec[idx++];
463 388
464 /* Write trace info */ 389 /* Write trace info */
465 trace_sec->offset = lseek(fd, 0, SEEK_CUR); 390 trace_sec->offset = lseek(fd, 0, SEEK_CUR);
466 read_tracing_data(fd, &evsel_list); 391 read_tracing_data(fd, &evlist->entries);
467 trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; 392 trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
468 } 393 }
469 394
470 if (perf_header__has_feat(self, HEADER_BUILD_ID)) { 395 if (perf_header__has_feat(header, HEADER_BUILD_ID)) {
471 struct perf_file_section *buildid_sec; 396 struct perf_file_section *buildid_sec;
472 397
473 buildid_sec = &feat_sec[idx++]; 398 buildid_sec = &feat_sec[idx++];
474 399
475 /* Write build-ids */ 400 /* Write build-ids */
476 buildid_sec->offset = lseek(fd, 0, SEEK_CUR); 401 buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
477 err = dsos__write_buildid_table(self, fd); 402 err = dsos__write_buildid_table(header, fd);
478 if (err < 0) { 403 if (err < 0) {
479 pr_debug("failed to write buildid table\n"); 404 pr_debug("failed to write buildid table\n");
480 goto out_free; 405 goto out_free;
@@ -513,32 +438,41 @@ int perf_header__write_pipe(int fd)
513 return 0; 438 return 0;
514} 439}
515 440
516int perf_header__write(struct perf_header *self, int fd, bool at_exit) 441int perf_session__write_header(struct perf_session *session,
442 struct perf_evlist *evlist,
443 int fd, bool at_exit)
517{ 444{
518 struct perf_file_header f_header; 445 struct perf_file_header f_header;
519 struct perf_file_attr f_attr; 446 struct perf_file_attr f_attr;
520 struct perf_header_attr *attr; 447 struct perf_header *header = &session->header;
521 int i, err; 448 struct perf_evsel *attr, *pair = NULL;
449 int err;
522 450
523 lseek(fd, sizeof(f_header), SEEK_SET); 451 lseek(fd, sizeof(f_header), SEEK_SET);
524 452
525 for (i = 0; i < self->attrs; i++) { 453 if (session->evlist != evlist)
526 attr = self->attr[i]; 454 pair = list_entry(session->evlist->entries.next, struct perf_evsel, node);
527 455
456 list_for_each_entry(attr, &evlist->entries, node) {
528 attr->id_offset = lseek(fd, 0, SEEK_CUR); 457 attr->id_offset = lseek(fd, 0, SEEK_CUR);
529 err = do_write(fd, attr->id, attr->ids * sizeof(u64)); 458 err = do_write(fd, attr->id, attr->ids * sizeof(u64));
530 if (err < 0) { 459 if (err < 0) {
460out_err_write:
531 pr_debug("failed to write perf header\n"); 461 pr_debug("failed to write perf header\n");
532 return err; 462 return err;
533 } 463 }
464 if (session->evlist != evlist) {
465 err = do_write(fd, pair->id, pair->ids * sizeof(u64));
466 if (err < 0)
467 goto out_err_write;
468 attr->ids += pair->ids;
469 pair = list_entry(pair->node.next, struct perf_evsel, node);
470 }
534 } 471 }
535 472
473 header->attr_offset = lseek(fd, 0, SEEK_CUR);
536 474
537 self->attr_offset = lseek(fd, 0, SEEK_CUR); 475 list_for_each_entry(attr, &evlist->entries, node) {
538
539 for (i = 0; i < self->attrs; i++) {
540 attr = self->attr[i];
541
542 f_attr = (struct perf_file_attr){ 476 f_attr = (struct perf_file_attr){
543 .attr = attr->attr, 477 .attr = attr->attr,
544 .ids = { 478 .ids = {
@@ -553,20 +487,20 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
553 } 487 }
554 } 488 }
555 489
556 self->event_offset = lseek(fd, 0, SEEK_CUR); 490 header->event_offset = lseek(fd, 0, SEEK_CUR);
557 self->event_size = event_count * sizeof(struct perf_trace_event_type); 491 header->event_size = event_count * sizeof(struct perf_trace_event_type);
558 if (events) { 492 if (events) {
559 err = do_write(fd, events, self->event_size); 493 err = do_write(fd, events, header->event_size);
560 if (err < 0) { 494 if (err < 0) {
561 pr_debug("failed to write perf header events\n"); 495 pr_debug("failed to write perf header events\n");
562 return err; 496 return err;
563 } 497 }
564 } 498 }
565 499
566 self->data_offset = lseek(fd, 0, SEEK_CUR); 500 header->data_offset = lseek(fd, 0, SEEK_CUR);
567 501
568 if (at_exit) { 502 if (at_exit) {
569 err = perf_header__adds_write(self, fd); 503 err = perf_header__adds_write(header, evlist, fd);
570 if (err < 0) 504 if (err < 0)
571 return err; 505 return err;
572 } 506 }
@@ -576,20 +510,20 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
576 .size = sizeof(f_header), 510 .size = sizeof(f_header),
577 .attr_size = sizeof(f_attr), 511 .attr_size = sizeof(f_attr),
578 .attrs = { 512 .attrs = {
579 .offset = self->attr_offset, 513 .offset = header->attr_offset,
580 .size = self->attrs * sizeof(f_attr), 514 .size = evlist->nr_entries * sizeof(f_attr),
581 }, 515 },
582 .data = { 516 .data = {
583 .offset = self->data_offset, 517 .offset = header->data_offset,
584 .size = self->data_size, 518 .size = header->data_size,
585 }, 519 },
586 .event_types = { 520 .event_types = {
587 .offset = self->event_offset, 521 .offset = header->event_offset,
588 .size = self->event_size, 522 .size = header->event_size,
589 }, 523 },
590 }; 524 };
591 525
592 memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); 526 memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
593 527
594 lseek(fd, 0, SEEK_SET); 528 lseek(fd, 0, SEEK_SET);
595 err = do_write(fd, &f_header, sizeof(f_header)); 529 err = do_write(fd, &f_header, sizeof(f_header));
@@ -597,26 +531,26 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
597 pr_debug("failed to write perf header\n"); 531 pr_debug("failed to write perf header\n");
598 return err; 532 return err;
599 } 533 }
600 lseek(fd, self->data_offset + self->data_size, SEEK_SET); 534 lseek(fd, header->data_offset + header->data_size, SEEK_SET);
601 535
602 self->frozen = 1; 536 header->frozen = 1;
603 return 0; 537 return 0;
604} 538}
605 539
606static int perf_header__getbuffer64(struct perf_header *self, 540static int perf_header__getbuffer64(struct perf_header *header,
607 int fd, void *buf, size_t size) 541 int fd, void *buf, size_t size)
608{ 542{
609 if (readn(fd, buf, size) <= 0) 543 if (readn(fd, buf, size) <= 0)
610 return -1; 544 return -1;
611 545
612 if (self->needs_swap) 546 if (header->needs_swap)
613 mem_bswap_64(buf, size); 547 mem_bswap_64(buf, size);
614 548
615 return 0; 549 return 0;
616} 550}
617 551
618int perf_header__process_sections(struct perf_header *self, int fd, 552int perf_header__process_sections(struct perf_header *header, int fd,
619 int (*process)(struct perf_file_section *self, 553 int (*process)(struct perf_file_section *section,
620 struct perf_header *ph, 554 struct perf_header *ph,
621 int feat, int fd)) 555 int feat, int fd))
622{ 556{
@@ -626,7 +560,7 @@ int perf_header__process_sections(struct perf_header *self, int fd,
626 int idx = 0; 560 int idx = 0;
627 int err = -1, feat = 1; 561 int err = -1, feat = 1;
628 562
629 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 563 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
630 if (!nr_sections) 564 if (!nr_sections)
631 return 0; 565 return 0;
632 566
@@ -636,17 +570,17 @@ int perf_header__process_sections(struct perf_header *self, int fd,
636 570
637 sec_size = sizeof(*feat_sec) * nr_sections; 571 sec_size = sizeof(*feat_sec) * nr_sections;
638 572
639 lseek(fd, self->data_offset + self->data_size, SEEK_SET); 573 lseek(fd, header->data_offset + header->data_size, SEEK_SET);
640 574
641 if (perf_header__getbuffer64(self, fd, feat_sec, sec_size)) 575 if (perf_header__getbuffer64(header, fd, feat_sec, sec_size))
642 goto out_free; 576 goto out_free;
643 577
644 err = 0; 578 err = 0;
645 while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { 579 while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
646 if (perf_header__has_feat(self, feat)) { 580 if (perf_header__has_feat(header, feat)) {
647 struct perf_file_section *sec = &feat_sec[idx++]; 581 struct perf_file_section *sec = &feat_sec[idx++];
648 582
649 err = process(sec, self, feat, fd); 583 err = process(sec, header, feat, fd);
650 if (err < 0) 584 if (err < 0)
651 break; 585 break;
652 } 586 }
@@ -657,35 +591,35 @@ out_free:
657 return err; 591 return err;
658} 592}
659 593
660int perf_file_header__read(struct perf_file_header *self, 594int perf_file_header__read(struct perf_file_header *header,
661 struct perf_header *ph, int fd) 595 struct perf_header *ph, int fd)
662{ 596{
663 lseek(fd, 0, SEEK_SET); 597 lseek(fd, 0, SEEK_SET);
664 598
665 if (readn(fd, self, sizeof(*self)) <= 0 || 599 if (readn(fd, header, sizeof(*header)) <= 0 ||
666 memcmp(&self->magic, __perf_magic, sizeof(self->magic))) 600 memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
667 return -1; 601 return -1;
668 602
669 if (self->attr_size != sizeof(struct perf_file_attr)) { 603 if (header->attr_size != sizeof(struct perf_file_attr)) {
670 u64 attr_size = bswap_64(self->attr_size); 604 u64 attr_size = bswap_64(header->attr_size);
671 605
672 if (attr_size != sizeof(struct perf_file_attr)) 606 if (attr_size != sizeof(struct perf_file_attr))
673 return -1; 607 return -1;
674 608
675 mem_bswap_64(self, offsetof(struct perf_file_header, 609 mem_bswap_64(header, offsetof(struct perf_file_header,
676 adds_features)); 610 adds_features));
677 ph->needs_swap = true; 611 ph->needs_swap = true;
678 } 612 }
679 613
680 if (self->size != sizeof(*self)) { 614 if (header->size != sizeof(*header)) {
681 /* Support the previous format */ 615 /* Support the previous format */
682 if (self->size == offsetof(typeof(*self), adds_features)) 616 if (header->size == offsetof(typeof(*header), adds_features))
683 bitmap_zero(self->adds_features, HEADER_FEAT_BITS); 617 bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
684 else 618 else
685 return -1; 619 return -1;
686 } 620 }
687 621
688 memcpy(&ph->adds_features, &self->adds_features, 622 memcpy(&ph->adds_features, &header->adds_features,
689 sizeof(ph->adds_features)); 623 sizeof(ph->adds_features));
690 /* 624 /*
691 * FIXME: hack that assumes that if we need swap the perf.data file 625 * FIXME: hack that assumes that if we need swap the perf.data file
@@ -699,10 +633,10 @@ int perf_file_header__read(struct perf_file_header *self,
699 perf_header__set_feat(ph, HEADER_BUILD_ID); 633 perf_header__set_feat(ph, HEADER_BUILD_ID);
700 } 634 }
701 635
702 ph->event_offset = self->event_types.offset; 636 ph->event_offset = header->event_types.offset;
703 ph->event_size = self->event_types.size; 637 ph->event_size = header->event_types.size;
704 ph->data_offset = self->data.offset; 638 ph->data_offset = header->data.offset;
705 ph->data_size = self->data.size; 639 ph->data_size = header->data.size;
706 return 0; 640 return 0;
707} 641}
708 642
@@ -761,11 +695,10 @@ out:
761 return err; 695 return err;
762} 696}
763 697
764static int perf_header__read_build_ids(struct perf_header *self, 698static int perf_header__read_build_ids(struct perf_header *header,
765 int input, u64 offset, u64 size) 699 int input, u64 offset, u64 size)
766{ 700{
767 struct perf_session *session = container_of(self, 701 struct perf_session *session = container_of(header, struct perf_session, header);
768 struct perf_session, header);
769 struct build_id_event bev; 702 struct build_id_event bev;
770 char filename[PATH_MAX]; 703 char filename[PATH_MAX];
771 u64 limit = offset + size; 704 u64 limit = offset + size;
@@ -777,7 +710,7 @@ static int perf_header__read_build_ids(struct perf_header *self,
777 if (read(input, &bev, sizeof(bev)) != sizeof(bev)) 710 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
778 goto out; 711 goto out;
779 712
780 if (self->needs_swap) 713 if (header->needs_swap)
781 perf_event_header__bswap(&bev.header); 714 perf_event_header__bswap(&bev.header);
782 715
783 len = bev.header.size - sizeof(bev); 716 len = bev.header.size - sizeof(bev);
@@ -793,13 +726,13 @@ out:
793 return err; 726 return err;
794} 727}
795 728
796static int perf_file_section__process(struct perf_file_section *self, 729static int perf_file_section__process(struct perf_file_section *section,
797 struct perf_header *ph, 730 struct perf_header *ph,
798 int feat, int fd) 731 int feat, int fd)
799{ 732{
800 if (lseek(fd, self->offset, SEEK_SET) == (off_t)-1) { 733 if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
801 pr_debug("Failed to lseek to %" PRIu64 " offset for feature " 734 pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
802 "%d, continuing...\n", self->offset, feat); 735 "%d, continuing...\n", section->offset, feat);
803 return 0; 736 return 0;
804 } 737 }
805 738
@@ -809,7 +742,7 @@ static int perf_file_section__process(struct perf_file_section *self,
809 break; 742 break;
810 743
811 case HEADER_BUILD_ID: 744 case HEADER_BUILD_ID:
812 if (perf_header__read_build_ids(ph, fd, self->offset, self->size)) 745 if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
813 pr_debug("Failed to read buildids, continuing...\n"); 746 pr_debug("Failed to read buildids, continuing...\n");
814 break; 747 break;
815 default: 748 default:
@@ -819,21 +752,21 @@ static int perf_file_section__process(struct perf_file_section *self,
819 return 0; 752 return 0;
820} 753}
821 754
822static int perf_file_header__read_pipe(struct perf_pipe_file_header *self, 755static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
823 struct perf_header *ph, int fd, 756 struct perf_header *ph, int fd,
824 bool repipe) 757 bool repipe)
825{ 758{
826 if (readn(fd, self, sizeof(*self)) <= 0 || 759 if (readn(fd, header, sizeof(*header)) <= 0 ||
827 memcmp(&self->magic, __perf_magic, sizeof(self->magic))) 760 memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
828 return -1; 761 return -1;
829 762
830 if (repipe && do_write(STDOUT_FILENO, self, sizeof(*self)) < 0) 763 if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
831 return -1; 764 return -1;
832 765
833 if (self->size != sizeof(*self)) { 766 if (header->size != sizeof(*header)) {
834 u64 size = bswap_64(self->size); 767 u64 size = bswap_64(header->size);
835 768
836 if (size != sizeof(*self)) 769 if (size != sizeof(*header))
837 return -1; 770 return -1;
838 771
839 ph->needs_swap = true; 772 ph->needs_swap = true;
@@ -844,10 +777,10 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *self,
844 777
845static int perf_header__read_pipe(struct perf_session *session, int fd) 778static int perf_header__read_pipe(struct perf_session *session, int fd)
846{ 779{
847 struct perf_header *self = &session->header; 780 struct perf_header *header = &session->header;
848 struct perf_pipe_file_header f_header; 781 struct perf_pipe_file_header f_header;
849 782
850 if (perf_file_header__read_pipe(&f_header, self, fd, 783 if (perf_file_header__read_pipe(&f_header, header, fd,
851 session->repipe) < 0) { 784 session->repipe) < 0) {
852 pr_debug("incompatible file format\n"); 785 pr_debug("incompatible file format\n");
853 return -EINVAL; 786 return -EINVAL;
@@ -858,18 +791,22 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
858 return 0; 791 return 0;
859} 792}
860 793
861int perf_header__read(struct perf_session *session, int fd) 794int perf_session__read_header(struct perf_session *session, int fd)
862{ 795{
863 struct perf_header *self = &session->header; 796 struct perf_header *header = &session->header;
864 struct perf_file_header f_header; 797 struct perf_file_header f_header;
865 struct perf_file_attr f_attr; 798 struct perf_file_attr f_attr;
866 u64 f_id; 799 u64 f_id;
867 int nr_attrs, nr_ids, i, j; 800 int nr_attrs, nr_ids, i, j;
868 801
802 session->evlist = perf_evlist__new(NULL, NULL);
803 if (session->evlist == NULL)
804 return -ENOMEM;
805
869 if (session->fd_pipe) 806 if (session->fd_pipe)
870 return perf_header__read_pipe(session, fd); 807 return perf_header__read_pipe(session, fd);
871 808
872 if (perf_file_header__read(&f_header, self, fd) < 0) { 809 if (perf_file_header__read(&f_header, header, fd) < 0) {
873 pr_debug("incompatible file format\n"); 810 pr_debug("incompatible file format\n");
874 return -EINVAL; 811 return -EINVAL;
875 } 812 }
@@ -878,33 +815,39 @@ int perf_header__read(struct perf_session *session, int fd)
878 lseek(fd, f_header.attrs.offset, SEEK_SET); 815 lseek(fd, f_header.attrs.offset, SEEK_SET);
879 816
880 for (i = 0; i < nr_attrs; i++) { 817 for (i = 0; i < nr_attrs; i++) {
881 struct perf_header_attr *attr; 818 struct perf_evsel *evsel;
882 off_t tmp; 819 off_t tmp;
883 820
884 if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr))) 821 if (perf_header__getbuffer64(header, fd, &f_attr, sizeof(f_attr)))
885 goto out_errno; 822 goto out_errno;
886 823
887 tmp = lseek(fd, 0, SEEK_CUR); 824 tmp = lseek(fd, 0, SEEK_CUR);
825 evsel = perf_evsel__new(&f_attr.attr, i);
888 826
889 attr = perf_header_attr__new(&f_attr.attr); 827 if (evsel == NULL)
890 if (attr == NULL) 828 goto out_delete_evlist;
891 return -ENOMEM; 829 /*
830 * Do it before so that if perf_evsel__alloc_id fails, this
831 * entry gets purged too at perf_evlist__delete().
832 */
833 perf_evlist__add(session->evlist, evsel);
892 834
893 nr_ids = f_attr.ids.size / sizeof(u64); 835 nr_ids = f_attr.ids.size / sizeof(u64);
836 /*
837 * We don't have the cpu and thread maps on the header, so
838 * for allocating the perf_sample_id table we fake 1 cpu and
839 * hattr->ids threads.
840 */
841 if (perf_evsel__alloc_id(evsel, 1, nr_ids))
842 goto out_delete_evlist;
843
894 lseek(fd, f_attr.ids.offset, SEEK_SET); 844 lseek(fd, f_attr.ids.offset, SEEK_SET);
895 845
896 for (j = 0; j < nr_ids; j++) { 846 for (j = 0; j < nr_ids; j++) {
897 if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id))) 847 if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
898 goto out_errno; 848 goto out_errno;
899 849
900 if (perf_header_attr__add_id(attr, f_id) < 0) { 850 perf_evlist__id_add(session->evlist, evsel, 0, j, f_id);
901 perf_header_attr__delete(attr);
902 return -ENOMEM;
903 }
904 }
905 if (perf_header__add_attr(self, attr) < 0) {
906 perf_header_attr__delete(attr);
907 return -ENOMEM;
908 } 851 }
909 852
910 lseek(fd, tmp, SEEK_SET); 853 lseek(fd, tmp, SEEK_SET);
@@ -915,93 +858,63 @@ int perf_header__read(struct perf_session *session, int fd)
915 events = malloc(f_header.event_types.size); 858 events = malloc(f_header.event_types.size);
916 if (events == NULL) 859 if (events == NULL)
917 return -ENOMEM; 860 return -ENOMEM;
918 if (perf_header__getbuffer64(self, fd, events, 861 if (perf_header__getbuffer64(header, fd, events,
919 f_header.event_types.size)) 862 f_header.event_types.size))
920 goto out_errno; 863 goto out_errno;
921 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); 864 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
922 } 865 }
923 866
924 perf_header__process_sections(self, fd, perf_file_section__process); 867 perf_header__process_sections(header, fd, perf_file_section__process);
925 868
926 lseek(fd, self->data_offset, SEEK_SET); 869 lseek(fd, header->data_offset, SEEK_SET);
927 870
928 self->frozen = 1; 871 header->frozen = 1;
929 return 0; 872 return 0;
930out_errno: 873out_errno:
931 return -errno; 874 return -errno;
875
876out_delete_evlist:
877 perf_evlist__delete(session->evlist);
878 session->evlist = NULL;
879 return -ENOMEM;
932} 880}
933 881
934u64 perf_header__sample_type(struct perf_header *header) 882u64 perf_evlist__sample_type(struct perf_evlist *evlist)
935{ 883{
884 struct perf_evsel *pos;
936 u64 type = 0; 885 u64 type = 0;
937 int i;
938
939 for (i = 0; i < header->attrs; i++) {
940 struct perf_header_attr *attr = header->attr[i];
941 886
887 list_for_each_entry(pos, &evlist->entries, node) {
942 if (!type) 888 if (!type)
943 type = attr->attr.sample_type; 889 type = pos->attr.sample_type;
944 else if (type != attr->attr.sample_type) 890 else if (type != pos->attr.sample_type)
945 die("non matching sample_type"); 891 die("non matching sample_type");
946 } 892 }
947 893
948 return type; 894 return type;
949} 895}
950 896
951bool perf_header__sample_id_all(const struct perf_header *header) 897bool perf_evlist__sample_id_all(const struct perf_evlist *evlist)
952{ 898{
953 bool value = false, first = true; 899 bool value = false, first = true;
954 int i; 900 struct perf_evsel *pos;
955
956 for (i = 0; i < header->attrs; i++) {
957 struct perf_header_attr *attr = header->attr[i];
958 901
902 list_for_each_entry(pos, &evlist->entries, node) {
959 if (first) { 903 if (first) {
960 value = attr->attr.sample_id_all; 904 value = pos->attr.sample_id_all;
961 first = false; 905 first = false;
962 } else if (value != attr->attr.sample_id_all) 906 } else if (value != pos->attr.sample_id_all)
963 die("non matching sample_id_all"); 907 die("non matching sample_id_all");
964 } 908 }
965 909
966 return value; 910 return value;
967} 911}
968 912
969struct perf_event_attr * 913int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
970perf_header__find_attr(u64 id, struct perf_header *header) 914 perf_event__handler_t process,
971{ 915 struct perf_session *session)
972 int i;
973
974 /*
975 * We set id to -1 if the data file doesn't contain sample
976 * ids. This can happen when the data file contains one type
977 * of event and in that case, the header can still store the
978 * event attribute information. Check for this and avoid
979 * walking through the entire list of ids which may be large.
980 */
981 if (id == -1ULL) {
982 if (header->attrs > 0)
983 return &header->attr[0]->attr;
984 return NULL;
985 }
986
987 for (i = 0; i < header->attrs; i++) {
988 struct perf_header_attr *attr = header->attr[i];
989 int j;
990
991 for (j = 0; j < attr->ids; j++) {
992 if (attr->id[j] == id)
993 return &attr->attr;
994 }
995 }
996
997 return NULL;
998}
999
1000int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
1001 event__handler_t process,
1002 struct perf_session *session)
1003{ 916{
1004 event_t *ev; 917 union perf_event *ev;
1005 size_t size; 918 size_t size;
1006 int err; 919 int err;
1007 920
@@ -1028,17 +941,15 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
1028 return err; 941 return err;
1029} 942}
1030 943
1031int event__synthesize_attrs(struct perf_header *self, event__handler_t process, 944int perf_session__synthesize_attrs(struct perf_session *session,
1032 struct perf_session *session) 945 perf_event__handler_t process)
1033{ 946{
1034 struct perf_header_attr *attr; 947 struct perf_evsel *attr;
1035 int i, err = 0; 948 int err = 0;
1036
1037 for (i = 0; i < self->attrs; i++) {
1038 attr = self->attr[i];
1039 949
1040 err = event__synthesize_attr(&attr->attr, attr->ids, attr->id, 950 list_for_each_entry(attr, &session->evlist->entries, node) {
1041 process, session); 951 err = perf_event__synthesize_attr(&attr->attr, attr->ids,
952 attr->id, process, session);
1042 if (err) { 953 if (err) {
1043 pr_debug("failed to create perf header attribute\n"); 954 pr_debug("failed to create perf header attribute\n");
1044 return err; 955 return err;
@@ -1048,29 +959,39 @@ int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
1048 return err; 959 return err;
1049} 960}
1050 961
1051int event__process_attr(event_t *self, struct perf_session *session) 962int perf_event__process_attr(union perf_event *event,
963 struct perf_session *session)
1052{ 964{
1053 struct perf_header_attr *attr;
1054 unsigned int i, ids, n_ids; 965 unsigned int i, ids, n_ids;
966 struct perf_evsel *evsel;
967
968 if (session->evlist == NULL) {
969 session->evlist = perf_evlist__new(NULL, NULL);
970 if (session->evlist == NULL)
971 return -ENOMEM;
972 }
1055 973
1056 attr = perf_header_attr__new(&self->attr.attr); 974 evsel = perf_evsel__new(&event->attr.attr,
1057 if (attr == NULL) 975 session->evlist->nr_entries);
976 if (evsel == NULL)
1058 return -ENOMEM; 977 return -ENOMEM;
1059 978
1060 ids = self->header.size; 979 perf_evlist__add(session->evlist, evsel);
1061 ids -= (void *)&self->attr.id - (void *)self; 980
981 ids = event->header.size;
982 ids -= (void *)&event->attr.id - (void *)event;
1062 n_ids = ids / sizeof(u64); 983 n_ids = ids / sizeof(u64);
984 /*
985 * We don't have the cpu and thread maps on the header, so
986 * for allocating the perf_sample_id table we fake 1 cpu and
987 * hattr->ids threads.
988 */
989 if (perf_evsel__alloc_id(evsel, 1, n_ids))
990 return -ENOMEM;
1063 991
1064 for (i = 0; i < n_ids; i++) { 992 for (i = 0; i < n_ids; i++) {
1065 if (perf_header_attr__add_id(attr, self->attr.id[i]) < 0) { 993 perf_evlist__id_add(session->evlist, evsel, 0, i,
1066 perf_header_attr__delete(attr); 994 event->attr.id[i]);
1067 return -ENOMEM;
1068 }
1069 }
1070
1071 if (perf_header__add_attr(&session->header, attr) < 0) {
1072 perf_header_attr__delete(attr);
1073 return -ENOMEM;
1074 } 995 }
1075 996
1076 perf_session__update_sample_type(session); 997 perf_session__update_sample_type(session);
@@ -1078,11 +999,11 @@ int event__process_attr(event_t *self, struct perf_session *session)
1078 return 0; 999 return 0;
1079} 1000}
1080 1001
1081int event__synthesize_event_type(u64 event_id, char *name, 1002int perf_event__synthesize_event_type(u64 event_id, char *name,
1082 event__handler_t process, 1003 perf_event__handler_t process,
1083 struct perf_session *session) 1004 struct perf_session *session)
1084{ 1005{
1085 event_t ev; 1006 union perf_event ev;
1086 size_t size = 0; 1007 size_t size = 0;
1087 int err = 0; 1008 int err = 0;
1088 1009
@@ -1103,8 +1024,8 @@ int event__synthesize_event_type(u64 event_id, char *name,
1103 return err; 1024 return err;
1104} 1025}
1105 1026
1106int event__synthesize_event_types(event__handler_t process, 1027int perf_event__synthesize_event_types(perf_event__handler_t process,
1107 struct perf_session *session) 1028 struct perf_session *session)
1108{ 1029{
1109 struct perf_trace_event_type *type; 1030 struct perf_trace_event_type *type;
1110 int i, err = 0; 1031 int i, err = 0;
@@ -1112,8 +1033,9 @@ int event__synthesize_event_types(event__handler_t process,
1112 for (i = 0; i < event_count; i++) { 1033 for (i = 0; i < event_count; i++) {
1113 type = &events[i]; 1034 type = &events[i];
1114 1035
1115 err = event__synthesize_event_type(type->event_id, type->name, 1036 err = perf_event__synthesize_event_type(type->event_id,
1116 process, session); 1037 type->name, process,
1038 session);
1117 if (err) { 1039 if (err) {
1118 pr_debug("failed to create perf header event type\n"); 1040 pr_debug("failed to create perf header event type\n");
1119 return err; 1041 return err;
@@ -1123,28 +1045,28 @@ int event__synthesize_event_types(event__handler_t process,
1123 return err; 1045 return err;
1124} 1046}
1125 1047
1126int event__process_event_type(event_t *self, 1048int perf_event__process_event_type(union perf_event *event,
1127 struct perf_session *session __unused) 1049 struct perf_session *session __unused)
1128{ 1050{
1129 if (perf_header__push_event(self->event_type.event_type.event_id, 1051 if (perf_header__push_event(event->event_type.event_type.event_id,
1130 self->event_type.event_type.name) < 0) 1052 event->event_type.event_type.name) < 0)
1131 return -ENOMEM; 1053 return -ENOMEM;
1132 1054
1133 return 0; 1055 return 0;
1134} 1056}
1135 1057
1136int event__synthesize_tracing_data(int fd, struct list_head *pattrs, 1058int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
1137 event__handler_t process, 1059 perf_event__handler_t process,
1138 struct perf_session *session __unused) 1060 struct perf_session *session __unused)
1139{ 1061{
1140 event_t ev; 1062 union perf_event ev;
1141 ssize_t size = 0, aligned_size = 0, padding; 1063 ssize_t size = 0, aligned_size = 0, padding;
1142 int err = 0; 1064 int err __used = 0;
1143 1065
1144 memset(&ev, 0, sizeof(ev)); 1066 memset(&ev, 0, sizeof(ev));
1145 1067
1146 ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; 1068 ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
1147 size = read_tracing_data_size(fd, pattrs); 1069 size = read_tracing_data_size(fd, &evlist->entries);
1148 if (size <= 0) 1070 if (size <= 0)
1149 return size; 1071 return size;
1150 aligned_size = ALIGN(size, sizeof(u64)); 1072 aligned_size = ALIGN(size, sizeof(u64));
@@ -1154,16 +1076,16 @@ int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
1154 1076
1155 process(&ev, NULL, session); 1077 process(&ev, NULL, session);
1156 1078
1157 err = read_tracing_data(fd, pattrs); 1079 err = read_tracing_data(fd, &evlist->entries);
1158 write_padded(fd, NULL, 0, padding); 1080 write_padded(fd, NULL, 0, padding);
1159 1081
1160 return aligned_size; 1082 return aligned_size;
1161} 1083}
1162 1084
1163int event__process_tracing_data(event_t *self, 1085int perf_event__process_tracing_data(union perf_event *event,
1164 struct perf_session *session) 1086 struct perf_session *session)
1165{ 1087{
1166 ssize_t size_read, padding, size = self->tracing_data.size; 1088 ssize_t size_read, padding, size = event->tracing_data.size;
1167 off_t offset = lseek(session->fd, 0, SEEK_CUR); 1089 off_t offset = lseek(session->fd, 0, SEEK_CUR);
1168 char buf[BUFSIZ]; 1090 char buf[BUFSIZ];
1169 1091
@@ -1189,12 +1111,12 @@ int event__process_tracing_data(event_t *self,
1189 return size_read + padding; 1111 return size_read + padding;
1190} 1112}
1191 1113
1192int event__synthesize_build_id(struct dso *pos, u16 misc, 1114int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
1193 event__handler_t process, 1115 perf_event__handler_t process,
1194 struct machine *machine, 1116 struct machine *machine,
1195 struct perf_session *session) 1117 struct perf_session *session)
1196{ 1118{
1197 event_t ev; 1119 union perf_event ev;
1198 size_t len; 1120 size_t len;
1199 int err = 0; 1121 int err = 0;
1200 1122
@@ -1217,11 +1139,11 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
1217 return err; 1139 return err;
1218} 1140}
1219 1141
1220int event__process_build_id(event_t *self, 1142int perf_event__process_build_id(union perf_event *event,
1221 struct perf_session *session) 1143 struct perf_session *session)
1222{ 1144{
1223 __event_process_build_id(&self->build_id, 1145 __event_process_build_id(&event->build_id,
1224 self->build_id.filename, 1146 event->build_id.filename,
1225 session); 1147 session);
1226 return 0; 1148 return 0;
1227} 1149}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 33f16be7b72f..456661d7f10e 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -9,13 +9,6 @@
9 9
10#include <linux/bitmap.h> 10#include <linux/bitmap.h>
11 11
12struct perf_header_attr {
13 struct perf_event_attr attr;
14 int ids, size;
15 u64 *id;
16 off_t id_offset;
17};
18
19enum { 12enum {
20 HEADER_TRACE_INFO = 1, 13 HEADER_TRACE_INFO = 1,
21 HEADER_BUILD_ID, 14 HEADER_BUILD_ID,
@@ -46,14 +39,12 @@ struct perf_pipe_file_header {
46 39
47struct perf_header; 40struct perf_header;
48 41
49int perf_file_header__read(struct perf_file_header *self, 42int perf_file_header__read(struct perf_file_header *header,
50 struct perf_header *ph, int fd); 43 struct perf_header *ph, int fd);
51 44
52struct perf_header { 45struct perf_header {
53 int frozen; 46 int frozen;
54 int attrs, size;
55 bool needs_swap; 47 bool needs_swap;
56 struct perf_header_attr **attr;
57 s64 attr_offset; 48 s64 attr_offset;
58 u64 data_offset; 49 u64 data_offset;
59 u64 data_size; 50 u64 data_size;
@@ -62,34 +53,25 @@ struct perf_header {
62 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); 53 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
63}; 54};
64 55
65int perf_header__init(struct perf_header *self); 56struct perf_evlist;
66void perf_header__exit(struct perf_header *self);
67 57
68int perf_header__read(struct perf_session *session, int fd); 58int perf_session__read_header(struct perf_session *session, int fd);
69int perf_header__write(struct perf_header *self, int fd, bool at_exit); 59int perf_session__write_header(struct perf_session *session,
60 struct perf_evlist *evlist,
61 int fd, bool at_exit);
70int perf_header__write_pipe(int fd); 62int perf_header__write_pipe(int fd);
71 63
72int perf_header__add_attr(struct perf_header *self,
73 struct perf_header_attr *attr);
74
75int perf_header__push_event(u64 id, const char *name); 64int perf_header__push_event(u64 id, const char *name);
76char *perf_header__find_event(u64 id); 65char *perf_header__find_event(u64 id);
77 66
78struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); 67u64 perf_evlist__sample_type(struct perf_evlist *evlist);
79void perf_header_attr__delete(struct perf_header_attr *self); 68bool perf_evlist__sample_id_all(const struct perf_evlist *evlist);
69void perf_header__set_feat(struct perf_header *header, int feat);
70void perf_header__clear_feat(struct perf_header *header, int feat);
71bool perf_header__has_feat(const struct perf_header *header, int feat);
80 72
81int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); 73int perf_header__process_sections(struct perf_header *header, int fd,
82 74 int (*process)(struct perf_file_section *section,
83u64 perf_header__sample_type(struct perf_header *header);
84bool perf_header__sample_id_all(const struct perf_header *header);
85struct perf_event_attr *
86perf_header__find_attr(u64 id, struct perf_header *header);
87void perf_header__set_feat(struct perf_header *self, int feat);
88void perf_header__clear_feat(struct perf_header *self, int feat);
89bool perf_header__has_feat(const struct perf_header *self, int feat);
90
91int perf_header__process_sections(struct perf_header *self, int fd,
92 int (*process)(struct perf_file_section *self,
93 struct perf_header *ph, 75 struct perf_header *ph,
94 int feat, int fd)); 76 int feat, int fd));
95 77
@@ -97,32 +79,31 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
97 const char *name, bool is_kallsyms); 79 const char *name, bool is_kallsyms);
98int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); 80int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
99 81
100int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, 82int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
101 event__handler_t process, 83 perf_event__handler_t process,
102 struct perf_session *session);
103int event__synthesize_attrs(struct perf_header *self,
104 event__handler_t process,
105 struct perf_session *session);
106int event__process_attr(event_t *self, struct perf_session *session);
107
108int event__synthesize_event_type(u64 event_id, char *name,
109 event__handler_t process,
110 struct perf_session *session);
111int event__synthesize_event_types(event__handler_t process,
112 struct perf_session *session);
113int event__process_event_type(event_t *self,
114 struct perf_session *session);
115
116int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
117 event__handler_t process,
118 struct perf_session *session);
119int event__process_tracing_data(event_t *self,
120 struct perf_session *session); 84 struct perf_session *session);
85int perf_session__synthesize_attrs(struct perf_session *session,
86 perf_event__handler_t process);
87int perf_event__process_attr(union perf_event *event, struct perf_session *session);
88
89int perf_event__synthesize_event_type(u64 event_id, char *name,
90 perf_event__handler_t process,
91 struct perf_session *session);
92int perf_event__synthesize_event_types(perf_event__handler_t process,
93 struct perf_session *session);
94int perf_event__process_event_type(union perf_event *event,
95 struct perf_session *session);
121 96
122int event__synthesize_build_id(struct dso *pos, u16 misc, 97int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
123 event__handler_t process, 98 perf_event__handler_t process,
124 struct machine *machine, 99 struct perf_session *session);
125 struct perf_session *session); 100int perf_event__process_tracing_data(union perf_event *event,
126int event__process_build_id(event_t *self, struct perf_session *session); 101 struct perf_session *session);
127 102
103int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
104 perf_event__handler_t process,
105 struct machine *machine,
106 struct perf_session *session);
107int perf_event__process_build_id(union perf_event *event,
108 struct perf_session *session);
128#endif /* __PERF_HEADER_H */ 109#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 32f4f1f2f6e4..627a02e03c57 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1,3 +1,4 @@
1#include "annotate.h"
1#include "util.h" 2#include "util.h"
2#include "build-id.h" 3#include "build-id.h"
3#include "hist.h" 4#include "hist.h"
@@ -49,6 +50,15 @@ static void hists__calc_col_len(struct hists *self, struct hist_entry *h)
49 50
50 if (h->ms.sym) 51 if (h->ms.sym)
51 hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen); 52 hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen);
53 else {
54 const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
55
56 if (hists__col_len(self, HISTC_DSO) < unresolved_col_width &&
57 !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
58 !symbol_conf.dso_list)
59 hists__set_col_len(self, HISTC_DSO,
60 unresolved_col_width);
61 }
52 62
53 len = thread__comm_len(h->thread); 63 len = thread__comm_len(h->thread);
54 if (hists__new_col_len(self, HISTC_COMM, len)) 64 if (hists__new_col_len(self, HISTC_COMM, len))
@@ -211,7 +221,9 @@ void hist_entry__free(struct hist_entry *he)
211 * collapse the histogram 221 * collapse the histogram
212 */ 222 */
213 223
214static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he) 224static bool hists__collapse_insert_entry(struct hists *self,
225 struct rb_root *root,
226 struct hist_entry *he)
215{ 227{
216 struct rb_node **p = &root->rb_node; 228 struct rb_node **p = &root->rb_node;
217 struct rb_node *parent = NULL; 229 struct rb_node *parent = NULL;
@@ -226,8 +238,11 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
226 238
227 if (!cmp) { 239 if (!cmp) {
228 iter->period += he->period; 240 iter->period += he->period;
229 if (symbol_conf.use_callchain) 241 if (symbol_conf.use_callchain) {
230 callchain_merge(iter->callchain, he->callchain); 242 callchain_cursor_reset(&self->callchain_cursor);
243 callchain_merge(&self->callchain_cursor, iter->callchain,
244 he->callchain);
245 }
231 hist_entry__free(he); 246 hist_entry__free(he);
232 return false; 247 return false;
233 } 248 }
@@ -262,7 +277,7 @@ void hists__collapse_resort(struct hists *self)
262 next = rb_next(&n->rb_node); 277 next = rb_next(&n->rb_node);
263 278
264 rb_erase(&n->rb_node, &self->entries); 279 rb_erase(&n->rb_node, &self->entries);
265 if (collapse__insert_entry(&tmp, n)) 280 if (hists__collapse_insert_entry(self, &tmp, n))
266 hists__inc_nr_entries(self, n); 281 hists__inc_nr_entries(self, n);
267 } 282 }
268 283
@@ -425,7 +440,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
425 u64 cumul; 440 u64 cumul;
426 441
427 child = rb_entry(node, struct callchain_node, rb_node); 442 child = rb_entry(node, struct callchain_node, rb_node);
428 cumul = cumul_hits(child); 443 cumul = callchain_cumul_hits(child);
429 remaining -= cumul; 444 remaining -= cumul;
430 445
431 /* 446 /*
@@ -585,6 +600,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
585{ 600{
586 struct sort_entry *se; 601 struct sort_entry *se;
587 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; 602 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
603 u64 nr_events;
588 const char *sep = symbol_conf.field_sep; 604 const char *sep = symbol_conf.field_sep;
589 int ret; 605 int ret;
590 606
@@ -593,6 +609,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
593 609
594 if (pair_hists) { 610 if (pair_hists) {
595 period = self->pair ? self->pair->period : 0; 611 period = self->pair ? self->pair->period : 0;
612 nr_events = self->pair ? self->pair->nr_events : 0;
596 total = pair_hists->stats.total_period; 613 total = pair_hists->stats.total_period;
597 period_sys = self->pair ? self->pair->period_sys : 0; 614 period_sys = self->pair ? self->pair->period_sys : 0;
598 period_us = self->pair ? self->pair->period_us : 0; 615 period_us = self->pair ? self->pair->period_us : 0;
@@ -600,6 +617,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
600 period_guest_us = self->pair ? self->pair->period_guest_us : 0; 617 period_guest_us = self->pair ? self->pair->period_guest_us : 0;
601 } else { 618 } else {
602 period = self->period; 619 period = self->period;
620 nr_events = self->nr_events;
603 total = session_total; 621 total = session_total;
604 period_sys = self->period_sys; 622 period_sys = self->period_sys;
605 period_us = self->period_us; 623 period_us = self->period_us;
@@ -640,9 +658,9 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
640 658
641 if (symbol_conf.show_nr_samples) { 659 if (symbol_conf.show_nr_samples) {
642 if (sep) 660 if (sep)
643 ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); 661 ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events);
644 else 662 else
645 ret += snprintf(s + ret, size - ret, "%11" PRIu64, period); 663 ret += snprintf(s + ret, size - ret, "%11" PRIu64, nr_events);
646 } 664 }
647 665
648 if (pair_hists) { 666 if (pair_hists) {
@@ -944,225 +962,14 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
944 } 962 }
945} 963}
946 964
947static int symbol__alloc_hist(struct symbol *self) 965int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
948{
949 struct sym_priv *priv = symbol__priv(self);
950 const int size = (sizeof(*priv->hist) +
951 (self->end - self->start) * sizeof(u64));
952
953 priv->hist = zalloc(size);
954 return priv->hist == NULL ? -1 : 0;
955}
956
957int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip)
958{
959 unsigned int sym_size, offset;
960 struct symbol *sym = self->ms.sym;
961 struct sym_priv *priv;
962 struct sym_hist *h;
963
964 if (!sym || !self->ms.map)
965 return 0;
966
967 priv = symbol__priv(sym);
968 if (priv->hist == NULL && symbol__alloc_hist(sym) < 0)
969 return -ENOMEM;
970
971 sym_size = sym->end - sym->start;
972 offset = ip - sym->start;
973
974 pr_debug3("%s: ip=%#" PRIx64 "\n", __func__, self->ms.map->unmap_ip(self->ms.map, ip));
975
976 if (offset >= sym_size)
977 return 0;
978
979 h = priv->hist;
980 h->sum++;
981 h->ip[offset]++;
982
983 pr_debug3("%#" PRIx64 " %s: period++ [ip: %#" PRIx64 ", %#" PRIx64
984 "] => %" PRIu64 "\n", self->ms.sym->start, self->ms.sym->name,
985 ip, ip - self->ms.sym->start, h->ip[offset]);
986 return 0;
987}
988
989static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
990{
991 struct objdump_line *self = malloc(sizeof(*self) + privsize);
992
993 if (self != NULL) {
994 self->offset = offset;
995 self->line = line;
996 }
997
998 return self;
999}
1000
1001void objdump_line__free(struct objdump_line *self)
1002{
1003 free(self->line);
1004 free(self);
1005}
1006
1007static void objdump__add_line(struct list_head *head, struct objdump_line *line)
1008{
1009 list_add_tail(&line->node, head);
1010}
1011
1012struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
1013 struct objdump_line *pos)
1014{
1015 list_for_each_entry_continue(pos, head, node)
1016 if (pos->offset >= 0)
1017 return pos;
1018
1019 return NULL;
1020}
1021
1022static int hist_entry__parse_objdump_line(struct hist_entry *self, FILE *file,
1023 struct list_head *head, size_t privsize)
1024{ 966{
1025 struct symbol *sym = self->ms.sym; 967 return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
1026 struct objdump_line *objdump_line;
1027 char *line = NULL, *tmp, *tmp2, *c;
1028 size_t line_len;
1029 s64 line_ip, offset = -1;
1030
1031 if (getline(&line, &line_len, file) < 0)
1032 return -1;
1033
1034 if (!line)
1035 return -1;
1036
1037 while (line_len != 0 && isspace(line[line_len - 1]))
1038 line[--line_len] = '\0';
1039
1040 c = strchr(line, '\n');
1041 if (c)
1042 *c = 0;
1043
1044 line_ip = -1;
1045
1046 /*
1047 * Strip leading spaces:
1048 */
1049 tmp = line;
1050 while (*tmp) {
1051 if (*tmp != ' ')
1052 break;
1053 tmp++;
1054 }
1055
1056 if (*tmp) {
1057 /*
1058 * Parse hexa addresses followed by ':'
1059 */
1060 line_ip = strtoull(tmp, &tmp2, 16);
1061 if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
1062 line_ip = -1;
1063 }
1064
1065 if (line_ip != -1) {
1066 u64 start = map__rip_2objdump(self->ms.map, sym->start),
1067 end = map__rip_2objdump(self->ms.map, sym->end);
1068
1069 offset = line_ip - start;
1070 if (offset < 0 || (u64)line_ip > end)
1071 offset = -1;
1072 }
1073
1074 objdump_line = objdump_line__new(offset, line, privsize);
1075 if (objdump_line == NULL) {
1076 free(line);
1077 return -1;
1078 }
1079 objdump__add_line(head, objdump_line);
1080
1081 return 0;
1082} 968}
1083 969
1084int hist_entry__annotate(struct hist_entry *self, struct list_head *head, 970int hist_entry__annotate(struct hist_entry *he, size_t privsize)
1085 size_t privsize)
1086{ 971{
1087 struct symbol *sym = self->ms.sym; 972 return symbol__annotate(he->ms.sym, he->ms.map, privsize);
1088 struct map *map = self->ms.map;
1089 struct dso *dso = map->dso;
1090 char *filename = dso__build_id_filename(dso, NULL, 0);
1091 bool free_filename = true;
1092 char command[PATH_MAX * 2];
1093 FILE *file;
1094 int err = 0;
1095 u64 len;
1096 char symfs_filename[PATH_MAX];
1097
1098 if (filename) {
1099 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
1100 symbol_conf.symfs, filename);
1101 }
1102
1103 if (filename == NULL) {
1104 if (dso->has_build_id) {
1105 pr_err("Can't annotate %s: not enough memory\n",
1106 sym->name);
1107 return -ENOMEM;
1108 }
1109 goto fallback;
1110 } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
1111 strstr(command, "[kernel.kallsyms]") ||
1112 access(symfs_filename, R_OK)) {
1113 free(filename);
1114fallback:
1115 /*
1116 * If we don't have build-ids or the build-id file isn't in the
1117 * cache, or is just a kallsyms file, well, lets hope that this
1118 * DSO is the same as when 'perf record' ran.
1119 */
1120 filename = dso->long_name;
1121 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
1122 symbol_conf.symfs, filename);
1123 free_filename = false;
1124 }
1125
1126 if (dso->origin == DSO__ORIG_KERNEL) {
1127 if (dso->annotate_warned)
1128 goto out_free_filename;
1129 err = -ENOENT;
1130 dso->annotate_warned = 1;
1131 pr_err("Can't annotate %s: No vmlinux file was found in the "
1132 "path\n", sym->name);
1133 goto out_free_filename;
1134 }
1135
1136 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
1137 filename, sym->name, map->unmap_ip(map, sym->start),
1138 map->unmap_ip(map, sym->end));
1139
1140 len = sym->end - sym->start;
1141
1142 pr_debug("annotating [%p] %30s : [%p] %30s\n",
1143 dso, dso->long_name, sym, sym->name);
1144
1145 snprintf(command, sizeof(command),
1146 "objdump --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
1147 map__rip_2objdump(map, sym->start),
1148 map__rip_2objdump(map, sym->end),
1149 symfs_filename, filename);
1150
1151 pr_debug("Executing: %s\n", command);
1152
1153 file = popen(command, "r");
1154 if (!file)
1155 goto out_free_filename;
1156
1157 while (!feof(file))
1158 if (hist_entry__parse_objdump_line(self, file, head, privsize) < 0)
1159 break;
1160
1161 pclose(file);
1162out_free_filename:
1163 if (free_filename)
1164 free(filename);
1165 return err;
1166} 973}
1167 974
1168void hists__inc_nr_events(struct hists *self, u32 type) 975void hists__inc_nr_events(struct hists *self, u32 type)
@@ -1177,8 +984,12 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
1177 size_t ret = 0; 984 size_t ret = 0;
1178 985
1179 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { 986 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
1180 const char *name = event__get_event_name(i); 987 const char *name;
988
989 if (self->stats.nr_events[i] == 0)
990 continue;
1181 991
992 name = perf_event__name(i);
1182 if (!strcmp(name, "UNKNOWN")) 993 if (!strcmp(name, "UNKNOWN"))
1183 continue; 994 continue;
1184 995
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee789856a8c9..cb6858a2f9a3 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -9,33 +9,6 @@ extern struct callchain_param callchain_param;
9struct hist_entry; 9struct hist_entry;
10struct addr_location; 10struct addr_location;
11struct symbol; 11struct symbol;
12struct rb_root;
13
14struct objdump_line {
15 struct list_head node;
16 s64 offset;
17 char *line;
18};
19
20void objdump_line__free(struct objdump_line *self);
21struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
22 struct objdump_line *pos);
23
24struct sym_hist {
25 u64 sum;
26 u64 ip[0];
27};
28
29struct sym_ext {
30 struct rb_node node;
31 double percent;
32 char *path;
33};
34
35struct sym_priv {
36 struct sym_hist *hist;
37 struct sym_ext *ext;
38};
39 12
40/* 13/*
41 * The kernel collects the number of events it couldn't send in a stretch and 14 * The kernel collects the number of events it couldn't send in a stretch and
@@ -69,14 +42,13 @@ enum hist_column {
69}; 42};
70 43
71struct hists { 44struct hists {
72 struct rb_node rb_node;
73 struct rb_root entries; 45 struct rb_root entries;
74 u64 nr_entries; 46 u64 nr_entries;
75 struct events_stats stats; 47 struct events_stats stats;
76 u64 config;
77 u64 event_stream; 48 u64 event_stream;
78 u32 type;
79 u16 col_len[HISTC_NR_COLS]; 49 u16 col_len[HISTC_NR_COLS];
50 /* Best would be to reuse the session callchain cursor */
51 struct callchain_cursor callchain_cursor;
80}; 52};
81 53
82struct hist_entry *__hists__add_entry(struct hists *self, 54struct hist_entry *__hists__add_entry(struct hists *self,
@@ -102,9 +74,8 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp);
102size_t hists__fprintf(struct hists *self, struct hists *pair, 74size_t hists__fprintf(struct hists *self, struct hists *pair,
103 bool show_displacement, FILE *fp); 75 bool show_displacement, FILE *fp);
104 76
105int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip); 77int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr);
106int hist_entry__annotate(struct hist_entry *self, struct list_head *head, 78int hist_entry__annotate(struct hist_entry *self, size_t privsize);
107 size_t privsize);
108 79
109void hists__filter_by_dso(struct hists *self, const struct dso *dso); 80void hists__filter_by_dso(struct hists *self, const struct dso *dso);
110void hists__filter_by_thread(struct hists *self, const struct thread *thread); 81void hists__filter_by_thread(struct hists *self, const struct thread *thread);
@@ -113,21 +84,18 @@ u16 hists__col_len(struct hists *self, enum hist_column col);
113void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); 84void hists__set_col_len(struct hists *self, enum hist_column col, u16 len);
114bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); 85bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len);
115 86
116#ifdef NO_NEWT_SUPPORT 87struct perf_evlist;
117static inline int hists__browse(struct hists *self __used,
118 const char *helpline __used,
119 const char *ev_name __used)
120{
121 return 0;
122}
123 88
124static inline int hists__tui_browse_tree(struct rb_root *self __used, 89#ifdef NO_NEWT_SUPPORT
125 const char *help __used) 90static inline
91int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used,
92 const char *help __used)
126{ 93{
127 return 0; 94 return 0;
128} 95}
129 96
130static inline int hist_entry__tui_annotate(struct hist_entry *self __used) 97static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
98 int evidx __used)
131{ 99{
132 return 0; 100 return 0;
133} 101}
@@ -135,14 +103,12 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used)
135#define KEY_RIGHT -2 103#define KEY_RIGHT -2
136#else 104#else
137#include <newt.h> 105#include <newt.h>
138int hists__browse(struct hists *self, const char *helpline, 106int hist_entry__tui_annotate(struct hist_entry *self, int evidx);
139 const char *ev_name);
140int hist_entry__tui_annotate(struct hist_entry *self);
141 107
142#define KEY_LEFT NEWT_KEY_LEFT 108#define KEY_LEFT NEWT_KEY_LEFT
143#define KEY_RIGHT NEWT_KEY_RIGHT 109#define KEY_RIGHT NEWT_KEY_RIGHT
144 110
145int hists__tui_browse_tree(struct rb_root *self, const char *help); 111int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help);
146#endif 112#endif
147 113
148unsigned int hists__sort_list_width(struct hists *self); 114unsigned int hists__sort_list_width(struct hists *self);
diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h
index f5ca26e53fbb..356c7e467b83 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/perf/util/include/linux/list.h
@@ -1,3 +1,4 @@
1#include <linux/kernel.h>
1#include "../../../../include/linux/list.h" 2#include "../../../../include/linux/list.h"
2 3
3#ifndef PERF_LIST_H 4#ifndef PERF_LIST_H
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 135f69baf966..54a7e2634d58 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,6 +1,7 @@
1#include "../../../include/linux/hw_breakpoint.h" 1#include "../../../include/linux/hw_breakpoint.h"
2#include "util.h" 2#include "util.h"
3#include "../perf.h" 3#include "../perf.h"
4#include "evlist.h"
4#include "evsel.h" 5#include "evsel.h"
5#include "parse-options.h" 6#include "parse-options.h"
6#include "parse-events.h" 7#include "parse-events.h"
@@ -11,10 +12,6 @@
11#include "header.h" 12#include "header.h"
12#include "debugfs.h" 13#include "debugfs.h"
13 14
14int nr_counters;
15
16LIST_HEAD(evsel_list);
17
18struct event_symbol { 15struct event_symbol {
19 u8 type; 16 u8 type;
20 u64 config; 17 u64 config;
@@ -271,6 +268,9 @@ const char *event_name(struct perf_evsel *evsel)
271 u64 config = evsel->attr.config; 268 u64 config = evsel->attr.config;
272 int type = evsel->attr.type; 269 int type = evsel->attr.type;
273 270
271 if (evsel->name)
272 return evsel->name;
273
274 return __event_name(type, config); 274 return __event_name(type, config);
275} 275}
276 276
@@ -449,8 +449,8 @@ parse_single_tracepoint_event(char *sys_name,
449/* sys + ':' + event + ':' + flags*/ 449/* sys + ':' + event + ':' + flags*/
450#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128) 450#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
451static enum event_result 451static enum event_result
452parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp, 452parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
453 char *flags) 453 const char *evt_exp, char *flags)
454{ 454{
455 char evt_path[MAXPATHLEN]; 455 char evt_path[MAXPATHLEN];
456 struct dirent *evt_ent; 456 struct dirent *evt_ent;
@@ -483,15 +483,16 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
483 if (len < 0) 483 if (len < 0)
484 return EVT_FAILED; 484 return EVT_FAILED;
485 485
486 if (parse_events(NULL, event_opt, 0)) 486 if (parse_events(opt, event_opt, 0))
487 return EVT_FAILED; 487 return EVT_FAILED;
488 } 488 }
489 489
490 return EVT_HANDLED_ALL; 490 return EVT_HANDLED_ALL;
491} 491}
492 492
493static enum event_result parse_tracepoint_event(const char **strp, 493static enum event_result
494 struct perf_event_attr *attr) 494parse_tracepoint_event(const struct option *opt, const char **strp,
495 struct perf_event_attr *attr)
495{ 496{
496 const char *evt_name; 497 const char *evt_name;
497 char *flags = NULL, *comma_loc; 498 char *flags = NULL, *comma_loc;
@@ -530,7 +531,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
530 return EVT_FAILED; 531 return EVT_FAILED;
531 if (strpbrk(evt_name, "*?")) { 532 if (strpbrk(evt_name, "*?")) {
532 *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */ 533 *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */
533 return parse_multiple_tracepoint_event(sys_name, evt_name, 534 return parse_multiple_tracepoint_event(opt, sys_name, evt_name,
534 flags); 535 flags);
535 } else { 536 } else {
536 return parse_single_tracepoint_event(sys_name, evt_name, 537 return parse_single_tracepoint_event(sys_name, evt_name,
@@ -740,11 +741,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
740 * Symbolic names are (almost) exactly matched. 741 * Symbolic names are (almost) exactly matched.
741 */ 742 */
742static enum event_result 743static enum event_result
743parse_event_symbols(const char **str, struct perf_event_attr *attr) 744parse_event_symbols(const struct option *opt, const char **str,
745 struct perf_event_attr *attr)
744{ 746{
745 enum event_result ret; 747 enum event_result ret;
746 748
747 ret = parse_tracepoint_event(str, attr); 749 ret = parse_tracepoint_event(opt, str, attr);
748 if (ret != EVT_FAILED) 750 if (ret != EVT_FAILED)
749 goto modifier; 751 goto modifier;
750 752
@@ -778,14 +780,17 @@ modifier:
778 return ret; 780 return ret;
779} 781}
780 782
781int parse_events(const struct option *opt __used, const char *str, int unset __used) 783int parse_events(const struct option *opt, const char *str, int unset __used)
782{ 784{
785 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
783 struct perf_event_attr attr; 786 struct perf_event_attr attr;
784 enum event_result ret; 787 enum event_result ret;
788 const char *ostr;
785 789
786 for (;;) { 790 for (;;) {
791 ostr = str;
787 memset(&attr, 0, sizeof(attr)); 792 memset(&attr, 0, sizeof(attr));
788 ret = parse_event_symbols(&str, &attr); 793 ret = parse_event_symbols(opt, &str, &attr);
789 if (ret == EVT_FAILED) 794 if (ret == EVT_FAILED)
790 return -1; 795 return -1;
791 796
@@ -794,12 +799,15 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
794 799
795 if (ret != EVT_HANDLED_ALL) { 800 if (ret != EVT_HANDLED_ALL) {
796 struct perf_evsel *evsel; 801 struct perf_evsel *evsel;
797 evsel = perf_evsel__new(&attr, 802 evsel = perf_evsel__new(&attr, evlist->nr_entries);
798 nr_counters);
799 if (evsel == NULL) 803 if (evsel == NULL)
800 return -1; 804 return -1;
801 list_add_tail(&evsel->node, &evsel_list); 805 perf_evlist__add(evlist, evsel);
802 ++nr_counters; 806
807 evsel->name = calloc(str - ostr + 1, 1);
808 if (!evsel->name)
809 return -1;
810 strncpy(evsel->name, ostr, str - ostr);
803 } 811 }
804 812
805 if (*str == 0) 813 if (*str == 0)
@@ -813,13 +821,14 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
813 return 0; 821 return 0;
814} 822}
815 823
816int parse_filter(const struct option *opt __used, const char *str, 824int parse_filter(const struct option *opt, const char *str,
817 int unset __used) 825 int unset __used)
818{ 826{
827 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
819 struct perf_evsel *last = NULL; 828 struct perf_evsel *last = NULL;
820 829
821 if (!list_empty(&evsel_list)) 830 if (evlist->nr_entries > 0)
822 last = list_entry(evsel_list.prev, struct perf_evsel, node); 831 last = list_entry(evlist->entries.prev, struct perf_evsel, node);
823 832
824 if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { 833 if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
825 fprintf(stderr, 834 fprintf(stderr,
@@ -849,7 +858,7 @@ static const char * const event_type_descriptors[] = {
849 * Print the events from <debugfs_mount_point>/tracing/events 858 * Print the events from <debugfs_mount_point>/tracing/events
850 */ 859 */
851 860
852static void print_tracepoint_events(void) 861void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
853{ 862{
854 DIR *sys_dir, *evt_dir; 863 DIR *sys_dir, *evt_dir;
855 struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; 864 struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
@@ -864,6 +873,9 @@ static void print_tracepoint_events(void)
864 return; 873 return;
865 874
866 for_each_subsystem(sys_dir, sys_dirent, sys_next) { 875 for_each_subsystem(sys_dir, sys_dirent, sys_next) {
876 if (subsys_glob != NULL &&
877 !strglobmatch(sys_dirent.d_name, subsys_glob))
878 continue;
867 879
868 snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, 880 snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
869 sys_dirent.d_name); 881 sys_dirent.d_name);
@@ -872,6 +884,10 @@ static void print_tracepoint_events(void)
872 continue; 884 continue;
873 885
874 for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { 886 for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
887 if (event_glob != NULL &&
888 !strglobmatch(evt_dirent.d_name, event_glob))
889 continue;
890
875 snprintf(evt_path, MAXPATHLEN, "%s:%s", 891 snprintf(evt_path, MAXPATHLEN, "%s:%s",
876 sys_dirent.d_name, evt_dirent.d_name); 892 sys_dirent.d_name, evt_dirent.d_name);
877 printf(" %-42s [%s]\n", evt_path, 893 printf(" %-42s [%s]\n", evt_path,
@@ -923,13 +939,61 @@ int is_valid_tracepoint(const char *event_string)
923 return 0; 939 return 0;
924} 940}
925 941
942void print_events_type(u8 type)
943{
944 struct event_symbol *syms = event_symbols;
945 unsigned int i;
946 char name[64];
947
948 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
949 if (type != syms->type)
950 continue;
951
952 if (strlen(syms->alias))
953 snprintf(name, sizeof(name), "%s OR %s",
954 syms->symbol, syms->alias);
955 else
956 snprintf(name, sizeof(name), "%s", syms->symbol);
957
958 printf(" %-42s [%s]\n", name,
959 event_type_descriptors[type]);
960 }
961}
962
963int print_hwcache_events(const char *event_glob)
964{
965 unsigned int type, op, i, printed = 0;
966
967 for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
968 for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
969 /* skip invalid cache type */
970 if (!is_cache_op_valid(type, op))
971 continue;
972
973 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
974 char *name = event_cache_name(type, op, i);
975
976 if (event_glob != NULL &&
977 !strglobmatch(name, event_glob))
978 continue;
979
980 printf(" %-42s [%s]\n", name,
981 event_type_descriptors[PERF_TYPE_HW_CACHE]);
982 ++printed;
983 }
984 }
985 }
986
987 return printed;
988}
989
926/* 990/*
927 * Print the help text for the event symbols: 991 * Print the help text for the event symbols:
928 */ 992 */
929void print_events(void) 993void print_events(const char *event_glob)
930{ 994{
931 struct event_symbol *syms = event_symbols; 995 struct event_symbol *syms = event_symbols;
932 unsigned int i, type, op, prev_type = -1; 996 unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0;
933 char name[40]; 997 char name[40];
934 998
935 printf("\n"); 999 printf("\n");
@@ -938,8 +1002,16 @@ void print_events(void)
938 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { 1002 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
939 type = syms->type; 1003 type = syms->type;
940 1004
941 if (type != prev_type) 1005 if (type != prev_type && printed) {
942 printf("\n"); 1006 printf("\n");
1007 printed = 0;
1008 ntypes_printed++;
1009 }
1010
1011 if (event_glob != NULL &&
1012 !(strglobmatch(syms->symbol, event_glob) ||
1013 (syms->alias && strglobmatch(syms->alias, event_glob))))
1014 continue;
943 1015
944 if (strlen(syms->alias)) 1016 if (strlen(syms->alias))
945 sprintf(name, "%s OR %s", syms->symbol, syms->alias); 1017 sprintf(name, "%s OR %s", syms->symbol, syms->alias);
@@ -949,22 +1021,17 @@ void print_events(void)
949 event_type_descriptors[type]); 1021 event_type_descriptors[type]);
950 1022
951 prev_type = type; 1023 prev_type = type;
1024 ++printed;
952 } 1025 }
953 1026
954 printf("\n"); 1027 if (ntypes_printed) {
955 for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { 1028 printed = 0;
956 for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { 1029 printf("\n");
957 /* skip invalid cache type */
958 if (!is_cache_op_valid(type, op))
959 continue;
960
961 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
962 printf(" %-42s [%s]\n",
963 event_cache_name(type, op, i),
964 event_type_descriptors[PERF_TYPE_HW_CACHE]);
965 }
966 }
967 } 1030 }
1031 print_hwcache_events(event_glob);
1032
1033 if (event_glob != NULL)
1034 return;
968 1035
969 printf("\n"); 1036 printf("\n");
970 printf(" %-42s [%s]\n", 1037 printf(" %-42s [%s]\n",
@@ -977,37 +1044,7 @@ void print_events(void)
977 event_type_descriptors[PERF_TYPE_BREAKPOINT]); 1044 event_type_descriptors[PERF_TYPE_BREAKPOINT]);
978 printf("\n"); 1045 printf("\n");
979 1046
980 print_tracepoint_events(); 1047 print_tracepoint_events(NULL, NULL);
981 1048
982 exit(129); 1049 exit(129);
983} 1050}
984
985int perf_evsel_list__create_default(void)
986{
987 struct perf_evsel *evsel;
988 struct perf_event_attr attr;
989
990 memset(&attr, 0, sizeof(attr));
991 attr.type = PERF_TYPE_HARDWARE;
992 attr.config = PERF_COUNT_HW_CPU_CYCLES;
993
994 evsel = perf_evsel__new(&attr, 0);
995
996 if (evsel == NULL)
997 return -ENOMEM;
998
999 list_add(&evsel->node, &evsel_list);
1000 ++nr_counters;
1001 return 0;
1002}
1003
1004void perf_evsel_list__delete(void)
1005{
1006 struct perf_evsel *pos, *n;
1007
1008 list_for_each_entry_safe(pos, n, &evsel_list, node) {
1009 list_del_init(&pos->node);
1010 perf_evsel__delete(pos);
1011 }
1012 nr_counters = 0;
1013}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 458e3ecf17af..212f88e07a9c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -9,11 +9,6 @@
9struct list_head; 9struct list_head;
10struct perf_evsel; 10struct perf_evsel;
11 11
12extern struct list_head evsel_list;
13
14int perf_evsel_list__create_default(void);
15void perf_evsel_list__delete(void);
16
17struct option; 12struct option;
18 13
19struct tracepoint_path { 14struct tracepoint_path {
@@ -25,8 +20,6 @@ struct tracepoint_path {
25extern struct tracepoint_path *tracepoint_id_to_path(u64 config); 20extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
26extern bool have_tracepoints(struct list_head *evlist); 21extern bool have_tracepoints(struct list_head *evlist);
27 22
28extern int nr_counters;
29
30const char *event_name(struct perf_evsel *event); 23const char *event_name(struct perf_evsel *event);
31extern const char *__event_name(int type, u64 config); 24extern const char *__event_name(int type, u64 config);
32 25
@@ -35,7 +28,10 @@ extern int parse_filter(const struct option *opt, const char *str, int unset);
35 28
36#define EVENTS_HELP_MAX (128*1024) 29#define EVENTS_HELP_MAX (128*1024)
37 30
38extern void print_events(void); 31void print_events(const char *event_glob);
32void print_events_type(u8 type);
33void print_tracepoint_events(const char *subsys_glob, const char *event_glob);
34int print_hwcache_events(const char *event_glob);
39extern int is_valid_tracepoint(const char *event_string); 35extern int is_valid_tracepoint(const char *event_string);
40 36
41extern char debugfs_path[]; 37extern char debugfs_path[];
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 6e29d9c9dccc..5ddee66020a7 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -31,6 +31,7 @@
31#include <string.h> 31#include <string.h>
32#include <stdarg.h> 32#include <stdarg.h>
33#include <limits.h> 33#include <limits.h>
34#include <elf.h>
34 35
35#undef _GNU_SOURCE 36#undef _GNU_SOURCE
36#include "util.h" 37#include "util.h"
@@ -111,7 +112,25 @@ static struct symbol *__find_kernel_function_by_name(const char *name,
111 NULL); 112 NULL);
112} 113}
113 114
114const char *kernel_get_module_path(const char *module) 115static struct map *kernel_get_module_map(const char *module)
116{
117 struct rb_node *nd;
118 struct map_groups *grp = &machine.kmaps;
119
120 if (!module)
121 module = "kernel";
122
123 for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
124 struct map *pos = rb_entry(nd, struct map, rb_node);
125 if (strncmp(pos->dso->short_name + 1, module,
126 pos->dso->short_name_len - 2) == 0) {
127 return pos;
128 }
129 }
130 return NULL;
131}
132
133static struct dso *kernel_get_module_dso(const char *module)
115{ 134{
116 struct dso *dso; 135 struct dso *dso;
117 struct map *map; 136 struct map *map;
@@ -141,7 +160,13 @@ const char *kernel_get_module_path(const char *module)
141 } 160 }
142 } 161 }
143found: 162found:
144 return dso->long_name; 163 return dso;
164}
165
166const char *kernel_get_module_path(const char *module)
167{
168 struct dso *dso = kernel_get_module_dso(module);
169 return (dso) ? dso->long_name : NULL;
145} 170}
146 171
147#ifdef DWARF_SUPPORT 172#ifdef DWARF_SUPPORT
@@ -384,7 +409,7 @@ int show_line_range(struct line_range *lr, const char *module)
384 setup_pager(); 409 setup_pager();
385 410
386 if (lr->function) 411 if (lr->function)
387 fprintf(stdout, "<%s:%d>\n", lr->function, 412 fprintf(stdout, "<%s@%s:%d>\n", lr->function, lr->path,
388 lr->start - lr->offset); 413 lr->start - lr->offset);
389 else 414 else
390 fprintf(stdout, "<%s:%d>\n", lr->path, lr->start); 415 fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
@@ -426,12 +451,14 @@ end:
426} 451}
427 452
428static int show_available_vars_at(int fd, struct perf_probe_event *pev, 453static int show_available_vars_at(int fd, struct perf_probe_event *pev,
429 int max_vls, bool externs) 454 int max_vls, struct strfilter *_filter,
455 bool externs)
430{ 456{
431 char *buf; 457 char *buf;
432 int ret, i; 458 int ret, i, nvars;
433 struct str_node *node; 459 struct str_node *node;
434 struct variable_list *vls = NULL, *vl; 460 struct variable_list *vls = NULL, *vl;
461 const char *var;
435 462
436 buf = synthesize_perf_probe_point(&pev->point); 463 buf = synthesize_perf_probe_point(&pev->point);
437 if (!buf) 464 if (!buf)
@@ -439,36 +466,45 @@ static int show_available_vars_at(int fd, struct perf_probe_event *pev,
439 pr_debug("Searching variables at %s\n", buf); 466 pr_debug("Searching variables at %s\n", buf);
440 467
441 ret = find_available_vars_at(fd, pev, &vls, max_vls, externs); 468 ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
442 if (ret > 0) { 469 if (ret <= 0) {
443 /* Some variables were found */ 470 pr_err("Failed to find variables at %s (%d)\n", buf, ret);
444 fprintf(stdout, "Available variables at %s\n", buf); 471 goto end;
445 for (i = 0; i < ret; i++) { 472 }
446 vl = &vls[i]; 473 /* Some variables are found */
447 /* 474 fprintf(stdout, "Available variables at %s\n", buf);
448 * A probe point might be converted to 475 for (i = 0; i < ret; i++) {
449 * several trace points. 476 vl = &vls[i];
450 */ 477 /*
451 fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol, 478 * A probe point might be converted to
452 vl->point.offset); 479 * several trace points.
453 free(vl->point.symbol); 480 */
454 if (vl->vars) { 481 fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
455 strlist__for_each(node, vl->vars) 482 vl->point.offset);
483 free(vl->point.symbol);
484 nvars = 0;
485 if (vl->vars) {
486 strlist__for_each(node, vl->vars) {
487 var = strchr(node->s, '\t') + 1;
488 if (strfilter__compare(_filter, var)) {
456 fprintf(stdout, "\t\t%s\n", node->s); 489 fprintf(stdout, "\t\t%s\n", node->s);
457 strlist__delete(vl->vars); 490 nvars++;
458 } else 491 }
459 fprintf(stdout, "(No variables)\n"); 492 }
493 strlist__delete(vl->vars);
460 } 494 }
461 free(vls); 495 if (nvars == 0)
462 } else 496 fprintf(stdout, "\t\t(No matched variables)\n");
463 pr_err("Failed to find variables at %s (%d)\n", buf, ret); 497 }
464 498 free(vls);
499end:
465 free(buf); 500 free(buf);
466 return ret; 501 return ret;
467} 502}
468 503
469/* Show available variables on given probe point */ 504/* Show available variables on given probe point */
470int show_available_vars(struct perf_probe_event *pevs, int npevs, 505int show_available_vars(struct perf_probe_event *pevs, int npevs,
471 int max_vls, const char *module, bool externs) 506 int max_vls, const char *module,
507 struct strfilter *_filter, bool externs)
472{ 508{
473 int i, fd, ret = 0; 509 int i, fd, ret = 0;
474 510
@@ -485,7 +521,8 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
485 setup_pager(); 521 setup_pager();
486 522
487 for (i = 0; i < npevs && ret >= 0; i++) 523 for (i = 0; i < npevs && ret >= 0; i++)
488 ret = show_available_vars_at(fd, &pevs[i], max_vls, externs); 524 ret = show_available_vars_at(fd, &pevs[i], max_vls, _filter,
525 externs);
489 526
490 close(fd); 527 close(fd);
491 return ret; 528 return ret;
@@ -531,7 +568,9 @@ int show_line_range(struct line_range *lr __unused, const char *module __unused)
531 568
532int show_available_vars(struct perf_probe_event *pevs __unused, 569int show_available_vars(struct perf_probe_event *pevs __unused,
533 int npevs __unused, int max_vls __unused, 570 int npevs __unused, int max_vls __unused,
534 const char *module __unused, bool externs __unused) 571 const char *module __unused,
572 struct strfilter *filter __unused,
573 bool externs __unused)
535{ 574{
536 pr_warning("Debuginfo-analysis is not supported.\n"); 575 pr_warning("Debuginfo-analysis is not supported.\n");
537 return -ENOSYS; 576 return -ENOSYS;
@@ -556,11 +595,11 @@ static int parse_line_num(char **ptr, int *val, const char *what)
556 * The line range syntax is described by: 595 * The line range syntax is described by:
557 * 596 *
558 * SRC[:SLN[+NUM|-ELN]] 597 * SRC[:SLN[+NUM|-ELN]]
559 * FNC[:SLN[+NUM|-ELN]] 598 * FNC[@SRC][:SLN[+NUM|-ELN]]
560 */ 599 */
561int parse_line_range_desc(const char *arg, struct line_range *lr) 600int parse_line_range_desc(const char *arg, struct line_range *lr)
562{ 601{
563 char *range, *name = strdup(arg); 602 char *range, *file, *name = strdup(arg);
564 int err; 603 int err;
565 604
566 if (!name) 605 if (!name)
@@ -610,7 +649,16 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
610 } 649 }
611 } 650 }
612 651
613 if (strchr(name, '.')) 652 file = strchr(name, '@');
653 if (file) {
654 *file = '\0';
655 lr->file = strdup(++file);
656 if (lr->file == NULL) {
657 err = -ENOMEM;
658 goto err;
659 }
660 lr->function = name;
661 } else if (strchr(name, '.'))
614 lr->file = name; 662 lr->file = name;
615 else 663 else
616 lr->function = name; 664 lr->function = name;
@@ -1784,9 +1832,12 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1784 } 1832 }
1785 1833
1786 /* Loop 2: add all events */ 1834 /* Loop 2: add all events */
1787 for (i = 0; i < npevs && ret >= 0; i++) 1835 for (i = 0; i < npevs; i++) {
1788 ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs, 1836 ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs,
1789 pkgs[i].ntevs, force_add); 1837 pkgs[i].ntevs, force_add);
1838 if (ret < 0)
1839 break;
1840 }
1790end: 1841end:
1791 /* Loop 3: cleanup and free trace events */ 1842 /* Loop 3: cleanup and free trace events */
1792 for (i = 0; i < npevs; i++) { 1843 for (i = 0; i < npevs; i++) {
@@ -1912,4 +1963,46 @@ int del_perf_probe_events(struct strlist *dellist)
1912 1963
1913 return ret; 1964 return ret;
1914} 1965}
1966/* TODO: don't use a global variable for filter ... */
1967static struct strfilter *available_func_filter;
1968
1969/*
1970 * If a symbol corresponds to a function with global binding and
1971 * matches filter return 0. For all others return 1.
1972 */
1973static int filter_available_functions(struct map *map __unused,
1974 struct symbol *sym)
1975{
1976 if (sym->binding == STB_GLOBAL &&
1977 strfilter__compare(available_func_filter, sym->name))
1978 return 0;
1979 return 1;
1980}
1981
1982int show_available_funcs(const char *module, struct strfilter *_filter)
1983{
1984 struct map *map;
1985 int ret;
1986
1987 setup_pager();
1988
1989 ret = init_vmlinux();
1990 if (ret < 0)
1991 return ret;
1915 1992
1993 map = kernel_get_module_map(module);
1994 if (!map) {
1995 pr_err("Failed to find %s map.\n", (module) ? : "kernel");
1996 return -EINVAL;
1997 }
1998 available_func_filter = _filter;
1999 if (map__load(map, filter_available_functions)) {
2000 pr_err("Failed to load map.\n");
2001 return -EINVAL;
2002 }
2003 if (!dso__sorted_by_name(map->dso, map->type))
2004 dso__sort_by_name(map->dso, map->type);
2005
2006 dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
2007 return 0;
2008}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5accbedfea37..3434fc9d79d5 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -3,6 +3,7 @@
3 3
4#include <stdbool.h> 4#include <stdbool.h>
5#include "strlist.h" 5#include "strlist.h"
6#include "strfilter.h"
6 7
7extern bool probe_event_dry_run; 8extern bool probe_event_dry_run;
8 9
@@ -126,7 +127,8 @@ extern int show_perf_probe_events(void);
126extern int show_line_range(struct line_range *lr, const char *module); 127extern int show_line_range(struct line_range *lr, const char *module);
127extern int show_available_vars(struct perf_probe_event *pevs, int npevs, 128extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
128 int max_probe_points, const char *module, 129 int max_probe_points, const char *module,
129 bool externs); 130 struct strfilter *filter, bool externs);
131extern int show_available_funcs(const char *module, struct strfilter *filter);
130 132
131 133
132/* Maximum index number of event-name postfix */ 134/* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index ab83b6ac5d65..194f9e2a3285 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -33,6 +33,7 @@
33#include <ctype.h> 33#include <ctype.h>
34#include <dwarf-regs.h> 34#include <dwarf-regs.h>
35 35
36#include <linux/bitops.h>
36#include "event.h" 37#include "event.h"
37#include "debug.h" 38#include "debug.h"
38#include "util.h" 39#include "util.h"
@@ -280,6 +281,19 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
280 return name ? (strcmp(tname, name) == 0) : false; 281 return name ? (strcmp(tname, name) == 0) : false;
281} 282}
282 283
284/* Get callsite line number of inline-function instance */
285static int die_get_call_lineno(Dwarf_Die *in_die)
286{
287 Dwarf_Attribute attr;
288 Dwarf_Word ret;
289
290 if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
291 return -ENOENT;
292
293 dwarf_formudata(&attr, &ret);
294 return (int)ret;
295}
296
283/* Get type die */ 297/* Get type die */
284static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) 298static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
285{ 299{
@@ -320,13 +334,23 @@ static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
320 return vr_die; 334 return vr_die;
321} 335}
322 336
323static bool die_is_signed_type(Dwarf_Die *tp_die) 337static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
338 Dwarf_Word *result)
324{ 339{
325 Dwarf_Attribute attr; 340 Dwarf_Attribute attr;
341
342 if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
343 dwarf_formudata(&attr, result) != 0)
344 return -ENOENT;
345
346 return 0;
347}
348
349static bool die_is_signed_type(Dwarf_Die *tp_die)
350{
326 Dwarf_Word ret; 351 Dwarf_Word ret;
327 352
328 if (dwarf_attr(tp_die, DW_AT_encoding, &attr) == NULL || 353 if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
329 dwarf_formudata(&attr, &ret) != 0)
330 return false; 354 return false;
331 355
332 return (ret == DW_ATE_signed_char || ret == DW_ATE_signed || 356 return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
@@ -335,11 +359,29 @@ static bool die_is_signed_type(Dwarf_Die *tp_die)
335 359
336static int die_get_byte_size(Dwarf_Die *tp_die) 360static int die_get_byte_size(Dwarf_Die *tp_die)
337{ 361{
338 Dwarf_Attribute attr;
339 Dwarf_Word ret; 362 Dwarf_Word ret;
340 363
341 if (dwarf_attr(tp_die, DW_AT_byte_size, &attr) == NULL || 364 if (die_get_attr_udata(tp_die, DW_AT_byte_size, &ret))
342 dwarf_formudata(&attr, &ret) != 0) 365 return 0;
366
367 return (int)ret;
368}
369
370static int die_get_bit_size(Dwarf_Die *tp_die)
371{
372 Dwarf_Word ret;
373
374 if (die_get_attr_udata(tp_die, DW_AT_bit_size, &ret))
375 return 0;
376
377 return (int)ret;
378}
379
380static int die_get_bit_offset(Dwarf_Die *tp_die)
381{
382 Dwarf_Word ret;
383
384 if (die_get_attr_udata(tp_die, DW_AT_bit_offset, &ret))
343 return 0; 385 return 0;
344 386
345 return (int)ret; 387 return (int)ret;
@@ -458,6 +500,151 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
458 return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem); 500 return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
459} 501}
460 502
503/* Walker on lines (Note: line number will not be sorted) */
504typedef int (* line_walk_handler_t) (const char *fname, int lineno,
505 Dwarf_Addr addr, void *data);
506
507struct __line_walk_param {
508 const char *fname;
509 line_walk_handler_t handler;
510 void *data;
511 int retval;
512};
513
514static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
515{
516 struct __line_walk_param *lw = data;
517 Dwarf_Addr addr;
518 int lineno;
519
520 if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
521 lineno = die_get_call_lineno(in_die);
522 if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
523 lw->retval = lw->handler(lw->fname, lineno, addr,
524 lw->data);
525 if (lw->retval != 0)
526 return DIE_FIND_CB_FOUND;
527 }
528 }
529 return DIE_FIND_CB_SIBLING;
530}
531
532/* Walk on lines of blocks included in given DIE */
533static int __die_walk_funclines(Dwarf_Die *sp_die,
534 line_walk_handler_t handler, void *data)
535{
536 struct __line_walk_param lw = {
537 .handler = handler,
538 .data = data,
539 .retval = 0,
540 };
541 Dwarf_Die die_mem;
542 Dwarf_Addr addr;
543 int lineno;
544
545 /* Handle function declaration line */
546 lw.fname = dwarf_decl_file(sp_die);
547 if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
548 dwarf_entrypc(sp_die, &addr) == 0) {
549 lw.retval = handler(lw.fname, lineno, addr, data);
550 if (lw.retval != 0)
551 goto done;
552 }
553 die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
554done:
555 return lw.retval;
556}
557
558static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
559{
560 struct __line_walk_param *lw = data;
561
562 lw->retval = __die_walk_funclines(sp_die, lw->handler, lw->data);
563 if (lw->retval != 0)
564 return DWARF_CB_ABORT;
565
566 return DWARF_CB_OK;
567}
568
569/*
570 * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on
571 * the lines inside the subprogram, otherwise PDIE must be a CU DIE.
572 */
573static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler,
574 void *data)
575{
576 Dwarf_Lines *lines;
577 Dwarf_Line *line;
578 Dwarf_Addr addr;
579 const char *fname;
580 int lineno, ret = 0;
581 Dwarf_Die die_mem, *cu_die;
582 size_t nlines, i;
583
584 /* Get the CU die */
585 if (dwarf_tag(pdie) == DW_TAG_subprogram)
586 cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL);
587 else
588 cu_die = pdie;
589 if (!cu_die) {
590 pr_debug2("Failed to get CU from subprogram\n");
591 return -EINVAL;
592 }
593
594 /* Get lines list in the CU */
595 if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
596 pr_debug2("Failed to get source lines on this CU.\n");
597 return -ENOENT;
598 }
599 pr_debug2("Get %zd lines from this CU\n", nlines);
600
601 /* Walk on the lines on lines list */
602 for (i = 0; i < nlines; i++) {
603 line = dwarf_onesrcline(lines, i);
604 if (line == NULL ||
605 dwarf_lineno(line, &lineno) != 0 ||
606 dwarf_lineaddr(line, &addr) != 0) {
607 pr_debug2("Failed to get line info. "
608 "Possible error in debuginfo.\n");
609 continue;
610 }
611 /* Filter lines based on address */
612 if (pdie != cu_die)
613 /*
614 * Address filtering
615 * The line is included in given function, and
616 * no inline block includes it.
617 */
618 if (!dwarf_haspc(pdie, addr) ||
619 die_find_inlinefunc(pdie, addr, &die_mem))
620 continue;
621 /* Get source line */
622 fname = dwarf_linesrc(line, NULL, NULL);
623
624 ret = handler(fname, lineno, addr, data);
625 if (ret != 0)
626 return ret;
627 }
628
629 /*
630 * Dwarf lines doesn't include function declarations and inlined
631 * subroutines. We have to check functions list or given function.
632 */
633 if (pdie != cu_die)
634 ret = __die_walk_funclines(pdie, handler, data);
635 else {
636 struct __line_walk_param param = {
637 .handler = handler,
638 .data = data,
639 .retval = 0,
640 };
641 dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
642 ret = param.retval;
643 }
644
645 return ret;
646}
647
461struct __find_variable_param { 648struct __find_variable_param {
462 const char *name; 649 const char *name;
463 Dwarf_Addr addr; 650 Dwarf_Addr addr;
@@ -669,6 +856,8 @@ static_var:
669 return 0; 856 return 0;
670} 857}
671 858
859#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long))
860
672static int convert_variable_type(Dwarf_Die *vr_die, 861static int convert_variable_type(Dwarf_Die *vr_die,
673 struct probe_trace_arg *tvar, 862 struct probe_trace_arg *tvar,
674 const char *cast) 863 const char *cast)
@@ -685,6 +874,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
685 return (tvar->type == NULL) ? -ENOMEM : 0; 874 return (tvar->type == NULL) ? -ENOMEM : 0;
686 } 875 }
687 876
877 if (die_get_bit_size(vr_die) != 0) {
878 /* This is a bitfield */
879 ret = snprintf(buf, 16, "b%d@%d/%zd", die_get_bit_size(vr_die),
880 die_get_bit_offset(vr_die),
881 BYTES_TO_BITS(die_get_byte_size(vr_die)));
882 goto formatted;
883 }
884
688 if (die_get_real_type(vr_die, &type) == NULL) { 885 if (die_get_real_type(vr_die, &type) == NULL) {
689 pr_warning("Failed to get a type information of %s.\n", 886 pr_warning("Failed to get a type information of %s.\n",
690 dwarf_diename(vr_die)); 887 dwarf_diename(vr_die));
@@ -729,29 +926,31 @@ static int convert_variable_type(Dwarf_Die *vr_die,
729 return (tvar->type == NULL) ? -ENOMEM : 0; 926 return (tvar->type == NULL) ? -ENOMEM : 0;
730 } 927 }
731 928
732 ret = die_get_byte_size(&type) * 8; 929 ret = BYTES_TO_BITS(die_get_byte_size(&type));
733 if (ret) { 930 if (!ret)
734 /* Check the bitwidth */ 931 /* No size ... try to use default type */
735 if (ret > MAX_BASIC_TYPE_BITS) { 932 return 0;
736 pr_info("%s exceeds max-bitwidth."
737 " Cut down to %d bits.\n",
738 dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
739 ret = MAX_BASIC_TYPE_BITS;
740 }
741 933
742 ret = snprintf(buf, 16, "%c%d", 934 /* Check the bitwidth */
743 die_is_signed_type(&type) ? 's' : 'u', ret); 935 if (ret > MAX_BASIC_TYPE_BITS) {
744 if (ret < 0 || ret >= 16) { 936 pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n",
745 if (ret >= 16) 937 dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
746 ret = -E2BIG; 938 ret = MAX_BASIC_TYPE_BITS;
747 pr_warning("Failed to convert variable type: %s\n", 939 }
748 strerror(-ret)); 940 ret = snprintf(buf, 16, "%c%d",
749 return ret; 941 die_is_signed_type(&type) ? 's' : 'u', ret);
750 } 942
751 tvar->type = strdup(buf); 943formatted:
752 if (tvar->type == NULL) 944 if (ret < 0 || ret >= 16) {
753 return -ENOMEM; 945 if (ret >= 16)
946 ret = -E2BIG;
947 pr_warning("Failed to convert variable type: %s\n",
948 strerror(-ret));
949 return ret;
754 } 950 }
951 tvar->type = strdup(buf);
952 if (tvar->type == NULL)
953 return -ENOMEM;
755 return 0; 954 return 0;
756} 955}
757 956
@@ -1050,157 +1249,102 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
1050 return ret; 1249 return ret;
1051} 1250}
1052 1251
1053/* Find probe point from its line number */ 1252static int probe_point_line_walker(const char *fname, int lineno,
1054static int find_probe_point_by_line(struct probe_finder *pf) 1253 Dwarf_Addr addr, void *data)
1055{ 1254{
1056 Dwarf_Lines *lines; 1255 struct probe_finder *pf = data;
1057 Dwarf_Line *line; 1256 int ret;
1058 size_t nlines, i;
1059 Dwarf_Addr addr;
1060 int lineno;
1061 int ret = 0;
1062
1063 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
1064 pr_warning("No source lines found.\n");
1065 return -ENOENT;
1066 }
1067 1257
1068 for (i = 0; i < nlines && ret == 0; i++) { 1258 if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
1069 line = dwarf_onesrcline(lines, i); 1259 return 0;
1070 if (dwarf_lineno(line, &lineno) != 0 ||
1071 lineno != pf->lno)
1072 continue;
1073 1260
1074 /* TODO: Get fileno from line, but how? */ 1261 pf->addr = addr;
1075 if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0) 1262 ret = call_probe_finder(NULL, pf);
1076 continue;
1077 1263
1078 if (dwarf_lineaddr(line, &addr) != 0) { 1264 /* Continue if no error, because the line will be in inline function */
1079 pr_warning("Failed to get the address of the line.\n"); 1265 return ret < 0 ? ret : 0;
1080 return -ENOENT; 1266}
1081 }
1082 pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n",
1083 (int)i, lineno, (uintmax_t)addr);
1084 pf->addr = addr;
1085 1267
1086 ret = call_probe_finder(NULL, pf); 1268/* Find probe point from its line number */
1087 /* Continuing, because target line might be inlined. */ 1269static int find_probe_point_by_line(struct probe_finder *pf)
1088 } 1270{
1089 return ret; 1271 return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
1090} 1272}
1091 1273
1092/* Find lines which match lazy pattern */ 1274/* Find lines which match lazy pattern */
1093static int find_lazy_match_lines(struct list_head *head, 1275static int find_lazy_match_lines(struct list_head *head,
1094 const char *fname, const char *pat) 1276 const char *fname, const char *pat)
1095{ 1277{
1096 char *fbuf, *p1, *p2; 1278 FILE *fp;
1097 int fd, line, nlines = -1; 1279 char *line = NULL;
1098 struct stat st; 1280 size_t line_len;
1099 1281 ssize_t len;
1100 fd = open(fname, O_RDONLY); 1282 int count = 0, linenum = 1;
1101 if (fd < 0) { 1283
1102 pr_warning("Failed to open %s: %s\n", fname, strerror(-fd)); 1284 fp = fopen(fname, "r");
1285 if (!fp) {
1286 pr_warning("Failed to open %s: %s\n", fname, strerror(errno));
1103 return -errno; 1287 return -errno;
1104 } 1288 }
1105 1289
1106 if (fstat(fd, &st) < 0) { 1290 while ((len = getline(&line, &line_len, fp)) > 0) {
1107 pr_warning("Failed to get the size of %s: %s\n", 1291
1108 fname, strerror(errno)); 1292 if (line[len - 1] == '\n')
1109 nlines = -errno; 1293 line[len - 1] = '\0';
1110 goto out_close; 1294
1111 } 1295 if (strlazymatch(line, pat)) {
1112 1296 line_list__add_line(head, linenum);
1113 nlines = -ENOMEM; 1297 count++;
1114 fbuf = malloc(st.st_size + 2);
1115 if (fbuf == NULL)
1116 goto out_close;
1117 if (read(fd, fbuf, st.st_size) < 0) {
1118 pr_warning("Failed to read %s: %s\n", fname, strerror(errno));
1119 nlines = -errno;
1120 goto out_free_fbuf;
1121 }
1122 fbuf[st.st_size] = '\n'; /* Dummy line */
1123 fbuf[st.st_size + 1] = '\0';
1124 p1 = fbuf;
1125 line = 1;
1126 nlines = 0;
1127 while ((p2 = strchr(p1, '\n')) != NULL) {
1128 *p2 = '\0';
1129 if (strlazymatch(p1, pat)) {
1130 line_list__add_line(head, line);
1131 nlines++;
1132 } 1298 }
1133 line++; 1299 linenum++;
1134 p1 = p2 + 1;
1135 } 1300 }
1136out_free_fbuf: 1301
1137 free(fbuf); 1302 if (ferror(fp))
1138out_close: 1303 count = -errno;
1139 close(fd); 1304 free(line);
1140 return nlines; 1305 fclose(fp);
1306
1307 if (count == 0)
1308 pr_debug("No matched lines found in %s.\n", fname);
1309 return count;
1310}
1311
1312static int probe_point_lazy_walker(const char *fname, int lineno,
1313 Dwarf_Addr addr, void *data)
1314{
1315 struct probe_finder *pf = data;
1316 int ret;
1317
1318 if (!line_list__has_line(&pf->lcache, lineno) ||
1319 strtailcmp(fname, pf->fname) != 0)
1320 return 0;
1321
1322 pr_debug("Probe line found: line:%d addr:0x%llx\n",
1323 lineno, (unsigned long long)addr);
1324 pf->addr = addr;
1325 ret = call_probe_finder(NULL, pf);
1326
1327 /*
1328 * Continue if no error, because the lazy pattern will match
1329 * to other lines
1330 */
1331 return ret < 0 ? ret : 0;
1141} 1332}
1142 1333
1143/* Find probe points from lazy pattern */ 1334/* Find probe points from lazy pattern */
1144static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) 1335static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
1145{ 1336{
1146 Dwarf_Lines *lines;
1147 Dwarf_Line *line;
1148 size_t nlines, i;
1149 Dwarf_Addr addr;
1150 Dwarf_Die die_mem;
1151 int lineno;
1152 int ret = 0; 1337 int ret = 0;
1153 1338
1154 if (list_empty(&pf->lcache)) { 1339 if (list_empty(&pf->lcache)) {
1155 /* Matching lazy line pattern */ 1340 /* Matching lazy line pattern */
1156 ret = find_lazy_match_lines(&pf->lcache, pf->fname, 1341 ret = find_lazy_match_lines(&pf->lcache, pf->fname,
1157 pf->pev->point.lazy_line); 1342 pf->pev->point.lazy_line);
1158 if (ret == 0) { 1343 if (ret <= 0)
1159 pr_debug("No matched lines found in %s.\n", pf->fname);
1160 return 0;
1161 } else if (ret < 0)
1162 return ret; 1344 return ret;
1163 } 1345 }
1164 1346
1165 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { 1347 return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
1166 pr_warning("No source lines found.\n");
1167 return -ENOENT;
1168 }
1169
1170 for (i = 0; i < nlines && ret >= 0; i++) {
1171 line = dwarf_onesrcline(lines, i);
1172
1173 if (dwarf_lineno(line, &lineno) != 0 ||
1174 !line_list__has_line(&pf->lcache, lineno))
1175 continue;
1176
1177 /* TODO: Get fileno from line, but how? */
1178 if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
1179 continue;
1180
1181 if (dwarf_lineaddr(line, &addr) != 0) {
1182 pr_debug("Failed to get the address of line %d.\n",
1183 lineno);
1184 continue;
1185 }
1186 if (sp_die) {
1187 /* Address filtering 1: does sp_die include addr? */
1188 if (!dwarf_haspc(sp_die, addr))
1189 continue;
1190 /* Address filtering 2: No child include addr? */
1191 if (die_find_inlinefunc(sp_die, addr, &die_mem))
1192 continue;
1193 }
1194
1195 pr_debug("Probe line found: line[%d]:%d addr:0x%llx\n",
1196 (int)i, lineno, (unsigned long long)addr);
1197 pf->addr = addr;
1198
1199 ret = call_probe_finder(sp_die, pf);
1200 /* Continuing, because target line might be inlined. */
1201 }
1202 /* TODO: deallocate lines, but how? */
1203 return ret;
1204} 1348}
1205 1349
1206/* Callback parameter with return value */ 1350/* Callback parameter with return value */
@@ -1318,8 +1462,7 @@ static int find_probes(int fd, struct probe_finder *pf)
1318 off = 0; 1462 off = 0;
1319 line_list__init(&pf->lcache); 1463 line_list__init(&pf->lcache);
1320 /* Loop on CUs (Compilation Unit) */ 1464 /* Loop on CUs (Compilation Unit) */
1321 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) && 1465 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
1322 ret >= 0) {
1323 /* Get the DIE(Debugging Information Entry) of this CU */ 1466 /* Get the DIE(Debugging Information Entry) of this CU */
1324 diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die); 1467 diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);
1325 if (!diep) 1468 if (!diep)
@@ -1340,6 +1483,8 @@ static int find_probes(int fd, struct probe_finder *pf)
1340 pf->lno = pp->line; 1483 pf->lno = pp->line;
1341 ret = find_probe_point_by_line(pf); 1484 ret = find_probe_point_by_line(pf);
1342 } 1485 }
1486 if (ret < 0)
1487 break;
1343 } 1488 }
1344 off = noff; 1489 off = noff;
1345 } 1490 }
@@ -1644,91 +1789,28 @@ static int line_range_add_line(const char *src, unsigned int lineno,
1644 return line_list__add_line(&lr->line_list, lineno); 1789 return line_list__add_line(&lr->line_list, lineno);
1645} 1790}
1646 1791
1647/* Search function declaration lines */ 1792static int line_range_walk_cb(const char *fname, int lineno,
1648static int line_range_funcdecl_cb(Dwarf_Die *sp_die, void *data) 1793 Dwarf_Addr addr __used,
1794 void *data)
1649{ 1795{
1650 struct dwarf_callback_param *param = data; 1796 struct line_finder *lf = data;
1651 struct line_finder *lf = param->data;
1652 const char *src;
1653 int lineno;
1654 1797
1655 src = dwarf_decl_file(sp_die); 1798 if ((strtailcmp(fname, lf->fname) != 0) ||
1656 if (src && strtailcmp(src, lf->fname) != 0)
1657 return DWARF_CB_OK;
1658
1659 if (dwarf_decl_line(sp_die, &lineno) != 0 ||
1660 (lf->lno_s > lineno || lf->lno_e < lineno)) 1799 (lf->lno_s > lineno || lf->lno_e < lineno))
1661 return DWARF_CB_OK; 1800 return 0;
1662 1801
1663 param->retval = line_range_add_line(src, lineno, lf->lr); 1802 if (line_range_add_line(fname, lineno, lf->lr) < 0)
1664 if (param->retval < 0) 1803 return -EINVAL;
1665 return DWARF_CB_ABORT;
1666 return DWARF_CB_OK;
1667}
1668 1804
1669static int find_line_range_func_decl_lines(struct line_finder *lf) 1805 return 0;
1670{
1671 struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
1672 dwarf_getfuncs(&lf->cu_die, line_range_funcdecl_cb, &param, 0);
1673 return param.retval;
1674} 1806}
1675 1807
1676/* Find line range from its line number */ 1808/* Find line range from its line number */
1677static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) 1809static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
1678{ 1810{
1679 Dwarf_Lines *lines; 1811 int ret;
1680 Dwarf_Line *line;
1681 size_t nlines, i;
1682 Dwarf_Addr addr;
1683 int lineno, ret = 0;
1684 const char *src;
1685 Dwarf_Die die_mem;
1686
1687 line_list__init(&lf->lr->line_list);
1688 if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
1689 pr_warning("No source lines found.\n");
1690 return -ENOENT;
1691 }
1692
1693 /* Search probable lines on lines list */
1694 for (i = 0; i < nlines; i++) {
1695 line = dwarf_onesrcline(lines, i);
1696 if (dwarf_lineno(line, &lineno) != 0 ||
1697 (lf->lno_s > lineno || lf->lno_e < lineno))
1698 continue;
1699
1700 if (sp_die) {
1701 /* Address filtering 1: does sp_die include addr? */
1702 if (dwarf_lineaddr(line, &addr) != 0 ||
1703 !dwarf_haspc(sp_die, addr))
1704 continue;
1705
1706 /* Address filtering 2: No child include addr? */
1707 if (die_find_inlinefunc(sp_die, addr, &die_mem))
1708 continue;
1709 }
1710
1711 /* TODO: Get fileno from line, but how? */
1712 src = dwarf_linesrc(line, NULL, NULL);
1713 if (strtailcmp(src, lf->fname) != 0)
1714 continue;
1715
1716 ret = line_range_add_line(src, lineno, lf->lr);
1717 if (ret < 0)
1718 return ret;
1719 }
1720 1812
1721 /* 1813 ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf);
1722 * Dwarf lines doesn't include function declarations. We have to
1723 * check functions list or given function.
1724 */
1725 if (sp_die) {
1726 src = dwarf_decl_file(sp_die);
1727 if (src && dwarf_decl_line(sp_die, &lineno) == 0 &&
1728 (lf->lno_s <= lineno && lf->lno_e >= lineno))
1729 ret = line_range_add_line(src, lineno, lf->lr);
1730 } else
1731 ret = find_line_range_func_decl_lines(lf);
1732 1814
1733 /* Update status */ 1815 /* Update status */
1734 if (ret >= 0) 1816 if (ret >= 0)
@@ -1758,9 +1840,6 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
1758 struct line_finder *lf = param->data; 1840 struct line_finder *lf = param->data;
1759 struct line_range *lr = lf->lr; 1841 struct line_range *lr = lf->lr;
1760 1842
1761 pr_debug("find (%llx) %s\n",
1762 (unsigned long long)dwarf_dieoffset(sp_die),
1763 dwarf_diename(sp_die));
1764 if (dwarf_tag(sp_die) == DW_TAG_subprogram && 1843 if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
1765 die_compare_name(sp_die, lr->function)) { 1844 die_compare_name(sp_die, lr->function)) {
1766 lf->fname = dwarf_decl_file(sp_die); 1845 lf->fname = dwarf_decl_file(sp_die);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
new file mode 100644
index 000000000000..a9f2d7e1204d
--- /dev/null
+++ b/tools/perf/util/python.c
@@ -0,0 +1,896 @@
1#include <Python.h>
2#include <structmember.h>
3#include <inttypes.h>
4#include <poll.h>
5#include "evlist.h"
6#include "evsel.h"
7#include "event.h"
8#include "cpumap.h"
9#include "thread_map.h"
10
11/* Define PyVarObject_HEAD_INIT for python 2.5 */
12#ifndef PyVarObject_HEAD_INIT
13# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
14#endif
15
16struct throttle_event {
17 struct perf_event_header header;
18 u64 time;
19 u64 id;
20 u64 stream_id;
21};
22
23PyMODINIT_FUNC initperf(void);
24
25#define member_def(type, member, ptype, help) \
26 { #member, ptype, \
27 offsetof(struct pyrf_event, event) + offsetof(struct type, member), \
28 0, help }
29
30#define sample_member_def(name, member, ptype, help) \
31 { #name, ptype, \
32 offsetof(struct pyrf_event, sample) + offsetof(struct perf_sample, member), \
33 0, help }
34
35struct pyrf_event {
36 PyObject_HEAD
37 struct perf_sample sample;
38 union perf_event event;
39};
40
41#define sample_members \
42 sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \
43 sample_member_def(sample_pid, pid, T_INT, "event pid"), \
44 sample_member_def(sample_tid, tid, T_INT, "event tid"), \
45 sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
46 sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"), \
47 sample_member_def(sample_id, id, T_ULONGLONG, "event id"), \
48 sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \
49 sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \
50 sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
51
52static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
53
54static PyMemberDef pyrf_mmap_event__members[] = {
55 sample_members
56 member_def(perf_event_header, type, T_UINT, "event type"),
57 member_def(mmap_event, pid, T_UINT, "event pid"),
58 member_def(mmap_event, tid, T_UINT, "event tid"),
59 member_def(mmap_event, start, T_ULONGLONG, "start of the map"),
60 member_def(mmap_event, len, T_ULONGLONG, "map length"),
61 member_def(mmap_event, pgoff, T_ULONGLONG, "page offset"),
62 member_def(mmap_event, filename, T_STRING_INPLACE, "backing store"),
63 { .name = NULL, },
64};
65
66static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
67{
68 PyObject *ret;
69 char *s;
70
71 if (asprintf(&s, "{ type: mmap, pid: %u, tid: %u, start: %#" PRIx64 ", "
72 "length: %#" PRIx64 ", offset: %#" PRIx64 ", "
73 "filename: %s }",
74 pevent->event.mmap.pid, pevent->event.mmap.tid,
75 pevent->event.mmap.start, pevent->event.mmap.len,
76 pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) {
77 ret = PyErr_NoMemory();
78 } else {
79 ret = PyString_FromString(s);
80 free(s);
81 }
82 return ret;
83}
84
85static PyTypeObject pyrf_mmap_event__type = {
86 PyVarObject_HEAD_INIT(NULL, 0)
87 .tp_name = "perf.mmap_event",
88 .tp_basicsize = sizeof(struct pyrf_event),
89 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
90 .tp_doc = pyrf_mmap_event__doc,
91 .tp_members = pyrf_mmap_event__members,
92 .tp_repr = (reprfunc)pyrf_mmap_event__repr,
93};
94
95static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
96
97static PyMemberDef pyrf_task_event__members[] = {
98 sample_members
99 member_def(perf_event_header, type, T_UINT, "event type"),
100 member_def(fork_event, pid, T_UINT, "event pid"),
101 member_def(fork_event, ppid, T_UINT, "event ppid"),
102 member_def(fork_event, tid, T_UINT, "event tid"),
103 member_def(fork_event, ptid, T_UINT, "event ptid"),
104 member_def(fork_event, time, T_ULONGLONG, "timestamp"),
105 { .name = NULL, },
106};
107
108static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
109{
110 return PyString_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
111 "ptid: %u, time: %" PRIu64 "}",
112 pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit",
113 pevent->event.fork.pid,
114 pevent->event.fork.ppid,
115 pevent->event.fork.tid,
116 pevent->event.fork.ptid,
117 pevent->event.fork.time);
118}
119
120static PyTypeObject pyrf_task_event__type = {
121 PyVarObject_HEAD_INIT(NULL, 0)
122 .tp_name = "perf.task_event",
123 .tp_basicsize = sizeof(struct pyrf_event),
124 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
125 .tp_doc = pyrf_task_event__doc,
126 .tp_members = pyrf_task_event__members,
127 .tp_repr = (reprfunc)pyrf_task_event__repr,
128};
129
130static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
131
132static PyMemberDef pyrf_comm_event__members[] = {
133 sample_members
134 member_def(perf_event_header, type, T_UINT, "event type"),
135 member_def(comm_event, pid, T_UINT, "event pid"),
136 member_def(comm_event, tid, T_UINT, "event tid"),
137 member_def(comm_event, comm, T_STRING_INPLACE, "process name"),
138 { .name = NULL, },
139};
140
141static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
142{
143 return PyString_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
144 pevent->event.comm.pid,
145 pevent->event.comm.tid,
146 pevent->event.comm.comm);
147}
148
149static PyTypeObject pyrf_comm_event__type = {
150 PyVarObject_HEAD_INIT(NULL, 0)
151 .tp_name = "perf.comm_event",
152 .tp_basicsize = sizeof(struct pyrf_event),
153 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
154 .tp_doc = pyrf_comm_event__doc,
155 .tp_members = pyrf_comm_event__members,
156 .tp_repr = (reprfunc)pyrf_comm_event__repr,
157};
158
159static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
160
161static PyMemberDef pyrf_throttle_event__members[] = {
162 sample_members
163 member_def(perf_event_header, type, T_UINT, "event type"),
164 member_def(throttle_event, time, T_ULONGLONG, "timestamp"),
165 member_def(throttle_event, id, T_ULONGLONG, "event id"),
166 member_def(throttle_event, stream_id, T_ULONGLONG, "event stream id"),
167 { .name = NULL, },
168};
169
170static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
171{
172 struct throttle_event *te = (struct throttle_event *)(&pevent->event.header + 1);
173
174 return PyString_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64
175 ", stream_id: %" PRIu64 " }",
176 pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
177 te->time, te->id, te->stream_id);
178}
179
180static PyTypeObject pyrf_throttle_event__type = {
181 PyVarObject_HEAD_INIT(NULL, 0)
182 .tp_name = "perf.throttle_event",
183 .tp_basicsize = sizeof(struct pyrf_event),
184 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
185 .tp_doc = pyrf_throttle_event__doc,
186 .tp_members = pyrf_throttle_event__members,
187 .tp_repr = (reprfunc)pyrf_throttle_event__repr,
188};
189
190static int pyrf_event__setup_types(void)
191{
192 int err;
193 pyrf_mmap_event__type.tp_new =
194 pyrf_task_event__type.tp_new =
195 pyrf_comm_event__type.tp_new =
196 pyrf_throttle_event__type.tp_new = PyType_GenericNew;
197 err = PyType_Ready(&pyrf_mmap_event__type);
198 if (err < 0)
199 goto out;
200 err = PyType_Ready(&pyrf_task_event__type);
201 if (err < 0)
202 goto out;
203 err = PyType_Ready(&pyrf_comm_event__type);
204 if (err < 0)
205 goto out;
206 err = PyType_Ready(&pyrf_throttle_event__type);
207 if (err < 0)
208 goto out;
209out:
210 return err;
211}
212
213static PyTypeObject *pyrf_event__type[] = {
214 [PERF_RECORD_MMAP] = &pyrf_mmap_event__type,
215 [PERF_RECORD_LOST] = &pyrf_mmap_event__type,
216 [PERF_RECORD_COMM] = &pyrf_comm_event__type,
217 [PERF_RECORD_EXIT] = &pyrf_task_event__type,
218 [PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type,
219 [PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
220 [PERF_RECORD_FORK] = &pyrf_task_event__type,
221 [PERF_RECORD_READ] = &pyrf_mmap_event__type,
222 [PERF_RECORD_SAMPLE] = &pyrf_mmap_event__type,
223};
224
225static PyObject *pyrf_event__new(union perf_event *event)
226{
227 struct pyrf_event *pevent;
228 PyTypeObject *ptype;
229
230 if (event->header.type < PERF_RECORD_MMAP ||
231 event->header.type > PERF_RECORD_SAMPLE)
232 return NULL;
233
234 ptype = pyrf_event__type[event->header.type];
235 pevent = PyObject_New(struct pyrf_event, ptype);
236 if (pevent != NULL)
237 memcpy(&pevent->event, event, event->header.size);
238 return (PyObject *)pevent;
239}
240
241struct pyrf_cpu_map {
242 PyObject_HEAD
243
244 struct cpu_map *cpus;
245};
246
247static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
248 PyObject *args, PyObject *kwargs)
249{
250 static char *kwlist[] = { "cpustr", NULL, NULL, };
251 char *cpustr = NULL;
252
253 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
254 kwlist, &cpustr))
255 return -1;
256
257 pcpus->cpus = cpu_map__new(cpustr);
258 if (pcpus->cpus == NULL)
259 return -1;
260 return 0;
261}
262
263static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
264{
265 cpu_map__delete(pcpus->cpus);
266 pcpus->ob_type->tp_free((PyObject*)pcpus);
267}
268
269static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
270{
271 struct pyrf_cpu_map *pcpus = (void *)obj;
272
273 return pcpus->cpus->nr;
274}
275
276static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
277{
278 struct pyrf_cpu_map *pcpus = (void *)obj;
279
280 if (i >= pcpus->cpus->nr)
281 return NULL;
282
283 return Py_BuildValue("i", pcpus->cpus->map[i]);
284}
285
286static PySequenceMethods pyrf_cpu_map__sequence_methods = {
287 .sq_length = pyrf_cpu_map__length,
288 .sq_item = pyrf_cpu_map__item,
289};
290
291static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
292
293static PyTypeObject pyrf_cpu_map__type = {
294 PyVarObject_HEAD_INIT(NULL, 0)
295 .tp_name = "perf.cpu_map",
296 .tp_basicsize = sizeof(struct pyrf_cpu_map),
297 .tp_dealloc = (destructor)pyrf_cpu_map__delete,
298 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
299 .tp_doc = pyrf_cpu_map__doc,
300 .tp_as_sequence = &pyrf_cpu_map__sequence_methods,
301 .tp_init = (initproc)pyrf_cpu_map__init,
302};
303
304static int pyrf_cpu_map__setup_types(void)
305{
306 pyrf_cpu_map__type.tp_new = PyType_GenericNew;
307 return PyType_Ready(&pyrf_cpu_map__type);
308}
309
310struct pyrf_thread_map {
311 PyObject_HEAD
312
313 struct thread_map *threads;
314};
315
316static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
317 PyObject *args, PyObject *kwargs)
318{
319 static char *kwlist[] = { "pid", "tid", NULL, NULL, };
320 int pid = -1, tid = -1;
321
322 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
323 kwlist, &pid, &tid))
324 return -1;
325
326 pthreads->threads = thread_map__new(pid, tid);
327 if (pthreads->threads == NULL)
328 return -1;
329 return 0;
330}
331
332static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
333{
334 thread_map__delete(pthreads->threads);
335 pthreads->ob_type->tp_free((PyObject*)pthreads);
336}
337
338static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
339{
340 struct pyrf_thread_map *pthreads = (void *)obj;
341
342 return pthreads->threads->nr;
343}
344
345static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
346{
347 struct pyrf_thread_map *pthreads = (void *)obj;
348
349 if (i >= pthreads->threads->nr)
350 return NULL;
351
352 return Py_BuildValue("i", pthreads->threads->map[i]);
353}
354
355static PySequenceMethods pyrf_thread_map__sequence_methods = {
356 .sq_length = pyrf_thread_map__length,
357 .sq_item = pyrf_thread_map__item,
358};
359
360static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
361
362static PyTypeObject pyrf_thread_map__type = {
363 PyVarObject_HEAD_INIT(NULL, 0)
364 .tp_name = "perf.thread_map",
365 .tp_basicsize = sizeof(struct pyrf_thread_map),
366 .tp_dealloc = (destructor)pyrf_thread_map__delete,
367 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
368 .tp_doc = pyrf_thread_map__doc,
369 .tp_as_sequence = &pyrf_thread_map__sequence_methods,
370 .tp_init = (initproc)pyrf_thread_map__init,
371};
372
373static int pyrf_thread_map__setup_types(void)
374{
375 pyrf_thread_map__type.tp_new = PyType_GenericNew;
376 return PyType_Ready(&pyrf_thread_map__type);
377}
378
379struct pyrf_evsel {
380 PyObject_HEAD
381
382 struct perf_evsel evsel;
383};
384
385static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
386 PyObject *args, PyObject *kwargs)
387{
388 struct perf_event_attr attr = {
389 .type = PERF_TYPE_HARDWARE,
390 .config = PERF_COUNT_HW_CPU_CYCLES,
391 .sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID,
392 };
393 static char *kwlist[] = {
394 "type",
395 "config",
396 "sample_freq",
397 "sample_period",
398 "sample_type",
399 "read_format",
400 "disabled",
401 "inherit",
402 "pinned",
403 "exclusive",
404 "exclude_user",
405 "exclude_kernel",
406 "exclude_hv",
407 "exclude_idle",
408 "mmap",
409 "comm",
410 "freq",
411 "inherit_stat",
412 "enable_on_exec",
413 "task",
414 "watermark",
415 "precise_ip",
416 "mmap_data",
417 "sample_id_all",
418 "wakeup_events",
419 "bp_type",
420 "bp_addr",
421 "bp_len", NULL, NULL, };
422 u64 sample_period = 0;
423 u32 disabled = 0,
424 inherit = 0,
425 pinned = 0,
426 exclusive = 0,
427 exclude_user = 0,
428 exclude_kernel = 0,
429 exclude_hv = 0,
430 exclude_idle = 0,
431 mmap = 0,
432 comm = 0,
433 freq = 1,
434 inherit_stat = 0,
435 enable_on_exec = 0,
436 task = 0,
437 watermark = 0,
438 precise_ip = 0,
439 mmap_data = 0,
440 sample_id_all = 1;
441 int idx = 0;
442
443 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
444 "|iKiKKiiiiiiiiiiiiiiiiiiiiiKK", kwlist,
445 &attr.type, &attr.config, &attr.sample_freq,
446 &sample_period, &attr.sample_type,
447 &attr.read_format, &disabled, &inherit,
448 &pinned, &exclusive, &exclude_user,
449 &exclude_kernel, &exclude_hv, &exclude_idle,
450 &mmap, &comm, &freq, &inherit_stat,
451 &enable_on_exec, &task, &watermark,
452 &precise_ip, &mmap_data, &sample_id_all,
453 &attr.wakeup_events, &attr.bp_type,
454 &attr.bp_addr, &attr.bp_len, &idx))
455 return -1;
456
457 /* union... */
458 if (sample_period != 0) {
459 if (attr.sample_freq != 0)
460 return -1; /* FIXME: throw right exception */
461 attr.sample_period = sample_period;
462 }
463
464 /* Bitfields */
465 attr.disabled = disabled;
466 attr.inherit = inherit;
467 attr.pinned = pinned;
468 attr.exclusive = exclusive;
469 attr.exclude_user = exclude_user;
470 attr.exclude_kernel = exclude_kernel;
471 attr.exclude_hv = exclude_hv;
472 attr.exclude_idle = exclude_idle;
473 attr.mmap = mmap;
474 attr.comm = comm;
475 attr.freq = freq;
476 attr.inherit_stat = inherit_stat;
477 attr.enable_on_exec = enable_on_exec;
478 attr.task = task;
479 attr.watermark = watermark;
480 attr.precise_ip = precise_ip;
481 attr.mmap_data = mmap_data;
482 attr.sample_id_all = sample_id_all;
483
484 perf_evsel__init(&pevsel->evsel, &attr, idx);
485 return 0;
486}
487
488static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
489{
490 perf_evsel__exit(&pevsel->evsel);
491 pevsel->ob_type->tp_free((PyObject*)pevsel);
492}
493
494static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
495 PyObject *args, PyObject *kwargs)
496{
497 struct perf_evsel *evsel = &pevsel->evsel;
498 struct cpu_map *cpus = NULL;
499 struct thread_map *threads = NULL;
500 PyObject *pcpus = NULL, *pthreads = NULL;
501 int group = 0, overwrite = 0;
502 static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL};
503
504 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
505 &pcpus, &pthreads, &group, &overwrite))
506 return NULL;
507
508 if (pthreads != NULL)
509 threads = ((struct pyrf_thread_map *)pthreads)->threads;
510
511 if (pcpus != NULL)
512 cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
513
514 if (perf_evsel__open(evsel, cpus, threads, group, overwrite) < 0) {
515 PyErr_SetFromErrno(PyExc_OSError);
516 return NULL;
517 }
518
519 Py_INCREF(Py_None);
520 return Py_None;
521}
522
523static PyMethodDef pyrf_evsel__methods[] = {
524 {
525 .ml_name = "open",
526 .ml_meth = (PyCFunction)pyrf_evsel__open,
527 .ml_flags = METH_VARARGS | METH_KEYWORDS,
528 .ml_doc = PyDoc_STR("open the event selector file descriptor table.")
529 },
530 { .ml_name = NULL, }
531};
532
533static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
534
535static PyTypeObject pyrf_evsel__type = {
536 PyVarObject_HEAD_INIT(NULL, 0)
537 .tp_name = "perf.evsel",
538 .tp_basicsize = sizeof(struct pyrf_evsel),
539 .tp_dealloc = (destructor)pyrf_evsel__delete,
540 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
541 .tp_doc = pyrf_evsel__doc,
542 .tp_methods = pyrf_evsel__methods,
543 .tp_init = (initproc)pyrf_evsel__init,
544};
545
546static int pyrf_evsel__setup_types(void)
547{
548 pyrf_evsel__type.tp_new = PyType_GenericNew;
549 return PyType_Ready(&pyrf_evsel__type);
550}
551
552struct pyrf_evlist {
553 PyObject_HEAD
554
555 struct perf_evlist evlist;
556};
557
558static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
559 PyObject *args, PyObject *kwargs __used)
560{
561 PyObject *pcpus = NULL, *pthreads = NULL;
562 struct cpu_map *cpus;
563 struct thread_map *threads;
564
565 if (!PyArg_ParseTuple(args, "OO", &pcpus, &pthreads))
566 return -1;
567
568 threads = ((struct pyrf_thread_map *)pthreads)->threads;
569 cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
570 perf_evlist__init(&pevlist->evlist, cpus, threads);
571 return 0;
572}
573
574static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
575{
576 perf_evlist__exit(&pevlist->evlist);
577 pevlist->ob_type->tp_free((PyObject*)pevlist);
578}
579
580static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
581 PyObject *args, PyObject *kwargs)
582{
583 struct perf_evlist *evlist = &pevlist->evlist;
584 static char *kwlist[] = {"pages", "overwrite",
585 NULL, NULL};
586 int pages = 128, overwrite = false;
587
588 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
589 &pages, &overwrite))
590 return NULL;
591
592 if (perf_evlist__mmap(evlist, pages, overwrite) < 0) {
593 PyErr_SetFromErrno(PyExc_OSError);
594 return NULL;
595 }
596
597 Py_INCREF(Py_None);
598 return Py_None;
599}
600
601static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
602 PyObject *args, PyObject *kwargs)
603{
604 struct perf_evlist *evlist = &pevlist->evlist;
605 static char *kwlist[] = {"timeout", NULL, NULL};
606 int timeout = -1, n;
607
608 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
609 return NULL;
610
611 n = poll(evlist->pollfd, evlist->nr_fds, timeout);
612 if (n < 0) {
613 PyErr_SetFromErrno(PyExc_OSError);
614 return NULL;
615 }
616
617 return Py_BuildValue("i", n);
618}
619
620static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
621 PyObject *args __used, PyObject *kwargs __used)
622{
623 struct perf_evlist *evlist = &pevlist->evlist;
624 PyObject *list = PyList_New(0);
625 int i;
626
627 for (i = 0; i < evlist->nr_fds; ++i) {
628 PyObject *file;
629 FILE *fp = fdopen(evlist->pollfd[i].fd, "r");
630
631 if (fp == NULL)
632 goto free_list;
633
634 file = PyFile_FromFile(fp, "perf", "r", NULL);
635 if (file == NULL)
636 goto free_list;
637
638 if (PyList_Append(list, file) != 0) {
639 Py_DECREF(file);
640 goto free_list;
641 }
642
643 Py_DECREF(file);
644 }
645
646 return list;
647free_list:
648 return PyErr_NoMemory();
649}
650
651
652static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
653 PyObject *args, PyObject *kwargs __used)
654{
655 struct perf_evlist *evlist = &pevlist->evlist;
656 PyObject *pevsel;
657 struct perf_evsel *evsel;
658
659 if (!PyArg_ParseTuple(args, "O", &pevsel))
660 return NULL;
661
662 Py_INCREF(pevsel);
663 evsel = &((struct pyrf_evsel *)pevsel)->evsel;
664 evsel->idx = evlist->nr_entries;
665 perf_evlist__add(evlist, evsel);
666
667 return Py_BuildValue("i", evlist->nr_entries);
668}
669
670static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
671 PyObject *args, PyObject *kwargs)
672{
673 struct perf_evlist *evlist = &pevlist->evlist;
674 union perf_event *event;
675 int sample_id_all = 1, cpu;
676 static char *kwlist[] = {"sample_id_all", NULL, NULL};
677
678 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
679 &cpu, &sample_id_all))
680 return NULL;
681
682 event = perf_evlist__read_on_cpu(evlist, cpu);
683 if (event != NULL) {
684 struct perf_evsel *first;
685 PyObject *pyevent = pyrf_event__new(event);
686 struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
687
688 if (pyevent == NULL)
689 return PyErr_NoMemory();
690
691 first = list_entry(evlist->entries.next, struct perf_evsel, node);
692 perf_event__parse_sample(event, first->attr.sample_type, sample_id_all,
693 &pevent->sample);
694 return pyevent;
695 }
696
697 Py_INCREF(Py_None);
698 return Py_None;
699}
700
701static PyMethodDef pyrf_evlist__methods[] = {
702 {
703 .ml_name = "mmap",
704 .ml_meth = (PyCFunction)pyrf_evlist__mmap,
705 .ml_flags = METH_VARARGS | METH_KEYWORDS,
706 .ml_doc = PyDoc_STR("mmap the file descriptor table.")
707 },
708 {
709 .ml_name = "poll",
710 .ml_meth = (PyCFunction)pyrf_evlist__poll,
711 .ml_flags = METH_VARARGS | METH_KEYWORDS,
712 .ml_doc = PyDoc_STR("poll the file descriptor table.")
713 },
714 {
715 .ml_name = "get_pollfd",
716 .ml_meth = (PyCFunction)pyrf_evlist__get_pollfd,
717 .ml_flags = METH_VARARGS | METH_KEYWORDS,
718 .ml_doc = PyDoc_STR("get the poll file descriptor table.")
719 },
720 {
721 .ml_name = "add",
722 .ml_meth = (PyCFunction)pyrf_evlist__add,
723 .ml_flags = METH_VARARGS | METH_KEYWORDS,
724 .ml_doc = PyDoc_STR("adds an event selector to the list.")
725 },
726 {
727 .ml_name = "read_on_cpu",
728 .ml_meth = (PyCFunction)pyrf_evlist__read_on_cpu,
729 .ml_flags = METH_VARARGS | METH_KEYWORDS,
730 .ml_doc = PyDoc_STR("reads an event.")
731 },
732 { .ml_name = NULL, }
733};
734
735static Py_ssize_t pyrf_evlist__length(PyObject *obj)
736{
737 struct pyrf_evlist *pevlist = (void *)obj;
738
739 return pevlist->evlist.nr_entries;
740}
741
742static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
743{
744 struct pyrf_evlist *pevlist = (void *)obj;
745 struct perf_evsel *pos;
746
747 if (i >= pevlist->evlist.nr_entries)
748 return NULL;
749
750 list_for_each_entry(pos, &pevlist->evlist.entries, node)
751 if (i-- == 0)
752 break;
753
754 return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
755}
756
757static PySequenceMethods pyrf_evlist__sequence_methods = {
758 .sq_length = pyrf_evlist__length,
759 .sq_item = pyrf_evlist__item,
760};
761
762static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
763
764static PyTypeObject pyrf_evlist__type = {
765 PyVarObject_HEAD_INIT(NULL, 0)
766 .tp_name = "perf.evlist",
767 .tp_basicsize = sizeof(struct pyrf_evlist),
768 .tp_dealloc = (destructor)pyrf_evlist__delete,
769 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
770 .tp_as_sequence = &pyrf_evlist__sequence_methods,
771 .tp_doc = pyrf_evlist__doc,
772 .tp_methods = pyrf_evlist__methods,
773 .tp_init = (initproc)pyrf_evlist__init,
774};
775
776static int pyrf_evlist__setup_types(void)
777{
778 pyrf_evlist__type.tp_new = PyType_GenericNew;
779 return PyType_Ready(&pyrf_evlist__type);
780}
781
782static struct {
783 const char *name;
784 int value;
785} perf__constants[] = {
786 { "TYPE_HARDWARE", PERF_TYPE_HARDWARE },
787 { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE },
788 { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT },
789 { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE },
790 { "TYPE_RAW", PERF_TYPE_RAW },
791 { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT },
792
793 { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES },
794 { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS },
795 { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES },
796 { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES },
797 { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
798 { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES },
799 { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES },
800 { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D },
801 { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I },
802 { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL },
803 { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB },
804 { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB },
805 { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU },
806 { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ },
807 { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE },
808 { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH },
809 { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS },
810 { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS },
811
812 { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK },
813 { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK },
814 { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS },
815 { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES },
816 { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS },
817 { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN },
818 { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ },
819 { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS },
820 { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS },
821
822 { "SAMPLE_IP", PERF_SAMPLE_IP },
823 { "SAMPLE_TID", PERF_SAMPLE_TID },
824 { "SAMPLE_TIME", PERF_SAMPLE_TIME },
825 { "SAMPLE_ADDR", PERF_SAMPLE_ADDR },
826 { "SAMPLE_READ", PERF_SAMPLE_READ },
827 { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN },
828 { "SAMPLE_ID", PERF_SAMPLE_ID },
829 { "SAMPLE_CPU", PERF_SAMPLE_CPU },
830 { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD },
831 { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID },
832 { "SAMPLE_RAW", PERF_SAMPLE_RAW },
833
834 { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED },
835 { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING },
836 { "FORMAT_ID", PERF_FORMAT_ID },
837 { "FORMAT_GROUP", PERF_FORMAT_GROUP },
838
839 { "RECORD_MMAP", PERF_RECORD_MMAP },
840 { "RECORD_LOST", PERF_RECORD_LOST },
841 { "RECORD_COMM", PERF_RECORD_COMM },
842 { "RECORD_EXIT", PERF_RECORD_EXIT },
843 { "RECORD_THROTTLE", PERF_RECORD_THROTTLE },
844 { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE },
845 { "RECORD_FORK", PERF_RECORD_FORK },
846 { "RECORD_READ", PERF_RECORD_READ },
847 { "RECORD_SAMPLE", PERF_RECORD_SAMPLE },
848 { .name = NULL, },
849};
850
851static PyMethodDef perf__methods[] = {
852 { .ml_name = NULL, }
853};
854
855PyMODINIT_FUNC initperf(void)
856{
857 PyObject *obj;
858 int i;
859 PyObject *dict, *module = Py_InitModule("perf", perf__methods);
860
861 if (module == NULL ||
862 pyrf_event__setup_types() < 0 ||
863 pyrf_evlist__setup_types() < 0 ||
864 pyrf_evsel__setup_types() < 0 ||
865 pyrf_thread_map__setup_types() < 0 ||
866 pyrf_cpu_map__setup_types() < 0)
867 return;
868
869 Py_INCREF(&pyrf_evlist__type);
870 PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
871
872 Py_INCREF(&pyrf_evsel__type);
873 PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type);
874
875 Py_INCREF(&pyrf_thread_map__type);
876 PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type);
877
878 Py_INCREF(&pyrf_cpu_map__type);
879 PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
880
881 dict = PyModule_GetDict(module);
882 if (dict == NULL)
883 goto error;
884
885 for (i = 0; perf__constants[i].name != NULL; i++) {
886 obj = PyInt_FromLong(perf__constants[i].value);
887 if (obj == NULL)
888 goto error;
889 PyDict_SetItemString(dict, perf__constants[i].name, obj);
890 Py_DECREF(obj);
891 }
892
893error:
894 if (PyErr_Occurred())
895 PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
896}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index c6d99334bdfa..2040b8538527 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -248,8 +248,7 @@ static void python_process_event(int cpu, void *data,
248 context = PyCObject_FromVoidPtr(scripting_context, NULL); 248 context = PyCObject_FromVoidPtr(scripting_context, NULL);
249 249
250 PyTuple_SetItem(t, n++, PyString_FromString(handler_name)); 250 PyTuple_SetItem(t, n++, PyString_FromString(handler_name));
251 PyTuple_SetItem(t, n++, 251 PyTuple_SetItem(t, n++, context);
252 PyCObject_FromVoidPtr(scripting_context, NULL));
253 252
254 if (handler) { 253 if (handler) {
255 PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); 254 PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 105f00bfd555..f26639fa0fb3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -7,6 +7,8 @@
7#include <sys/types.h> 7#include <sys/types.h>
8#include <sys/mman.h> 8#include <sys/mman.h>
9 9
10#include "evlist.h"
11#include "evsel.h"
10#include "session.h" 12#include "session.h"
11#include "sort.h" 13#include "sort.h"
12#include "util.h" 14#include "util.h"
@@ -19,7 +21,7 @@ static int perf_session__open(struct perf_session *self, bool force)
19 self->fd_pipe = true; 21 self->fd_pipe = true;
20 self->fd = STDIN_FILENO; 22 self->fd = STDIN_FILENO;
21 23
22 if (perf_header__read(self, self->fd) < 0) 24 if (perf_session__read_header(self, self->fd) < 0)
23 pr_err("incompatible file format"); 25 pr_err("incompatible file format");
24 26
25 return 0; 27 return 0;
@@ -51,7 +53,7 @@ static int perf_session__open(struct perf_session *self, bool force)
51 goto out_close; 53 goto out_close;
52 } 54 }
53 55
54 if (perf_header__read(self, self->fd) < 0) { 56 if (perf_session__read_header(self, self->fd) < 0) {
55 pr_err("incompatible file format"); 57 pr_err("incompatible file format");
56 goto out_close; 58 goto out_close;
57 } 59 }
@@ -67,7 +69,7 @@ out_close:
67 69
68static void perf_session__id_header_size(struct perf_session *session) 70static void perf_session__id_header_size(struct perf_session *session)
69{ 71{
70 struct sample_data *data; 72 struct perf_sample *data;
71 u64 sample_type = session->sample_type; 73 u64 sample_type = session->sample_type;
72 u16 size = 0; 74 u16 size = 0;
73 75
@@ -92,21 +94,10 @@ out:
92 session->id_hdr_size = size; 94 session->id_hdr_size = size;
93} 95}
94 96
95void perf_session__set_sample_id_all(struct perf_session *session, bool value)
96{
97 session->sample_id_all = value;
98 perf_session__id_header_size(session);
99}
100
101void perf_session__set_sample_type(struct perf_session *session, u64 type)
102{
103 session->sample_type = type;
104}
105
106void perf_session__update_sample_type(struct perf_session *self) 97void perf_session__update_sample_type(struct perf_session *self)
107{ 98{
108 self->sample_type = perf_header__sample_type(&self->header); 99 self->sample_type = perf_evlist__sample_type(self->evlist);
109 self->sample_id_all = perf_header__sample_id_all(&self->header); 100 self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
110 perf_session__id_header_size(self); 101 perf_session__id_header_size(self);
111} 102}
112 103
@@ -135,13 +126,9 @@ struct perf_session *perf_session__new(const char *filename, int mode,
135 if (self == NULL) 126 if (self == NULL)
136 goto out; 127 goto out;
137 128
138 if (perf_header__init(&self->header) < 0)
139 goto out_free;
140
141 memcpy(self->filename, filename, len); 129 memcpy(self->filename, filename, len);
142 self->threads = RB_ROOT; 130 self->threads = RB_ROOT;
143 INIT_LIST_HEAD(&self->dead_threads); 131 INIT_LIST_HEAD(&self->dead_threads);
144 self->hists_tree = RB_ROOT;
145 self->last_match = NULL; 132 self->last_match = NULL;
146 /* 133 /*
147 * On 64bit we can mmap the data file in one go. No need for tiny mmap 134 * On 64bit we can mmap the data file in one go. No need for tiny mmap
@@ -162,17 +149,16 @@ struct perf_session *perf_session__new(const char *filename, int mode,
162 if (mode == O_RDONLY) { 149 if (mode == O_RDONLY) {
163 if (perf_session__open(self, force) < 0) 150 if (perf_session__open(self, force) < 0)
164 goto out_delete; 151 goto out_delete;
152 perf_session__update_sample_type(self);
165 } else if (mode == O_WRONLY) { 153 } else if (mode == O_WRONLY) {
166 /* 154 /*
167 * In O_RDONLY mode this will be performed when reading the 155 * In O_RDONLY mode this will be performed when reading the
168 * kernel MMAP event, in event__process_mmap(). 156 * kernel MMAP event, in perf_event__process_mmap().
169 */ 157 */
170 if (perf_session__create_kernel_maps(self) < 0) 158 if (perf_session__create_kernel_maps(self) < 0)
171 goto out_delete; 159 goto out_delete;
172 } 160 }
173 161
174 perf_session__update_sample_type(self);
175
176 if (ops && ops->ordering_requires_timestamps && 162 if (ops && ops->ordering_requires_timestamps &&
177 ops->ordered_samples && !self->sample_id_all) { 163 ops->ordered_samples && !self->sample_id_all) {
178 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); 164 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
@@ -181,9 +167,6 @@ struct perf_session *perf_session__new(const char *filename, int mode,
181 167
182out: 168out:
183 return self; 169 return self;
184out_free:
185 free(self);
186 return NULL;
187out_delete: 170out_delete:
188 perf_session__delete(self); 171 perf_session__delete(self);
189 return NULL; 172 return NULL;
@@ -214,7 +197,6 @@ static void perf_session__delete_threads(struct perf_session *self)
214 197
215void perf_session__delete(struct perf_session *self) 198void perf_session__delete(struct perf_session *self)
216{ 199{
217 perf_header__exit(&self->header);
218 perf_session__destroy_kernel_maps(self); 200 perf_session__destroy_kernel_maps(self);
219 perf_session__delete_dead_threads(self); 201 perf_session__delete_dead_threads(self);
220 perf_session__delete_threads(self); 202 perf_session__delete_threads(self);
@@ -242,17 +224,16 @@ static bool symbol__match_parent_regex(struct symbol *sym)
242 return 0; 224 return 0;
243} 225}
244 226
245struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, 227int perf_session__resolve_callchain(struct perf_session *self,
246 struct thread *thread, 228 struct thread *thread,
247 struct ip_callchain *chain, 229 struct ip_callchain *chain,
248 struct symbol **parent) 230 struct symbol **parent)
249{ 231{
250 u8 cpumode = PERF_RECORD_MISC_USER; 232 u8 cpumode = PERF_RECORD_MISC_USER;
251 unsigned int i; 233 unsigned int i;
252 struct map_symbol *syms = calloc(chain->nr, sizeof(*syms)); 234 int err;
253 235
254 if (!syms) 236 callchain_cursor_reset(&self->callchain_cursor);
255 return NULL;
256 237
257 for (i = 0; i < chain->nr; i++) { 238 for (i = 0; i < chain->nr; i++) {
258 u64 ip = chain->ips[i]; 239 u64 ip = chain->ips[i];
@@ -281,30 +262,33 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
281 *parent = al.sym; 262 *parent = al.sym;
282 if (!symbol_conf.use_callchain) 263 if (!symbol_conf.use_callchain)
283 break; 264 break;
284 syms[i].map = al.map;
285 syms[i].sym = al.sym;
286 } 265 }
266
267 err = callchain_cursor_append(&self->callchain_cursor,
268 ip, al.map, al.sym);
269 if (err)
270 return err;
287 } 271 }
288 272
289 return syms; 273 return 0;
290} 274}
291 275
292static int process_event_synth_stub(event_t *event __used, 276static int process_event_synth_stub(union perf_event *event __used,
293 struct perf_session *session __used) 277 struct perf_session *session __used)
294{ 278{
295 dump_printf(": unhandled!\n"); 279 dump_printf(": unhandled!\n");
296 return 0; 280 return 0;
297} 281}
298 282
299static int process_event_stub(event_t *event __used, 283static int process_event_stub(union perf_event *event __used,
300 struct sample_data *sample __used, 284 struct perf_sample *sample __used,
301 struct perf_session *session __used) 285 struct perf_session *session __used)
302{ 286{
303 dump_printf(": unhandled!\n"); 287 dump_printf(": unhandled!\n");
304 return 0; 288 return 0;
305} 289}
306 290
307static int process_finished_round_stub(event_t *event __used, 291static int process_finished_round_stub(union perf_event *event __used,
308 struct perf_session *session __used, 292 struct perf_session *session __used,
309 struct perf_event_ops *ops __used) 293 struct perf_event_ops *ops __used)
310{ 294{
@@ -312,7 +296,7 @@ static int process_finished_round_stub(event_t *event __used,
312 return 0; 296 return 0;
313} 297}
314 298
315static int process_finished_round(event_t *event, 299static int process_finished_round(union perf_event *event,
316 struct perf_session *session, 300 struct perf_session *session,
317 struct perf_event_ops *ops); 301 struct perf_event_ops *ops);
318 302
@@ -329,7 +313,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
329 if (handler->exit == NULL) 313 if (handler->exit == NULL)
330 handler->exit = process_event_stub; 314 handler->exit = process_event_stub;
331 if (handler->lost == NULL) 315 if (handler->lost == NULL)
332 handler->lost = event__process_lost; 316 handler->lost = perf_event__process_lost;
333 if (handler->read == NULL) 317 if (handler->read == NULL)
334 handler->read = process_event_stub; 318 handler->read = process_event_stub;
335 if (handler->throttle == NULL) 319 if (handler->throttle == NULL)
@@ -363,98 +347,98 @@ void mem_bswap_64(void *src, int byte_size)
363 } 347 }
364} 348}
365 349
366static void event__all64_swap(event_t *self) 350static void perf_event__all64_swap(union perf_event *event)
367{ 351{
368 struct perf_event_header *hdr = &self->header; 352 struct perf_event_header *hdr = &event->header;
369 mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr)); 353 mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
370} 354}
371 355
372static void event__comm_swap(event_t *self) 356static void perf_event__comm_swap(union perf_event *event)
373{ 357{
374 self->comm.pid = bswap_32(self->comm.pid); 358 event->comm.pid = bswap_32(event->comm.pid);
375 self->comm.tid = bswap_32(self->comm.tid); 359 event->comm.tid = bswap_32(event->comm.tid);
376} 360}
377 361
378static void event__mmap_swap(event_t *self) 362static void perf_event__mmap_swap(union perf_event *event)
379{ 363{
380 self->mmap.pid = bswap_32(self->mmap.pid); 364 event->mmap.pid = bswap_32(event->mmap.pid);
381 self->mmap.tid = bswap_32(self->mmap.tid); 365 event->mmap.tid = bswap_32(event->mmap.tid);
382 self->mmap.start = bswap_64(self->mmap.start); 366 event->mmap.start = bswap_64(event->mmap.start);
383 self->mmap.len = bswap_64(self->mmap.len); 367 event->mmap.len = bswap_64(event->mmap.len);
384 self->mmap.pgoff = bswap_64(self->mmap.pgoff); 368 event->mmap.pgoff = bswap_64(event->mmap.pgoff);
385} 369}
386 370
387static void event__task_swap(event_t *self) 371static void perf_event__task_swap(union perf_event *event)
388{ 372{
389 self->fork.pid = bswap_32(self->fork.pid); 373 event->fork.pid = bswap_32(event->fork.pid);
390 self->fork.tid = bswap_32(self->fork.tid); 374 event->fork.tid = bswap_32(event->fork.tid);
391 self->fork.ppid = bswap_32(self->fork.ppid); 375 event->fork.ppid = bswap_32(event->fork.ppid);
392 self->fork.ptid = bswap_32(self->fork.ptid); 376 event->fork.ptid = bswap_32(event->fork.ptid);
393 self->fork.time = bswap_64(self->fork.time); 377 event->fork.time = bswap_64(event->fork.time);
394} 378}
395 379
396static void event__read_swap(event_t *self) 380static void perf_event__read_swap(union perf_event *event)
397{ 381{
398 self->read.pid = bswap_32(self->read.pid); 382 event->read.pid = bswap_32(event->read.pid);
399 self->read.tid = bswap_32(self->read.tid); 383 event->read.tid = bswap_32(event->read.tid);
400 self->read.value = bswap_64(self->read.value); 384 event->read.value = bswap_64(event->read.value);
401 self->read.time_enabled = bswap_64(self->read.time_enabled); 385 event->read.time_enabled = bswap_64(event->read.time_enabled);
402 self->read.time_running = bswap_64(self->read.time_running); 386 event->read.time_running = bswap_64(event->read.time_running);
403 self->read.id = bswap_64(self->read.id); 387 event->read.id = bswap_64(event->read.id);
404} 388}
405 389
406static void event__attr_swap(event_t *self) 390static void perf_event__attr_swap(union perf_event *event)
407{ 391{
408 size_t size; 392 size_t size;
409 393
410 self->attr.attr.type = bswap_32(self->attr.attr.type); 394 event->attr.attr.type = bswap_32(event->attr.attr.type);
411 self->attr.attr.size = bswap_32(self->attr.attr.size); 395 event->attr.attr.size = bswap_32(event->attr.attr.size);
412 self->attr.attr.config = bswap_64(self->attr.attr.config); 396 event->attr.attr.config = bswap_64(event->attr.attr.config);
413 self->attr.attr.sample_period = bswap_64(self->attr.attr.sample_period); 397 event->attr.attr.sample_period = bswap_64(event->attr.attr.sample_period);
414 self->attr.attr.sample_type = bswap_64(self->attr.attr.sample_type); 398 event->attr.attr.sample_type = bswap_64(event->attr.attr.sample_type);
415 self->attr.attr.read_format = bswap_64(self->attr.attr.read_format); 399 event->attr.attr.read_format = bswap_64(event->attr.attr.read_format);
416 self->attr.attr.wakeup_events = bswap_32(self->attr.attr.wakeup_events); 400 event->attr.attr.wakeup_events = bswap_32(event->attr.attr.wakeup_events);
417 self->attr.attr.bp_type = bswap_32(self->attr.attr.bp_type); 401 event->attr.attr.bp_type = bswap_32(event->attr.attr.bp_type);
418 self->attr.attr.bp_addr = bswap_64(self->attr.attr.bp_addr); 402 event->attr.attr.bp_addr = bswap_64(event->attr.attr.bp_addr);
419 self->attr.attr.bp_len = bswap_64(self->attr.attr.bp_len); 403 event->attr.attr.bp_len = bswap_64(event->attr.attr.bp_len);
420 404
421 size = self->header.size; 405 size = event->header.size;
422 size -= (void *)&self->attr.id - (void *)self; 406 size -= (void *)&event->attr.id - (void *)event;
423 mem_bswap_64(self->attr.id, size); 407 mem_bswap_64(event->attr.id, size);
424} 408}
425 409
426static void event__event_type_swap(event_t *self) 410static void perf_event__event_type_swap(union perf_event *event)
427{ 411{
428 self->event_type.event_type.event_id = 412 event->event_type.event_type.event_id =
429 bswap_64(self->event_type.event_type.event_id); 413 bswap_64(event->event_type.event_type.event_id);
430} 414}
431 415
432static void event__tracing_data_swap(event_t *self) 416static void perf_event__tracing_data_swap(union perf_event *event)
433{ 417{
434 self->tracing_data.size = bswap_32(self->tracing_data.size); 418 event->tracing_data.size = bswap_32(event->tracing_data.size);
435} 419}
436 420
437typedef void (*event__swap_op)(event_t *self); 421typedef void (*perf_event__swap_op)(union perf_event *event);
438 422
439static event__swap_op event__swap_ops[] = { 423static perf_event__swap_op perf_event__swap_ops[] = {
440 [PERF_RECORD_MMAP] = event__mmap_swap, 424 [PERF_RECORD_MMAP] = perf_event__mmap_swap,
441 [PERF_RECORD_COMM] = event__comm_swap, 425 [PERF_RECORD_COMM] = perf_event__comm_swap,
442 [PERF_RECORD_FORK] = event__task_swap, 426 [PERF_RECORD_FORK] = perf_event__task_swap,
443 [PERF_RECORD_EXIT] = event__task_swap, 427 [PERF_RECORD_EXIT] = perf_event__task_swap,
444 [PERF_RECORD_LOST] = event__all64_swap, 428 [PERF_RECORD_LOST] = perf_event__all64_swap,
445 [PERF_RECORD_READ] = event__read_swap, 429 [PERF_RECORD_READ] = perf_event__read_swap,
446 [PERF_RECORD_SAMPLE] = event__all64_swap, 430 [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
447 [PERF_RECORD_HEADER_ATTR] = event__attr_swap, 431 [PERF_RECORD_HEADER_ATTR] = perf_event__attr_swap,
448 [PERF_RECORD_HEADER_EVENT_TYPE] = event__event_type_swap, 432 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
449 [PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap, 433 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
450 [PERF_RECORD_HEADER_BUILD_ID] = NULL, 434 [PERF_RECORD_HEADER_BUILD_ID] = NULL,
451 [PERF_RECORD_HEADER_MAX] = NULL, 435 [PERF_RECORD_HEADER_MAX] = NULL,
452}; 436};
453 437
454struct sample_queue { 438struct sample_queue {
455 u64 timestamp; 439 u64 timestamp;
456 u64 file_offset; 440 u64 file_offset;
457 event_t *event; 441 union perf_event *event;
458 struct list_head list; 442 struct list_head list;
459}; 443};
460 444
@@ -472,8 +456,8 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
472} 456}
473 457
474static int perf_session_deliver_event(struct perf_session *session, 458static int perf_session_deliver_event(struct perf_session *session,
475 event_t *event, 459 union perf_event *event,
476 struct sample_data *sample, 460 struct perf_sample *sample,
477 struct perf_event_ops *ops, 461 struct perf_event_ops *ops,
478 u64 file_offset); 462 u64 file_offset);
479 463
@@ -483,7 +467,7 @@ static void flush_sample_queue(struct perf_session *s,
483 struct ordered_samples *os = &s->ordered_samples; 467 struct ordered_samples *os = &s->ordered_samples;
484 struct list_head *head = &os->samples; 468 struct list_head *head = &os->samples;
485 struct sample_queue *tmp, *iter; 469 struct sample_queue *tmp, *iter;
486 struct sample_data sample; 470 struct perf_sample sample;
487 u64 limit = os->next_flush; 471 u64 limit = os->next_flush;
488 u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; 472 u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
489 473
@@ -494,7 +478,7 @@ static void flush_sample_queue(struct perf_session *s,
494 if (iter->timestamp > limit) 478 if (iter->timestamp > limit)
495 break; 479 break;
496 480
497 event__parse_sample(iter->event, s, &sample); 481 perf_session__parse_sample(s, iter->event, &sample);
498 perf_session_deliver_event(s, iter->event, &sample, ops, 482 perf_session_deliver_event(s, iter->event, &sample, ops,
499 iter->file_offset); 483 iter->file_offset);
500 484
@@ -550,7 +534,7 @@ static void flush_sample_queue(struct perf_session *s,
550 * Flush every events below timestamp 7 534 * Flush every events below timestamp 7
551 * etc... 535 * etc...
552 */ 536 */
553static int process_finished_round(event_t *event __used, 537static int process_finished_round(union perf_event *event __used,
554 struct perf_session *session, 538 struct perf_session *session,
555 struct perf_event_ops *ops) 539 struct perf_event_ops *ops)
556{ 540{
@@ -607,12 +591,12 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s)
607 591
608#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue)) 592#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
609 593
610static int perf_session_queue_event(struct perf_session *s, event_t *event, 594static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
611 struct sample_data *data, u64 file_offset) 595 struct perf_sample *sample, u64 file_offset)
612{ 596{
613 struct ordered_samples *os = &s->ordered_samples; 597 struct ordered_samples *os = &s->ordered_samples;
614 struct list_head *sc = &os->sample_cache; 598 struct list_head *sc = &os->sample_cache;
615 u64 timestamp = data->time; 599 u64 timestamp = sample->time;
616 struct sample_queue *new; 600 struct sample_queue *new;
617 601
618 if (!timestamp || timestamp == ~0ULL) 602 if (!timestamp || timestamp == ~0ULL)
@@ -648,7 +632,7 @@ static int perf_session_queue_event(struct perf_session *s, event_t *event,
648 return 0; 632 return 0;
649} 633}
650 634
651static void callchain__printf(struct sample_data *sample) 635static void callchain__printf(struct perf_sample *sample)
652{ 636{
653 unsigned int i; 637 unsigned int i;
654 638
@@ -660,8 +644,8 @@ static void callchain__printf(struct sample_data *sample)
660} 644}
661 645
662static void perf_session__print_tstamp(struct perf_session *session, 646static void perf_session__print_tstamp(struct perf_session *session,
663 event_t *event, 647 union perf_event *event,
664 struct sample_data *sample) 648 struct perf_sample *sample)
665{ 649{
666 if (event->header.type != PERF_RECORD_SAMPLE && 650 if (event->header.type != PERF_RECORD_SAMPLE &&
667 !session->sample_id_all) { 651 !session->sample_id_all) {
@@ -676,8 +660,8 @@ static void perf_session__print_tstamp(struct perf_session *session,
676 printf("%" PRIu64 " ", sample->time); 660 printf("%" PRIu64 " ", sample->time);
677} 661}
678 662
679static void dump_event(struct perf_session *session, event_t *event, 663static void dump_event(struct perf_session *session, union perf_event *event,
680 u64 file_offset, struct sample_data *sample) 664 u64 file_offset, struct perf_sample *sample)
681{ 665{
682 if (!dump_trace) 666 if (!dump_trace)
683 return; 667 return;
@@ -691,11 +675,11 @@ static void dump_event(struct perf_session *session, event_t *event,
691 perf_session__print_tstamp(session, event, sample); 675 perf_session__print_tstamp(session, event, sample);
692 676
693 printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, 677 printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
694 event->header.size, event__get_event_name(event->header.type)); 678 event->header.size, perf_event__name(event->header.type));
695} 679}
696 680
697static void dump_sample(struct perf_session *session, event_t *event, 681static void dump_sample(struct perf_session *session, union perf_event *event,
698 struct sample_data *sample) 682 struct perf_sample *sample)
699{ 683{
700 if (!dump_trace) 684 if (!dump_trace)
701 return; 685 return;
@@ -709,8 +693,8 @@ static void dump_sample(struct perf_session *session, event_t *event,
709} 693}
710 694
711static int perf_session_deliver_event(struct perf_session *session, 695static int perf_session_deliver_event(struct perf_session *session,
712 event_t *event, 696 union perf_event *event,
713 struct sample_data *sample, 697 struct perf_sample *sample,
714 struct perf_event_ops *ops, 698 struct perf_event_ops *ops,
715 u64 file_offset) 699 u64 file_offset)
716{ 700{
@@ -743,7 +727,7 @@ static int perf_session_deliver_event(struct perf_session *session,
743} 727}
744 728
745static int perf_session__preprocess_sample(struct perf_session *session, 729static int perf_session__preprocess_sample(struct perf_session *session,
746 event_t *event, struct sample_data *sample) 730 union perf_event *event, struct perf_sample *sample)
747{ 731{
748 if (event->header.type != PERF_RECORD_SAMPLE || 732 if (event->header.type != PERF_RECORD_SAMPLE ||
749 !(session->sample_type & PERF_SAMPLE_CALLCHAIN)) 733 !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
@@ -758,7 +742,7 @@ static int perf_session__preprocess_sample(struct perf_session *session,
758 return 0; 742 return 0;
759} 743}
760 744
761static int perf_session__process_user_event(struct perf_session *session, event_t *event, 745static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
762 struct perf_event_ops *ops, u64 file_offset) 746 struct perf_event_ops *ops, u64 file_offset)
763{ 747{
764 dump_event(session, event, file_offset, NULL); 748 dump_event(session, event, file_offset, NULL);
@@ -783,15 +767,16 @@ static int perf_session__process_user_event(struct perf_session *session, event_
783} 767}
784 768
785static int perf_session__process_event(struct perf_session *session, 769static int perf_session__process_event(struct perf_session *session,
786 event_t *event, 770 union perf_event *event,
787 struct perf_event_ops *ops, 771 struct perf_event_ops *ops,
788 u64 file_offset) 772 u64 file_offset)
789{ 773{
790 struct sample_data sample; 774 struct perf_sample sample;
791 int ret; 775 int ret;
792 776
793 if (session->header.needs_swap && event__swap_ops[event->header.type]) 777 if (session->header.needs_swap &&
794 event__swap_ops[event->header.type](event); 778 perf_event__swap_ops[event->header.type])
779 perf_event__swap_ops[event->header.type](event);
795 780
796 if (event->header.type >= PERF_RECORD_HEADER_MAX) 781 if (event->header.type >= PERF_RECORD_HEADER_MAX)
797 return -EINVAL; 782 return -EINVAL;
@@ -804,7 +789,7 @@ static int perf_session__process_event(struct perf_session *session,
804 /* 789 /*
805 * For all kernel events we get the sample data 790 * For all kernel events we get the sample data
806 */ 791 */
807 event__parse_sample(event, session, &sample); 792 perf_session__parse_sample(session, event, &sample);
808 793
809 /* Preprocess sample records - precheck callchains */ 794 /* Preprocess sample records - precheck callchains */
810 if (perf_session__preprocess_sample(session, event, &sample)) 795 if (perf_session__preprocess_sample(session, event, &sample))
@@ -843,7 +828,7 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
843static void perf_session__warn_about_errors(const struct perf_session *session, 828static void perf_session__warn_about_errors(const struct perf_session *session,
844 const struct perf_event_ops *ops) 829 const struct perf_event_ops *ops)
845{ 830{
846 if (ops->lost == event__process_lost && 831 if (ops->lost == perf_event__process_lost &&
847 session->hists.stats.total_lost != 0) { 832 session->hists.stats.total_lost != 0) {
848 ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64 833 ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
849 "!\n\nCheck IO/CPU overload!\n\n", 834 "!\n\nCheck IO/CPU overload!\n\n",
@@ -875,7 +860,7 @@ volatile int session_done;
875static int __perf_session__process_pipe_events(struct perf_session *self, 860static int __perf_session__process_pipe_events(struct perf_session *self,
876 struct perf_event_ops *ops) 861 struct perf_event_ops *ops)
877{ 862{
878 event_t event; 863 union perf_event event;
879 uint32_t size; 864 uint32_t size;
880 int skip = 0; 865 int skip = 0;
881 u64 head; 866 u64 head;
@@ -956,7 +941,7 @@ int __perf_session__process_events(struct perf_session *session,
956 struct ui_progress *progress; 941 struct ui_progress *progress;
957 size_t page_size, mmap_size; 942 size_t page_size, mmap_size;
958 char *buf, *mmaps[8]; 943 char *buf, *mmaps[8];
959 event_t *event; 944 union perf_event *event;
960 uint32_t size; 945 uint32_t size;
961 946
962 perf_event_ops__fill_defaults(ops); 947 perf_event_ops__fill_defaults(ops);
@@ -1001,7 +986,7 @@ remap:
1001 file_pos = file_offset + head; 986 file_pos = file_offset + head;
1002 987
1003more: 988more:
1004 event = (event_t *)(buf + head); 989 event = (union perf_event *)(buf + head);
1005 990
1006 if (session->header.needs_swap) 991 if (session->header.needs_swap)
1007 perf_event_header__bswap(&event->header); 992 perf_event_header__bswap(&event->header);
@@ -1134,3 +1119,18 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
1134 size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits); 1119 size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
1135 return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits); 1120 return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
1136} 1121}
1122
1123size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
1124{
1125 struct perf_evsel *pos;
1126 size_t ret = fprintf(fp, "Aggregated stats:\n");
1127
1128 ret += hists__fprintf_nr_events(&session->hists, fp);
1129
1130 list_for_each_entry(pos, &session->evlist->entries, node) {
1131 ret += fprintf(fp, "%s stats:\n", event_name(pos));
1132 ret += hists__fprintf_nr_events(&pos->hists, fp);
1133 }
1134
1135 return ret;
1136}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index decd83f274fd..b5b148b0aaca 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -34,12 +34,12 @@ struct perf_session {
34 struct thread *last_match; 34 struct thread *last_match;
35 struct machine host_machine; 35 struct machine host_machine;
36 struct rb_root machines; 36 struct rb_root machines;
37 struct rb_root hists_tree; 37 struct perf_evlist *evlist;
38 /* 38 /*
39 * FIXME: should point to the first entry in hists_tree and 39 * FIXME: Need to split this up further, we need global
40 * be a hists instance. Right now its only 'report' 40 * stats + per event stats. 'perf diff' also needs
41 * that is using ->hists_tree while all the rest use 41 * to properly support multiple events in a single
42 * ->hists. 42 * perf.data file.
43 */ 43 */
44 struct hists hists; 44 struct hists hists;
45 u64 sample_type; 45 u64 sample_type;
@@ -51,15 +51,17 @@ struct perf_session {
51 int cwdlen; 51 int cwdlen;
52 char *cwd; 52 char *cwd;
53 struct ordered_samples ordered_samples; 53 struct ordered_samples ordered_samples;
54 char filename[0]; 54 struct callchain_cursor callchain_cursor;
55 char filename[0];
55}; 56};
56 57
57struct perf_event_ops; 58struct perf_event_ops;
58 59
59typedef int (*event_op)(event_t *self, struct sample_data *sample, 60typedef int (*event_op)(union perf_event *self, struct perf_sample *sample,
60 struct perf_session *session); 61 struct perf_session *session);
61typedef int (*event_synth_op)(event_t *self, struct perf_session *session); 62typedef int (*event_synth_op)(union perf_event *self,
62typedef int (*event_op2)(event_t *self, struct perf_session *session, 63 struct perf_session *session);
64typedef int (*event_op2)(union perf_event *self, struct perf_session *session,
63 struct perf_event_ops *ops); 65 struct perf_event_ops *ops);
64 66
65struct perf_event_ops { 67struct perf_event_ops {
@@ -94,10 +96,10 @@ int __perf_session__process_events(struct perf_session *self,
94int perf_session__process_events(struct perf_session *self, 96int perf_session__process_events(struct perf_session *self,
95 struct perf_event_ops *event_ops); 97 struct perf_event_ops *event_ops);
96 98
97struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, 99int perf_session__resolve_callchain(struct perf_session *self,
98 struct thread *thread, 100 struct thread *thread,
99 struct ip_callchain *chain, 101 struct ip_callchain *chain,
100 struct symbol **parent); 102 struct symbol **parent);
101 103
102bool perf_session__has_traces(struct perf_session *self, const char *msg); 104bool perf_session__has_traces(struct perf_session *self, const char *msg);
103 105
@@ -110,8 +112,6 @@ void mem_bswap_64(void *src, int byte_size);
110int perf_session__create_kernel_maps(struct perf_session *self); 112int perf_session__create_kernel_maps(struct perf_session *self);
111 113
112void perf_session__update_sample_type(struct perf_session *self); 114void perf_session__update_sample_type(struct perf_session *self);
113void perf_session__set_sample_id_all(struct perf_session *session, bool value);
114void perf_session__set_sample_type(struct perf_session *session, u64 type);
115void perf_session__remove_thread(struct perf_session *self, struct thread *th); 115void perf_session__remove_thread(struct perf_session *self, struct thread *th);
116 116
117static inline 117static inline
@@ -149,9 +149,14 @@ size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);
149size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, 149size_t perf_session__fprintf_dsos_buildid(struct perf_session *self,
150 FILE *fp, bool with_hits); 150 FILE *fp, bool with_hits);
151 151
152static inline 152size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp);
153size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp) 153
154static inline int perf_session__parse_sample(struct perf_session *session,
155 const union perf_event *event,
156 struct perf_sample *sample)
154{ 157{
155 return hists__fprintf_nr_events(&self->hists, fp); 158 return perf_event__parse_sample(event, session->sample_type,
159 session->sample_id_all, sample);
156} 160}
161
157#endif /* __PERF_SESSION_H */ 162#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
new file mode 100644
index 000000000000..e24ffadb20b2
--- /dev/null
+++ b/tools/perf/util/setup.py
@@ -0,0 +1,19 @@
1#!/usr/bin/python2
2
3from distutils.core import setup, Extension
4
5perf = Extension('perf',
6 sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
7 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
8 'util/util.c', 'util/xyarray.c', 'util/cgroup.c'],
9 include_dirs = ['util/include'],
10 extra_compile_args = ['-fno-strict-aliasing', '-Wno-write-strings'])
11
12setup(name='perf',
13 version='0.1',
14 description='Interface with the Linux profiling infrastructure',
15 author='Arnaldo Carvalho de Melo',
16 author_email='acme@redhat.com',
17 license='GPLv2',
18 url='http://perf.wiki.kernel.org',
19 ext_modules=[perf])
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
new file mode 100644
index 000000000000..834c8ebfe38e
--- /dev/null
+++ b/tools/perf/util/strfilter.c
@@ -0,0 +1,199 @@
1#include "util.h"
2#include "string.h"
3#include "strfilter.h"
4
5/* Operators */
6static const char *OP_and = "&"; /* Logical AND */
7static const char *OP_or = "|"; /* Logical OR */
8static const char *OP_not = "!"; /* Logical NOT */
9
10#define is_operator(c) ((c) == '|' || (c) == '&' || (c) == '!')
11#define is_separator(c) (is_operator(c) || (c) == '(' || (c) == ')')
12
13static void strfilter_node__delete(struct strfilter_node *self)
14{
15 if (self) {
16 if (self->p && !is_operator(*self->p))
17 free((char *)self->p);
18 strfilter_node__delete(self->l);
19 strfilter_node__delete(self->r);
20 free(self);
21 }
22}
23
24void strfilter__delete(struct strfilter *self)
25{
26 if (self) {
27 strfilter_node__delete(self->root);
28 free(self);
29 }
30}
31
32static const char *get_token(const char *s, const char **e)
33{
34 const char *p;
35
36 while (isspace(*s)) /* Skip spaces */
37 s++;
38
39 if (*s == '\0') {
40 p = s;
41 goto end;
42 }
43
44 p = s + 1;
45 if (!is_separator(*s)) {
46 /* End search */
47retry:
48 while (*p && !is_separator(*p) && !isspace(*p))
49 p++;
50 /* Escape and special case: '!' is also used in glob pattern */
51 if (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '[')) {
52 p++;
53 goto retry;
54 }
55 }
56end:
57 *e = p;
58 return s;
59}
60
61static struct strfilter_node *strfilter_node__alloc(const char *op,
62 struct strfilter_node *l,
63 struct strfilter_node *r)
64{
65 struct strfilter_node *ret = zalloc(sizeof(struct strfilter_node));
66
67 if (ret) {
68 ret->p = op;
69 ret->l = l;
70 ret->r = r;
71 }
72
73 return ret;
74}
75
76static struct strfilter_node *strfilter_node__new(const char *s,
77 const char **ep)
78{
79 struct strfilter_node root, *cur, *last_op;
80 const char *e;
81
82 if (!s)
83 return NULL;
84
85 memset(&root, 0, sizeof(root));
86 last_op = cur = &root;
87
88 s = get_token(s, &e);
89 while (*s != '\0' && *s != ')') {
90 switch (*s) {
91 case '&': /* Exchg last OP->r with AND */
92 if (!cur->r || !last_op->r)
93 goto error;
94 cur = strfilter_node__alloc(OP_and, last_op->r, NULL);
95 if (!cur)
96 goto nomem;
97 last_op->r = cur;
98 last_op = cur;
99 break;
100 case '|': /* Exchg the root with OR */
101 if (!cur->r || !root.r)
102 goto error;
103 cur = strfilter_node__alloc(OP_or, root.r, NULL);
104 if (!cur)
105 goto nomem;
106 root.r = cur;
107 last_op = cur;
108 break;
109 case '!': /* Add NOT as a leaf node */
110 if (cur->r)
111 goto error;
112 cur->r = strfilter_node__alloc(OP_not, NULL, NULL);
113 if (!cur->r)
114 goto nomem;
115 cur = cur->r;
116 break;
117 case '(': /* Recursively parses inside the parenthesis */
118 if (cur->r)
119 goto error;
120 cur->r = strfilter_node__new(s + 1, &s);
121 if (!s)
122 goto nomem;
123 if (!cur->r || *s != ')')
124 goto error;
125 e = s + 1;
126 break;
127 default:
128 if (cur->r)
129 goto error;
130 cur->r = strfilter_node__alloc(NULL, NULL, NULL);
131 if (!cur->r)
132 goto nomem;
133 cur->r->p = strndup(s, e - s);
134 if (!cur->r->p)
135 goto nomem;
136 }
137 s = get_token(e, &e);
138 }
139 if (!cur->r)
140 goto error;
141 *ep = s;
142 return root.r;
143nomem:
144 s = NULL;
145error:
146 *ep = s;
147 strfilter_node__delete(root.r);
148 return NULL;
149}
150
151/*
152 * Parse filter rule and return new strfilter.
153 * Return NULL if fail, and *ep == NULL if memory allocation failed.
154 */
155struct strfilter *strfilter__new(const char *rules, const char **err)
156{
157 struct strfilter *ret = zalloc(sizeof(struct strfilter));
158 const char *ep = NULL;
159
160 if (ret)
161 ret->root = strfilter_node__new(rules, &ep);
162
163 if (!ret || !ret->root || *ep != '\0') {
164 if (err)
165 *err = ep;
166 strfilter__delete(ret);
167 ret = NULL;
168 }
169
170 return ret;
171}
172
173static bool strfilter_node__compare(struct strfilter_node *self,
174 const char *str)
175{
176 if (!self || !self->p)
177 return false;
178
179 switch (*self->p) {
180 case '|': /* OR */
181 return strfilter_node__compare(self->l, str) ||
182 strfilter_node__compare(self->r, str);
183 case '&': /* AND */
184 return strfilter_node__compare(self->l, str) &&
185 strfilter_node__compare(self->r, str);
186 case '!': /* NOT */
187 return !strfilter_node__compare(self->r, str);
188 default:
189 return strglobmatch(str, self->p);
190 }
191}
192
193/* Return true if STR matches the filter rules */
194bool strfilter__compare(struct strfilter *self, const char *str)
195{
196 if (!self)
197 return false;
198 return strfilter_node__compare(self->root, str);
199}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
new file mode 100644
index 000000000000..00f58a7506de
--- /dev/null
+++ b/tools/perf/util/strfilter.h
@@ -0,0 +1,48 @@
1#ifndef __PERF_STRFILTER_H
2#define __PERF_STRFILTER_H
3/* General purpose glob matching filter */
4
5#include <linux/list.h>
6#include <stdbool.h>
7
8/* A node of string filter */
9struct strfilter_node {
10 struct strfilter_node *l; /* Tree left branche (for &,|) */
11 struct strfilter_node *r; /* Tree right branche (for !,&,|) */
12 const char *p; /* Operator or rule */
13};
14
15/* String filter */
16struct strfilter {
17 struct strfilter_node *root;
18};
19
20/**
21 * strfilter__new - Create a new string filter
22 * @rules: Filter rule, which is a combination of glob expressions.
23 * @err: Pointer which points an error detected on @rules
24 *
25 * Parse @rules and return new strfilter. Return NULL if an error detected.
26 * In that case, *@err will indicate where it is detected, and *@err is NULL
27 * if a memory allocation is failed.
28 */
29struct strfilter *strfilter__new(const char *rules, const char **err);
30
31/**
32 * strfilter__compare - compare given string and a string filter
33 * @self: String filter
34 * @str: target string
35 *
36 * Compare @str and @self. Return true if the str match the rule
37 */
38bool strfilter__compare(struct strfilter *self, const char *str);
39
40/**
41 * strfilter__delete - delete a string filter
42 * @self: String filter to delete
43 *
44 * Delete @self.
45 */
46void strfilter__delete(struct strfilter *self);
47
48#endif
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index fb737fe9be91..96c866045d60 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -456,9 +456,9 @@ void svg_legenda(void)
456 return; 456 return;
457 457
458 svg_legenda_box(0, "Running", "sample"); 458 svg_legenda_box(0, "Running", "sample");
459 svg_legenda_box(100, "Idle","rect.c1"); 459 svg_legenda_box(100, "Idle","c1");
460 svg_legenda_box(200, "Deeper Idle", "rect.c3"); 460 svg_legenda_box(200, "Deeper Idle", "c3");
461 svg_legenda_box(350, "Deepest Idle", "rect.c6"); 461 svg_legenda_box(350, "Deepest Idle", "c6");
462 svg_legenda_box(550, "Sleeping", "process2"); 462 svg_legenda_box(550, "Sleeping", "process2");
463 svg_legenda_box(650, "Waiting for cpu", "waiting"); 463 svg_legenda_box(650, "Waiting for cpu", "waiting");
464 svg_legenda_box(800, "Blocked on IO", "blocked"); 464 svg_legenda_box(800, "Blocked on IO", "blocked");
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 7821d0e6866f..00014e32c288 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -207,7 +207,6 @@ struct dso *dso__new(const char *name)
207 dso__set_short_name(self, self->name); 207 dso__set_short_name(self, self->name);
208 for (i = 0; i < MAP__NR_TYPES; ++i) 208 for (i = 0; i < MAP__NR_TYPES; ++i)
209 self->symbols[i] = self->symbol_names[i] = RB_ROOT; 209 self->symbols[i] = self->symbol_names[i] = RB_ROOT;
210 self->slen_calculated = 0;
211 self->origin = DSO__ORIG_NOT_FOUND; 210 self->origin = DSO__ORIG_NOT_FOUND;
212 self->loaded = 0; 211 self->loaded = 0;
213 self->sorted_by_name = 0; 212 self->sorted_by_name = 0;
@@ -1525,8 +1524,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1525 symbol_conf.symfs, self->long_name); 1524 symbol_conf.symfs, self->long_name);
1526 break; 1525 break;
1527 case DSO__ORIG_GUEST_KMODULE: 1526 case DSO__ORIG_GUEST_KMODULE:
1528 if (map->groups && map->groups->machine) 1527 if (map->groups && machine)
1529 root_dir = map->groups->machine->root_dir; 1528 root_dir = machine->root_dir;
1530 else 1529 else
1531 root_dir = ""; 1530 root_dir = "";
1532 snprintf(name, size, "%s%s%s", symbol_conf.symfs, 1531 snprintf(name, size, "%s%s%s", symbol_conf.symfs,
@@ -1836,7 +1835,7 @@ int dso__load_vmlinux(struct dso *self, struct map *map,
1836 int err = -1, fd; 1835 int err = -1, fd;
1837 char symfs_vmlinux[PATH_MAX]; 1836 char symfs_vmlinux[PATH_MAX];
1838 1837
1839 snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s", 1838 snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s",
1840 symbol_conf.symfs, vmlinux); 1839 symbol_conf.symfs, vmlinux);
1841 fd = open(symfs_vmlinux, O_RDONLY); 1840 fd = open(symfs_vmlinux, O_RDONLY);
1842 if (fd < 0) 1841 if (fd < 0)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 670cd1c88f54..4d7ed09fe332 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -132,7 +132,6 @@ struct dso {
132 struct rb_root symbol_names[MAP__NR_TYPES]; 132 struct rb_root symbol_names[MAP__NR_TYPES];
133 enum dso_kernel_type kernel; 133 enum dso_kernel_type kernel;
134 u8 adjust_symbols:1; 134 u8 adjust_symbols:1;
135 u8 slen_calculated:1;
136 u8 has_build_id:1; 135 u8 has_build_id:1;
137 u8 hit:1; 136 u8 hit:1;
138 u8 annotate_warned:1; 137 u8 annotate_warned:1;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 00f4eade2e3e..d5d3b22250f3 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,61 +7,6 @@
7#include "util.h" 7#include "util.h"
8#include "debug.h" 8#include "debug.h"
9 9
10/* Skip "." and ".." directories */
11static int filter(const struct dirent *dir)
12{
13 if (dir->d_name[0] == '.')
14 return 0;
15 else
16 return 1;
17}
18
19struct thread_map *thread_map__new_by_pid(pid_t pid)
20{
21 struct thread_map *threads;
22 char name[256];
23 int items;
24 struct dirent **namelist = NULL;
25 int i;
26
27 sprintf(name, "/proc/%d/task", pid);
28 items = scandir(name, &namelist, filter, NULL);
29 if (items <= 0)
30 return NULL;
31
32 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
33 if (threads != NULL) {
34 for (i = 0; i < items; i++)
35 threads->map[i] = atoi(namelist[i]->d_name);
36 threads->nr = items;
37 }
38
39 for (i=0; i<items; i++)
40 free(namelist[i]);
41 free(namelist);
42
43 return threads;
44}
45
46struct thread_map *thread_map__new_by_tid(pid_t tid)
47{
48 struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
49
50 if (threads != NULL) {
51 threads->map[0] = tid;
52 threads->nr = 1;
53 }
54
55 return threads;
56}
57
58struct thread_map *thread_map__new(pid_t pid, pid_t tid)
59{
60 if (pid != -1)
61 return thread_map__new_by_pid(pid);
62 return thread_map__new_by_tid(tid);
63}
64
65static struct thread *thread__new(pid_t pid) 10static struct thread *thread__new(pid_t pid)
66{ 11{
67 struct thread *self = zalloc(sizeof(*self)); 12 struct thread *self = zalloc(sizeof(*self));
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index d7574101054a..e5f2401c1b5e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,24 +18,10 @@ struct thread {
18 int comm_len; 18 int comm_len;
19}; 19};
20 20
21struct thread_map {
22 int nr;
23 int map[];
24};
25
26struct perf_session; 21struct perf_session;
27 22
28void thread__delete(struct thread *self); 23void thread__delete(struct thread *self);
29 24
30struct thread_map *thread_map__new_by_pid(pid_t pid);
31struct thread_map *thread_map__new_by_tid(pid_t tid);
32struct thread_map *thread_map__new(pid_t pid, pid_t tid);
33
34static inline void thread_map__delete(struct thread_map *threads)
35{
36 free(threads);
37}
38
39int thread__set_comm(struct thread *self, const char *comm); 25int thread__set_comm(struct thread *self, const char *comm);
40int thread__comm_len(struct thread *self); 26int thread__comm_len(struct thread *self);
41struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); 27struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
new file mode 100644
index 000000000000..a5df131b77c3
--- /dev/null
+++ b/tools/perf/util/thread_map.c
@@ -0,0 +1,64 @@
1#include <dirent.h>
2#include <stdlib.h>
3#include <stdio.h>
4#include "thread_map.h"
5
6/* Skip "." and ".." directories */
7static int filter(const struct dirent *dir)
8{
9 if (dir->d_name[0] == '.')
10 return 0;
11 else
12 return 1;
13}
14
15struct thread_map *thread_map__new_by_pid(pid_t pid)
16{
17 struct thread_map *threads;
18 char name[256];
19 int items;
20 struct dirent **namelist = NULL;
21 int i;
22
23 sprintf(name, "/proc/%d/task", pid);
24 items = scandir(name, &namelist, filter, NULL);
25 if (items <= 0)
26 return NULL;
27
28 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
29 if (threads != NULL) {
30 for (i = 0; i < items; i++)
31 threads->map[i] = atoi(namelist[i]->d_name);
32 threads->nr = items;
33 }
34
35 for (i=0; i<items; i++)
36 free(namelist[i]);
37 free(namelist);
38
39 return threads;
40}
41
42struct thread_map *thread_map__new_by_tid(pid_t tid)
43{
44 struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
45
46 if (threads != NULL) {
47 threads->map[0] = tid;
48 threads->nr = 1;
49 }
50
51 return threads;
52}
53
54struct thread_map *thread_map__new(pid_t pid, pid_t tid)
55{
56 if (pid != -1)
57 return thread_map__new_by_pid(pid);
58 return thread_map__new_by_tid(tid);
59}
60
61void thread_map__delete(struct thread_map *threads)
62{
63 free(threads);
64}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
new file mode 100644
index 000000000000..3cb907311409
--- /dev/null
+++ b/tools/perf/util/thread_map.h
@@ -0,0 +1,15 @@
1#ifndef __PERF_THREAD_MAP_H
2#define __PERF_THREAD_MAP_H
3
4#include <sys/types.h>
5
6struct thread_map {
7 int nr;
8 int map[];
9};
10
11struct thread_map *thread_map__new_by_pid(pid_t pid);
12struct thread_map *thread_map__new_by_tid(pid_t tid);
13struct thread_map *thread_map__new(pid_t pid, pid_t tid);
14void thread_map__delete(struct thread_map *threads);
15#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
new file mode 100644
index 000000000000..75cfe4d45119
--- /dev/null
+++ b/tools/perf/util/top.c
@@ -0,0 +1,238 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Refactored from builtin-top.c, see that files for further copyright notes.
5 *
6 * Released under the GPL v2. (and only v2, not any later version)
7 */
8
9#include "cpumap.h"
10#include "event.h"
11#include "evlist.h"
12#include "evsel.h"
13#include "parse-events.h"
14#include "symbol.h"
15#include "top.h"
16#include <inttypes.h>
17
18/*
19 * Ordering weight: count-1 * count-2 * ... / count-n
20 */
21static double sym_weight(const struct sym_entry *sym, struct perf_top *top)
22{
23 double weight = sym->snap_count;
24 int counter;
25
26 if (!top->display_weighted)
27 return weight;
28
29 for (counter = 1; counter < top->evlist->nr_entries - 1; counter++)
30 weight *= sym->count[counter];
31
32 weight /= (sym->count[counter] + 1);
33
34 return weight;
35}
36
37static void perf_top__remove_active_sym(struct perf_top *top, struct sym_entry *syme)
38{
39 pthread_mutex_lock(&top->active_symbols_lock);
40 list_del_init(&syme->node);
41 pthread_mutex_unlock(&top->active_symbols_lock);
42}
43
44static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
45{
46 struct rb_node **p = &tree->rb_node;
47 struct rb_node *parent = NULL;
48 struct sym_entry *iter;
49
50 while (*p != NULL) {
51 parent = *p;
52 iter = rb_entry(parent, struct sym_entry, rb_node);
53
54 if (se->weight > iter->weight)
55 p = &(*p)->rb_left;
56 else
57 p = &(*p)->rb_right;
58 }
59
60 rb_link_node(&se->rb_node, parent, p);
61 rb_insert_color(&se->rb_node, tree);
62}
63
64#define SNPRINTF(buf, size, fmt, args...) \
65({ \
66 size_t r = snprintf(buf, size, fmt, ## args); \
67 r > size ? size : r; \
68})
69
70size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
71{
72 struct perf_evsel *counter;
73 float samples_per_sec = top->samples / top->delay_secs;
74 float ksamples_per_sec = top->kernel_samples / top->delay_secs;
75 float esamples_percent = (100.0 * top->exact_samples) / top->samples;
76 size_t ret = 0;
77
78 if (!perf_guest) {
79 ret = SNPRINTF(bf, size,
80 " PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
81 " exact: %4.1f%% [", samples_per_sec,
82 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
83 samples_per_sec)),
84 esamples_percent);
85 } else {
86 float us_samples_per_sec = top->us_samples / top->delay_secs;
87 float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
88 float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs;
89
90 ret = SNPRINTF(bf, size,
91 " PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
92 " guest kernel:%4.1f%% guest us:%4.1f%%"
93 " exact: %4.1f%% [", samples_per_sec,
94 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
95 samples_per_sec)),
96 100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) /
97 samples_per_sec)),
98 100.0 - (100.0 * ((samples_per_sec -
99 guest_kernel_samples_per_sec) /
100 samples_per_sec)),
101 100.0 - (100.0 * ((samples_per_sec -
102 guest_us_samples_per_sec) /
103 samples_per_sec)),
104 esamples_percent);
105 }
106
107 if (top->evlist->nr_entries == 1 || !top->display_weighted) {
108 struct perf_evsel *first;
109 first = list_entry(top->evlist->entries.next, struct perf_evsel, node);
110 ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
111 (uint64_t)first->attr.sample_period,
112 top->freq ? "Hz" : "");
113 }
114
115 if (!top->display_weighted) {
116 ret += SNPRINTF(bf + ret, size - ret, "%s",
117 event_name(top->sym_evsel));
118 } else {
119 /*
120 * Don't let events eat all the space. Leaving 30 bytes
121 * for the rest should be enough.
122 */
123 size_t last_pos = size - 30;
124
125 list_for_each_entry(counter, &top->evlist->entries, node) {
126 ret += SNPRINTF(bf + ret, size - ret, "%s%s",
127 counter->idx ? "/" : "",
128 event_name(counter));
129 if (ret > last_pos) {
130 sprintf(bf + last_pos - 3, "..");
131 ret = last_pos - 1;
132 break;
133 }
134 }
135 }
136
137 ret += SNPRINTF(bf + ret, size - ret, "], ");
138
139 if (top->target_pid != -1)
140 ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d",
141 top->target_pid);
142 else if (top->target_tid != -1)
143 ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d",
144 top->target_tid);
145 else
146 ret += SNPRINTF(bf + ret, size - ret, " (all");
147
148 if (top->cpu_list)
149 ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
150 top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
151 else {
152 if (top->target_tid != -1)
153 ret += SNPRINTF(bf + ret, size - ret, ")");
154 else
155 ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
156 top->evlist->cpus->nr,
157 top->evlist->cpus->nr > 1 ? "s" : "");
158 }
159
160 return ret;
161}
162
163void perf_top__reset_sample_counters(struct perf_top *top)
164{
165 top->samples = top->us_samples = top->kernel_samples =
166 top->exact_samples = top->guest_kernel_samples =
167 top->guest_us_samples = 0;
168}
169
170float perf_top__decay_samples(struct perf_top *top, struct rb_root *root)
171{
172 struct sym_entry *syme, *n;
173 float sum_ksamples = 0.0;
174 int snap = !top->display_weighted ? top->sym_counter : 0, j;
175
176 /* Sort the active symbols */
177 pthread_mutex_lock(&top->active_symbols_lock);
178 syme = list_entry(top->active_symbols.next, struct sym_entry, node);
179 pthread_mutex_unlock(&top->active_symbols_lock);
180
181 top->rb_entries = 0;
182 list_for_each_entry_safe_from(syme, n, &top->active_symbols, node) {
183 syme->snap_count = syme->count[snap];
184 if (syme->snap_count != 0) {
185
186 if ((top->hide_user_symbols &&
187 syme->origin == PERF_RECORD_MISC_USER) ||
188 (top->hide_kernel_symbols &&
189 syme->origin == PERF_RECORD_MISC_KERNEL)) {
190 perf_top__remove_active_sym(top, syme);
191 continue;
192 }
193 syme->weight = sym_weight(syme, top);
194
195 if ((int)syme->snap_count >= top->count_filter) {
196 rb_insert_active_sym(root, syme);
197 ++top->rb_entries;
198 }
199 sum_ksamples += syme->snap_count;
200
201 for (j = 0; j < top->evlist->nr_entries; j++)
202 syme->count[j] = top->zero ? 0 : syme->count[j] * 7 / 8;
203 } else
204 perf_top__remove_active_sym(top, syme);
205 }
206
207 return sum_ksamples;
208}
209
210/*
211 * Find the longest symbol name that will be displayed
212 */
213void perf_top__find_widths(struct perf_top *top, struct rb_root *root,
214 int *dso_width, int *dso_short_width, int *sym_width)
215{
216 struct rb_node *nd;
217 int printed = 0;
218
219 *sym_width = *dso_width = *dso_short_width = 0;
220
221 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
222 struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
223 struct symbol *sym = sym_entry__symbol(syme);
224
225 if (++printed > top->print_entries ||
226 (int)syme->snap_count < top->count_filter)
227 continue;
228
229 if (syme->map->dso->long_name_len > *dso_width)
230 *dso_width = syme->map->dso->long_name_len;
231
232 if (syme->map->dso->short_name_len > *dso_short_width)
233 *dso_short_width = syme->map->dso->short_name_len;
234
235 if (sym->namelen > *sym_width)
236 *sym_width = sym->namelen;
237 }
238}
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
new file mode 100644
index 000000000000..96d1cb78af01
--- /dev/null
+++ b/tools/perf/util/top.h
@@ -0,0 +1,66 @@
1#ifndef __PERF_TOP_H
2#define __PERF_TOP_H 1
3
4#include "types.h"
5#include "../perf.h"
6#include <stddef.h>
7#include <pthread.h>
8#include <linux/list.h>
9#include <linux/rbtree.h>
10
11struct perf_evlist;
12struct perf_evsel;
13
14struct sym_entry {
15 struct rb_node rb_node;
16 struct list_head node;
17 unsigned long snap_count;
18 double weight;
19 int skip;
20 u8 origin;
21 struct map *map;
22 unsigned long count[0];
23};
24
25static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
26{
27 return ((void *)self) + symbol_conf.priv_size;
28}
29
30struct perf_top {
31 struct perf_evlist *evlist;
32 /*
33 * Symbols will be added here in perf_event__process_sample and will
34 * get out after decayed.
35 */
36 struct list_head active_symbols;
37 pthread_mutex_t active_symbols_lock;
38 pthread_cond_t active_symbols_cond;
39 u64 samples;
40 u64 kernel_samples, us_samples;
41 u64 exact_samples;
42 u64 guest_us_samples, guest_kernel_samples;
43 int print_entries, count_filter, delay_secs;
44 int display_weighted, freq, rb_entries, sym_counter;
45 pid_t target_pid, target_tid;
46 bool hide_kernel_symbols, hide_user_symbols, zero;
47 const char *cpu_list;
48 struct sym_entry *sym_filter_entry;
49 struct perf_evsel *sym_evsel;
50};
51
52size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
53void perf_top__reset_sample_counters(struct perf_top *top);
54float perf_top__decay_samples(struct perf_top *top, struct rb_root *root);
55void perf_top__find_widths(struct perf_top *top, struct rb_root *root,
56 int *dso_width, int *dso_short_width, int *sym_width);
57
58#ifdef NO_NEWT_SUPPORT
59static inline int perf_top__tui_browser(struct perf_top *top __used)
60{
61 return 0;
62}
63#else
64int perf_top__tui_browser(struct perf_top *top);
65#endif
66#endif /* __PERF_TOP_H */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 73a02223c629..d8e622dd738a 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -153,7 +153,7 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)
153 char *next = NULL; 153 char *next = NULL;
154 char *addr_str; 154 char *addr_str;
155 char ch; 155 char ch;
156 int ret; 156 int ret __used;
157 int i; 157 int i;
158 158
159 line = strtok_r(file, "\n", &next); 159 line = strtok_r(file, "\n", &next);
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c
index 8bc010edca25..611219f80680 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/util/ui/browser.c
@@ -1,4 +1,5 @@
1#include "libslang.h" 1#include "libslang.h"
2#include "ui.h"
2#include <linux/compiler.h> 3#include <linux/compiler.h>
3#include <linux/list.h> 4#include <linux/list.h>
4#include <linux/rbtree.h> 5#include <linux/rbtree.h>
@@ -156,6 +157,20 @@ void ui_browser__add_exit_keys(struct ui_browser *self, int keys[])
156 } 157 }
157} 158}
158 159
160void __ui_browser__show_title(struct ui_browser *browser, const char *title)
161{
162 SLsmg_gotorc(0, 0);
163 ui_browser__set_color(browser, NEWT_COLORSET_ROOT);
164 slsmg_write_nstring(title, browser->width);
165}
166
167void ui_browser__show_title(struct ui_browser *browser, const char *title)
168{
169 pthread_mutex_lock(&ui__lock);
170 __ui_browser__show_title(browser, title);
171 pthread_mutex_unlock(&ui__lock);
172}
173
159int ui_browser__show(struct ui_browser *self, const char *title, 174int ui_browser__show(struct ui_browser *self, const char *title,
160 const char *helpline, ...) 175 const char *helpline, ...)
161{ 176{
@@ -178,9 +193,8 @@ int ui_browser__show(struct ui_browser *self, const char *title,
178 if (self->sb == NULL) 193 if (self->sb == NULL)
179 return -1; 194 return -1;
180 195
181 SLsmg_gotorc(0, 0); 196 pthread_mutex_lock(&ui__lock);
182 ui_browser__set_color(self, NEWT_COLORSET_ROOT); 197 __ui_browser__show_title(self, title);
183 slsmg_write_nstring(title, self->width);
184 198
185 ui_browser__add_exit_keys(self, keys); 199 ui_browser__add_exit_keys(self, keys);
186 newtFormAddComponent(self->form, self->sb); 200 newtFormAddComponent(self->form, self->sb);
@@ -188,25 +202,30 @@ int ui_browser__show(struct ui_browser *self, const char *title,
188 va_start(ap, helpline); 202 va_start(ap, helpline);
189 ui_helpline__vpush(helpline, ap); 203 ui_helpline__vpush(helpline, ap);
190 va_end(ap); 204 va_end(ap);
205 pthread_mutex_unlock(&ui__lock);
191 return 0; 206 return 0;
192} 207}
193 208
194void ui_browser__hide(struct ui_browser *self) 209void ui_browser__hide(struct ui_browser *self)
195{ 210{
211 pthread_mutex_lock(&ui__lock);
196 newtFormDestroy(self->form); 212 newtFormDestroy(self->form);
197 self->form = NULL; 213 self->form = NULL;
198 ui_helpline__pop(); 214 ui_helpline__pop();
215 pthread_mutex_unlock(&ui__lock);
199} 216}
200 217
201int ui_browser__refresh(struct ui_browser *self) 218int ui_browser__refresh(struct ui_browser *self)
202{ 219{
203 int row; 220 int row;
204 221
222 pthread_mutex_lock(&ui__lock);
205 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); 223 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1);
206 row = self->refresh(self); 224 row = self->refresh(self);
207 ui_browser__set_color(self, HE_COLORSET_NORMAL); 225 ui_browser__set_color(self, HE_COLORSET_NORMAL);
208 SLsmg_fill_region(self->y + row, self->x, 226 SLsmg_fill_region(self->y + row, self->x,
209 self->height - row, self->width, ' '); 227 self->height - row, self->width, ' ');
228 pthread_mutex_unlock(&ui__lock);
210 229
211 return 0; 230 return 0;
212} 231}
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h
index 0dc7e4da36f5..fc63dda10910 100644
--- a/tools/perf/util/ui/browser.h
+++ b/tools/perf/util/ui/browser.h
@@ -24,7 +24,6 @@ struct ui_browser {
24 u32 nr_entries; 24 u32 nr_entries;
25}; 25};
26 26
27
28void ui_browser__set_color(struct ui_browser *self, int color); 27void ui_browser__set_color(struct ui_browser *self, int color);
29void ui_browser__set_percent_color(struct ui_browser *self, 28void ui_browser__set_percent_color(struct ui_browser *self,
30 double percent, bool current); 29 double percent, bool current);
@@ -35,6 +34,8 @@ void ui_browser__reset_index(struct ui_browser *self);
35void ui_browser__gotorc(struct ui_browser *self, int y, int x); 34void ui_browser__gotorc(struct ui_browser *self, int y, int x);
36void ui_browser__add_exit_key(struct ui_browser *self, int key); 35void ui_browser__add_exit_key(struct ui_browser *self, int key);
37void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]); 36void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]);
37void __ui_browser__show_title(struct ui_browser *browser, const char *title);
38void ui_browser__show_title(struct ui_browser *browser, const char *title);
38int ui_browser__show(struct ui_browser *self, const char *title, 39int ui_browser__show(struct ui_browser *self, const char *title,
39 const char *helpline, ...); 40 const char *helpline, ...);
40void ui_browser__hide(struct ui_browser *self); 41void ui_browser__hide(struct ui_browser *self);
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 82b78f99251b..8c17a8730e4a 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -1,9 +1,12 @@
1#include "../browser.h" 1#include "../browser.h"
2#include "../helpline.h" 2#include "../helpline.h"
3#include "../libslang.h" 3#include "../libslang.h"
4#include "../../annotate.h"
4#include "../../hist.h" 5#include "../../hist.h"
5#include "../../sort.h" 6#include "../../sort.h"
6#include "../../symbol.h" 7#include "../../symbol.h"
8#include "../../annotate.h"
9#include <pthread.h>
7 10
8static void ui__error_window(const char *fmt, ...) 11static void ui__error_window(const char *fmt, ...)
9{ 12{
@@ -42,8 +45,6 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
42 struct objdump_line_rb_node *olrb = objdump_line__rb(ol); 45 struct objdump_line_rb_node *olrb = objdump_line__rb(ol);
43 ui_browser__set_percent_color(self, olrb->percent, current_entry); 46 ui_browser__set_percent_color(self, olrb->percent, current_entry);
44 slsmg_printf(" %7.2f ", olrb->percent); 47 slsmg_printf(" %7.2f ", olrb->percent);
45 if (!current_entry)
46 ui_browser__set_color(self, HE_COLORSET_CODE);
47 } else { 48 } else {
48 ui_browser__set_percent_color(self, 0, current_entry); 49 ui_browser__set_percent_color(self, 0, current_entry);
49 slsmg_write_nstring(" ", 9); 50 slsmg_write_nstring(" ", 9);
@@ -55,35 +56,40 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
55 slsmg_write_nstring(" ", width - 18); 56 slsmg_write_nstring(" ", width - 18);
56 else 57 else
57 slsmg_write_nstring(ol->line, width - 18); 58 slsmg_write_nstring(ol->line, width - 18);
59
60 if (!current_entry)
61 ui_browser__set_color(self, HE_COLORSET_CODE);
58} 62}
59 63
60static double objdump_line__calc_percent(struct objdump_line *self, 64static double objdump_line__calc_percent(struct objdump_line *self,
61 struct list_head *head, 65 struct symbol *sym, int evidx)
62 struct symbol *sym)
63{ 66{
64 double percent = 0.0; 67 double percent = 0.0;
65 68
66 if (self->offset != -1) { 69 if (self->offset != -1) {
67 int len = sym->end - sym->start; 70 int len = sym->end - sym->start;
68 unsigned int hits = 0; 71 unsigned int hits = 0;
69 struct sym_priv *priv = symbol__priv(sym); 72 struct annotation *notes = symbol__annotation(sym);
70 struct sym_ext *sym_ext = priv->ext; 73 struct source_line *src_line = notes->src->lines;
71 struct sym_hist *h = priv->hist; 74 struct sym_hist *h = annotation__histogram(notes, evidx);
72 s64 offset = self->offset; 75 s64 offset = self->offset;
73 struct objdump_line *next = objdump__get_next_ip_line(head, self); 76 struct objdump_line *next;
74
75 77
78 next = objdump__get_next_ip_line(&notes->src->source, self);
76 while (offset < (s64)len && 79 while (offset < (s64)len &&
77 (next == NULL || offset < next->offset)) { 80 (next == NULL || offset < next->offset)) {
78 if (sym_ext) { 81 if (src_line) {
79 percent += sym_ext[offset].percent; 82 percent += src_line[offset].percent;
80 } else 83 } else
81 hits += h->ip[offset]; 84 hits += h->addr[offset];
82 85
83 ++offset; 86 ++offset;
84 } 87 }
85 88 /*
86 if (sym_ext == NULL && h->sum) 89 * If the percentage wasn't already calculated in
90 * symbol__get_source_line, do it now:
91 */
92 if (src_line == NULL && h->sum)
87 percent = 100.0 * hits / h->sum; 93 percent = 100.0 * hits / h->sum;
88 } 94 }
89 95
@@ -133,103 +139,161 @@ static void annotate_browser__set_top(struct annotate_browser *self,
133 self->curr_hot = nd; 139 self->curr_hot = nd;
134} 140}
135 141
136static int annotate_browser__run(struct annotate_browser *self) 142static void annotate_browser__calc_percent(struct annotate_browser *browser,
143 int evidx)
137{ 144{
138 struct rb_node *nd; 145 struct symbol *sym = browser->b.priv;
139 struct hist_entry *he = self->b.priv; 146 struct annotation *notes = symbol__annotation(sym);
140 int key; 147 struct objdump_line *pos;
141 148
142 if (ui_browser__show(&self->b, he->ms.sym->name, 149 browser->entries = RB_ROOT;
143 "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) 150
144 return -1; 151 pthread_mutex_lock(&notes->lock);
152
153 list_for_each_entry(pos, &notes->src->source, node) {
154 struct objdump_line_rb_node *rbpos = objdump_line__rb(pos);
155 rbpos->percent = objdump_line__calc_percent(pos, sym, evidx);
156 if (rbpos->percent < 0.01) {
157 RB_CLEAR_NODE(&rbpos->rb_node);
158 continue;
159 }
160 objdump__insert_line(&browser->entries, rbpos);
161 }
162 pthread_mutex_unlock(&notes->lock);
163
164 browser->curr_hot = rb_last(&browser->entries);
165}
166
167static int annotate_browser__run(struct annotate_browser *self, int evidx,
168 int refresh)
169{
170 struct rb_node *nd = NULL;
171 struct symbol *sym = self->b.priv;
145 /* 172 /*
146 * To allow builtin-annotate to cycle thru multiple symbols by 173 * RIGHT To allow builtin-annotate to cycle thru multiple symbols by
147 * examining the exit key for this function. 174 * examining the exit key for this function.
148 */ 175 */
149 ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT); 176 int exit_keys[] = { 'H', NEWT_KEY_TAB, NEWT_KEY_UNTAB,
177 NEWT_KEY_RIGHT, 0 };
178 int key;
179
180 if (ui_browser__show(&self->b, sym->name,
181 "<-, -> or ESC: exit, TAB/shift+TAB: "
182 "cycle hottest lines, H: Hottest") < 0)
183 return -1;
184
185 ui_browser__add_exit_keys(&self->b, exit_keys);
186 annotate_browser__calc_percent(self, evidx);
187
188 if (self->curr_hot)
189 annotate_browser__set_top(self, self->curr_hot);
150 190
151 nd = self->curr_hot; 191 nd = self->curr_hot;
152 if (nd) { 192
153 int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 }; 193 if (refresh != 0)
154 ui_browser__add_exit_keys(&self->b, tabs); 194 newtFormSetTimer(self->b.form, refresh);
155 }
156 195
157 while (1) { 196 while (1) {
158 key = ui_browser__run(&self->b); 197 key = ui_browser__run(&self->b);
159 198
199 if (refresh != 0) {
200 annotate_browser__calc_percent(self, evidx);
201 /*
202 * Current line focus got out of the list of most active
203 * lines, NULL it so that if TAB|UNTAB is pressed, we
204 * move to curr_hot (current hottest line).
205 */
206 if (nd != NULL && RB_EMPTY_NODE(nd))
207 nd = NULL;
208 }
209
160 switch (key) { 210 switch (key) {
211 case -1:
212 /*
213 * FIXME we need to check if it was
214 * es.reason == NEWT_EXIT_TIMER
215 */
216 if (refresh != 0)
217 symbol__annotate_decay_histogram(sym, evidx);
218 continue;
161 case NEWT_KEY_TAB: 219 case NEWT_KEY_TAB:
162 nd = rb_prev(nd); 220 if (nd != NULL) {
163 if (nd == NULL) 221 nd = rb_prev(nd);
164 nd = rb_last(&self->entries); 222 if (nd == NULL)
165 annotate_browser__set_top(self, nd); 223 nd = rb_last(&self->entries);
224 } else
225 nd = self->curr_hot;
166 break; 226 break;
167 case NEWT_KEY_UNTAB: 227 case NEWT_KEY_UNTAB:
168 nd = rb_next(nd); 228 if (nd != NULL)
169 if (nd == NULL) 229 nd = rb_next(nd);
170 nd = rb_first(&self->entries); 230 if (nd == NULL)
171 annotate_browser__set_top(self, nd); 231 nd = rb_first(&self->entries);
232 else
233 nd = self->curr_hot;
234 break;
235 case 'H':
236 nd = self->curr_hot;
172 break; 237 break;
173 default: 238 default:
174 goto out; 239 goto out;
175 } 240 }
241
242 if (nd != NULL)
243 annotate_browser__set_top(self, nd);
176 } 244 }
177out: 245out:
178 ui_browser__hide(&self->b); 246 ui_browser__hide(&self->b);
179 return key; 247 return key;
180} 248}
181 249
182int hist_entry__tui_annotate(struct hist_entry *self) 250int hist_entry__tui_annotate(struct hist_entry *he, int evidx)
251{
252 return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, 0);
253}
254
255int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
256 int refresh)
183{ 257{
184 struct objdump_line *pos, *n; 258 struct objdump_line *pos, *n;
185 struct objdump_line_rb_node *rbpos; 259 struct annotation *notes = symbol__annotation(sym);
186 LIST_HEAD(head);
187 struct annotate_browser browser = { 260 struct annotate_browser browser = {
188 .b = { 261 .b = {
189 .entries = &head, 262 .entries = &notes->src->source,
190 .refresh = ui_browser__list_head_refresh, 263 .refresh = ui_browser__list_head_refresh,
191 .seek = ui_browser__list_head_seek, 264 .seek = ui_browser__list_head_seek,
192 .write = annotate_browser__write, 265 .write = annotate_browser__write,
193 .priv = self, 266 .priv = sym,
194 }, 267 },
195 }; 268 };
196 int ret; 269 int ret;
197 270
198 if (self->ms.sym == NULL) 271 if (sym == NULL)
199 return -1; 272 return -1;
200 273
201 if (self->ms.map->dso->annotate_warned) 274 if (map->dso->annotate_warned)
202 return -1; 275 return -1;
203 276
204 if (hist_entry__annotate(self, &head, sizeof(*rbpos)) < 0) { 277 if (symbol__annotate(sym, map, sizeof(struct objdump_line_rb_node)) < 0) {
205 ui__error_window(ui_helpline__last_msg); 278 ui__error_window(ui_helpline__last_msg);
206 return -1; 279 return -1;
207 } 280 }
208 281
209 ui_helpline__push("Press <- or ESC to exit"); 282 ui_helpline__push("Press <- or ESC to exit");
210 283
211 list_for_each_entry(pos, &head, node) { 284 list_for_each_entry(pos, &notes->src->source, node) {
285 struct objdump_line_rb_node *rbpos;
212 size_t line_len = strlen(pos->line); 286 size_t line_len = strlen(pos->line);
287
213 if (browser.b.width < line_len) 288 if (browser.b.width < line_len)
214 browser.b.width = line_len; 289 browser.b.width = line_len;
215 rbpos = objdump_line__rb(pos); 290 rbpos = objdump_line__rb(pos);
216 rbpos->idx = browser.b.nr_entries++; 291 rbpos->idx = browser.b.nr_entries++;
217 rbpos->percent = objdump_line__calc_percent(pos, &head, self->ms.sym);
218 if (rbpos->percent < 0.01)
219 continue;
220 objdump__insert_line(&browser.entries, rbpos);
221 } 292 }
222 293
223 /*
224 * Position the browser at the hottest line.
225 */
226 browser.curr_hot = rb_last(&browser.entries);
227 if (browser.curr_hot)
228 annotate_browser__set_top(&browser, browser.curr_hot);
229
230 browser.b.width += 18; /* Percentage */ 294 browser.b.width += 18; /* Percentage */
231 ret = annotate_browser__run(&browser); 295 ret = annotate_browser__run(&browser, evidx, refresh);
232 list_for_each_entry_safe(pos, n, &head, node) { 296 list_for_each_entry_safe(pos, n, &notes->src->source, node) {
233 list_del(&pos->node); 297 list_del(&pos->node);
234 objdump_line__free(pos); 298 objdump_line__free(pos);
235 } 299 }
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 60c463c16028..798efdca3ead 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -7,6 +7,8 @@
7#include <newt.h> 7#include <newt.h>
8#include <linux/rbtree.h> 8#include <linux/rbtree.h>
9 9
10#include "../../evsel.h"
11#include "../../evlist.h"
10#include "../../hist.h" 12#include "../../hist.h"
11#include "../../pstack.h" 13#include "../../pstack.h"
12#include "../../sort.h" 14#include "../../sort.h"
@@ -292,7 +294,8 @@ static int hist_browser__run(struct hist_browser *self, const char *title)
292{ 294{
293 int key; 295 int key;
294 int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', 296 int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't',
295 NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, }; 297 NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT,
298 NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0, };
296 299
297 self->b.entries = &self->hists->entries; 300 self->b.entries = &self->hists->entries;
298 self->b.nr_entries = self->hists->nr_entries; 301 self->b.nr_entries = self->hists->nr_entries;
@@ -377,7 +380,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
377 while (node) { 380 while (node) {
378 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); 381 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
379 struct rb_node *next = rb_next(node); 382 struct rb_node *next = rb_next(node);
380 u64 cumul = cumul_hits(child); 383 u64 cumul = callchain_cumul_hits(child);
381 struct callchain_list *chain; 384 struct callchain_list *chain;
382 char folded_sign = ' '; 385 char folded_sign = ' ';
383 int first = true; 386 int first = true;
@@ -638,6 +641,9 @@ static void ui_browser__hists_seek(struct ui_browser *self,
638 struct rb_node *nd; 641 struct rb_node *nd;
639 bool first = true; 642 bool first = true;
640 643
644 if (self->nr_entries == 0)
645 return;
646
641 switch (whence) { 647 switch (whence) {
642 case SEEK_SET: 648 case SEEK_SET:
643 nd = hists__filter_entries(rb_first(self->entries)); 649 nd = hists__filter_entries(rb_first(self->entries));
@@ -797,8 +803,11 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
797 return printed; 803 return printed;
798} 804}
799 805
800int hists__browse(struct hists *self, const char *helpline, const char *ev_name) 806static int perf_evsel__hists_browse(struct perf_evsel *evsel,
807 const char *helpline, const char *ev_name,
808 bool left_exits)
801{ 809{
810 struct hists *self = &evsel->hists;
802 struct hist_browser *browser = hist_browser__new(self); 811 struct hist_browser *browser = hist_browser__new(self);
803 struct pstack *fstack; 812 struct pstack *fstack;
804 const struct thread *thread_filter = NULL; 813 const struct thread *thread_filter = NULL;
@@ -818,8 +827,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
818 hists__browser_title(self, msg, sizeof(msg), ev_name, 827 hists__browser_title(self, msg, sizeof(msg), ev_name,
819 dso_filter, thread_filter); 828 dso_filter, thread_filter);
820 while (1) { 829 while (1) {
821 const struct thread *thread; 830 const struct thread *thread = NULL;
822 const struct dso *dso; 831 const struct dso *dso = NULL;
823 char *options[16]; 832 char *options[16];
824 int nr_options = 0, choice = 0, i, 833 int nr_options = 0, choice = 0, i,
825 annotate = -2, zoom_dso = -2, zoom_thread = -2, 834 annotate = -2, zoom_dso = -2, zoom_thread = -2,
@@ -827,8 +836,10 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
827 836
828 key = hist_browser__run(browser, msg); 837 key = hist_browser__run(browser, msg);
829 838
830 thread = hist_browser__selected_thread(browser); 839 if (browser->he_selection != NULL) {
831 dso = browser->selection->map ? browser->selection->map->dso : NULL; 840 thread = hist_browser__selected_thread(browser);
841 dso = browser->selection->map ? browser->selection->map->dso : NULL;
842 }
832 843
833 switch (key) { 844 switch (key) {
834 case NEWT_KEY_TAB: 845 case NEWT_KEY_TAB:
@@ -839,7 +850,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
839 */ 850 */
840 goto out_free_stack; 851 goto out_free_stack;
841 case 'a': 852 case 'a':
842 if (browser->selection->map == NULL && 853 if (browser->selection == NULL ||
854 browser->selection->map == NULL ||
843 browser->selection->map->dso->annotate_warned) 855 browser->selection->map->dso->annotate_warned)
844 continue; 856 continue;
845 goto do_annotate; 857 goto do_annotate;
@@ -858,6 +870,7 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
858 "E Expand all callchains\n" 870 "E Expand all callchains\n"
859 "d Zoom into current DSO\n" 871 "d Zoom into current DSO\n"
860 "t Zoom into current Thread\n" 872 "t Zoom into current Thread\n"
873 "TAB/UNTAB Switch events\n"
861 "q/CTRL+C Exit browser"); 874 "q/CTRL+C Exit browser");
862 continue; 875 continue;
863 case NEWT_KEY_ENTER: 876 case NEWT_KEY_ENTER:
@@ -867,8 +880,14 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
867 case NEWT_KEY_LEFT: { 880 case NEWT_KEY_LEFT: {
868 const void *top; 881 const void *top;
869 882
870 if (pstack__empty(fstack)) 883 if (pstack__empty(fstack)) {
884 /*
885 * Go back to the perf_evsel_menu__run or other user
886 */
887 if (left_exits)
888 goto out_free_stack;
871 continue; 889 continue;
890 }
872 top = pstack__pop(fstack); 891 top = pstack__pop(fstack);
873 if (top == &dso_filter) 892 if (top == &dso_filter)
874 goto zoom_out_dso; 893 goto zoom_out_dso;
@@ -877,14 +896,16 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
877 continue; 896 continue;
878 } 897 }
879 case NEWT_KEY_ESCAPE: 898 case NEWT_KEY_ESCAPE:
880 if (!ui__dialog_yesno("Do you really want to exit?")) 899 if (!left_exits &&
900 !ui__dialog_yesno("Do you really want to exit?"))
881 continue; 901 continue;
882 /* Fall thru */ 902 /* Fall thru */
883 default: 903 default:
884 goto out_free_stack; 904 goto out_free_stack;
885 } 905 }
886 906
887 if (browser->selection->sym != NULL && 907 if (browser->selection != NULL &&
908 browser->selection->sym != NULL &&
888 !browser->selection->map->dso->annotate_warned && 909 !browser->selection->map->dso->annotate_warned &&
889 asprintf(&options[nr_options], "Annotate %s", 910 asprintf(&options[nr_options], "Annotate %s",
890 browser->selection->sym->name) > 0) 911 browser->selection->sym->name) > 0)
@@ -903,7 +924,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
903 (dso->kernel ? "the Kernel" : dso->short_name)) > 0) 924 (dso->kernel ? "the Kernel" : dso->short_name)) > 0)
904 zoom_dso = nr_options++; 925 zoom_dso = nr_options++;
905 926
906 if (browser->selection->map != NULL && 927 if (browser->selection != NULL &&
928 browser->selection->map != NULL &&
907 asprintf(&options[nr_options], "Browse map details") > 0) 929 asprintf(&options[nr_options], "Browse map details") > 0)
908 browse_map = nr_options++; 930 browse_map = nr_options++;
909 931
@@ -923,19 +945,11 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
923 if (choice == annotate) { 945 if (choice == annotate) {
924 struct hist_entry *he; 946 struct hist_entry *he;
925do_annotate: 947do_annotate:
926 if (browser->selection->map->dso->origin == DSO__ORIG_KERNEL) {
927 browser->selection->map->dso->annotate_warned = 1;
928 ui_helpline__puts("No vmlinux file found, can't "
929 "annotate with just a "
930 "kallsyms file");
931 continue;
932 }
933
934 he = hist_browser__selected_entry(browser); 948 he = hist_browser__selected_entry(browser);
935 if (he == NULL) 949 if (he == NULL)
936 continue; 950 continue;
937 951
938 hist_entry__tui_annotate(he); 952 hist_entry__tui_annotate(he, evsel->idx);
939 } else if (choice == browse_map) 953 } else if (choice == browse_map)
940 map__browse(browser->selection->map); 954 map__browse(browser->selection->map);
941 else if (choice == zoom_dso) { 955 else if (choice == zoom_dso) {
@@ -984,30 +998,141 @@ out:
984 return key; 998 return key;
985} 999}
986 1000
987int hists__tui_browse_tree(struct rb_root *self, const char *help) 1001struct perf_evsel_menu {
1002 struct ui_browser b;
1003 struct perf_evsel *selection;
1004};
1005
1006static void perf_evsel_menu__write(struct ui_browser *browser,
1007 void *entry, int row)
1008{
1009 struct perf_evsel_menu *menu = container_of(browser,
1010 struct perf_evsel_menu, b);
1011 struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
1012 bool current_entry = ui_browser__is_current_entry(browser, row);
1013 unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE];
1014 const char *ev_name = event_name(evsel);
1015 char bf[256], unit;
1016
1017 ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
1018 HE_COLORSET_NORMAL);
1019
1020 nr_events = convert_unit(nr_events, &unit);
1021 snprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
1022 unit, unit == ' ' ? "" : " ", ev_name);
1023 slsmg_write_nstring(bf, browser->width);
1024
1025 if (current_entry)
1026 menu->selection = evsel;
1027}
1028
1029static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help)
988{ 1030{
989 struct rb_node *first = rb_first(self), *nd = first, *next; 1031 int exit_keys[] = { NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, };
990 int key = 0; 1032 struct perf_evlist *evlist = menu->b.priv;
1033 struct perf_evsel *pos;
1034 const char *ev_name, *title = "Available samples";
1035 int key;
1036
1037 if (ui_browser__show(&menu->b, title,
1038 "ESC: exit, ENTER|->: Browse histograms") < 0)
1039 return -1;
1040
1041 ui_browser__add_exit_keys(&menu->b, exit_keys);
991 1042
992 while (nd) { 1043 while (1) {
993 struct hists *hists = rb_entry(nd, struct hists, rb_node); 1044 key = ui_browser__run(&menu->b);
994 const char *ev_name = __event_name(hists->type, hists->config);
995 1045
996 key = hists__browse(hists, help, ev_name);
997 switch (key) { 1046 switch (key) {
998 case NEWT_KEY_TAB: 1047 case NEWT_KEY_RIGHT:
999 next = rb_next(nd); 1048 case NEWT_KEY_ENTER:
1000 if (next) 1049 if (!menu->selection)
1001 nd = next; 1050 continue;
1051 pos = menu->selection;
1052browse_hists:
1053 ev_name = event_name(pos);
1054 key = perf_evsel__hists_browse(pos, help, ev_name, true);
1055 ui_browser__show_title(&menu->b, title);
1002 break; 1056 break;
1003 case NEWT_KEY_UNTAB: 1057 case NEWT_KEY_LEFT:
1004 if (nd == first) 1058 continue;
1059 case NEWT_KEY_ESCAPE:
1060 if (!ui__dialog_yesno("Do you really want to exit?"))
1005 continue; 1061 continue;
1006 nd = rb_prev(nd); 1062 /* Fall thru */
1063 default:
1064 goto out;
1065 }
1066
1067 switch (key) {
1068 case NEWT_KEY_TAB:
1069 if (pos->node.next == &evlist->entries)
1070 pos = list_entry(evlist->entries.next, struct perf_evsel, node);
1071 else
1072 pos = list_entry(pos->node.next, struct perf_evsel, node);
1073 goto browse_hists;
1074 case NEWT_KEY_UNTAB:
1075 if (pos->node.prev == &evlist->entries)
1076 pos = list_entry(evlist->entries.prev, struct perf_evsel, node);
1077 else
1078 pos = list_entry(pos->node.prev, struct perf_evsel, node);
1079 goto browse_hists;
1080 case 'q':
1081 case CTRL('c'):
1082 goto out;
1007 default: 1083 default:
1008 return key; 1084 break;
1009 } 1085 }
1010 } 1086 }
1011 1087
1088out:
1089 ui_browser__hide(&menu->b);
1012 return key; 1090 return key;
1013} 1091}
1092
1093static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
1094 const char *help)
1095{
1096 struct perf_evsel *pos;
1097 struct perf_evsel_menu menu = {
1098 .b = {
1099 .entries = &evlist->entries,
1100 .refresh = ui_browser__list_head_refresh,
1101 .seek = ui_browser__list_head_seek,
1102 .write = perf_evsel_menu__write,
1103 .nr_entries = evlist->nr_entries,
1104 .priv = evlist,
1105 },
1106 };
1107
1108 ui_helpline__push("Press ESC to exit");
1109
1110 list_for_each_entry(pos, &evlist->entries, node) {
1111 const char *ev_name = event_name(pos);
1112 size_t line_len = strlen(ev_name) + 7;
1113
1114 if (menu.b.width < line_len)
1115 menu.b.width = line_len;
1116 /*
1117 * Cache the evsel name, tracepoints have a _high_ cost per
1118 * event_name() call.
1119 */
1120 if (pos->name == NULL)
1121 pos->name = strdup(ev_name);
1122 }
1123
1124 return perf_evsel_menu__run(&menu, help);
1125}
1126
1127int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help)
1128{
1129
1130 if (evlist->nr_entries == 1) {
1131 struct perf_evsel *first = list_entry(evlist->entries.next,
1132 struct perf_evsel, node);
1133 const char *ev_name = event_name(first);
1134 return perf_evsel__hists_browse(first, help, ev_name, false);
1135 }
1136
1137 return __perf_evlist__tui_browse_hists(evlist, help);
1138}
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index e5158369106e..8462bffe20bc 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -41,7 +41,7 @@ static int ui_entry__read(const char *title, char *bf, size_t size, int width)
41out_free_form: 41out_free_form:
42 newtPopWindow(); 42 newtPopWindow();
43 newtFormDestroy(form); 43 newtFormDestroy(form);
44 return 0; 44 return err;
45} 45}
46 46
47struct map_browser { 47struct map_browser {
diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c
new file mode 100644
index 000000000000..5a06538532af
--- /dev/null
+++ b/tools/perf/util/ui/browsers/top.c
@@ -0,0 +1,213 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9#include "../browser.h"
10#include "../../annotate.h"
11#include "../helpline.h"
12#include "../libslang.h"
13#include "../util.h"
14#include "../../evlist.h"
15#include "../../hist.h"
16#include "../../sort.h"
17#include "../../symbol.h"
18#include "../../top.h"
19
20struct perf_top_browser {
21 struct ui_browser b;
22 struct rb_root root;
23 struct sym_entry *selection;
24 float sum_ksamples;
25 int dso_width;
26 int dso_short_width;
27 int sym_width;
28};
29
30static void perf_top_browser__write(struct ui_browser *browser, void *entry, int row)
31{
32 struct perf_top_browser *top_browser = container_of(browser, struct perf_top_browser, b);
33 struct sym_entry *syme = rb_entry(entry, struct sym_entry, rb_node);
34 bool current_entry = ui_browser__is_current_entry(browser, row);
35 struct symbol *symbol = sym_entry__symbol(syme);
36 struct perf_top *top = browser->priv;
37 int width = browser->width;
38 double pcnt;
39
40 pcnt = 100.0 - (100.0 * ((top_browser->sum_ksamples - syme->snap_count) /
41 top_browser->sum_ksamples));
42 ui_browser__set_percent_color(browser, pcnt, current_entry);
43
44 if (top->evlist->nr_entries == 1 || !top->display_weighted) {
45 slsmg_printf("%20.2f ", syme->weight);
46 width -= 24;
47 } else {
48 slsmg_printf("%9.1f %10ld ", syme->weight, syme->snap_count);
49 width -= 23;
50 }
51
52 slsmg_printf("%4.1f%%", pcnt);
53 width -= 7;
54
55 if (verbose) {
56 slsmg_printf(" %016" PRIx64, symbol->start);
57 width -= 17;
58 }
59
60 slsmg_printf(" %-*.*s ", top_browser->sym_width, top_browser->sym_width,
61 symbol->name);
62 width -= top_browser->sym_width;
63 slsmg_write_nstring(width >= syme->map->dso->long_name_len ?
64 syme->map->dso->long_name :
65 syme->map->dso->short_name, width);
66
67 if (current_entry)
68 top_browser->selection = syme;
69}
70
71static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser)
72{
73 struct perf_top *top = browser->b.priv;
74 u64 top_idx = browser->b.top_idx;
75
76 browser->root = RB_ROOT;
77 browser->b.top = NULL;
78 browser->sum_ksamples = perf_top__decay_samples(top, &browser->root);
79 /*
80 * No active symbols
81 */
82 if (top->rb_entries == 0)
83 return;
84
85 perf_top__find_widths(top, &browser->root, &browser->dso_width,
86 &browser->dso_short_width,
87 &browser->sym_width);
88 if (browser->sym_width + browser->dso_width > browser->b.width - 29) {
89 browser->dso_width = browser->dso_short_width;
90 if (browser->sym_width + browser->dso_width > browser->b.width - 29)
91 browser->sym_width = browser->b.width - browser->dso_width - 29;
92 }
93
94 /*
95 * Adjust the ui_browser indexes since the entries in the browser->root
96 * rb_tree may have changed, then seek it from start, so that we get a
97 * possible new top of the screen.
98 */
99 browser->b.nr_entries = top->rb_entries;
100
101 if (top_idx >= browser->b.nr_entries) {
102 if (browser->b.height >= browser->b.nr_entries)
103 top_idx = browser->b.nr_entries - browser->b.height;
104 else
105 top_idx = 0;
106 }
107
108 if (browser->b.index >= top_idx + browser->b.height)
109 browser->b.index = top_idx + browser->b.index - browser->b.top_idx;
110
111 if (browser->b.index >= browser->b.nr_entries)
112 browser->b.index = browser->b.nr_entries - 1;
113
114 browser->b.top_idx = top_idx;
115 browser->b.seek(&browser->b, top_idx, SEEK_SET);
116}
117
118static void perf_top_browser__annotate(struct perf_top_browser *browser)
119{
120 struct sym_entry *syme = browser->selection;
121 struct symbol *sym = sym_entry__symbol(syme);
122 struct annotation *notes = symbol__annotation(sym);
123 struct perf_top *top = browser->b.priv;
124
125 if (notes->src != NULL)
126 goto do_annotation;
127
128 pthread_mutex_lock(&notes->lock);
129
130 top->sym_filter_entry = NULL;
131
132 if (symbol__alloc_hist(sym, top->evlist->nr_entries) < 0) {
133 pr_err("Not enough memory for annotating '%s' symbol!\n",
134 sym->name);
135 pthread_mutex_unlock(&notes->lock);
136 return;
137 }
138
139 top->sym_filter_entry = syme;
140
141 pthread_mutex_unlock(&notes->lock);
142do_annotation:
143 symbol__tui_annotate(sym, syme->map, 0, top->delay_secs * 1000);
144}
145
146static int perf_top_browser__run(struct perf_top_browser *browser)
147{
148 int key;
149 char title[160];
150 struct perf_top *top = browser->b.priv;
151 int delay_msecs = top->delay_secs * 1000;
152 int exit_keys[] = { 'a', NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, };
153
154 perf_top_browser__update_rb_tree(browser);
155 perf_top__header_snprintf(top, title, sizeof(title));
156 perf_top__reset_sample_counters(top);
157
158 if (ui_browser__show(&browser->b, title,
159 "ESC: exit, ENTER|->|a: Live Annotate") < 0)
160 return -1;
161
162 newtFormSetTimer(browser->b.form, delay_msecs);
163 ui_browser__add_exit_keys(&browser->b, exit_keys);
164
165 while (1) {
166 key = ui_browser__run(&browser->b);
167
168 switch (key) {
169 case -1:
170 /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */
171 perf_top_browser__update_rb_tree(browser);
172 perf_top__header_snprintf(top, title, sizeof(title));
173 perf_top__reset_sample_counters(top);
174 ui_browser__set_color(&browser->b, NEWT_COLORSET_ROOT);
175 SLsmg_gotorc(0, 0);
176 slsmg_write_nstring(title, browser->b.width);
177 break;
178 case 'a':
179 case NEWT_KEY_RIGHT:
180 case NEWT_KEY_ENTER:
181 if (browser->selection)
182 perf_top_browser__annotate(browser);
183 break;
184 case NEWT_KEY_LEFT:
185 continue;
186 case NEWT_KEY_ESCAPE:
187 if (!ui__dialog_yesno("Do you really want to exit?"))
188 continue;
189 /* Fall thru */
190 default:
191 goto out;
192 }
193 }
194out:
195 ui_browser__hide(&browser->b);
196 return key;
197}
198
199int perf_top__tui_browser(struct perf_top *top)
200{
201 struct perf_top_browser browser = {
202 .b = {
203 .entries = &browser.root,
204 .refresh = ui_browser__rb_tree_refresh,
205 .seek = ui_browser__rb_tree_seek,
206 .write = perf_top_browser__write,
207 .priv = top,
208 },
209 };
210
211 ui_helpline__push("Press <- or ESC to exit");
212 return perf_top_browser__run(&browser);
213}
diff --git a/tools/perf/util/ui/helpline.c b/tools/perf/util/ui/helpline.c
index 8d79daa4458a..f36d2ff509ed 100644
--- a/tools/perf/util/ui/helpline.c
+++ b/tools/perf/util/ui/helpline.c
@@ -5,6 +5,7 @@
5 5
6#include "../debug.h" 6#include "../debug.h"
7#include "helpline.h" 7#include "helpline.h"
8#include "ui.h"
8 9
9void ui_helpline__pop(void) 10void ui_helpline__pop(void)
10{ 11{
@@ -55,7 +56,8 @@ int ui_helpline__show_help(const char *format, va_list ap)
55 int ret; 56 int ret;
56 static int backlog; 57 static int backlog;
57 58
58 ret = vsnprintf(ui_helpline__last_msg + backlog, 59 pthread_mutex_lock(&ui__lock);
60 ret = vsnprintf(ui_helpline__last_msg + backlog,
59 sizeof(ui_helpline__last_msg) - backlog, format, ap); 61 sizeof(ui_helpline__last_msg) - backlog, format, ap);
60 backlog += ret; 62 backlog += ret;
61 63
@@ -64,6 +66,7 @@ int ui_helpline__show_help(const char *format, va_list ap)
64 newtRefresh(); 66 newtRefresh();
65 backlog = 0; 67 backlog = 0;
66 } 68 }
69 pthread_mutex_unlock(&ui__lock);
67 70
68 return ret; 71 return ret;
69} 72}
diff --git a/tools/perf/util/ui/libslang.h b/tools/perf/util/ui/libslang.h
index 5623da8e8080..2b63e1c9b181 100644
--- a/tools/perf/util/ui/libslang.h
+++ b/tools/perf/util/ui/libslang.h
@@ -13,11 +13,11 @@
13 13
14#if SLANG_VERSION < 20104 14#if SLANG_VERSION < 20104
15#define slsmg_printf(msg, args...) \ 15#define slsmg_printf(msg, args...) \
16 SLsmg_printf((char *)msg, ##args) 16 SLsmg_printf((char *)(msg), ##args)
17#define slsmg_write_nstring(msg, len) \ 17#define slsmg_write_nstring(msg, len) \
18 SLsmg_write_nstring((char *)msg, len) 18 SLsmg_write_nstring((char *)(msg), len)
19#define sltt_set_color(obj, name, fg, bg) \ 19#define sltt_set_color(obj, name, fg, bg) \
20 SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg) 20 SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
21#else 21#else
22#define slsmg_printf SLsmg_printf 22#define slsmg_printf SLsmg_printf
23#define slsmg_write_nstring SLsmg_write_nstring 23#define slsmg_write_nstring SLsmg_write_nstring
diff --git a/tools/perf/util/ui/setup.c b/tools/perf/util/ui/setup.c
index 662085032eb7..ee46d671db59 100644
--- a/tools/perf/util/ui/setup.c
+++ b/tools/perf/util/ui/setup.c
@@ -6,6 +6,9 @@
6#include "../debug.h" 6#include "../debug.h"
7#include "browser.h" 7#include "browser.h"
8#include "helpline.h" 8#include "helpline.h"
9#include "ui.h"
10
11pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
9 12
10static void newt_suspend(void *d __used) 13static void newt_suspend(void *d __used)
11{ 14{
@@ -14,11 +17,12 @@ static void newt_suspend(void *d __used)
14 newtResume(); 17 newtResume();
15} 18}
16 19
17void setup_browser(void) 20void setup_browser(bool fallback_to_pager)
18{ 21{
19 if (!isatty(1) || !use_browser || dump_trace) { 22 if (!isatty(1) || !use_browser || dump_trace) {
20 use_browser = 0; 23 use_browser = 0;
21 setup_pager(); 24 if (fallback_to_pager)
25 setup_pager();
22 return; 26 return;
23 } 27 }
24 28
diff --git a/tools/perf/util/ui/ui.h b/tools/perf/util/ui/ui.h
new file mode 100644
index 000000000000..d264e059c829
--- /dev/null
+++ b/tools/perf/util/ui/ui.h
@@ -0,0 +1,8 @@
1#ifndef _PERF_UI_H_
2#define _PERF_UI_H_ 1
3
4#include <pthread.h>
5
6extern pthread_mutex_t ui__lock;
7
8#endif /* _PERF_UI_H_ */
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 7b5a8926624e..fdf1fc8f08bc 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -9,6 +9,7 @@
9#include "../debug.h" 9#include "../debug.h"
10#include "browser.h" 10#include "browser.h"
11#include "helpline.h" 11#include "helpline.h"
12#include "ui.h"
12#include "util.h" 13#include "util.h"
13 14
14static void newt_form__set_exit_keys(newtComponent self) 15static void newt_form__set_exit_keys(newtComponent self)
@@ -118,10 +119,12 @@ void ui__warning(const char *format, ...)
118 va_list args; 119 va_list args;
119 120
120 va_start(args, format); 121 va_start(args, format);
121 if (use_browser > 0) 122 if (use_browser > 0) {
123 pthread_mutex_lock(&ui__lock);
122 newtWinMessagev((char *)warning_str, (char *)ok, 124 newtWinMessagev((char *)warning_str, (char *)ok,
123 (char *)format, args); 125 (char *)format, args);
124 else 126 pthread_mutex_unlock(&ui__lock);
127 } else
125 vfprintf(stderr, format, args); 128 vfprintf(stderr, format, args);
126 va_end(args); 129 va_end(args);
127} 130}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index e833f26f3bfc..fc784284ac8b 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -70,9 +70,7 @@
70#include <sys/poll.h> 70#include <sys/poll.h>
71#include <sys/socket.h> 71#include <sys/socket.h>
72#include <sys/ioctl.h> 72#include <sys/ioctl.h>
73#ifndef NO_SYS_SELECT_H
74#include <sys/select.h> 73#include <sys/select.h>
75#endif
76#include <netinet/in.h> 74#include <netinet/in.h>
77#include <netinet/tcp.h> 75#include <netinet/tcp.h>
78#include <arpa/inet.h> 76#include <arpa/inet.h>
@@ -83,10 +81,6 @@
83#include "types.h" 81#include "types.h"
84#include <sys/ttydefaults.h> 82#include <sys/ttydefaults.h>
85 83
86#ifndef NO_ICONV
87#include <iconv.h>
88#endif
89
90extern const char *graph_line; 84extern const char *graph_line;
91extern const char *graph_dotted_line; 85extern const char *graph_dotted_line;
92extern char buildid_dir[]; 86extern char buildid_dir[];
@@ -236,26 +230,6 @@ static inline int sane_case(int x, int high)
236 return x; 230 return x;
237} 231}
238 232
239#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
240# define FORCE_DIR_SET_GID S_ISGID
241#else
242# define FORCE_DIR_SET_GID 0
243#endif
244
245#ifdef NO_NSEC
246#undef USE_NSEC
247#define ST_CTIME_NSEC(st) 0
248#define ST_MTIME_NSEC(st) 0
249#else
250#ifdef USE_ST_TIMESPEC
251#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
252#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
253#else
254#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
255#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
256#endif
257#endif
258
259int mkdir_p(char *path, mode_t mode); 233int mkdir_p(char *path, mode_t mode);
260int copyfile(const char *from, const char *to); 234int copyfile(const char *from, const char *to);
261 235
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index e1c62eeb88f5..ba7c63af6f3b 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1,6 +1,6 @@
1#!/usr/bin/perl -w 1#!/usr/bin/perl -w
2# 2#
3# Copywrite 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc. 3# Copyright 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
4# Licensed under the terms of the GNU GPL License version 2 4# Licensed under the terms of the GNU GPL License version 2
5# 5#
6 6