author		Paul Mundt <lethal@linux-sh.org>	2009-06-11 02:01:14 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2009-06-11 02:01:14 -0400
commit		cf9fe114e3b37e14fc8434d5abb192e35df551b1 (patch)
tree		0f82879295dc792f9df1a3ce79e143a3c073510f
parent		c1d0d32a603ed06377f404adf2c538de33bb3634 (diff)
parent		991ec02cdca33b03a132a0cacfe6f0aa0be9aa8d (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-cache_disable  18
-rw-r--r--  Documentation/DMA-API.txt  12
-rw-r--r--  Documentation/DocBook/Makefile  3
-rw-r--r--  Documentation/DocBook/tracepoint.tmpl  89
-rw-r--r--  Documentation/RCU/trace.txt  102
-rw-r--r--  Documentation/filesystems/tmpfs.txt  2
-rw-r--r--  Documentation/futex-requeue-pi.txt  131
-rw-r--r--  Documentation/hwmon/sysfs-interface  6
-rw-r--r--  Documentation/input/multi-touch-protocol.txt  103
-rw-r--r--  Documentation/kernel-parameters.txt  54
-rw-r--r--  Documentation/memory-barriers.txt  129
-rw-r--r--  Documentation/scheduler/sched-rt-group.txt  20
-rw-r--r--  Documentation/sound/alsa/HD-Audio-Models.txt  1
-rw-r--r--  Documentation/sound/alsa/Procfile.txt  5
-rw-r--r--  Documentation/trace/events.txt  90
-rw-r--r--  Documentation/trace/ftrace.txt  17
-rw-r--r--  Documentation/trace/power.txt  17
-rw-r--r--  Documentation/x86/boot.txt  122
-rw-r--r--  Documentation/x86/x86_64/boot-options.txt  5
-rw-r--r--  Documentation/x86/x86_64/mm.txt  9
-rw-r--r--  MAINTAINERS  49
-rw-r--r--  Makefile  6
-rw-r--r--  arch/alpha/kernel/sys_dp264.c  8
-rw-r--r--  arch/alpha/kernel/sys_titan.c  4
-rw-r--r--  arch/arm/common/gic.c  4
-rw-r--r--  arch/arm/include/asm/assembler.h  13
-rw-r--r--  arch/arm/include/asm/atomic.h  61
-rw-r--r--  arch/arm/include/asm/cache.h  16
-rw-r--r--  arch/arm/include/asm/flat.h  3
-rw-r--r--  arch/arm/include/asm/page.h  7
-rw-r--r--  arch/arm/include/asm/system.h  176
-rw-r--r--  arch/arm/kernel/elf.c  9
-rw-r--r--  arch/arm/kernel/entry-armv.S  5
-rw-r--r--  arch/arm/lib/bitops.h  2
-rw-r--r--  arch/arm/mach-gemini/include/mach/hardware.h  3
-rw-r--r--  arch/arm/mach-kirkwood/common.c  8
-rw-r--r--  arch/arm/mach-kirkwood/ts219-setup.c  6
-rw-r--r--  arch/arm/mach-loki/common.c  6
-rw-r--r--  arch/arm/mach-mmp/include/mach/mfp-pxa168.h  5
-rw-r--r--  arch/arm/mach-mmp/include/mach/mfp-pxa910.h  5
-rw-r--r--  arch/arm/mach-mmp/include/mach/mfp.h  9
-rw-r--r--  arch/arm/mach-mmp/time.c  2
-rw-r--r--  arch/arm/mach-mv78xx0/common.c  16
-rw-r--r--  arch/arm/mach-mx2/clock_imx21.c  2
-rw-r--r--  arch/arm/mach-mx2/clock_imx27.c  2
-rw-r--r--  arch/arm/mach-mx3/clock-imx35.c  2
-rw-r--r--  arch/arm/mach-mx3/clock.c  2
-rw-r--r--  arch/arm/mach-orion5x/common.c  5
-rw-r--r--  arch/arm/mach-pxa/devices.c  5
-rw-r--r--  arch/arm/mach-pxa/ezx.c  36
-rw-r--r--  arch/arm/mach-pxa/imote2.c  2
-rw-r--r--  arch/arm/mach-pxa/include/mach/reset.h  5
-rw-r--r--  arch/arm/mach-pxa/mfp-pxa2xx.c  6
-rw-r--r--  arch/arm/mach-pxa/palmld.c  2
-rw-r--r--  arch/arm/mach-pxa/palmt5.c  1
-rw-r--r--  arch/arm/mach-pxa/palmtx.c  1
-rw-r--r--  arch/arm/mach-pxa/reset.c  4
-rw-r--r--  arch/arm/mach-pxa/spitz.c  8
-rw-r--r--  arch/arm/mach-pxa/tosa.c  2
-rw-r--r--  arch/arm/mm/proc-v7.S  36
-rw-r--r--  arch/arm/tools/mach-types  131
-rw-r--r--  arch/blackfin/include/asm/.gitignore  1
-rw-r--r--  arch/blackfin/include/asm/flat.h  1
-rw-r--r--  arch/blackfin/include/asm/unistd.h  4
-rw-r--r--  arch/blackfin/kernel/.gitignore  1
-rw-r--r--  arch/blackfin/lib/strncmp.c  3
-rw-r--r--  arch/blackfin/mach-common/entry.S  2
-rw-r--r--  arch/cris/arch-v32/kernel/irq.c  4
-rw-r--r--  arch/h8300/include/asm/flat.h  1
-rw-r--r--  arch/ia64/hp/sim/hpsim_irq.c  3
-rw-r--r--  arch/ia64/kernel/acpi.c  5
-rw-r--r--  arch/ia64/kernel/iosapic.c  10
-rw-r--r--  arch/ia64/kernel/msi_ia64.c  16
-rw-r--r--  arch/ia64/sn/kernel/irq.c  4
-rw-r--r--  arch/ia64/sn/kernel/msi_sn.c  8
-rw-r--r--  arch/m32r/include/asm/flat.h  1
-rw-r--r--  arch/m68k/include/asm/flat.h  1
-rw-r--r--  arch/mips/Kconfig  5
-rw-r--r--  arch/mips/cavium-octeon/octeon-irq.c  8
-rw-r--r--  arch/mips/include/asm/cpu-info.h  4
-rw-r--r--  arch/mips/include/asm/delay.h  92
-rw-r--r--  arch/mips/include/asm/ioctl.h  4
-rw-r--r--  arch/mips/include/asm/irq.h  2
-rw-r--r--  arch/mips/include/asm/uaccess.h  2
-rw-r--r--  arch/mips/kernel/irq-gic.c  5
-rw-r--r--  arch/mips/kernel/proc.c  2
-rw-r--r--  arch/mips/lib/Makefile  4
-rw-r--r--  arch/mips/lib/delay.c  56
-rw-r--r--  arch/mips/mti-malta/malta-smtc.c  4
-rw-r--r--  arch/mips/sgi-ip32/ip32-reset.c  7
-rw-r--r--  arch/mips/sibyte/bcm1480/irq.c  8
-rw-r--r--  arch/mips/sibyte/cfe/setup.c  8
-rw-r--r--  arch/mips/sibyte/sb1250/irq.c  8
-rw-r--r--  arch/parisc/kernel/irq.c  6
-rw-r--r--  arch/powerpc/Kconfig  12
-rw-r--r--  arch/powerpc/configs/pmac32_defconfig  278
-rw-r--r--  arch/powerpc/include/asm/dma-mapping.h  6
-rw-r--r--  arch/powerpc/include/asm/fixmap.h  4
-rw-r--r--  arch/powerpc/include/asm/pgtable-ppc32.h  26
-rw-r--r--  arch/powerpc/kernel/dma.c  2
-rw-r--r--  arch/powerpc/lib/Makefile  1
-rw-r--r--  arch/powerpc/lib/dma-noncoherent.c  237
-rw-r--r--  arch/powerpc/mm/Makefile  1
-rw-r--r--  arch/powerpc/mm/dma-noncoherent.c  400
-rw-r--r--  arch/powerpc/mm/init_32.c  8
-rw-r--r--  arch/powerpc/mm/mem.c  17
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c  6
-rw-r--r--  arch/powerpc/mm/pgtable_32.c  2
-rw-r--r--  arch/powerpc/platforms/maple/pci.c  14
-rw-r--r--  arch/powerpc/platforms/pseries/xics.c  12
-rw-r--r--  arch/powerpc/sysdev/mpic.c  4
-rw-r--r--  arch/powerpc/sysdev/mpic.h  2
-rw-r--r--  arch/sh/include/asm/flat.h  1
-rw-r--r--  arch/sparc/include/asm/elf_64.h  5
-rw-r--r--  arch/sparc/include/asm/thread_info_64.h  4
-rw-r--r--  arch/sparc/kernel/irq_64.c  12
-rw-r--r--  arch/sparc/lib/csum_copy_from_user.S  2
-rw-r--r--  arch/sparc/lib/csum_copy_to_user.S  2
-rw-r--r--  arch/x86/Kbuild  16
-rw-r--r--  arch/x86/Kconfig  53
-rw-r--r--  arch/x86/Kconfig.debug  20
-rw-r--r--  arch/x86/Makefile  19
-rw-r--r--  arch/x86/boot/.gitignore  2
-rw-r--r--  arch/x86/boot/Makefile  29
-rw-r--r--  arch/x86/boot/a20.c  9
-rw-r--r--  arch/x86/boot/apm.c  76
-rw-r--r--  arch/x86/boot/bioscall.S  82
-rw-r--r--  arch/x86/boot/boot.h  48
-rw-r--r--  arch/x86/boot/compressed/.gitignore  3
-rw-r--r--  arch/x86/boot/compressed/Makefile  54
-rw-r--r--  arch/x86/boot/compressed/head_32.S  194
-rw-r--r--  arch/x86/boot/compressed/head_64.S  169
-rw-r--r--  arch/x86/boot/compressed/misc.c  12
-rw-r--r--  arch/x86/boot/compressed/mkpiggy.c  97
-rw-r--r--  arch/x86/boot/compressed/relocs.c  7
-rw-r--r--  arch/x86/boot/compressed/vmlinux.lds.S (renamed from arch/x86/boot/compressed/vmlinux_64.lds)  29
-rw-r--r--  arch/x86/boot/compressed/vmlinux.scr  10
-rw-r--r--  arch/x86/boot/compressed/vmlinux_32.lds  43
-rw-r--r--  arch/x86/boot/edd.c  71
-rw-r--r--  arch/x86/boot/header.S  30
-rw-r--r--  arch/x86/boot/main.c  39
-rw-r--r--  arch/x86/boot/mca.c  27
-rw-r--r--  arch/x86/boot/memory.c  108
-rw-r--r--  arch/x86/boot/regs.c  29
-rw-r--r--  arch/x86/boot/setup.ld  6
-rw-r--r--  arch/x86/boot/tty.c  52
-rw-r--r--  arch/x86/boot/video-bios.c  27
-rw-r--r--  arch/x86/boot/video-vesa.c  137
-rw-r--r--  arch/x86/boot/video-vga.c  95
-rw-r--r--  arch/x86/boot/video.c  42
-rw-r--r--  arch/x86/boot/video.h  14
-rw-r--r--  arch/x86/configs/i386_defconfig  148
-rw-r--r--  arch/x86/configs/x86_64_defconfig  151
-rw-r--r--  arch/x86/ia32/ia32entry.S  1
-rw-r--r--  arch/x86/include/asm/alternative.h  59
-rw-r--r--  arch/x86/include/asm/amd_iommu.h  2
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h  55
-rw-r--r--  arch/x86/include/asm/apic.h  33
-rw-r--r--  arch/x86/include/asm/apicdef.h  8
-rw-r--r--  arch/x86/include/asm/boot.h  15
-rw-r--r--  arch/x86/include/asm/bootparam.h  3
-rw-r--r--  arch/x86/include/asm/cpu_debug.h  101
-rw-r--r--  arch/x86/include/asm/cpufeature.h  7
-rw-r--r--  arch/x86/include/asm/ds.h  82
-rw-r--r--  arch/x86/include/asm/hw_irq.h  25
-rw-r--r--  arch/x86/include/asm/i387.h  43
-rw-r--r--  arch/x86/include/asm/i8259.h  4
-rw-r--r--  arch/x86/include/asm/io_apic.h  9
-rw-r--r--  arch/x86/include/asm/iomap.h  5
-rw-r--r--  arch/x86/include/asm/irq_remapping.h  2
-rw-r--r--  arch/x86/include/asm/irq_vectors.h  1
-rw-r--r--  arch/x86/include/asm/k8.h  13
-rw-r--r--  arch/x86/include/asm/microcode.h  25
-rw-r--r--  arch/x86/include/asm/mpspec.h  15
-rw-r--r--  arch/x86/include/asm/msr-index.h  1
-rw-r--r--  arch/x86/include/asm/nmi.h  2
-rw-r--r--  arch/x86/include/asm/numa_64.h  10
-rw-r--r--  arch/x86/include/asm/page_32_types.h  4
-rw-r--r--  arch/x86/include/asm/page_64_types.h  22
-rw-r--r--  arch/x86/include/asm/page_types.h  6
-rw-r--r--  arch/x86/include/asm/paravirt.h  22
-rw-r--r--  arch/x86/include/asm/pgtable.h  4
-rw-r--r--  arch/x86/include/asm/pgtable_64.h  6
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h  8
-rw-r--r--  arch/x86/include/asm/pgtable_types.h  1
-rw-r--r--  arch/x86/include/asm/processor.h  47
-rw-r--r--  arch/x86/include/asm/ptrace.h  9
-rw-r--r--  arch/x86/include/asm/required-features.h  8
-rw-r--r--  arch/x86/include/asm/setup.h  1
-rw-r--r--  arch/x86/include/asm/smp.h  2
-rw-r--r--  arch/x86/include/asm/sparsemem.h  2
-rw-r--r--  arch/x86/include/asm/syscalls.h  45
-rw-r--r--  arch/x86/include/asm/thread_info.h  4
-rw-r--r--  arch/x86/include/asm/tlbflush.h  8
-rw-r--r--  arch/x86/include/asm/topology.h  3
-rw-r--r--  arch/x86/include/asm/traps.h  5
-rw-r--r--  arch/x86/include/asm/unistd_32.h  1
-rw-r--r--  arch/x86/include/asm/unistd_64.h  2
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h  2
-rw-r--r--  arch/x86/include/asm/uv/uv_hub.h  6
-rw-r--r--  arch/x86/kernel/Makefile  3
-rw-r--r--  arch/x86/kernel/acpi/boot.c  156
-rw-r--r--  arch/x86/kernel/acpi/realmode/Makefile  2
-rw-r--r--  arch/x86/kernel/acpi/realmode/bioscall.S  1
-rw-r--r--  arch/x86/kernel/acpi/realmode/regs.c  1
-rw-r--r--  arch/x86/kernel/amd_iommu.c  500
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c  273
-rw-r--r--  arch/x86/kernel/apic/apic.c  311
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c  4
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c  2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c  902
-rw-r--r--  arch/x86/kernel/apic/nmi.c  2
-rw-r--r--  arch/x86/kernel/apic/probe_32.c  1
-rw-r--r--  arch/x86/kernel/apic/probe_64.c  2
-rw-r--r--  arch/x86/kernel/apic/summit_32.c  7
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c  2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c  20
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c  1
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c  1
-rw-r--r--  arch/x86/kernel/cpu/amd.c  12
-rw-r--r--  arch/x86/kernel/cpu/common.c  30
-rw-r--r--  arch/x86/kernel/cpu/cpu_debug.c  431
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Kconfig  9
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c  12
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c  1
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c  4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c  57
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c  2
-rw-r--r--  arch/x86/kernel/cpu/intel.c  6
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c  153
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c  2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c  1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c  4
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c  24
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c  2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h  15
-rw-r--r--  arch/x86/kernel/cpu/mtrr/state.c  6
-rw-r--r--  arch/x86/kernel/ds.c  921
-rw-r--r--  arch/x86/kernel/ds_selftest.c  408
-rw-r--r--  arch/x86/kernel/ds_selftest.h  15
-rw-r--r--  arch/x86/kernel/dumpstack.h  1
-rw-r--r--  arch/x86/kernel/e820.c  46
-rw-r--r--  arch/x86/kernel/early-quirks.c  2
-rw-r--r--  arch/x86/kernel/entry_64.S  24
-rw-r--r--  arch/x86/kernel/head_32.S  7
-rw-r--r--  arch/x86/kernel/irq.c  20
-rw-r--r--  arch/x86/kernel/irqinit.c (renamed from arch/x86/kernel/irqinit_32.c)  149
-rw-r--r--  arch/x86/kernel/irqinit_64.c  177
-rw-r--r--  arch/x86/kernel/kgdb.c  2
-rw-r--r--  arch/x86/kernel/kvm.c  2
-rw-r--r--  arch/x86/kernel/microcode_amd.c  70
-rw-r--r--  arch/x86/kernel/microcode_core.c  329
-rw-r--r--  arch/x86/kernel/microcode_intel.c  90
-rw-r--r--  arch/x86/kernel/mpparse.c  34
-rw-r--r--  arch/x86/kernel/paravirt.c  56
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c  54
-rw-r--r--  arch/x86/kernel/pci-gart_64.c  55
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c  2
-rw-r--r--  arch/x86/kernel/process.c  20
-rw-r--r--  arch/x86/kernel/process_32.c  20
-rw-r--r--  arch/x86/kernel/process_64.c  20
-rw-r--r--  arch/x86/kernel/ptrace.c  284
-rw-r--r--  arch/x86/kernel/quirks.c  37
-rw-r--r--  arch/x86/kernel/reboot.c  17
-rw-r--r--  arch/x86/kernel/setup.c  40
-rw-r--r--  arch/x86/kernel/setup_percpu.c  12
-rw-r--r--  arch/x86/kernel/smp.c  20
-rw-r--r--  arch/x86/kernel/smpboot.c  22
-rw-r--r--  arch/x86/kernel/stacktrace.c  2
-rw-r--r--  arch/x86/kernel/syscall_table_32.S  1
-rw-r--r--  arch/x86/kernel/tlb_uv.c  17
-rw-r--r--  arch/x86/kernel/traps.c  10
-rw-r--r--  arch/x86/kernel/tsc.c  19
-rw-r--r--  arch/x86/kernel/tsc_sync.c  14
-rw-r--r--  arch/x86/kernel/vm86_32.c  13
-rw-r--r--  arch/x86/kernel/vmi_32.c  20
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S  430
-rw-r--r--  arch/x86/kernel/vmlinux_32.lds.S  229
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S  298
-rw-r--r--  arch/x86/kernel/vsyscall_64.c  8
-rw-r--r--  arch/x86/kvm/mmu.c  3
-rw-r--r--  arch/x86/kvm/x86.c  6
-rw-r--r--  arch/x86/lguest/Makefile  1
-rw-r--r--  arch/x86/lguest/boot.c  35
-rw-r--r--  arch/x86/mm/dump_pagetables.c  7
-rw-r--r--  arch/x86/mm/fault.c  57
-rw-r--r--  arch/x86/mm/highmem_32.c  2
-rw-r--r--  arch/x86/mm/hugetlbpage.c  6
-rw-r--r--  arch/x86/mm/init.c  78
-rw-r--r--  arch/x86/mm/init_32.c  61
-rw-r--r--  arch/x86/mm/init_64.c  47
-rw-r--r--  arch/x86/mm/iomap_32.c  1
-rw-r--r--  arch/x86/mm/kmmio.c  104
-rw-r--r--  arch/x86/mm/memtest.c  14
-rw-r--r--  arch/x86/mm/mmio-mod.c  2
-rw-r--r--  arch/x86/mm/numa_64.c  33
-rw-r--r--  arch/x86/mm/pageattr.c  27
-rw-r--r--  arch/x86/mm/srat_64.c  98
-rw-r--r--  arch/x86/oprofile/nmi_int.c  27
-rw-r--r--  arch/x86/pci/irq.c  84
-rw-r--r--  arch/x86/pci/mmconfig-shared.c  6
-rw-r--r--  arch/x86/vdso/vma.c  1
-rw-r--r--  arch/x86/xen/enlighten.c  65
-rw-r--r--  arch/x86/xen/mmu.c  23
-rw-r--r--  arch/x86/xen/setup.c  6
-rw-r--r--  arch/x86/xen/xen-ops.h  1
-rw-r--r--  block/blk-core.c  21
-rw-r--r--  block/blk-sysfs.c  7
-rw-r--r--  block/bsg.c  3
-rw-r--r--  block/compat_ioctl.c  2
-rw-r--r--  block/elevator.c  8
-rw-r--r--  crypto/ahash.c  7
-rw-r--r--  drivers/acpi/pci_bind.c  24
-rw-r--r--  drivers/acpi/pci_irq.c  5
-rw-r--r--  drivers/acpi/processor_core.c  2
-rw-r--r--  drivers/acpi/processor_idle.c  8
-rw-r--r--  drivers/acpi/processor_perflib.c  12
-rw-r--r--  drivers/acpi/processor_throttling.c  2
-rw-r--r--  drivers/acpi/video.c  18
-rw-r--r--  drivers/ata/ahci.c  72
-rw-r--r--  drivers/ata/ata_piix.c  9
-rw-r--r--  drivers/ata/pata_ali.c  17
-rw-r--r--  drivers/ata/pata_efar.c  17
-rw-r--r--  drivers/ata/pata_legacy.c  2
-rw-r--r--  drivers/ata/pata_netcell.c  13
-rw-r--r--  drivers/base/bus.c  4
-rw-r--r--  drivers/base/core.c  5
-rw-r--r--  drivers/base/driver.c  4
-rw-r--r--  drivers/base/power/main.c  4
-rw-r--r--  drivers/char/hpet.c  4
-rw-r--r--  drivers/char/ipmi/ipmi_msghandler.c  13
-rw-r--r--  drivers/char/mem.c  2
-rw-r--r--  drivers/char/mxser.c  2
-rw-r--r--  drivers/cpufreq/cpufreq.c  6
-rw-r--r--  drivers/cpufreq/cpufreq_conservative.c  5
-rw-r--r--  drivers/cpufreq/cpufreq_ondemand.c  5
-rw-r--r--  drivers/dma/fsldma.c  71
-rw-r--r--  drivers/dma/ioat_dma.c  2
-rw-r--r--  drivers/edac/Kconfig  8
-rw-r--r--  drivers/edac/Makefile  2
-rw-r--r--  drivers/edac/amd8111_edac.c  4
-rw-r--r--  drivers/edac/amd8131_edac.c  2
-rw-r--r--  drivers/gpu/drm/Kconfig  14
-rw-r--r--  drivers/gpu/drm/drm_crtc.c  7
-rw-r--r--  drivers/gpu/drm/drm_crtc_helper.c  109
-rw-r--r--  drivers/gpu/drm/drm_edid.c  5
-rw-r--r--  drivers/gpu/drm/drm_irq.c  8
-rw-r--r--  drivers/gpu/drm/drm_sysfs.c  7
-rw-r--r--  drivers/gpu/drm/i915/i915_dma.c  12
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h  3
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  68
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_tiling.c  14
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h  20
-rw-r--r--  drivers/gpu/drm/i915/intel_bios.c  102
-rw-r--r--  drivers/gpu/drm/i915/intel_bios.h  17
-rw-r--r--  drivers/gpu/drm/i915/intel_crt.c  155
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c  26
-rw-r--r--  drivers/gpu/drm/i915/intel_dvo.c  1
-rw-r--r--  drivers/gpu/drm/i915/intel_hdmi.c  1
-rw-r--r--  drivers/gpu/drm/i915/intel_lvds.c  12
-rw-r--r--  drivers/gpu/drm/i915/intel_sdvo.c  138
-rw-r--r--  drivers/gpu/drm/i915/intel_tv.c  1
-rw-r--r--  drivers/gpu/drm/radeon/radeon_cp.c  4
-rw-r--r--  drivers/gpu/drm/radeon/radeon_drv.h  5
-rw-r--r--  drivers/hwmon/lm78.c  2
-rw-r--r--  drivers/ide/ide-io.c  4
-rw-r--r--  drivers/ide/ide-iops.c  21
-rw-r--r--  drivers/ide/ide-lib.c  27
-rw-r--r--  drivers/ide/ide-pci-generic.c  11
-rw-r--r--  drivers/ide/ide-probe.c  9
-rw-r--r--  drivers/ide/pdc202xx_old.c  22
-rw-r--r--  drivers/ide/via82cxxx.c  2
-rw-r--r--  drivers/idle/i7300_idle.c  6
-rw-r--r--  drivers/input/input.c  1
-rw-r--r--  drivers/input/serio/libps2.c  2
-rw-r--r--  drivers/input/touchscreen/ucb1400_ts.c  2
-rw-r--r--  drivers/isdn/gigaset/isocdata.c  2
-rw-r--r--  drivers/lguest/x86/core.c  19
-rw-r--r--  drivers/md/bitmap.c  13
-rw-r--r--  drivers/md/dm.c  8
-rw-r--r--  drivers/md/md.c  31
-rw-r--r--  drivers/md/raid5.c  34
-rw-r--r--  drivers/media/video/ivtv/ivtv-queue.c  3
-rw-r--r--  drivers/misc/enclosure.c  6
-rw-r--r--  drivers/mmc/host/mvsdio.c  35
-rw-r--r--  drivers/mmc/host/mxcmmc.c  47
-rw-r--r--  drivers/mmc/host/omap.c  2
-rw-r--r--  drivers/mmc/host/omap_hsmmc.c  2
-rw-r--r--  drivers/mmc/host/sdhci-of.c  9
-rw-r--r--  drivers/mtd/nand/davinci_nand.c  7
-rw-r--r--  drivers/mtd/nand/mxc_nand.c  43
-rw-r--r--  drivers/net/3c509.c  4
-rw-r--r--  drivers/net/Makefile  2
-rw-r--r--  drivers/net/atl1e/atl1e_main.c  1
-rw-r--r--  drivers/net/atlx/atl1.c  6
-rw-r--r--  drivers/net/atlx/atlx.h  6
-rw-r--r--  drivers/net/bfin_mac.c  29
-rw-r--r--  drivers/net/cxgb3/adapter.h  4
-rw-r--r--  drivers/net/cxgb3/cxgb3_main.c  8
-rw-r--r--  drivers/net/cxgb3/sge.c  11
-rw-r--r--  drivers/net/cxgb3/t3_hw.c  5
-rw-r--r--  drivers/net/e1000/e1000_main.c  5
-rw-r--r--  drivers/net/forcedeth.c  15
-rw-r--r--  drivers/net/gianfar.c  11
-rw-r--r--  drivers/net/gianfar.h  2
-rw-r--r--  drivers/net/mac8390.c  12
-rw-r--r--  drivers/net/mlx4/en_tx.c  8
-rw-r--r--  drivers/net/r8169.c  113
-rw-r--r--  drivers/net/wimax/i2400m/usb.c  35
-rw-r--r--  drivers/net/wireless/Kconfig  1
-rw-r--r--  drivers/net/wireless/airo.c  23
-rw-r--r--  drivers/net/wireless/at76c50x-usb.c  12
-rw-r--r--  drivers/net/wireless/ath5k/phy.c  49
-rw-r--r--  drivers/net/wireless/ath5k/reset.c  8
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-5000.c  2
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-agn.c  7
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-scan.c  7
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl3945-base.c  9
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00debug.c  2
-rw-r--r--  drivers/net/wireless/rtl818x/rtl8187_dev.c  2
-rw-r--r--  drivers/oprofile/cpu_buffer.c  8
-rw-r--r--  drivers/parisc/iosapic.c  6
-rw-r--r--  drivers/parport/parport_gsc.c  4
-rw-r--r--  drivers/parport/share.c  13
-rw-r--r--  drivers/pci/hotplug/acpiphp.h  1
-rw-r--r--  drivers/pci/hotplug/acpiphp_glue.c  63
-rw-r--r--  drivers/pci/hotplug/ibmphp_core.c  54
-rw-r--r--  drivers/pci/htirq.c  4
-rw-r--r--  drivers/pci/intel-iommu.c  9
-rw-r--r--  drivers/pci/intr_remapping.c  54
-rw-r--r--  drivers/pci/pci.c  3
-rw-r--r--  drivers/pci/probe.c  2
-rw-r--r--  drivers/pnp/pnpacpi/rsparser.c  2
-rw-r--r--  drivers/scsi/3w-9xxx.c  3
-rw-r--r--  drivers/scsi/3w-xxxx.c  5
-rw-r--r--  drivers/scsi/3w-xxxx.h  2
-rw-r--r--  drivers/scsi/Kconfig  11
-rw-r--r--  drivers/scsi/Makefile  1
-rw-r--r--  drivers/scsi/fnic/Makefile  15
-rw-r--r--  drivers/scsi/fnic/cq_desc.h  78
-rw-r--r--  drivers/scsi/fnic/cq_enet_desc.h  167
-rw-r--r--  drivers/scsi/fnic/cq_exch_desc.h  182
-rw-r--r--  drivers/scsi/fnic/fcpio.h  780
-rw-r--r--  drivers/scsi/fnic/fnic.h  265
-rw-r--r--  drivers/scsi/fnic/fnic_attrs.c  56
-rw-r--r--  drivers/scsi/fnic/fnic_fcs.c  742
-rw-r--r--  drivers/scsi/fnic/fnic_io.h  67
-rw-r--r--  drivers/scsi/fnic/fnic_isr.c  332
-rw-r--r--  drivers/scsi/fnic/fnic_main.c  942
-rw-r--r--  drivers/scsi/fnic/fnic_res.c  444
-rw-r--r--  drivers/scsi/fnic/fnic_res.h  197
-rw-r--r--  drivers/scsi/fnic/fnic_scsi.c  1850
-rw-r--r--  drivers/scsi/fnic/rq_enet_desc.h  58
-rw-r--r--  drivers/scsi/fnic/vnic_cq.c  85
-rw-r--r--  drivers/scsi/fnic/vnic_cq.h  121
-rw-r--r--  drivers/scsi/fnic/vnic_cq_copy.h  62
-rw-r--r--  drivers/scsi/fnic/vnic_dev.c  690
-rw-r--r--  drivers/scsi/fnic/vnic_dev.h  161
-rw-r--r--  drivers/scsi/fnic/vnic_devcmd.h  281
-rw-r--r--  drivers/scsi/fnic/vnic_intr.c  60
-rw-r--r--  drivers/scsi/fnic/vnic_intr.h  118
-rw-r--r--  drivers/scsi/fnic/vnic_nic.h  69
-rw-r--r--  drivers/scsi/fnic/vnic_resource.h  61
-rw-r--r--  drivers/scsi/fnic/vnic_rq.c  196
-rw-r--r--  drivers/scsi/fnic/vnic_rq.h  235
-rw-r--r--  drivers/scsi/fnic/vnic_scsi.h  99
-rw-r--r--  drivers/scsi/fnic/vnic_stats.h  68
-rw-r--r--  drivers/scsi/fnic/vnic_wq.c  182
-rw-r--r--  drivers/scsi/fnic/vnic_wq.h  175
-rw-r--r--  drivers/scsi/fnic/vnic_wq_copy.c  117
-rw-r--r--  drivers/scsi/fnic/vnic_wq_copy.h  128
-rw-r--r--  drivers/scsi/fnic/wq_enet_desc.h  96
-rw-r--r--  drivers/scsi/mpt2sas/mpt2sas_base.h  2
-rw-r--r--  drivers/scsi/scsi_scan.c  1
-rw-r--r--  drivers/scsi/scsi_transport_iscsi.c  2
-rw-r--r--  drivers/scsi/sg.c  1
-rw-r--r--  drivers/serial/8250.c  15
-rw-r--r--  drivers/serial/8250_gsc.c  4
-rw-r--r--  drivers/serial/icom.c  2
-rw-r--r--  drivers/serial/imx.c  2
-rw-r--r--  drivers/serial/mpc52xx_uart.c  2
-rw-r--r--  drivers/ssb/embedded.c  1
-rw-r--r--  drivers/usb/Makefile  1
-rw-r--r--  drivers/usb/class/cdc-acm.c  3
-rw-r--r--  drivers/usb/gadget/atmel_usba_udc.c  5
-rw-r--r--  drivers/usb/host/isp1760-hcd.c  24
-rw-r--r--  drivers/usb/serial/usb-serial.c  1
-rw-r--r--  drivers/video/atmel_lcdfb.c  10
-rw-r--r--  drivers/video/s3c-fb.c  12
-rw-r--r--  drivers/xen/Kconfig  20
-rw-r--r--  drivers/xen/Makefile  4
-rw-r--r--  drivers/xen/events.c  20
-rw-r--r--  drivers/xen/evtchn.c  507
-rw-r--r--  drivers/xen/manage.c  9
-rw-r--r--  drivers/xen/sys-hypervisor.c  445
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c  61
-rw-r--r--  drivers/xen/xenbus/xenbus_xs.c  2
-rw-r--r--  drivers/xen/xenfs/super.c  19
-rw-r--r--  firmware/cis/.gitignore  1
-rw-r--r--  fs/autofs4/waitq.c  22
-rw-r--r--  fs/binfmt_flat.c  46
-rw-r--r--  fs/bio.c  3
-rw-r--r--  fs/btrfs/extent-tree.c  51
-rw-r--r--  fs/btrfs/volumes.c  1
-rw-r--r--  fs/buffer.c  2
-rw-r--r--  fs/cachefiles/internal.h  18
-rw-r--r--  fs/cifs/dir.c  51
-rw-r--r--  fs/cifs/file.c  66
-rw-r--r--  fs/fscache/internal.h  18
-rw-r--r--  fs/inode.c  32
-rw-r--r--  fs/jbd/commit.c  6
-rw-r--r--  fs/jffs2/erase.c  7
-rw-r--r--  fs/nfs/nfs4proc.c  9
-rw-r--r--  fs/nfs/nfsroot.c  2
-rw-r--r--  fs/nfsd/vfs.c  6
-rw-r--r--  fs/nilfs2/cpfile.c  6
-rw-r--r--  fs/nilfs2/ioctl.c  2
-rw-r--r--  fs/proc/base.c  2
-rw-r--r--  fs/proc/loadavg.c  18
-rw-r--r--  fs/sysfs/file.c  2
-rw-r--r--  fs/xfs/linux-2.6/kmem.h  2
-rw-r--r--  fs/xfs/xfs_dfrag.c  8
-rw-r--r--  fs/xfs/xfs_fsops.c  2
-rw-r--r--  include/Kbuild  1
-rw-r--r--  include/asm-generic/pgtable.h  21
-rw-r--r--  include/asm-generic/vmlinux.lds.h  2
-rw-r--r--  include/drm/drmP.h  24
-rw-r--r--  include/drm/drm_crtc.h  3
-rw-r--r--  include/drm/drm_crtc_helper.h  2
-rw-r--r--  include/linux/acpi.h  2
-rw-r--r--  include/linux/amba/serial.h  1
-rw-r--r--  include/linux/auto_fs.h  3
-rw-r--r--  include/linux/blktrace_api.h  45
-rw-r--r--  include/linux/compat.h  2
-rw-r--r--  include/linux/cpumask.h  15
-rw-r--r--  include/linux/cred.h  1
-rw-r--r--  include/linux/dma-debug.h  7
-rw-r--r--  include/linux/dmar.h  3
-rw-r--r--  include/linux/ftrace.h  17
-rw-r--r--  include/linux/ftrace_event.h  172
-rw-r--r--  include/linux/futex.h  6
-rw-r--r--  include/linux/i7300_idle.h  20
-rw-r--r--  include/linux/ide.h  2
-rw-r--r--  include/linux/init_task.h  1
-rw-r--r--  include/linux/input.h  1
-rw-r--r--  include/linux/interrupt.h  2
-rw-r--r--  include/linux/irq.h  58
-rw-r--r--  include/linux/kmemtrace.h  25
-rw-r--r--  include/linux/mm.h  9
-rw-r--r--  include/linux/mmiotrace.h  2
-rw-r--r--  include/linux/module.h  8
-rw-r--r--  include/linux/mutex.h  1
-rw-r--r--  include/linux/net_dropmon.h  1
-rw-r--r--  include/linux/netfilter/nf_conntrack_tcp.h  4
-rw-r--r--  include/linux/parport.h  4
-rw-r--r--  include/linux/pci_ids.h  2
-rw-r--r--  include/linux/ptrace.h  10
-rw-r--r--  include/linux/rculist.h  30
-rw-r--r--  include/linux/rcutree.h  9
-rw-r--r--  include/linux/ring_buffer.h  66
-rw-r--r--  include/linux/sched.h  53
-rw-r--r--  include/linux/signal.h  2
-rw-r--r--  include/linux/slab_def.h  2
-rw-r--r--  include/linux/slub_def.h  2
-rw-r--r--  include/linux/spinlock_up.h  1
-rw-r--r--  include/linux/swap.h  5
-rw-r--r--  include/linux/swiotlb.h  3
-rw-r--r--  include/linux/thread_info.h  3
-rw-r--r--  include/linux/trace_seq.h  92
-rw-r--r--  include/linux/tracehook.h  11
-rw-r--r--  include/linux/tracepoint.h  8
-rw-r--r--  include/linux/wait.h  2
-rw-r--r--  include/scsi/scsi_transport_fc.h  4
-rw-r--r--  include/trace/block.h  76
-rw-r--r--  include/trace/define_trace.h  75
-rw-r--r--  include/trace/events/block.h  498
-rw-r--r--  include/trace/events/irq.h  145
-rw-r--r--  include/trace/events/kmem.h  231
-rw-r--r--  include/trace/events/lockdep.h  96
-rw-r--r--  include/trace/events/sched.h (renamed from include/trace/sched_event_types.h)  29
-rw-r--r--  include/trace/events/skb.h  40
-rw-r--r--  include/trace/events/workqueue.h  100
-rw-r--r--  include/trace/ftrace.h  591
-rw-r--r--  include/trace/irq.h  9
-rw-r--r--  include/trace/irq_event_types.h  55
-rw-r--r--  include/trace/kmemtrace.h  63
-rw-r--r--  include/trace/lockdep.h  9
-rw-r--r--  include/trace/lockdep_event_types.h  44
-rw-r--r--  include/trace/sched.h  9
-rw-r--r--  include/trace/skb.h  11
-rw-r--r--  include/trace/trace_event_types.h  5
-rw-r--r--  include/trace/trace_events.h  5
-rw-r--r--  include/trace/workqueue.h  25
-rw-r--r--  include/xen/Kbuild  1
-rw-r--r--  include/xen/events.h  3
-rw-r--r--  include/xen/evtchn.h  88
-rw-r--r--  include/xen/interface/version.h  3
-rw-r--r--  include/xen/xenbus.h  3
-rw-r--r--  init/Kconfig  2
-rw-r--r--  init/main.c  5
-rw-r--r--  ipc/sem.c  4
-rw-r--r--  ipc/shm.c  7
-rw-r--r--  kernel/Makefile  1
-rw-r--r--  kernel/async.c  13
-rw-r--r--  kernel/compat.c  11
-rw-r--r--  kernel/exit.c  6
-rw-r--r--  kernel/fork.c  21
-rw-r--r--  kernel/futex.c  1188
-rw-r--r--  kernel/irq/Makefile  2
-rw-r--r--  kernel/irq/chip.c  12
-rw-r--r--  kernel/irq/handle.c  63
-rw-r--r--  kernel/irq/internals.h  5
-rw-r--r--  kernel/irq/manage.c  17
-rw-r--r--  kernel/irq/migration.c  14
-rw-r--r--  kernel/irq/numa_migrate.c  38
-rw-r--r--  kernel/kexec.c  2
-rw-r--r--  kernel/kmod.c  4
-rw-r--r--  kernel/kthread.c  5
-rw-r--r--  kernel/lockdep.c  16
-rw-r--r--  kernel/module.c  26
-rw-r--r--  kernel/mutex.c  29
-rw-r--r--  kernel/power/disk.c  21
-rw-r--r--  kernel/power/main.c  7
-rw-r--r--  kernel/ptrace.c  12
-rw-r--r--  kernel/rcupreempt.c  8
-rw-r--r--  kernel/rcutree.c  25
-rw-r--r--  kernel/rcutree_trace.c  64
-rw-r--r--  kernel/rtmutex.c  248
-rw-r--r--  kernel/rtmutex_common.h  8
-rw-r--r--  kernel/sched.c  359
-rw-r--r--  kernel/sched_cpupri.c  2
-rw-r--r--  kernel/sched_fair.c  13
-rw-r--r--  kernel/sched_idletask.c  3
-rw-r--r--  kernel/sched_rt.c  2
-rw-r--r--  kernel/signal.c  60
-rw-r--r--  kernel/smp.c  2
-rw-r--r--  kernel/softirq.c  9
-rw-r--r--  kernel/sysctl.c  8
-rw-r--r--  kernel/time/timekeeping.c  2
-rw-r--r--  kernel/timer.c  86
-rw-r--r--  kernel/trace/Kconfig  143
-rw-r--r--  kernel/trace/Makefile  20
-rw-r--r--  kernel/trace/blktrace.c  272
-rw-r--r--  kernel/trace/events.c  14
-rw-r--r--  kernel/trace/ftrace.c  801
-rw-r--r--  kernel/trace/kmemtrace.c  10
-rw-r--r--  kernel/trace/ring_buffer.c  777
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c  416
-rw-r--r--  kernel/trace/trace.c  383
-rw-r--r--  kernel/trace/trace.h  243
-rw-r--r--  kernel/trace/trace_boot.c  5
-rw-r--r--  kernel/trace/trace_branch.c  8
-rw-r--r--  kernel/trace/trace_event_profile.c  24
-rw-r--r--  kernel/trace/trace_event_types.h  12
-rw-r--r--  kernel/trace/trace_events.c  839
-rw-r--r--  kernel/trace/trace_events_filter.c  1204
-rw-r--r--  kernel/trace/trace_events_stage_1.h  39
-rw-r--r--  kernel/trace/trace_events_stage_2.h  176
-rw-r--r--  kernel/trace/trace_events_stage_3.h  281
-rw-r--r--  kernel/trace/trace_export.c  110
-rw-r--r--  kernel/trace/trace_functions_graph.c  31
-rw-r--r--  kernel/trace/trace_hw_branches.c  203
-rw-r--r--  kernel/trace/trace_mmiotrace.c  6
-rw-r--r--  kernel/trace/trace_output.c  240
-rw-r--r--  kernel/trace/trace_output.h  34
-rw-r--r--  kernel/trace/trace_power.c  8
-rw-r--r--  kernel/trace/trace_printk.c  6
-rw-r--r--  kernel/trace/trace_sched_switch.c  12
-rw-r--r--  kernel/trace/trace_sched_wakeup.c  8
-rw-r--r--  kernel/trace/trace_selftest.c  58
-rw-r--r--  kernel/trace/trace_stack.c  15
-rw-r--r--  kernel/trace/trace_stat.c  208
-rw-r--r--  kernel/trace/trace_stat.h  2
-rw-r--r--  kernel/trace/trace_sysprof.c  6
-rw-r--r--  kernel/trace/trace_workqueue.c  25
-rw-r--r--  kernel/wait.c  2
-rw-r--r--  kernel/workqueue.c  11
-rw-r--r--  lib/cpumask.c  12
-rw-r--r--  lib/dma-debug.c  432
-rw-r--r--  lib/swiotlb.c  119
-rw-r--r--  lib/vsprintf.c  56
-rw-r--r--  mm/bounce.c  5
-rw-r--r--  mm/filemap.c  6
-rw-r--r--  mm/hugetlb.c  26
-rw-r--r--  mm/memcontrol.c  14
-rw-r--r--  mm/mlock.c  51
-rw-r--r--  mm/oom_kill.c  24
-rw-r--r--  mm/page_alloc.c  69
-rw-r--r--  mm/percpu.c  141
-rw-r--r--  mm/rmap.c  2
-rw-r--r--  mm/slab.c  2
-rw-r--r--  mm/slob.c  2
-rw-r--r--  mm/slub.c  2
-rw-r--r--  mm/swap_state.c  4
-rw-r--r--  mm/truncate.c  1
-rw-r--r--  mm/util.c  11
-rw-r--r--  mm/vmscan.c  2
-rw-r--r--  net/bluetooth/hci_sysfs.c  6
-rw-r--r--  net/core/drop_monitor.c  2
-rw-r--r--  net/core/net-traces.c  4
-rw-r--r--  net/core/pktgen.c  2
-rw-r--r--  net/core/skbuff.c  2
-rw-r--r--  net/ipv4/fib_trie.c  6
-rw-r--r--  net/ipv4/route.c  60
-rw-r--r--  net/ipv4/tcp_vegas.c  11
-rw-r--r--  net/ipv6/route.c  3
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c  4
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c  18
-rw-r--r--  net/netfilter/nfnetlink_log.c  6
-rw-r--r--  net/netfilter/xt_hashlimit.c  2
-rw-r--r--  net/rxrpc/ar-connection.c  12
-rw-r--r--  net/sched/cls_api.c  23
-rw-r--r--  net/sched/cls_cgroup.c  25
-rw-r--r--  net/sunrpc/svcsock.c  35
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c  12
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c  10
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c  3
-rw-r--r--  net/wireless/reg.c  7
-rw-r--r--  net/wireless/wext.c  7
-rw-r--r--  samples/Kconfig  6
-rw-r--r--  samples/Makefile  2
-rw-r--r--  samples/trace_events/Makefile  6
-rw-r--r--  samples/trace_events/trace-events-sample.c  52
-rw-r--r--  samples/trace_events/trace-events-sample.h  129
-rw-r--r--  scripts/Makefile.lib  28
-rw-r--r--  scripts/bin_size  10
-rwxr-xr-x  scripts/kernel-doc  22
-rwxr-xr-x  scripts/recordmcount.pl  6
-rw-r--r--  security/integrity/ima/ima_fs.c  4
-rw-r--r--  security/smack/smackfs.c  8
-rw-r--r--  security/tomoyo/tomoyo.c  6
-rw-r--r--  sound/core/pcm_lib.c  10
-rw-r--r--  sound/core/pcm_native.c  6
-rw-r--r--  sound/drivers/pcsp/pcsp_mixer.c  2
-rw-r--r--  sound/pci/ac97/ac97_patch.c  7
-rw-r--r--  sound/pci/ca0106/ca0106_mixer.c  2
-rw-r--r--  sound/pci/hda/hda_intel.c  1
-rw-r--r--  sound/pci/hda/patch_conexant.c  1
-rw-r--r--  sound/pci/hda/patch_realtek.c  7
-rw-r--r--  sound/pci/hda/patch_sigmatel.c  10
-rw-r--r--  sound/usb/usbaudio.c  2
-rw-r--r--  sound/usb/usbaudio.h  2
-rw-r--r--  sound/usb/usbmidi.c  12
-rw-r--r--  sound/usb/usbquirks.h  2
-rw-r--r--  virt/kvm/kvm_main.c  3
745 files changed, 30120 insertions, 9824 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable
new file mode 100644
index 000000000000..175bb4f70512
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable
@@ -0,0 +1,18 @@
+What:		/sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date:		August 2008
+KernelVersion:	2.6.27
+Contact:	mark.langsdorf@amd.com
+Description:	These files exist in every cpu's cache index directories.
+		There are currently 2 cache_disable_# files in each
+		directory. Reading from these files on a supported
+		processor will return that cache disable index value
+		for that processor and node. Writing to one of these
+		files will cause the specified cache index to be disabled.
+
+		Currently, only AMD Family 10h Processors support cache index
+		disable, and only for their L3 caches. See the BIOS and
+		Kernel Developer's Guide at
+		http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf
+		for formatting information and other details on the
+		cache index disable.
+Users:		joachim.deguara@amd.com
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index d9aa43d78bcc..25fb8bcf32a2 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -704,12 +704,24 @@ this directory the following files can currently be found:
 		The current number of free dma_debug_entries
 		in the allocator.
 
+dma-api/driver-filter
+		You can write a name of a driver into this file
+		to limit the debug output to requests from that
+		particular driver. Write an empty string to
+		that file to disable the filter and see
+		all errors again.
+
 If you have this code compiled into your kernel it will be enabled by default.
 If you want to boot without the bookkeeping anyway you can provide
 'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
 Notice that you can not enable it again at runtime. You have to reboot to do
 so.
 
+If you want to see debug messages only for a specific device driver you can
+specify the dma_debug_driver=<drivername> parameter. This will enable the
+driver filter at boot time. The debug code will only print errors for that
+driver afterwards. This filter can be disabled or changed later using debugfs.
+
 When the code disables itself at runtime this is most likely because it ran
 out of dma_debug_entries. These entries are preallocated at boot. The number
 of preallocated entries is defined per architecture. If it is too low for you
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index b1eb661e6302..9632444f6c62 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -13,7 +13,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
 	    mac80211.xml debugobjects.xml sh.xml regulator.xml \
-	    alsa-driver-api.xml writing-an-alsa-driver.xml
+	    alsa-driver-api.xml writing-an-alsa-driver.xml \
+	    tracepoint.xml
 
 ###
 # The build process is as follows (targets):
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644
index 000000000000..b0756d0fd579
--- /dev/null
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Tracepoints">
+ <bookinfo>
+  <title>The Linux Kernel Tracepoint API</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Jason</firstname>
+    <surname>Baron</surname>
+    <affiliation>
+     <address>
+      <email>jbaron@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+  <chapter id="intro">
+   <title>Introduction</title>
+   <para>
+     Tracepoints are static probe points that are located in strategic points
+     throughout the kernel. 'Probes' register/unregister with tracepoints
+     via a callback mechanism. The 'probes' are strictly typed functions that
+     are passed a unique set of parameters defined by each tracepoint.
+   </para>
+
+   <para>
+     From this simple callback mechanism, 'probes' can be used to profile,
+     debug, and understand kernel behavior. There are a number of tools that
+     provide a framework for using 'probes'. These tools include Systemtap,
+     ftrace, and LTTng.
+   </para>
+
+   <para>
+     Tracepoints are defined in a number of header files via various macros.
+     Thus, the purpose of this document is to provide a clear accounting of
+     the available tracepoints. The intention is to understand not only what
+     tracepoints are available but also to understand where future
+     tracepoints might be added.
+   </para>
+
+   <para>
+     The API presented has functions of the form:
+     <function>trace_tracepointname(function parameters)</function>. These are
+     the tracepoint callbacks that are found throughout the code. Registering
+     and unregistering probes with these callback sites is covered in the
+     <filename>Documentation/trace/*</filename> directory.
+   </para>
+  </chapter>
+
+  <chapter id="irq">
+   <title>IRQ</title>
+!Iinclude/trace/events/irq.h
+  </chapter>
+
+</book>
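[Editorial aside] To make the callback mechanism described in the introduction concrete, here is a hedged sketch of the register/unregister flow. The tracepoint "subsys_eventname" and the probe are hypothetical; DECLARE_TRACE()/DEFINE_TRACE() and the generated register_trace_*/unregister_trace_* helpers are the <linux/tracepoint.h> machinery the chapter alludes to:

	#include <linux/tracepoint.h>

	/* In a header: declare the tracepoint and its typed parameters. */
	DECLARE_TRACE(subsys_eventname,
		TP_PROTO(int value),
		TP_ARGS(value));

	/* In exactly one .c file: instantiate the tracepoint. */
	DEFINE_TRACE(subsys_eventname);

	/* At the instrumented site: a no-op unless a probe is attached. */
	void subsys_do_work(int value)
	{
		trace_subsys_eventname(value);
	}

	/* A strictly typed probe; its signature must match TP_PROTO. */
	static void probe_subsys_eventname(int value)
	{
		/* profile, debug, or record 'value' here */
	}

	static int tracer_attach(void)
	{
		return register_trace_subsys_eventname(probe_subsys_eventname);
	}

	static void tracer_detach(void)
	{
		unregister_trace_subsys_eventname(probe_subsys_eventname);
	}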
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 068848240a8b..02cced183b2d 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy).
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu:
-  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
-  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
-  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
-  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
-  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
-  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
-  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
-  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu:
+  0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
+  1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
+  2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
+  3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
+  4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
+  5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
+  6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
+  7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
 rcu_bh:
-  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
-  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
-  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
-  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
-  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
-  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
+  2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
+  4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
 
 The first section lists the rcu_data structures for rcu, the second for
 rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system.
@@ -253,12 +254,6 @@ o "pqc" indicates which grace period the last-observed quiescent
253o "qp" indicates that RCU still expects a quiescent state from 254o "qp" indicates that RCU still expects a quiescent state from
254 this CPU. 255 this CPU.
255 256
256o "rpfq" is the number of rcu_pending() calls on this CPU required
257 to induce this CPU to invoke force_quiescent_state().
258
259o "rp" is low-order four hex digits of the count of how many times
260 rcu_pending() has been invoked on this CPU.
261
262o "dt" is the current value of the dyntick counter that is incremented 257o "dt" is the current value of the dyntick counter that is incremented
263 when entering or leaving dynticks idle state, either by the 258 when entering or leaving dynticks idle state, either by the
264 scheduler or by irq. The number after the "/" is the interrupt 259 scheduler or by irq. The number after the "/" is the interrupt
@@ -305,6 +300,9 @@ o "b" is the batch limit for this CPU. If more than this number
 	of RCU callbacks is ready to invoke, then the remainder will
 	be deferred.
 
+There is also an rcu/rcudata.csv file with the same information in
+comma-separated-value spreadsheet format.
+
 
 The output of "cat rcu/rcugp" looks as follows:
 
@@ -411,3 +409,63 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
 	For example, the first entry at the lowest level shows
 	"^0", indicating that it corresponds to bit zero in
 	the first entry at the middle level.
+
+
+The output of "cat rcu/rcu_pending" looks as follows:
+
+rcu:
+  0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
+  1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
+  2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
+  3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723
+  4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110
+  5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456
+  6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834
+  7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888
+rcu_bh:
+  0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314
+  1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180
+  2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936
+  3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863
+  4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671
+  5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235
+  6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
+  7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
+
+As always, this is once again split into "rcu" and "rcu_bh" portions.
+The fields are as follows:
+
+o "np" is the number of times that __rcu_pending() has been invoked
+	for the corresponding flavor of RCU.
+
+o "qsp" is the number of times that RCU was waiting for a
+	quiescent state from this CPU.
+
+o "cbr" is the number of times that this CPU had RCU callbacks
+	that had passed through a grace period, and were thus ready
+	to be invoked.
+
+o "cng" is the number of times that this CPU needed another
+	grace period while RCU was idle.
+
+o "gpc" is the number of times that an old grace period had
+	completed, but this CPU was not yet aware of it.
+
+o "gps" is the number of times that a new grace period had started,
+	but this CPU was not yet aware of it.
+
+o "nf" is the number of times that this CPU suspected that the
+	current grace period had run for too long, and thus needed to
+	be forced.
+
+	Please note that "forcing" consists of sending resched IPIs
+	to holdout CPUs. If that CPU really still is in an old RCU
+	read-side critical section, then we really do have to wait for it.
+	The assumption behind "forcing" is that the CPU is not still in
+	an old RCU read-side critical section, but has not yet responded
+	for some other reason.
+
+o "nn" is the number of times that this CPU needed nothing. Alert
+	readers will note that the rcu "nn" number for a given CPU very
+	closely matches the rcu_bh "np" number for that same CPU. This
+	is due to short-circuit evaluation in rcu_pending().
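[Editorial aside] The short-circuit evaluation mentioned under "nn" can be pictured with a small sketch. This illustrates the evaluation order only; it is not the kernel's actual rcu_pending(), and every *_sketch name is a hypothetical stand-in:

	struct rcu_flavor_sketch;
	extern struct rcu_flavor_sketch rcu_sketch, rcu_bh_sketch;
	extern int __rcu_pending_sketch(struct rcu_flavor_sketch *rsp, int cpu);

	static int rcu_pending_sketch(int cpu)
	{
		/* rcu is checked first; a hit short-circuits the rest */
		if (__rcu_pending_sketch(&rcu_sketch, cpu))
			return 1;
		/*
		 * Reached only when rcu "needed nothing" -- which is
		 * why rcu "nn" tracks rcu_bh "np" almost exactly.
		 */
		return __rcu_pending_sketch(&rcu_bh_sketch, cpu);
	}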
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index 222437efd75a..3015da0c6b2a 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -133,4 +133,4 @@ RAM/SWAP in 10240 inodes and it is only accessible by root.
 Author:
    Christoph Rohland <cr@sap.com>, 1.12.01
 Updated:
-   Hugh Dickins <hugh@veritas.com>, 4 June 2007
+   Hugh Dickins, 4 June 2007
diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt
new file mode 100644
index 000000000000..9dc1ff4fd536
--- /dev/null
+++ b/Documentation/futex-requeue-pi.txt
@@ -0,0 +1,131 @@
+Futex Requeue PI
+----------------
+
+Requeueing of tasks from a non-PI futex to a PI futex requires
+special handling in order to ensure the underlying rt_mutex is never
+left without an owner if it has waiters; doing so would break the PI
+boosting logic [see rt-mutex-design.txt]. For the purposes of
+brevity, this action will be referred to as "requeue_pi" throughout
+this document. Priority inheritance is abbreviated throughout as
+"PI".
+
+Motivation
+----------
+
+Without requeue_pi, the glibc implementation of
+pthread_cond_broadcast() must resort to waking all the tasks waiting
+on a pthread_condvar and letting them try to sort out which task
+gets to run first in classic thundering-herd formation. An ideal
+implementation would wake the highest-priority waiter, and leave the
+rest to the natural wakeup inherent in unlocking the mutex
+associated with the condvar.
+
+Consider the simplified glibc calls:
+
+/* caller must lock mutex */
+pthread_cond_wait(cond, mutex)
+{
+	lock(cond->__data.__lock);
+	unlock(mutex);
+	do {
+		unlock(cond->__data.__lock);
+		futex_wait(cond->__data.__futex);
+		lock(cond->__data.__lock);
+	} while(...)
+	unlock(cond->__data.__lock);
+	lock(mutex);
+}
+
+pthread_cond_broadcast(cond)
+{
+	lock(cond->__data.__lock);
+	unlock(cond->__data.__lock);
+	futex_requeue(cond->__data.__futex, cond->mutex);
+}
+
+Once pthread_cond_broadcast() requeues the tasks, the cond->mutex
+has waiters. Note that pthread_cond_wait() attempts to lock the
+mutex only after it has returned to user space. This will leave the
+underlying rt_mutex with waiters, and no owner, breaking the
+previously mentioned PI-boosting algorithms.
+
+In order to support PI-aware pthread_condvar's, the kernel needs to
+be able to requeue tasks to PI futexes. This support implies that
+upon a successful futex_wait system call, the caller would return to
+user space already holding the PI futex. The glibc implementation
+would be modified as follows:
+
+
+/* caller must lock mutex */
+pthread_cond_wait_pi(cond, mutex)
+{
+	lock(cond->__data.__lock);
+	unlock(mutex);
+	do {
+		unlock(cond->__data.__lock);
+		futex_wait_requeue_pi(cond->__data.__futex);
+		lock(cond->__data.__lock);
+	} while(...)
+	unlock(cond->__data.__lock);
+	/* the kernel acquired the mutex for us */
+}
+
+pthread_cond_broadcast_pi(cond)
+{
+	lock(cond->__data.__lock);
+	unlock(cond->__data.__lock);
+	futex_requeue_pi(cond->__data.__futex, cond->mutex);
+}
+
+The actual glibc implementation will likely test for PI and make the
+necessary changes inside the existing calls rather than creating new
+calls for the PI cases. Similar changes are needed for
+pthread_cond_timedwait() and pthread_cond_signal().
+
+Implementation
+--------------
+
+In order to ensure the rt_mutex has an owner if it has waiters, it
+is necessary for both the requeue code, as well as the waiting code,
+to be able to acquire the rt_mutex before returning to user space.
+The requeue code cannot simply wake the waiter and leave it to
+acquire the rt_mutex as it would open a race window between the
+requeue call returning to user space and the waiter waking and
+starting to run. This is especially true in the uncontended case.
+
+The solution involves two new rt_mutex helper routines,
+rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
+allow the requeue code to acquire an uncontended rt_mutex on behalf
+of the waiter and to enqueue the waiter on a contended rt_mutex.
+Two new futex operations provide the kernel<->user interface to
+requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI.
+
+FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
+and pthread_cond_timedwait()) to block on the initial futex and wait
+to be requeued to a PI-aware futex. The implementation is the
+result of a high-speed collision between futex_wait() and
+futex_lock_pi(), with some extra logic to check for the additional
+wake-up scenarios.
+
+FUTEX_CMP_REQUEUE_PI is called by the waker
+(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
+possibly wake the waiting tasks. Internally, this operation is
+still handled by futex_requeue() (by passing requeue_pi=1). Before
+requeueing, futex_requeue() attempts to acquire the requeue target
+PI futex on behalf of the top waiter. If it can, this waiter is
+woken. futex_requeue() then proceeds to requeue the remaining
+nr_wake+nr_requeue tasks to the PI futex, calling
+rt_mutex_start_proxy_lock() prior to each requeue to prepare the
+task as a waiter on the underlying rt_mutex. It is possible that
+the lock can be acquired at this stage as well; if so, the next
+waiter is woken to finish the acquisition of the lock.
+
+FUTEX_CMP_REQUEUE_PI accepts nr_wake and nr_requeue as arguments,
+but their sum is all that really matters. futex_requeue() will wake
+or requeue up to nr_wake + nr_requeue tasks. It will wake only as
+many tasks as it can acquire the lock for, which in the majority of
+cases should be 0 as good programming practice dictates that the
+caller of either pthread_cond_broadcast() or pthread_cond_signal()
+acquire the mutex prior to making the call. FUTEX_CMP_REQUEUE_PI
+requires that nr_wake=1. nr_requeue should be INT_MAX for broadcast
+and 0 for signal.
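[Editorial aside] A hedged sketch of driving the two ops directly via syscall(2) may help tie the interface description together. This is not glibc's implementation: error handling and the condvar bookkeeping are omitted, cond_futex/mutex_futex are illustrative futex words, and the kernel must provide FUTEX_WAIT_REQUEUE_PI/FUTEX_CMP_REQUEUE_PI:

	#include <linux/futex.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <limits.h>

	static int cond_futex;	/* non-PI futex backing the condvar */
	static int mutex_futex;	/* PI futex backing the mutex */

	/* Waiter: block on cond_futex, wake up owning mutex_futex. */
	static long wait_requeue_pi(int expected)
	{
		return syscall(SYS_futex, &cond_futex, FUTEX_WAIT_REQUEUE_PI,
			       expected, NULL /* no timeout */,
			       &mutex_futex, 0);
	}

	/* Waker: wake at most one waiter, requeue the rest onto the
	 * PI futex, per the nr_wake=1 rule stated above. */
	static long broadcast_requeue_pi(int expected)
	{
		return syscall(SYS_futex, &cond_futex, FUTEX_CMP_REQUEUE_PI,
			       1 /* nr_wake */,
			       (void *)(unsigned long)INT_MAX /* nr_requeue */,
			       &mutex_futex, expected /* val3 */);
	}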
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index 2f10ce6a879f..004ee161721e 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -150,6 +150,11 @@ fan[1-*]_min Fan minimum value
 		Unit: revolution/min (RPM)
 		RW
 
+fan[1-*]_max	Fan maximum value
+		Unit: revolution/min (RPM)
+		Only rarely supported by the hardware.
+		RW
+
 fan[1-*]_input	Fan input value.
 		Unit: revolution/min (RPM)
 		RO
@@ -390,6 +395,7 @@ OR
 in[0-*]_min_alarm
 in[0-*]_max_alarm
 fan[1-*]_min_alarm
+fan[1-*]_max_alarm
 temp[1-*]_min_alarm
 temp[1-*]_max_alarm
 temp[1-*]_crit_alarm
diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt
index 9f09557aea39..a12ea3b586e6 100644
--- a/Documentation/input/multi-touch-protocol.txt
+++ b/Documentation/input/multi-touch-protocol.txt
@@ -18,8 +18,12 @@ Usage
 Anonymous finger details are sent sequentially as separate packets of ABS
 events. Only the ABS_MT events are recognized as part of a finger
 packet. The end of a packet is marked by calling the input_mt_sync()
-function, which generates a SYN_MT_REPORT event. The end of multi-touch
-transfer is marked by calling the usual input_sync() function.
+function, which generates a SYN_MT_REPORT event. This instructs the
+receiver to accept the data for the current finger and prepare to receive
+another. The end of a multi-touch transfer is marked by calling the usual
+input_sync() function. This instructs the receiver to act upon events
+accumulated since last EV_SYN/SYN_REPORT and prepare to receive a new
+set of events/packets.
 
 A set of ABS_MT events with the desired properties is defined. The events
 are divided into categories, to allow for partial implementation. The
@@ -27,11 +31,26 @@ minimum set consists of ABS_MT_TOUCH_MAJOR, ABS_MT_POSITION_X and
 ABS_MT_POSITION_Y, which allows for multiple fingers to be tracked. If the
 device supports it, the ABS_MT_WIDTH_MAJOR may be used to provide the size
 of the approaching finger. Anisotropy and direction may be specified with
-ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. Devices with
-more granular information may specify general shapes as blobs, i.e., as a
-sequence of rectangular shapes grouped together by an
-ABS_MT_BLOB_ID. Finally, the ABS_MT_TOOL_TYPE may be used to specify
-whether the touching tool is a finger or a pen or something else.
+ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. The
+ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a
+finger or a pen or something else. Devices with more granular information
+may specify general shapes as blobs, i.e., as a sequence of rectangular
+shapes grouped together by an ABS_MT_BLOB_ID. Finally, for the few devices
+that currently support it, the ABS_MT_TRACKING_ID event may be used to
+report finger tracking from hardware [5].
+
+Here is what a minimal event sequence for a two-finger touch would look
+like:
+
+	ABS_MT_TOUCH_MAJOR
+	ABS_MT_POSITION_X
+	ABS_MT_POSITION_Y
+	SYN_MT_REPORT
+	ABS_MT_TOUCH_MAJOR
+	ABS_MT_POSITION_X
+	ABS_MT_POSITION_Y
+	SYN_MT_REPORT
+	SYN_REPORT
 
 
 Event Semantics
@@ -44,24 +63,24 @@ ABS_MT_TOUCH_MAJOR
 
 The length of the major axis of the contact. The length should be given in
 surface units. If the surface has an X times Y resolution, the largest
-possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal.
+possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal [4].
 
 ABS_MT_TOUCH_MINOR
 
 The length, in surface units, of the minor axis of the contact. If the
-contact is circular, this event can be omitted.
+contact is circular, this event can be omitted [4].
 
 ABS_MT_WIDTH_MAJOR
 
 The length, in surface units, of the major axis of the approaching
 tool. This should be understood as the size of the tool itself. The
 orientation of the contact and the approaching tool are assumed to be the
-same.
+same [4].
 
 ABS_MT_WIDTH_MINOR
 
 The length, in surface units, of the minor axis of the approaching
-tool. Omit if circular.
+tool. Omit if circular [4].
 
 The above four values can be used to derive additional information about
 the contact. The ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR approximates
@@ -70,14 +89,17 @@ different characteristic widths [1].
 
 ABS_MT_ORIENTATION
 
-The orientation of the ellipse. The value should describe half a revolution
-clockwise around the touch center. The scale of the value is arbitrary, but
-zero should be returned for an ellipse aligned along the Y axis of the
-surface. As an example, an index finger placed straight onto the axis could
-return zero orientation, something negative when twisted to the left, and
-something positive when twisted to the right. This value can be omitted if
-the touching object is circular, or if the information is not available in
-the kernel driver.
+The orientation of the ellipse. The value should describe a signed quarter
+of a revolution clockwise around the touch center. The signed value range
+is arbitrary, but zero should be returned for a finger aligned along the Y
+axis of the surface, a negative value when the finger is turned to the
+left, and a positive value when the finger is turned to the right. When
+completely aligned with the X axis, the range max should be returned.
+Orientation can be omitted if the touching object is circular, or if the
+information is not available in the kernel driver. Partial orientation
+support is possible if the device can distinguish between the two axes,
+but not (uniquely) any values in between. In such cases, the range of
+ABS_MT_ORIENTATION should be [0, 1] [4].
 
 ABS_MT_POSITION_X
 
@@ -98,8 +120,35 @@ ABS_MT_BLOB_ID
 
 The BLOB_ID groups several packets together into one arbitrarily shaped
 contact. This is a low-level anonymous grouping, and should not be confused
-with the high-level contactID, explained below. Most kernel drivers will
-not have this capability, and can safely omit the event.
+with the high-level trackingID [5]. Most kernel drivers will not have blob
+capability, and can safely omit the event.
+
+ABS_MT_TRACKING_ID
+
+The TRACKING_ID identifies an initiated contact throughout its life cycle
+[5]. There are currently only a few devices that support it, so this event
+should normally be omitted.
+
+
+Event Computation
+-----------------
+
+The flora of different hardware unavoidably leads to some devices fitting
+the MT protocol better than others. To simplify and unify the mapping,
+this section gives recipes for how to compute certain events.
+
+For devices reporting contacts as rectangular shapes, signed orientation
+cannot be obtained. Assuming X and Y are the lengths of the sides of the
+touching rectangle, here is a simple formula that retains the most
+information possible (see the C sketch below):
+
+   ABS_MT_TOUCH_MAJOR := max(X, Y)
+   ABS_MT_TOUCH_MINOR := min(X, Y)
+   ABS_MT_ORIENTATION := bool(X > Y)
+
+The range of ABS_MT_ORIENTATION should be set to [0, 1], to indicate that
+the device can distinguish between a finger along the Y axis (0) and a
+finger along the X axis (1).
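A direct C rendering of this recipe, using the kernel's min()/max()
helpers; report_rect() is an illustrative name, not part of the protocol:

	static void report_rect(struct input_dev *dev, int x, int y)
	{
		/* the longer side becomes the major axis */
		input_report_abs(dev, ABS_MT_TOUCH_MAJOR, max(x, y));
		input_report_abs(dev, ABS_MT_TOUCH_MINOR, min(x, y));
		/* 0: finger along the Y axis, 1: along the X axis */
		input_report_abs(dev, ABS_MT_ORIENTATION, x > y);
	}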
 
 
 Finger Tracking
@@ -109,14 +158,18 @@ The kernel driver should generate an arbitrary enumeration of the set of
 anonymous contacts currently on the surface. The order in which the packets
 appear in the event stream is not important.
 
-The process of finger tracking, i.e., to assign a unique contactID to each
+The process of finger tracking, i.e., to assign a unique trackingID to each
 initiated contact on the surface, is left to user space; preferably the
-multi-touch X driver [3]. In that driver, the contactID stays the same and
+multi-touch X driver [3]. In that driver, the trackingID stays the same and
 unique until the contact vanishes (when the finger leaves the surface). The
 problem of assigning a set of anonymous fingers to a set of identified
 fingers is a euclidean bipartite matching problem at each event update, and
 relies on a sufficiently rapid update rate.
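By way of illustration, a user-space matcher could start from a greedy
nearest-neighbour pass like the sketch below; a production matcher would
solve the full bipartite problem, e.g. with the Hungarian algorithm:

	#include <limits.h>

	struct point { int x, y; };

	/* index of the identified contact closest to the anonymous one
	 * (sketch only; assumes at least one previous contact exists) */
	static int nearest(const struct point *prev, int nprev,
			   struct point cur)
	{
		int i, best = 0;
		long bestd = LONG_MAX;

		for (i = 0; i < nprev; i++) {
			long dx = prev[i].x - cur.x;
			long dy = prev[i].y - cur.y;
			long d = dx * dx + dy * dy;

			if (d < bestd) {
				bestd = d;
				best = i;
			}
		}
		return best;
	}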
 
+There are a few devices that support trackingID in hardware. User space can
+make use of these native identifiers to reduce bandwidth and CPU usage.
+
+
 Notes
 -----
 
@@ -136,5 +189,7 @@ could be used to derive tilt.
 time of writing (April 2009), the MT protocol is not yet merged, and the
 prototype implements finger matching, basic mouse support and two-finger
 scrolling. The project aims at improving the quality of current multi-touch
-functionality available in the synaptics X driver, and in addition
+functionality available in the Synaptics X driver, and in addition
 implement more advanced gestures.
+[4] See the section on event computation.
+[5] See the section on finger tracking.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbfbcc75..4a3c2209a124 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -56,7 +56,6 @@ parameter is applicable:
56 ISAPNP ISA PnP code is enabled. 56 ISAPNP ISA PnP code is enabled.
57 ISDN Appropriate ISDN support is enabled. 57 ISDN Appropriate ISDN support is enabled.
58 JOY Appropriate joystick support is enabled. 58 JOY Appropriate joystick support is enabled.
59 KMEMTRACE kmemtrace is enabled.
60 LIBATA Libata driver is enabled 59 LIBATA Libata driver is enabled
61 LP Printer support is enabled. 60 LP Printer support is enabled.
62 LOOP Loopback device support is enabled. 61 LOOP Loopback device support is enabled.
@@ -329,11 +328,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			flushed before they will be reused, which
 			is a lot faster
331 330
332 amd_iommu_size= [HW,X86-64]
333 Define the size of the aperture for the AMD IOMMU
334 driver. Possible values are:
335 '32M', '64M' (default), '128M', '256M', '512M', '1G'
336
337 amijoy.map= [HW,JOY] Amiga joystick support 331 amijoy.map= [HW,JOY] Amiga joystick support
338 Map of devices attached to JOY0DAT and JOY1DAT 332 Map of devices attached to JOY0DAT and JOY1DAT
339 Format: <a>,<b> 333 Format: <a>,<b>
@@ -646,6 +640,13 @@ and is between 256 and 4096 characters. It is defined in the file
646 DMA-API debugging code disables itself because the 640 DMA-API debugging code disables itself because the
647 architectural default is too low. 641 architectural default is too low.
648 642
643 dma_debug_driver=<driver_name>
644 With this option the DMA-API debugging driver
645 filter feature can be enabled at boot time. Just
646 pass the driver to filter for as the parameter.
647 The filter can be disabled or changed to another
648 driver later using sysfs.
649
649 dscc4.setup= [NET] 650 dscc4.setup= [NET]
650 651
651 dtc3181e= [HW,SCSI] 652 dtc3181e= [HW,SCSI]
@@ -752,12 +753,25 @@ and is between 256 and 4096 characters. It is defined in the file
752 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. 753 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
753 754
 	ftrace=[tracer]
-			[ftrace] will set and start the specified tracer
+			[FTRACE] will set and start the specified tracer
 			as early as possible in order to facilitate early
 			boot debugging.
 
 	ftrace_dump_on_oops
-			[ftrace] will dump the trace buffers on oops.
+			[FTRACE] will dump the trace buffers on oops.
762
763 ftrace_filter=[function-list]
764 [FTRACE] Limit the functions traced by the function
765 tracer at boot up. function-list is a comma separated
766 list of functions. This list can be changed at run
767 time by the set_ftrace_filter file in the debugfs
768 tracing directory.
769
770 ftrace_notrace=[function-list]
771 [FTRACE] Do not trace the functions specified in
772 function-list. This list can be changed at run time
773 by the set_ftrace_notrace file in the debugfs
774 tracing directory.
761 775
762 gamecon.map[2|3]= 776 gamecon.map[2|3]=
763 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad 777 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
@@ -1054,15 +1068,6 @@ and is between 256 and 4096 characters. It is defined in the file
1054 use the HighMem zone if it exists, and the Normal 1068 use the HighMem zone if it exists, and the Normal
1055 zone if it does not. 1069 zone if it does not.
1056 1070
1057 kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
1058 Controls whether kmemtrace is enabled
1059 at boot-time.
1060
1061 kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
1062 subbufs kmemtrace's relay channel has. Set this
1063 higher than default (KMEMTRACE_N_SUBBUFS in code) if
1064 you experience buffer overruns.
1065
1066 kgdboc= [HW] kgdb over consoles. 1071 kgdboc= [HW] kgdb over consoles.
1067 Requires a tty driver that supports console polling. 1072 Requires a tty driver that supports console polling.
 			(only serial supported for now)
@@ -1535,6 +1540,10 @@ and is between 256 and 4096 characters. It is defined in the file
1535 register save and restore. The kernel will only save 1540 register save and restore. The kernel will only save
1536 legacy floating-point registers on task switch. 1541 legacy floating-point registers on task switch.
1537 1542
	noxsave		[BUGS=X86] Disables x86 extended register state save
			and restore using xsave. The kernel will fall back
			to enabling legacy floating-point and SSE state.
1546
1538 nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or 1547 nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
1539 wfi(ARM) instruction doesn't work correctly and not to 1548 wfi(ARM) instruction doesn't work correctly and not to
1540 use it. This is also useful when using JTAG debugger. 1549 use it. This is also useful when using JTAG debugger.
@@ -1571,6 +1580,9 @@ and is between 256 and 4096 characters. It is defined in the file
1571 noinitrd [RAM] Tells the kernel not to load any configured 1580 noinitrd [RAM] Tells the kernel not to load any configured
1572 initial RAM disk. 1581 initial RAM disk.
1573 1582
1583 nointremap [X86-64, Intel-IOMMU] Do not enable interrupt
1584 remapping.
1585
1574 nointroute [IA-64] 1586 nointroute [IA-64]
1575 1587
1576 nojitter [IA64] Disables jitter checking for ITC timers. 1588 nojitter [IA64] Disables jitter checking for ITC timers.
@@ -1656,6 +1668,14 @@ and is between 256 and 4096 characters. It is defined in the file
1656 oprofile.timer= [HW] 1668 oprofile.timer= [HW]
1657 Use timer interrupt instead of performance counters 1669 Use timer interrupt instead of performance counters
1658 1670
1671 oprofile.cpu_type= Force an oprofile cpu type
1672 This might be useful if you have an older oprofile
1673 userland or if you want common events.
1674 Format: { archperfmon }
1675 archperfmon: [X86] Force use of architectural
1676 perfmon on Intel CPUs instead of the
1677 CPU specific event set.
1678
1659 osst= [HW,SCSI] SCSI Tape Driver 1679 osst= [HW,SCSI] SCSI Tape Driver
1660 Format: <buffer_size>,<write_threshold> 1680 Format: <buffer_size>,<write_threshold>
1661 See also Documentation/scsi/st.txt. 1681 See also Documentation/scsi/st.txt.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f5b7127f54ac..7f5809eddee6 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -31,6 +31,7 @@ Contents:
31 31
32 - Locking functions. 32 - Locking functions.
33 - Interrupt disabling functions. 33 - Interrupt disabling functions.
34 - Sleep and wake-up functions.
34 - Miscellaneous functions. 35 - Miscellaneous functions.
35 36
36 (*) Inter-CPU locking barrier effects. 37 (*) Inter-CPU locking barrier effects.
@@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some
1217other means. 1218other means.
1218 1219
1219 1220
1221SLEEP AND WAKE-UP FUNCTIONS
1222---------------------------
1223
1224Sleeping and waking on an event flagged in global data can be viewed as an
1225interaction between two pieces of data: the task state of the task waiting for
1226the event and the global data used to indicate the event. To make sure that
1227these appear to happen in the right order, the primitives to begin the process
of going to sleep and the primitives to initiate a wake-up imply certain
1229barriers.
1230
1231Firstly, the sleeper normally follows something like this sequence of events:
1232
1233 for (;;) {
1234 set_current_state(TASK_UNINTERRUPTIBLE);
1235 if (event_indicated)
1236 break;
1237 schedule();
1238 }
1239
1240A general memory barrier is interpolated automatically by set_current_state()
1241after it has altered the task state:
1242
1243 CPU 1
1244 ===============================
1245 set_current_state();
1246 set_mb();
1247 STORE current->state
1248 <general barrier>
1249 LOAD event_indicated
1250
1251set_current_state() may be wrapped by:
1252
1253 prepare_to_wait();
1254 prepare_to_wait_exclusive();
1255
1256which therefore also imply a general memory barrier after setting the state.
1257The whole sequence above is available in various canned forms, all of which
1258interpolate the memory barrier in the right place:
1259
1260 wait_event();
1261 wait_event_interruptible();
1262 wait_event_interruptible_exclusive();
1263 wait_event_interruptible_timeout();
1264 wait_event_killable();
1265 wait_event_timeout();
1266 wait_on_bit();
1267 wait_on_bit_lock();
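For instance, a minimal pairing of one canned sleeper with its waker might
look like this (event_indicated and event_wait_queue are illustrative
names, as above):

	static DECLARE_WAIT_QUEUE_HEAD(event_wait_queue);
	static int event_indicated;

	static void consumer(void)
	{
		/* the implied barrier orders the task-state store
		 * against the event_indicated load, as described above */
		wait_event(event_wait_queue, event_indicated);
	}

	static void producer(void)
	{
		event_indicated = 1;
		wake_up(&event_wait_queue);
	}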
1268
1269
1270Secondly, code that performs a wake up normally follows something like this:
1271
1272 event_indicated = 1;
1273 wake_up(&event_wait_queue);
1274
1275or:
1276
1277 event_indicated = 1;
1278 wake_up_process(event_daemon);
1279
1280A write memory barrier is implied by wake_up() and co. if and only if they wake
1281something up. The barrier occurs before the task state is cleared, and so sits
1282between the STORE to indicate the event and the STORE to set TASK_RUNNING:
1283
1284 CPU 1 CPU 2
1285 =============================== ===============================
1286 set_current_state(); STORE event_indicated
1287 set_mb(); wake_up();
1288 STORE current->state <write barrier>
1289 <general barrier> STORE current->state
1290 LOAD event_indicated
1291
1292The available waker functions include:
1293
1294 complete();
1295 wake_up();
1296 wake_up_all();
1297 wake_up_bit();
1298 wake_up_interruptible();
1299 wake_up_interruptible_all();
1300 wake_up_interruptible_nr();
1301 wake_up_interruptible_poll();
1302 wake_up_interruptible_sync();
1303 wake_up_interruptible_sync_poll();
1304 wake_up_locked();
1305 wake_up_locked_poll();
1306 wake_up_nr();
1307 wake_up_poll();
1308 wake_up_process();
1309
1310
1311[!] Note that the memory barriers implied by the sleeper and the waker do _not_
1312order multiple stores before the wake-up with respect to loads of those stored
1313values after the sleeper has called set_current_state(). For instance, if the
1314sleeper does:
1315
1316 set_current_state(TASK_INTERRUPTIBLE);
1317 if (event_indicated)
1318 break;
1319 __set_current_state(TASK_RUNNING);
1320 do_something(my_data);
1321
1322and the waker does:
1323
1324 my_data = value;
1325 event_indicated = 1;
1326 wake_up(&event_wait_queue);
1327
1328there's no guarantee that the change to event_indicated will be perceived by
1329the sleeper as coming after the change to my_data. In such a circumstance, the
1330code on both sides must interpolate its own memory barriers between the
1331separate data accesses. Thus the above sleeper ought to do:
1332
1333 set_current_state(TASK_INTERRUPTIBLE);
1334 if (event_indicated) {
1335 smp_rmb();
1336 do_something(my_data);
1337 }
1338
1339and the waker should do:
1340
1341 my_data = value;
1342 smp_wmb();
1343 event_indicated = 1;
1344 wake_up(&event_wait_queue);
1345
1346
1220MISCELLANEOUS FUNCTIONS 1347MISCELLANEOUS FUNCTIONS
1221----------------------- 1348-----------------------
1222 1349
@@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED?
1366 1493
 Under normal operation, memory operation reordering is generally not going to
 be a problem as a single-threaded linear piece of code will still appear to
-work correctly, even if it's in an SMP kernel. There are, however, three
+work correctly, even if it's in an SMP kernel. There are, however, four
 circumstances in which reordering definitely _could_ be a problem:
1371 1498
1372 (*) Interprocessor interaction. 1499 (*) Interprocessor interaction.
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 5ba4d3fc625a..1df7f9cdab05 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -4,6 +4,7 @@
4CONTENTS 4CONTENTS
5======== 5========
6 6
70. WARNING
71. Overview 81. Overview
8 1.1 The problem 9 1.1 The problem
9 1.2 The solution 10 1.2 The solution
@@ -14,6 +15,23 @@ CONTENTS
143. Future plans 153. Future plans
15 16
16 17
180. WARNING
19==========
20
 Fiddling with these settings can result in an unstable system; the knobs
 are root-only and assume that root knows what he is doing.

Most notably:
25
26 * very small values in sched_rt_period_us can result in an unstable
27 system when the period is smaller than either the available hrtimer
28 resolution, or the time it takes to handle the budget refresh itself.
29
30 * very small values in sched_rt_runtime_us can result in an unstable
31 system when the runtime is so small the system has difficulty making
32 forward progress (NOTE: the migration thread and kstopmachine both
33 are real-time processes).
34
171. Overview 351. Overview
18=========== 36===========
19 37
@@ -169,7 +187,7 @@ get their allocated time.
169 187
 Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
 the biggest challenge as the current linux PI infrastructure is geared towards
-the limited static priority levels 0-139. With deadline scheduling you need to
+the limited static priority levels 0-99. With deadline scheduling you need to
 do deadline inheritance (since priority is inversely proportional to the
 deadline delta (deadline - now)).
175 193
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 8eec05bc079e..322869fc8a9e 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -334,6 +334,7 @@ STAC9227/9228/9229/927x
334 ref-no-jd Reference board without HP/Mic jack detection 334 ref-no-jd Reference board without HP/Mic jack detection
335 3stack D965 3stack 335 3stack D965 3stack
336 5stack D965 5stack + SPDIF 336 5stack D965 5stack + SPDIF
337 5stack-no-fp D965 5stack without front panel
337 dell-3stack Dell Dimension E520 338 dell-3stack Dell Dimension E520
338 dell-bios Fixes with Dell BIOS setup 339 dell-bios Fixes with Dell BIOS setup
339 auto BIOS setup (default) 340 auto BIOS setup (default)
diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt
index bba2dbb79d81..cfac20cf9e33 100644
--- a/Documentation/sound/alsa/Procfile.txt
+++ b/Documentation/sound/alsa/Procfile.txt
@@ -104,6 +104,11 @@ card*/pcm*/xrun_debug
104 When this value is greater than 1, the driver will show the 104 When this value is greater than 1, the driver will show the
105 stack trace additionally. This may help the debugging. 105 stack trace additionally. This may help the debugging.
106 106
	Since 2.6.30, this option also enables the hwptr check using
	jiffies. This detects spontaneous invalid pointer callback
	values, but can lead to too many corrections for (mostly
	buggy) hardware that doesn't give smooth pointer updates.
111
107card*/pcm*/sub*/info 112card*/pcm*/sub*/info
108 The general information of this PCM sub-stream. 113 The general information of this PCM sub-stream.
109 114
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644
index 000000000000..f157d7594ea7
--- /dev/null
+++ b/Documentation/trace/events.txt
@@ -0,0 +1,90 @@
1 Event Tracing
2
3 Documentation written by Theodore Ts'o
4 Updated by Li Zefan
5
61. Introduction
7===============
8
9Tracepoints (see Documentation/trace/tracepoints.txt) can be used
10without creating custom kernel modules to register probe functions
11using the event tracing infrastructure.
12
13Not all tracepoints can be traced using the event tracing system;
14the kernel developer must provide code snippets which define how the
15tracing information is saved into the tracing buffer, and how the
16tracing information should be printed.
17
182. Using Event Tracing
19======================
20
212.1 Via the 'set_event' interface
22---------------------------------
23
24The events which are available for tracing can be found in the file
25/debug/tracing/available_events.
26
27To enable a particular event, such as 'sched_wakeup', simply echo it
28to /debug/tracing/set_event. For example:
29
30 # echo sched_wakeup >> /debug/tracing/set_event
31
[ Note: '>>' is necessary, otherwise it will first disable
  all the events. ]
34
35To disable an event, echo the event name to the set_event file prefixed
36with an exclamation point:
37
38 # echo '!sched_wakeup' >> /debug/tracing/set_event
39
40To disable all events, echo an empty line to the set_event file:
41
42 # echo > /debug/tracing/set_event
43
44To enable all events, echo '*:*' or '*:' to the set_event file:
45
46 # echo *:* > /debug/tracing/set_event
47
48The events are organized into subsystems, such as ext4, irq, sched,
49etc., and a full event name looks like this: <subsystem>:<event>. The
50subsystem name is optional, but it is displayed in the available_events
51file. All of the events in a subsystem can be specified via the syntax
52"<subsystem>:*"; for example, to enable all irq events, you can use the
53command:
54
55 # echo 'irq:*' > /debug/tracing/set_event
56
572.2 Via the 'enable' toggle
58---------------------------
59
The events available are also listed in the /debug/tracing/events/
hierarchy of directories.
62
63To enable event 'sched_wakeup':
64
65 # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
66
67To disable it:
68
69 # echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
70
71To enable all events in sched subsystem:
72
73 # echo 1 > /debug/tracing/events/sched/enable
74
To enable all events:
76
77 # echo 1 > /debug/tracing/events/enable
78
79When reading one of these enable files, there are four results:
80
81 0 - all events this file affects are disabled
82 1 - all events this file affects are enabled
83 X - there is a mixture of events enabled and disabled
84 ? - this file does not affect any event
85
863. Defining an event-enabled tracepoint
87=======================================
88
See the example provided in samples/trace_events.
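A minimal sketch of such a definition, modelled on samples/trace_events
(the event name foo_bar and its field are illustrative; the usual
TRACE_SYSTEM/define_trace.h header boilerplate is omitted):

	TRACE_EVENT(foo_bar,

		TP_PROTO(int value),

		TP_ARGS(value),

		/* layout of the record saved in the ring buffer */
		TP_STRUCT__entry(
			__field(int, value)
		),

		/* how the record is filled in at the trace point */
		TP_fast_assign(
			__entry->value = value;
		),

		/* how the record is printed */
		TP_printk("value=%d", __entry->value)
	);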
90
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index fd9a3e693813..2a82d8602944 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -179,7 +179,7 @@ Here is the list of current tracers that may be configured.
 
   Function call tracer to trace all kernel functions.
 
-  "function_graph_tracer"
+  "function_graph"
 
   Similar to the function tracer except that the
185 function tracer probes the functions on their entry 185 function tracer probes the functions on their entry
@@ -518,9 +518,18 @@ priority with zero (0) being the highest priority and the nice
 values starting at 100 (nice -20). Below is a quick chart to map
 the kernel priority to user land priorities.
 
-   Kernel priority: 0 to 99    ==> user RT priority 99 to 0
-   Kernel priority: 100 to 139 ==> user nice -20 to 19
-   Kernel priority: 140        ==> idle task priority
+  Kernel Space                     User Space
+ ===============================================================
+  0(high) to  98(low)              user RT priority 99(high) to 1(low)
+                                   with SCHED_RR or SCHED_FIFO
+ ---------------------------------------------------------------
+  99                               sched_priority is not used in scheduling
+                                   decisions (it must be specified as 0)
+ ---------------------------------------------------------------
+  100(high) to 139(low)            user nice -20(high) to 19(low)
+ ---------------------------------------------------------------
+  140                              idle task priority
+ ---------------------------------------------------------------
 
 The task states are:
526 535
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644
index 000000000000..cd805e16dc27
--- /dev/null
+++ b/Documentation/trace/power.txt
@@ -0,0 +1,17 @@
1The power tracer collects detailed information about C-state and P-state
2transitions, instead of just looking at the high-level "average"
3information.
4
There is a helper script found in scripts/tracing/power.pl in the kernel
6sources which can be used to parse this information and create a
7Scalable Vector Graphics (SVG) picture from the trace data.
8
9To use this tracer:
10
11 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
12 echo power > /sys/kernel/debug/tracing/current_tracer
13 echo 1 > /sys/kernel/debug/tracing/tracing_enabled
14 sleep 1
15 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
16 cat /sys/kernel/debug/tracing/trace | \
	perl scripts/tracing/power.pl > out.svg
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index e0203662f9e9..8da3a795083f 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -50,6 +50,10 @@ Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format
50Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical 50Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical
51 pointer to single linked list of struct setup_data. 51 pointer to single linked list of struct setup_data.
52 52
53Protocol 2.10: (Kernel 2.6.31) Added a protocol for relaxed alignment
54 beyond the kernel_alignment added, new init_size and
55 pref_address fields. Added extended boot loader IDs.
56
53**** MEMORY LAYOUT 57**** MEMORY LAYOUT
54 58
55The traditional memory map for the kernel loader, used for Image or 59The traditional memory map for the kernel loader, used for Image or
@@ -168,12 +172,13 @@ Offset Proto Name Meaning
 021C/4	2.00+	ramdisk_size	initrd size (set by boot loader)
 0220/4	2.00+	bootsect_kludge	DO NOT USE - for bootsect.S use only
 0224/2	2.01+	heap_end_ptr	Free memory after setup end
-0226/2	N/A	pad1		Unused
+0226/1	2.02+(3	ext_loader_ver	Extended boot loader version
+0227/1	2.02+(3	ext_loader_type	Extended boot loader ID
 0228/4	2.02+	cmd_line_ptr	32-bit pointer to the kernel command line
 022C/4	2.03+	ramdisk_max	Highest legal initrd address
 0230/4	2.05+	kernel_alignment Physical addr alignment required for kernel
 0234/1	2.05+	relocatable_kernel Whether kernel is relocatable or not
-0235/1	N/A	pad2		Unused
+0235/1	2.10+	min_alignment	Minimum alignment, as a power of two
 0236/2	N/A	pad3		Unused
 0238/4	2.06+	cmdline_size	Maximum size of the kernel command line
 023C/4	2.07+	hardware_subarch Hardware subarchitecture
@@ -182,6 +187,8 @@ Offset Proto Name Meaning
 024C/4	2.08+	payload_length	Length of kernel payload
 0250/8	2.09+	setup_data	64-bit physical pointer to linked list
 				of struct setup_data
+0258/8	2.10+	pref_address	Preferred loading address
+0260/4	2.10+	init_size	Linear memory required during initialization
185 192
186(1) For backwards compatibility, if the setup_sects field contains 0, the 193(1) For backwards compatibility, if the setup_sects field contains 0, the
187 real value is 4. 194 real value is 4.
@@ -190,6 +197,8 @@ Offset Proto Name Meaning
190 field are unusable, which means the size of a bzImage kernel 197 field are unusable, which means the size of a bzImage kernel
191 cannot be determined. 198 cannot be determined.
192 199
200(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
201
193If the "HdrS" (0x53726448) magic number is not found at offset 0x202, 202If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
194the boot protocol version is "old". Loading an old kernel, the 203the boot protocol version is "old". Loading an old kernel, the
195following parameters should be assumed: 204following parameters should be assumed:
@@ -343,18 +352,32 @@ Protocol: 2.00+
343 0xTV here, where T is an identifier for the boot loader and V is 352 0xTV here, where T is an identifier for the boot loader and V is
344 a version number. Otherwise, enter 0xFF here. 353 a version number. Otherwise, enter 0xFF here.
345 354
355 For boot loader IDs above T = 0xD, write T = 0xE to this field and
356 write the extended ID minus 0x10 to the ext_loader_type field.
357 Similarly, the ext_loader_ver field can be used to provide more than
358 four bits for the bootloader version.
359
360 For example, for T = 0x15, V = 0x234, write:
361
362 type_of_loader <- 0xE4
363 ext_loader_type <- 0x05
364 ext_loader_ver <- 0x23
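A sketch of the same encoding in C; set_loader_id() and the hdr pointer
are illustrative, while the field names are the boot-protocol ones above:

	static void set_loader_id(struct setup_header *hdr,
				  unsigned int type, unsigned int ver)
	{
		if (type > 0xD) {
			/* extended encoding, as in the 0x15/0x234 example */
			hdr->type_of_loader = 0xE0 | (ver & 0x0F);
			hdr->ext_loader_type = type - 0x10;
		} else {
			hdr->type_of_loader = (type << 4) | (ver & 0x0F);
		}
		hdr->ext_loader_ver = ver >> 4; /* 0 when ver fits in 4 bits */
	}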
365
 	Assigned boot loader ids:
 	0  LILO			(0x00 reserved for pre-2.00 bootloader)
 	1  Loadlin
 	2  bootsect-loader	(0x20, all other values reserved)
-	3  SYSLINUX
-	4  EtherBoot
+	3  Syslinux
+	4  Etherboot/gPXE
 	5  ELILO
 	7  GRUB
-	8  U-BOOT
+	8  U-Boot
 	9  Xen
 	A  Gujin
 	B  Qemu
+	C  Arcturus Networks uCbootloader
+	E  Extended		(see ext_loader_type)
+	F  Special		(0xFF = undefined)
358 381
359 Please contact <hpa@zytor.com> if you need a bootloader ID 382 Please contact <hpa@zytor.com> if you need a bootloader ID
360 value assigned. 383 value assigned.
@@ -453,6 +476,35 @@ Protocol: 2.01+
453 Set this field to the offset (from the beginning of the real-mode 476 Set this field to the offset (from the beginning of the real-mode
454 code) of the end of the setup stack/heap, minus 0x0200. 477 code) of the end of the setup stack/heap, minus 0x0200.
455 478
479Field name: ext_loader_ver
480Type: write (optional)
481Offset/size: 0x226/1
482Protocol: 2.02+
483
484 This field is used as an extension of the version number in the
485 type_of_loader field. The total version number is considered to be
486 (type_of_loader & 0x0f) + (ext_loader_ver << 4).
487
488 The use of this field is boot loader specific. If not written, it
489 is zero.
490
491 Kernels prior to 2.6.31 did not recognize this field, but it is safe
492 to write for protocol version 2.02 or higher.
493
494Field name: ext_loader_type
495Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0)
496Offset/size: 0x227/1
497Protocol: 2.02+
498
499 This field is used as an extension of the type number in
500 type_of_loader field. If the type in type_of_loader is 0xE, then
501 the actual type is (ext_loader_type + 0x10).
502
503 This field is ignored if the type in type_of_loader is not 0xE.
504
505 Kernels prior to 2.6.31 did not recognize this field, but it is safe
506 to write for protocol version 2.02 or higher.
507
456Field name: cmd_line_ptr 508Field name: cmd_line_ptr
457Type: write (obligatory) 509Type: write (obligatory)
458Offset/size: 0x228/4 510Offset/size: 0x228/4
@@ -482,11 +534,19 @@ Protocol: 2.03+
 	0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
 
 Field name:	kernel_alignment
-Type:		read (reloc)
+Type:		read/modify (reloc)
 Offset/size:	0x230/4
-Protocol:	2.05+
+Protocol:	2.05+ (read), 2.10+ (modify)
+
+  Alignment unit required by the kernel (if relocatable_kernel is
+  true.)  A relocatable kernel that is loaded at an alignment
+  incompatible with the value in this field will be realigned during
+  kernel initialization.
 
-  Alignment unit required by the kernel (if relocatable_kernel is true.)
+  Starting with protocol version 2.10, this reflects the kernel
+  alignment preferred for optimal performance; it is possible for the
+  loader to modify this field to permit a lesser alignment.  See the
+  min_alignment and pref_address field below.
 
491Field name: relocatable_kernel 551Field name: relocatable_kernel
492Type: read (reloc) 552Type: read (reloc)
@@ -498,6 +558,22 @@ Protocol: 2.05+
498 After loading, the boot loader must set the code32_start field to 558 After loading, the boot loader must set the code32_start field to
499 point to the loaded code, or to a boot loader hook. 559 point to the loaded code, or to a boot loader hook.
500 560
561Field name: min_alignment
562Type: read (reloc)
563Offset/size: 0x235/1
564Protocol: 2.10+
565
566 This field, if nonzero, indicates as a power of two the minimum
567 alignment required, as opposed to preferred, by the kernel to boot.
568 If a boot loader makes use of this field, it should update the
569 kernel_alignment field with the alignment unit desired; typically:
570
571 kernel_alignment = 1 << min_alignment
572
573 There may be a considerable performance cost with an excessively
574 misaligned kernel. Therefore, a loader should typically try each
575 power-of-two alignment from kernel_alignment down to this alignment.
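A loader-side sketch of that strategy; can_load_at() stands in for
whatever placement check the boot loader has, it is not a real API:

	u32 align = hdr->kernel_alignment;
	u32 floor = 1 << hdr->min_alignment;

	/* try each power-of-two alignment down to the minimum */
	while (align > floor && !can_load_at(align))
		align >>= 1;

	hdr->kernel_alignment = align;	/* report the alignment used */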
576
501Field name: cmdline_size 577Field name: cmdline_size
502Type: read 578Type: read
503Offset/size: 0x238/4 579Offset/size: 0x238/4
@@ -582,6 +658,36 @@ Protocol: 2.09+
582 sure to consider the case where the linked list already contains 658 sure to consider the case where the linked list already contains
583 entries. 659 entries.
584 660
661Field name: pref_address
662Type: read (reloc)
663Offset/size: 0x258/8
664Protocol: 2.10+
665
666 This field, if nonzero, represents a preferred load address for the
667 kernel. A relocating bootloader should attempt to load at this
668 address if possible.
669
  A non-relocatable kernel will unconditionally move itself to run
  at this address.
672
673Field name: init_size
674Type: read
675Offset/size: 0x25c/4
676
677 This field indicates the amount of linear contiguous memory starting
678 at the kernel runtime start address that the kernel needs before it
679 is capable of examining its memory map. This is not the same thing
680 as the total amount of memory the kernel needs to boot, but it can
681 be used by a relocating boot loader to help select a safe load
682 address for the kernel.
683
684 The kernel runtime start address is determined by the following algorithm:
685
686 if (relocatable_kernel)
687 runtime_start = align_up(load_address, kernel_alignment)
688 else
689 runtime_start = pref_address
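Here align_up() is the usual power-of-two rounding; in C it could be
sketched as (assuming kernel_alignment is a power of two):

	#define align_up(x, a)	(((x) + (a) - 1) & ~((a) - 1))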
690
585 691
586**** THE IMAGE CHECKSUM 692**** THE IMAGE CHECKSUM
587 693
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c13040a718..2db5893d6c97 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -150,11 +150,6 @@ NUMA
150 Otherwise, the remaining system RAM is allocated to an 150 Otherwise, the remaining system RAM is allocated to an
151 additional node. 151 additional node.
152 152
153 numa=hotadd=percent
154 Only allow hotadd memory to preallocate page structures upto
155 percent of already available memory.
156 numa=hotadd=0 will disable hotadd memory.
157
158ACPI 153ACPI
159 154
160 acpi=off Don't enable ACPI 155 acpi=off Don't enable ACPI
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 29b52b14d0b4..d6498e3cd713 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -6,10 +6,11 @@ Virtual memory map with 4 level page tables:
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
 hole caused by [48:63] sign extension
 ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
-ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory
-ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole
-ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space
-ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB)
+ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
+ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
+ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
+ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
+ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0
 ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space
diff --git a/MAINTAINERS b/MAINTAINERS
index 2b349ba4add4..cf4abddfc8a4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -434,7 +434,7 @@ F: arch/alpha/
434 434
435AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER 435AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER
436P: Thomas Dahlmann 436P: Thomas Dahlmann
-M:	thomas.dahlmann@amd.com
+M:	dahlmann.thomas@arcor.de
438L: linux-geode@lists.infradead.org (moderated for non-subscribers) 438L: linux-geode@lists.infradead.org (moderated for non-subscribers)
439S: Supported 439S: Supported
440F: drivers/usb/gadget/amd5536udc.* 440F: drivers/usb/gadget/amd5536udc.*
@@ -624,6 +624,7 @@ M: paulius.zaleckas@teltonika.lt
624L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 624L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
625T: git git://gitorious.org/linux-gemini/mainline.git 625T: git git://gitorious.org/linux-gemini/mainline.git
626S: Maintained 626S: Maintained
627F: arch/arm/mach-gemini/
627 628
628ARM/EBSA110 MACHINE SUPPORT 629ARM/EBSA110 MACHINE SUPPORT
629P: Russell King 630P: Russell King
@@ -650,6 +651,7 @@ P: Paulius Zaleckas
650M: paulius.zaleckas@teltonika.lt 651M: paulius.zaleckas@teltonika.lt
651L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 652L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
652S: Maintained 653S: Maintained
654F: arch/arm/mm/*-fa*
653 655
654ARM/FOOTBRIDGE ARCHITECTURE 656ARM/FOOTBRIDGE ARCHITECTURE
655P: Russell King 657P: Russell King
@@ -1132,17 +1134,17 @@ F: fs/bfs/
1132F: include/linux/bfs_fs.h 1134F: include/linux/bfs_fs.h
1133 1135
1134BLACKFIN ARCHITECTURE 1136BLACKFIN ARCHITECTURE
-P:	Bryan Wu
-M:	cooloney@kernel.org
+P:	Mike Frysinger
+M:	vapier@gentoo.org
 L:	uclinux-dist-devel@blackfin.uclinux.org
 W:	http://blackfin.uclinux.org
 S:	Supported
 F:	arch/blackfin/
 
 BLACKFIN EMAC DRIVER
-P:	Bryan Wu
-M:	cooloney@kernel.org
-L:	uclinux-dist-devel@blackfin.uclinux.org (subscribers-only)
+P:	Michael Hennerich
+M:	michael.hennerich@analog.com
+L:	uclinux-dist-devel@blackfin.uclinux.org
1146W: http://blackfin.uclinux.org 1148W: http://blackfin.uclinux.org
1147S: Supported 1149S: Supported
1148F: drivers/net/bfin_mac.* 1150F: drivers/net/bfin_mac.*
@@ -1150,7 +1152,7 @@ F: drivers/net/bfin_mac.*
1150BLACKFIN RTC DRIVER 1152BLACKFIN RTC DRIVER
1151P: Mike Frysinger 1153P: Mike Frysinger
1152M: vapier.adi@gmail.com 1154M: vapier.adi@gmail.com
1153L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) 1155L: uclinux-dist-devel@blackfin.uclinux.org
1154W: http://blackfin.uclinux.org 1156W: http://blackfin.uclinux.org
1155S: Supported 1157S: Supported
1156F: drivers/rtc/rtc-bfin.c 1158F: drivers/rtc/rtc-bfin.c
@@ -1158,7 +1160,7 @@ F: drivers/rtc/rtc-bfin.c
1158BLACKFIN SERIAL DRIVER 1160BLACKFIN SERIAL DRIVER
1159P: Sonic Zhang 1161P: Sonic Zhang
1160M: sonic.zhang@analog.com 1162M: sonic.zhang@analog.com
1161L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) 1163L: uclinux-dist-devel@blackfin.uclinux.org
1162W: http://blackfin.uclinux.org 1164W: http://blackfin.uclinux.org
1163S: Supported 1165S: Supported
1164F: drivers/serial/bfin_5xx.c 1166F: drivers/serial/bfin_5xx.c
@@ -1166,7 +1168,7 @@ F: drivers/serial/bfin_5xx.c
1166BLACKFIN WATCHDOG DRIVER 1168BLACKFIN WATCHDOG DRIVER
1167P: Mike Frysinger 1169P: Mike Frysinger
1168M: vapier.adi@gmail.com 1170M: vapier.adi@gmail.com
1169L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) 1171L: uclinux-dist-devel@blackfin.uclinux.org
1170W: http://blackfin.uclinux.org 1172W: http://blackfin.uclinux.org
1171S: Supported 1173S: Supported
1172F: drivers/watchdog/bfin_wdt.c 1174F: drivers/watchdog/bfin_wdt.c
@@ -1174,7 +1176,7 @@ F: drivers/watchdog/bfin_wdt.c
1174BLACKFIN I2C TWI DRIVER 1176BLACKFIN I2C TWI DRIVER
1175P: Sonic Zhang 1177P: Sonic Zhang
1176M: sonic.zhang@analog.com 1178M: sonic.zhang@analog.com
1177L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) 1179L: uclinux-dist-devel@blackfin.uclinux.org
1178W: http://blackfin.uclinux.org/ 1180W: http://blackfin.uclinux.org/
1179S: Supported 1181S: Supported
1180F: drivers/i2c/busses/i2c-bfin-twi.c 1182F: drivers/i2c/busses/i2c-bfin-twi.c
@@ -1431,6 +1433,14 @@ P: Russell King
1431M: linux@arm.linux.org.uk 1433M: linux@arm.linux.org.uk
1432F: include/linux/clk.h 1434F: include/linux/clk.h
1433 1435
1436CISCO FCOE HBA DRIVER
1437P: Abhijeet Joglekar
1438M: abjoglek@cisco.com
1439P: Joe Eykholt
1440M: jeykholt@cisco.com
1441L: linux-scsi@vger.kernel.org
1442S: Supported
1443
1434CODA FILE SYSTEM 1444CODA FILE SYSTEM
1435P: Jan Harkes 1445P: Jan Harkes
1436M: jaharkes@cs.cmu.edu 1446M: jaharkes@cs.cmu.edu
@@ -1532,6 +1542,13 @@ W: http://www.fi.muni.cz/~kas/cosa/
1532S: Maintained 1542S: Maintained
1533F: drivers/net/wan/cosa* 1543F: drivers/net/wan/cosa*
1534 1544
1545CPMAC ETHERNET DRIVER
1546P: Florian Fainelli
1547M: florian@openwrt.org
1548L: netdev@vger.kernel.org
1549S: Maintained
1550F: drivers/net/cpmac.c
1551
1535CPU FREQUENCY DRIVERS 1552CPU FREQUENCY DRIVERS
1536P: Dave Jones 1553P: Dave Jones
1537M: davej@redhat.com 1554M: davej@redhat.com
@@ -1963,8 +1980,8 @@ F: include/linux/edac.h
1963 1980
1964EDAC-E752X 1981EDAC-E752X
1965P: Mark Gross 1982P: Mark Gross
1966P: Doug Thompson
1967M: mark.gross@intel.com 1983M: mark.gross@intel.com
1984P: Doug Thompson
1968M: dougthompson@xmission.com 1985M: dougthompson@xmission.com
1969L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) 1986L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
1970W: bluesmoke.sourceforge.net 1987W: bluesmoke.sourceforge.net
@@ -2241,7 +2258,7 @@ P: Li Yang
2241M: leoli@freescale.com 2258M: leoli@freescale.com
2242P: Zhang Wei 2259P: Zhang Wei
2243M: zw@zh-kernel.org 2260M: zw@zh-kernel.org
2244L: linuxppc-embedded@ozlabs.org 2261L: linuxppc-dev@ozlabs.org
2245L: linux-kernel@vger.kernel.org 2262L: linux-kernel@vger.kernel.org
2246S: Maintained 2263S: Maintained
2247F: drivers/dma/fsldma.* 2264F: drivers/dma/fsldma.*
@@ -5579,6 +5596,14 @@ M: ian@mnementh.co.uk
5579S: Maintained 5596S: Maintained
5580F: drivers/mmc/host/tmio_mmc.* 5597F: drivers/mmc/host/tmio_mmc.*
5581 5598
5599TMPFS (SHMEM FILESYSTEM)
5600P: Hugh Dickins
5601M: hugh.dickins@tiscali.co.uk
5602L: linux-mm@kvack.org
5603S: Maintained
5604F: include/linux/shmem_fs.h
5605F: mm/shmem.c
5606
5582TPM DEVICE DRIVER 5607TPM DEVICE DRIVER
5583P: Debora Velarde 5608P: Debora Velarde
5584M: debora@linux.vnet.ibm.com 5609M: debora@linux.vnet.ibm.com
diff --git a/Makefile b/Makefile
index b57e1f539e83..03373bb703ca 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 30
-EXTRAVERSION = -rc6
-NAME = Vindictive Armadillo
+EXTRAVERSION =
+NAME = Man-Eating Seals of Antiquity
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
8# To see a list of typical targets execute "make help" 8# To see a list of typical targets execute "make help"
@@ -533,7 +533,7 @@ endif
533 533
534include $(srctree)/arch/$(SRCARCH)/Makefile 534include $(srctree)/arch/$(SRCARCH)/Makefile
535 535
-ifneq (CONFIG_FRAME_WARN,0)
+ifneq ($(CONFIG_FRAME_WARN),0)
537KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) 537KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
538endif 538endif
539 539
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 9c9d1fd4155f..5bd5259324b7 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -176,22 +176,26 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
176 } 176 }
177} 177}
178 178
-static void
+static int
 dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
 {
 	spin_lock(&dp264_irq_lock);
 	cpu_set_irq_affinity(irq, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
+
+	return 0;
 }
 
-static void
+static int
 clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 {
 	spin_lock(&dp264_irq_lock);
 	cpu_set_irq_affinity(irq - 16, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
+
+	return 0;
 }
196 200
197static struct hw_interrupt_type dp264_irq_type = { 201static struct hw_interrupt_type dp264_irq_type = {
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 27f840a4ad3d..8dd239ebdb9e 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -157,13 +157,15 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
157 157
158} 158}
159 159
-static void
+static int
 titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 {
 	spin_lock(&titan_irq_lock);
 	titan_cpu_set_irq_affinity(irq - 16, *affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
+
+	return 0;
 }
168 170
169static void 171static void
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 3e1714c6523f..664c7b8b1ba8 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,7 +109,7 @@ static void gic_unmask_irq(unsigned int irq)
109} 109}
110 110
111#ifdef CONFIG_SMP 111#ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
+static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
@@ -122,6 +122,8 @@ static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 	val |= 1 << (cpu + shift);
 	writel(val, reg);
 	spin_unlock(&irq_controller_lock);
+
+	return 0;
 }
 #endif
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 6116e4893c0a..15f8a092b700 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -114,3 +114,16 @@
114 .align 3; \ 114 .align 3; \
115 .long 9999b,9001f; \ 115 .long 9999b,9001f; \
116 .previous 116 .previous
117
118/*
119 * SMP data memory barrier
120 */
121 .macro smp_dmb
122#ifdef CONFIG_SMP
123#if __LINUX_ARM_ARCH__ >= 7
124 dmb
125#elif __LINUX_ARM_ARCH__ == 6
126 mcr p15, 0, r0, c7, c10, 5 @ dmb
127#endif
128#endif
129 .endm
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index ee99723b3a6c..16b52f397983 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -44,11 +44,29 @@ static inline void atomic_set(atomic_t *v, int i)
44 : "cc"); 44 : "cc");
45} 45}
46 46
47static inline void atomic_add(int i, atomic_t *v)
48{
49 unsigned long tmp;
50 int result;
51
52 __asm__ __volatile__("@ atomic_add\n"
53"1: ldrex %0, [%2]\n"
54" add %0, %0, %3\n"
55" strex %1, %0, [%2]\n"
56" teq %1, #0\n"
57" bne 1b"
58 : "=&r" (result), "=&r" (tmp)
59 : "r" (&v->counter), "Ir" (i)
60 : "cc");
61}
62
47static inline int atomic_add_return(int i, atomic_t *v) 63static inline int atomic_add_return(int i, atomic_t *v)
48{ 64{
49 unsigned long tmp; 65 unsigned long tmp;
50 int result; 66 int result;
51 67
68 smp_mb();
69
52 __asm__ __volatile__("@ atomic_add_return\n" 70 __asm__ __volatile__("@ atomic_add_return\n"
53"1: ldrex %0, [%2]\n" 71"1: ldrex %0, [%2]\n"
54" add %0, %0, %3\n" 72" add %0, %0, %3\n"
@@ -59,14 +77,34 @@ static inline int atomic_add_return(int i, atomic_t *v)
59 : "r" (&v->counter), "Ir" (i) 77 : "r" (&v->counter), "Ir" (i)
60 : "cc"); 78 : "cc");
61 79
80 smp_mb();
81
62 return result; 82 return result;
63} 83}
64 84
85static inline void atomic_sub(int i, atomic_t *v)
86{
87 unsigned long tmp;
88 int result;
89
90 __asm__ __volatile__("@ atomic_sub\n"
91"1: ldrex %0, [%2]\n"
92" sub %0, %0, %3\n"
93" strex %1, %0, [%2]\n"
94" teq %1, #0\n"
95" bne 1b"
96 : "=&r" (result), "=&r" (tmp)
97 : "r" (&v->counter), "Ir" (i)
98 : "cc");
99}
100
65static inline int atomic_sub_return(int i, atomic_t *v) 101static inline int atomic_sub_return(int i, atomic_t *v)
66{ 102{
67 unsigned long tmp; 103 unsigned long tmp;
68 int result; 104 int result;
69 105
106 smp_mb();
107
70 __asm__ __volatile__("@ atomic_sub_return\n" 108 __asm__ __volatile__("@ atomic_sub_return\n"
71"1: ldrex %0, [%2]\n" 109"1: ldrex %0, [%2]\n"
72" sub %0, %0, %3\n" 110" sub %0, %0, %3\n"
@@ -77,6 +115,8 @@ static inline int atomic_sub_return(int i, atomic_t *v)
77 : "r" (&v->counter), "Ir" (i) 115 : "r" (&v->counter), "Ir" (i)
78 : "cc"); 116 : "cc");
79 117
118 smp_mb();
119
80 return result; 120 return result;
81} 121}
82 122
@@ -84,6 +124,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
84{ 124{
85 unsigned long oldval, res; 125 unsigned long oldval, res;
86 126
127 smp_mb();
128
87 do { 129 do {
88 __asm__ __volatile__("@ atomic_cmpxchg\n" 130 __asm__ __volatile__("@ atomic_cmpxchg\n"
89 "ldrex %1, [%2]\n" 131 "ldrex %1, [%2]\n"
@@ -95,6 +137,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
95 : "cc"); 137 : "cc");
96 } while (res); 138 } while (res);
97 139
140 smp_mb();
141
98 return oldval; 142 return oldval;
99} 143}
100 144
@@ -135,6 +179,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
135 179
136 return val; 180 return val;
137} 181}
182#define atomic_add(i, v) (void) atomic_add_return(i, v)
138 183
139static inline int atomic_sub_return(int i, atomic_t *v) 184static inline int atomic_sub_return(int i, atomic_t *v)
140{ 185{
@@ -148,6 +193,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
148 193
149 return val; 194 return val;
150} 195}
196#define atomic_sub(i, v) (void) atomic_sub_return(i, v)
151 197
152static inline int atomic_cmpxchg(atomic_t *v, int old, int new) 198static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
153{ 199{
@@ -187,10 +233,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
187} 233}
188#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) 234#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
189 235
190#define atomic_add(i, v) (void) atomic_add_return(i, v) 236#define atomic_inc(v) atomic_add(1, v)
191#define atomic_inc(v) (void) atomic_add_return(1, v) 237#define atomic_dec(v) atomic_sub(1, v)
192#define atomic_sub(i, v) (void) atomic_sub_return(i, v)
193#define atomic_dec(v) (void) atomic_sub_return(1, v)
194 238
195#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) 239#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
196#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) 240#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
@@ -200,11 +244,10 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
200 244
201#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) 245#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
202 246
203/* Atomic operations are already serializing on ARM */ 247#define smp_mb__before_atomic_dec() smp_mb()
204#define smp_mb__before_atomic_dec() barrier() 248#define smp_mb__after_atomic_dec() smp_mb()
205#define smp_mb__after_atomic_dec() barrier() 249#define smp_mb__before_atomic_inc() smp_mb()
206#define smp_mb__before_atomic_inc() barrier() 250#define smp_mb__after_atomic_inc() smp_mb()
207#define smp_mb__after_atomic_inc() barrier()
208 251
209#include <asm-generic/atomic.h> 252#include <asm-generic/atomic.h>
210#endif 253#endif
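With these defines, the generic pairing idiom the barriers support reads
as follows (a sketch of the usual idiom, not code from this patch; obj
and its fields are illustrative):

	/* make the flag store visible before the reference drop */
	obj->dead = 1;
	smp_mb__before_atomic_dec();
	atomic_dec(&obj->ref_count);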
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index cb7a9e97fd7e..feaa75f0013e 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -7,4 +7,20 @@
7#define L1_CACHE_SHIFT 5 7#define L1_CACHE_SHIFT 5
8#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 8#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
9 9
10/*
11 * Memory returned by kmalloc() may be used for DMA, so we must make
12 * sure that all such allocations are cache aligned. Otherwise,
13 * unrelated code may cause parts of the buffer to be read into the
14 * cache before the transfer is done, causing old data to be seen by
15 * the CPU.
16 */
17#define ARCH_KMALLOC_MINALIGN L1_CACHE_BYTES
18
19/*
20 * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
21 */
22#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
23#define ARCH_SLAB_MINALIGN 8
24#endif
25
10#endif 26#endif
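A sketch of the situation the new ARCH_KMALLOC_MINALIGN comment describes (the driver code is hypothetical): because kmalloc() now returns cache-line-aligned memory, a buffer handed to the streaming DMA API cannot share a cache line with unrelated data that might be read back into the cache mid-transfer.

    #include <linux/slab.h>
    #include <linux/dma-mapping.h>

    static void *rx_buf_alloc(struct device *dev, size_t len, dma_addr_t *dma)
    {
            void *buf = kmalloc(len, GFP_KERNEL);   /* cache-line aligned */

            if (!buf)
                    return NULL;
            *dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
            return buf;
    }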
diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h
index 1d77e51907f6..59426a4595c9 100644
--- a/arch/arm/include/asm/flat.h
+++ b/arch/arm/include/asm/flat.h
@@ -5,9 +5,6 @@
5#ifndef __ARM_FLAT_H__ 5#ifndef __ARM_FLAT_H__
6#define __ARM_FLAT_H__ 6#define __ARM_FLAT_H__
7 7
8/* An odd number of words will be pushed after this alignment, so
9 deliberately misalign the value. */
10#define flat_stack_align(sp) sp = (void *)(((unsigned long)(sp) - 4) | 4)
11#define flat_argvp_envp_on_stack() 1 8#define flat_argvp_envp_on_stack() 1
12#define flat_old_ram_flag(flags) (flags) 9#define flat_old_ram_flag(flags) (flags)
13#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) 10#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index e6eb8a67b807..7b522770f29d 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -202,13 +202,6 @@ typedef struct page *pgtable_t;
202 (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ 202 (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
203 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 203 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
204 204
205/*
206 * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
207 */
208#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
209#define ARCH_SLAB_MINALIGN 8
210#endif
211
212#include <asm-generic/page.h> 205#include <asm-generic/page.h>
213 206
214#endif 207#endif
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index bd4dc8ed53d5..d65b2f5bf41f 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -248,6 +248,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
248 unsigned int tmp; 248 unsigned int tmp;
249#endif 249#endif
250 250
251 smp_mb();
252
251 switch (size) { 253 switch (size) {
252#if __LINUX_ARM_ARCH__ >= 6 254#if __LINUX_ARM_ARCH__ >= 6
253 case 1: 255 case 1:
@@ -307,6 +309,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
307 __bad_xchg(ptr, size), ret = 0; 309 __bad_xchg(ptr, size), ret = 0;
308 break; 310 break;
309 } 311 }
312 smp_mb();
310 313
311 return ret; 314 return ret;
312} 315}
@@ -316,6 +319,12 @@ extern void enable_hlt(void);
316 319
317#include <asm-generic/cmpxchg-local.h> 320#include <asm-generic/cmpxchg-local.h>
318 321
322#if __LINUX_ARM_ARCH__ < 6
323
324#ifdef CONFIG_SMP
325#error "SMP is not supported on this platform"
326#endif
327
319/* 328/*
320 * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make 329 * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
321 * them available. 330 * them available.
@@ -329,6 +338,173 @@ extern void enable_hlt(void);
329#include <asm-generic/cmpxchg.h> 338#include <asm-generic/cmpxchg.h>
330#endif 339#endif
331 340
341#else /* __LINUX_ARM_ARCH__ >= 6 */
342
343extern void __bad_cmpxchg(volatile void *ptr, int size);
344
345/*
346 * cmpxchg only supports 32-bit operands on ARMv6.
347 */
348
349static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
350 unsigned long new, int size)
351{
352 unsigned long oldval, res;
353
354 switch (size) {
355#ifdef CONFIG_CPU_32v6K
356 case 1:
357 do {
358 asm volatile("@ __cmpxchg1\n"
359 " ldrexb %1, [%2]\n"
360 " mov %0, #0\n"
361 " teq %1, %3\n"
362 " strexbeq %0, %4, [%2]\n"
363 : "=&r" (res), "=&r" (oldval)
364 : "r" (ptr), "Ir" (old), "r" (new)
365 : "memory", "cc");
366 } while (res);
367 break;
368 case 2:
369 do {
370 asm volatile("@ __cmpxchg2\n"
371 " ldrexh %1, [%2]\n"
372 " mov %0, #0\n"
373 " teq %1, %3\n"
374 " strexheq %0, %4, [%2]\n"
375 : "=&r" (res), "=&r" (oldval)
376 : "r" (ptr), "Ir" (old), "r" (new)
377 : "memory", "cc");
378 } while (res);
379 break;
380#endif /* CONFIG_CPU_32v6K */
381 case 4:
382 do {
383 asm volatile("@ __cmpxchg4\n"
384 " ldrex %1, [%2]\n"
385 " mov %0, #0\n"
386 " teq %1, %3\n"
387 " strexeq %0, %4, [%2]\n"
388 : "=&r" (res), "=&r" (oldval)
389 : "r" (ptr), "Ir" (old), "r" (new)
390 : "memory", "cc");
391 } while (res);
392 break;
393 default:
394 __bad_cmpxchg(ptr, size);
395 oldval = 0;
396 }
397
398 return oldval;
399}
400
401static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
402 unsigned long new, int size)
403{
404 unsigned long ret;
405
406 smp_mb();
407 ret = __cmpxchg(ptr, old, new, size);
408 smp_mb();
409
410 return ret;
411}
412
413#define cmpxchg(ptr,o,n) \
414 ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \
415 (unsigned long)(o), \
416 (unsigned long)(n), \
417 sizeof(*(ptr))))
418
419static inline unsigned long __cmpxchg_local(volatile void *ptr,
420 unsigned long old,
421 unsigned long new, int size)
422{
423 unsigned long ret;
424
425 switch (size) {
426#ifndef CONFIG_CPU_32v6K
427 case 1:
428 case 2:
429 ret = __cmpxchg_local_generic(ptr, old, new, size);
430 break;
431#endif /* !CONFIG_CPU_32v6K */
432 default:
433 ret = __cmpxchg(ptr, old, new, size);
434 }
435
436 return ret;
437}
438
439#define cmpxchg_local(ptr,o,n) \
440 ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \
441 (unsigned long)(o), \
442 (unsigned long)(n), \
443 sizeof(*(ptr))))
444
445#ifdef CONFIG_CPU_32v6K
446
447/*
448 * Note : ARMv7-M (currently unsupported by Linux) does not support
449 * ldrexd/strexd. If ARMv7-M is ever supported by the Linux kernel, it should
450 * not be allowed to use __cmpxchg64.
451 */
452static inline unsigned long long __cmpxchg64(volatile void *ptr,
453 unsigned long long old,
454 unsigned long long new)
455{
456 register unsigned long long oldval asm("r0");
457 register unsigned long long __old asm("r2") = old;
458 register unsigned long long __new asm("r4") = new;
459 unsigned long res;
460
461 do {
462 asm volatile(
463 " @ __cmpxchg8\n"
464 " ldrexd %1, %H1, [%2]\n"
465 " mov %0, #0\n"
466 " teq %1, %3\n"
467 " teqeq %H1, %H3\n"
468 " strexdeq %0, %4, %H4, [%2]\n"
469 : "=&r" (res), "=&r" (oldval)
470 : "r" (ptr), "Ir" (__old), "r" (__new)
471 : "memory", "cc");
472 } while (res);
473
474 return oldval;
475}
476
477static inline unsigned long long __cmpxchg64_mb(volatile void *ptr,
478 unsigned long long old,
479 unsigned long long new)
480{
481 unsigned long long ret;
482
483 smp_mb();
484 ret = __cmpxchg64(ptr, old, new);
485 smp_mb();
486
487 return ret;
488}
489
490#define cmpxchg64(ptr,o,n) \
491 ((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \
492 (unsigned long long)(o), \
493 (unsigned long long)(n)))
494
495#define cmpxchg64_local(ptr,o,n) \
496 ((__typeof__(*(ptr)))__cmpxchg64((ptr), \
497 (unsigned long long)(o), \
498 (unsigned long long)(n)))
499
500#else /* !CONFIG_CPU_32v6K */
501
502#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
503
504#endif /* CONFIG_CPU_32v6K */
505
506#endif /* __LINUX_ARM_ARCH__ >= 6 */
507
332#endif /* __ASSEMBLY__ */ 508#endif /* __ASSEMBLY__ */
333 509
334#define arch_align_stack(x) (x) 510#define arch_align_stack(x) (x)
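The net effect of the system.h hunk: ARMv6+ gains native cmpxchg()/cmpxchg64() wrapped in mandatory barriers via __cmpxchg_mb(), plus barrier-free _local variants, while pre-v6 keeps the asm-generic fallbacks. A sketch of the caller-visible contract (the code itself is hypothetical):

    static unsigned long state;     /* 0 = idle, 1 = claimed */

    static int try_claim(void)
    {
            /* full barriers on both sides, per __cmpxchg_mb() above */
            return cmpxchg(&state, 0UL, 1UL) == 0UL;
    }

    static int try_claim_local(unsigned long *percpu_state)
    {
            /* no barriers: atomic only wrt the current CPU */
            return cmpxchg_local(percpu_state, 0UL, 1UL) == 0UL;
    }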
diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c
index d4a0da1e48f4..950391f194c4 100644
--- a/arch/arm/kernel/elf.c
+++ b/arch/arm/kernel/elf.c
@@ -78,6 +78,15 @@ int arm_elf_read_implies_exec(const struct elf32_hdr *x, int executable_stack)
78 return 1; 78 return 1;
79 if (cpu_architecture() < CPU_ARCH_ARMv6) 79 if (cpu_architecture() < CPU_ARCH_ARMv6)
80 return 1; 80 return 1;
81#if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
82 /*
83 * If we have support for OABI programs, we can never allow NX
84 * support - our signal syscall restart mechanism relies upon
85 * being able to execute code placed on the user stack.
86 */
87 return 1;
88#else
81 return 0; 89 return 0;
90#endif
82} 91}
83EXPORT_SYMBOL(arm_elf_read_implies_exec); 92EXPORT_SYMBOL(arm_elf_read_implies_exec);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index d662a2f1fd85..83b1da6b7baa 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -815,10 +815,7 @@ __kuser_helper_start:
815 */ 815 */
816 816
817__kuser_memory_barrier: @ 0xffff0fa0 817__kuser_memory_barrier: @ 0xffff0fa0
818 818 smp_dmb
819#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_SMP)
820 mcr p15, 0, r0, c7, c10, 5 @ dmb
821#endif
822 usr_ret lr 819 usr_ret lr
823 820
824 .align 5 821 .align 5
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 2e787d40d599..c7f2627385e7 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -18,12 +18,14 @@
18 mov r2, #1 18 mov r2, #1
19 add r1, r1, r0, lsr #3 @ Get byte offset 19 add r1, r1, r0, lsr #3 @ Get byte offset
20 mov r3, r2, lsl r3 @ create mask 20 mov r3, r2, lsl r3 @ create mask
21 smp_dmb
211: ldrexb r2, [r1] 221: ldrexb r2, [r1]
22 ands r0, r2, r3 @ save old value of bit 23 ands r0, r2, r3 @ save old value of bit
23 \instr r2, r2, r3 @ toggle bit 24 \instr r2, r2, r3 @ toggle bit
24 strexb ip, r2, [r1] 25 strexb ip, r2, [r1]
25 cmp ip, #0 26 cmp ip, #0
26 bne 1b 27 bne 1b
28 smp_dmb
27 cmp r0, #0 29 cmp r0, #0
28 movne r0, #1 30 movne r0, #1
292: mov pc, lr 312: mov pc, lr
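The smp_dmb pair turns the value-returning bitops (test_and_set_bit() and friends) into full barriers on SMP, which lock-style callers assume. A sketch of such a caller (hypothetical code; cpu_relax() comes in via <asm/processor.h>):

    #include <linux/bitops.h>

    static unsigned long lock_word;

    static void bit_lock(void)
    {
            while (test_and_set_bit(0, &lock_word)) /* barrier on success */
                    cpu_relax();
    }

    static void bit_unlock(void)
    {
            smp_mb__before_clear_bit();             /* order the unlock   */
            clear_bit(0, &lock_word);
    }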
diff --git a/arch/arm/mach-gemini/include/mach/hardware.h b/arch/arm/mach-gemini/include/mach/hardware.h
index de6752674c05..213a4fcfeb1c 100644
--- a/arch/arm/mach-gemini/include/mach/hardware.h
+++ b/arch/arm/mach-gemini/include/mach/hardware.h
@@ -15,10 +15,9 @@
15/* 15/*
16 * Memory Map definitions 16 * Memory Map definitions
17 */ 17 */
18/* FIXME: Does it really swap SRAM like this? */
19#ifdef CONFIG_GEMINI_MEM_SWAP 18#ifdef CONFIG_GEMINI_MEM_SWAP
20# define GEMINI_DRAM_BASE 0x00000000 19# define GEMINI_DRAM_BASE 0x00000000
21# define GEMINI_SRAM_BASE 0x20000000 20# define GEMINI_SRAM_BASE 0x70000000
22#else 21#else
23# define GEMINI_SRAM_BASE 0x00000000 22# define GEMINI_SRAM_BASE 0x00000000
24# define GEMINI_DRAM_BASE 0x10000000 23# define GEMINI_DRAM_BASE 0x10000000
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index eeb00240d784..be1ca28fed3f 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -144,6 +144,9 @@ static struct platform_device kirkwood_ge00 = {
144 .id = 0, 144 .id = 0,
145 .num_resources = 1, 145 .num_resources = 1,
146 .resource = kirkwood_ge00_resources, 146 .resource = kirkwood_ge00_resources,
147 .dev = {
148 .coherent_dma_mask = 0xffffffff,
149 },
147}; 150};
148 151
149void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data) 152void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data)
@@ -202,6 +205,9 @@ static struct platform_device kirkwood_ge01 = {
202 .id = 1, 205 .id = 1,
203 .num_resources = 1, 206 .num_resources = 1,
204 .resource = kirkwood_ge01_resources, 207 .resource = kirkwood_ge01_resources,
208 .dev = {
209 .coherent_dma_mask = 0xffffffff,
210 },
205}; 211};
206 212
207void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data) 213void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data)
@@ -386,12 +392,10 @@ static struct mv64xxx_i2c_pdata kirkwood_i2c_pdata = {
386 392
387static struct resource kirkwood_i2c_resources[] = { 393static struct resource kirkwood_i2c_resources[] = {
388 { 394 {
389 .name = "i2c",
390 .start = I2C_PHYS_BASE, 395 .start = I2C_PHYS_BASE,
391 .end = I2C_PHYS_BASE + 0x1f, 396 .end = I2C_PHYS_BASE + 0x1f,
392 .flags = IORESOURCE_MEM, 397 .flags = IORESOURCE_MEM,
393 }, { 398 }, {
394 .name = "i2c",
395 .start = IRQ_KIRKWOOD_TWSI, 399 .start = IRQ_KIRKWOOD_TWSI,
396 .end = IRQ_KIRKWOOD_TWSI, 400 .end = IRQ_KIRKWOOD_TWSI,
397 .flags = IORESOURCE_IRQ, 401 .flags = IORESOURCE_IRQ,
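This hunk and the matching ones below for loki, mv78xx0 and orion5x all follow one pattern: a statically defined platform device used by the DMA-capable mv643xx_eth driver gets its coherent DMA mask set at definition time. Reduced to its shape (a sketch, not a complete device):

    #include <linux/platform_device.h>
    #include <linux/mv643xx_eth.h>          /* MV643XX_ETH_NAME */

    static struct platform_device example_ge = {
            .name   = MV643XX_ETH_NAME,
            .id     = 0,
            .dev    = {
                    .coherent_dma_mask = 0xffffffff,    /* 32-bit DMA */
            },
    };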
diff --git a/arch/arm/mach-kirkwood/ts219-setup.c b/arch/arm/mach-kirkwood/ts219-setup.c
index dda5743cf3e0..01aa213c0a6f 100644
--- a/arch/arm/mach-kirkwood/ts219-setup.c
+++ b/arch/arm/mach-kirkwood/ts219-setup.c
@@ -142,6 +142,8 @@ static unsigned int qnap_ts219_mpp_config[] __initdata = {
142 MPP1_SPI_MOSI, 142 MPP1_SPI_MOSI,
143 MPP2_SPI_SCK, 143 MPP2_SPI_SCK,
144 MPP3_SPI_MISO, 144 MPP3_SPI_MISO,
145 MPP4_SATA1_ACTn,
146 MPP5_SATA0_ACTn,
145 MPP8_TW_SDA, 147 MPP8_TW_SDA,
146 MPP9_TW_SCK, 148 MPP9_TW_SCK,
147 MPP10_UART0_TXD, 149 MPP10_UART0_TXD,
@@ -150,10 +152,6 @@ static unsigned int qnap_ts219_mpp_config[] __initdata = {
150 MPP14_UART1_RXD, /* PIC controller */ 152 MPP14_UART1_RXD, /* PIC controller */
151 MPP15_GPIO, /* USB Copy button */ 153 MPP15_GPIO, /* USB Copy button */
152 MPP16_GPIO, /* Reset button */ 154 MPP16_GPIO, /* Reset button */
153 MPP20_SATA1_ACTn,
154 MPP21_SATA0_ACTn,
155 MPP22_SATA1_PRESENTn,
156 MPP23_SATA0_PRESENTn,
157 0 155 0
158}; 156};
159 157
diff --git a/arch/arm/mach-loki/common.c b/arch/arm/mach-loki/common.c
index c0d2d9d12e74..818f19d7ab1f 100644
--- a/arch/arm/mach-loki/common.c
+++ b/arch/arm/mach-loki/common.c
@@ -82,6 +82,9 @@ static struct platform_device loki_ge0 = {
82 .id = 0, 82 .id = 0,
83 .num_resources = 1, 83 .num_resources = 1,
84 .resource = loki_ge0_resources, 84 .resource = loki_ge0_resources,
85 .dev = {
86 .coherent_dma_mask = 0xffffffff,
87 },
85}; 88};
86 89
87void __init loki_ge0_init(struct mv643xx_eth_platform_data *eth_data) 90void __init loki_ge0_init(struct mv643xx_eth_platform_data *eth_data)
@@ -136,6 +139,9 @@ static struct platform_device loki_ge1 = {
136 .id = 1, 139 .id = 1,
137 .num_resources = 1, 140 .num_resources = 1,
138 .resource = loki_ge1_resources, 141 .resource = loki_ge1_resources,
142 .dev = {
143 .coherent_dma_mask = 0xffffffff,
144 },
139}; 145};
140 146
141void __init loki_ge1_init(struct mv643xx_eth_platform_data *eth_data) 147void __init loki_ge1_init(struct mv643xx_eth_platform_data *eth_data)
diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
index d0bdb6e3682b..2e914649b9e4 100644
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
@@ -3,6 +3,11 @@
3 3
4#include <mach/mfp.h> 4#include <mach/mfp.h>
5 5
6#define MFP_DRIVE_VERY_SLOW (0x0 << 13)
7#define MFP_DRIVE_SLOW (0x1 << 13)
8#define MFP_DRIVE_MEDIUM (0x2 << 13)
9#define MFP_DRIVE_FAST (0x3 << 13)
10
6/* GPIO */ 11/* GPIO */
7#define GPIO0_GPIO MFP_CFG(GPIO0, AF5) 12#define GPIO0_GPIO MFP_CFG(GPIO0, AF5)
8#define GPIO1_GPIO MFP_CFG(GPIO1, AF5) 13#define GPIO1_GPIO MFP_CFG(GPIO1, AF5)
diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa910.h b/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
index 48a1cbc7c56b..d97de36c50ad 100644
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
@@ -3,6 +3,11 @@
3 3
4#include <mach/mfp.h> 4#include <mach/mfp.h>
5 5
6#define MFP_DRIVE_VERY_SLOW (0x0 << 13)
7#define MFP_DRIVE_SLOW (0x2 << 13)
8#define MFP_DRIVE_MEDIUM (0x4 << 13)
9#define MFP_DRIVE_FAST (0x8 << 13)
10
6/* UART2 */ 11/* UART2 */
7#define GPIO47_UART2_RXD MFP_CFG(GPIO47, AF6) 12#define GPIO47_UART2_RXD MFP_CFG(GPIO47, AF6)
8#define GPIO48_UART2_TXD MFP_CFG(GPIO48, AF6) 13#define GPIO48_UART2_TXD MFP_CFG(GPIO48, AF6)
diff --git a/arch/arm/mach-mmp/include/mach/mfp.h b/arch/arm/mach-mmp/include/mach/mfp.h
index 277ea4cd0f9f..62e510e80a58 100644
--- a/arch/arm/mach-mmp/include/mach/mfp.h
+++ b/arch/arm/mach-mmp/include/mach/mfp.h
@@ -12,16 +12,13 @@
12 * possible, we make the following compromise: 12 * possible, we make the following compromise:
13 * 13 *
14 * 1. SLEEP_OE_N will always be programmed to '1' (by MFP_LPM_FLOAT) 14 * 1. SLEEP_OE_N will always be programmed to '1' (by MFP_LPM_FLOAT)
15 * 2. DRIVE strength definitions redefined to include the reserved bit10 15 * 2. DRIVE strength definitions redefined to include the reserved bit
16 * - the reserved bit differs between pxa168 and pxa910, and the
17 * MFP_DRIVE_* macros are individually defined in mfp-pxa{168,910}.h
16 * 3. Override MFP_CFG() and MFP_CFG_DRV() 18 * 3. Override MFP_CFG() and MFP_CFG_DRV()
17 * 4. Drop the use of MFP_CFG_LPM() and MFP_CFG_X() 19 * 4. Drop the use of MFP_CFG_LPM() and MFP_CFG_X()
18 */ 20 */
19 21
20#define MFP_DRIVE_VERY_SLOW (0x0 << 13)
21#define MFP_DRIVE_SLOW (0x2 << 13)
22#define MFP_DRIVE_MEDIUM (0x4 << 13)
23#define MFP_DRIVE_FAST (0x8 << 13)
24
25#undef MFP_CFG 22#undef MFP_CFG
26#undef MFP_CFG_DRV 23#undef MFP_CFG_DRV
27#undef MFP_CFG_LPM 24#undef MFP_CFG_LPM
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index b03a6eda7419..a8400bb891e7 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -136,7 +136,7 @@ static struct clock_event_device ckevt = {
136 .set_mode = timer_set_mode, 136 .set_mode = timer_set_mode,
137}; 137};
138 138
139static cycle_t clksrc_read(void) 139static cycle_t clksrc_read(struct clocksource *cs)
140{ 140{
141 return timer_read(); 141 return timer_read();
142} 142}
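The clocksource ->read() callback gained a struct clocksource * argument this cycle, which is all this hunk adapts to. A minimal sketch of a matching clocksource definition (field values are hypothetical; timer_read() is the helper from the hunk above):

    #include <linux/clocksource.h>

    static cycle_t example_read(struct clocksource *cs)
    {
            return timer_read();
    }

    static struct clocksource example_cs = {
            .name   = "example-timer",
            .rating = 200,
            .read   = example_read,         /* new prototype */
            .mask   = CLOCKSOURCE_MASK(32),
    };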
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index 9ba595083dab..1b22e4af8791 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -321,6 +321,9 @@ static struct platform_device mv78xx0_ge00 = {
321 .id = 0, 321 .id = 0,
322 .num_resources = 1, 322 .num_resources = 1,
323 .resource = mv78xx0_ge00_resources, 323 .resource = mv78xx0_ge00_resources,
324 .dev = {
325 .coherent_dma_mask = 0xffffffff,
326 },
324}; 327};
325 328
326void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data) 329void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data)
@@ -375,6 +378,9 @@ static struct platform_device mv78xx0_ge01 = {
375 .id = 1, 378 .id = 1,
376 .num_resources = 1, 379 .num_resources = 1,
377 .resource = mv78xx0_ge01_resources, 380 .resource = mv78xx0_ge01_resources,
381 .dev = {
382 .coherent_dma_mask = 0xffffffff,
383 },
378}; 384};
379 385
380void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data) 386void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data)
@@ -429,6 +435,9 @@ static struct platform_device mv78xx0_ge10 = {
429 .id = 2, 435 .id = 2,
430 .num_resources = 1, 436 .num_resources = 1,
431 .resource = mv78xx0_ge10_resources, 437 .resource = mv78xx0_ge10_resources,
438 .dev = {
439 .coherent_dma_mask = 0xffffffff,
440 },
432}; 441};
433 442
434void __init mv78xx0_ge10_init(struct mv643xx_eth_platform_data *eth_data) 443void __init mv78xx0_ge10_init(struct mv643xx_eth_platform_data *eth_data)
@@ -496,6 +505,9 @@ static struct platform_device mv78xx0_ge11 = {
496 .id = 3, 505 .id = 3,
497 .num_resources = 1, 506 .num_resources = 1,
498 .resource = mv78xx0_ge11_resources, 507 .resource = mv78xx0_ge11_resources,
508 .dev = {
509 .coherent_dma_mask = 0xffffffff,
510 },
499}; 511};
500 512
501void __init mv78xx0_ge11_init(struct mv643xx_eth_platform_data *eth_data) 513void __init mv78xx0_ge11_init(struct mv643xx_eth_platform_data *eth_data)
@@ -532,12 +544,10 @@ static struct mv64xxx_i2c_pdata mv78xx0_i2c_0_pdata = {
532 544
533static struct resource mv78xx0_i2c_0_resources[] = { 545static struct resource mv78xx0_i2c_0_resources[] = {
534 { 546 {
535 .name = "i2c 0 base",
536 .start = I2C_0_PHYS_BASE, 547 .start = I2C_0_PHYS_BASE,
537 .end = I2C_0_PHYS_BASE + 0x1f, 548 .end = I2C_0_PHYS_BASE + 0x1f,
538 .flags = IORESOURCE_MEM, 549 .flags = IORESOURCE_MEM,
539 }, { 550 }, {
540 .name = "i2c 0 irq",
541 .start = IRQ_MV78XX0_I2C_0, 551 .start = IRQ_MV78XX0_I2C_0,
542 .end = IRQ_MV78XX0_I2C_0, 552 .end = IRQ_MV78XX0_I2C_0,
543 .flags = IORESOURCE_IRQ, 553 .flags = IORESOURCE_IRQ,
@@ -567,12 +577,10 @@ static struct mv64xxx_i2c_pdata mv78xx0_i2c_1_pdata = {
567 577
568static struct resource mv78xx0_i2c_1_resources[] = { 578static struct resource mv78xx0_i2c_1_resources[] = {
569 { 579 {
570 .name = "i2c 1 base",
571 .start = I2C_1_PHYS_BASE, 580 .start = I2C_1_PHYS_BASE,
572 .end = I2C_1_PHYS_BASE + 0x1f, 581 .end = I2C_1_PHYS_BASE + 0x1f,
573 .flags = IORESOURCE_MEM, 582 .flags = IORESOURCE_MEM,
574 }, { 583 }, {
575 .name = "i2c 1 irq",
576 .start = IRQ_MV78XX0_I2C_1, 584 .start = IRQ_MV78XX0_I2C_1,
577 .end = IRQ_MV78XX0_I2C_1, 585 .end = IRQ_MV78XX0_I2C_1,
578 .flags = IORESOURCE_IRQ, 586 .flags = IORESOURCE_IRQ,
diff --git a/arch/arm/mach-mx2/clock_imx21.c b/arch/arm/mach-mx2/clock_imx21.c
index 999d013e06e3..e4b08ca804ea 100644
--- a/arch/arm/mach-mx2/clock_imx21.c
+++ b/arch/arm/mach-mx2/clock_imx21.c
@@ -890,7 +890,7 @@ static struct clk clko_clk = {
890 .con_id = n, \ 890 .con_id = n, \
891 .clk = &c, \ 891 .clk = &c, \
892 }, 892 },
893static struct clk_lookup lookups[] __initdata = { 893static struct clk_lookup lookups[] = {
894/* It's unlikely that any driver wants one of them directly: 894/* It's unlikely that any driver wants one of them directly:
895 _REGISTER_CLOCK(NULL, "ckih", ckih_clk) 895 _REGISTER_CLOCK(NULL, "ckih", ckih_clk)
896 _REGISTER_CLOCK(NULL, "ckil", ckil_clk) 896 _REGISTER_CLOCK(NULL, "ckil", ckil_clk)
diff --git a/arch/arm/mach-mx2/clock_imx27.c b/arch/arm/mach-mx2/clock_imx27.c
index 3f7280c490f0..2c971442f3f2 100644
--- a/arch/arm/mach-mx2/clock_imx27.c
+++ b/arch/arm/mach-mx2/clock_imx27.c
@@ -621,7 +621,7 @@ DEFINE_CLOCK1(csi_clk, 0, 0, 0, parent, &csi_clk1, &per4_clk);
621 .clk = &c, \ 621 .clk = &c, \
622 }, 622 },
623 623
624static struct clk_lookup lookups[] __initdata = { 624static struct clk_lookup lookups[] = {
625 _REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk) 625 _REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk)
626 _REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk) 626 _REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk)
627 _REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk) 627 _REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk)
diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c
index 53a112d4e04a..3c1e06f56dd6 100644
--- a/arch/arm/mach-mx3/clock-imx35.c
+++ b/arch/arm/mach-mx3/clock-imx35.c
@@ -404,7 +404,7 @@ DEFINE_CLOCK(gpu2d_clk, 0, CCM_CGR3, 4, NULL, NULL);
404 .clk = &c, \ 404 .clk = &c, \
405 }, 405 },
406 406
407static struct clk_lookup lookups[] __initdata = { 407static struct clk_lookup lookups[] = {
408 _REGISTER_CLOCK(NULL, "asrc", asrc_clk) 408 _REGISTER_CLOCK(NULL, "asrc", asrc_clk)
409 _REGISTER_CLOCK(NULL, "ata", ata_clk) 409 _REGISTER_CLOCK(NULL, "ata", ata_clk)
410 _REGISTER_CLOCK(NULL, "audmux", audmux_clk) 410 _REGISTER_CLOCK(NULL, "audmux", audmux_clk)
diff --git a/arch/arm/mach-mx3/clock.c b/arch/arm/mach-mx3/clock.c
index 9957a11533a4..a68fcf981edf 100644
--- a/arch/arm/mach-mx3/clock.c
+++ b/arch/arm/mach-mx3/clock.c
@@ -516,7 +516,7 @@ DEFINE_CLOCK(ipg_clk, 0, NULL, 0, ipg_get_rate, NULL, &ahb_clk);
516 .clk = &c, \ 516 .clk = &c, \
517 }, 517 },
518 518
519static struct clk_lookup lookups[] __initdata = { 519static struct clk_lookup lookups[] = {
520 _REGISTER_CLOCK(NULL, "emi", emi_clk) 520 _REGISTER_CLOCK(NULL, "emi", emi_clk)
521 _REGISTER_CLOCK(NULL, "cspi", cspi1_clk) 521 _REGISTER_CLOCK(NULL, "cspi", cspi1_clk)
522 _REGISTER_CLOCK(NULL, "cspi", cspi2_clk) 522 _REGISTER_CLOCK(NULL, "cspi", cspi2_clk)
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 6af99ddabdfb..b1c7778d9f96 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -188,6 +188,9 @@ static struct platform_device orion5x_eth = {
188 .id = 0, 188 .id = 0,
189 .num_resources = 1, 189 .num_resources = 1,
190 .resource = orion5x_eth_resources, 190 .resource = orion5x_eth_resources,
191 .dev = {
192 .coherent_dma_mask = 0xffffffff,
193 },
191}; 194};
192 195
193void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data) 196void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data)
@@ -248,12 +251,10 @@ static struct mv64xxx_i2c_pdata orion5x_i2c_pdata = {
248 251
249static struct resource orion5x_i2c_resources[] = { 252static struct resource orion5x_i2c_resources[] = {
250 { 253 {
251 .name = "i2c base",
252 .start = I2C_PHYS_BASE, 254 .start = I2C_PHYS_BASE,
253 .end = I2C_PHYS_BASE + 0x1f, 255 .end = I2C_PHYS_BASE + 0x1f,
254 .flags = IORESOURCE_MEM, 256 .flags = IORESOURCE_MEM,
255 }, { 257 }, {
256 .name = "i2c irq",
257 .start = IRQ_ORION5X_I2C, 258 .start = IRQ_ORION5X_I2C,
258 .end = IRQ_ORION5X_I2C, 259 .end = IRQ_ORION5X_I2C,
259 .flags = IORESOURCE_IRQ, 260 .flags = IORESOURCE_IRQ,
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index d245e59c51b1..29970f703f3c 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -72,7 +72,10 @@ void __init pxa_set_mci_info(struct pxamci_platform_data *info)
72} 72}
73 73
74 74
75static struct pxa2xx_udc_mach_info pxa_udc_info; 75static struct pxa2xx_udc_mach_info pxa_udc_info = {
76 .gpio_pullup = -1,
77 .gpio_vbus = -1,
78};
76 79
77void __init pxa_set_udc_info(struct pxa2xx_udc_mach_info *info) 80void __init pxa_set_udc_info(struct pxa2xx_udc_mach_info *info)
78{ 81{
diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 92ba16e1b6fc..7db966dc29ce 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -111,9 +111,9 @@ static unsigned long ezx_pin_config[] __initdata = {
111 GPIO25_SSP1_TXD, 111 GPIO25_SSP1_TXD,
112 GPIO26_SSP1_RXD, 112 GPIO26_SSP1_RXD,
113 GPIO24_GPIO, /* pcap chip select */ 113 GPIO24_GPIO, /* pcap chip select */
114 GPIO1_GPIO, /* pcap interrupt */ 114 GPIO1_GPIO | WAKEUP_ON_EDGE_RISE, /* pcap interrupt */
115 GPIO4_GPIO, /* WDI_AP */ 115 GPIO4_GPIO | MFP_LPM_DRIVE_HIGH, /* WDI_AP */
116 GPIO55_GPIO, /* SYS_RESTART */ 116 GPIO55_GPIO | MFP_LPM_DRIVE_HIGH, /* SYS_RESTART */
117 117
118 /* MMC */ 118 /* MMC */
119 GPIO32_MMC_CLK, 119 GPIO32_MMC_CLK,
@@ -144,20 +144,20 @@ static unsigned long ezx_pin_config[] __initdata = {
144#if defined(CONFIG_MACH_EZX_A780) || defined(CONFIG_MACH_EZX_E680) 144#if defined(CONFIG_MACH_EZX_A780) || defined(CONFIG_MACH_EZX_E680)
145static unsigned long gen1_pin_config[] __initdata = { 145static unsigned long gen1_pin_config[] __initdata = {
146 /* flip / lockswitch */ 146 /* flip / lockswitch */
147 GPIO12_GPIO, 147 GPIO12_GPIO | WAKEUP_ON_EDGE_BOTH,
148 148
149 /* bluetooth (bcm2035) */ 149 /* bluetooth (bcm2035) */
150 GPIO14_GPIO | WAKEUP_ON_LEVEL_HIGH, /* HOSTWAKE */ 150 GPIO14_GPIO | WAKEUP_ON_EDGE_RISE, /* HOSTWAKE */
151 GPIO48_GPIO, /* RESET */ 151 GPIO48_GPIO, /* RESET */
152 GPIO28_GPIO, /* WAKEUP */ 152 GPIO28_GPIO, /* WAKEUP */
153 153
154 /* Neptune handshake */ 154 /* Neptune handshake */
155 GPIO0_GPIO | WAKEUP_ON_LEVEL_HIGH, /* BP_RDY */ 155 GPIO0_GPIO | WAKEUP_ON_EDGE_FALL, /* BP_RDY */
156 GPIO57_GPIO, /* AP_RDY */ 156 GPIO57_GPIO | MFP_LPM_DRIVE_HIGH, /* AP_RDY */
157 GPIO13_GPIO | WAKEUP_ON_LEVEL_HIGH, /* WDI */ 157 GPIO13_GPIO | WAKEUP_ON_EDGE_BOTH, /* WDI */
158 GPIO3_GPIO | WAKEUP_ON_LEVEL_HIGH, /* WDI2 */ 158 GPIO3_GPIO | WAKEUP_ON_EDGE_BOTH, /* WDI2 */
159 GPIO82_GPIO, /* RESET */ 159 GPIO82_GPIO | MFP_LPM_DRIVE_HIGH, /* RESET */
160 GPIO99_GPIO, /* TC_MM_EN */ 160 GPIO99_GPIO | MFP_LPM_DRIVE_HIGH, /* TC_MM_EN */
161 161
162 /* sound */ 162 /* sound */
163 GPIO52_SSP3_SCLK, 163 GPIO52_SSP3_SCLK,
@@ -199,21 +199,21 @@ static unsigned long gen1_pin_config[] __initdata = {
199 defined(CONFIG_MACH_EZX_E2) || defined(CONFIG_MACH_EZX_E6) 199 defined(CONFIG_MACH_EZX_E2) || defined(CONFIG_MACH_EZX_E6)
200static unsigned long gen2_pin_config[] __initdata = { 200static unsigned long gen2_pin_config[] __initdata = {
201 /* flip / lockswitch */ 201 /* flip / lockswitch */
202 GPIO15_GPIO, 202 GPIO15_GPIO | WAKEUP_ON_EDGE_BOTH,
203 203
204 /* EOC */ 204 /* EOC */
205 GPIO10_GPIO, 205 GPIO10_GPIO | WAKEUP_ON_EDGE_RISE,
206 206
207 /* bluetooth (bcm2045) */ 207 /* bluetooth (bcm2045) */
208 GPIO13_GPIO | WAKEUP_ON_LEVEL_HIGH, /* HOSTWAKE */ 208 GPIO13_GPIO | WAKEUP_ON_EDGE_RISE, /* HOSTWAKE */
209 GPIO37_GPIO, /* RESET */ 209 GPIO37_GPIO, /* RESET */
210 GPIO57_GPIO, /* WAKEUP */ 210 GPIO57_GPIO, /* WAKEUP */
211 211
212 /* Neptune handshake */ 212 /* Neptune handshake */
213 GPIO0_GPIO | WAKEUP_ON_LEVEL_HIGH, /* BP_RDY */ 213 GPIO0_GPIO | WAKEUP_ON_EDGE_FALL, /* BP_RDY */
214 GPIO96_GPIO, /* AP_RDY */ 214 GPIO96_GPIO | MFP_LPM_DRIVE_HIGH, /* AP_RDY */
215 GPIO3_GPIO | WAKEUP_ON_LEVEL_HIGH, /* WDI */ 215 GPIO3_GPIO | WAKEUP_ON_EDGE_FALL, /* WDI */
216 GPIO116_GPIO, /* RESET */ 216 GPIO116_GPIO | MFP_LPM_DRIVE_HIGH, /* RESET */
217 GPIO41_GPIO, /* BP_FLASH */ 217 GPIO41_GPIO, /* BP_FLASH */
218 218
219 /* sound */ 219 /* sound */
diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index 2121309b2474..2b27336c29f1 100644
--- a/arch/arm/mach-pxa/imote2.c
+++ b/arch/arm/mach-pxa/imote2.c
@@ -412,7 +412,7 @@ static struct platform_device imote2_flash_device = {
412 */ 412 */
413static struct i2c_board_info __initdata imote2_i2c_board_info[] = { 413static struct i2c_board_info __initdata imote2_i2c_board_info[] = {
414 { /* UCAM sensor board */ 414 { /* UCAM sensor board */
415 .type = "max1238", 415 .type = "max1239",
416 .addr = 0x35, 416 .addr = 0x35,
417 }, { /* ITS400 Sensor board only */ 417 }, { /* ITS400 Sensor board only */
418 .type = "max1363", 418 .type = "max1363",
diff --git a/arch/arm/mach-pxa/include/mach/reset.h b/arch/arm/mach-pxa/include/mach/reset.h
index 31e6a7b6ad80..b6c10556fbc7 100644
--- a/arch/arm/mach-pxa/include/mach/reset.h
+++ b/arch/arm/mach-pxa/include/mach/reset.h
@@ -13,8 +13,9 @@ extern void clear_reset_status(unsigned int mask);
13/** 13/**
14 * init_gpio_reset() - register GPIO as reset generator 14 * init_gpio_reset() - register GPIO as reset generator
15 * @gpio: gpio nr 15 * @gpio: gpio nr
16 * @output: set gpio as out/low instead of input during normal work 16 * @output: set gpio as output instead of input during normal work
17 * @level: output level
17 */ 18 */
18extern int init_gpio_reset(int gpio, int output); 19extern int init_gpio_reset(int gpio, int output, int level);
19 20
20#endif /* __ASM_ARCH_RESET_H */ 21#endif /* __ASM_ARCH_RESET_H */
diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c
index 7ffb91d64c39..cf6b720c055f 100644
--- a/arch/arm/mach-pxa/mfp-pxa2xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa2xx.c
@@ -322,6 +322,7 @@ static inline void pxa27x_mfp_init(void) {}
322#ifdef CONFIG_PM 322#ifdef CONFIG_PM
323static unsigned long saved_gafr[2][4]; 323static unsigned long saved_gafr[2][4];
324static unsigned long saved_gpdr[4]; 324static unsigned long saved_gpdr[4];
325static unsigned long saved_pgsr[4];
325 326
326static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state) 327static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
327{ 328{
@@ -332,6 +333,7 @@ static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
332 saved_gafr[0][i] = GAFR_L(i); 333 saved_gafr[0][i] = GAFR_L(i);
333 saved_gafr[1][i] = GAFR_U(i); 334 saved_gafr[1][i] = GAFR_U(i);
334 saved_gpdr[i] = GPDR(i * 32); 335 saved_gpdr[i] = GPDR(i * 32);
336 saved_pgsr[i] = PGSR(i);
335 337
336 GPDR(i * 32) = gpdr_lpm[i]; 338 GPDR(i * 32) = gpdr_lpm[i];
337 } 339 }
@@ -346,6 +348,7 @@ static int pxa2xx_mfp_resume(struct sys_device *d)
346 GAFR_L(i) = saved_gafr[0][i]; 348 GAFR_L(i) = saved_gafr[0][i];
347 GAFR_U(i) = saved_gafr[1][i]; 349 GAFR_U(i) = saved_gafr[1][i];
348 GPDR(i * 32) = saved_gpdr[i]; 350 GPDR(i * 32) = saved_gpdr[i];
351 PGSR(i) = saved_pgsr[i];
349 } 352 }
350 PSSR = PSSR_RDH | PSSR_PH; 353 PSSR = PSSR_RDH | PSSR_PH;
351 return 0; 354 return 0;
@@ -374,6 +377,9 @@ static int __init pxa2xx_mfp_init(void)
374 if (cpu_is_pxa27x()) 377 if (cpu_is_pxa27x())
375 pxa27x_mfp_init(); 378 pxa27x_mfp_init();
376 379
380 /* clear RDH bit to enable GPIO receivers after reset/sleep exit */
381 PSSR = PSSR_RDH;
382
377 /* initialize gafr_run[], pgsr_lpm[] from existing values */ 383 /* initialize gafr_run[], pgsr_lpm[] from existing values */
378 for (i = 0; i <= gpio_to_bank(pxa_last_gpio); i++) 384 for (i = 0; i <= gpio_to_bank(pxa_last_gpio); i++)
379 gpdr_lpm[i] = GPDR(i * 32); 385 gpdr_lpm[i] = GPDR(i * 32);
diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index 1cec1806f002..471a853e548b 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -62,6 +62,8 @@ static unsigned long palmld_pin_config[] __initdata = {
62 GPIO29_AC97_SDATA_IN_0, 62 GPIO29_AC97_SDATA_IN_0,
63 GPIO30_AC97_SDATA_OUT, 63 GPIO30_AC97_SDATA_OUT,
64 GPIO31_AC97_SYNC, 64 GPIO31_AC97_SYNC,
65 GPIO89_AC97_SYSCLK,
66 GPIO95_AC97_nRESET,
65 67
66 /* IrDA */ 68 /* IrDA */
67 GPIO108_GPIO, /* ir disable */ 69 GPIO108_GPIO, /* ir disable */
diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c
index 30662363907b..05bf979b78a6 100644
--- a/arch/arm/mach-pxa/palmt5.c
+++ b/arch/arm/mach-pxa/palmt5.c
@@ -64,6 +64,7 @@ static unsigned long palmt5_pin_config[] __initdata = {
64 GPIO29_AC97_SDATA_IN_0, 64 GPIO29_AC97_SDATA_IN_0,
65 GPIO30_AC97_SDATA_OUT, 65 GPIO30_AC97_SDATA_OUT,
66 GPIO31_AC97_SYNC, 66 GPIO31_AC97_SYNC,
67 GPIO89_AC97_SYSCLK,
67 GPIO95_AC97_nRESET, 68 GPIO95_AC97_nRESET,
68 69
69 /* IrDA */ 70 /* IrDA */
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index e2d44b1a8a9b..e99a893c58a7 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -65,6 +65,7 @@ static unsigned long palmtx_pin_config[] __initdata = {
65 GPIO29_AC97_SDATA_IN_0, 65 GPIO29_AC97_SDATA_IN_0,
66 GPIO30_AC97_SDATA_OUT, 66 GPIO30_AC97_SDATA_OUT,
67 GPIO31_AC97_SYNC, 67 GPIO31_AC97_SYNC,
68 GPIO89_AC97_SYSCLK,
68 GPIO95_AC97_nRESET, 69 GPIO95_AC97_nRESET,
69 70
70 /* IrDA */ 71 /* IrDA */
diff --git a/arch/arm/mach-pxa/reset.c b/arch/arm/mach-pxa/reset.c
index df29d45fb4e7..01e9d643394a 100644
--- a/arch/arm/mach-pxa/reset.c
+++ b/arch/arm/mach-pxa/reset.c
@@ -20,7 +20,7 @@ static void do_hw_reset(void);
20 20
21static int reset_gpio = -1; 21static int reset_gpio = -1;
22 22
23int init_gpio_reset(int gpio, int output) 23int init_gpio_reset(int gpio, int output, int level)
24{ 24{
25 int rc; 25 int rc;
26 26
@@ -31,7 +31,7 @@ int init_gpio_reset(int gpio, int output)
31 } 31 }
32 32
33 if (output) 33 if (output)
34 rc = gpio_direction_output(gpio, 0); 34 rc = gpio_direction_output(gpio, level);
35 else 35 else
36 rc = gpio_direction_input(gpio); 36 rc = gpio_direction_input(gpio);
37 if (rc) { 37 if (rc) {
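init_gpio_reset() now takes the initial output level rather than hard-coding zero, so a board whose reset line idles high can say so. The spitz and tosa hunks below are the two in-tree callers; schematically:

    /* spitz: output pin, driven low while inactive */
    init_gpio_reset(SPITZ_GPIO_ON_RESET, 1, 0);

    /* tosa: input pin, so the level argument is ignored */
    init_gpio_reset(TOSA_GPIO_ON_RESET, 0, 0);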
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index c18e34acafcb..5a45fe340a10 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -531,9 +531,15 @@ static int spitz_ohci_init(struct device *dev)
531 return gpio_direction_output(SPITZ_GPIO_USB_HOST, 1); 531 return gpio_direction_output(SPITZ_GPIO_USB_HOST, 1);
532} 532}
533 533
534static void spitz_ohci_exit(struct device *dev)
535{
536 gpio_free(SPITZ_GPIO_USB_HOST);
537}
538
534static struct pxaohci_platform_data spitz_ohci_platform_data = { 539static struct pxaohci_platform_data spitz_ohci_platform_data = {
535 .port_mode = PMM_NPS_MODE, 540 .port_mode = PMM_NPS_MODE,
536 .init = spitz_ohci_init, 541 .init = spitz_ohci_init,
542 .exit = spitz_ohci_exit,
537 .flags = ENABLE_PORT_ALL | NO_OC_PROTECTION, 543 .flags = ENABLE_PORT_ALL | NO_OC_PROTECTION,
538 .power_budget = 150, 544 .power_budget = 150,
539}; 545};
@@ -731,7 +737,7 @@ static void spitz_restart(char mode, const char *cmd)
731 737
732static void __init common_init(void) 738static void __init common_init(void)
733{ 739{
734 init_gpio_reset(SPITZ_GPIO_ON_RESET, 1); 740 init_gpio_reset(SPITZ_GPIO_ON_RESET, 1, 0);
735 pm_power_off = spitz_poweroff; 741 pm_power_off = spitz_poweroff;
736 arm_pm_restart = spitz_restart; 742 arm_pm_restart = spitz_restart;
737 743
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index afac5b6d3d78..a0bd46ef5d30 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -897,7 +897,7 @@ static void __init tosa_init(void)
897 gpio_set_wake(MFP_PIN_GPIO1, 1); 897 gpio_set_wake(MFP_PIN_GPIO1, 1);
898 /* We can't pass to gpio-keys since it will drop the Reset altfunc */ 898 /* We can't pass to gpio-keys since it will drop the Reset altfunc */
899 899
900 init_gpio_reset(TOSA_GPIO_ON_RESET, 0); 900 init_gpio_reset(TOSA_GPIO_ON_RESET, 0, 0);
901 901
902 pm_power_off = tosa_poweroff; 902 pm_power_off = tosa_poweroff;
903 arm_pm_restart = tosa_restart; 903 arm_pm_restart = tosa_restart;
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3397f1e64d76..a08d9d2380d3 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -184,23 +184,37 @@ __v7_setup:
184 stmia r12, {r0-r5, r7, r9, r11, lr} 184 stmia r12, {r0-r5, r7, r9, r11, lr}
185 bl v7_flush_dcache_all 185 bl v7_flush_dcache_all
186 ldmia r12, {r0-r5, r7, r9, r11, lr} 186 ldmia r12, {r0-r5, r7, r9, r11, lr}
187
188 mrc p15, 0, r0, c0, c0, 0 @ read main ID register
189 and r10, r0, #0xff000000 @ ARM?
190 teq r10, #0x41000000
191 bne 2f
192 and r5, r0, #0x00f00000 @ variant
193 and r6, r0, #0x0000000f @ revision
194 orr r0, r6, r5, lsr #20-4 @ combine variant and revision
195
187#ifdef CONFIG_ARM_ERRATA_430973 196#ifdef CONFIG_ARM_ERRATA_430973
188 mrc p15, 0, r10, c1, c0, 1 @ read aux control register 197 teq r5, #0x00100000 @ only present in r1p*
189 orr r10, r10, #(1 << 6) @ set IBE to 1 198 mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
190 mcr p15, 0, r10, c1, c0, 1 @ write aux control register 199 orreq r10, r10, #(1 << 6) @ set IBE to 1
200 mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
191#endif 201#endif
192#ifdef CONFIG_ARM_ERRATA_458693 202#ifdef CONFIG_ARM_ERRATA_458693
193 mrc p15, 0, r10, c1, c0, 1 @ read aux control register 203 teq r0, #0x20 @ only present in r2p0
194 orr r10, r10, #(1 << 5) @ set L1NEON to 1 204 mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
195 orr r10, r10, #(1 << 9) @ set PLDNOP to 1 205 orreq r10, r10, #(1 << 5) @ set L1NEON to 1
196 mcr p15, 0, r10, c1, c0, 1 @ write aux control register 206 orreq r10, r10, #(1 << 9) @ set PLDNOP to 1
207 mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
197#endif 208#endif
198#ifdef CONFIG_ARM_ERRATA_460075 209#ifdef CONFIG_ARM_ERRATA_460075
199 mrc p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register 210 teq r0, #0x20 @ only present in r2p0
200 orr r10, r10, #(1 << 22) @ set the Write Allocate disable bit 211 mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register
201 mcr p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register 212 tsteq r10, #1 << 22
213 orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit
214 mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register
202#endif 215#endif
203 mov r10, #0 216
2172: mov r10, #0
204#ifdef HARVARD_CACHE 218#ifdef HARVARD_CACHE
205 mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate 219 mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate
206#endif 220#endif
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 945e0d237a1d..fec64678a63a 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -12,7 +12,7 @@
12# 12#
13# http://www.arm.linux.org.uk/developer/machines/?action=new 13# http://www.arm.linux.org.uk/developer/machines/?action=new
14# 14#
15# Last update: Mon Mar 23 20:09:01 2009 15# Last update: Fri May 29 10:14:20 2009
16# 16#
17# machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number 17# machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number
18# 18#
@@ -916,7 +916,7 @@ nxdb500 MACH_NXDB500 NXDB500 905
916apf9328 MACH_APF9328 APF9328 906 916apf9328 MACH_APF9328 APF9328 906
917omap_wipoq MACH_OMAP_WIPOQ OMAP_WIPOQ 907 917omap_wipoq MACH_OMAP_WIPOQ OMAP_WIPOQ 907
918omap_twip MACH_OMAP_TWIP OMAP_TWIP 908 918omap_twip MACH_OMAP_TWIP OMAP_TWIP 908
919palmt650 MACH_PALMT650 PALMT650 909 919treo650 MACH_TREO650 TREO650 909
920acumen MACH_ACUMEN ACUMEN 910 920acumen MACH_ACUMEN ACUMEN 910
921xp100 MACH_XP100 XP100 911 921xp100 MACH_XP100 XP100 911
922fs2410 MACH_FS2410 FS2410 912 922fs2410 MACH_FS2410 FS2410 912
@@ -1232,7 +1232,7 @@ ql202b MACH_QL202B QL202B 1226
1232vpac270 MACH_VPAC270 VPAC270 1227 1232vpac270 MACH_VPAC270 VPAC270 1227
1233rd129 MACH_RD129 RD129 1228 1233rd129 MACH_RD129 RD129 1228
1234htcwizard MACH_HTCWIZARD HTCWIZARD 1229 1234htcwizard MACH_HTCWIZARD HTCWIZARD 1229
1235xscale_treo680 MACH_XSCALE_TREO680 XSCALE_TREO680 1230 1235treo680 MACH_TREO680 TREO680 1230
1236tecon_tmezon MACH_TECON_TMEZON TECON_TMEZON 1231 1236tecon_tmezon MACH_TECON_TMEZON TECON_TMEZON 1231
1237zylonite MACH_ZYLONITE ZYLONITE 1233 1237zylonite MACH_ZYLONITE ZYLONITE 1233
1238gene1270 MACH_GENE1270 GENE1270 1234 1238gene1270 MACH_GENE1270 GENE1270 1234
@@ -1418,10 +1418,10 @@ looxc550 MACH_LOOXC550 LOOXC550 1417
1418cnty_titan MACH_CNTY_TITAN CNTY_TITAN 1418 1418cnty_titan MACH_CNTY_TITAN CNTY_TITAN 1418
1419app3xx MACH_APP3XX APP3XX 1419 1419app3xx MACH_APP3XX APP3XX 1419
1420sideoatsgrama MACH_SIDEOATSGRAMA SIDEOATSGRAMA 1420 1420sideoatsgrama MACH_SIDEOATSGRAMA SIDEOATSGRAMA 1420
1421palmtreo700p MACH_PALMTREO700P PALMTREO700P 1421 1421treo700p MACH_TREO700P TREO700P 1421
1422palmtreo700w MACH_PALMTREO700W PALMTREO700W 1422 1422treo700w MACH_TREO700W TREO700W 1422
1423palmtreo750 MACH_PALMTREO750 PALMTREO750 1423 1423treo750 MACH_TREO750 TREO750 1423
1424palmtreo755p MACH_PALMTREO755P PALMTREO755P 1424 1424treo755p MACH_TREO755P TREO755P 1424
1425ezreganut9200 MACH_EZREGANUT9200 EZREGANUT9200 1425 1425ezreganut9200 MACH_EZREGANUT9200 EZREGANUT9200 1425
1426sarge MACH_SARGE SARGE 1426 1426sarge MACH_SARGE SARGE 1426
1427a696 MACH_A696 A696 1427 1427a696 MACH_A696 A696 1427
@@ -1721,7 +1721,7 @@ sapphire MACH_SAPPHIRE SAPPHIRE 1729
1721csb637xo MACH_CSB637XO CSB637XO 1730 1721csb637xo MACH_CSB637XO CSB637XO 1730
1722evisiong MACH_EVISIONG EVISIONG 1731 1722evisiong MACH_EVISIONG EVISIONG 1731
1723stmp37xx MACH_STMP37XX STMP37XX 1732 1723stmp37xx MACH_STMP37XX STMP37XX 1732
1724stmp378x MACH_STMP38XX STMP38XX 1733 1724stmp378x MACH_STMP378X STMP378X 1733
1725tnt MACH_TNT TNT 1734 1725tnt MACH_TNT TNT 1734
1726tbxt MACH_TBXT TBXT 1735 1726tbxt MACH_TBXT TBXT 1735
1727playmate MACH_PLAYMATE PLAYMATE 1736 1727playmate MACH_PLAYMATE PLAYMATE 1736
@@ -1817,7 +1817,7 @@ smdkc100 MACH_SMDKC100 SMDKC100 1826
1817tavorevb MACH_TAVOREVB TAVOREVB 1827 1817tavorevb MACH_TAVOREVB TAVOREVB 1827
1818saar MACH_SAAR SAAR 1828 1818saar MACH_SAAR SAAR 1828
1819deister_eyecam MACH_DEISTER_EYECAM DEISTER_EYECAM 1829 1819deister_eyecam MACH_DEISTER_EYECAM DEISTER_EYECAM 1829
1820at91sam9m10ek MACH_AT91SAM9M10EK AT91SAM9M10EK 1830 1820at91sam9m10g45ek MACH_AT91SAM9M10G45EK AT91SAM9M10G45EK 1830
1821linkstation_produo MACH_LINKSTATION_PRODUO LINKSTATION_PRODUO 1831 1821linkstation_produo MACH_LINKSTATION_PRODUO LINKSTATION_PRODUO 1831
1822hit_b0 MACH_HIT_B0 HIT_B0 1832 1822hit_b0 MACH_HIT_B0 HIT_B0 1832
1823adx_rmu MACH_ADX_RMU ADX_RMU 1833 1823adx_rmu MACH_ADX_RMU ADX_RMU 1833
@@ -2132,3 +2132,116 @@ apollo MACH_APOLLO APOLLO 2141
2132at91cap9stk MACH_AT91CAP9STK AT91CAP9STK 2142 2132at91cap9stk MACH_AT91CAP9STK AT91CAP9STK 2142
2133spc300 MACH_SPC300 SPC300 2143 2133spc300 MACH_SPC300 SPC300 2143
2134eko MACH_EKO EKO 2144 2134eko MACH_EKO EKO 2144
2135ccw9m2443 MACH_CCW9M2443 CCW9M2443 2145
2136ccw9m2443js MACH_CCW9M2443JS CCW9M2443JS 2146
2137m2m_router_device MACH_M2M_ROUTER_DEVICE M2M_ROUTER_DEVICE 2147
2138str9104nas MACH_STAR9104NAS STAR9104NAS 2148
2139pca100 MACH_PCA100 PCA100 2149
2140z3_dm365_mod_01 MACH_Z3_DM365_MOD_01 Z3_DM365_MOD_01 2150
2141hipox MACH_HIPOX HIPOX 2151
2142omap3_piteds MACH_OMAP3_PITEDS OMAP3_PITEDS 2152
2143bm150r MACH_BM150R BM150R 2153
2144tbone MACH_TBONE TBONE 2154
2145merlin MACH_MERLIN MERLIN 2155
2146falcon MACH_FALCON FALCON 2156
2147davinci_da850_evm MACH_DAVINCI_DA850_EVM DAVINCI_DA850_EVM 2157
2148s5p6440 MACH_S5P6440 S5P6440 2158
2149at91sam9g10ek MACH_AT91SAM9G10EK AT91SAM9G10EK 2159
2150omap_4430sdp MACH_OMAP_4430SDP OMAP_4430SDP 2160
2151lpc313x MACH_LPC313X LPC313X 2161
2152magx_zn5 MACH_MAGX_ZN5 MAGX_ZN5 2162
2153magx_em30 MACH_MAGX_EM30 MAGX_EM30 2163
2154magx_ve66 MACH_MAGX_VE66 MAGX_VE66 2164
2155meesc MACH_MEESC MEESC 2165
2156otc570 MACH_OTC570 OTC570 2166
2157bcu2412 MACH_BCU2412 BCU2412 2167
2158beacon MACH_BEACON BEACON 2168
2159actia_tgw MACH_ACTIA_TGW ACTIA_TGW 2169
2160e4430 MACH_E4430 E4430 2170
2161ql300 MACH_QL300 QL300 2171
2162btmavb101 MACH_BTMAVB101 BTMAVB101 2172
2163btmawb101 MACH_BTMAWB101 BTMAWB101 2173
2164sq201 MACH_SQ201 SQ201 2174
2165quatro45xx MACH_QUATRO45XX QUATRO45XX 2175
2166openpad MACH_OPENPAD OPENPAD 2176
2167tx25 MACH_TX25 TX25 2177
2168omap3_torpedo MACH_OMAP3_TORPEDO OMAP3_TORPEDO 2178
2169htcraphael_k MACH_HTCRAPHAEL_K HTCRAPHAEL_K 2179
2170lal43 MACH_LAL43 LAL43 2181
2171htcraphael_cdma500 MACH_HTCRAPHAEL_CDMA500 HTCRAPHAEL_CDMA500 2182
2172anw6410 MACH_ANW6410 ANW6410 2183
2173htcprophet MACH_HTCPROPHET HTCPROPHET 2185
2174cfa_10022 MACH_CFA_10022 CFA_10022 2186
2175imx27_visstrim_m10 MACH_IMX27_VISSTRIM_M10 IMX27_VISSTRIM_M10 2187
2176px2imx27 MACH_PX2IMX27 PX2IMX27 2188
2177stm3210e_eval MACH_STM3210E_EVAL STM3210E_EVAL 2189
2178dvs10 MACH_DVS10 DVS10 2190
2179portuxg20 MACH_PORTUXG20 PORTUXG20 2191
2180arm_spv MACH_ARM_SPV ARM_SPV 2192
2181smdkc110 MACH_SMDKC110 SMDKC110 2193
2182cabespresso MACH_CABESPRESSO CABESPRESSO 2194
2183hmc800 MACH_HMC800 HMC800 2195
2184sholes MACH_SHOLES SHOLES 2196
2185btmxc31 MACH_BTMXC31 BTMXC31 2197
2186dt501 MACH_DT501 DT501 2198
2187ktx MACH_KTX KTX 2199
2188omap3517evm MACH_OMAP3517EVM OMAP3517EVM 2200
2189netspace_v2 MACH_NETSPACE_V2 NETSPACE_V2 2201
2190netspace_max_v2 MACH_NETSPACE_MAX_V2 NETSPACE_MAX_V2 2202
2191d2net_v2 MACH_D2NET_V2 D2NET_V2 2203
2192net2big_v2 MACH_NET2BIG_V2 NET2BIG_V2 2204
2193net4big_v2 MACH_NET4BIG_V2 NET4BIG_V2 2205
2194net5big_v2 MACH_NET5BIG_V2 NET5BIG_V2 2206
2195endb2443 MACH_ENDB2443 ENDB2443 2207
2196inetspace_v2 MACH_INETSPACE_V2 INETSPACE_V2 2208
2197tros MACH_TROS TROS 2209
2198pelco_homer MACH_PELCO_HOMER PELCO_HOMER 2210
2199ofsp8 MACH_OFSP8 OFSP8 2211
2200at91sam9g45ekes MACH_AT91SAM9G45EKES AT91SAM9G45EKES 2212
2201guf_cupid MACH_GUF_CUPID GUF_CUPID 2213
2202eab1r MACH_EAB1R EAB1R 2214
2203desirec MACH_DESIREC DESIREC 2215
2204cordoba MACH_CORDOBA CORDOBA 2216
2205irvine MACH_IRVINE IRVINE 2217
2206sff772 MACH_SFF772 SFF772 2218
2207pelco_milano MACH_PELCO_MILANO PELCO_MILANO 2219
2208pc7302 MACH_PC7302 PC7302 2220
2209bip6000 MACH_BIP6000 BIP6000 2221
2210silvermoon MACH_SILVERMOON SILVERMOON 2222
2211vc0830 MACH_VC0830 VC0830 2223
2212dt430 MACH_DT430 DT430 2224
2213ji42pf MACH_JI42PF JI42PF 2225
2214gnet_ksm MACH_GNET_KSM GNET_KSM 2226
2215gnet_sgm MACH_GNET_SGM GNET_SGM 2227
2216gnet_sgr MACH_GNET_SGR GNET_SGR 2228
2217omap3_icetekevm MACH_OMAP3_ICETEKEVM OMAP3_ICETEKEVM 2229
2218pnp MACH_PNP PNP 2230
2219ctera_2bay_k MACH_CTERA_2BAY_K CTERA_2BAY_K 2231
2220ctera_2bay_u MACH_CTERA_2BAY_U CTERA_2BAY_U 2232
2221sas_c MACH_SAS_C SAS_C 2233
2222vma2315 MACH_VMA2315 VMA2315 2234
2223vcs MACH_VCS VCS 2235
2224spear600 MACH_SPEAR600 SPEAR600 2236
2225spear300 MACH_SPEAR300 SPEAR300 2237
2226spear1300 MACH_SPEAR1300 SPEAR1300 2238
2227lilly1131 MACH_LILLY1131 LILLY1131 2239
2228arvoo_ax301 MACH_ARVOO_AX301 ARVOO_AX301 2240
2229mapphone MACH_MAPPHONE MAPPHONE 2241
2230legend MACH_LEGEND LEGEND 2242
2231salsa MACH_SALSA SALSA 2243
2232lounge MACH_LOUNGE LOUNGE 2244
2233vision MACH_VISION VISION 2245
2234vmb20 MACH_VMB20 VMB20 2246
2235hy2410 MACH_HY2410 HY2410 2247
2236hy9315 MACH_HY9315 HY9315 2248
2237bullwinkle MACH_BULLWINKLE BULLWINKLE 2249
2238arm_ultimator2 MACH_ARM_ULTIMATOR2 ARM_ULTIMATOR2 2250
2239vs_v210 MACH_VS_V210 VS_V210 2252
2240vs_v212 MACH_VS_V212 VS_V212 2253
2241hmt MACH_HMT HMT 2254
2242suen3 MACH_SUEN3 SUEN3 2255
2243vesper MACH_VESPER VESPER 2256
2244str9 MACH_STR9 STR9 2257
2245omap3_wl_ff MACH_OMAP3_WL_FF OMAP3_WL_FF 2258
2246simcom MACH_SIMCOM SIMCOM 2259
2247mcwebio MACH_MCWEBIO MCWEBIO 2260
diff --git a/arch/blackfin/include/asm/.gitignore b/arch/blackfin/include/asm/.gitignore
deleted file mode 100644
index 7858564a4466..000000000000
--- a/arch/blackfin/include/asm/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
1+mach
diff --git a/arch/blackfin/include/asm/flat.h b/arch/blackfin/include/asm/flat.h
index e70074e05f4e..733a178d782d 100644
--- a/arch/blackfin/include/asm/flat.h
+++ b/arch/blackfin/include/asm/flat.h
@@ -10,7 +10,6 @@
10 10
11#include <asm/unaligned.h> 11#include <asm/unaligned.h>
12 12
13#define flat_stack_align(sp) /* nothing needed */
14#define flat_argvp_envp_on_stack() 0 13#define flat_argvp_envp_on_stack() 0
15#define flat_old_ram_flag(flags) (flags) 14#define flat_old_ram_flag(flags) (flags)
16 15
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index 1e57b636e0bc..cf5066d3efd2 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -378,8 +378,10 @@
378#define __NR_dup3 363 378#define __NR_dup3 363
379#define __NR_pipe2 364 379#define __NR_pipe2 364
380#define __NR_inotify_init1 365 380#define __NR_inotify_init1 365
381#define __NR_preadv 366
382#define __NR_pwritev 367
381 383
382#define __NR_syscall 366 384#define __NR_syscall 368
383#define NR_syscalls __NR_syscall 385#define NR_syscalls __NR_syscall
384 386
385/* Old optional stuff no one actually uses */ 387/* Old optional stuff no one actually uses */
diff --git a/arch/blackfin/kernel/.gitignore b/arch/blackfin/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/blackfin/kernel/.gitignore
@@ -0,0 +1 @@
1vmlinux.lds
diff --git a/arch/blackfin/lib/strncmp.c b/arch/blackfin/lib/strncmp.c
index 2aaae78a68e0..46518b1d2983 100644
--- a/arch/blackfin/lib/strncmp.c
+++ b/arch/blackfin/lib/strncmp.c
@@ -8,9 +8,8 @@
8 8
9#define strncmp __inline_strncmp 9#define strncmp __inline_strncmp
10#include <asm/string.h> 10#include <asm/string.h>
11#undef strncmp
12
13#include <linux/module.h> 11#include <linux/module.h>
12#undef strncmp
14 13
15int strncmp(const char *cs, const char *ct, size_t count) 14int strncmp(const char *cs, const char *ct, size_t count)
16{ 15{
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 21e65a339a22..a063a434f7e3 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1581,6 +1581,8 @@ ENTRY(_sys_call_table)
1581 .long _sys_dup3 1581 .long _sys_dup3
1582 .long _sys_pipe2 1582 .long _sys_pipe2
1583 .long _sys_inotify_init1 /* 365 */ 1583 .long _sys_inotify_init1 /* 365 */
1584 .long _sys_preadv
1585 .long _sys_pwritev
1584 1586
1585 .rept NR_syscalls-(.-_sys_call_table)/4 1587 .rept NR_syscalls-(.-_sys_call_table)/4
1586 .long _sys_ni_syscall 1588 .long _sys_ni_syscall
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index df3925cb1c7f..d70b445f4a8f 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,12 +325,14 @@ static void end_crisv32_irq(unsigned int irq)
325{ 325{
326} 326}
327 327
328void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) 328int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
329{ 329{
330 unsigned long flags; 330 unsigned long flags;
331 spin_lock_irqsave(&irq_lock, flags); 331 spin_lock_irqsave(&irq_lock, flags);
332 irq_allocations[irq - FIRST_IRQ].mask = *dest; 332 irq_allocations[irq - FIRST_IRQ].mask = *dest;
333 spin_unlock_irqrestore(&irq_lock, flags); 333 spin_unlock_irqrestore(&irq_lock, flags);
334
335 return 0;
334} 336}
335 337
336static struct irq_chip crisv32_irq_type = { 338static struct irq_chip crisv32_irq_type = {
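This hunk and the ia64 ones that follow implement the same interface change: the irq_chip ->set_affinity() callback now returns int, 0 on success and -1 (or a negative errno) when the mask cannot be applied. Every converted callback takes this shape (sketch; the routing helper is hypothetical):

    static int example_set_affinity(unsigned int irq,
                                    const struct cpumask *mask)
    {
            unsigned int cpu = cpumask_first_and(cpu_online_mask, mask);

            if (cpu >= nr_cpu_ids)
                    return -1;              /* cannot honour the mask */
            route_irq_to(irq, cpu);         /* hypothetical hw write  */
            return 0;
    }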
diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h
index 2a873508a9a1..bd12b31b90e6 100644
--- a/arch/h8300/include/asm/flat.h
+++ b/arch/h8300/include/asm/flat.h
@@ -5,7 +5,6 @@
5#ifndef __H8300_FLAT_H__ 5#ifndef __H8300_FLAT_H__
6#define __H8300_FLAT_H__ 6#define __H8300_FLAT_H__
7 7
8#define flat_stack_align(sp) /* nothing needed */
9#define flat_argvp_envp_on_stack() 1 8#define flat_argvp_envp_on_stack() 1
10#define flat_old_ram_flag(flags) 1 9#define flat_old_ram_flag(flags) 1
11#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) 10#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index cc0a3182db3c..acb5047ab573 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -21,9 +21,10 @@ hpsim_irq_noop (unsigned int irq)
21{ 21{
22} 22}
23 23
24static void 24static int
25hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b) 25hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
26{ 26{
27 return 0;
27} 28}
28 29
29static struct hw_interrupt_type irq_type_hp_sim = { 30static struct hw_interrupt_type irq_type_hp_sim = {
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 5510317db37b..baec6f00f7f3 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -636,7 +636,7 @@ void __init acpi_numa_arch_fixup(void)
636 * success: return IRQ number (>=0) 636 * success: return IRQ number (>=0)
637 * failure: return < 0 637 * failure: return < 0
638 */ 638 */
639int acpi_register_gsi(u32 gsi, int triggering, int polarity) 639int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
640{ 640{
641 if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) 641 if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
642 return gsi; 642 return gsi;
@@ -678,7 +678,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
678 678
679 fadt = (struct acpi_table_fadt *)fadt_header; 679 fadt = (struct acpi_table_fadt *)fadt_header;
680 680
681 acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); 681 acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
682 ACPI_ACTIVE_LOW);
682 return 0; 683 return 0;
683} 684}
684 685
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 166e0d839fa0..f92cef47bf86 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -329,7 +329,7 @@ unmask_irq (unsigned int irq)
329} 329}
330 330
331 331
332static void 332static int
333iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) 333iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
334{ 334{
335#ifdef CONFIG_SMP 335#ifdef CONFIG_SMP
@@ -343,15 +343,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
343 343
344 cpu = cpumask_first_and(cpu_online_mask, mask); 344 cpu = cpumask_first_and(cpu_online_mask, mask);
345 if (cpu >= nr_cpu_ids) 345 if (cpu >= nr_cpu_ids)
346 return; 346 return -1;
347 347
348 if (irq_prepare_move(irq, cpu)) 348 if (irq_prepare_move(irq, cpu))
349 return; 349 return -1;
350 350
351 dest = cpu_physical_id(cpu); 351 dest = cpu_physical_id(cpu);
352 352
353 if (!iosapic_intr_info[irq].count) 353 if (!iosapic_intr_info[irq].count)
354 return; /* not an IOSAPIC interrupt */ 354 return -1; /* not an IOSAPIC interrupt */
355 355
356 set_irq_affinity_info(irq, dest, redir); 356 set_irq_affinity_info(irq, dest, redir);
357 357
@@ -376,7 +376,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
376 iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); 376 iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
377 iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); 377 iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
378 } 378 }
379
379#endif 380#endif
381 return 0;
380} 382}
381 383
382/* 384/*
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 2b15e233f7fe..0f8ade9331ba 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -12,7 +12,7 @@
12static struct irq_chip ia64_msi_chip; 12static struct irq_chip ia64_msi_chip;
13 13
14#ifdef CONFIG_SMP 14#ifdef CONFIG_SMP
15static void ia64_set_msi_irq_affinity(unsigned int irq, 15static int ia64_set_msi_irq_affinity(unsigned int irq,
16 const cpumask_t *cpu_mask) 16 const cpumask_t *cpu_mask)
17{ 17{
18 struct msi_msg msg; 18 struct msi_msg msg;
@@ -20,10 +20,10 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
20 int cpu = first_cpu(*cpu_mask); 20 int cpu = first_cpu(*cpu_mask);
21 21
22 if (!cpu_online(cpu)) 22 if (!cpu_online(cpu))
23 return; 23 return -1;
24 24
25 if (irq_prepare_move(irq, cpu)) 25 if (irq_prepare_move(irq, cpu))
26 return; 26 return -1;
27 27
28 read_msi_msg(irq, &msg); 28 read_msi_msg(irq, &msg);
29 29
@@ -39,6 +39,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
39 39
40 write_msi_msg(irq, &msg); 40 write_msi_msg(irq, &msg);
41 cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); 41 cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
42
43 return 0;
42} 44}
43#endif /* CONFIG_SMP */ 45#endif /* CONFIG_SMP */
44 46
@@ -130,17 +132,17 @@ void arch_teardown_msi_irq(unsigned int irq)
130 132
131#ifdef CONFIG_DMAR 133#ifdef CONFIG_DMAR
132#ifdef CONFIG_SMP 134#ifdef CONFIG_SMP
133static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 135static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
134{ 136{
135 struct irq_cfg *cfg = irq_cfg + irq; 137 struct irq_cfg *cfg = irq_cfg + irq;
136 struct msi_msg msg; 138 struct msi_msg msg;
137 int cpu = cpumask_first(mask); 139 int cpu = cpumask_first(mask);
138 140
139 if (!cpu_online(cpu)) 141 if (!cpu_online(cpu))
140 return; 142 return -1;
141 143
142 if (irq_prepare_move(irq, cpu)) 144 if (irq_prepare_move(irq, cpu))
143 return; 145 return -1;
144 146
145 dmar_msi_read(irq, &msg); 147 dmar_msi_read(irq, &msg);
146 148
@@ -151,6 +153,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
151 153
152 dmar_msi_write(irq, &msg); 154 dmar_msi_write(irq, &msg);
153 cpumask_copy(irq_desc[irq].affinity, mask); 155 cpumask_copy(irq_desc[irq].affinity, mask);
156
157 return 0;
154} 158}
155#endif /* CONFIG_SMP */ 159#endif /* CONFIG_SMP */
156 160
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 66fd705e82c0..764f26abac05 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,7 +227,7 @@ finish_up:
227 return new_irq_info; 227 return new_irq_info;
228} 228}
229 229
230static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) 230static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
231{ 231{
232 struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; 232 struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
233 nasid_t nasid; 233 nasid_t nasid;
@@ -239,6 +239,8 @@ static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
239 list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, 239 list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
240 sn_irq_lh[irq], list) 240 sn_irq_lh[irq], list)
241 (void)sn_retarget_vector(sn_irq_info, nasid, slice); 241 (void)sn_retarget_vector(sn_irq_info, nasid, slice);
242
243 return 0;
242} 244}
243 245
244#ifdef CONFIG_SMP 246#ifdef CONFIG_SMP
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 81e428943d73..fbbfb9701201 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
151} 151}
152 152
153#ifdef CONFIG_SMP 153#ifdef CONFIG_SMP
154static void sn_set_msi_irq_affinity(unsigned int irq, 154static int sn_set_msi_irq_affinity(unsigned int irq,
155 const struct cpumask *cpu_mask) 155 const struct cpumask *cpu_mask)
156{ 156{
157 struct msi_msg msg; 157 struct msi_msg msg;
@@ -168,7 +168,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
168 cpu = cpumask_first(cpu_mask); 168 cpu = cpumask_first(cpu_mask);
169 sn_irq_info = sn_msi_info[irq].sn_irq_info; 169 sn_irq_info = sn_msi_info[irq].sn_irq_info;
170 if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) 170 if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
171 return; 171 return -1;
172 172
173 /* 173 /*
174 * Release XIO resources for the old MSI PCI address 174 * Release XIO resources for the old MSI PCI address
@@ -189,7 +189,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
189 new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice); 189 new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
190 sn_msi_info[irq].sn_irq_info = new_irq_info; 190 sn_msi_info[irq].sn_irq_info = new_irq_info;
191 if (new_irq_info == NULL) 191 if (new_irq_info == NULL)
192 return; 192 return -1;
193 193
194 /* 194 /*
195 * Map the xio address into bus space 195 * Map the xio address into bus space
@@ -206,6 +206,8 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
206 206
207 write_msi_msg(irq, &msg); 207 write_msi_msg(irq, &msg);
208 cpumask_copy(irq_desc[irq].affinity, cpu_mask); 208 cpumask_copy(irq_desc[irq].affinity, cpu_mask);
209
210 return 0;
209} 211}
210#endif /* CONFIG_SMP */ 212#endif /* CONFIG_SMP */
211 213
diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h
index d851cf0c4aa5..5d711c4688fb 100644
--- a/arch/m32r/include/asm/flat.h
+++ b/arch/m32r/include/asm/flat.h
@@ -12,7 +12,6 @@
12#ifndef __ASM_M32R_FLAT_H 12#ifndef __ASM_M32R_FLAT_H
13#define __ASM_M32R_FLAT_H 13#define __ASM_M32R_FLAT_H
14 14
15#define flat_stack_align(sp) (*sp += (*sp & 3 ? (4 - (*sp & 3)): 0))
16#define flat_argvp_envp_on_stack() 0 15#define flat_argvp_envp_on_stack() 0
17#define flat_old_ram_flag(flags) (flags) 16#define flat_old_ram_flag(flags) (flags)
18#define flat_set_persistent(relval, p) 0 17#define flat_set_persistent(relval, p) 0
diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h
index 814b5174a8e0..a0e290793978 100644
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -5,7 +5,6 @@
5#ifndef __M68KNOMMU_FLAT_H__ 5#ifndef __M68KNOMMU_FLAT_H__
6#define __M68KNOMMU_FLAT_H__ 6#define __M68KNOMMU_FLAT_H__
7 7
8#define flat_stack_align(sp) /* nothing needed */
9#define flat_argvp_envp_on_stack() 1 8#define flat_argvp_envp_on_stack() 1
10#define flat_old_ram_flag(flags) (flags) 9#define flat_old_ram_flag(flags) (flags)
11#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) 10#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 09b1287a92ce..25f3b0a11ca8 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -72,6 +72,7 @@ config MIPS_COBALT
72 select IRQ_CPU 72 select IRQ_CPU
73 select IRQ_GT641XX 73 select IRQ_GT641XX
74 select PCI_GT64XXX_PCI0 74 select PCI_GT64XXX_PCI0
75 select PCI
75 select SYS_HAS_CPU_NEVADA 76 select SYS_HAS_CPU_NEVADA
76 select SYS_HAS_EARLY_PRINTK 77 select SYS_HAS_EARLY_PRINTK
77 select SYS_SUPPORTS_32BIT_KERNEL 78 select SYS_SUPPORTS_32BIT_KERNEL
@@ -593,7 +594,7 @@ config WR_PPMC
593 board, which is based on GT64120 bridge chip. 594 board, which is based on GT64120 bridge chip.
594 595
595config CAVIUM_OCTEON_SIMULATOR 596config CAVIUM_OCTEON_SIMULATOR
596 bool "Support for the Cavium Networks Octeon Simulator" 597 bool "Cavium Networks Octeon Simulator"
597 select CEVT_R4K 598 select CEVT_R4K
598 select 64BIT_PHYS_ADDR 599 select 64BIT_PHYS_ADDR
599 select DMA_COHERENT 600 select DMA_COHERENT
@@ -607,7 +608,7 @@ config CAVIUM_OCTEON_SIMULATOR
607 hardware. 608 hardware.
608 609
609config CAVIUM_OCTEON_REFERENCE_BOARD 610config CAVIUM_OCTEON_REFERENCE_BOARD
610 bool "Support for the Cavium Networks Octeon reference board" 611 bool "Cavium Networks Octeon reference board"
611 select CEVT_R4K 612 select CEVT_R4K
612 select 64BIT_PHYS_ADDR 613 select 64BIT_PHYS_ADDR
613 select DMA_COHERENT 614 select DMA_COHERENT
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 1c19af8daa62..d3a0c8154bec 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -177,7 +177,7 @@ static void octeon_irq_ciu0_disable(unsigned int irq)
177} 177}
178 178
179#ifdef CONFIG_SMP 179#ifdef CONFIG_SMP
180static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest) 180static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
181{ 181{
182 int cpu; 182 int cpu;
183 int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ 183 int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */
@@ -199,6 +199,8 @@ static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask
199 */ 199 */
200 cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2)); 200 cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2));
201 write_unlock(&octeon_irq_ciu0_rwlock); 201 write_unlock(&octeon_irq_ciu0_rwlock);
202
203 return 0;
202} 204}
203#endif 205#endif
204 206
@@ -292,7 +294,7 @@ static void octeon_irq_ciu1_disable(unsigned int irq)
292} 294}
293 295
294#ifdef CONFIG_SMP 296#ifdef CONFIG_SMP
295static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest) 297static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
296{ 298{
297 int cpu; 299 int cpu;
298 int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ 300 int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */
@@ -315,6 +317,8 @@ static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask
315 */ 317 */
316 cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1)); 318 cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1));
317 write_unlock(&octeon_irq_ciu1_rwlock); 319 write_unlock(&octeon_irq_ciu1_rwlock);
320
321 return 0;
318} 322}
319#endif 323#endif
320 324
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index 744cd8fb107f..126044308dec 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -39,8 +39,8 @@ struct cache_desc {
39#define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */ 39#define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */
40 40
41struct cpuinfo_mips { 41struct cpuinfo_mips {
42 unsigned long udelay_val; 42 unsigned int udelay_val;
43 unsigned long asid_cache; 43 unsigned int asid_cache;
44 44
45 /* 45 /*
46 * Capability and feature descriptor structure for MIPS CPU 46 * Capability and feature descriptor structure for MIPS CPU
diff --git a/arch/mips/include/asm/delay.h b/arch/mips/include/asm/delay.h
index b0bccd2c4ed5..a07e51b2be13 100644
--- a/arch/mips/include/asm/delay.h
+++ b/arch/mips/include/asm/delay.h
@@ -11,94 +11,12 @@
11#ifndef _ASM_DELAY_H 11#ifndef _ASM_DELAY_H
12#define _ASM_DELAY_H 12#define _ASM_DELAY_H
13 13
14#include <linux/param.h> 14extern void __delay(unsigned int loops);
15#include <linux/smp.h> 15extern void __ndelay(unsigned int ns);
16extern void __udelay(unsigned int us);
16 17
17#include <asm/compiler.h> 18#define ndelay(ns) __ndelay(ns)
18#include <asm/war.h> 19#define udelay(us) __udelay(us)
19
20static inline void __delay(unsigned long loops)
21{
22 if (sizeof(long) == 4)
23 __asm__ __volatile__ (
24 " .set noreorder \n"
25 " .align 3 \n"
26 "1: bnez %0, 1b \n"
27 " subu %0, 1 \n"
28 " .set reorder \n"
29 : "=r" (loops)
30 : "0" (loops));
31 else if (sizeof(long) == 8 && !DADDI_WAR)
32 __asm__ __volatile__ (
33 " .set noreorder \n"
34 " .align 3 \n"
35 "1: bnez %0, 1b \n"
36 " dsubu %0, 1 \n"
37 " .set reorder \n"
38 : "=r" (loops)
39 : "0" (loops));
40 else if (sizeof(long) == 8 && DADDI_WAR)
41 __asm__ __volatile__ (
42 " .set noreorder \n"
43 " .align 3 \n"
44 "1: bnez %0, 1b \n"
45 " dsubu %0, %2 \n"
46 " .set reorder \n"
47 : "=r" (loops)
48 : "0" (loops), "r" (1));
49}
50
51
52/*
53 * Division by multiplication: you don't have to worry about
54 * loss of precision.
55 *
56 * Use only for very small delays ( < 1 msec). Should probably use a
57 * lookup table, really, as the multiplications take much too long with
58 * short delays. This is a "reasonable" implementation, though (and the
59 * first constant multiplication gets optimized away if the delay is
60 * a constant)
61 */
62
63static inline void __udelay(unsigned long usecs, unsigned long lpj)
64{
65 unsigned long hi, lo;
66
67 /*
68 * The rate for HZ == 128 is rounded wrongly by the catchall case
69 * for 64-bit. Excessive precision? Probably ...
70 */
71#if defined(CONFIG_64BIT) && (HZ == 128)
72 usecs *= 0x0008637bd05af6c7UL; /* 2**64 / (1000000 / HZ) */
73#elif defined(CONFIG_64BIT)
74 usecs *= (0x8000000000000000UL / (500000 / HZ));
75#else /* 32-bit junk follows here */
76 usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) +
77 0x80000000ULL) >> 32);
78#endif
79
80 if (sizeof(long) == 4)
81 __asm__("multu\t%2, %3"
82 : "=h" (usecs), "=l" (lo)
83 : "r" (usecs), "r" (lpj)
84 : GCC_REG_ACCUM);
85 else if (sizeof(long) == 8 && !R4000_WAR)
86 __asm__("dmultu\t%2, %3"
87 : "=h" (usecs), "=l" (lo)
88 : "r" (usecs), "r" (lpj)
89 : GCC_REG_ACCUM);
90 else if (sizeof(long) == 8 && R4000_WAR)
91 __asm__("dmultu\t%3, %4\n\tmfhi\t%0"
92 : "=r" (usecs), "=h" (hi), "=l" (lo)
93 : "r" (usecs), "r" (lpj)
94 : GCC_REG_ACCUM);
95
96 __delay(usecs);
97}
98
99#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val
100
101#define udelay(usecs) __udelay((usecs), __udelay_val)
102 20
103/* make sure "usecs *= ..." in udelay do not overflow. */ 21/* make sure "usecs *= ..." in udelay do not overflow. */
104#if HZ >= 1000 22#if HZ >= 1000
diff --git a/arch/mips/include/asm/ioctl.h b/arch/mips/include/asm/ioctl.h
index 85067e248a83..916163401b2c 100644
--- a/arch/mips/include/asm/ioctl.h
+++ b/arch/mips/include/asm/ioctl.h
@@ -60,12 +60,16 @@
60 ((nr) << _IOC_NRSHIFT) | \ 60 ((nr) << _IOC_NRSHIFT) | \
61 ((size) << _IOC_SIZESHIFT)) 61 ((size) << _IOC_SIZESHIFT))
62 62
63#ifdef __KERNEL__
63/* provoke compile error for invalid uses of size argument */ 64/* provoke compile error for invalid uses of size argument */
64extern unsigned int __invalid_size_argument_for_IOC; 65extern unsigned int __invalid_size_argument_for_IOC;
65#define _IOC_TYPECHECK(t) \ 66#define _IOC_TYPECHECK(t) \
66 ((sizeof(t) == sizeof(t[1]) && \ 67 ((sizeof(t) == sizeof(t[1]) && \
67 sizeof(t) < (1 << _IOC_SIZEBITS)) ? \ 68 sizeof(t) < (1 << _IOC_SIZEBITS)) ? \
68 sizeof(t) : __invalid_size_argument_for_IOC) 69 sizeof(t) : __invalid_size_argument_for_IOC)
70#else
71#define _IOC_TYPECHECK(t) (sizeof(t))
72#endif
69 73
70/* used to create numbers */ 74/* used to create numbers */
71#define _IO(type, nr) _IOC(_IOC_NONE, (type), (nr), 0) 75#define _IO(type, nr) _IOC(_IOC_NONE, (type), (nr), 0)
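
The macro this hunk hides behind __KERNEL__ catches bad size arguments at build time: the conditional folds to a constant, and an invalid type leaves behind a reference to an undefined extern that fails the link. A standalone sketch of the same trick, assuming a 13-bit size field and an optimizing compile (e.g. gcc -O2) so the dead branch is discarded; MY_TYPECHECK and MY_IOC_SIZEBITS are made-up names:

#include <stdio.h>

/* Never defined anywhere: referencing it turns into a link error. */
extern unsigned int __invalid_size_argument_for_IOC;

#define MY_IOC_SIZEBITS 13	/* assumption: mirrors the MIPS field width */

/* Same shape as the kernel's _IOC_TYPECHECK(): the condition folds at
 * compile time, so only an invalid type keeps the extern reference alive. */
#define MY_TYPECHECK(t) \
	((sizeof(t) == sizeof(t[1]) && \
	  sizeof(t) < (1 << MY_IOC_SIZEBITS)) ? \
	  sizeof(t) : __invalid_size_argument_for_IOC)

int main(void)
{
	/* Valid type: the extern branch is folded away and the link succeeds. */
	printf("size = %zu\n", (size_t)MY_TYPECHECK(int));
	return 0;
}
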
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 3214ade02d10..4f1eed107b08 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq)
49#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF 49#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
50#include <linux/cpumask.h> 50#include <linux/cpumask.h>
51 51
52extern void plat_set_irq_affinity(unsigned int irq, 52extern int plat_set_irq_affinity(unsigned int irq,
53 const struct cpumask *affinity); 53 const struct cpumask *affinity);
54extern void smtc_forward_irq(unsigned int irq); 54extern void smtc_forward_irq(unsigned int irq);
55 55
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 8de858f5449f..c2d53c18fd36 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -956,7 +956,7 @@ __clear_user(void __user *addr, __kernel_size_t size)
956 void __user * __cl_addr = (addr); \ 956 void __user * __cl_addr = (addr); \
957 unsigned long __cl_size = (n); \ 957 unsigned long __cl_size = (n); \
958 if (__cl_size && access_ok(VERIFY_WRITE, \ 958 if (__cl_size && access_ok(VERIFY_WRITE, \
959 ((unsigned long)(__cl_addr)), __cl_size)) \ 959 __cl_addr, __cl_size)) \
960 __cl_size = __clear_user(__cl_addr, __cl_size); \ 960 __cl_size = __clear_user(__cl_addr, __cl_size); \
961 __cl_size; \ 961 __cl_size; \
962}) 962})
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 87deb8f6c458..3f43c2e3aa5a 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
155 155
156static DEFINE_SPINLOCK(gic_lock); 156static DEFINE_SPINLOCK(gic_lock);
157 157
158static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) 158static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
159{ 159{
160 cpumask_t tmp = CPU_MASK_NONE; 160 cpumask_t tmp = CPU_MASK_NONE;
161 unsigned long flags; 161 unsigned long flags;
@@ -166,7 +166,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
166 166
167 cpumask_and(&tmp, cpumask, cpu_online_mask); 167 cpumask_and(&tmp, cpumask, cpu_online_mask);
168 if (cpus_empty(tmp)) 168 if (cpus_empty(tmp))
169 return; 169 return -1;
170 170
171 /* Assumption : cpumask refers to a single CPU */ 171 /* Assumption : cpumask refers to a single CPU */
172 spin_lock_irqsave(&gic_lock, flags); 172 spin_lock_irqsave(&gic_lock, flags);
@@ -190,6 +190,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
190 cpumask_copy(irq_desc[irq].affinity, cpumask); 190 cpumask_copy(irq_desc[irq].affinity, cpumask);
191 spin_unlock_irqrestore(&gic_lock, flags); 191 spin_unlock_irqrestore(&gic_lock, flags);
192 192
193 return 0;
193} 194}
194#endif 195#endif
195 196
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 26760cad8b69..e0a4ac18fa07 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -42,7 +42,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
42 seq_printf(m, fmt, __cpu_name[n], 42 seq_printf(m, fmt, __cpu_name[n],
43 (version >> 4) & 0x0f, version & 0x0f, 43 (version >> 4) & 0x0f, version & 0x0f,
44 (fp_vers >> 4) & 0x0f, fp_vers & 0x0f); 44 (fp_vers >> 4) & 0x0f, fp_vers & 0x0f);
45 seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n", 45 seq_printf(m, "BogoMIPS\t\t: %u.%02u\n",
46 cpu_data[n].udelay_val / (500000/HZ), 46 cpu_data[n].udelay_val / (500000/HZ),
47 (cpu_data[n].udelay_val / (5000/HZ)) % 100); 47 (cpu_data[n].udelay_val / (5000/HZ)) % 100);
48 seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no"); 48 seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no");
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index c13c7ad2cdae..2adead5a8a37 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -2,8 +2,8 @@
2# Makefile for MIPS-specific library files.. 2# Makefile for MIPS-specific library files..
3# 3#
4 4
5lib-y += csum_partial.o memcpy.o memcpy-inatomic.o memset.o strlen_user.o \ 5lib-y += csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \
6 strncpy_user.o strnlen_user.o uncached.o 6 strlen_user.o strncpy_user.o strnlen_user.o uncached.o
7 7
8obj-y += iomap.o 8obj-y += iomap.o
9obj-$(CONFIG_PCI) += iomap-pci.o 9obj-$(CONFIG_PCI) += iomap-pci.o
diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c
new file mode 100644
index 000000000000..f69c6b569eb3
--- /dev/null
+++ b/arch/mips/lib/delay.c
@@ -0,0 +1,56 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1994 by Waldorf Electronics
7 * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
8 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
9 * Copyright (C) 2007 Maciej W. Rozycki
10 */
11#include <linux/module.h>
12#include <linux/param.h>
13#include <linux/smp.h>
14
15#include <asm/compiler.h>
16#include <asm/war.h>
17
18inline void __delay(unsigned int loops)
19{
20 __asm__ __volatile__ (
21 " .set noreorder \n"
22 " .align 3 \n"
23 "1: bnez %0, 1b \n"
24 " subu %0, 1 \n"
25 " .set reorder \n"
26 : "=r" (loops)
27 : "0" (loops));
28}
29EXPORT_SYMBOL(__delay);
30
31/*
32 * Division by multiplication: you don't have to worry about
33 * loss of precision.
34 *
35 * Use only for very small delays ( < 1 msec). Should probably use a
36 * lookup table, really, as the multiplications take much too long with
37 * short delays. This is a "reasonable" implementation, though (and the
38 * first constant multiplications gets optimized away if the delay is
39 * a constant)
40 */
41
42void __udelay(unsigned long us)
43{
44 unsigned int lpj = current_cpu_data.udelay_val;
45
46 __delay((us * 0x000010c7 * HZ * lpj) >> 32);
47}
48EXPORT_SYMBOL(__udelay);
49
50void __ndelay(unsigned long ns)
51{
52 unsigned int lpj = current_cpu_data.udelay_val;
53
54 __delay((ns * 0x00000005 * HZ * lpj) >> 32);
55}
56EXPORT_SYMBOL(__ndelay);
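
The magic constants follow from the comment above: 0x000010c7 = 4295 ~= 2^32/10^6 and 0x00000005 ~= 2^32/10^9, so the multiply-and-shift approximates loops = delay * lpj * HZ / 10^6 (or 10^9) without a division. A quick host-side check of the arithmetic, assuming HZ = 100 and a made-up udelay_val:

#include <stdint.h>
#include <stdio.h>

#define HZ 100			/* assumption: one common MIPS configuration */

int main(void)
{
	uint64_t lpj = 1482752;	/* assumption: a plausible udelay_val */
	uint64_t us = 250;

	/* Fixed point: 0x10c7 = 4295 ~= 2^32 / 1000000. */
	uint64_t fast  = (us * 0x000010c7ULL * HZ * lpj) >> 32;
	uint64_t exact = us * lpj * HZ / 1000000;

	/* The two agree to within a few loops (37069 vs 37068 here). */
	printf("fast = %llu, exact = %llu\n",
	       (unsigned long long)fast, (unsigned long long)exact);
	return 0;
}
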
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ba31888fefb..499ffe5475df 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,7 +114,7 @@ struct plat_smp_ops msmtc_smp_ops = {
114 */ 114 */
115 115
116 116
117void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) 117int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
118{ 118{
119 cpumask_t tmask; 119 cpumask_t tmask;
120 int cpu = 0; 120 int cpu = 0;
@@ -156,5 +156,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
156 156
157 /* Do any generic SMTC IRQ affinity setup */ 157 /* Do any generic SMTC IRQ affinity setup */
158 smtc_set_irq_affinity(irq, tmask); 158 smtc_set_irq_affinity(irq, tmask);
159
160 return 0;
159} 161}
160#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */ 162#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 667da932b7b2..9b95d80ebc6e 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -53,7 +53,7 @@ static inline void ip32_machine_halt(void)
53 53
54static void ip32_machine_power_off(void) 54static void ip32_machine_power_off(void)
55{ 55{
56 volatile unsigned char reg_a, xctrl_a, xctrl_b; 56 unsigned char reg_a, xctrl_a, xctrl_b;
57 57
58 disable_irq(MACEISA_RTC_IRQ); 58 disable_irq(MACEISA_RTC_IRQ);
59 reg_a = CMOS_READ(RTC_REG_A); 59 reg_a = CMOS_READ(RTC_REG_A);
@@ -91,9 +91,10 @@ static void blink_timeout(unsigned long data)
91 91
92static void debounce(unsigned long data) 92static void debounce(unsigned long data)
93{ 93{
94 volatile unsigned char reg_a, reg_c, xctrl_a; 94 unsigned char reg_a, reg_c, xctrl_a;
95 95
96 reg_c = CMOS_READ(RTC_INTR_FLAGS); 96 reg_c = CMOS_READ(RTC_INTR_FLAGS);
97 reg_a = CMOS_READ(RTC_REG_A);
97 CMOS_WRITE(reg_a | DS_REGA_DV0, RTC_REG_A); 98 CMOS_WRITE(reg_a | DS_REGA_DV0, RTC_REG_A);
98 wbflush(); 99 wbflush();
99 xctrl_a = CMOS_READ(DS_B1_XCTRL4A); 100 xctrl_a = CMOS_READ(DS_B1_XCTRL4A);
@@ -137,7 +138,7 @@ static inline void ip32_power_button(void)
137 138
138static irqreturn_t ip32_rtc_int(int irq, void *dev_id) 139static irqreturn_t ip32_rtc_int(int irq, void *dev_id)
139{ 140{
140 volatile unsigned char reg_c; 141 unsigned char reg_c;
141 142
142 reg_c = CMOS_READ(RTC_INTR_FLAGS); 143 reg_c = CMOS_READ(RTC_INTR_FLAGS);
143 if (!(reg_c & RTC_IRQF)) { 144 if (!(reg_c & RTC_IRQF)) {
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index c147c4b35d3f..690de06bde90 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
50static void disable_bcm1480_irq(unsigned int irq); 50static void disable_bcm1480_irq(unsigned int irq);
51static void ack_bcm1480_irq(unsigned int irq); 51static void ack_bcm1480_irq(unsigned int irq);
52#ifdef CONFIG_SMP 52#ifdef CONFIG_SMP
53static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); 53static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
54#endif 54#endif
55 55
56#ifdef CONFIG_PCI 56#ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
109} 109}
110 110
111#ifdef CONFIG_SMP 111#ifdef CONFIG_SMP
112static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) 112static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
113{ 113{
114 int i = 0, old_cpu, cpu, int_on, k; 114 int i = 0, old_cpu, cpu, int_on, k;
115 u64 cur_ints; 115 u64 cur_ints;
@@ -118,7 +118,7 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
118 118
119 if (cpumask_weight(mask) != 1) { 119 if (cpumask_weight(mask) != 1) {
120 printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); 120 printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
121 return; 121 return -1;
122 } 122 }
123 i = cpumask_first(mask); 123 i = cpumask_first(mask);
124 124
@@ -152,6 +152,8 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
152 } 152 }
153 } 153 }
154 spin_unlock_irqrestore(&bcm1480_imr_lock, flags); 154 spin_unlock_irqrestore(&bcm1480_imr_lock, flags);
155
156 return 0;
155} 157}
156#endif 158#endif
157 159
diff --git a/arch/mips/sibyte/cfe/setup.c b/arch/mips/sibyte/cfe/setup.c
index 3de30f79db3f..eb5396cf81bb 100644
--- a/arch/mips/sibyte/cfe/setup.c
+++ b/arch/mips/sibyte/cfe/setup.c
@@ -288,13 +288,7 @@ void __init prom_init(void)
288 */ 288 */
289 cfe_cons_handle = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE); 289 cfe_cons_handle = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE);
290 if (cfe_getenv("LINUX_CMDLINE", arcs_cmdline, CL_SIZE) < 0) { 290 if (cfe_getenv("LINUX_CMDLINE", arcs_cmdline, CL_SIZE) < 0) {
291 if (argc < 0) { 291 if (argc >= 0) {
292 /*
293 * It's OK for direct boot to not provide a
294 * command line
295 */
296 strcpy(arcs_cmdline, "root=/dev/ram0 ");
297 } else {
298 /* The loader should have set the command line */ 292 /* The loader should have set the command line */
299 /* too early for panic to do any good */ 293 /* too early for panic to do any good */
300 printk("LINUX_CMDLINE not defined in cfe."); 294 printk("LINUX_CMDLINE not defined in cfe.");
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index 38cb998ade22..409dec798863 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
50static void disable_sb1250_irq(unsigned int irq); 50static void disable_sb1250_irq(unsigned int irq);
51static void ack_sb1250_irq(unsigned int irq); 51static void ack_sb1250_irq(unsigned int irq);
52#ifdef CONFIG_SMP 52#ifdef CONFIG_SMP
53static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); 53static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
54#endif 54#endif
55 55
56#ifdef CONFIG_SIBYTE_HAS_LDT 56#ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,7 +103,7 @@ void sb1250_unmask_irq(int cpu, int irq)
103} 103}
104 104
105#ifdef CONFIG_SMP 105#ifdef CONFIG_SMP
106static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) 106static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
107{ 107{
108 int i = 0, old_cpu, cpu, int_on; 108 int i = 0, old_cpu, cpu, int_on;
109 u64 cur_ints; 109 u64 cur_ints;
@@ -113,7 +113,7 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
113 113
114 if (cpumask_weight(mask) > 1) { 114 if (cpumask_weight(mask) > 1) {
115 printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); 115 printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
116 return; 116 return -1;
117 } 117 }
118 118
119 /* Convert logical CPU to physical CPU */ 119 /* Convert logical CPU to physical CPU */
@@ -143,6 +143,8 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
143 R_IMR_INTERRUPT_MASK)); 143 R_IMR_INTERRUPT_MASK));
144 } 144 }
145 spin_unlock_irqrestore(&sb1250_imr_lock, flags); 145 spin_unlock_irqrestore(&sb1250_imr_lock, flags);
146
147 return 0;
146} 148}
147#endif 149#endif
148 150
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 4ea4229d765c..8007f1e65729 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -130,15 +130,17 @@ int cpu_check_affinity(unsigned int irq, const struct cpumask *dest)
130 return cpu_dest; 130 return cpu_dest;
131} 131}
132 132
133static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) 133static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
134{ 134{
135 int cpu_dest; 135 int cpu_dest;
136 136
137 cpu_dest = cpu_check_affinity(irq, dest); 137 cpu_dest = cpu_check_affinity(irq, dest);
138 if (cpu_dest < 0) 138 if (cpu_dest < 0)
139 return; 139 return -1;
140 140
141 cpumask_copy(&irq_desc[irq].affinity, dest); 141 cpumask_copy(&irq_desc[irq].affinity, dest);
142
143 return 0;
142} 144}
143#endif 145#endif
144 146
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a0d1146a0578..cdc9a6ff4be8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -868,6 +868,18 @@ config TASK_SIZE
868 default "0x80000000" if PPC_PREP || PPC_8xx 868 default "0x80000000" if PPC_PREP || PPC_8xx
869 default "0xc0000000" 869 default "0xc0000000"
870 870
871config CONSISTENT_SIZE_BOOL
872 bool "Set custom consistent memory pool size"
873 depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
874 help
875 This option allows you to set the size of the
876 consistent memory pool. This pool of virtual memory
877 is used to make consistent memory allocations.
878
879config CONSISTENT_SIZE
880 hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
881 default "0x00200000" if NOT_COHERENT_CACHE
882
871config PIN_TLB 883config PIN_TLB
872 bool "Pinned Kernel TLBs (860 ONLY)" 884 bool "Pinned Kernel TLBs (860 ONLY)"
873 depends on ADVANCED_OPTIONS && 8xx 885 depends on ADVANCED_OPTIONS && 8xx
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 5339bb44cce9..ea8870a34482 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.28-rc3 3# Linux kernel version: 2.6.30-rc7
4# Tue Nov 11 19:36:51 2008 4# Mon May 25 14:53:25 2009
5# 5#
6# CONFIG_PPC64 is not set 6# CONFIG_PPC64 is not set
7 7
@@ -14,6 +14,7 @@ CONFIG_6xx=y
14# CONFIG_40x is not set 14# CONFIG_40x is not set
15# CONFIG_44x is not set 15# CONFIG_44x is not set
16# CONFIG_E200 is not set 16# CONFIG_E200 is not set
17CONFIG_PPC_BOOK3S=y
17CONFIG_PPC_FPU=y 18CONFIG_PPC_FPU=y
18CONFIG_ALTIVEC=y 19CONFIG_ALTIVEC=y
19CONFIG_PPC_STD_MMU=y 20CONFIG_PPC_STD_MMU=y
@@ -43,7 +44,7 @@ CONFIG_GENERIC_FIND_NEXT_BIT=y
43CONFIG_PPC=y 44CONFIG_PPC=y
44CONFIG_EARLY_PRINTK=y 45CONFIG_EARLY_PRINTK=y
45CONFIG_GENERIC_NVRAM=y 46CONFIG_GENERIC_NVRAM=y
46CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y 47CONFIG_SCHED_OMIT_FRAME_POINTER=y
47CONFIG_ARCH_MAY_HAVE_PC_FDC=y 48CONFIG_ARCH_MAY_HAVE_PC_FDC=y
48CONFIG_PPC_OF=y 49CONFIG_PPC_OF=y
49CONFIG_OF=y 50CONFIG_OF=y
@@ -52,12 +53,14 @@ CONFIG_OF=y
52CONFIG_AUDIT_ARCH=y 53CONFIG_AUDIT_ARCH=y
53CONFIG_GENERIC_BUG=y 54CONFIG_GENERIC_BUG=y
54CONFIG_SYS_SUPPORTS_APM_EMULATION=y 55CONFIG_SYS_SUPPORTS_APM_EMULATION=y
56CONFIG_DTC=y
55# CONFIG_DEFAULT_UIMAGE is not set 57# CONFIG_DEFAULT_UIMAGE is not set
56CONFIG_HIBERNATE_32=y 58CONFIG_HIBERNATE_32=y
57CONFIG_ARCH_HIBERNATION_POSSIBLE=y 59CONFIG_ARCH_HIBERNATION_POSSIBLE=y
58CONFIG_ARCH_SUSPEND_POSSIBLE=y 60CONFIG_ARCH_SUSPEND_POSSIBLE=y
59# CONFIG_PPC_DCR_NATIVE is not set 61# CONFIG_PPC_DCR_NATIVE is not set
60# CONFIG_PPC_DCR_MMIO is not set 62# CONFIG_PPC_DCR_MMIO is not set
63CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
61CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" 64CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
62 65
63# 66#
@@ -72,14 +75,24 @@ CONFIG_SWAP=y
72CONFIG_SYSVIPC=y 75CONFIG_SYSVIPC=y
73CONFIG_SYSVIPC_SYSCTL=y 76CONFIG_SYSVIPC_SYSCTL=y
74CONFIG_POSIX_MQUEUE=y 77CONFIG_POSIX_MQUEUE=y
78CONFIG_POSIX_MQUEUE_SYSCTL=y
75# CONFIG_BSD_PROCESS_ACCT is not set 79# CONFIG_BSD_PROCESS_ACCT is not set
76# CONFIG_TASKSTATS is not set 80# CONFIG_TASKSTATS is not set
77# CONFIG_AUDIT is not set 81# CONFIG_AUDIT is not set
82
83#
84# RCU Subsystem
85#
86CONFIG_CLASSIC_RCU=y
87# CONFIG_TREE_RCU is not set
88# CONFIG_PREEMPT_RCU is not set
89# CONFIG_TREE_RCU_TRACE is not set
90# CONFIG_PREEMPT_RCU_TRACE is not set
78CONFIG_IKCONFIG=y 91CONFIG_IKCONFIG=y
79CONFIG_IKCONFIG_PROC=y 92CONFIG_IKCONFIG_PROC=y
80CONFIG_LOG_BUF_SHIFT=14 93CONFIG_LOG_BUF_SHIFT=14
81# CONFIG_CGROUPS is not set
82# CONFIG_GROUP_SCHED is not set 94# CONFIG_GROUP_SCHED is not set
95# CONFIG_CGROUPS is not set
83CONFIG_SYSFS_DEPRECATED=y 96CONFIG_SYSFS_DEPRECATED=y
84CONFIG_SYSFS_DEPRECATED_V2=y 97CONFIG_SYSFS_DEPRECATED_V2=y
85# CONFIG_RELAY is not set 98# CONFIG_RELAY is not set
@@ -88,23 +101,27 @@ CONFIG_NAMESPACES=y
88# CONFIG_IPC_NS is not set 101# CONFIG_IPC_NS is not set
89# CONFIG_USER_NS is not set 102# CONFIG_USER_NS is not set
90# CONFIG_PID_NS is not set 103# CONFIG_PID_NS is not set
104# CONFIG_NET_NS is not set
91CONFIG_BLK_DEV_INITRD=y 105CONFIG_BLK_DEV_INITRD=y
92CONFIG_INITRAMFS_SOURCE="" 106CONFIG_INITRAMFS_SOURCE=""
107CONFIG_RD_GZIP=y
108CONFIG_RD_BZIP2=y
109CONFIG_RD_LZMA=y
93# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set 110# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
94CONFIG_SYSCTL=y 111CONFIG_SYSCTL=y
112CONFIG_ANON_INODES=y
95# CONFIG_EMBEDDED is not set 113# CONFIG_EMBEDDED is not set
96CONFIG_SYSCTL_SYSCALL=y 114CONFIG_SYSCTL_SYSCALL=y
97CONFIG_KALLSYMS=y 115CONFIG_KALLSYMS=y
98CONFIG_KALLSYMS_ALL=y 116CONFIG_KALLSYMS_ALL=y
99# CONFIG_KALLSYMS_EXTRA_PASS is not set 117# CONFIG_KALLSYMS_EXTRA_PASS is not set
118# CONFIG_STRIP_ASM_SYMS is not set
100CONFIG_HOTPLUG=y 119CONFIG_HOTPLUG=y
101CONFIG_PRINTK=y 120CONFIG_PRINTK=y
102CONFIG_BUG=y 121CONFIG_BUG=y
103CONFIG_ELF_CORE=y 122CONFIG_ELF_CORE=y
104# CONFIG_COMPAT_BRK is not set
105CONFIG_BASE_FULL=y 123CONFIG_BASE_FULL=y
106CONFIG_FUTEX=y 124CONFIG_FUTEX=y
107CONFIG_ANON_INODES=y
108CONFIG_EPOLL=y 125CONFIG_EPOLL=y
109CONFIG_SIGNALFD=y 126CONFIG_SIGNALFD=y
110CONFIG_TIMERFD=y 127CONFIG_TIMERFD=y
@@ -114,10 +131,12 @@ CONFIG_AIO=y
114CONFIG_VM_EVENT_COUNTERS=y 131CONFIG_VM_EVENT_COUNTERS=y
115CONFIG_PCI_QUIRKS=y 132CONFIG_PCI_QUIRKS=y
116CONFIG_SLUB_DEBUG=y 133CONFIG_SLUB_DEBUG=y
134# CONFIG_COMPAT_BRK is not set
117# CONFIG_SLAB is not set 135# CONFIG_SLAB is not set
118CONFIG_SLUB=y 136CONFIG_SLUB=y
119# CONFIG_SLOB is not set 137# CONFIG_SLOB is not set
120CONFIG_PROFILING=y 138CONFIG_PROFILING=y
139CONFIG_TRACEPOINTS=y
121# CONFIG_MARKERS is not set 140# CONFIG_MARKERS is not set
122CONFIG_OPROFILE=y 141CONFIG_OPROFILE=y
123CONFIG_HAVE_OPROFILE=y 142CONFIG_HAVE_OPROFILE=y
@@ -127,10 +146,10 @@ CONFIG_HAVE_IOREMAP_PROT=y
127CONFIG_HAVE_KPROBES=y 146CONFIG_HAVE_KPROBES=y
128CONFIG_HAVE_KRETPROBES=y 147CONFIG_HAVE_KRETPROBES=y
129CONFIG_HAVE_ARCH_TRACEHOOK=y 148CONFIG_HAVE_ARCH_TRACEHOOK=y
149# CONFIG_SLOW_WORK is not set
130# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set 150# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
131CONFIG_SLABINFO=y 151CONFIG_SLABINFO=y
132CONFIG_RT_MUTEXES=y 152CONFIG_RT_MUTEXES=y
133# CONFIG_TINY_SHMEM is not set
134CONFIG_BASE_SMALL=0 153CONFIG_BASE_SMALL=0
135CONFIG_MODULES=y 154CONFIG_MODULES=y
136# CONFIG_MODULE_FORCE_LOAD is not set 155# CONFIG_MODULE_FORCE_LOAD is not set
@@ -138,11 +157,8 @@ CONFIG_MODULE_UNLOAD=y
138CONFIG_MODULE_FORCE_UNLOAD=y 157CONFIG_MODULE_FORCE_UNLOAD=y
139# CONFIG_MODVERSIONS is not set 158# CONFIG_MODVERSIONS is not set
140# CONFIG_MODULE_SRCVERSION_ALL is not set 159# CONFIG_MODULE_SRCVERSION_ALL is not set
141CONFIG_KMOD=y
142CONFIG_BLOCK=y 160CONFIG_BLOCK=y
143CONFIG_LBD=y 161CONFIG_LBD=y
144# CONFIG_BLK_DEV_IO_TRACE is not set
145CONFIG_LSF=y
146CONFIG_BLK_DEV_BSG=y 162CONFIG_BLK_DEV_BSG=y
147# CONFIG_BLK_DEV_INTEGRITY is not set 163# CONFIG_BLK_DEV_INTEGRITY is not set
148 164
@@ -158,14 +174,11 @@ CONFIG_DEFAULT_AS=y
158# CONFIG_DEFAULT_CFQ is not set 174# CONFIG_DEFAULT_CFQ is not set
159# CONFIG_DEFAULT_NOOP is not set 175# CONFIG_DEFAULT_NOOP is not set
160CONFIG_DEFAULT_IOSCHED="anticipatory" 176CONFIG_DEFAULT_IOSCHED="anticipatory"
161CONFIG_CLASSIC_RCU=y
162CONFIG_FREEZER=y 177CONFIG_FREEZER=y
163 178
164# 179#
165# Platform support 180# Platform support
166# 181#
167CONFIG_PPC_MULTIPLATFORM=y
168CONFIG_CLASSIC32=y
169# CONFIG_PPC_CHRP is not set 182# CONFIG_PPC_CHRP is not set
170# CONFIG_MPC5121_ADS is not set 183# CONFIG_MPC5121_ADS is not set
171# CONFIG_MPC5121_GENERIC is not set 184# CONFIG_MPC5121_GENERIC is not set
@@ -178,7 +191,9 @@ CONFIG_PPC_PMAC=y
178# CONFIG_PPC_83xx is not set 191# CONFIG_PPC_83xx is not set
179# CONFIG_PPC_86xx is not set 192# CONFIG_PPC_86xx is not set
180# CONFIG_EMBEDDED6xx is not set 193# CONFIG_EMBEDDED6xx is not set
194# CONFIG_AMIGAONE is not set
181CONFIG_PPC_NATIVE=y 195CONFIG_PPC_NATIVE=y
196CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
182# CONFIG_IPIC is not set 197# CONFIG_IPIC is not set
183CONFIG_MPIC=y 198CONFIG_MPIC=y
184# CONFIG_MPIC_WEIRD is not set 199# CONFIG_MPIC_WEIRD is not set
@@ -212,11 +227,12 @@ CONFIG_CPU_FREQ_PMAC=y
212CONFIG_PPC601_SYNC_FIX=y 227CONFIG_PPC601_SYNC_FIX=y
213# CONFIG_TAU is not set 228# CONFIG_TAU is not set
214# CONFIG_FSL_ULI1575 is not set 229# CONFIG_FSL_ULI1575 is not set
230# CONFIG_SIMPLE_GPIO is not set
215 231
216# 232#
217# Kernel options 233# Kernel options
218# 234#
219# CONFIG_HIGHMEM is not set 235CONFIG_HIGHMEM=y
220CONFIG_TICK_ONESHOT=y 236CONFIG_TICK_ONESHOT=y
221CONFIG_NO_HZ=y 237CONFIG_NO_HZ=y
222CONFIG_HIGH_RES_TIMERS=y 238CONFIG_HIGH_RES_TIMERS=y
@@ -239,6 +255,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
239CONFIG_ARCH_HAS_WALK_MEMORY=y 255CONFIG_ARCH_HAS_WALK_MEMORY=y
240CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y 256CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
241# CONFIG_KEXEC is not set 257# CONFIG_KEXEC is not set
258# CONFIG_CRASH_DUMP is not set
242CONFIG_ARCH_FLATMEM_ENABLE=y 259CONFIG_ARCH_FLATMEM_ENABLE=y
243CONFIG_ARCH_POPULATES_NODE_MAP=y 260CONFIG_ARCH_POPULATES_NODE_MAP=y
244CONFIG_SELECT_MEMORY_MODEL=y 261CONFIG_SELECT_MEMORY_MODEL=y
@@ -250,12 +267,17 @@ CONFIG_FLAT_NODE_MEM_MAP=y
250CONFIG_PAGEFLAGS_EXTENDED=y 267CONFIG_PAGEFLAGS_EXTENDED=y
251CONFIG_SPLIT_PTLOCK_CPUS=4 268CONFIG_SPLIT_PTLOCK_CPUS=4
252# CONFIG_MIGRATION is not set 269# CONFIG_MIGRATION is not set
253# CONFIG_RESOURCES_64BIT is not set
254# CONFIG_PHYS_ADDR_T_64BIT is not set 270# CONFIG_PHYS_ADDR_T_64BIT is not set
255CONFIG_ZONE_DMA_FLAG=1 271CONFIG_ZONE_DMA_FLAG=1
256CONFIG_BOUNCE=y 272CONFIG_BOUNCE=y
257CONFIG_VIRT_TO_BUS=y 273CONFIG_VIRT_TO_BUS=y
258CONFIG_UNEVICTABLE_LRU=y 274CONFIG_UNEVICTABLE_LRU=y
275CONFIG_HAVE_MLOCK=y
276CONFIG_HAVE_MLOCKED_PAGE_BIT=y
277CONFIG_PPC_4K_PAGES=y
278# CONFIG_PPC_16K_PAGES is not set
279# CONFIG_PPC_64K_PAGES is not set
280# CONFIG_PPC_256K_PAGES is not set
259CONFIG_FORCE_MAX_ZONEORDER=11 281CONFIG_FORCE_MAX_ZONEORDER=11
260CONFIG_PROC_DEVICETREE=y 282CONFIG_PROC_DEVICETREE=y
261# CONFIG_CMDLINE_BOOL is not set 283# CONFIG_CMDLINE_BOOL is not set
@@ -288,6 +310,8 @@ CONFIG_ARCH_SUPPORTS_MSI=y
288# CONFIG_PCI_MSI is not set 310# CONFIG_PCI_MSI is not set
289# CONFIG_PCI_LEGACY is not set 311# CONFIG_PCI_LEGACY is not set
290# CONFIG_PCI_DEBUG is not set 312# CONFIG_PCI_DEBUG is not set
313# CONFIG_PCI_STUB is not set
314# CONFIG_PCI_IOV is not set
291CONFIG_PCCARD=m 315CONFIG_PCCARD=m
292# CONFIG_PCMCIA_DEBUG is not set 316# CONFIG_PCMCIA_DEBUG is not set
293CONFIG_PCMCIA=m 317CONFIG_PCMCIA=m
@@ -397,6 +421,8 @@ CONFIG_NETFILTER_XTABLES=m
397CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m 421CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
398# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set 422# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set
399# CONFIG_NETFILTER_XT_TARGET_DSCP is not set 423# CONFIG_NETFILTER_XT_TARGET_DSCP is not set
424CONFIG_NETFILTER_XT_TARGET_HL=m
425# CONFIG_NETFILTER_XT_TARGET_LED is not set
400CONFIG_NETFILTER_XT_TARGET_MARK=m 426CONFIG_NETFILTER_XT_TARGET_MARK=m
401CONFIG_NETFILTER_XT_TARGET_NFLOG=m 427CONFIG_NETFILTER_XT_TARGET_NFLOG=m
402CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m 428CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
@@ -405,6 +431,7 @@ CONFIG_NETFILTER_XT_TARGET_RATEEST=m
405CONFIG_NETFILTER_XT_TARGET_TRACE=m 431CONFIG_NETFILTER_XT_TARGET_TRACE=m
406CONFIG_NETFILTER_XT_TARGET_TCPMSS=m 432CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
407CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m 433CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
434# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set
408CONFIG_NETFILTER_XT_MATCH_COMMENT=m 435CONFIG_NETFILTER_XT_MATCH_COMMENT=m
409# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set 436# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set
410CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m 437CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
@@ -415,6 +442,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m
415CONFIG_NETFILTER_XT_MATCH_ESP=m 442CONFIG_NETFILTER_XT_MATCH_ESP=m
416# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set 443# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set
417CONFIG_NETFILTER_XT_MATCH_HELPER=m 444CONFIG_NETFILTER_XT_MATCH_HELPER=m
445CONFIG_NETFILTER_XT_MATCH_HL=m
418CONFIG_NETFILTER_XT_MATCH_IPRANGE=m 446CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
419CONFIG_NETFILTER_XT_MATCH_LENGTH=m 447CONFIG_NETFILTER_XT_MATCH_LENGTH=m
420CONFIG_NETFILTER_XT_MATCH_LIMIT=m 448CONFIG_NETFILTER_XT_MATCH_LIMIT=m
@@ -478,17 +506,15 @@ CONFIG_IP_NF_ARPFILTER=m
478CONFIG_IP_NF_ARP_MANGLE=m 506CONFIG_IP_NF_ARP_MANGLE=m
479CONFIG_IP_DCCP=m 507CONFIG_IP_DCCP=m
480CONFIG_INET_DCCP_DIAG=m 508CONFIG_INET_DCCP_DIAG=m
481CONFIG_IP_DCCP_ACKVEC=y
482 509
483# 510#
484# DCCP CCIDs Configuration (EXPERIMENTAL) 511# DCCP CCIDs Configuration (EXPERIMENTAL)
485# 512#
486CONFIG_IP_DCCP_CCID2=m
487# CONFIG_IP_DCCP_CCID2_DEBUG is not set 513# CONFIG_IP_DCCP_CCID2_DEBUG is not set
488CONFIG_IP_DCCP_CCID3=m 514CONFIG_IP_DCCP_CCID3=y
489# CONFIG_IP_DCCP_CCID3_DEBUG is not set 515# CONFIG_IP_DCCP_CCID3_DEBUG is not set
490CONFIG_IP_DCCP_CCID3_RTO=100 516CONFIG_IP_DCCP_CCID3_RTO=100
491CONFIG_IP_DCCP_TFRC_LIB=m 517CONFIG_IP_DCCP_TFRC_LIB=y
492 518
493# 519#
494# DCCP Kernel Hacking 520# DCCP Kernel Hacking
@@ -508,13 +534,16 @@ CONFIG_IP_DCCP_TFRC_LIB=m
508# CONFIG_LAPB is not set 534# CONFIG_LAPB is not set
509# CONFIG_ECONET is not set 535# CONFIG_ECONET is not set
510# CONFIG_WAN_ROUTER is not set 536# CONFIG_WAN_ROUTER is not set
537# CONFIG_PHONET is not set
511# CONFIG_NET_SCHED is not set 538# CONFIG_NET_SCHED is not set
512CONFIG_NET_CLS_ROUTE=y 539CONFIG_NET_CLS_ROUTE=y
540# CONFIG_DCB is not set
513 541
514# 542#
515# Network testing 543# Network testing
516# 544#
517# CONFIG_NET_PKTGEN is not set 545# CONFIG_NET_PKTGEN is not set
546# CONFIG_NET_DROP_MONITOR is not set
518# CONFIG_HAMRADIO is not set 547# CONFIG_HAMRADIO is not set
519# CONFIG_CAN is not set 548# CONFIG_CAN is not set
520CONFIG_IRDA=m 549CONFIG_IRDA=m
@@ -577,8 +606,6 @@ CONFIG_BT_HIDP=m
577# 606#
578# Bluetooth device drivers 607# Bluetooth device drivers
579# 608#
580CONFIG_BT_HCIUSB=m
581# CONFIG_BT_HCIUSB_SCO is not set
582# CONFIG_BT_HCIBTUSB is not set 609# CONFIG_BT_HCIBTUSB is not set
583# CONFIG_BT_HCIUART is not set 610# CONFIG_BT_HCIUART is not set
584CONFIG_BT_HCIBCM203X=m 611CONFIG_BT_HCIBCM203X=m
@@ -590,31 +617,27 @@ CONFIG_BT_HCIBFUSB=m
590# CONFIG_BT_HCIBTUART is not set 617# CONFIG_BT_HCIBTUART is not set
591# CONFIG_BT_HCIVHCI is not set 618# CONFIG_BT_HCIVHCI is not set
592# CONFIG_AF_RXRPC is not set 619# CONFIG_AF_RXRPC is not set
593# CONFIG_PHONET is not set
594CONFIG_WIRELESS=y 620CONFIG_WIRELESS=y
595CONFIG_CFG80211=m 621CONFIG_CFG80211=m
596CONFIG_NL80211=y 622# CONFIG_CFG80211_REG_DEBUG is not set
597CONFIG_WIRELESS_OLD_REGULATORY=y 623CONFIG_WIRELESS_OLD_REGULATORY=y
598CONFIG_WIRELESS_EXT=y 624CONFIG_WIRELESS_EXT=y
599CONFIG_WIRELESS_EXT_SYSFS=y 625CONFIG_WIRELESS_EXT_SYSFS=y
626# CONFIG_LIB80211 is not set
600CONFIG_MAC80211=m 627CONFIG_MAC80211=m
601 628
602# 629#
603# Rate control algorithm selection 630# Rate control algorithm selection
604# 631#
605CONFIG_MAC80211_RC_PID=y 632CONFIG_MAC80211_RC_MINSTREL=y
606# CONFIG_MAC80211_RC_MINSTREL is not set 633# CONFIG_MAC80211_RC_DEFAULT_PID is not set
607CONFIG_MAC80211_RC_DEFAULT_PID=y 634CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y
608# CONFIG_MAC80211_RC_DEFAULT_MINSTREL is not set 635CONFIG_MAC80211_RC_DEFAULT="minstrel"
609CONFIG_MAC80211_RC_DEFAULT="pid"
610# CONFIG_MAC80211_MESH is not set 636# CONFIG_MAC80211_MESH is not set
611CONFIG_MAC80211_LEDS=y 637CONFIG_MAC80211_LEDS=y
638# CONFIG_MAC80211_DEBUGFS is not set
612# CONFIG_MAC80211_DEBUG_MENU is not set 639# CONFIG_MAC80211_DEBUG_MENU is not set
613CONFIG_IEEE80211=m 640# CONFIG_WIMAX is not set
614# CONFIG_IEEE80211_DEBUG is not set
615CONFIG_IEEE80211_CRYPT_WEP=m
616CONFIG_IEEE80211_CRYPT_CCMP=m
617CONFIG_IEEE80211_CRYPT_TKIP=m
618# CONFIG_RFKILL is not set 641# CONFIG_RFKILL is not set
619# CONFIG_NET_9P is not set 642# CONFIG_NET_9P is not set
620 643
@@ -662,17 +685,27 @@ CONFIG_BLK_DEV_RAM_SIZE=4096
662# CONFIG_BLK_DEV_HD is not set 685# CONFIG_BLK_DEV_HD is not set
663CONFIG_MISC_DEVICES=y 686CONFIG_MISC_DEVICES=y
664# CONFIG_PHANTOM is not set 687# CONFIG_PHANTOM is not set
665# CONFIG_EEPROM_93CX6 is not set
666# CONFIG_SGI_IOC4 is not set 688# CONFIG_SGI_IOC4 is not set
667# CONFIG_TIFM_CORE is not set 689# CONFIG_TIFM_CORE is not set
690# CONFIG_ICS932S401 is not set
668# CONFIG_ENCLOSURE_SERVICES is not set 691# CONFIG_ENCLOSURE_SERVICES is not set
669# CONFIG_HP_ILO is not set 692# CONFIG_HP_ILO is not set
693# CONFIG_ISL29003 is not set
694# CONFIG_C2PORT is not set
695
696#
697# EEPROM support
698#
699# CONFIG_EEPROM_AT24 is not set
700# CONFIG_EEPROM_LEGACY is not set
701# CONFIG_EEPROM_93CX6 is not set
670CONFIG_HAVE_IDE=y 702CONFIG_HAVE_IDE=y
671CONFIG_IDE=y 703CONFIG_IDE=y
672 704
673# 705#
674# Please see Documentation/ide/ide.txt for help/info on IDE drives 706# Please see Documentation/ide/ide.txt for help/info on IDE drives
675# 707#
708CONFIG_IDE_XFER_MODE=y
676CONFIG_IDE_TIMINGS=y 709CONFIG_IDE_TIMINGS=y
677CONFIG_IDE_ATAPI=y 710CONFIG_IDE_ATAPI=y
678# CONFIG_BLK_DEV_IDE_SATA is not set 711# CONFIG_BLK_DEV_IDE_SATA is not set
@@ -684,7 +717,6 @@ CONFIG_BLK_DEV_IDECS=m
684CONFIG_BLK_DEV_IDECD=y 717CONFIG_BLK_DEV_IDECD=y
685CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y 718CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y
686# CONFIG_BLK_DEV_IDETAPE is not set 719# CONFIG_BLK_DEV_IDETAPE is not set
687CONFIG_BLK_DEV_IDESCSI=y
688# CONFIG_IDE_TASK_IOCTL is not set 720# CONFIG_IDE_TASK_IOCTL is not set
689CONFIG_IDE_PROC_FS=y 721CONFIG_IDE_PROC_FS=y
690 722
@@ -714,6 +746,7 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y
714# CONFIG_BLK_DEV_JMICRON is not set 746# CONFIG_BLK_DEV_JMICRON is not set
715# CONFIG_BLK_DEV_SC1200 is not set 747# CONFIG_BLK_DEV_SC1200 is not set
716# CONFIG_BLK_DEV_PIIX is not set 748# CONFIG_BLK_DEV_PIIX is not set
749# CONFIG_BLK_DEV_IT8172 is not set
717# CONFIG_BLK_DEV_IT8213 is not set 750# CONFIG_BLK_DEV_IT8213 is not set
718# CONFIG_BLK_DEV_IT821X is not set 751# CONFIG_BLK_DEV_IT821X is not set
719# CONFIG_BLK_DEV_NS87415 is not set 752# CONFIG_BLK_DEV_NS87415 is not set
@@ -728,7 +761,6 @@ CONFIG_BLK_DEV_SL82C105=y
728# CONFIG_BLK_DEV_TC86C001 is not set 761# CONFIG_BLK_DEV_TC86C001 is not set
729CONFIG_BLK_DEV_IDE_PMAC=y 762CONFIG_BLK_DEV_IDE_PMAC=y
730CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y 763CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
731CONFIG_BLK_DEV_IDEDMA_PMAC=y
732CONFIG_BLK_DEV_IDEDMA=y 764CONFIG_BLK_DEV_IDEDMA=y
733 765
734# 766#
@@ -772,6 +804,7 @@ CONFIG_SCSI_FC_ATTRS=y
772# CONFIG_SCSI_SRP_ATTRS is not set 804# CONFIG_SCSI_SRP_ATTRS is not set
773CONFIG_SCSI_LOWLEVEL=y 805CONFIG_SCSI_LOWLEVEL=y
774# CONFIG_ISCSI_TCP is not set 806# CONFIG_ISCSI_TCP is not set
807# CONFIG_SCSI_CXGB3_ISCSI is not set
775# CONFIG_BLK_DEV_3W_XXXX_RAID is not set 808# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
776# CONFIG_SCSI_3W_9XXX is not set 809# CONFIG_SCSI_3W_9XXX is not set
777# CONFIG_SCSI_ACARD is not set 810# CONFIG_SCSI_ACARD is not set
@@ -791,8 +824,12 @@ CONFIG_SCSI_AIC7XXX_OLD=m
791# CONFIG_MEGARAID_NEWGEN is not set 824# CONFIG_MEGARAID_NEWGEN is not set
792# CONFIG_MEGARAID_LEGACY is not set 825# CONFIG_MEGARAID_LEGACY is not set
793# CONFIG_MEGARAID_SAS is not set 826# CONFIG_MEGARAID_SAS is not set
827# CONFIG_SCSI_MPT2SAS is not set
794# CONFIG_SCSI_HPTIOP is not set 828# CONFIG_SCSI_HPTIOP is not set
795# CONFIG_SCSI_BUSLOGIC is not set 829# CONFIG_SCSI_BUSLOGIC is not set
830# CONFIG_LIBFC is not set
831# CONFIG_LIBFCOE is not set
832# CONFIG_FCOE is not set
796# CONFIG_SCSI_DMX3191D is not set 833# CONFIG_SCSI_DMX3191D is not set
797# CONFIG_SCSI_EATA is not set 834# CONFIG_SCSI_EATA is not set
798# CONFIG_SCSI_FUTURE_DOMAIN is not set 835# CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -822,6 +859,7 @@ CONFIG_SCSI_MAC53C94=y
822# CONFIG_SCSI_SRP is not set 859# CONFIG_SCSI_SRP is not set
823# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set 860# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
824# CONFIG_SCSI_DH is not set 861# CONFIG_SCSI_DH is not set
862# CONFIG_SCSI_OSD_INITIATOR is not set
825# CONFIG_ATA is not set 863# CONFIG_ATA is not set
826CONFIG_MD=y 864CONFIG_MD=y
827CONFIG_BLK_DEV_MD=m 865CONFIG_BLK_DEV_MD=m
@@ -881,6 +919,7 @@ CONFIG_THERM_ADT746X=m
881# CONFIG_ANSLCD is not set 919# CONFIG_ANSLCD is not set
882CONFIG_PMAC_RACKMETER=m 920CONFIG_PMAC_RACKMETER=m
883CONFIG_NETDEVICES=y 921CONFIG_NETDEVICES=y
922CONFIG_COMPAT_NET_DEV_OPS=y
884CONFIG_DUMMY=m 923CONFIG_DUMMY=m
885# CONFIG_BONDING is not set 924# CONFIG_BONDING is not set
886# CONFIG_MACVLAN is not set 925# CONFIG_MACVLAN is not set
@@ -898,6 +937,8 @@ CONFIG_BMAC=y
898CONFIG_SUNGEM=y 937CONFIG_SUNGEM=y
899# CONFIG_CASSINI is not set 938# CONFIG_CASSINI is not set
900# CONFIG_NET_VENDOR_3COM is not set 939# CONFIG_NET_VENDOR_3COM is not set
940# CONFIG_ETHOC is not set
941# CONFIG_DNET is not set
901# CONFIG_NET_TULIP is not set 942# CONFIG_NET_TULIP is not set
902# CONFIG_HP100 is not set 943# CONFIG_HP100 is not set
903# CONFIG_IBM_NEW_EMAC_ZMII is not set 944# CONFIG_IBM_NEW_EMAC_ZMII is not set
@@ -913,7 +954,6 @@ CONFIG_PCNET32=y
913# CONFIG_ADAPTEC_STARFIRE is not set 954# CONFIG_ADAPTEC_STARFIRE is not set
914# CONFIG_B44 is not set 955# CONFIG_B44 is not set
915# CONFIG_FORCEDETH is not set 956# CONFIG_FORCEDETH is not set
916# CONFIG_EEPRO100 is not set
917# CONFIG_E100 is not set 957# CONFIG_E100 is not set
918# CONFIG_FEALNX is not set 958# CONFIG_FEALNX is not set
919# CONFIG_NATSEMI is not set 959# CONFIG_NATSEMI is not set
@@ -923,6 +963,7 @@ CONFIG_PCNET32=y
923# CONFIG_R6040 is not set 963# CONFIG_R6040 is not set
924# CONFIG_SIS900 is not set 964# CONFIG_SIS900 is not set
925# CONFIG_EPIC100 is not set 965# CONFIG_EPIC100 is not set
966# CONFIG_SMSC9420 is not set
926# CONFIG_SUNDANCE is not set 967# CONFIG_SUNDANCE is not set
927# CONFIG_TLAN is not set 968# CONFIG_TLAN is not set
928# CONFIG_VIA_RHINE is not set 969# CONFIG_VIA_RHINE is not set
@@ -935,6 +976,7 @@ CONFIG_NETDEV_1000=y
935# CONFIG_E1000E is not set 976# CONFIG_E1000E is not set
936# CONFIG_IP1000 is not set 977# CONFIG_IP1000 is not set
937# CONFIG_IGB is not set 978# CONFIG_IGB is not set
979# CONFIG_IGBVF is not set
938# CONFIG_NS83820 is not set 980# CONFIG_NS83820 is not set
939# CONFIG_HAMACHI is not set 981# CONFIG_HAMACHI is not set
940# CONFIG_YELLOWFIN is not set 982# CONFIG_YELLOWFIN is not set
@@ -945,18 +987,20 @@ CONFIG_NETDEV_1000=y
945# CONFIG_VIA_VELOCITY is not set 987# CONFIG_VIA_VELOCITY is not set
946# CONFIG_TIGON3 is not set 988# CONFIG_TIGON3 is not set
947# CONFIG_BNX2 is not set 989# CONFIG_BNX2 is not set
948# CONFIG_MV643XX_ETH is not set
949# CONFIG_QLA3XXX is not set 990# CONFIG_QLA3XXX is not set
950# CONFIG_ATL1 is not set 991# CONFIG_ATL1 is not set
951# CONFIG_ATL1E is not set 992# CONFIG_ATL1E is not set
993# CONFIG_ATL1C is not set
952# CONFIG_JME is not set 994# CONFIG_JME is not set
953CONFIG_NETDEV_10000=y 995CONFIG_NETDEV_10000=y
954# CONFIG_CHELSIO_T1 is not set 996# CONFIG_CHELSIO_T1 is not set
997CONFIG_CHELSIO_T3_DEPENDS=y
955# CONFIG_CHELSIO_T3 is not set 998# CONFIG_CHELSIO_T3 is not set
956# CONFIG_ENIC is not set 999# CONFIG_ENIC is not set
957# CONFIG_IXGBE is not set 1000# CONFIG_IXGBE is not set
958# CONFIG_IXGB is not set 1001# CONFIG_IXGB is not set
959# CONFIG_S2IO is not set 1002# CONFIG_S2IO is not set
1003# CONFIG_VXGE is not set
960# CONFIG_MYRI10GE is not set 1004# CONFIG_MYRI10GE is not set
961# CONFIG_NETXEN_NIC is not set 1005# CONFIG_NETXEN_NIC is not set
962# CONFIG_NIU is not set 1006# CONFIG_NIU is not set
@@ -966,6 +1010,7 @@ CONFIG_NETDEV_10000=y
966# CONFIG_BNX2X is not set 1010# CONFIG_BNX2X is not set
967# CONFIG_QLGE is not set 1011# CONFIG_QLGE is not set
968# CONFIG_SFC is not set 1012# CONFIG_SFC is not set
1013# CONFIG_BE2NET is not set
969# CONFIG_TR is not set 1014# CONFIG_TR is not set
970 1015
971# 1016#
@@ -974,20 +1019,11 @@ CONFIG_NETDEV_10000=y
974# CONFIG_WLAN_PRE80211 is not set 1019# CONFIG_WLAN_PRE80211 is not set
975CONFIG_WLAN_80211=y 1020CONFIG_WLAN_80211=y
976# CONFIG_PCMCIA_RAYCS is not set 1021# CONFIG_PCMCIA_RAYCS is not set
977# CONFIG_IPW2100 is not set
978# CONFIG_IPW2200 is not set
979# CONFIG_LIBERTAS is not set 1022# CONFIG_LIBERTAS is not set
980# CONFIG_LIBERTAS_THINFIRM is not set 1023# CONFIG_LIBERTAS_THINFIRM is not set
981# CONFIG_AIRO is not set 1024# CONFIG_AIRO is not set
982CONFIG_HERMES=m
983CONFIG_APPLE_AIRPORT=m
984# CONFIG_PLX_HERMES is not set
985# CONFIG_TMD_HERMES is not set
986# CONFIG_NORTEL_HERMES is not set
987CONFIG_PCI_HERMES=m
988CONFIG_PCMCIA_HERMES=m
989# CONFIG_PCMCIA_SPECTRUM is not set
990# CONFIG_ATMEL is not set 1025# CONFIG_ATMEL is not set
1026# CONFIG_AT76C50X_USB is not set
991# CONFIG_AIRO_CS is not set 1027# CONFIG_AIRO_CS is not set
992# CONFIG_PCMCIA_WL3501 is not set 1028# CONFIG_PCMCIA_WL3501 is not set
993CONFIG_PRISM54=m 1029CONFIG_PRISM54=m
@@ -997,15 +1033,17 @@ CONFIG_PRISM54=m
997# CONFIG_RTL8187 is not set 1033# CONFIG_RTL8187 is not set
998# CONFIG_ADM8211 is not set 1034# CONFIG_ADM8211 is not set
999# CONFIG_MAC80211_HWSIM is not set 1035# CONFIG_MAC80211_HWSIM is not set
1036# CONFIG_MWL8K is not set
1000CONFIG_P54_COMMON=m 1037CONFIG_P54_COMMON=m
1001# CONFIG_P54_USB is not set 1038# CONFIG_P54_USB is not set
1002# CONFIG_P54_PCI is not set 1039# CONFIG_P54_PCI is not set
1040CONFIG_P54_LEDS=y
1003# CONFIG_ATH5K is not set 1041# CONFIG_ATH5K is not set
1004# CONFIG_ATH9K is not set 1042# CONFIG_ATH9K is not set
1005# CONFIG_IWLCORE is not set 1043# CONFIG_AR9170_USB is not set
1006# CONFIG_IWLWIFI_LEDS is not set 1044# CONFIG_IPW2100 is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IPW2200 is not set
+# CONFIG_IWLWIFI is not set
 # CONFIG_HOSTAP is not set
 CONFIG_B43=m
 CONFIG_B43_PCI_AUTOSELECT=y
@@ -1025,6 +1063,19 @@ CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
 # CONFIG_B43LEGACY_PIO_MODE is not set
 # CONFIG_ZD1211RW is not set
 # CONFIG_RT2X00 is not set
+CONFIG_HERMES=m
+CONFIG_HERMES_CACHE_FW_ON_INIT=y
+CONFIG_APPLE_AIRPORT=m
+# CONFIG_PLX_HERMES is not set
+# CONFIG_TMD_HERMES is not set
+# CONFIG_NORTEL_HERMES is not set
+CONFIG_PCI_HERMES=m
+CONFIG_PCMCIA_HERMES=m
+# CONFIG_PCMCIA_SPECTRUM is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
 
 #
 # USB Network Adapters
@@ -1036,6 +1087,7 @@ CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
 CONFIG_USB_USBNET=m
 CONFIG_USB_NET_AX8817X=m
 CONFIG_USB_NET_CDCETHER=m
+# CONFIG_USB_NET_CDC_EEM is not set
 # CONFIG_USB_NET_DM9601 is not set
 # CONFIG_USB_NET_SMSC95XX is not set
 # CONFIG_USB_NET_GL620A is not set
@@ -1099,7 +1151,7 @@ CONFIG_INPUT_KEYBOARD=y
 CONFIG_INPUT_MOUSE=y
 # CONFIG_MOUSE_PS2 is not set
 # CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_APPLETOUCH is not set
+CONFIG_MOUSE_APPLETOUCH=y
 # CONFIG_MOUSE_BCM5974 is not set
 # CONFIG_MOUSE_VSXXXAA is not set
 # CONFIG_INPUT_JOYSTICK is not set
@@ -1150,10 +1202,13 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y
 # CONFIG_SERIAL_JSM is not set
 # CONFIG_SERIAL_OF_PLATFORM is not set
 CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_HVC_UDBG is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=m
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 CONFIG_NVRAM=y
 CONFIG_GEN_RTC=y
 # CONFIG_GEN_RTC_X is not set
@@ -1232,12 +1287,9 @@ CONFIG_I2C_POWERMAC=y
 # Miscellaneous I2C Chip support
 #
 # CONFIG_DS1682 is not set
-# CONFIG_EEPROM_AT24 is not set
-# CONFIG_EEPROM_LEGACY is not set
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -1259,11 +1311,11 @@ CONFIG_BATTERY_PMU=y
 # CONFIG_THERMAL is not set
 # CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
 
 #
 # Sonics Silicon Backplane
 #
-CONFIG_SSB_POSSIBLE=y
 CONFIG_SSB=m
 CONFIG_SSB_SPROM=y
 CONFIG_SSB_PCIHOST_POSSIBLE=y
@@ -1281,18 +1333,13 @@ CONFIG_SSB_DRIVER_PCICORE=y
 # CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
 # CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
-
-#
-# Voltage and Current regulators
-#
+# CONFIG_MFD_PCF50633 is not set
 # CONFIG_REGULATOR is not set
-# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
-# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
-# CONFIG_REGULATOR_BQ24022 is not set
 
 #
 # Multimedia devices
@@ -1390,6 +1437,7 @@ CONFIG_FB_ATY_BACKLIGHT=y
 # CONFIG_FB_KYRO is not set
 CONFIG_FB_3DFX=y
 # CONFIG_FB_3DFX_ACCEL is not set
+CONFIG_FB_3DFX_I2C=y
 # CONFIG_FB_VOODOO1 is not set
 # CONFIG_FB_VT8623 is not set
 # CONFIG_FB_TRIDENT is not set
@@ -1399,12 +1447,14 @@ CONFIG_FB_3DFX=y
 # CONFIG_FB_IBM_GXT4500 is not set
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 CONFIG_LCD_CLASS_DEVICE=m
 # CONFIG_LCD_ILI9320 is not set
 # CONFIG_LCD_PLATFORM is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_BACKLIGHT_CORGI is not set
+CONFIG_BACKLIGHT_GENERIC=y
 
 #
 # Display device support
@@ -1444,11 +1494,13 @@ CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
 CONFIG_SND_PCM_OSS_PLUGINS=y
 CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_HRTIMER is not set
 # CONFIG_SND_DYNAMIC_MINORS is not set
 CONFIG_SND_SUPPORT_OLD_API=y
 CONFIG_SND_VERBOSE_PROCFS=y
 # CONFIG_SND_VERBOSE_PRINTK is not set
 # CONFIG_SND_DEBUG is not set
+CONFIG_SND_VMASTER=y
 CONFIG_SND_DRIVERS=y
 CONFIG_SND_DUMMY=m
 # CONFIG_SND_VIRMIDI is not set
@@ -1486,6 +1538,8 @@ CONFIG_SND_PCI=y
 # CONFIG_SND_INDIGO is not set
 # CONFIG_SND_INDIGOIO is not set
 # CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1551,28 +1605,31 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
-CONFIG_HID_BRIGHT=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
-CONFIG_HID_DELL=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
 CONFIG_HID_MICROSOFT=y
 CONFIG_HID_MONTEREY=y
+CONFIG_HID_NTRIG=y
 CONFIG_HID_PANTHERLORD=y
 # CONFIG_PANTHERLORD_FF is not set
 CONFIG_HID_PETALYNX=y
 CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
+# CONFIG_GREENASIA_FF is not set
+CONFIG_HID_TOPSEED=y
 # CONFIG_THRUSTMASTER_FF is not set
 # CONFIG_ZEROPLUS_FF is not set
 CONFIG_USB_SUPPORT=y
@@ -1603,6 +1660,7 @@ CONFIG_USB_EHCI_HCD=m
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
 # CONFIG_USB_EHCI_HCD_PPC_OF is not set
+# CONFIG_USB_OXU210HP_HCD is not set
 # CONFIG_USB_ISP116X_HCD is not set
 # CONFIG_USB_ISP1760_HCD is not set
 CONFIG_USB_OHCI_HCD=y
@@ -1625,24 +1683,23 @@ CONFIG_USB_PRINTER=m
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# may also be needed; see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=m
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
 # CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
 # CONFIG_USB_STORAGE_USBAT is not set
 # CONFIG_USB_STORAGE_SDDR09 is not set
 # CONFIG_USB_STORAGE_SDDR55 is not set
 # CONFIG_USB_STORAGE_JUMPSHOT is not set
 # CONFIG_USB_STORAGE_ALAUDA is not set
-CONFIG_USB_STORAGE_ONETOUCH=y
+CONFIG_USB_STORAGE_ONETOUCH=m
 # CONFIG_USB_STORAGE_KARMA is not set
 # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
 # CONFIG_USB_LIBUSUAL is not set
@@ -1665,7 +1722,7 @@ CONFIG_USB_EZUSB=y
 # CONFIG_USB_SERIAL_CH341 is not set
 # CONFIG_USB_SERIAL_WHITEHEAT is not set
 # CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set
-# CONFIG_USB_SERIAL_CP2101 is not set
+# CONFIG_USB_SERIAL_CP210X is not set
 # CONFIG_USB_SERIAL_CYPRESS_M8 is not set
 # CONFIG_USB_SERIAL_EMPEG is not set
 # CONFIG_USB_SERIAL_FTDI_SIO is not set
@@ -1701,15 +1758,19 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
 # CONFIG_USB_SERIAL_NAVMAN is not set
 # CONFIG_USB_SERIAL_PL2303 is not set
 # CONFIG_USB_SERIAL_OTI6858 is not set
+# CONFIG_USB_SERIAL_QUALCOMM is not set
 # CONFIG_USB_SERIAL_SPCP8X5 is not set
 # CONFIG_USB_SERIAL_HP4X is not set
 # CONFIG_USB_SERIAL_SAFE is not set
+# CONFIG_USB_SERIAL_SIEMENS_MPI is not set
 # CONFIG_USB_SERIAL_SIERRAWIRELESS is not set
+# CONFIG_USB_SERIAL_SYMBOL is not set
 # CONFIG_USB_SERIAL_TI is not set
 # CONFIG_USB_SERIAL_CYBERJACK is not set
 # CONFIG_USB_SERIAL_XIRCOM is not set
 # CONFIG_USB_SERIAL_OPTION is not set
 # CONFIG_USB_SERIAL_OMNINET is not set
+# CONFIG_USB_SERIAL_OPTICON is not set
 # CONFIG_USB_SERIAL_DEBUG is not set
 
 #
@@ -1726,7 +1787,6 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 CONFIG_USB_APPLEDISPLAY=m
@@ -1738,6 +1798,11 @@ CONFIG_USB_APPLEDISPLAY=m
 # CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_UWB is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1748,7 +1813,9 @@ CONFIG_LEDS_CLASS=y
 # LED drivers
 #
 # CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1759,11 +1826,16 @@ CONFIG_LEDS_TRIGGER_IDE_DISK=y
 # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
 CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
 # CONFIG_EDAC is not set
 # CONFIG_RTC_CLASS is not set
 # CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 
@@ -1774,6 +1846,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1783,7 +1856,9 @@ CONFIG_EXT4_FS_XATTR=y
 # CONFIG_EXT4_FS_POSIX_ACL is not set
 # CONFIG_EXT4_FS_SECURITY is not set
 CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
 CONFIG_JBD2=y
+# CONFIG_JBD2_DEBUG is not set
 CONFIG_FS_MBCACHE=y
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
@@ -1792,6 +1867,7 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_XFS_FS is not set
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
 CONFIG_DNOTIFY=y
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY_USER=y
@@ -1801,6 +1877,11 @@ CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=y
@@ -1831,10 +1912,7 @@ CONFIG_TMPFS=y
 # CONFIG_TMPFS_POSIX_ACL is not set
 # CONFIG_HUGETLB_PAGE is not set
 # CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
+CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ADFS_FS is not set
 # CONFIG_AFFS_FS is not set
 CONFIG_HFS_FS=m
@@ -1843,6 +1921,7 @@ CONFIG_HFSPLUS_FS=m
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
 # CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_OMFS_FS is not set
@@ -1851,6 +1930,7 @@ CONFIG_HFSPLUS_FS=m
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1868,7 +1948,6 @@ CONFIG_NFS_ACL_SUPPORT=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 CONFIG_SMB_FS=m
@@ -1940,11 +2019,13 @@ CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_KOI8_U is not set
 CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
 #
 CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
 CONFIG_CRC_CCITT=y
 CONFIG_CRC16=y
 CONFIG_CRC_T10DIF=y
@@ -1954,15 +2035,18 @@ CONFIG_CRC32=y
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_TEXTSEARCH=y
 CONFIG_TEXTSEARCH_KMP=m
 CONFIG_TEXTSEARCH_BM=m
 CONFIG_TEXTSEARCH_FSM=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
 CONFIG_HAVE_LMB=y
+CONFIG_NLATTR=y
 
 #
 # Kernel hacking
@@ -1973,13 +2057,16 @@ CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_FRAME_WARN=1024
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_FS is not set
+CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 CONFIG_SCHEDSTATS=y
 # CONFIG_TIMER_STATS is not set
@@ -1994,6 +2081,7 @@ CONFIG_SCHEDSTATS=y
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
@@ -2001,6 +2089,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
 CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
@@ -2009,7 +2098,14 @@ CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_FAULT_INJECTION is not set
 CONFIG_LATENCYTOP=y
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -2017,12 +2113,19 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 # CONFIG_FUNCTION_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
+CONFIG_PRINT_STACK_DEPTH=64
 # CONFIG_DEBUG_STACKOVERFLOW is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_CODE_PATCHING_SELFTEST is not set
@@ -2033,6 +2136,7 @@ CONFIG_XMON_DEFAULT=y
 CONFIG_XMON_DISASSEMBLY=y
 CONFIG_DEBUGGER=y
 CONFIG_IRQSTACKS=y
+# CONFIG_VIRQ_DEBUG is not set
 # CONFIG_BDI_SWITCH is not set
 CONFIG_BOOTX_TEXT=y
 # CONFIG_PPC_EARLY_DEBUG is not set
@@ -2051,13 +2155,20 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_FIPS is not set
 CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
 CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
 CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_RNG=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_TEST is not set
@@ -2127,6 +2238,7 @@ CONFIG_CRYPTO_TWOFISH_COMMON=m
 # Compression
 #
 CONFIG_CRYPTO_DEFLATE=m
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index c69f2b5f0cc4..cb448d68452c 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -26,7 +26,9 @@
  * allocate the space "normally" and use the cache management functions
  * to ensure it is consistent.
  */
-extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp);
+struct device;
+extern void *__dma_alloc_coherent(struct device *dev, size_t size,
+				  dma_addr_t *handle, gfp_t gfp);
 extern void __dma_free_coherent(size_t size, void *vaddr);
 extern void __dma_sync(void *vaddr, size_t size, int direction);
 extern void __dma_sync_page(struct page *page, unsigned long offset,
@@ -37,7 +39,7 @@ extern void __dma_sync_page(struct page *page, unsigned long offset,
  * Cache coherent cores.
  */
 
-#define __dma_alloc_coherent(gfp, size, handle)	NULL
+#define __dma_alloc_coherent(dev, gfp, size, handle)	NULL
 #define __dma_free_coherent(size, addr)		((void)0)
 #define __dma_sync(addr, size, rw)		((void)0)
 #define __dma_sync_page(pg, off, sz, rw)	((void)0)
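
The new struct device argument lets the non-coherent allocator honour the caller's coherent_dma_mask instead of assuming the old hard-wired ISA-style 24-bit default. A minimal caller sketch (hypothetical driver code, not part of this patch; dma_alloc_coherent() is the generic entry point that forwards dev):

	/* hypothetical example: allocate a descriptor ring on a
	 * CONFIG_NOT_COHERENT_CACHE platform; 'dev' carries the DMA mask */
	static void *example_alloc_ring(struct device *dev, dma_addr_t *bus)
	{
		return dma_alloc_coherent(dev, PAGE_SIZE, bus, GFP_KERNEL);
	}
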
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index d60fd18f428c..f1f4e23a84e9 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -14,8 +14,6 @@
 #ifndef _ASM_FIXMAP_H
 #define _ASM_FIXMAP_H
 
-extern unsigned long FIXADDR_TOP;
-
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
 #include <asm/page.h>
@@ -24,6 +22,8 @@ extern unsigned long FIXADDR_TOP;
 #include <asm/kmap_types.h>
 #endif
 
+#define FIXADDR_TOP	((unsigned long)(-PAGE_SIZE))
+
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
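
Making FIXADDR_TOP a compile-time constant also pins its value: with 4 KB pages on 32-bit, (unsigned long)(-PAGE_SIZE) = 0x100000000 - 0x1000 = 0xfffff000, the last page of the address space, so the runtime variable and its export (removed from pgtable_32.c below) are no longer needed.
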
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index ba45c997830f..c9ff9d75990e 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -10,7 +10,7 @@
 
 extern unsigned long va_to_phys(unsigned long address);
 extern pte_t *va_to_pte(unsigned long address);
-extern unsigned long ioremap_bot, ioremap_base;
+extern unsigned long ioremap_bot;
 
 #ifdef CONFIG_44x
 extern int icache_44x_need_flush;
@@ -56,8 +56,30 @@ extern int icache_44x_need_flush;
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
 /*
+ * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
+ * value (for now) on others, from where we can start layout kernel
+ * virtual space that goes below PKMAP and FIXMAP
+ */
+#ifdef CONFIG_HIGHMEM
+#define KVIRT_TOP	PKMAP_BASE
+#else
+#define KVIRT_TOP	(0xfe000000UL)	/* for now, could be FIXMAP_BASE ? */
+#endif
+
+/*
+ * ioremap_bot starts at that address. Early ioremaps move down from there,
+ * until mem_init() at which point this becomes the top of the vmalloc
+ * and ioremap space
+ */
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define IOREMAP_TOP	((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#else
+#define IOREMAP_TOP	KVIRT_TOP
+#endif
+
+/*
  * Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 64MB value just means that there will be a 64MB "hole" after the
+ * current 16MB value just means that there will be a 64MB "hole" after the
  * physical memory until the kernel virtual memory starts. That means that
  * any out-of-bounds memory accesses will hopefully be caught.
  * The vmalloc() routines leaves a hole of 4kB between each vmalloced
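
To make the new layout concrete, a worked example on a non-HIGHMEM, non-coherent board, assuming the common CONFIG_CONSISTENT_SIZE value of 0x00200000 (2 MB; the actual value is board-configurable):

	KVIRT_TOP   = 0xfe000000
	IOREMAP_TOP = (0xfe000000 - 0x00200000) & PAGE_MASK = 0xfde00000

The consistent-memory window then spans 0xfde00000..0xfe000000, and early ioremaps grow downward from 0xfde00000.
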
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 53c7788cba78..6b02793dc75b 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -32,7 +32,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 {
 	void *ret;
 #ifdef CONFIG_NOT_COHERENT_CACHE
-	ret = __dma_alloc_coherent(size, dma_handle, flag);
+	ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
 	if (ret == NULL)
 		return NULL;
 	*dma_handle += get_dma_direct_offset(dev);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 8db35278a4b4..29b742b90f1f 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -18,7 +18,6 @@ obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
 			memcpy_64.o usercopy_64.o mem_64.o string.o
 obj-$(CONFIG_XMON)	+= sstep.o
 obj-$(CONFIG_KPROBES)	+= sstep.o
-obj-$(CONFIG_NOT_COHERENT_CACHE)	+= dma-noncoherent.o
 
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c
deleted file mode 100644
index 005a28d380af..000000000000
--- a/arch/powerpc/lib/dma-noncoherent.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * PowerPC version derived from arch/arm/mm/consistent.c
- * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
- *
- * Copyright (C) 2000 Russell King
- *
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core. The function return
- * is the virtual address and 'dma_handle' is the physical address.
- * Mostly stolen from the ARM port, with some changes for PowerPC.
- * -- Dan
- *
- * Reorganized to get rid of the arch-specific consistent_* functions
- * and provide non-coherent implementations for the DMA API. -Matt
- *
- * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
- * implementation. This is pulled straight from ARM and barely
- * modified. -Matt
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/highmem.h>
-#include <linux/dma-mapping.h>
-#include <linux/vmalloc.h>
-
-#include <asm/tlbflush.h>
-
-/*
- * Allocate DMA-coherent memory space and return both the kernel remapped
- * virtual and bus address for that space.
- */
-void *
-__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
-{
-	struct page *page;
-	unsigned long order;
-	int i;
-	unsigned int nr_pages = PAGE_ALIGN(size)>>PAGE_SHIFT;
-	unsigned int array_size = nr_pages * sizeof(struct page *);
-	struct page **pages;
-	struct page *end;
-	u64 mask = 0x00ffffff, limit; /* ISA default */
-	struct vm_struct *area;
-
-	BUG_ON(!mem_init_done);
-	size = PAGE_ALIGN(size);
-	limit = (mask + 1) & ~mask;
-	if (limit && size >= limit) {
-		printk(KERN_WARNING "coherent allocation too big (requested "
-		       "%#x mask %#Lx)\n", size, mask);
-		return NULL;
-	}
-
-	order = get_order(size);
-
-	if (mask != 0xffffffff)
-		gfp |= GFP_DMA;
-
-	page = alloc_pages(gfp, order);
-	if (!page)
-		goto no_page;
-
-	end = page + (1 << order);
-
-	/*
-	 * Invalidate any data that might be lurking in the
-	 * kernel direct-mapped region for device DMA.
-	 */
-	{
-		unsigned long kaddr = (unsigned long)page_address(page);
-		memset(page_address(page), 0, size);
-		flush_dcache_range(kaddr, kaddr + size);
-	}
-
-	split_page(page, order);
-
-	/*
-	 * Set the "dma handle"
-	 */
-	*handle = page_to_phys(page);
-
-	area = get_vm_area_caller(size, VM_IOREMAP,
-			__builtin_return_address(1));
-	if (!area)
-		goto out_free_pages;
-
-	if (array_size > PAGE_SIZE) {
-		pages = vmalloc(array_size);
-		area->flags |= VM_VPAGES;
-	} else {
-		pages = kmalloc(array_size, GFP_KERNEL);
-	}
-	if (!pages)
-		goto out_free_area;
-
-	area->pages = pages;
-	area->nr_pages = nr_pages;
-
-	for (i = 0; i < nr_pages; i++)
-		pages[i] = page + i;
-
-	if (map_vm_area(area, pgprot_noncached(PAGE_KERNEL), &pages))
-		goto out_unmap;
-
-	/*
-	 * Free the otherwise unused pages.
-	 */
-	page += nr_pages;
-	while (page < end) {
-		__free_page(page);
-		page++;
-	}
-
-	return area->addr;
-out_unmap:
-	vunmap(area->addr);
-	if (array_size > PAGE_SIZE)
-		vfree(pages);
-	else
-		kfree(pages);
-	goto out_free_pages;
-out_free_area:
-	free_vm_area(area);
-out_free_pages:
-	if (page)
-		__free_pages(page, order);
-no_page:
-	return NULL;
-}
-EXPORT_SYMBOL(__dma_alloc_coherent);
-
-/*
- * free a page as defined by the above mapping.
- */
-void __dma_free_coherent(size_t size, void *vaddr)
-{
-	vfree(vaddr);
-
-}
-EXPORT_SYMBOL(__dma_free_coherent);
-
-/*
- * make an area consistent.
- */
-void __dma_sync(void *vaddr, size_t size, int direction)
-{
-	unsigned long start = (unsigned long)vaddr;
-	unsigned long end = start + size;
-
-	switch (direction) {
-	case DMA_NONE:
-		BUG();
-	case DMA_FROM_DEVICE:
-		/*
-		 * invalidate only when cache-line aligned otherwise there is
-		 * the potential for discarding uncommitted data from the cache
-		 */
-		if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1)))
-			flush_dcache_range(start, end);
-		else
-			invalidate_dcache_range(start, end);
-		break;
-	case DMA_TO_DEVICE:		/* writeback only */
-		clean_dcache_range(start, end);
-		break;
-	case DMA_BIDIRECTIONAL:	/* writeback and invalidate */
-		flush_dcache_range(start, end);
-		break;
-	}
-}
-EXPORT_SYMBOL(__dma_sync);
-
-#ifdef CONFIG_HIGHMEM
-/*
- * __dma_sync_page() implementation for systems using highmem.
- * In this case, each page of a buffer must be kmapped/kunmapped
- * in order to have a virtual address for __dma_sync(). This must
- * not sleep so kmap_atomic()/kunmap_atomic() are used.
- *
- * Note: yes, it is possible and correct to have a buffer extend
- * beyond the first page.
- */
-static inline void __dma_sync_page_highmem(struct page *page,
-		unsigned long offset, size_t size, int direction)
-{
-	size_t seg_size = min((size_t)(PAGE_SIZE - offset), size);
-	size_t cur_size = seg_size;
-	unsigned long flags, start, seg_offset = offset;
-	int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE;
-	int seg_nr = 0;
-
-	local_irq_save(flags);
-
-	do {
-		start = (unsigned long)kmap_atomic(page + seg_nr,
-				KM_PPC_SYNC_PAGE) + seg_offset;
-
-		/* Sync this buffer segment */
-		__dma_sync((void *)start, seg_size, direction);
-		kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE);
-		seg_nr++;
-
-		/* Calculate next buffer segment size */
-		seg_size = min((size_t)PAGE_SIZE, size - cur_size);
-
-		/* Add the segment size to our running total */
-		cur_size += seg_size;
-		seg_offset = 0;
-	} while (seg_nr < nr_segs);
-
-	local_irq_restore(flags);
-}
-#endif /* CONFIG_HIGHMEM */
-
-/*
- * __dma_sync_page makes memory consistent. identical to __dma_sync, but
- * takes a struct page instead of a virtual address
- */
-void __dma_sync_page(struct page *page, unsigned long offset,
-	size_t size, int direction)
-{
-#ifdef CONFIG_HIGHMEM
-	__dma_sync_page_highmem(page, offset, size, direction);
-#else
-	unsigned long start = (unsigned long)page_address(page) + offset;
-	__dma_sync((void *)start, size, direction);
-#endif
-}
-EXPORT_SYMBOL(__dma_sync_page);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 17290bcedc5e..b746f4ca4209 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
 obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PPC_SUBPAGE_PROT)	+= subpage-prot.o
+obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
new file mode 100644
index 000000000000..36692f5c9a76
--- /dev/null
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -0,0 +1,400 @@
+/*
+ * PowerPC version derived from arch/arm/mm/consistent.c
+ * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ * Copyright (C) 2000 Russell King
+ *
+ * Consistent memory allocators. Used for DMA devices that want to
+ * share uncached memory with the processor core. The function return
+ * is the virtual address and 'dma_handle' is the physical address.
+ * Mostly stolen from the ARM port, with some changes for PowerPC.
+ * -- Dan
+ *
+ * Reorganized to get rid of the arch-specific consistent_* functions
+ * and provide non-coherent implementations for the DMA API. -Matt
+ *
+ * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
+ * implementation. This is pulled straight from ARM and barely
+ * modified. -Matt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/tlbflush.h>
+
+#include "mmu_decl.h"
+
+/*
+ * This address range defaults to a value that is safe for all
+ * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
+ * can be further configured for specific applications under
+ * the "Advanced Setup" menu. -Matt
+ */
+#define CONSISTENT_BASE		(IOREMAP_TOP)
+#define CONSISTENT_END		(CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE)
+#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
+
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static DEFINE_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct vm_region	region;
+ *    unsigned long	flags;
+ *    struct page	**pages;
+ *    unsigned int	nr_pages;
+ *    unsigned long	phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call vm_region_alloc with an appropriate
+ * struct vm_region head (eg):
+ *
+ *  struct vm_region vmalloc_head = {
+ *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *	.vm_start	= VMALLOC_START,
+ *	.vm_end		= VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.) I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling vm_region_alloc().
+ */
+struct ppc_vm_region {
+	struct list_head	vm_list;
+	unsigned long		vm_start;
+	unsigned long		vm_end;
+};
+
+static struct ppc_vm_region consistent_head = {
+	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
+	.vm_start	= CONSISTENT_BASE,
+	.vm_end		= CONSISTENT_END,
+};
+
+static struct ppc_vm_region *
+ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
+{
+	unsigned long addr = head->vm_start, end = head->vm_end - size;
+	unsigned long flags;
+	struct ppc_vm_region *c, *new;
+
+	new = kmalloc(sizeof(struct ppc_vm_region), gfp);
+	if (!new)
+		goto out;
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if ((addr + size) < addr)
+			goto nospc;
+		if ((addr + size) <= c->vm_start)
+			goto found;
+		addr = c->vm_end;
+		if (addr > end)
+			goto nospc;
+	}
+
+ found:
+	/*
+	 * Insert this entry _before_ the one we found.
+	 */
+	list_add_tail(&new->vm_list, &c->vm_list);
+	new->vm_start = addr;
+	new->vm_end = addr + size;
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	return new;
+
+ nospc:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	kfree(new);
+ out:
+	return NULL;
+}
+
+static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
+{
+	struct ppc_vm_region *c;
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if (c->vm_start == addr)
+			goto out;
+	}
+	c = NULL;
+ out:
+	return c;
+}
+
+/*
+ * Allocate DMA-coherent memory space and return both the kernel remapped
+ * virtual and bus address for that space.
+ */
+void *
+__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page;
+	struct ppc_vm_region *c;
+	unsigned long order;
+	u64 mask = ISA_DMA_THRESHOLD, limit;
+
+	if (dev) {
+		mask = dev->coherent_dma_mask;
+
+		/*
+		 * Sanity check the DMA mask - it must be non-zero, and
+		 * must be able to be satisfied by a DMA allocation.
+		 */
+		if (mask == 0) {
+			dev_warn(dev, "coherent DMA mask is unset\n");
+			goto no_page;
+		}
+
+		if ((~mask) & ISA_DMA_THRESHOLD) {
+			dev_warn(dev, "coherent DMA mask %#llx is smaller "
+				 "than system GFP_DMA mask %#llx\n",
+				 mask, (unsigned long long)ISA_DMA_THRESHOLD);
+			goto no_page;
+		}
+	}
+
+
+	size = PAGE_ALIGN(size);
+	limit = (mask + 1) & ~mask;
+	if ((limit && size >= limit) ||
+	    size >= (CONSISTENT_END - CONSISTENT_BASE)) {
+		printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
+		       size, mask);
+		return NULL;
+	}
+
+	order = get_order(size);
+
+	/* Might be useful if we ever have a real legacy DMA zone... */
+	if (mask != 0xffffffff)
+		gfp |= GFP_DMA;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		goto no_page;
+
+	/*
+	 * Invalidate any data that might be lurking in the
+	 * kernel direct-mapped region for device DMA.
+	 */
+	{
+		unsigned long kaddr = (unsigned long)page_address(page);
+		memset(page_address(page), 0, size);
+		flush_dcache_range(kaddr, kaddr + size);
+	}
+
+	/*
+	 * Allocate a virtual address in the consistent mapping region.
+	 */
+	c = ppc_vm_region_alloc(&consistent_head, size,
+				gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+	if (c) {
+		unsigned long vaddr = c->vm_start;
+		struct page *end = page + (1 << order);
+
+		split_page(page, order);
+
+		/*
+		 * Set the "dma handle"
+		 */
+		*handle = page_to_phys(page);
+
+		do {
+			SetPageReserved(page);
+			map_page(vaddr, page_to_phys(page),
+				 pgprot_noncached(PAGE_KERNEL));
+			page++;
+			vaddr += PAGE_SIZE;
+		} while (size -= PAGE_SIZE);
+
+		/*
+		 * Free the otherwise unused pages.
+		 */
+		while (page < end) {
+			__free_page(page);
+			page++;
+		}
+
+		return (void *)c->vm_start;
+	}
+
+	if (page)
+		__free_pages(page, order);
+ no_page:
+	return NULL;
+}
+EXPORT_SYMBOL(__dma_alloc_coherent);
+
+/*
+ * free a page as defined by the above mapping.
+ */
+void __dma_free_coherent(size_t size, void *vaddr)
+{
+	struct ppc_vm_region *c;
+	unsigned long flags, addr;
+
+	size = PAGE_ALIGN(size);
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
+	if (!c)
+		goto no_area;
+
+	if ((c->vm_end - c->vm_start) != size) {
+		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	addr = c->vm_start;
+	do {
+		pte_t *ptep;
+		unsigned long pfn;
+
+		ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr),
+							       addr),
+					 addr),
+					 addr);
+		if (!pte_none(*ptep) && pte_present(*ptep)) {
+			pfn = pte_pfn(*ptep);
+			pte_clear(&init_mm, addr, ptep);
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+
+				ClearPageReserved(page);
+				__free_page(page);
+			}
+		}
+		addr += PAGE_SIZE;
+	} while (size -= PAGE_SIZE);
+
+	flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+	list_del(&c->vm_list);
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	kfree(c);
+	return;
+
+ no_area:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
+	       __func__, vaddr);
+	dump_stack();
+}
+EXPORT_SYMBOL(__dma_free_coherent);
+
+/*
+ * make an area consistent.
+ */
+void __dma_sync(void *vaddr, size_t size, int direction)
+{
+	unsigned long start = (unsigned long)vaddr;
+	unsigned long end = start + size;
+
+	switch (direction) {
+	case DMA_NONE:
+		BUG();
+	case DMA_FROM_DEVICE:
+		/*
+		 * invalidate only when cache-line aligned otherwise there is
+		 * the potential for discarding uncommitted data from the cache
+		 */
+		if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1)))
+			flush_dcache_range(start, end);
+		else
+			invalidate_dcache_range(start, end);
+		break;
+	case DMA_TO_DEVICE:		/* writeback only */
+		clean_dcache_range(start, end);
+		break;
+	case DMA_BIDIRECTIONAL:	/* writeback and invalidate */
+		flush_dcache_range(start, end);
+		break;
+	}
+}
+EXPORT_SYMBOL(__dma_sync);
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * __dma_sync_page() implementation for systems using highmem.
+ * In this case, each page of a buffer must be kmapped/kunmapped
+ * in order to have a virtual address for __dma_sync(). This must
+ * not sleep so kmap_atomic()/kunmap_atomic() are used.
+ *
+ * Note: yes, it is possible and correct to have a buffer extend
+ * beyond the first page.
+ */
+static inline void __dma_sync_page_highmem(struct page *page,
+		unsigned long offset, size_t size, int direction)
+{
+	size_t seg_size = min((size_t)(PAGE_SIZE - offset), size);
+	size_t cur_size = seg_size;
+	unsigned long flags, start, seg_offset = offset;
+	int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE;
+	int seg_nr = 0;
+
+	local_irq_save(flags);
+
+	do {
+		start = (unsigned long)kmap_atomic(page + seg_nr,
+				KM_PPC_SYNC_PAGE) + seg_offset;
+
+		/* Sync this buffer segment */
+		__dma_sync((void *)start, seg_size, direction);
+		kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE);
+		seg_nr++;
+
+		/* Calculate next buffer segment size */
+		seg_size = min((size_t)PAGE_SIZE, size - cur_size);
+
+		/* Add the segment size to our running total */
+		cur_size += seg_size;
+		seg_offset = 0;
+	} while (seg_nr < nr_segs);
+
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * __dma_sync_page makes memory consistent. identical to __dma_sync, but
+ * takes a struct page instead of a virtual address
+ */
+void __dma_sync_page(struct page *page, unsigned long offset,
+	size_t size, int direction)
+{
+#ifdef CONFIG_HIGHMEM
+	__dma_sync_page_highmem(page, offset, size, direction);
+#else
+	unsigned long start = (unsigned long)page_address(page) + offset;
+	__dma_sync((void *)start, size, direction);
+#endif
+}
+EXPORT_SYMBOL(__dma_sync_page);
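
A minimal usage sketch of the pair exported above (hypothetical caller, not from this patch): the allocator returns a zeroed, uncached kernel mapping plus the bus address, and the buffer must be released through the matching free with the same size:

	void *vaddr;
	dma_addr_t bus;

	vaddr = __dma_alloc_coherent(dev, PAGE_SIZE, &bus, GFP_KERNEL);
	if (vaddr) {
		/* hand 'bus' to the device, touch the buffer via 'vaddr' */
		__dma_free_coherent(PAGE_SIZE, vaddr);
	}
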
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 666a5e8a5be1..3de6a0d93824 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -168,12 +168,8 @@ void __init MMU_init(void)
 		ppc_md.progress("MMU:mapin", 0x301);
 	mapin_ram();
 
-#ifdef CONFIG_HIGHMEM
-	ioremap_base = PKMAP_BASE;
-#else
-	ioremap_base = 0xfe000000UL;	/* for now, could be 0xfffff000 */
-#endif /* CONFIG_HIGHMEM */
-	ioremap_bot = ioremap_base;
+	/* Initialize early top-down ioremap allocator */
+	ioremap_bot = IOREMAP_TOP;
 
 	/* Map in I/O resources */
 	if (ppc_md.progress)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d0602a76bf7f..579382c163a9 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -380,6 +380,23 @@ void __init mem_init(void)
 			bsssize >> 10,
 			initsize >> 10);
 
+#ifdef CONFIG_PPC32
+	pr_info("Kernel virtual memory layout:\n");
+	pr_info("  * 0x%08lx..0x%08lx  : fixmap\n", FIXADDR_START, FIXADDR_TOP);
+#ifdef CONFIG_HIGHMEM
+	pr_info("  * 0x%08lx..0x%08lx  : highmem PTEs\n",
+		PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
+#endif /* CONFIG_HIGHMEM */
+#ifdef CONFIG_NOT_COHERENT_CACHE
+	pr_info("  * 0x%08lx..0x%08lx  : consistent mem\n",
+		IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE);
+#endif /* CONFIG_NOT_COHERENT_CACHE */
+	pr_info("  * 0x%08lx..0x%08lx  : early ioremap\n",
+		ioremap_bot, IOREMAP_TOP);
+	pr_info("  * 0x%08lx..0x%08lx  : vmalloc & ioremap\n",
+		VMALLOC_START, VMALLOC_END);
+#endif /* CONFIG_PPC32 */
+
 	mem_init_done = 1;
 }
 
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index a70e311bd457..030d0005b4d2 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -127,12 +127,12 @@ static unsigned int steal_context_up(unsigned int id)
 
 	pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm);
 
-	/* Mark this mm has having no context anymore */
-	mm->context.id = MMU_NO_CONTEXT;
-
 	/* Flush the TLB for that context */
 	local_flush_tlb_mm(mm);
 
+	/* Mark this mm has having no context anymore */
+	mm->context.id = MMU_NO_CONTEXT;
+
 	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
 	__clear_bit(id, stale_map[cpu]);
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 430d0908fa50..5422169626ba 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -399,8 +399,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
 static int fixmaps;
-unsigned long FIXADDR_TOP = (-PAGE_SIZE);
-EXPORT_SYMBOL(FIXADDR_TOP);
 
 void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
 {
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index 301855263b81..04296ffff8bf 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -592,3 +592,17 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
 	}
 	return irq;
 }
+
+static void __devinit quirk_ipr_msi(struct pci_dev *dev)
+{
+	/* Something prevents MSIs from the IPR from working on Bimini,
+	 * and the driver has no smarts to recover. So disable MSI
+	 * on it for now. */
+
+	if (machine_is(maple)) {
+		dev->no_msi = 1;
+		dev_info(&dev->dev, "Quirk disabled MSI\n");
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN,
+			quirk_ipr_msi);
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 80b513449f4c..be3581a8c294 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq)
 	lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
+static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
 	unsigned int irq;
 	int status;
@@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 
 	irq = (unsigned int)irq_map[virq].hwirq;
 	if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
-		return;
+		return -1;
 
 	status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
 
 	if (status) {
 		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
 			__func__, irq, status);
-		return;
+		return -1;
 	}
 
 	/*
@@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 		printk(KERN_WARNING
 			"%s: No online cpus in the mask %s for irq %d\n",
 			__func__, cpulist, virq);
-		return;
+		return -1;
 	}
 
 	status = rtas_call(ibm_set_xive, 3, 1, NULL,
@@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 	if (status) {
 		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
 			__func__, irq, status);
-		return;
+		return -1;
 	}
+
+	return 0;
 }
 
 static struct irq_chip xics_pic_direct = {
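
This tracks the 2.6.30 change of irq_chip::set_affinity from void to int, so callers can see whether retargeting worked; here -1 reports failure and 0 success. A sketch of the convention with an illustrative chip (hypothetical, not from this patch):

	static int example_set_affinity(unsigned int irq,
					const struct cpumask *mask)
	{
		if (!cpumask_intersects(mask, cpu_online_mask))
			return -1;	/* no online CPU to target */
		/* ... program the interrupt destination for 'irq' ... */
		return 0;
	}

	static struct irq_chip example_pic = {
		.name		= "EXAMPLE",
		.set_affinity	= example_set_affinity,
	};
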
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 0efc12d1a3d7..352d8c3ef526 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq)
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
@@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
 			       mpic_physmask(cpus_addr(tmp)[0]));
 	}
+
+	return 0;
 }
 
 static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 3cef2af10f42..eff433c322a0 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
 
 extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
 extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
+extern int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 #endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h
index d3b2b4f109e3..5d84df5e27f6 100644
--- a/arch/sh/include/asm/flat.h
+++ b/arch/sh/include/asm/flat.h
@@ -12,7 +12,6 @@
 #ifndef __ASM_SH_FLAT_H
 #define __ASM_SH_FLAT_H
 
-#define flat_stack_align(sp)			/* nothing needed */
 #define flat_argvp_envp_on_stack()		0
 #define flat_old_ram_flag(flags)		(flags)
 #define flat_reloc_valid(reloc, size)		((reloc) <= (size))
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 425c2f9be6d5..d42e393078c4 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -208,8 +208,9 @@ do { unsigned long new_flags = current_thread_info()->flags; \
208 else \ 208 else \
209 clear_thread_flag(TIF_ABI_PENDING); \ 209 clear_thread_flag(TIF_ABI_PENDING); \
210 /* flush_thread will update pgd cache */ \ 210 /* flush_thread will update pgd cache */ \
211 if (current->personality != PER_LINUX32) \ 211 if (personality(current->personality) != PER_LINUX32) \
212 set_personality(PER_LINUX); \ 212 set_personality(PER_LINUX | \
213 (current->personality & (~PER_MASK))); \
213} while (0) 214} while (0)
214 215
215#endif /* !(__ASM_SPARC64_ELF_H) */ 216#endif /* !(__ASM_SPARC64_ELF_H) */
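The sparc64 hunk above is easy to misread: current->personality mixes a personality type in the low byte with flag bits above it, so comparing the whole word against PER_LINUX32 misfires whenever any flag is set, and assigning a bare PER_LINUX wipes the flags. A user-space sketch of the fixed behaviour (constants mirror include/linux/personality.h):

	#include <assert.h>

	#define PER_MASK		0x00ff
	#define PER_LINUX		0x0000
	#define PER_LINUX32		0x0008
	#define ADDR_LIMIT_32BIT	0x0800000	/* one of the flag bits */

	#define personality(pers)	((pers) & PER_MASK)

	int main(void)
	{
		unsigned int cur = PER_LINUX | ADDR_LIMIT_32BIT;

		/* The old test (cur != PER_LINUX32) is true here, and the
		 * old assignment reset cur to a bare PER_LINUX, dropping
		 * ADDR_LIMIT_32BIT.  The new test looks at the type bits
		 * only, and the assignment keeps everything outside
		 * PER_MASK. */
		if (personality(cur) != PER_LINUX32)
			cur = PER_LINUX | (cur & ~PER_MASK);

		assert(cur & ADDR_LIMIT_32BIT);	/* flags survive */
		return 0;
	}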
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 639ac805448a..65865726b283 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -102,8 +102,8 @@ struct thread_info {
102#define TI_KERN_CNTD1 0x00000488 102#define TI_KERN_CNTD1 0x00000488
103#define TI_PCR 0x00000490 103#define TI_PCR 0x00000490
104#define TI_RESTART_BLOCK 0x00000498 104#define TI_RESTART_BLOCK 0x00000498
105#define TI_KUNA_REGS 0x000004c0 105#define TI_KUNA_REGS 0x000004c8
106#define TI_KUNA_INSN 0x000004c8 106#define TI_KUNA_INSN 0x000004d0
107#define TI_FPREGS 0x00000500 107#define TI_FPREGS 0x00000500
108 108
109/* We embed this in the uppermost byte of thread_info->flags */ 109/* We embed this in the uppermost byte of thread_info->flags */
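The TI_KUNA_* values are byte offsets into struct thread_info consumed directly by assembly, and they went stale when struct restart_block grew by 8 bytes. A compile-time cross-check of the kind below keeps such constants honest; the struct is a stand-in with made-up earlier members, not the kernel's real layout (assumes an LP64 target):

	#include <stddef.h>

	struct thread_info_stub {
		char		pad[0x498];		/* earlier members */
		unsigned long	restart_block[6];	/* 0x30 bytes */
		unsigned long	kuna_regs;
		unsigned long	kuna_insn;
	};

	#define TI_KUNA_REGS	0x000004c8
	#define TI_KUNA_INSN	0x000004d0

	_Static_assert(offsetof(struct thread_info_stub, kuna_regs) == TI_KUNA_REGS,
		       "TI_KUNA_REGS out of sync");
	_Static_assert(offsetof(struct thread_info_stub, kuna_insn) == TI_KUNA_INSN,
		       "TI_KUNA_INSN out of sync");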
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 5deabe921a47..e5e78f9cfc95 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq)
318 } 318 }
319} 319}
320 320
321static void sun4u_set_affinity(unsigned int virt_irq, 321static int sun4u_set_affinity(unsigned int virt_irq,
322 const struct cpumask *mask) 322 const struct cpumask *mask)
323{ 323{
324 sun4u_irq_enable(virt_irq); 324 sun4u_irq_enable(virt_irq);
325
326 return 0;
325} 327}
326 328
327/* Don't do anything. The desc->status check for IRQ_DISABLED in 329/* Don't do anything. The desc->status check for IRQ_DISABLED in
@@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq)
377 ino, err); 379 ino, err);
378} 380}
379 381
380static void sun4v_set_affinity(unsigned int virt_irq, 382static int sun4v_set_affinity(unsigned int virt_irq,
381 const struct cpumask *mask) 383 const struct cpumask *mask)
382{ 384{
383 unsigned int ino = virt_irq_table[virt_irq].dev_ino; 385 unsigned int ino = virt_irq_table[virt_irq].dev_ino;
@@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq,
388 if (err != HV_EOK) 390 if (err != HV_EOK)
389 printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " 391 printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
390 "err(%d)\n", ino, cpuid, err); 392 "err(%d)\n", ino, cpuid, err);
393
394 return 0;
391} 395}
392 396
393static void sun4v_irq_disable(unsigned int virt_irq) 397static void sun4v_irq_disable(unsigned int virt_irq)
@@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq)
445 dev_handle, dev_ino, err); 449 dev_handle, dev_ino, err);
446} 450}
447 451
448static void sun4v_virt_set_affinity(unsigned int virt_irq, 452static int sun4v_virt_set_affinity(unsigned int virt_irq,
449 const struct cpumask *mask) 453 const struct cpumask *mask)
450{ 454{
451 unsigned long cpuid, dev_handle, dev_ino; 455 unsigned long cpuid, dev_handle, dev_ino;
@@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq,
461 printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " 465 printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
462 "err(%d)\n", 466 "err(%d)\n",
463 dev_handle, dev_ino, cpuid, err); 467 dev_handle, dev_ino, cpuid, err);
468
469 return 0;
464} 470}
465 471
466static void sun4v_virq_disable(unsigned int virt_irq) 472static void sun4v_virq_disable(unsigned int virt_irq)
diff --git a/arch/sparc/lib/csum_copy_from_user.S b/arch/sparc/lib/csum_copy_from_user.S
index a22eddbe5dba..e0304e6a2242 100644
--- a/arch/sparc/lib/csum_copy_from_user.S
+++ b/arch/sparc/lib/csum_copy_from_user.S
@@ -5,7 +5,7 @@
5 5
6#define EX_LD(x) \ 6#define EX_LD(x) \
798: x; \ 798: x; \
8 .section .fixup; \ 8 .section .fixup, "ax"; \
9 .align 4; \ 9 .align 4; \
1099: retl; \ 1099: retl; \
11 mov -1, %o0; \ 11 mov -1, %o0; \
diff --git a/arch/sparc/lib/csum_copy_to_user.S b/arch/sparc/lib/csum_copy_to_user.S
index d5b12f441f02..afd01acc587c 100644
--- a/arch/sparc/lib/csum_copy_to_user.S
+++ b/arch/sparc/lib/csum_copy_to_user.S
@@ -5,7 +5,7 @@
5 5
6#define EX_ST(x) \ 6#define EX_ST(x) \
798: x; \ 798: x; \
8 .section .fixup; \ 8 .section .fixup,"ax"; \
9 .align 4; \ 9 .align 4; \
1099: retl; \ 1099: retl; \
11 mov -1, %o0; \ 11 mov -1, %o0; \
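Both one-character-looking changes matter: a ".section" directive without flags creates .fixup as a non-allocated, non-executable section, so the fault-recovery stubs placed in it would never be mapped at run time; "ax" marks the section allocatable and executable. The same rule can be observed in user space (hypothetical section names; compile with gcc -c and inspect with objdump -h):

	/* .fixup_demo_a gets ALLOC and CODE; .fixup_demo_b, declared
	 * without "ax", comes out as plain non-alloc data. */
	__asm__(".section .fixup_demo_a, \"ax\"\n"
		"	nop\n"
		"	.previous\n");

	__asm__(".section .fixup_demo_b\n"
		"	nop\n"
		"	.previous\n");

	int main(void)
	{
		return 0;
	}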
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
new file mode 100644
index 000000000000..ad8ec356fb36
--- /dev/null
+++ b/arch/x86/Kbuild
@@ -0,0 +1,16 @@
1
2obj-$(CONFIG_KVM) += kvm/
3
4# Xen paravirtualization support
5obj-$(CONFIG_XEN) += xen/
6
7# lguest paravirtualization support
8obj-$(CONFIG_LGUEST_GUEST) += lguest/
9
10obj-y += kernel/
11obj-y += mm/
12
13obj-y += crypto/
14obj-y += vdso/
15obj-$(CONFIG_IA32_EMULATION) += ia32/
16
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a6efe0a2e9ae..aafae3b140de 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -47,6 +47,11 @@ config X86
47 select HAVE_KERNEL_BZIP2 47 select HAVE_KERNEL_BZIP2
48 select HAVE_KERNEL_LZMA 48 select HAVE_KERNEL_LZMA
49 49
50config OUTPUT_FORMAT
51 string
52 default "elf32-i386" if X86_32
53 default "elf64-x86-64" if X86_64
54
50config ARCH_DEFCONFIG 55config ARCH_DEFCONFIG
51 string 56 string
52 default "arch/x86/configs/i386_defconfig" if X86_32 57 default "arch/x86/configs/i386_defconfig" if X86_32
@@ -274,15 +279,9 @@ config SPARSE_IRQ
274 279
275 If you don't know what to do here, say N. 280 If you don't know what to do here, say N.
276 281
277config NUMA_MIGRATE_IRQ_DESC 282config NUMA_IRQ_DESC
278 bool "Move irq desc when changing irq smp_affinity" 283 def_bool y
279 depends on SPARSE_IRQ && NUMA 284 depends on SPARSE_IRQ && NUMA
280 depends on BROKEN
281 default n
282 ---help---
283 This enables moving irq_desc to the cpu/node that will handle the irq.
284
285 If you don't know what to do here, say N.
286 285
287config X86_MPPARSE 286config X86_MPPARSE
288 bool "Enable MPS table" if ACPI 287 bool "Enable MPS table" if ACPI
@@ -355,7 +354,7 @@ config X86_UV
355 depends on X86_64 354 depends on X86_64
356 depends on X86_EXTENDED_PLATFORM 355 depends on X86_EXTENDED_PLATFORM
357 depends on NUMA 356 depends on NUMA
358 select X86_X2APIC 357 depends on X86_X2APIC
359 ---help--- 358 ---help---
360 This option is needed in order to support SGI Ultraviolet systems. 359 This option is needed in order to support SGI Ultraviolet systems.
361 If you don't have one of these, you should say N here. 360 If you don't have one of these, you should say N here.
@@ -1466,9 +1465,7 @@ config KEXEC_JUMP
1466 1465
1467config PHYSICAL_START 1466config PHYSICAL_START
1468 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) 1467 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
1469 default "0x1000000" if X86_NUMAQ 1468 default "0x1000000"
1470 default "0x200000" if X86_64
1471 default "0x100000"
1472 ---help--- 1469 ---help---
1473 This gives the physical address where the kernel is loaded. 1470 This gives the physical address where the kernel is loaded.
1474 1471
@@ -1487,15 +1484,15 @@ config PHYSICAL_START
1487 to be specifically compiled to run from a specific memory area 1484 to be specifically compiled to run from a specific memory area
1488 (normally a reserved region) and this option comes handy. 1485 (normally a reserved region) and this option comes handy.
1489 1486
1490 So if you are using bzImage for capturing the crash dump, leave 1487 So if you are using bzImage for capturing the crash dump,
1491 the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y. 1488 leave the value here unchanged to 0x1000000 and set
1492 Otherwise if you plan to use vmlinux for capturing the crash dump 1489 CONFIG_RELOCATABLE=y. Otherwise if you plan to use vmlinux
1493 change this value to start of the reserved region (Typically 16MB 1490 for capturing the crash dump change this value to start of
1494 0x1000000). In other words, it can be set based on the "X" value as 1491 the reserved region. In other words, it can be set based on
1495 specified in the "crashkernel=YM@XM" command line boot parameter 1492 the "X" value as specified in the "crashkernel=YM@XM"
1496 passed to the panic-ed kernel. Typically this parameter is set as 1493 command line boot parameter passed to the panic-ed
1497 crashkernel=64M@16M. Please take a look at 1494 kernel. Please take a look at Documentation/kdump/kdump.txt
1498 Documentation/kdump/kdump.txt for more details about crash dumps. 1495 for more details about crash dumps.
1499 1496
1500 Usage of bzImage for capturing the crash dump is recommended as 1497 Usage of bzImage for capturing the crash dump is recommended as
1501 one does not have to build two kernels. Same kernel can be used 1498 one does not have to build two kernels. Same kernel can be used
@@ -1508,8 +1505,8 @@ config PHYSICAL_START
1508 Don't change this unless you know what you are doing. 1505 Don't change this unless you know what you are doing.
1509 1506
1510config RELOCATABLE 1507config RELOCATABLE
1511 bool "Build a relocatable kernel (EXPERIMENTAL)" 1508 bool "Build a relocatable kernel"
1512 depends on EXPERIMENTAL 1509 default y
1513 ---help--- 1510 ---help---
1514 This builds a kernel image that retains relocation information 1511 This builds a kernel image that retains relocation information
1515 so it can be loaded someplace besides the default 1MB. 1512 so it can be loaded someplace besides the default 1MB.
@@ -1524,12 +1521,16 @@ config RELOCATABLE
1524 it has been loaded at and the compile time physical address 1521 it has been loaded at and the compile time physical address
1525 (CONFIG_PHYSICAL_START) is ignored. 1522 (CONFIG_PHYSICAL_START) is ignored.
1526 1523
1524# Relocation on x86-32 needs some additional build support
1525config X86_NEED_RELOCS
1526 def_bool y
1527 depends on X86_32 && RELOCATABLE
1528
1527config PHYSICAL_ALIGN 1529config PHYSICAL_ALIGN
1528 hex 1530 hex
1529 prompt "Alignment value to which kernel should be aligned" if X86_32 1531 prompt "Alignment value to which kernel should be aligned" if X86_32
1530 default "0x100000" if X86_32 1532 default "0x1000000"
1531 default "0x200000" if X86_64 1533 range 0x2000 0x1000000
1532 range 0x2000 0x400000
1533 ---help--- 1534 ---help---
1534 This value puts the alignment restrictions on physical address 1535 This value puts the alignment restrictions on physical address
1535 where kernel is loaded and run from. Kernel is compiled for an 1536 where kernel is loaded and run from. Kernel is compiled for an
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e73317f..d105f29bb6bb 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -159,14 +159,30 @@ config IOMMU_DEBUG
159 options. See Documentation/x86_64/boot-options.txt for more 159 options. See Documentation/x86_64/boot-options.txt for more
160 details. 160 details.
161 161
162config IOMMU_STRESS
163 bool "Enable IOMMU stress-test mode"
164 ---help---
165 This option disables various optimizations in IOMMU related
166 code to do real stress testing of the IOMMU code. This option
167 will cause a performance drop and should only be enabled for
168 testing.
169
162config IOMMU_LEAK 170config IOMMU_LEAK
163 bool "IOMMU leak tracing" 171 bool "IOMMU leak tracing"
164 depends on DEBUG_KERNEL 172 depends on IOMMU_DEBUG && DMA_API_DEBUG
165 depends on IOMMU_DEBUG
166 ---help--- 173 ---help---
167 Add a simple leak tracer to the IOMMU code. This is useful when you 174 Add a simple leak tracer to the IOMMU code. This is useful when you
168 are debugging a buggy device driver that leaks IOMMU mappings. 175 are debugging a buggy device driver that leaks IOMMU mappings.
169 176
177config X86_DS_SELFTEST
178 bool "DS selftest"
179 default y
180 depends on DEBUG_KERNEL
181 depends on X86_DS
182 ---help---
183 Perform Debug Store selftests at boot time.
184 If in doubt, say "N".
185
170config HAVE_MMIOTRACE_SUPPORT 186config HAVE_MMIOTRACE_SUPPORT
171 def_bool y 187 def_bool y
172 188
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8c86b72afdc2..edbd0ca62067 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -7,8 +7,6 @@ else
7 KBUILD_DEFCONFIG := $(ARCH)_defconfig 7 KBUILD_DEFCONFIG := $(ARCH)_defconfig
8endif 8endif
9 9
10core-$(CONFIG_KVM) += arch/x86/kvm/
11
12# BITS is used as extension for files which are available in a 32 bit 10# BITS is used as extension for files which are available in a 32 bit
13# and a 64 bit version to simplify shared Makefiles. 11# and a 64 bit version to simplify shared Makefiles.
14# e.g.: obj-y += foo_$(BITS).o 12# e.g.: obj-y += foo_$(BITS).o
@@ -118,21 +116,8 @@ head-y += arch/x86/kernel/init_task.o
118 116
119libs-y += arch/x86/lib/ 117libs-y += arch/x86/lib/
120 118
121# Sub architecture files that needs linking first 119# See arch/x86/Kbuild for content of core part of the kernel
122core-y += $(fcore-y) 120core-y += arch/x86/
123
124# Xen paravirtualization support
125core-$(CONFIG_XEN) += arch/x86/xen/
126
127# lguest paravirtualization support
128core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
129
130core-y += arch/x86/kernel/
131core-y += arch/x86/mm/
132
133core-y += arch/x86/crypto/
134core-y += arch/x86/vdso/
135core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
136 121
137# drivers-y are linked after core-y 122# drivers-y are linked after core-y
138drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ 123drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 172cf8a98bdd..851fe936d242 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -3,6 +3,8 @@ bzImage
3cpustr.h 3cpustr.h
4mkcpustr 4mkcpustr
5offsets.h 5offsets.h
6voffset.h
7zoffset.h
6setup 8setup
7setup.bin 9setup.bin
8setup.elf 10setup.elf
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 6633b6e7505a..8d16ada25048 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,9 +26,10 @@ targets := vmlinux.bin setup.bin setup.elf bzImage
26targets += fdimage fdimage144 fdimage288 image.iso mtools.conf 26targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
27subdir- := compressed 27subdir- := compressed
28 28
29setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o 29setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o
30setup-y += header.o main.o mca.o memory.o pm.o pmjump.o 30setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
31setup-y += printf.o string.o tty.o video.o video-mode.o version.o 31setup-y += printf.o regs.o string.o tty.o video.o video-mode.o
32setup-y += version.o
32setup-$(CONFIG_X86_APM_BOOT) += apm.o 33setup-$(CONFIG_X86_APM_BOOT) += apm.o
33 34
34# The link order of the video-*.o modules can matter. In particular, 35# The link order of the video-*.o modules can matter. In particular,
@@ -86,19 +87,27 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
86 87
87SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) 88SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
88 89
89sed-offsets := -e 's/^00*/0/' \ 90sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
90 -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p'
91 91
92quiet_cmd_offsets = OFFSETS $@ 92quiet_cmd_voffset = VOFFSET $@
93 cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@ 93 cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
94 94
95$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE 95targets += voffset.h
96 $(call if_changed,offsets) 96$(obj)/voffset.h: vmlinux FORCE
97 $(call if_changed,voffset)
98
99sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
100
101quiet_cmd_zoffset = ZOFFSET $@
102 cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
103
104targets += zoffset.h
105$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
106 $(call if_changed,zoffset)
97 107
98targets += offsets.h
99 108
100AFLAGS_header.o += -I$(obj) 109AFLAGS_header.o += -I$(obj)
101$(obj)/header.o: $(obj)/offsets.h 110$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
102 111
103LDFLAGS_setup.elf := -T 112LDFLAGS_setup.elf := -T
104$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE 113$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 7c19ce8c2442..64a31a6d751a 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -2,7 +2,7 @@
2 * 2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007-2008 rPath, Inc. - All Rights Reserved 4 * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
5 * Copyright 2009 Intel Corporation 5 * Copyright 2009 Intel Corporation; author H. Peter Anvin
6 * 6 *
7 * This file is part of the Linux kernel, and is made available under 7 * This file is part of the Linux kernel, and is made available under
8 * the terms of the GNU General Public License version 2. 8 * the terms of the GNU General Public License version 2.
@@ -90,8 +90,11 @@ static int a20_test_long(void)
90 90
91static void enable_a20_bios(void) 91static void enable_a20_bios(void)
92{ 92{
93 asm volatile("pushfl; int $0x15; popfl" 93 struct biosregs ireg;
94 : : "a" ((u16)0x2401)); 94
95 initregs(&ireg);
96 ireg.ax = 0x2401;
97 intcall(0x15, &ireg, NULL);
95} 98}
96 99
97static void enable_a20_kbc(void) 100static void enable_a20_kbc(void)
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index 7aa6033001f9..ee274834ea8b 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -2,6 +2,7 @@
2 * 2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved 4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 * Copyright 2009 Intel Corporation; author H. Peter Anvin
5 * 6 *
6 * Original APM BIOS checking by Stephen Rothwell, May 1994 7 * Original APM BIOS checking by Stephen Rothwell, May 1994
7 * (sfr@canb.auug.org.au) 8 * (sfr@canb.auug.org.au)
@@ -19,75 +20,56 @@
19 20
20int query_apm_bios(void) 21int query_apm_bios(void)
21{ 22{
22 u16 ax, bx, cx, dx, di; 23 struct biosregs ireg, oreg;
23 u32 ebx, esi;
24 u8 err;
25 24
26 /* APM BIOS installation check */ 25 /* APM BIOS installation check */
27 ax = 0x5300; 26 initregs(&ireg);
28 bx = cx = 0; 27 ireg.ah = 0x53;
29 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" 28 intcall(0x15, &ireg, &oreg);
30 : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
31 : : "esi", "edi");
32 29
33 if (err) 30 if (oreg.flags & X86_EFLAGS_CF)
34 return -1; /* No APM BIOS */ 31 return -1; /* No APM BIOS */
35 32
36 if (bx != 0x504d) /* "PM" signature */ 33 if (oreg.bx != 0x504d) /* "PM" signature */
37 return -1; 34 return -1;
38 35
39 if (!(cx & 0x02)) /* 32 bits supported? */ 36 if (!(oreg.cx & 0x02)) /* 32 bits supported? */
40 return -1; 37 return -1;
41 38
42 /* Disconnect first, just in case */ 39 /* Disconnect first, just in case */
43 ax = 0x5304; 40 ireg.al = 0x04;
44 bx = 0; 41 intcall(0x15, &ireg, NULL);
45 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
46 : "+a" (ax), "+b" (bx)
47 : : "ecx", "edx", "esi", "edi");
48
49 /* Paranoia */
50 ebx = esi = 0;
51 cx = dx = di = 0;
52 42
53 /* 32-bit connect */ 43 /* 32-bit connect */
54 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6" 44 ireg.al = 0x03;
55 : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx), 45 intcall(0x15, &ireg, &oreg);
56 "+S" (esi), "+D" (di), "=m" (err) 46
57 : "a" (0x5303)); 47 boot_params.apm_bios_info.cseg = oreg.ax;
58 48 boot_params.apm_bios_info.offset = oreg.ebx;
59 boot_params.apm_bios_info.cseg = ax; 49 boot_params.apm_bios_info.cseg_16 = oreg.cx;
60 boot_params.apm_bios_info.offset = ebx; 50 boot_params.apm_bios_info.dseg = oreg.dx;
61 boot_params.apm_bios_info.cseg_16 = cx; 51 boot_params.apm_bios_info.cseg_len = oreg.si;
62 boot_params.apm_bios_info.dseg = dx; 52 boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
63 boot_params.apm_bios_info.cseg_len = (u16)esi; 53 boot_params.apm_bios_info.dseg_len = oreg.di;
64 boot_params.apm_bios_info.cseg_16_len = esi >> 16; 54
65 boot_params.apm_bios_info.dseg_len = di; 55 if (oreg.flags & X86_EFLAGS_CF)
66
67 if (err)
68 return -1; 56 return -1;
69 57
70 /* Redo the installation check as the 32-bit connect; 58 /* Redo the installation check as the 32-bit connect;
71 some BIOSes return different flags this way... */ 59 some BIOSes return different flags this way... */
72 60
73 ax = 0x5300; 61 ireg.al = 0x00;
74 bx = cx = 0; 62 intcall(0x15, &ireg, &oreg);
75 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
76 : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
77 : : "esi", "edi");
78 63
79 if (err || bx != 0x504d) { 64 if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
80 /* Failure with 32-bit connect, try to disconnect and ignore */ 65 /* Failure with 32-bit connect, try to disconnect and ignore */
81 ax = 0x5304; 66 ireg.al = 0x04;
82 bx = 0; 67 intcall(0x15, &ireg, NULL);
83 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
84 : "+a" (ax), "+b" (bx)
85 : : "ecx", "edx", "esi", "edi");
86 return -1; 68 return -1;
87 } 69 }
88 70
89 boot_params.apm_bios_info.version = ax; 71 boot_params.apm_bios_info.version = oreg.ax;
90 boot_params.apm_bios_info.flags = cx; 72 boot_params.apm_bios_info.flags = oreg.cx;
91 return 0; 73 return 0;
92} 74}
93 75
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644
index 000000000000..507793739ea5
--- /dev/null
+++ b/arch/x86/boot/bioscall.S
@@ -0,0 +1,82 @@
1/* -----------------------------------------------------------------------
2 *
3 * Copyright 2009 Intel Corporation; author H. Peter Anvin
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes
13 * touching registers they shouldn't be.
14 */
15
16 .code16
17 .text
18 .globl intcall
19 .type intcall, @function
20intcall:
21 /* Self-modify the INT instruction. Ugly, but works. */
22 cmpb %al, 3f
23 je 1f
24 movb %al, 3f
25 jmp 1f /* Synchronize pipeline */
261:
27 /* Save state */
28 pushfl
29 pushw %fs
30 pushw %gs
31 pushal
32
33 /* Copy input state to stack frame */
34 subw $44, %sp
35 movw %dx, %si
36 movw %sp, %di
37 movw $11, %cx
38 rep; movsd
39
40 /* Pop full state from the stack */
41 popal
42 popw %gs
43 popw %fs
44 popw %es
45 popw %ds
46 popfl
47
48 /* Actual INT */
49 .byte 0xcd /* INT opcode */
503: .byte 0
51
52 /* Push full state to the stack */
53 pushfl
54 pushw %ds
55 pushw %es
56 pushw %fs
57 pushw %gs
58 pushal
59
60 /* Re-establish C environment invariants */
61 cld
62 movzwl %sp, %esp
63 movw %cs, %ax
64 movw %ax, %ds
65 movw %ax, %es
66
67 /* Copy output state from stack frame */
68 movw 68(%esp), %di /* Original %cx == 3rd argument */
69 andw %di, %di
70 jz 4f
71 movw %sp, %si
72 movw $11, %cx
73 rep; movsd
744: addw $44, %sp
75
76 /* Restore state and return */
77 popal
78 popw %gs
79 popw %fs
80 popfl
81 retl
82 .size intcall, .-intcall
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 7b2692e897e5..98239d2658f2 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -2,6 +2,7 @@
2 * 2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved 4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 * Copyright 2009 Intel Corporation; author H. Peter Anvin
5 * 6 *
6 * This file is part of the Linux kernel, and is made available under 7 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2. 8 * the terms of the GNU General Public License version 2.
@@ -26,6 +27,7 @@
26#include <asm/setup.h> 27#include <asm/setup.h>
27#include "bitops.h" 28#include "bitops.h"
28#include <asm/cpufeature.h> 29#include <asm/cpufeature.h>
30#include <asm/processor-flags.h>
29 31
30/* Useful macros */ 32/* Useful macros */
31#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) 33#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -241,6 +243,49 @@ int enable_a20(void);
241/* apm.c */ 243/* apm.c */
242int query_apm_bios(void); 244int query_apm_bios(void);
243 245
246/* bioscall.c */
247struct biosregs {
248 union {
249 struct {
250 u32 edi;
251 u32 esi;
252 u32 ebp;
253 u32 _esp;
254 u32 ebx;
255 u32 edx;
256 u32 ecx;
257 u32 eax;
258 u32 _fsgs;
259 u32 _dses;
260 u32 eflags;
261 };
262 struct {
263 u16 di, hdi;
264 u16 si, hsi;
265 u16 bp, hbp;
266 u16 _sp, _hsp;
267 u16 bx, hbx;
268 u16 dx, hdx;
269 u16 cx, hcx;
270 u16 ax, hax;
271 u16 gs, fs;
272 u16 es, ds;
273 u16 flags, hflags;
274 };
275 struct {
276 u8 dil, dih, edi2, edi3;
277 u8 sil, sih, esi2, esi3;
278 u8 bpl, bph, ebp2, ebp3;
279 u8 _spl, _sph, _esp2, _esp3;
280 u8 bl, bh, ebx2, ebx3;
281 u8 dl, dh, edx2, edx3;
282 u8 cl, ch, ecx2, ecx3;
283 u8 al, ah, eax2, eax3;
284 };
285 };
286};
287void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
288
244/* cmdline.c */ 289/* cmdline.c */
245int cmdline_find_option(const char *option, char *buffer, int bufsize); 290int cmdline_find_option(const char *option, char *buffer, int bufsize);
246int cmdline_find_option_bool(const char *option); 291int cmdline_find_option_bool(const char *option);
@@ -279,6 +324,9 @@ int sprintf(char *buf, const char *fmt, ...);
279int vsprintf(char *buf, const char *fmt, va_list args); 324int vsprintf(char *buf, const char *fmt, va_list args);
280int printf(const char *fmt, ...); 325int printf(const char *fmt, ...);
281 326
327/* regs.c */
328void initregs(struct biosregs *regs);
329
282/* string.c */ 330/* string.c */
283int strcmp(const char *str1, const char *str2); 331int strcmp(const char *str1, const char *str2);
284size_t strnlen(const char *s, size_t maxlen); 332size_t strnlen(const char *s, size_t maxlen);
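With struct biosregs, initregs() and intcall() in place, every BIOS call in the setup code takes the same shape: zero and preset the register frame, fill in the inputs, make the call, read the outputs. A representative caller in that style (hypothetical example, not from this patch; INT 0x16/AH=0x00 reads a keystroke):

	static int read_key(void)
	{
		struct biosregs ireg, oreg;

		initregs(&ireg);		/* zeroes frame, presets segments */
		ireg.ah = 0x00;			/* get keystroke */
		intcall(0x16, &ireg, &oreg);

		return oreg.al;			/* ASCII code of the key */
	}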
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
index 63eff3b04d01..4a46fab7162e 100644
--- a/arch/x86/boot/compressed/.gitignore
+++ b/arch/x86/boot/compressed/.gitignore
@@ -1,3 +1,6 @@
1relocs 1relocs
2vmlinux.bin.all 2vmlinux.bin.all
3vmlinux.relocs 3vmlinux.relocs
4vmlinux.lds
5mkpiggy
6piggy.S
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 65551c9f8571..49c8a4c37d7c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -19,7 +19,9 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
19LDFLAGS := -m elf_$(UTS_MACHINE) 19LDFLAGS := -m elf_$(UTS_MACHINE)
20LDFLAGS_vmlinux := -T 20LDFLAGS_vmlinux := -T
21 21
22$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE 22hostprogs-y := mkpiggy
23
24$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
23 $(call if_changed,ld) 25 $(call if_changed,ld)
24 @: 26 @:
25 27
@@ -29,7 +31,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
29 31
30 32
31targets += vmlinux.bin.all vmlinux.relocs relocs 33targets += vmlinux.bin.all vmlinux.relocs relocs
32hostprogs-$(CONFIG_X86_32) += relocs 34hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs
33 35
34quiet_cmd_relocs = RELOCS $@ 36quiet_cmd_relocs = RELOCS $@
35 cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< 37 cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
@@ -37,46 +39,22 @@ $(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
37 $(call if_changed,relocs) 39 $(call if_changed,relocs)
38 40
39vmlinux.bin.all-y := $(obj)/vmlinux.bin 41vmlinux.bin.all-y := $(obj)/vmlinux.bin
40vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs 42vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
41quiet_cmd_relocbin = BUILD $@
42 cmd_relocbin = cat $(filter-out FORCE,$^) > $@
43$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
44 $(call if_changed,relocbin)
45
46ifeq ($(CONFIG_X86_32),y)
47 43
48ifdef CONFIG_RELOCATABLE 44$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
49$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
50 $(call if_changed,gzip)
51$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE
52 $(call if_changed,bzip2)
53$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE
54 $(call if_changed,lzma)
55else
56$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
57 $(call if_changed,gzip) 45 $(call if_changed,gzip)
58$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE 46$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
59 $(call if_changed,bzip2) 47 $(call if_changed,bzip2)
60$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE 48$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
61 $(call if_changed,lzma) 49 $(call if_changed,lzma)
62endif
63LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
64 50
65else 51suffix-$(CONFIG_KERNEL_GZIP) := gz
52suffix-$(CONFIG_KERNEL_BZIP2) := bz2
53suffix-$(CONFIG_KERNEL_LZMA) := lzma
66 54
67$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE 55quiet_cmd_mkpiggy = MKPIGGY $@
68 $(call if_changed,gzip) 56 cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
69$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
70 $(call if_changed,bzip2)
71$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
72 $(call if_changed,lzma)
73
74LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
75endif
76 57
77suffix_$(CONFIG_KERNEL_GZIP) = gz 58targets += piggy.S
78suffix_$(CONFIG_KERNEL_BZIP2) = bz2 59$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
79suffix_$(CONFIG_KERNEL_LZMA) = lzma 60 $(call if_changed,mkpiggy)
80
81$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
82 $(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3a8a866fb2e2..75e4f001e706 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -12,16 +12,16 @@
12 * the page directory. [According to comments etc elsewhere on a compressed 12 * the page directory. [According to comments etc elsewhere on a compressed
13 * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] 13 * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
14 * 14 *
15 * Page 0 is deliberately kept safe, since System Management Mode code in 15 * Page 0 is deliberately kept safe, since System Management Mode code in
16 * laptops may need to access the BIOS data stored there. This is also 16 * laptops may need to access the BIOS data stored there. This is also
17 * useful for future device drivers that access the BIOS via VM86 17 * useful for future device drivers that access the BIOS via VM86
18 * mode. 18 * mode.
19 */ 19 */
20 20
21/* 21/*
22 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 22 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
23 */ 23 */
24.text 24 .text
25 25
26#include <linux/linkage.h> 26#include <linux/linkage.h>
27#include <asm/segment.h> 27#include <asm/segment.h>
@@ -29,161 +29,151 @@
29#include <asm/boot.h> 29#include <asm/boot.h>
30#include <asm/asm-offsets.h> 30#include <asm/asm-offsets.h>
31 31
32.section ".text.head","ax",@progbits 32 .section ".text.head","ax",@progbits
33ENTRY(startup_32) 33ENTRY(startup_32)
34 cld 34 cld
35 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 35 /*
36 * us to not reload segments */ 36 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
37 testb $(1<<6), BP_loadflags(%esi) 37 * us to not reload segments
38 jnz 1f 38 */
39 testb $(1<<6), BP_loadflags(%esi)
40 jnz 1f
39 41
40 cli 42 cli
41 movl $(__BOOT_DS),%eax 43 movl $__BOOT_DS, %eax
42 movl %eax,%ds 44 movl %eax, %ds
43 movl %eax,%es 45 movl %eax, %es
44 movl %eax,%fs 46 movl %eax, %fs
45 movl %eax,%gs 47 movl %eax, %gs
46 movl %eax,%ss 48 movl %eax, %ss
471: 491:
48 50
49/* Calculate the delta between where we were compiled to run 51/*
52 * Calculate the delta between where we were compiled to run
50 * at and where we were actually loaded at. This can only be done 53 * at and where we were actually loaded at. This can only be done
51 * with a short local call on x86. Nothing else will tell us what 54 * with a short local call on x86. Nothing else will tell us what
52 * address we are running at. The reserved chunk of the real-mode 55 * address we are running at. The reserved chunk of the real-mode
53 * data at 0x1e4 (defined as a scratch field) is used as the stack 56 * data at 0x1e4 (defined as a scratch field) is used as the stack
54 * for this calculation. Only 4 bytes are needed. 57 * for this calculation. Only 4 bytes are needed.
55 */ 58 */
56 leal (0x1e4+4)(%esi), %esp 59 leal (BP_scratch+4)(%esi), %esp
57 call 1f 60 call 1f
581: popl %ebp 611: popl %ebp
59 subl $1b, %ebp 62 subl $1b, %ebp
60 63
61/* %ebp contains the address we are loaded at by the boot loader and %ebx 64/*
65 * %ebp contains the address we are loaded at by the boot loader and %ebx
62 * contains the address where we should move the kernel image temporarily 66 * contains the address where we should move the kernel image temporarily
63 * for safe in-place decompression. 67 * for safe in-place decompression.
64 */ 68 */
65 69
66#ifdef CONFIG_RELOCATABLE 70#ifdef CONFIG_RELOCATABLE
67 movl %ebp, %ebx 71 movl %ebp, %ebx
68 addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx 72 movl BP_kernel_alignment(%esi), %eax
69 andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx 73 decl %eax
74 addl %eax, %ebx
75 notl %eax
76 andl %eax, %ebx
70#else 77#else
71 movl $LOAD_PHYSICAL_ADDR, %ebx 78 movl $LOAD_PHYSICAL_ADDR, %ebx
72#endif 79#endif
73 80
74 /* Replace the compressed data size with the uncompressed size */ 81 /* Target address to relocate to for decompression */
75 subl input_len(%ebp), %ebx 82 addl $z_extract_offset, %ebx
76 movl output_len(%ebp), %eax 83
77 addl %eax, %ebx 84 /* Set up the stack */
78 /* Add 8 bytes for every 32K input block */ 85 leal boot_stack_end(%ebx), %esp
79 shrl $12, %eax 86
80 addl %eax, %ebx 87 /* Zero EFLAGS */
81 /* Add 32K + 18 bytes of extra slack */ 88 pushl $0
82 addl $(32768 + 18), %ebx 89 popfl
83 /* Align on a 4K boundary */ 90
84 addl $4095, %ebx 91/*
85 andl $~4095, %ebx 92 * Copy the compressed kernel to the end of our buffer
86
87/* Copy the compressed kernel to the end of our buffer
88 * where decompression in place becomes safe. 93 * where decompression in place becomes safe.
89 */ 94 */
90 pushl %esi 95 pushl %esi
91 leal _end(%ebp), %esi 96 leal (_bss-4)(%ebp), %esi
92 leal _end(%ebx), %edi 97 leal (_bss-4)(%ebx), %edi
93 movl $(_end - startup_32), %ecx 98 movl $(_bss - startup_32), %ecx
99 shrl $2, %ecx
94 std 100 std
95 rep 101 rep movsl
96 movsb
97 cld 102 cld
98 popl %esi 103 popl %esi
99
100/* Compute the kernel start address.
101 */
102#ifdef CONFIG_RELOCATABLE
103 addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp
104 andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp
105#else
106 movl $LOAD_PHYSICAL_ADDR, %ebp
107#endif
108 104
109/* 105/*
110 * Jump to the relocated address. 106 * Jump to the relocated address.
111 */ 107 */
112 leal relocated(%ebx), %eax 108 leal relocated(%ebx), %eax
113 jmp *%eax 109 jmp *%eax
114ENDPROC(startup_32) 110ENDPROC(startup_32)
115 111
116.section ".text" 112 .text
117relocated: 113relocated:
118 114
119/* 115/*
120 * Clear BSS 116 * Clear BSS (stack is currently empty)
121 */
122 xorl %eax,%eax
123 leal _edata(%ebx),%edi
124 leal _end(%ebx), %ecx
125 subl %edi,%ecx
126 cld
127 rep
128 stosb
129
130/*
131 * Setup the stack for the decompressor
132 */ 117 */
133 leal boot_stack_end(%ebx), %esp 118 xorl %eax, %eax
119 leal _bss(%ebx), %edi
120 leal _ebss(%ebx), %ecx
121 subl %edi, %ecx
122 shrl $2, %ecx
123 rep stosl
134 124
135/* 125/*
136 * Do the decompression, and jump to the new kernel.. 126 * Do the decompression, and jump to the new kernel..
137 */ 127 */
138 movl output_len(%ebx), %eax 128 leal z_extract_offset_negative(%ebx), %ebp
139 pushl %eax 129 /* push arguments for decompress_kernel: */
140 # push arguments for decompress_kernel: 130 pushl %ebp /* output address */
141 pushl %ebp # output address 131 pushl $z_input_len /* input_len */
142 movl input_len(%ebx), %eax 132 leal input_data(%ebx), %eax
143 pushl %eax # input_len 133 pushl %eax /* input_data */
144 leal input_data(%ebx), %eax 134 leal boot_heap(%ebx), %eax
145 pushl %eax # input_data 135 pushl %eax /* heap area */
146 leal boot_heap(%ebx), %eax 136 pushl %esi /* real mode pointer */
147 pushl %eax # heap area 137 call decompress_kernel
148 pushl %esi # real mode pointer 138 addl $20, %esp
149 call decompress_kernel
150 addl $20, %esp
151 popl %ecx
152 139
153#if CONFIG_RELOCATABLE 140#if CONFIG_RELOCATABLE
154/* Find the address of the relocations. 141/*
142 * Find the address of the relocations.
155 */ 143 */
156 movl %ebp, %edi 144 leal z_output_len(%ebp), %edi
157 addl %ecx, %edi
158 145
159/* Calculate the delta between where vmlinux was compiled to run 146/*
147 * Calculate the delta between where vmlinux was compiled to run
160 * and where it was actually loaded. 148 * and where it was actually loaded.
161 */ 149 */
162 movl %ebp, %ebx 150 movl %ebp, %ebx
163 subl $LOAD_PHYSICAL_ADDR, %ebx 151 subl $LOAD_PHYSICAL_ADDR, %ebx
164 jz 2f /* Nothing to be done if loaded at compiled addr. */ 152 jz 2f /* Nothing to be done if loaded at compiled addr. */
165/* 153/*
166 * Process relocations. 154 * Process relocations.
167 */ 155 */
168 156
1691: subl $4, %edi 1571: subl $4, %edi
170 movl 0(%edi), %ecx 158 movl (%edi), %ecx
171 testl %ecx, %ecx 159 testl %ecx, %ecx
172 jz 2f 160 jz 2f
173 addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) 161 addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
174 jmp 1b 162 jmp 1b
1752: 1632:
176#endif 164#endif
177 165
178/* 166/*
179 * Jump to the decompressed kernel. 167 * Jump to the decompressed kernel.
180 */ 168 */
181 xorl %ebx,%ebx 169 xorl %ebx, %ebx
182 jmp *%ebp 170 jmp *%ebp
183 171
184.bss 172/*
185/* Stack and heap for uncompression */ 173 * Stack and heap for uncompression
186.balign 4 174 */
175 .bss
176 .balign 4
187boot_heap: 177boot_heap:
188 .fill BOOT_HEAP_SIZE, 1, 0 178 .fill BOOT_HEAP_SIZE, 1, 0
189boot_stack: 179boot_stack:
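For reference, the relocation walk in the CONFIG_RELOCATABLE block above, rendered as C (a sketch only; the real loop runs in assembly before any C environment exists): the relocs tool appends a zero-terminated array of 32-bit kernel virtual addresses after the image, and each referenced word gets the load-time delta added.

	/* reloc_end corresponds to %edi (end of the decompressed image),
	 * delta to %ebx, page_offset to __PAGE_OFFSET. */
	static void handle_relocations(unsigned int *reloc_end,
				       unsigned long delta,
				       unsigned long page_offset)
	{
		unsigned int *reloc = reloc_end;
		unsigned int vaddr;

		while ((vaddr = *--reloc) != 0) {
			/* The table stores virtual addresses; the word
			 * lives at vaddr - page_offset + delta once the
			 * image is loaded at the new physical address. */
			*(unsigned int *)(vaddr - page_offset + delta) += delta;
		}
	}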
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index ed4a82948002..f62c284db9eb 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -21,8 +21,8 @@
21/* 21/*
22 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 22 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
23 */ 23 */
24.code32 24 .code32
25.text 25 .text
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <asm/segment.h> 28#include <asm/segment.h>
@@ -33,12 +33,14 @@
33#include <asm/processor-flags.h> 33#include <asm/processor-flags.h>
34#include <asm/asm-offsets.h> 34#include <asm/asm-offsets.h>
35 35
36.section ".text.head" 36 .section ".text.head"
37 .code32 37 .code32
38ENTRY(startup_32) 38ENTRY(startup_32)
39 cld 39 cld
40 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 40 /*
41 * us to not reload segments */ 41 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
42 * us to not reload segments
43 */
42 testb $(1<<6), BP_loadflags(%esi) 44 testb $(1<<6), BP_loadflags(%esi)
43 jnz 1f 45 jnz 1f
44 46
@@ -49,14 +51,15 @@ ENTRY(startup_32)
49 movl %eax, %ss 51 movl %eax, %ss
501: 521:
51 53
52/* Calculate the delta between where we were compiled to run 54/*
55 * Calculate the delta between where we were compiled to run
53 * at and where we were actually loaded at. This can only be done 56 * at and where we were actually loaded at. This can only be done
54 * with a short local call on x86. Nothing else will tell us what 57 * with a short local call on x86. Nothing else will tell us what
55 * address we are running at. The reserved chunk of the real-mode 58 * address we are running at. The reserved chunk of the real-mode
56 * data at 0x1e4 (defined as a scratch field) is used as the stack 59 * data at 0x1e4 (defined as a scratch field) is used as the stack
57 * for this calculation. Only 4 bytes are needed. 60 * for this calculation. Only 4 bytes are needed.
58 */ 61 */
59 leal (0x1e4+4)(%esi), %esp 62 leal (BP_scratch+4)(%esi), %esp
60 call 1f 63 call 1f
611: popl %ebp 641: popl %ebp
62 subl $1b, %ebp 65 subl $1b, %ebp
@@ -70,32 +73,28 @@ ENTRY(startup_32)
70 testl %eax, %eax 73 testl %eax, %eax
71 jnz no_longmode 74 jnz no_longmode
72 75
73/* Compute the delta between where we were compiled to run at 76/*
77 * Compute the delta between where we were compiled to run at
74 * and where the code will actually run at. 78 * and where the code will actually run at.
75 */ 79 *
76/* %ebp contains the address we are loaded at by the boot loader and %ebx 80 * %ebp contains the address we are loaded at by the boot loader and %ebx
77 * contains the address where we should move the kernel image temporarily 81 * contains the address where we should move the kernel image temporarily
78 * for safe in-place decompression. 82 * for safe in-place decompression.
79 */ 83 */
80 84
81#ifdef CONFIG_RELOCATABLE 85#ifdef CONFIG_RELOCATABLE
82 movl %ebp, %ebx 86 movl %ebp, %ebx
83 addl $(PMD_PAGE_SIZE -1), %ebx 87 movl BP_kernel_alignment(%esi), %eax
84 andl $PMD_PAGE_MASK, %ebx 88 decl %eax
89 addl %eax, %ebx
90 notl %eax
91 andl %eax, %ebx
85#else 92#else
86 movl $CONFIG_PHYSICAL_START, %ebx 93 movl $LOAD_PHYSICAL_ADDR, %ebx
87#endif 94#endif
88 95
89 /* Replace the compressed data size with the uncompressed size */ 96 /* Target address to relocate to for decompression */
90 subl input_len(%ebp), %ebx 97 addl $z_extract_offset, %ebx
91 movl output_len(%ebp), %eax
92 addl %eax, %ebx
93 /* Add 8 bytes for every 32K input block */
94 shrl $12, %eax
95 addl %eax, %ebx
96 /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
97 addl $(32768 + 18 + 4095), %ebx
98 andl $~4095, %ebx
99 98
100/* 99/*
101 * Prepare for entering 64 bit mode 100 * Prepare for entering 64 bit mode
@@ -114,7 +113,7 @@ ENTRY(startup_32)
114 /* 113 /*
115 * Build early 4G boot pagetable 114 * Build early 4G boot pagetable
116 */ 115 */
117 /* Initialize Page tables to 0*/ 116 /* Initialize Page tables to 0 */
118 leal pgtable(%ebx), %edi 117 leal pgtable(%ebx), %edi
119 xorl %eax, %eax 118 xorl %eax, %eax
120 movl $((4096*6)/4), %ecx 119 movl $((4096*6)/4), %ecx
@@ -155,7 +154,8 @@ ENTRY(startup_32)
155 btsl $_EFER_LME, %eax 154 btsl $_EFER_LME, %eax
156 wrmsr 155 wrmsr
157 156
158 /* Setup for the jump to 64bit mode 157 /*
158 * Setup for the jump to 64bit mode
159 * 159 *
160 * When the jump is performed we will be in long mode but 160 * When the jump is performed we will be in long mode but
161 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1 161 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
@@ -184,7 +184,8 @@ no_longmode:
184 184
185#include "../../kernel/verify_cpu_64.S" 185#include "../../kernel/verify_cpu_64.S"
186 186
187 /* Be careful here startup_64 needs to be at a predictable 187 /*
188 * Be careful here startup_64 needs to be at a predictable
188 * address so I can export it in an ELF header. Bootloaders 189 * address so I can export it in an ELF header. Bootloaders
189 * should look at the ELF header to find this address, as 190 * should look at the ELF header to find this address, as
190 * it may change in the future. 191 * it may change in the future.
@@ -192,7 +193,8 @@ no_longmode:
192 .code64 193 .code64
193 .org 0x200 194 .org 0x200
194ENTRY(startup_64) 195ENTRY(startup_64)
195 /* We come here either from startup_32 or directly from a 196 /*
197 * We come here either from startup_32 or directly from a
196 * 64bit bootloader. If we come here from a bootloader we depend on 198 * 64bit bootloader. If we come here from a bootloader we depend on
197 * an identity mapped page table being provided that maps our 199 * an identity mapped page table being provided that maps our
198 * entire text+data+bss and hopefully all of memory. 200 * entire text+data+bss and hopefully all of memory.
@@ -209,50 +211,54 @@ ENTRY(startup_64)
209 movl $0x20, %eax 211 movl $0x20, %eax
210 ltr %ax 212 ltr %ax
211 213
212 /* Compute the decompressed kernel start address. It is where 214 /*
215 * Compute the decompressed kernel start address. It is where
213 * we were loaded at aligned to a 2M boundary. %rbp contains the 216 * we were loaded at aligned to a 2M boundary. %rbp contains the
214 * decompressed kernel start address. 217 * decompressed kernel start address.
215 * 218 *
216 * If it is a relocatable kernel then decompress and run the kernel 219 * If it is a relocatable kernel then decompress and run the kernel
217 * from load address aligned to 2MB addr, otherwise decompress and 220 * from load address aligned to 2MB addr, otherwise decompress and
218 * run the kernel from CONFIG_PHYSICAL_START 221 * run the kernel from LOAD_PHYSICAL_ADDR
222 *
223 * We cannot rely on the calculation done in 32-bit mode, since we
224 * may have been invoked via the 64-bit entry point.
219 */ 225 */
220 226
221 /* Start with the delta to where the kernel will run at. */ 227 /* Start with the delta to where the kernel will run at. */
222#ifdef CONFIG_RELOCATABLE 228#ifdef CONFIG_RELOCATABLE
223 leaq startup_32(%rip) /* - $startup_32 */, %rbp 229 leaq startup_32(%rip) /* - $startup_32 */, %rbp
224 addq $(PMD_PAGE_SIZE - 1), %rbp 230 movl BP_kernel_alignment(%rsi), %eax
225 andq $PMD_PAGE_MASK, %rbp 231 decl %eax
226 movq %rbp, %rbx 232 addq %rax, %rbp
233 notq %rax
234 andq %rax, %rbp
227#else 235#else
228 movq $CONFIG_PHYSICAL_START, %rbp 236 movq $LOAD_PHYSICAL_ADDR, %rbp
229 movq %rbp, %rbx
230#endif 237#endif
231 238
232 /* Replace the compressed data size with the uncompressed size */ 239 /* Target address to relocate to for decompression */
233 movl input_len(%rip), %eax 240 leaq z_extract_offset(%rbp), %rbx
234 subq %rax, %rbx 241
235 movl output_len(%rip), %eax 242 /* Set up the stack */
236 addq %rax, %rbx 243 leaq boot_stack_end(%rbx), %rsp
237 /* Add 8 bytes for every 32K input block */ 244
238 shrq $12, %rax 245 /* Zero EFLAGS */
239 addq %rax, %rbx 246 pushq $0
240 /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ 247 popfq
241 addq $(32768 + 18 + 4095), %rbx 248
242 andq $~4095, %rbx 249/*
243 250 * Copy the compressed kernel to the end of our buffer
244/* Copy the compressed kernel to the end of our buffer
245 * where decompression in place becomes safe. 251 * where decompression in place becomes safe.
246 */ 252 */
247 leaq _end_before_pgt(%rip), %r8 253 pushq %rsi
248 leaq _end_before_pgt(%rbx), %r9 254 leaq (_bss-8)(%rip), %rsi
249 movq $_end_before_pgt /* - $startup_32 */, %rcx 255 leaq (_bss-8)(%rbx), %rdi
2501: subq $8, %r8 256 movq $_bss /* - $startup_32 */, %rcx
251 subq $8, %r9 257 shrq $3, %rcx
252 movq 0(%r8), %rax 258 std
253 movq %rax, 0(%r9) 259 rep movsq
254 subq $8, %rcx 260 cld
255 jnz 1b 261 popq %rsi
256 262
257/* 263/*
258 * Jump to the relocated address. 264 * Jump to the relocated address.
@@ -260,37 +266,28 @@ ENTRY(startup_64)
260 leaq relocated(%rbx), %rax 266 leaq relocated(%rbx), %rax
261 jmp *%rax 267 jmp *%rax
262 268
263.section ".text" 269 .text
264relocated: 270relocated:
265 271
266/* 272/*
267 * Clear BSS 273 * Clear BSS (stack is currently empty)
268 */ 274 */
269 xorq %rax, %rax 275 xorl %eax, %eax
270 leaq _edata(%rbx), %rdi 276 leaq _bss(%rip), %rdi
271 leaq _end_before_pgt(%rbx), %rcx 277 leaq _ebss(%rip), %rcx
272 subq %rdi, %rcx 278 subq %rdi, %rcx
273 cld 279 shrq $3, %rcx
274 rep 280 rep stosq
275 stosb
276
277 /* Setup the stack */
278 leaq boot_stack_end(%rip), %rsp
279
280 /* zero EFLAGS after setting rsp */
281 pushq $0
282 popfq
283 281
284/* 282/*
285 * Do the decompression, and jump to the new kernel.. 283 * Do the decompression, and jump to the new kernel..
286 */ 284 */
287 pushq %rsi # Save the real mode argument 285 pushq %rsi /* Save the real mode argument */
288 movq %rsi, %rdi # real mode address 286 movq %rsi, %rdi /* real mode address */
289 leaq boot_heap(%rip), %rsi # malloc area for uncompression 287 leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
290 leaq input_data(%rip), %rdx # input_data 288 leaq input_data(%rip), %rdx /* input_data */
291 movl input_len(%rip), %eax 289 movl $z_input_len, %ecx /* input_len */
292 movq %rax, %rcx # input_len 290 movq %rbp, %r8 /* output target address */
293 movq %rbp, %r8 # output
294 call decompress_kernel 291 call decompress_kernel
295 popq %rsi 292 popq %rsi
296 293
@@ -311,11 +308,21 @@ gdt:
311 .quad 0x0000000000000000 /* TS continued */ 308 .quad 0x0000000000000000 /* TS continued */
312gdt_end: 309gdt_end:
313 310
314.bss 311/*
315/* Stack and heap for uncompression */ 312 * Stack and heap for uncompression
316.balign 4 313 */
314 .bss
315 .balign 4
317boot_heap: 316boot_heap:
318 .fill BOOT_HEAP_SIZE, 1, 0 317 .fill BOOT_HEAP_SIZE, 1, 0
319boot_stack: 318boot_stack:
320 .fill BOOT_STACK_SIZE, 1, 0 319 .fill BOOT_STACK_SIZE, 1, 0
321boot_stack_end: 320boot_stack_end:
321
322/*
323 * Space for page tables (not in .bss so not zeroed)
324 */
325 .section ".pgtable","a",@nobits
326 .balign 4096
327pgtable:
328 .fill 6*4096, 1, 0
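The dec/add/not/and sequence that replaces the old PMD_PAGE_MASK arithmetic in both head files is the standard power-of-two round-up, just with the alignment read from boot_params (BP_kernel_alignment) at run time instead of baked in at build time. In C:

	/* Equivalent of: decl %eax; addl %eax, %ebx; notl %eax; andl %eax, %ebx
	 * with %eax holding the alignment and %ebx the load address.
	 * Assumes align is a power of two. */
	static unsigned long align_up(unsigned long addr, unsigned long align)
	{
		return (addr + align - 1) & ~(align - 1);
	}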
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index e45be73684ff..842b2a36174a 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,21 +325,19 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
325 free_mem_ptr = heap; /* Heap */ 325 free_mem_ptr = heap; /* Heap */
326 free_mem_end_ptr = heap + BOOT_HEAP_SIZE; 326 free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
327 327
328 if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
329 error("Destination address inappropriately aligned");
328#ifdef CONFIG_X86_64 330#ifdef CONFIG_X86_64
329 if ((unsigned long)output & (__KERNEL_ALIGN - 1)) 331 if (heap > 0x3fffffffffffUL)
330 error("Destination address not 2M aligned");
331 if ((unsigned long)output >= 0xffffffffffUL)
332 error("Destination address too large"); 332 error("Destination address too large");
333#else 333#else
334 if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1))
335 error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
336 if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) 334 if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
337 error("Destination address too large"); 335 error("Destination address too large");
336#endif
338#ifndef CONFIG_RELOCATABLE 337#ifndef CONFIG_RELOCATABLE
339 if ((u32)output != LOAD_PHYSICAL_ADDR) 338 if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
340 error("Wrong destination address"); 339 error("Wrong destination address");
341#endif 340#endif
342#endif
343 341
344 if (!quiet) 342 if (!quiet)
345 putstr("\nDecompressing Linux... "); 343 putstr("\nDecompressing Linux... ");
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
new file mode 100644
index 000000000000..bcbd36c41432
--- /dev/null
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -0,0 +1,97 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright (C) 2009 Intel Corporation. All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License version
7 * 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 * H. Peter Anvin <hpa@linux.intel.com>
20 *
21 * ----------------------------------------------------------------------- */
22
23/*
24 * Compute the desired load offset from a compressed program; outputs
25 * a small assembly wrapper with the appropriate symbols defined.
26 */
27
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <inttypes.h>
32
33static uint32_t getle32(const void *p)
34{
35 const uint8_t *cp = p;
36
37 return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) +
38 ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24);
39}
40
41int main(int argc, char *argv[])
42{
43 uint32_t olen;
44 long ilen;
45 unsigned long offs;
46 FILE *f;
47
48 if (argc < 2) {
49 fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
50 return 1;
51 }
52
53 /* Get the information for the compressed kernel image first */
54
55 f = fopen(argv[1], "r");
56 if (!f) {
57 perror(argv[1]);
58 return 1;
59 }
60
61
62 if (fseek(f, -4L, SEEK_END)) {
63 perror(argv[1]);
64 }
65 fread(&olen, sizeof olen, 1, f);
66 ilen = ftell(f);
67 olen = getle32(&olen);
68 fclose(f);
69
70 /*
71 * Now we have the input (compressed) and output (uncompressed)
72 * sizes, compute the necessary decompression offset...
73 */
74
75 offs = (olen > ilen) ? olen - ilen : 0;
76 offs += olen >> 12; /* Add 8 bytes for each 32K block */
77 offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */
78 offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
79
80 printf(".section \".rodata.compressed\",\"a\",@progbits\n");
81 printf(".globl z_input_len\n");
82 printf("z_input_len = %lu\n", ilen);
83 printf(".globl z_output_len\n");
84 printf("z_output_len = %lu\n", (unsigned long)olen);
85 printf(".globl z_extract_offset\n");
86 printf("z_extract_offset = 0x%lx\n", offs);
87 /* z_extract_offset_negative allows simplification of head_32.S */
88 printf(".globl z_extract_offset_negative\n");
89 printf("z_extract_offset_negative = -0x%lx\n", offs);
90
91 printf(".globl input_data, input_data_end\n");
92 printf("input_data:\n");
93 printf(".incbin \"%s\"\n", argv[1]);
94 printf("input_data_end:\n");
95
96 return 0;
97}
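The margin mkpiggy computes is the classic bound for in-place gzip decompression: place the compressed payload far enough ahead that the output can never overtake the input (the 4-byte little-endian trailer read above is where gzip records the uncompressed size). A worked example with hypothetical sizes, a 4 MiB image decompressing to 16 MiB:

	#include <stdio.h>

	int main(void)
	{
		unsigned long ilen = 4UL << 20;		/* compressed (example) */
		unsigned long olen = 16UL << 20;	/* uncompressed (example) */
		unsigned long offs;

		offs = (olen > ilen) ? olen - ilen : 0;	/* 12 MiB */
		offs += olen >> 12;			/* + 4096 */
		offs += 32*1024 + 18;			/* + 32786 */
		offs = (offs + 4095) & ~4095UL;		/* round up to 4K */

		printf("z_extract_offset = 0x%lx\n", offs);	/* 0xc0a000 */
		return 0;
	}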
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 857e492c571e..bbeb0c3fbd90 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -504,8 +504,11 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
504 if (sym->st_shndx == SHN_ABS) { 504 if (sym->st_shndx == SHN_ABS) {
505 continue; 505 continue;
506 } 506 }
507 if (r_type == R_386_PC32) { 507 if (r_type == R_386_NONE || r_type == R_386_PC32) {
508 /* PC relative relocations don't need to be adjusted */ 508 /*
509 * NONE can be ignored and PC relative
510 * relocations don't need to be adjusted.
511 */
509 } 512 }
510 else if (r_type == R_386_32) { 513 else if (r_type == R_386_32) {
511 /* Visit relocations that need to be adjusted */ 514 /* Visit relocations that need to be adjusted */
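A sketch of the classification the check above performs (ELF relocation constants inlined for illustration): only absolute relocations end up in the table that the boot-time loop in head_32.S consumes; PC-relative entries move with the image, and NONE entries are placeholders carrying no fix-up at all.

	static int needs_boot_time_adjust(unsigned int r_type)
	{
		switch (r_type) {
		case 0:	/* R_386_NONE */
		case 2:	/* R_386_PC32 */
			return 0;	/* nothing to adjust */
		case 1:	/* R_386_32 */
			return 1;	/* absolute: shift with the image */
		default:
			return -1;	/* relocs die()s on anything else */
		}
	}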
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux.lds.S
index bef1ac891bce..cc353e1b3ffd 100644
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,6 +1,17 @@
1OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") 1OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
2
3#undef i386
4
5#include <asm/page_types.h>
6
7#ifdef CONFIG_X86_64
2OUTPUT_ARCH(i386:x86-64) 8OUTPUT_ARCH(i386:x86-64)
3ENTRY(startup_64) 9ENTRY(startup_64)
10#else
11OUTPUT_ARCH(i386)
12ENTRY(startup_32)
13#endif
14
4SECTIONS 15SECTIONS
5{ 16{
6 /* Be careful parts of head_64.S assume startup_32 is at 17 /* Be careful parts of head_64.S assume startup_32 is at
@@ -33,16 +44,22 @@ SECTIONS
33 *(.data.*) 44 *(.data.*)
34 _edata = . ; 45 _edata = . ;
35 } 46 }
47 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
36 .bss : { 48 .bss : {
37 _bss = . ; 49 _bss = . ;
38 *(.bss) 50 *(.bss)
39 *(.bss.*) 51 *(.bss.*)
40 *(COMMON) 52 *(COMMON)
41 . = ALIGN(8); 53 . = ALIGN(8); /* For convenience during zeroing */
42 _end_before_pgt = . ;
43 . = ALIGN(4096);
44 pgtable = . ;
45 . = . + 4096 * 6;
46 _ebss = .; 54 _ebss = .;
47 } 55 }
56#ifdef CONFIG_X86_64
57 . = ALIGN(PAGE_SIZE);
58 .pgtable : {
59 _pgtable = . ;
60 *(.pgtable)
61 _epgtable = . ;
62 }
63#endif
64 _end = .;
48} 65}
diff --git a/arch/x86/boot/compressed/vmlinux.scr b/arch/x86/boot/compressed/vmlinux.scr
deleted file mode 100644
index f02382ae5c48..000000000000
--- a/arch/x86/boot/compressed/vmlinux.scr
+++ /dev/null
@@ -1,10 +0,0 @@
-SECTIONS
-{
-	.rodata.compressed : {
-		input_len = .;
-		LONG(input_data_end - input_data) input_data = .;
-		*(.data)
-		output_len = . - 4;
-		input_data_end = .;
-	}
-}
diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds
deleted file mode 100644
index bb3c48379c40..000000000000
--- a/arch/x86/boot/compressed/vmlinux_32.lds
+++ /dev/null
@@ -1,43 +0,0 @@
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(startup_32)
-SECTIONS
-{
-	/* Be careful parts of head_32.S assume startup_32 is at
-	 * address 0.
-	 */
-	. = 0;
-	.text.head : {
-		_head = . ;
-		*(.text.head)
-		_ehead = . ;
-	}
-	.rodata.compressed : {
-		*(.rodata.compressed)
-	}
-	.text : {
-		_text = .;	/* Text */
-		*(.text)
-		*(.text.*)
-		_etext = . ;
-	}
-	.rodata : {
-		_rodata = . ;
-		*(.rodata)	/* read-only data */
-		*(.rodata.*)
-		_erodata = . ;
-	}
-	.data : {
-		_data = . ;
-		*(.data)
-		*(.data.*)
-		_edata = . ;
-	}
-	.bss : {
-		_bss = . ;
-		*(.bss)
-		*(.bss.*)
-		*(COMMON)
-		_end = . ;
-	}
-}
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 1aae8f3e5ca1..c501a5b466f8 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -22,17 +23,17 @@
  */
 static int read_mbr(u8 devno, void *buf)
 {
-	u16 ax, bx, cx, dx;
+	struct biosregs ireg, oreg;
 
-	ax = 0x0201;		/* Legacy Read, one sector */
-	cx = 0x0001;		/* Sector 0-0-1 */
-	dx = devno;
-	bx = (size_t)buf;
-	asm volatile("pushfl; stc; int $0x13; setc %%al; popfl"
-		     : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx)
-		     : : "esi", "edi", "memory");
+	initregs(&ireg);
+	ireg.ax = 0x0201;		/* Legacy Read, one sector */
+	ireg.cx = 0x0001;		/* Sector 0-0-1 */
+	ireg.dl = devno;
+	ireg.bx = (size_t)buf;
 
-	return -(u8)ax;		/* 0 or -1 */
+	intcall(0x13, &ireg, &oreg);
+
+	return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
 }
 
 static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
@@ -72,56 +73,46 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
 
 static int get_edd_info(u8 devno, struct edd_info *ei)
 {
-	u16 ax, bx, cx, dx, di;
+	struct biosregs ireg, oreg;
 
 	memset(ei, 0, sizeof *ei);
 
 	/* Check Extensions Present */
 
-	ax = 0x4100;
-	bx = EDDMAGIC1;
-	dx = devno;
-	asm("pushfl; stc; int $0x13; setc %%al; popfl"
-	    : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx)
-	    : : "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x41;
+	ireg.bx = EDDMAGIC1;
+	ireg.dl = devno;
+	intcall(0x13, &ireg, &oreg);
 
-	if ((u8)ax)
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;	/* No extended information */
 
-	if (bx != EDDMAGIC2)
+	if (oreg.bx != EDDMAGIC2)
 		return -1;
 
 	ei->device = devno;
-	ei->version = ax >> 8;		/* EDD version number */
-	ei->interface_support = cx;	/* EDD functionality subsets */
+	ei->version = oreg.ah;		 /* EDD version number */
+	ei->interface_support = oreg.cx; /* EDD functionality subsets */
 
 	/* Extended Get Device Parameters */
 
 	ei->params.length = sizeof(ei->params);
-	ax = 0x4800;
-	dx = devno;
-	asm("pushfl; int $0x13; popfl"
-	    : "+a" (ax), "+d" (dx), "=m" (ei->params)
-	    : "S" (&ei->params)
-	    : "ebx", "ecx", "edi");
+	ireg.ah = 0x48;
+	ireg.si = (size_t)&ei->params;
+	intcall(0x13, &ireg, &oreg);
 
 	/* Get legacy CHS parameters */
 
 	/* Ralf Brown recommends setting ES:DI to 0:0 */
-	ax = 0x0800;
-	dx = devno;
-	di = 0;
-	asm("pushw %%es; "
-	    "movw %%di,%%es; "
-	    "pushfl; stc; int $0x13; setc %%al; popfl; "
-	    "popw %%es"
-	    : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di)
-	    : : "esi");
-
-	if ((u8)ax == 0) {
-		ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2);
-		ei->legacy_max_head = dx >> 8;
-		ei->legacy_sectors_per_track = cx & 0x3f;
+	ireg.ah = 0x08;
+	ireg.es = 0;
+	intcall(0x13, &ireg, &oreg);
+
+	if (!(oreg.eflags & X86_EFLAGS_CF)) {
+		ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
+		ei->legacy_max_head = oreg.dh;
+		ei->legacy_sectors_per_track = oreg.cl & 0x3f;
 	}
 
 	return 0;
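The legacy CHS block in get_edd_info() above unpacks the registers returned by INT 13h, AH=08h: CH holds the low eight bits of the maximum cylinder, the top two bits of CL hold cylinder bits 8-9, the low six bits of CL are sectors per track, and DH is the maximum head. A host-side sketch of that unpacking, using invented register values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example values such as a BIOS might return; not real output. */
	uint8_t ch = 0xfe, cl = 0xbf, dh = 0xef;

	unsigned max_cylinder = ch + ((cl & 0xc0) << 2);  /* bits 8-9 from CL */
	unsigned max_head = dh;
	unsigned sectors_per_track = cl & 0x3f;           /* low 6 bits of CL */

	printf("max C/H/S = %u/%u/%u\n", max_cylinder, max_head,
	       sectors_per_track);	/* 766/239/63 for these values */
	return 0;
}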
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 5d84d1c74e4c..b31cc54b4641 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -22,7 +22,8 @@
 #include <asm/page_types.h>
 #include <asm/setup.h>
 #include "boot.h"
-#include "offsets.h"
+#include "voffset.h"
+#include "zoffset.h"
 
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
 SYSSEG		= 0x1000		/* historical load address >> 4 */
@@ -115,7 +116,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x0209		# header version number (>= 0x0105)
+		.word	0x020a		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -168,7 +169,11 @@ heap_end_ptr:	.word	_end+STACK_SIZE-512
 					# end of setup code can be used by setup
 					# for local heap purposes.
 
-pad1:		.word	0
+ext_loader_ver:
+		.byte	0		# Extended boot loader version
+ext_loader_type:
+		.byte	0		# Extended boot loader type
+
 cmd_line_ptr:	.long	0		# (Header version 0x0202 or later)
 					# If nonzero, a 32-bit pointer
 					# to the kernel command line.
@@ -200,7 +205,7 @@ relocatable_kernel:	.byte 1
 #else
 relocatable_kernel:	.byte 0
 #endif
-pad2:			.byte 0
+min_alignment:		.byte MIN_KERNEL_ALIGN_LG2	# minimum alignment
 pad3:			.word 0
 
 cmdline_size:	.long	COMMAND_LINE_SIZE-1	#length of the command line,
@@ -212,16 +217,27 @@ hardware_subarch:	.long 0		# subarchitecture, added with 2.07
 
 hardware_subarch_data:	.quad 0
 
-payload_offset:		.long input_data
-payload_length:		.long input_data_end-input_data
+payload_offset:		.long ZO_input_data
+payload_length:		.long ZO_z_input_len
 
 setup_data:		.quad 0			# 64-bit physical pointer to
 						# single linked list of
 						# struct setup_data
 
+pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
+
+#define ZO_INIT_SIZE	(ZO__end - ZO_startup_32 + ZO_z_extract_offset)
+#define VO_INIT_SIZE	(VO__end - VO__text)
+#if ZO_INIT_SIZE > VO_INIT_SIZE
+#define INIT_SIZE ZO_INIT_SIZE
+#else
+#define INIT_SIZE VO_INIT_SIZE
+#endif
+init_size:		.long INIT_SIZE		# kernel initialization size
+
 # End of setup header #####################################################
 
-	.section ".inittext", "ax"
+	.section ".entrytext", "ax"
 start_of_setup:
 #ifdef SAFE_RESET_DISK_CONTROLLER
 # Reset the disk controller.
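The init_size field above takes the larger of two footprints: ZO_INIT_SIZE, the room the compressed image needs to decompress in place (its own extent plus z_extract_offset), and VO_INIT_SIZE, the uncompressed kernel's extent; a boot loader must reserve that much memory from the load address. A C sketch of the selection, with invented numbers standing in for the generated ZO_*/VO_* linker symbols:

#include <stdio.h>

int main(void)
{
	/* All values below are invented stand-ins for linker symbols. */
	unsigned long zo_end = 0x46d000, zo_startup_32 = 0;
	unsigned long z_extract_offset = 0xc0a000;
	unsigned long vo_end = 0x1000000, vo_text = 0;

	unsigned long zo_init_size = zo_end - zo_startup_32 + z_extract_offset;
	unsigned long vo_init_size = vo_end - vo_text;
	unsigned long init_size =
		(zo_init_size > vo_init_size) ? zo_init_size : vo_init_size;

	printf("init_size = 0x%lx\n", init_size);	/* 0x1077000 here */
	return 0;
}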
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 58f0415d3ae0..140172b895bd 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -61,11 +62,10 @@ static void copy_boot_params(void)
  */
 static void keyboard_set_repeat(void)
 {
-	u16 ax = 0x0305;
-	u16 bx = 0;
-	asm volatile("int $0x16"
-		     : "+a" (ax), "+b" (bx)
-		     : : "ecx", "edx", "esi", "edi");
+	struct biosregs ireg;
+	initregs(&ireg);
+	ireg.ax = 0x0305;
+	intcall(0x16, &ireg, NULL);
 }
 
 /*
@@ -73,18 +73,22 @@ static void keyboard_set_repeat(void)
  */
 static void query_ist(void)
 {
+	struct biosregs ireg, oreg;
+
 	/* Some older BIOSes apparently crash on this call, so filter
 	   it from machines too old to have SpeedStep at all. */
 	if (cpu.level < 6)
 		return;
 
-	asm("int $0x15"
-	    : "=a" (boot_params.ist_info.signature),
-	      "=b" (boot_params.ist_info.command),
-	      "=c" (boot_params.ist_info.event),
-	      "=d" (boot_params.ist_info.perf_level)
-	    : "a" (0x0000e980),	 /* IST Support */
-	      "d" (0x47534943)); /* Request value */
+	initregs(&ireg);
+	ireg.ax  = 0xe980;	 /* IST Support */
+	ireg.edx = 0x47534943;	 /* Request value */
+	intcall(0x15, &ireg, &oreg);
+
+	boot_params.ist_info.signature  = oreg.eax;
+	boot_params.ist_info.command    = oreg.ebx;
+	boot_params.ist_info.event      = oreg.ecx;
+	boot_params.ist_info.perf_level = oreg.edx;
 }
 
 /*
@@ -93,13 +97,12 @@ static void query_ist(void)
 static void set_bios_mode(void)
 {
 #ifdef CONFIG_X86_64
-	u32 eax, ebx;
+	struct biosregs ireg;
 
-	eax = 0xec00;
-	ebx = 2;
-	asm volatile("int $0x15"
-		     : "+a" (eax), "+b" (ebx)
-		     : : "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ax = 0xec00;
+	ireg.bx = 2;
+	intcall(0x15, &ireg, NULL);
 #endif
 }
 
diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c
index 911eaae5d696..a95a531148ef 100644
--- a/arch/x86/boot/mca.c
+++ b/arch/x86/boot/mca.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -16,26 +17,22 @@
 
 int query_mca(void)
 {
-	u8 err;
-	u16 es, bx, len;
+	struct biosregs ireg, oreg;
+	u16 len;
 
-	asm("pushw %%es ; "
-	    "int $0x15 ; "
-	    "setc %0 ; "
-	    "movw %%es, %1 ; "
-	    "popw %%es"
-	    : "=acd" (err), "=acdSD" (es), "=b" (bx)
-	    : "a" (0xc000));
-
-	if (err)
+	initregs(&ireg);
+	ireg.ah = 0xc0;
+	intcall(0x15, &ireg, &oreg);
+
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;	/* No MCA present */
 
-	set_fs(es);
-	len = rdfs16(bx);
+	set_fs(oreg.es);
+	len = rdfs16(oreg.bx);
 
 	if (len > sizeof(boot_params.sys_desc_table))
 		len = sizeof(boot_params.sys_desc_table);
 
-	copy_from_fs(&boot_params.sys_desc_table, bx, len);
+	copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
 	return 0;
 }
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 5054c2ddd1a0..cae3feb1035e 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -17,43 +17,41 @@
 
 #define SMAP	0x534d4150	/* ASCII "SMAP" */
 
-struct e820_ext_entry {
-	struct e820entry std;
-	u32 ext_flags;
-} __attribute__((packed));
-
 static int detect_memory_e820(void)
 {
 	int count = 0;
-	u32 next = 0;
-	u32 size, id, edi;
-	u8 err;
+	struct biosregs ireg, oreg;
 	struct e820entry *desc = boot_params.e820_map;
-	static struct e820_ext_entry buf; /* static so it is zeroed */
+	static struct e820entry buf; /* static so it is zeroed */
+
+	initregs(&ireg);
+	ireg.ax  = 0xe820;
+	ireg.cx  = sizeof buf;
+	ireg.edx = SMAP;
+	ireg.di  = (size_t)&buf;
 
 	/*
-	 * Set this here so that if the BIOS doesn't change this field
-	 * but still doesn't change %ecx, we're still okay...
+	 * Note: at least one BIOS is known which assumes that the
+	 * buffer pointed to by one e820 call is the same one as
+	 * the previous call, and only changes modified fields.  Therefore,
+	 * we use a temporary buffer and copy the results entry by entry.
+	 *
+	 * This routine deliberately does not try to account for
+	 * ACPI 3+ extended attributes.  This is because there are
+	 * BIOSes in the field which report zero for the valid bit for
+	 * all ranges, and we don't currently make any use of the
+	 * other attribute bits.  Revisit this if we see the extended
+	 * attribute bits deployed in a meaningful way in the future.
 	 */
-	buf.ext_flags = 1;
 
 	do {
-		size = sizeof buf;
-
-		/* Important: %edx and %esi are clobbered by some BIOSes,
-		   so they must be either used for the error output
-		   or explicitly marked clobbered.  Given that, assume there
-		   is something out there clobbering %ebp and %edi, too. */
-		asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0"
-		    : "=d" (err), "+b" (next), "=a" (id), "+c" (size),
-		      "=D" (edi), "+m" (buf)
-		    : "D" (&buf), "d" (SMAP), "a" (0xe820)
-		    : "esi");
+		intcall(0x15, &ireg, &oreg);
+		ireg.ebx = oreg.ebx; /* for next iteration... */
 
 		/* BIOSes which terminate the chain with CF = 1 as opposed
 		   to %ebx = 0 don't always report the SMAP signature on
 		   the final, failing, probe. */
-		if (err)
+		if (oreg.eflags & X86_EFLAGS_CF)
 			break;
 
 		/* Some BIOSes stop returning SMAP in the middle of
@@ -61,66 +59,64 @@ static int detect_memory_e820(void)
 		   screwed up the map at that point, we might have a
 		   partial map, the full map, or complete garbage, so
 		   just return failure. */
-		if (id != SMAP) {
+		if (oreg.eax != SMAP) {
 			count = 0;
 			break;
 		}
 
-		/* ACPI 3.0 added the extended flags support.  If bit 0
-		   in the extended flags is zero, we're supposed to simply
-		   ignore the entry -- a backwards incompatible change! */
-		if (size > 20 && !(buf.ext_flags & 1))
-			continue;
-
-		*desc++ = buf.std;
+		*desc++ = buf;
 		count++;
-	} while (next && count < ARRAY_SIZE(boot_params.e820_map));
+	} while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
 
 	return boot_params.e820_entries = count;
 }
 
 static int detect_memory_e801(void)
 {
-	u16 ax, bx, cx, dx;
-	u8 err;
+	struct biosregs ireg, oreg;
 
-	bx = cx = dx = 0;
-	ax = 0xe801;
-	asm("stc; int $0x15; setc %0"
-	    : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx));
+	initregs(&ireg);
+	ireg.ax = 0xe801;
+	intcall(0x15, &ireg, &oreg);
 
-	if (err)
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;
 
 	/* Do we really need to do this? */
-	if (cx || dx) {
-		ax = cx;
-		bx = dx;
+	if (oreg.cx || oreg.dx) {
+		oreg.ax = oreg.cx;
+		oreg.bx = oreg.dx;
 	}
 
-	if (ax > 15*1024)
+	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
-
-	/* This ignores memory above 16MB if we have a memory hole
-	   there.  If someone actually finds a machine with a memory
-	   hole at 16MB and no support for 0E820h they should probably
-	   generate a fake e820 map. */
-	boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax;
+	} else if (oreg.ax == 15*1024) {
+		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+	} else {
+		/*
+		 * This ignores memory above 16MB if we have a memory
+		 * hole there.  If someone actually finds a machine
+		 * with a memory hole at 16MB and no support for
+		 * 0E820h they should probably generate a fake e820
+		 * map.
+		 */
+		boot_params.alt_mem_k = oreg.ax;
+	}
 
 	return 0;
 }
 
 static int detect_memory_88(void)
 {
-	u16 ax;
-	u8 err;
+	struct biosregs ireg, oreg;
 
-	ax = 0x8800;
-	asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax));
+	initregs(&ireg);
+	ireg.ah = 0x88;
+	intcall(0x15, &ireg, &oreg);
 
-	boot_params.screen_info.ext_mem_k = ax;
+	boot_params.screen_info.ext_mem_k = oreg.ax;
 
-	return -err;
+	return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
 }
 
 int detect_memory(void)
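The loop above follows the standard INT 15h, AX=E820h protocol: EBX is an opaque continuation value (zero on the first call, fed back on each iteration), EAX must come back holding the "SMAP" signature, and the chain ends when EBX returns zero or CF is set. A host-side sketch of the same loop logic against a mocked BIOS; fake_e820() and its three-entry map are invented stand-ins for intcall(0x15, ...):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SMAP 0x534d4150	/* ASCII "SMAP" */

struct e820entry { uint64_t addr, size; uint32_t type; };

/* Invented three-entry stand-in for the BIOS memory map. */
static const struct e820entry fake_map[] = {
	{ 0x00000000ULL, 0x0009fc00ULL, 1 },
	{ 0x00100000ULL, 0x3fef0000ULL, 1 },
	{ 0xfffc0000ULL, 0x00040000ULL, 2 },
};

/* Stand-in for the BIOS call: copies one entry into the caller's
   buffer, advances the continuation value (0 means "map complete"),
   and returns what EAX would hold on success. */
static uint32_t fake_e820(uint32_t *ebx, struct e820entry *buf)
{
	memcpy(buf, &fake_map[*ebx], sizeof *buf);
	*ebx = (*ebx + 1u < 3u) ? *ebx + 1u : 0u;
	return SMAP;
}

int main(void)
{
	struct e820entry buf;	/* temporary buffer, as in the patch */
	uint32_t ebx = 0;	/* continuation value: 0 on first call */
	int count = 0;

	do {
		if (fake_e820(&ebx, &buf) != SMAP)
			break;	/* bogus BIOS: treat the map as garbage */
		printf("%016llx - %016llx  type %u\n",
		       (unsigned long long)buf.addr,
		       (unsigned long long)(buf.addr + buf.size),
		       buf.type);
		count++;
	} while (ebx && count < 128);

	return 0;
}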
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
new file mode 100644
index 000000000000..958019b1cfa5
--- /dev/null
+++ b/arch/x86/boot/regs.c
@@ -0,0 +1,29 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple helper function for initializing a register set.
+ *
+ * Note that this sets EFLAGS_CF in the input register set; this
+ * makes it easier to catch functions which do nothing but don't
+ * explicitly set CF.
+ */
+
+#include "boot.h"
+
+void initregs(struct biosregs *reg)
+{
+	memset(reg, 0, sizeof *reg);
+	reg->eflags |= X86_EFLAGS_CF;
+	reg->ds = ds();
+	reg->es = ds();
+	reg->fs = fs();
+	reg->gs = gs();
+}
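regs.c above seeds every register image with CF already set, so a BIOS service that silently does nothing reads back as a failure. A host-side demo of that convention, with a cut-down struct and a stubbed intcall() standing in for the real-mode trampoline (the real struct biosregs and intcall() live in boot.h and the assembly added elsewhere in this series):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define X86_EFLAGS_CF 0x00000001

/* Cut-down stand-in for the real struct biosregs. */
struct biosregs { uint32_t eflags, eax; uint16_t ds, es, fs, gs; };

static void initregs(struct biosregs *reg)
{
	memset(reg, 0, sizeof *reg);
	reg->eflags |= X86_EFLAGS_CF;	/* failure unless the call clears CF */
	/* The real helper also seeds ds/es/fs/gs from the segment registers. */
}

/* Stand-in trampoline: a BIOS service that never ran just copies the
   input registers to the output, so the pre-set carry flag survives. */
static void intcall(int nr, const struct biosregs *ireg, struct biosregs *oreg)
{
	(void)nr;
	if (oreg)
		*oreg = *ireg;
}

int main(void)
{
	struct biosregs ireg, oreg;

	initregs(&ireg);
	intcall(0x15, &ireg, &oreg);
	printf("call %s\n", (oreg.eflags & X86_EFLAGS_CF) ? "failed" : "ok");
	return 0;
}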
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index bb8dc2de7969..0f6ec455a2b1 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -15,8 +15,11 @@ SECTIONS
 
 	. = 497;
 	.header		: { *(.header) }
+	.entrytext	: { *(.entrytext) }
 	.inittext	: { *(.inittext) }
 	.initdata	: { *(.initdata) }
+	__end_init = .;
+
 	.text		: { *(.text) }
 	.text32		: { *(.text32) }
 
@@ -52,4 +55,7 @@
 
 	. = ASSERT(_end <= 0x8000, "Setup too big!");
 	. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
+	/* Necessary for the very-old-loader check to work... */
+	. = ASSERT(__end_init <= 5*512, "init sections too big!");
+
 }
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 7e8e8b25f5f6..01ec69c901c7 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -22,24 +23,23 @@
 
 void __attribute__((section(".inittext"))) putchar(int ch)
 {
-	unsigned char c = ch;
+	struct biosregs ireg;
 
-	if (c == '\n')
+	if (ch == '\n')
 		putchar('\r');	/* \n -> \r\n */
 
-	/* int $0x10 is known to have bugs involving touching registers
-	   it shouldn't.  Be extra conservative... */
-	asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal"
-		     : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch));
+	initregs(&ireg);
+	ireg.bx = 0x0007;
+	ireg.cx = 0x0001;
+	ireg.ah = 0x0e;
+	ireg.al = ch;
+	intcall(0x10, &ireg, NULL);
 }
 
 void __attribute__((section(".inittext"))) puts(const char *str)
 {
-	int n = 0;
-	while (*str) {
+	while (*str)
 		putchar(*str++);
-		n++;
-	}
 }
 
 /*
@@ -49,14 +49,13 @@ void __attribute__((section(".inittext"))) puts(const char *str)
 
 static u8 gettime(void)
 {
-	u16 ax = 0x0200;
-	u16 cx, dx;
+	struct biosregs ireg, oreg;
 
-	asm volatile("int $0x1a"
-		     : "+a" (ax), "=c" (cx), "=d" (dx)
-		     : : "ebx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x02;
+	intcall(0x1a, &ireg, &oreg);
 
-	return dx >> 8;
+	return oreg.dh;
 }
 
 /*
@@ -64,19 +63,24 @@ static u8 gettime(void)
  */
 int getchar(void)
 {
-	u16 ax = 0;
-	asm volatile("int $0x16" : "+a" (ax));
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+	/* ireg.ah = 0x00; */
+	intcall(0x16, &ireg, &oreg);
 
-	return ax & 0xff;
+	return oreg.al;
 }
 
 static int kbd_pending(void)
 {
-	u8 pending;
-	asm volatile("int $0x16; setnz %0"
-		     : "=qm" (pending)
-		     : "a" (0x0100));
-	return pending;
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+	ireg.ah = 0x01;
+	intcall(0x16, &ireg, &oreg);
+
+	return !(oreg.eflags & X86_EFLAGS_ZF);
 }
 
 void kbd_flush(void)
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index 3fa979c9c363..d660be492363 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -29,21 +30,21 @@ static int bios_set_mode(struct mode_info *mi)
 
 static int set_bios_mode(u8 mode)
 {
-	u16 ax;
+	struct biosregs ireg, oreg;
 	u8 new_mode;
 
-	ax = mode;		/* AH=0x00 Set Video Mode */
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.al = mode;		/* AH=0x00 Set Video Mode */
+	intcall(0x10, &ireg, NULL);
 
-	ax = 0x0f00;		/* Get Current Video Mode */
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+
+	ireg.ah = 0x0f;		/* Get Current Video Mode */
+	intcall(0x10, &ireg, &oreg);
 
 	do_restore = 1;		/* Assume video contents were lost */
-	new_mode = ax & 0x7f;	/* Not all BIOSes are clean with the top bit */
+
+	/* Not all BIOSes are clean with the top bit */
+	new_mode = ireg.al & 0x7f;
 
 	if (new_mode == mode)
 		return 0;	/* Mode change OK */
@@ -53,10 +54,8 @@ static int set_bios_mode(u8 mode)
 	/* Mode setting failed, but we didn't end up where we
 	   started.  That's bad.  Try to revert to the original
 	   video mode. */
-	ax = boot_params.screen_info.orig_video_mode;
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+	ireg.ax = boot_params.screen_info.orig_video_mode;
+	intcall(0x10, &ireg, NULL);
 	}
 #endif
 	return -1;
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 4a58c8ce3f69..c700147d6ffb 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -31,7 +32,7 @@ static inline void vesa_store_mode_params_graphics(void) {}
 static int vesa_probe(void)
 {
 #if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID)
-	u16 ax, cx, di;
+	struct biosregs ireg, oreg;
 	u16 mode;
 	addr_t mode_ptr;
 	struct mode_info *mi;
@@ -39,13 +40,12 @@
 
 	video_vesa.modes = GET_HEAP(struct mode_info, 0);
 
-	ax = 0x4f00;
-	di = (size_t)&vginfo;
-	asm(INT10
-	    : "+a" (ax), "+D" (di), "=m" (vginfo)
-	    : : "ebx", "ecx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f00;
+	ireg.di = (size_t)&vginfo;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f ||
+	if (ireg.ax != 0x004f ||
 	    vginfo.signature != VESA_MAGIC ||
 	    vginfo.version < 0x0102)
 		return 0;	/* Not present */
@@ -65,14 +65,12 @@
 
 		memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
 
-		ax = 0x4f01;
-		cx = mode;
-		di = (size_t)&vminfo;
-		asm(INT10
-		    : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
-		    : : "ebx", "edx", "esi");
+		ireg.ax = 0x4f01;
+		ireg.cx = mode;
+		ireg.di = (size_t)&vminfo;
+		intcall(0x10, &ireg, &oreg);
 
-		if (ax != 0x004f)
+		if (ireg.ax != 0x004f)
 			continue;
 
 		if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -111,20 +109,19 @@
 
 static int vesa_set_mode(struct mode_info *mode)
 {
-	u16 ax, bx, cx, di;
+	struct biosregs ireg, oreg;
 	int is_graphic;
 	u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
 
 	memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
 
-	ax = 0x4f01;
-	cx = vesa_mode;
-	di = (size_t)&vminfo;
-	asm(INT10
-	    : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
-	    : : "ebx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f01;
+	ireg.cx = vesa_mode;
+	ireg.di = (size_t)&vminfo;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return -1;
 
 	if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -141,14 +138,12 @@
 	}
 
 
-	ax = 0x4f02;
-	bx = vesa_mode;
-	di = 0;
-	asm volatile(INT10
-		     : "+a" (ax), "+b" (bx), "+D" (di)
-		     : : "ecx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f02;
+	ireg.bx = vesa_mode;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return -1;
 
 	graphic_mode = is_graphic;
@@ -171,50 +166,45 @@ static int vesa_set_mode(struct mode_info *mode)
 /* Switch DAC to 8-bit mode */
 static void vesa_dac_set_8bits(void)
 {
+	struct biosregs ireg, oreg;
 	u8 dac_size = 6;
 
 	/* If possible, switch the DAC to 8-bit mode */
 	if (vginfo.capabilities & 1) {
-		u16 ax, bx;
-
-		ax = 0x4f08;
-		bx = 0x0800;
-		asm volatile(INT10
-			     : "+a" (ax), "+b" (bx)
-			     : : "ecx", "edx", "esi", "edi");
-
-		if (ax == 0x004f)
-			dac_size = bx >> 8;
+		initregs(&ireg);
+		ireg.ax = 0x4f08;
+		ireg.bh = 0x08;
+		intcall(0x10, &ireg, &oreg);
+		if (oreg.ax == 0x004f)
+			dac_size = oreg.bh;
 	}
 
 	/* Set the color sizes to the DAC size, and offsets to 0 */
 	boot_params.screen_info.red_size = dac_size;
 	boot_params.screen_info.green_size = dac_size;
 	boot_params.screen_info.blue_size = dac_size;
 	boot_params.screen_info.rsvd_size = dac_size;
 
 	boot_params.screen_info.red_pos = 0;
 	boot_params.screen_info.green_pos = 0;
	boot_params.screen_info.blue_pos = 0;
 	boot_params.screen_info.rsvd_pos = 0;
 }
 
 /* Save the VESA protected mode info */
 static void vesa_store_pm_info(void)
 {
-	u16 ax, bx, di, es;
+	struct biosregs ireg, oreg;
 
-	ax = 0x4f0a;
-	bx = di = 0;
-	asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es"
-	    : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di)
-	    : : "ecx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f0a;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return;
 
-	boot_params.screen_info.vesapm_seg = es;
-	boot_params.screen_info.vesapm_off = di;
+	boot_params.screen_info.vesapm_seg = oreg.es;
+	boot_params.screen_info.vesapm_off = oreg.di;
 }
 
 /*
@@ -252,7 +242,7 @@ static void vesa_store_mode_params_graphics(void)
 void vesa_store_edid(void)
 {
 #ifdef CONFIG_FIRMWARE_EDID
-	u16 ax, bx, cx, dx, di;
+	struct biosregs ireg, oreg;
 
 	/* Apparently used as a nonsense token... */
 	memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
@@ -260,33 +250,26 @@
 	if (vginfo.version < 0x0200)
 		return;		/* EDID requires VBE 2.0+ */
 
-	ax = 0x4f15;		/* VBE DDC */
-	bx = 0x0000;		/* Report DDC capabilities */
-	cx = 0;			/* Controller 0 */
-	di = 0;			/* ES:DI must be 0 by spec */
-
-	/* Note: The VBE DDC spec is different from the main VESA spec;
-	   we genuinely have to assume all registers are destroyed here. */
-
-	asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es"
-	    : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di)
-	    : : "esi", "edx");
+	initregs(&ireg);
+	ireg.ax = 0x4f15;		/* VBE DDC */
+	/* ireg.bx = 0x0000; */		/* Report DDC capabilities */
+	/* ireg.cx = 0; */		/* Controller 0 */
+	ireg.es = 0;			/* ES:DI must be 0 by spec */
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return;		/* No EDID */
 
 	/* BH = time in seconds to transfer EDD information */
 	/* BL = DDC level supported */
 
-	ax = 0x4f15;		/* VBE DDC */
-	bx = 0x0001;		/* Read EDID */
-	cx = 0;			/* Controller 0 */
-	dx = 0;			/* EDID block number */
-	di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */
-	asm(INT10
-	    : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info),
-	      "+c" (cx), "+D" (di)
-	    : : "esi");
+	ireg.ax = 0x4f15;		/* VBE DDC */
+	ireg.bx = 0x0001;		/* Read EDID */
+	/* ireg.cx = 0; */		/* Controller 0 */
+	/* ireg.dx = 0; */		/* EDID block number */
+	ireg.es = ds();
+	ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
+	intcall(0x10, &ireg, &oreg);
 #endif /* CONFIG_FIRMWARE_EDID */
 }
 
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 9e0587a37768..8f8d827e254d 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -39,30 +40,30 @@ static __videocard video_vga;
 /* Set basic 80x25 mode */
 static u8 vga_set_basic_mode(void)
 {
+	struct biosregs ireg, oreg;
 	u16 ax;
 	u8 rows;
 	u8 mode;
 
+	initregs(&ireg);
+
 #ifdef CONFIG_VIDEO_400_HACK
 	if (adapter >= ADAPTER_VGA) {
-		asm volatile(INT10
-			     : : "a" (0x1202), "b" (0x0030)
-			     : "ecx", "edx", "esi", "edi");
+		ireg.ax = 0x1202;
+		ireg.bx = 0x0030;
+		intcall(0x10, &ireg, NULL);
 	}
 #endif
 
 	ax = 0x0f00;
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
-
-	mode = (u8)ax;
+	intcall(0x10, &ireg, &oreg);
+	mode = oreg.al;
 
 	set_fs(0);
 	rows = rdfs8(0x484);	/* rows minus one */
 
 #ifndef CONFIG_VIDEO_400_HACK
-	if ((ax == 0x5003 || ax == 0x5007) &&
+	if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
 	    (rows == 0 || rows == 24))
 		return mode;
 #endif
@@ -71,10 +72,8 @@ static u8 vga_set_basic_mode(void)
 	mode = 3;
 
 	/* Set the mode */
-	ax = mode;
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+	ireg.ax = mode;		/* AH=0: set mode */
+	intcall(0x10, &ireg, NULL);
 	do_restore = 1;
 	return mode;
 }
@@ -82,43 +81,69 @@ static u8 vga_set_basic_mode(void)
 static void vga_set_8font(void)
 {
 	/* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 8x8 font */
-	asm volatile(INT10 : : "a" (0x1112), "b" (0));
+	ireg.ax = 0x1112;
+	/* ireg.bl = 0; */
+	intcall(0x10, &ireg, NULL);
 
 	/* Use alternate print screen */
-	asm volatile(INT10 : : "a" (0x1200), "b" (0x20));
+	ireg.ax = 0x1200;
+	ireg.bl = 0x20;
+	intcall(0x10, &ireg, NULL);
 
 	/* Turn off cursor emulation */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x34;
+	intcall(0x10, &ireg, NULL);
 
 	/* Cursor is scan lines 6-7 */
-	asm volatile(INT10 : : "a" (0x0100), "c" (0x0607));
+	ireg.ax = 0x0100;
+	ireg.cx = 0x0607;
+	intcall(0x10, &ireg, NULL);
 }
 
 static void vga_set_14font(void)
 {
 	/* Set 9x14 font - 80x28 on VGA */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 9x14 font */
-	asm volatile(INT10 : : "a" (0x1111), "b" (0));
+	ireg.ax = 0x1111;
+	/* ireg.bl = 0; */
+	intcall(0x10, &ireg, NULL);
 
 	/* Turn off cursor emulation */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x34;
+	intcall(0x10, &ireg, NULL);
 
 	/* Cursor is scan lines 11-12 */
-	asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c));
+	ireg.ax = 0x0100;
+	ireg.cx = 0x0b0c;
+	intcall(0x10, &ireg, NULL);
 }
 
 static void vga_set_80x43(void)
 {
 	/* Set 80x43 mode on VGA (not EGA) */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 350 scans */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x30));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x30;
+	intcall(0x10, &ireg, NULL);
 
 	/* Reset video mode */
-	asm volatile(INT10 : : "a" (0x0003));
+	ireg.ax = 0x0003;
+	intcall(0x10, &ireg, NULL);
 
 	vga_set_8font();
 }
@@ -225,8 +250,6 @@ static int vga_set_mode(struct mode_info *mode)
  */
 static int vga_probe(void)
 {
-	u16 ega_bx;
-
 	static const char *card_name[] = {
 		"CGA/MDA/HGC", "EGA", "VGA"
 	};
@@ -240,26 +263,26 @@
 		sizeof(ega_modes)/sizeof(struct mode_info),
 		sizeof(vga_modes)/sizeof(struct mode_info),
 	};
-	u8 vga_flag;
 
-	asm(INT10
-	    : "=b" (ega_bx)
-	    : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */
-	    : "ecx", "edx", "esi", "edi");
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+
+	ireg.ax = 0x1200;
+	ireg.bl = 0x10;		/* Check EGA/VGA */
+	intcall(0x10, &ireg, &oreg);
 
 #ifndef _WAKEUP
-	boot_params.screen_info.orig_video_ega_bx = ega_bx;
+	boot_params.screen_info.orig_video_ega_bx = oreg.bx;
 #endif
 
 	/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
-	if ((u8)ega_bx != 0x10) {
+	if (oreg.bl != 0x10) {
 		/* EGA/VGA */
-		asm(INT10
-		    : "=a" (vga_flag)
-		    : "a" (0x1a00)
-		    : "ebx", "ecx", "edx", "esi", "edi");
+		ireg.ax = 0x1a00;
+		intcall(0x10, &ireg, &oreg);
 
-		if (vga_flag == 0x1a) {
+		if (oreg.al == 0x1a) {
 			adapter = ADAPTER_VGA;
 #ifndef _WAKEUP
 			boot_params.screen_info.orig_video_isVGA = 1;
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 3bef2c1febe9..bad728b76fc2 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -18,33 +19,29 @@
 
 static void store_cursor_position(void)
 {
-	u16 curpos;
-	u16 ax, bx;
+	struct biosregs ireg, oreg;
 
-	ax = 0x0300;
-	bx = 0;
-	asm(INT10
-	    : "=d" (curpos), "+a" (ax), "+b" (bx)
-	    : : "ecx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x03;
+	intcall(0x10, &ireg, &oreg);
 
-	boot_params.screen_info.orig_x = curpos;
-	boot_params.screen_info.orig_y = curpos >> 8;
+	boot_params.screen_info.orig_x = oreg.dl;
+	boot_params.screen_info.orig_y = oreg.dh;
 }
 
 static void store_video_mode(void)
 {
-	u16 ax, page;
+	struct biosregs ireg, oreg;
 
 	/* N.B.: the saving of the video page here is a bit silly,
 	   since we pretty much assume page 0 everywhere. */
-	ax = 0x0f00;
-	asm(INT10
-	    : "+a" (ax), "=b" (page)
-	    : : "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x0f;
+	intcall(0x10, &ireg, &oreg);
 
 	/* Not all BIOSes are clean with respect to the top bit */
-	boot_params.screen_info.orig_video_mode = ax & 0x7f;
-	boot_params.screen_info.orig_video_page = page >> 8;
+	boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
+	boot_params.screen_info.orig_video_page = oreg.bh;
 }
 
 /*
@@ -257,7 +254,7 @@ static void restore_screen(void)
 	int y;
 	addr_t dst = 0;
 	u16 *src = saved.data;
-	u16 ax, bx, dx;
+	struct biosregs ireg;
 
 	if (graphic_mode)
 		return;		/* Can't restore onto a graphic mode */
@@ -296,12 +293,11 @@
 	}
 
 	/* Restore cursor position */
-	ax = 0x0200;		/* Set cursor position */
-	bx = 0;			/* Page number (<< 8) */
-	dx = (saved.cury << 8)+saved.curx;
-	asm volatile(INT10
-		     : "+a" (ax), "+b" (bx), "+d" (dx)
-		     : : "ecx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x02;		/* Set cursor position */
+	ireg.dh = saved.cury;
+	ireg.dl = saved.curx;
+	intcall(0x10, &ireg, NULL);
 }
 #else
 #define save_screen()		((void)0)
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index ee63f5d14461..5bb174a997fc 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -112,20 +112,6 @@ extern int force_x, force_y;	/* Don't query the BIOS for cols/rows */
 extern int do_restore;		/* Restore screen contents */
 extern int graphic_mode;	/* Graphics mode with linear frame buffer */
 
-/*
- * int $0x10 is notorious for touching registers it shouldn't.
- * gcc doesn't like %ebp being clobbered, so define it as a push/pop
- * sequence here.
- *
- * A number of systems, including the original PC can clobber %bp in
- * certain circumstances, like when scrolling.  There exists at least
- * one Trident video card which could clobber DS under a set of
- * circumstances that we are unlikely to encounter (scrolling when
- * using an extended graphics mode of more than 800x600 pixels), but
- * it's cheap insurance to deal with that here.
- */
-#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp"
-
 /* Accessing VGA indexed registers */
 static inline u8 in_idx(u16 port, u8 index)
 {
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 235b81d0f6f2..edb992ebef92 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,12 +1,13 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:50:58 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:21:55 2009
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
 # CONFIG_X86_64 is not set
 CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf32-i386"
 CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CMOS_UPDATE=y
@@ -33,6 +34,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y
 CONFIG_ARCH_HAS_DEFAULT_IDLE=y
 CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
 # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
 CONFIG_ARCH_HIBERNATION_POSSIBLE=y
 CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -40,15 +42,16 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 # CONFIG_AUDIT_ARCH is not set
 CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_X86_32_SMP=y
 CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_TRAMPOLINE=y
+CONFIG_X86_32_LAZY_GS=y
 CONFIG_KTIME_SCALAR=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
@@ -60,10 +63,17 @@ CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_TASKSTATS=y
@@ -113,23 +123,26 @@ CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
@@ -139,6 +152,7 @@ CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
@@ -154,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -167,7 +183,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -194,12 +209,12 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
+# CONFIG_X86_BIGSMP is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
 # CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
-# CONFIG_X86_VSMP is not set
 # CONFIG_X86_RDC321X is not set
+# CONFIG_X86_32_NON_STANDARD is not set
 CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_PARAVIRT_GUEST is not set
 # CONFIG_MEMTEST is not set
@@ -230,8 +245,10 @@ CONFIG_M686=y
 # CONFIG_GENERIC_CPU is not set
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CPU=y
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_X86_XADD=y
 # CONFIG_X86_PPRO_FENCE is not set
 CONFIG_X86_WP_WORKS_OK=y
@@ -247,7 +264,7 @@ CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_CPU_SUP_INTEL=y
 CONFIG_CPU_SUP_CYRIX_32=y
 CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_32=y
+CONFIG_CPU_SUP_CENTAUR=y
 CONFIG_CPU_SUP_TRANSMETA_32=y
 CONFIG_CPU_SUP_UMC_32=y
 CONFIG_X86_DS=y
@@ -279,6 +296,7 @@ CONFIG_MICROCODE_AMD=y
 CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
 # CONFIG_NOHIGHMEM is not set
 CONFIG_HIGHMEM4G=y
 # CONFIG_HIGHMEM64G is not set
@@ -302,6 +320,8 @@ CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_HIGHPTE=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
@@ -312,6 +332,7 @@ CONFIG_MTRR=y
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
@@ -322,8 +343,9 @@ CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
 # CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_RELOCATABLE=y
+CONFIG_X86_NEED_RELOCS=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
 # CONFIG_CMDLINE_BOOL is not set
@@ -363,7 +385,6 @@ CONFIG_ACPI_THERMAL=y
 CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
 # CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_ACPI_CONTAINER=y
 # CONFIG_ACPI_SBS is not set
@@ -425,6 +446,7 @@ CONFIG_PCI_BIOS=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
 CONFIG_PCI_DOMAINS=y
+# CONFIG_DMAR is not set
 CONFIG_PCIEPORTBUS=y
 # CONFIG_HOTPLUG_PCI_PCIE is not set
 CONFIG_PCIEAER=y
@@ -435,6 +457,7 @@ CONFIG_PCI_MSI=y
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
 CONFIG_ISA_DMA_API=y
 # CONFIG_ISA is not set
 # CONFIG_MCA is not set
@@ -481,7 +504,6 @@ CONFIG_NET=y
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -639,6 +661,7 @@ CONFIG_LLC=y
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 CONFIG_NET_SCHED=y
 
 #
@@ -696,6 +719,7 @@ CONFIG_NET_SCH_FIFO=y
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_HAMRADIO=y
 
 #
@@ -706,12 +730,10 @@ CONFIG_HAMRADIO=y
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_FIB_RULES=y
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=y
 # CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
 CONFIG_WIRELESS_OLD_REGULATORY=y
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
@@ -789,6 +811,7 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_ISL29003 is not set
 # CONFIG_C2PORT is not set
 
 #
@@ -842,6 +865,7 @@ CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
 CONFIG_ATA_ACPI=y
@@ -940,6 +964,7 @@ CONFIG_DM_ZERO=y
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_IFB is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
@@ -977,6 +1002,8 @@ CONFIG_MII=y
 CONFIG_NET_VENDOR_3COM=y
 # CONFIG_VORTEX is not set
 # CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 CONFIG_NET_TULIP=y
 # CONFIG_DE2104X is not set
 # CONFIG_TULIP is not set
@@ -1026,6 +1053,7 @@ CONFIG_E1000=y
 CONFIG_E1000E=y
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1040,6 +1068,7 @@ CONFIG_BNX2=y
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1078,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
1081# CONFIG_VXGE is not set
1052# CONFIG_MYRI10GE is not set 1082# CONFIG_MYRI10GE is not set
1053# CONFIG_NETXEN_NIC is not set 1083# CONFIG_NETXEN_NIC is not set
1054# CONFIG_NIU is not set 1084# CONFIG_NIU is not set
@@ -1058,6 +1088,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
1058# CONFIG_BNX2X is not set 1088# CONFIG_BNX2X is not set
1059# CONFIG_QLGE is not set 1089# CONFIG_QLGE is not set
1060# CONFIG_SFC is not set 1090# CONFIG_SFC is not set
1091# CONFIG_BE2NET is not set
1061CONFIG_TR=y 1092CONFIG_TR=y
1062# CONFIG_IBMOL is not set 1093# CONFIG_IBMOL is not set
1063# CONFIG_IBMLS is not set 1094# CONFIG_IBMLS is not set
@@ -1073,8 +1104,8 @@ CONFIG_WLAN_80211=y
1073# CONFIG_LIBERTAS is not set 1104# CONFIG_LIBERTAS is not set
1074# CONFIG_LIBERTAS_THINFIRM is not set 1105# CONFIG_LIBERTAS_THINFIRM is not set
1075# CONFIG_AIRO is not set 1106# CONFIG_AIRO is not set
1076# CONFIG_HERMES is not set
1077# CONFIG_ATMEL is not set 1107# CONFIG_ATMEL is not set
1108# CONFIG_AT76C50X_USB is not set
1078# CONFIG_AIRO_CS is not set 1109# CONFIG_AIRO_CS is not set
1079# CONFIG_PCMCIA_WL3501 is not set 1110# CONFIG_PCMCIA_WL3501 is not set
1080# CONFIG_PRISM54 is not set 1111# CONFIG_PRISM54 is not set
@@ -1084,21 +1115,21 @@ CONFIG_WLAN_80211=y
1084# CONFIG_RTL8187 is not set 1115# CONFIG_RTL8187 is not set
1085# CONFIG_ADM8211 is not set 1116# CONFIG_ADM8211 is not set
1086# CONFIG_MAC80211_HWSIM is not set 1117# CONFIG_MAC80211_HWSIM is not set
1118# CONFIG_MWL8K is not set
1087# CONFIG_P54_COMMON is not set 1119# CONFIG_P54_COMMON is not set
1088CONFIG_ATH5K=y 1120CONFIG_ATH5K=y
1089# CONFIG_ATH5K_DEBUG is not set 1121# CONFIG_ATH5K_DEBUG is not set
1090# CONFIG_ATH9K is not set 1122# CONFIG_ATH9K is not set
1123# CONFIG_AR9170_USB is not set
1091# CONFIG_IPW2100 is not set 1124# CONFIG_IPW2100 is not set
1092# CONFIG_IPW2200 is not set 1125# CONFIG_IPW2200 is not set
1093# CONFIG_IWLCORE is not set 1126# CONFIG_IWLWIFI is not set
1094# CONFIG_IWLWIFI_LEDS is not set
1095# CONFIG_IWLAGN is not set
1096# CONFIG_IWL3945 is not set
1097# CONFIG_HOSTAP is not set 1127# CONFIG_HOSTAP is not set
1098# CONFIG_B43 is not set 1128# CONFIG_B43 is not set
1099# CONFIG_B43LEGACY is not set 1129# CONFIG_B43LEGACY is not set
1100# CONFIG_ZD1211RW is not set 1130# CONFIG_ZD1211RW is not set
1101# CONFIG_RT2X00 is not set 1131# CONFIG_RT2X00 is not set
1132# CONFIG_HERMES is not set
1102 1133
1103# 1134#
1104# Enable WiMAX (Networking options) to see the WiMAX drivers 1135# Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1209,6 +1240,8 @@ CONFIG_INPUT_TABLET=y
1209# CONFIG_TABLET_USB_KBTAB is not set 1240# CONFIG_TABLET_USB_KBTAB is not set
1210# CONFIG_TABLET_USB_WACOM is not set 1241# CONFIG_TABLET_USB_WACOM is not set
1211CONFIG_INPUT_TOUCHSCREEN=y 1242CONFIG_INPUT_TOUCHSCREEN=y
1243# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
1244# CONFIG_TOUCHSCREEN_AD7879 is not set
1212# CONFIG_TOUCHSCREEN_FUJITSU is not set 1245# CONFIG_TOUCHSCREEN_FUJITSU is not set
1213# CONFIG_TOUCHSCREEN_GUNZE is not set 1246# CONFIG_TOUCHSCREEN_GUNZE is not set
1214# CONFIG_TOUCHSCREEN_ELO is not set 1247# CONFIG_TOUCHSCREEN_ELO is not set
@@ -1303,6 +1336,7 @@ CONFIG_UNIX98_PTYS=y
1303# CONFIG_LEGACY_PTYS is not set 1336# CONFIG_LEGACY_PTYS is not set
1304# CONFIG_IPMI_HANDLER is not set 1337# CONFIG_IPMI_HANDLER is not set
1305CONFIG_HW_RANDOM=y 1338CONFIG_HW_RANDOM=y
1339# CONFIG_HW_RANDOM_TIMERIOMEM is not set
1306CONFIG_HW_RANDOM_INTEL=y 1340CONFIG_HW_RANDOM_INTEL=y
1307CONFIG_HW_RANDOM_AMD=y 1341CONFIG_HW_RANDOM_AMD=y
1308CONFIG_HW_RANDOM_GEODE=y 1342CONFIG_HW_RANDOM_GEODE=y
@@ -1390,7 +1424,6 @@ CONFIG_I2C_I801=y
1390# CONFIG_SENSORS_PCF8574 is not set 1424# CONFIG_SENSORS_PCF8574 is not set
1391# CONFIG_PCF8575 is not set 1425# CONFIG_PCF8575 is not set
1392# CONFIG_SENSORS_PCA9539 is not set 1426# CONFIG_SENSORS_PCA9539 is not set
1393# CONFIG_SENSORS_PCF8591 is not set
1394# CONFIG_SENSORS_MAX6875 is not set 1427# CONFIG_SENSORS_MAX6875 is not set
1395# CONFIG_SENSORS_TSL2550 is not set 1428# CONFIG_SENSORS_TSL2550 is not set
1396# CONFIG_I2C_DEBUG_CORE is not set 1429# CONFIG_I2C_DEBUG_CORE is not set
@@ -1424,6 +1457,7 @@ CONFIG_HWMON=y
1424# CONFIG_SENSORS_ADT7475 is not set 1457# CONFIG_SENSORS_ADT7475 is not set
1425# CONFIG_SENSORS_K8TEMP is not set 1458# CONFIG_SENSORS_K8TEMP is not set
1426# CONFIG_SENSORS_ASB100 is not set 1459# CONFIG_SENSORS_ASB100 is not set
1460# CONFIG_SENSORS_ATK0110 is not set
1427# CONFIG_SENSORS_ATXP1 is not set 1461# CONFIG_SENSORS_ATXP1 is not set
1428# CONFIG_SENSORS_DS1621 is not set 1462# CONFIG_SENSORS_DS1621 is not set
1429# CONFIG_SENSORS_I5K_AMB is not set 1463# CONFIG_SENSORS_I5K_AMB is not set
@@ -1433,6 +1467,7 @@ CONFIG_HWMON=y
1433# CONFIG_SENSORS_FSCHER is not set 1467# CONFIG_SENSORS_FSCHER is not set
1434# CONFIG_SENSORS_FSCPOS is not set 1468# CONFIG_SENSORS_FSCPOS is not set
1435# CONFIG_SENSORS_FSCHMD is not set 1469# CONFIG_SENSORS_FSCHMD is not set
1470# CONFIG_SENSORS_G760A is not set
1436# CONFIG_SENSORS_GL518SM is not set 1471# CONFIG_SENSORS_GL518SM is not set
1437# CONFIG_SENSORS_GL520SM is not set 1472# CONFIG_SENSORS_GL520SM is not set
1438# CONFIG_SENSORS_CORETEMP is not set 1473# CONFIG_SENSORS_CORETEMP is not set
@@ -1448,11 +1483,14 @@ CONFIG_HWMON=y
1448# CONFIG_SENSORS_LM90 is not set 1483# CONFIG_SENSORS_LM90 is not set
1449# CONFIG_SENSORS_LM92 is not set 1484# CONFIG_SENSORS_LM92 is not set
1450# CONFIG_SENSORS_LM93 is not set 1485# CONFIG_SENSORS_LM93 is not set
1486# CONFIG_SENSORS_LTC4215 is not set
1451# CONFIG_SENSORS_LTC4245 is not set 1487# CONFIG_SENSORS_LTC4245 is not set
1488# CONFIG_SENSORS_LM95241 is not set
1452# CONFIG_SENSORS_MAX1619 is not set 1489# CONFIG_SENSORS_MAX1619 is not set
1453# CONFIG_SENSORS_MAX6650 is not set 1490# CONFIG_SENSORS_MAX6650 is not set
1454# CONFIG_SENSORS_PC87360 is not set 1491# CONFIG_SENSORS_PC87360 is not set
1455# CONFIG_SENSORS_PC87427 is not set 1492# CONFIG_SENSORS_PC87427 is not set
1493# CONFIG_SENSORS_PCF8591 is not set
1456# CONFIG_SENSORS_SIS5595 is not set 1494# CONFIG_SENSORS_SIS5595 is not set
1457# CONFIG_SENSORS_DME1737 is not set 1495# CONFIG_SENSORS_DME1737 is not set
1458# CONFIG_SENSORS_SMSC47M1 is not set 1496# CONFIG_SENSORS_SMSC47M1 is not set
@@ -1643,7 +1681,6 @@ CONFIG_FB_EFI=y
1643# CONFIG_FB_3DFX is not set 1681# CONFIG_FB_3DFX is not set
1644# CONFIG_FB_VOODOO1 is not set 1682# CONFIG_FB_VOODOO1 is not set
1645# CONFIG_FB_VT8623 is not set 1683# CONFIG_FB_VT8623 is not set
1646# CONFIG_FB_CYBLA is not set
1647# CONFIG_FB_TRIDENT is not set 1684# CONFIG_FB_TRIDENT is not set
1648# CONFIG_FB_ARK is not set 1685# CONFIG_FB_ARK is not set
1649# CONFIG_FB_PM3 is not set 1686# CONFIG_FB_PM3 is not set
@@ -1652,6 +1689,7 @@ CONFIG_FB_EFI=y
1652# CONFIG_FB_VIRTUAL is not set 1689# CONFIG_FB_VIRTUAL is not set
1653# CONFIG_FB_METRONOME is not set 1690# CONFIG_FB_METRONOME is not set
1654# CONFIG_FB_MB862XX is not set 1691# CONFIG_FB_MB862XX is not set
1692# CONFIG_FB_BROADSHEET is not set
1655CONFIG_BACKLIGHT_LCD_SUPPORT=y 1693CONFIG_BACKLIGHT_LCD_SUPPORT=y
1656# CONFIG_LCD_CLASS_DEVICE is not set 1694# CONFIG_LCD_CLASS_DEVICE is not set
1657CONFIG_BACKLIGHT_CLASS_DEVICE=y 1695CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1738,6 +1776,8 @@ CONFIG_SND_PCI=y
1738# CONFIG_SND_INDIGO is not set 1776# CONFIG_SND_INDIGO is not set
1739# CONFIG_SND_INDIGOIO is not set 1777# CONFIG_SND_INDIGOIO is not set
1740# CONFIG_SND_INDIGODJ is not set 1778# CONFIG_SND_INDIGODJ is not set
1779# CONFIG_SND_INDIGOIOX is not set
1780# CONFIG_SND_INDIGODJX is not set
1741# CONFIG_SND_EMU10K1 is not set 1781# CONFIG_SND_EMU10K1 is not set
1742# CONFIG_SND_EMU10K1X is not set 1782# CONFIG_SND_EMU10K1X is not set
1743# CONFIG_SND_ENS1370 is not set 1783# CONFIG_SND_ENS1370 is not set
@@ -1811,15 +1851,17 @@ CONFIG_USB_HIDDEV=y
1811# 1851#
1812# Special HID drivers 1852# Special HID drivers
1813# 1853#
1814CONFIG_HID_COMPAT=y
1815CONFIG_HID_A4TECH=y 1854CONFIG_HID_A4TECH=y
1816CONFIG_HID_APPLE=y 1855CONFIG_HID_APPLE=y
1817CONFIG_HID_BELKIN=y 1856CONFIG_HID_BELKIN=y
1818CONFIG_HID_CHERRY=y 1857CONFIG_HID_CHERRY=y
1819CONFIG_HID_CHICONY=y 1858CONFIG_HID_CHICONY=y
1820CONFIG_HID_CYPRESS=y 1859CONFIG_HID_CYPRESS=y
1860# CONFIG_DRAGONRISE_FF is not set
1821CONFIG_HID_EZKEY=y 1861CONFIG_HID_EZKEY=y
1862CONFIG_HID_KYE=y
1822CONFIG_HID_GYRATION=y 1863CONFIG_HID_GYRATION=y
1864CONFIG_HID_KENSINGTON=y
1823CONFIG_HID_LOGITECH=y 1865CONFIG_HID_LOGITECH=y
1824CONFIG_LOGITECH_FF=y 1866CONFIG_LOGITECH_FF=y
1825# CONFIG_LOGIRUMBLEPAD2_FF is not set 1867# CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1885,11 +1927,11 @@ CONFIG_USB_PRINTER=y
1885# CONFIG_USB_TMC is not set 1927# CONFIG_USB_TMC is not set
1886 1928
1887# 1929#
1888# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; 1930# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
1889# 1931#
1890 1932
1891# 1933#
1892# see USB_STORAGE Help for more information 1934# also be needed; see USB_STORAGE Help for more info
1893# 1935#
1894CONFIG_USB_STORAGE=y 1936CONFIG_USB_STORAGE=y
1895# CONFIG_USB_STORAGE_DEBUG is not set 1937# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1931,7 +1973,6 @@ CONFIG_USB_LIBUSUAL=y
1931# CONFIG_USB_LED is not set 1973# CONFIG_USB_LED is not set
1932# CONFIG_USB_CYPRESS_CY7C63 is not set 1974# CONFIG_USB_CYPRESS_CY7C63 is not set
1933# CONFIG_USB_CYTHERM is not set 1975# CONFIG_USB_CYTHERM is not set
1934# CONFIG_USB_PHIDGET is not set
1935# CONFIG_USB_IDMOUSE is not set 1976# CONFIG_USB_IDMOUSE is not set
1936# CONFIG_USB_FTDI_ELAN is not set 1977# CONFIG_USB_FTDI_ELAN is not set
1937# CONFIG_USB_APPLEDISPLAY is not set 1978# CONFIG_USB_APPLEDISPLAY is not set
@@ -1947,6 +1988,7 @@ CONFIG_USB_LIBUSUAL=y
1947# 1988#
1948# OTG and related infrastructure 1989# OTG and related infrastructure
1949# 1990#
1991# CONFIG_NOP_USB_XCEIV is not set
1950# CONFIG_UWB is not set 1992# CONFIG_UWB is not set
1951# CONFIG_MMC is not set 1993# CONFIG_MMC is not set
1952# CONFIG_MEMSTICK is not set 1994# CONFIG_MEMSTICK is not set
@@ -1958,8 +2000,10 @@ CONFIG_LEDS_CLASS=y
1958# 2000#
1959# CONFIG_LEDS_ALIX2 is not set 2001# CONFIG_LEDS_ALIX2 is not set
1960# CONFIG_LEDS_PCA9532 is not set 2002# CONFIG_LEDS_PCA9532 is not set
2003# CONFIG_LEDS_LP5521 is not set
1961# CONFIG_LEDS_CLEVO_MAIL is not set 2004# CONFIG_LEDS_CLEVO_MAIL is not set
1962# CONFIG_LEDS_PCA955X is not set 2005# CONFIG_LEDS_PCA955X is not set
2006# CONFIG_LEDS_BD2802 is not set
1963 2007
1964# 2008#
1965# LED Triggers 2009# LED Triggers
@@ -1969,6 +2013,10 @@ CONFIG_LEDS_TRIGGERS=y
1969# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set 2013# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
1970# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set 2014# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
1971# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set 2015# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
2016
2017#
2018# iptables trigger is under Netfilter config (LED target)
2019#
1972# CONFIG_ACCESSIBILITY is not set 2020# CONFIG_ACCESSIBILITY is not set
1973# CONFIG_INFINIBAND is not set 2021# CONFIG_INFINIBAND is not set
1974CONFIG_EDAC=y 2022CONFIG_EDAC=y
@@ -2037,6 +2085,7 @@ CONFIG_DMADEVICES=y
2037# DMA Devices 2085# DMA Devices
2038# 2086#
2039# CONFIG_INTEL_IOATDMA is not set 2087# CONFIG_INTEL_IOATDMA is not set
2088# CONFIG_AUXDISPLAY is not set
2040# CONFIG_UIO is not set 2089# CONFIG_UIO is not set
2041# CONFIG_STAGING is not set 2090# CONFIG_STAGING is not set
2042CONFIG_X86_PLATFORM_DEVICES=y 2091CONFIG_X86_PLATFORM_DEVICES=y
@@ -2071,6 +2120,7 @@ CONFIG_DMIID=y
2071# 2120#
2072# CONFIG_EXT2_FS is not set 2121# CONFIG_EXT2_FS is not set
2073CONFIG_EXT3_FS=y 2122CONFIG_EXT3_FS=y
2123# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
2074CONFIG_EXT3_FS_XATTR=y 2124CONFIG_EXT3_FS_XATTR=y
2075CONFIG_EXT3_FS_POSIX_ACL=y 2125CONFIG_EXT3_FS_POSIX_ACL=y
2076CONFIG_EXT3_FS_SECURITY=y 2126CONFIG_EXT3_FS_SECURITY=y
@@ -2101,6 +2151,11 @@ CONFIG_AUTOFS4_FS=y
2101CONFIG_GENERIC_ACL=y 2151CONFIG_GENERIC_ACL=y
2102 2152
2103# 2153#
2154# Caches
2155#
2156# CONFIG_FSCACHE is not set
2157
2158#
2104# CD-ROM/DVD Filesystems 2159# CD-ROM/DVD Filesystems
2105# 2160#
2106CONFIG_ISO9660_FS=y 2161CONFIG_ISO9660_FS=y
@@ -2151,6 +2206,7 @@ CONFIG_MISC_FILESYSTEMS=y
2151# CONFIG_ROMFS_FS is not set 2206# CONFIG_ROMFS_FS is not set
2152# CONFIG_SYSV_FS is not set 2207# CONFIG_SYSV_FS is not set
2153# CONFIG_UFS_FS is not set 2208# CONFIG_UFS_FS is not set
2209# CONFIG_NILFS2_FS is not set
2154CONFIG_NETWORK_FILESYSTEMS=y 2210CONFIG_NETWORK_FILESYSTEMS=y
2155CONFIG_NFS_FS=y 2211CONFIG_NFS_FS=y
2156CONFIG_NFS_V3=y 2212CONFIG_NFS_V3=y
@@ -2164,7 +2220,6 @@ CONFIG_NFS_ACL_SUPPORT=y
2164CONFIG_NFS_COMMON=y 2220CONFIG_NFS_COMMON=y
2165CONFIG_SUNRPC=y 2221CONFIG_SUNRPC=y
2166CONFIG_SUNRPC_GSS=y 2222CONFIG_SUNRPC_GSS=y
2167# CONFIG_SUNRPC_REGISTER_V4 is not set
2168CONFIG_RPCSEC_GSS_KRB5=y 2223CONFIG_RPCSEC_GSS_KRB5=y
2169# CONFIG_RPCSEC_GSS_SPKM3 is not set 2224# CONFIG_RPCSEC_GSS_SPKM3 is not set
2170# CONFIG_SMB_FS is not set 2225# CONFIG_SMB_FS is not set
@@ -2251,6 +2306,7 @@ CONFIG_DEBUG_FS=y
2251CONFIG_DEBUG_KERNEL=y 2306CONFIG_DEBUG_KERNEL=y
2252# CONFIG_DEBUG_SHIRQ is not set 2307# CONFIG_DEBUG_SHIRQ is not set
2253# CONFIG_DETECT_SOFTLOCKUP is not set 2308# CONFIG_DETECT_SOFTLOCKUP is not set
2309# CONFIG_DETECT_HUNG_TASK is not set
2254# CONFIG_SCHED_DEBUG is not set 2310# CONFIG_SCHED_DEBUG is not set
2255CONFIG_SCHEDSTATS=y 2311CONFIG_SCHEDSTATS=y
2256CONFIG_TIMER_STATS=y 2312CONFIG_TIMER_STATS=y
@@ -2266,6 +2322,7 @@ CONFIG_TIMER_STATS=y
2266# CONFIG_LOCK_STAT is not set 2322# CONFIG_LOCK_STAT is not set
2267# CONFIG_DEBUG_SPINLOCK_SLEEP is not set 2323# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
2268# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set 2324# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
2325CONFIG_STACKTRACE=y
2269# CONFIG_DEBUG_KOBJECT is not set 2326# CONFIG_DEBUG_KOBJECT is not set
2270# CONFIG_DEBUG_HIGHMEM is not set 2327# CONFIG_DEBUG_HIGHMEM is not set
2271CONFIG_DEBUG_BUGVERBOSE=y 2328CONFIG_DEBUG_BUGVERBOSE=y
@@ -2289,13 +2346,19 @@ CONFIG_FRAME_POINTER=y
2289# CONFIG_FAULT_INJECTION is not set 2346# CONFIG_FAULT_INJECTION is not set
2290# CONFIG_LATENCYTOP is not set 2347# CONFIG_LATENCYTOP is not set
2291CONFIG_SYSCTL_SYSCALL_CHECK=y 2348CONFIG_SYSCTL_SYSCALL_CHECK=y
2349# CONFIG_DEBUG_PAGEALLOC is not set
2292CONFIG_USER_STACKTRACE_SUPPORT=y 2350CONFIG_USER_STACKTRACE_SUPPORT=y
2351CONFIG_NOP_TRACER=y
2293CONFIG_HAVE_FUNCTION_TRACER=y 2352CONFIG_HAVE_FUNCTION_TRACER=y
2294CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y 2353CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
2295CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y 2354CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
2296CONFIG_HAVE_DYNAMIC_FTRACE=y 2355CONFIG_HAVE_DYNAMIC_FTRACE=y
2297CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y 2356CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
2298CONFIG_HAVE_HW_BRANCH_TRACER=y 2357CONFIG_HAVE_HW_BRANCH_TRACER=y
2358CONFIG_HAVE_FTRACE_SYSCALLS=y
2359CONFIG_RING_BUFFER=y
2360CONFIG_TRACING=y
2361CONFIG_TRACING_SUPPORT=y
2299 2362
2300# 2363#
2301# Tracers 2364# Tracers
@@ -2305,13 +2368,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y
2305# CONFIG_SYSPROF_TRACER is not set 2368# CONFIG_SYSPROF_TRACER is not set
2306# CONFIG_SCHED_TRACER is not set 2369# CONFIG_SCHED_TRACER is not set
2307# CONFIG_CONTEXT_SWITCH_TRACER is not set 2370# CONFIG_CONTEXT_SWITCH_TRACER is not set
2371# CONFIG_EVENT_TRACER is not set
2372# CONFIG_FTRACE_SYSCALLS is not set
2308# CONFIG_BOOT_TRACER is not set 2373# CONFIG_BOOT_TRACER is not set
2309# CONFIG_TRACE_BRANCH_PROFILING is not set 2374# CONFIG_TRACE_BRANCH_PROFILING is not set
2310# CONFIG_POWER_TRACER is not set 2375# CONFIG_POWER_TRACER is not set
2311# CONFIG_STACK_TRACER is not set 2376# CONFIG_STACK_TRACER is not set
2312# CONFIG_HW_BRANCH_TRACER is not set 2377# CONFIG_HW_BRANCH_TRACER is not set
2378# CONFIG_KMEMTRACE is not set
2379# CONFIG_WORKQUEUE_TRACER is not set
2380CONFIG_BLK_DEV_IO_TRACE=y
2381# CONFIG_FTRACE_STARTUP_TEST is not set
2382# CONFIG_MMIOTRACE is not set
2313CONFIG_PROVIDE_OHCI1394_DMA_INIT=y 2383CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
2314# CONFIG_DYNAMIC_PRINTK_DEBUG is not set 2384# CONFIG_DYNAMIC_DEBUG is not set
2385# CONFIG_DMA_API_DEBUG is not set
2315# CONFIG_SAMPLES is not set 2386# CONFIG_SAMPLES is not set
2316CONFIG_HAVE_ARCH_KGDB=y 2387CONFIG_HAVE_ARCH_KGDB=y
2317# CONFIG_KGDB is not set 2388# CONFIG_KGDB is not set
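Note on the tracing hunk above: it enables the core ftrace plumbing (CONFIG_RING_BUFFER, CONFIG_TRACING, CONFIG_TRACING_SUPPORT), offers the new event and syscall tracers, and moves CONFIG_BLK_DEV_IO_TRACE=y under the Tracers menu. These options are driven at runtime through debugfs; a minimal sketch of selecting a tracer, assuming debugfs is mounted at the conventional /sys/kernel/debug:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            /* Path assumes debugfs is mounted at the conventional location. */
            const char *path = "/sys/kernel/debug/tracing/current_tracer";
            int fd = open(path, O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* "nop" is always available; names such as "function" only
             * appear when the matching tracer config is enabled. */
            if (write(fd, "nop", strlen("nop")) < 0)
                    perror("write");
            close(fd);
            return 0;
    }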
@@ -2321,7 +2392,6 @@ CONFIG_EARLY_PRINTK=y
2321CONFIG_EARLY_PRINTK_DBGP=y 2392CONFIG_EARLY_PRINTK_DBGP=y
2322CONFIG_DEBUG_STACKOVERFLOW=y 2393CONFIG_DEBUG_STACKOVERFLOW=y
2323CONFIG_DEBUG_STACK_USAGE=y 2394CONFIG_DEBUG_STACK_USAGE=y
2324# CONFIG_DEBUG_PAGEALLOC is not set
2325# CONFIG_DEBUG_PER_CPU_MAPS is not set 2395# CONFIG_DEBUG_PER_CPU_MAPS is not set
2326# CONFIG_X86_PTDUMP is not set 2396# CONFIG_X86_PTDUMP is not set
2327CONFIG_DEBUG_RODATA=y 2397CONFIG_DEBUG_RODATA=y
@@ -2329,7 +2399,7 @@ CONFIG_DEBUG_RODATA=y
2329CONFIG_DEBUG_NX_TEST=m 2399CONFIG_DEBUG_NX_TEST=m
2330# CONFIG_4KSTACKS is not set 2400# CONFIG_4KSTACKS is not set
2331CONFIG_DOUBLEFAULT=y 2401CONFIG_DOUBLEFAULT=y
2332# CONFIG_MMIOTRACE is not set 2402CONFIG_HAVE_MMIOTRACE_SUPPORT=y
2333CONFIG_IO_DELAY_TYPE_0X80=0 2403CONFIG_IO_DELAY_TYPE_0X80=0
2334CONFIG_IO_DELAY_TYPE_0XED=1 2404CONFIG_IO_DELAY_TYPE_0XED=1
2335CONFIG_IO_DELAY_TYPE_UDELAY=2 2405CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2365,6 +2435,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y
2365CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 2435CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
2366# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set 2436# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
2367# CONFIG_SECURITY_SMACK is not set 2437# CONFIG_SECURITY_SMACK is not set
2438# CONFIG_SECURITY_TOMOYO is not set
2439# CONFIG_IMA is not set
2368CONFIG_CRYPTO=y 2440CONFIG_CRYPTO=y
2369 2441
2370# 2442#
@@ -2380,10 +2452,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y
2380CONFIG_CRYPTO_HASH=y 2452CONFIG_CRYPTO_HASH=y
2381CONFIG_CRYPTO_HASH2=y 2453CONFIG_CRYPTO_HASH2=y
2382CONFIG_CRYPTO_RNG2=y 2454CONFIG_CRYPTO_RNG2=y
2455CONFIG_CRYPTO_PCOMP=y
2383CONFIG_CRYPTO_MANAGER=y 2456CONFIG_CRYPTO_MANAGER=y
2384CONFIG_CRYPTO_MANAGER2=y 2457CONFIG_CRYPTO_MANAGER2=y
2385# CONFIG_CRYPTO_GF128MUL is not set 2458# CONFIG_CRYPTO_GF128MUL is not set
2386# CONFIG_CRYPTO_NULL is not set 2459# CONFIG_CRYPTO_NULL is not set
2460CONFIG_CRYPTO_WORKQUEUE=y
2387# CONFIG_CRYPTO_CRYPTD is not set 2461# CONFIG_CRYPTO_CRYPTD is not set
2388CONFIG_CRYPTO_AUTHENC=y 2462CONFIG_CRYPTO_AUTHENC=y
2389# CONFIG_CRYPTO_TEST is not set 2463# CONFIG_CRYPTO_TEST is not set
@@ -2456,6 +2530,7 @@ CONFIG_CRYPTO_DES=y
2456# Compression 2530# Compression
2457# 2531#
2458# CONFIG_CRYPTO_DEFLATE is not set 2532# CONFIG_CRYPTO_DEFLATE is not set
2533# CONFIG_CRYPTO_ZLIB is not set
2459# CONFIG_CRYPTO_LZO is not set 2534# CONFIG_CRYPTO_LZO is not set
2460 2535
2461# 2536#
@@ -2467,11 +2542,13 @@ CONFIG_CRYPTO_HW=y
2467# CONFIG_CRYPTO_DEV_GEODE is not set 2542# CONFIG_CRYPTO_DEV_GEODE is not set
2468# CONFIG_CRYPTO_DEV_HIFN_795X is not set 2543# CONFIG_CRYPTO_DEV_HIFN_795X is not set
2469CONFIG_HAVE_KVM=y 2544CONFIG_HAVE_KVM=y
2545CONFIG_HAVE_KVM_IRQCHIP=y
2470CONFIG_VIRTUALIZATION=y 2546CONFIG_VIRTUALIZATION=y
2471# CONFIG_KVM is not set 2547# CONFIG_KVM is not set
2472# CONFIG_LGUEST is not set 2548# CONFIG_LGUEST is not set
2473# CONFIG_VIRTIO_PCI is not set 2549# CONFIG_VIRTIO_PCI is not set
2474# CONFIG_VIRTIO_BALLOON is not set 2550# CONFIG_VIRTIO_BALLOON is not set
2551CONFIG_BINARY_PRINTF=y
2475 2552
2476# 2553#
2477# Library routines 2554# Library routines
@@ -2489,7 +2566,10 @@ CONFIG_CRC32=y
2489# CONFIG_LIBCRC32C is not set 2566# CONFIG_LIBCRC32C is not set
2490CONFIG_AUDIT_GENERIC=y 2567CONFIG_AUDIT_GENERIC=y
2491CONFIG_ZLIB_INFLATE=y 2568CONFIG_ZLIB_INFLATE=y
2492CONFIG_PLIST=y 2569CONFIG_DECOMPRESS_GZIP=y
2570CONFIG_DECOMPRESS_BZIP2=y
2571CONFIG_DECOMPRESS_LZMA=y
2493CONFIG_HAS_IOMEM=y 2572CONFIG_HAS_IOMEM=y
2494CONFIG_HAS_IOPORT=y 2573CONFIG_HAS_IOPORT=y
2495CONFIG_HAS_DMA=y 2574CONFIG_HAS_DMA=y
2575CONFIG_NLATTR=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9fe5d212ab4c..cee1dd2e69b2 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,12 +1,13 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.29-rc4 3# Linux kernel version: 2.6.30-rc2
4# Tue Feb 24 15:44:16 2009 4# Mon May 11 16:22:00 2009
5# 5#
6CONFIG_64BIT=y 6CONFIG_64BIT=y
7# CONFIG_X86_32 is not set 7# CONFIG_X86_32 is not set
8CONFIG_X86_64=y 8CONFIG_X86_64=y
9CONFIG_X86=y 9CONFIG_X86=y
10CONFIG_OUTPUT_FORMAT="elf64-x86-64"
10CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" 11CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
11CONFIG_GENERIC_TIME=y 12CONFIG_GENERIC_TIME=y
12CONFIG_GENERIC_CMOS_UPDATE=y 13CONFIG_GENERIC_CMOS_UPDATE=y
@@ -34,6 +35,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y
34CONFIG_ARCH_HAS_DEFAULT_IDLE=y 35CONFIG_ARCH_HAS_DEFAULT_IDLE=y
35CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y 36CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
36CONFIG_HAVE_SETUP_PER_CPU_AREA=y 37CONFIG_HAVE_SETUP_PER_CPU_AREA=y
38CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
37CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y 39CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
38CONFIG_ARCH_HIBERNATION_POSSIBLE=y 40CONFIG_ARCH_HIBERNATION_POSSIBLE=y
39CONFIG_ARCH_SUSPEND_POSSIBLE=y 41CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -41,14 +43,14 @@ CONFIG_ZONE_DMA32=y
41CONFIG_ARCH_POPULATES_NODE_MAP=y 43CONFIG_ARCH_POPULATES_NODE_MAP=y
42CONFIG_AUDIT_ARCH=y 44CONFIG_AUDIT_ARCH=y
43CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y 45CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
46CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
44CONFIG_GENERIC_HARDIRQS=y 47CONFIG_GENERIC_HARDIRQS=y
48CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
45CONFIG_GENERIC_IRQ_PROBE=y 49CONFIG_GENERIC_IRQ_PROBE=y
46CONFIG_GENERIC_PENDING_IRQ=y 50CONFIG_GENERIC_PENDING_IRQ=y
47CONFIG_X86_SMP=y
48CONFIG_USE_GENERIC_SMP_HELPERS=y 51CONFIG_USE_GENERIC_SMP_HELPERS=y
49CONFIG_X86_64_SMP=y 52CONFIG_X86_64_SMP=y
50CONFIG_X86_HT=y 53CONFIG_X86_HT=y
51CONFIG_X86_BIOS_REBOOT=y
52CONFIG_X86_TRAMPOLINE=y 54CONFIG_X86_TRAMPOLINE=y
53# CONFIG_KTIME_SCALAR is not set 55# CONFIG_KTIME_SCALAR is not set
54CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" 56CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
@@ -61,10 +63,17 @@ CONFIG_LOCK_KERNEL=y
61CONFIG_INIT_ENV_ARG_LIMIT=32 63CONFIG_INIT_ENV_ARG_LIMIT=32
62CONFIG_LOCALVERSION="" 64CONFIG_LOCALVERSION=""
63# CONFIG_LOCALVERSION_AUTO is not set 65# CONFIG_LOCALVERSION_AUTO is not set
66CONFIG_HAVE_KERNEL_GZIP=y
67CONFIG_HAVE_KERNEL_BZIP2=y
68CONFIG_HAVE_KERNEL_LZMA=y
69CONFIG_KERNEL_GZIP=y
70# CONFIG_KERNEL_BZIP2 is not set
71# CONFIG_KERNEL_LZMA is not set
64CONFIG_SWAP=y 72CONFIG_SWAP=y
65CONFIG_SYSVIPC=y 73CONFIG_SYSVIPC=y
66CONFIG_SYSVIPC_SYSCTL=y 74CONFIG_SYSVIPC_SYSCTL=y
67CONFIG_POSIX_MQUEUE=y 75CONFIG_POSIX_MQUEUE=y
76CONFIG_POSIX_MQUEUE_SYSCTL=y
68CONFIG_BSD_PROCESS_ACCT=y 77CONFIG_BSD_PROCESS_ACCT=y
69# CONFIG_BSD_PROCESS_ACCT_V3 is not set 78# CONFIG_BSD_PROCESS_ACCT_V3 is not set
70CONFIG_TASKSTATS=y 79CONFIG_TASKSTATS=y
@@ -114,23 +123,26 @@ CONFIG_PID_NS=y
114CONFIG_NET_NS=y 123CONFIG_NET_NS=y
115CONFIG_BLK_DEV_INITRD=y 124CONFIG_BLK_DEV_INITRD=y
116CONFIG_INITRAMFS_SOURCE="" 125CONFIG_INITRAMFS_SOURCE=""
126CONFIG_RD_GZIP=y
127CONFIG_RD_BZIP2=y
128CONFIG_RD_LZMA=y
117CONFIG_CC_OPTIMIZE_FOR_SIZE=y 129CONFIG_CC_OPTIMIZE_FOR_SIZE=y
118CONFIG_SYSCTL=y 130CONFIG_SYSCTL=y
131CONFIG_ANON_INODES=y
119# CONFIG_EMBEDDED is not set 132# CONFIG_EMBEDDED is not set
120CONFIG_UID16=y 133CONFIG_UID16=y
121CONFIG_SYSCTL_SYSCALL=y 134CONFIG_SYSCTL_SYSCALL=y
122CONFIG_KALLSYMS=y 135CONFIG_KALLSYMS=y
123CONFIG_KALLSYMS_ALL=y 136CONFIG_KALLSYMS_ALL=y
124CONFIG_KALLSYMS_EXTRA_PASS=y 137CONFIG_KALLSYMS_EXTRA_PASS=y
138# CONFIG_STRIP_ASM_SYMS is not set
125CONFIG_HOTPLUG=y 139CONFIG_HOTPLUG=y
126CONFIG_PRINTK=y 140CONFIG_PRINTK=y
127CONFIG_BUG=y 141CONFIG_BUG=y
128CONFIG_ELF_CORE=y 142CONFIG_ELF_CORE=y
129CONFIG_PCSPKR_PLATFORM=y 143CONFIG_PCSPKR_PLATFORM=y
130# CONFIG_COMPAT_BRK is not set
131CONFIG_BASE_FULL=y 144CONFIG_BASE_FULL=y
132CONFIG_FUTEX=y 145CONFIG_FUTEX=y
133CONFIG_ANON_INODES=y
134CONFIG_EPOLL=y 146CONFIG_EPOLL=y
135CONFIG_SIGNALFD=y 147CONFIG_SIGNALFD=y
136CONFIG_TIMERFD=y 148CONFIG_TIMERFD=y
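Note on the hunk above: CONFIG_RD_GZIP, CONFIG_RD_BZIP2 and CONFIG_RD_LZMA let the boot code accept an initramfs in any of the three formats, which are told apart by their leading magic bytes. A sketch of that detection — using the commonly documented magics, not the kernel's actual decompress_method() code:

    #include <stdio.h>

    enum rd_format { RD_GZIP, RD_BZIP2, RD_LZMA, RD_UNKNOWN };

    /* Identify a compressed image by its leading bytes (the usual magics). */
    static enum rd_format identify(const unsigned char *buf, unsigned long len)
    {
            if (len >= 2 && buf[0] == 0x1f && buf[1] == 0x8b)
                    return RD_GZIP;          /* gzip */
            if (len >= 3 && buf[0] == 'B' && buf[1] == 'Z' && buf[2] == 'h')
                    return RD_BZIP2;         /* bzip2 */
            if (len >= 2 && buf[0] == 0x5d && buf[1] == 0x00)
                    return RD_LZMA;          /* classic LZMA properties byte */
            return RD_UNKNOWN;
    }

    int main(void)
    {
            const unsigned char gz[] = { 0x1f, 0x8b, 0x08 };
            printf("%d\n", identify(gz, sizeof(gz)));  /* prints 0 (RD_GZIP) */
            return 0;
    }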
@@ -140,6 +152,7 @@ CONFIG_AIO=y
140CONFIG_VM_EVENT_COUNTERS=y 152CONFIG_VM_EVENT_COUNTERS=y
141CONFIG_PCI_QUIRKS=y 153CONFIG_PCI_QUIRKS=y
142CONFIG_SLUB_DEBUG=y 154CONFIG_SLUB_DEBUG=y
155# CONFIG_COMPAT_BRK is not set
143# CONFIG_SLAB is not set 156# CONFIG_SLAB is not set
144CONFIG_SLUB=y 157CONFIG_SLUB=y
145# CONFIG_SLOB is not set 158# CONFIG_SLOB is not set
@@ -155,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y
155CONFIG_HAVE_KPROBES=y 168CONFIG_HAVE_KPROBES=y
156CONFIG_HAVE_KRETPROBES=y 169CONFIG_HAVE_KRETPROBES=y
157CONFIG_HAVE_ARCH_TRACEHOOK=y 170CONFIG_HAVE_ARCH_TRACEHOOK=y
171CONFIG_HAVE_DMA_API_DEBUG=y
172# CONFIG_SLOW_WORK is not set
158# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set 173# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
159CONFIG_SLABINFO=y 174CONFIG_SLABINFO=y
160CONFIG_RT_MUTEXES=y 175CONFIG_RT_MUTEXES=y
@@ -167,7 +182,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
167# CONFIG_MODULE_SRCVERSION_ALL is not set 182# CONFIG_MODULE_SRCVERSION_ALL is not set
168CONFIG_STOP_MACHINE=y 183CONFIG_STOP_MACHINE=y
169CONFIG_BLOCK=y 184CONFIG_BLOCK=y
170CONFIG_BLK_DEV_IO_TRACE=y
171CONFIG_BLK_DEV_BSG=y 185CONFIG_BLK_DEV_BSG=y
172# CONFIG_BLK_DEV_INTEGRITY is not set 186# CONFIG_BLK_DEV_INTEGRITY is not set
173CONFIG_BLOCK_COMPAT=y 187CONFIG_BLOCK_COMPAT=y
@@ -195,12 +209,10 @@ CONFIG_HIGH_RES_TIMERS=y
195CONFIG_GENERIC_CLOCKEVENTS_BUILD=y 209CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
196CONFIG_SMP=y 210CONFIG_SMP=y
197CONFIG_SPARSE_IRQ=y 211CONFIG_SPARSE_IRQ=y
198# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
199CONFIG_X86_FIND_SMP_CONFIG=y
200CONFIG_X86_MPPARSE=y 212CONFIG_X86_MPPARSE=y
201# CONFIG_X86_ELAN is not set 213CONFIG_X86_EXTENDED_PLATFORM=y
202# CONFIG_X86_GENERICARCH is not set
203# CONFIG_X86_VSMP is not set 214# CONFIG_X86_VSMP is not set
215# CONFIG_X86_UV is not set
204CONFIG_SCHED_OMIT_FRAME_POINTER=y 216CONFIG_SCHED_OMIT_FRAME_POINTER=y
205# CONFIG_PARAVIRT_GUEST is not set 217# CONFIG_PARAVIRT_GUEST is not set
206# CONFIG_MEMTEST is not set 218# CONFIG_MEMTEST is not set
@@ -230,10 +242,10 @@ CONFIG_SCHED_OMIT_FRAME_POINTER=y
230# CONFIG_MCORE2 is not set 242# CONFIG_MCORE2 is not set
231CONFIG_GENERIC_CPU=y 243CONFIG_GENERIC_CPU=y
232CONFIG_X86_CPU=y 244CONFIG_X86_CPU=y
233CONFIG_X86_L1_CACHE_BYTES=128 245CONFIG_X86_L1_CACHE_BYTES=64
234CONFIG_X86_INTERNODE_CACHE_BYTES=128 246CONFIG_X86_INTERNODE_CACHE_BYTES=64
235CONFIG_X86_CMPXCHG=y 247CONFIG_X86_CMPXCHG=y
236CONFIG_X86_L1_CACHE_SHIFT=7 248CONFIG_X86_L1_CACHE_SHIFT=6
237CONFIG_X86_WP_WORKS_OK=y 249CONFIG_X86_WP_WORKS_OK=y
238CONFIG_X86_TSC=y 250CONFIG_X86_TSC=y
239CONFIG_X86_CMPXCHG64=y 251CONFIG_X86_CMPXCHG64=y
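Note on the hunk above: it corrects GENERIC_CPU's assumed L1 cache line size from 128 to 64 bytes; the byte counts follow directly from CONFIG_X86_L1_CACHE_SHIFT. A trivial check of that derivation:

    /* CONFIG_X86_L1_CACHE_BYTES is 1 << CONFIG_X86_L1_CACHE_SHIFT. */
    #define L1_CACHE_SHIFT 6
    #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

    _Static_assert(L1_CACHE_BYTES == 64, "shift of 6 gives 64-byte lines");

    int main(void) { return 0; }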
@@ -242,7 +254,7 @@ CONFIG_X86_MINIMUM_CPU_FAMILY=64
242CONFIG_X86_DEBUGCTLMSR=y 254CONFIG_X86_DEBUGCTLMSR=y
243CONFIG_CPU_SUP_INTEL=y 255CONFIG_CPU_SUP_INTEL=y
244CONFIG_CPU_SUP_AMD=y 256CONFIG_CPU_SUP_AMD=y
245CONFIG_CPU_SUP_CENTAUR_64=y 257CONFIG_CPU_SUP_CENTAUR=y
246CONFIG_X86_DS=y 258CONFIG_X86_DS=y
247CONFIG_X86_PTRACE_BTS=y 259CONFIG_X86_PTRACE_BTS=y
248CONFIG_HPET_TIMER=y 260CONFIG_HPET_TIMER=y
@@ -269,6 +281,7 @@ CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
269CONFIG_X86_MCE=y 281CONFIG_X86_MCE=y
270CONFIG_X86_MCE_INTEL=y 282CONFIG_X86_MCE_INTEL=y
271CONFIG_X86_MCE_AMD=y 283CONFIG_X86_MCE_AMD=y
284CONFIG_X86_MCE_THRESHOLD=y
272# CONFIG_I8K is not set 285# CONFIG_I8K is not set
273CONFIG_MICROCODE=y 286CONFIG_MICROCODE=y
274CONFIG_MICROCODE_INTEL=y 287CONFIG_MICROCODE_INTEL=y
@@ -276,6 +289,7 @@ CONFIG_MICROCODE_AMD=y
276CONFIG_MICROCODE_OLD_INTERFACE=y 289CONFIG_MICROCODE_OLD_INTERFACE=y
277CONFIG_X86_MSR=y 290CONFIG_X86_MSR=y
278CONFIG_X86_CPUID=y 291CONFIG_X86_CPUID=y
292# CONFIG_X86_CPU_DEBUG is not set
279CONFIG_ARCH_PHYS_ADDR_T_64BIT=y 293CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
280CONFIG_DIRECT_GBPAGES=y 294CONFIG_DIRECT_GBPAGES=y
281CONFIG_NUMA=y 295CONFIG_NUMA=y
@@ -309,6 +323,8 @@ CONFIG_ZONE_DMA_FLAG=1
309CONFIG_BOUNCE=y 323CONFIG_BOUNCE=y
310CONFIG_VIRT_TO_BUS=y 324CONFIG_VIRT_TO_BUS=y
311CONFIG_UNEVICTABLE_LRU=y 325CONFIG_UNEVICTABLE_LRU=y
326CONFIG_HAVE_MLOCK=y
327CONFIG_HAVE_MLOCKED_PAGE_BIT=y
312CONFIG_X86_CHECK_BIOS_CORRUPTION=y 328CONFIG_X86_CHECK_BIOS_CORRUPTION=y
313CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y 329CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
314CONFIG_X86_RESERVE_LOW_64K=y 330CONFIG_X86_RESERVE_LOW_64K=y
@@ -317,6 +333,7 @@ CONFIG_MTRR=y
317CONFIG_X86_PAT=y 333CONFIG_X86_PAT=y
318CONFIG_EFI=y 334CONFIG_EFI=y
319CONFIG_SECCOMP=y 335CONFIG_SECCOMP=y
336# CONFIG_CC_STACKPROTECTOR is not set
320# CONFIG_HZ_100 is not set 337# CONFIG_HZ_100 is not set
321# CONFIG_HZ_250 is not set 338# CONFIG_HZ_250 is not set
322# CONFIG_HZ_300 is not set 339# CONFIG_HZ_300 is not set
@@ -325,9 +342,10 @@ CONFIG_HZ=1000
325CONFIG_SCHED_HRTICK=y 342CONFIG_SCHED_HRTICK=y
326CONFIG_KEXEC=y 343CONFIG_KEXEC=y
327CONFIG_CRASH_DUMP=y 344CONFIG_CRASH_DUMP=y
345# CONFIG_KEXEC_JUMP is not set
328CONFIG_PHYSICAL_START=0x1000000 346CONFIG_PHYSICAL_START=0x1000000
329# CONFIG_RELOCATABLE is not set 347CONFIG_RELOCATABLE=y
330CONFIG_PHYSICAL_ALIGN=0x200000 348CONFIG_PHYSICAL_ALIGN=0x1000000
331CONFIG_HOTPLUG_CPU=y 349CONFIG_HOTPLUG_CPU=y
332# CONFIG_COMPAT_VDSO is not set 350# CONFIG_COMPAT_VDSO is not set
333# CONFIG_CMDLINE_BOOL is not set 351# CONFIG_CMDLINE_BOOL is not set
@@ -370,7 +388,6 @@ CONFIG_ACPI_NUMA=y
370CONFIG_ACPI_BLACKLIST_YEAR=0 388CONFIG_ACPI_BLACKLIST_YEAR=0
371# CONFIG_ACPI_DEBUG is not set 389# CONFIG_ACPI_DEBUG is not set
372# CONFIG_ACPI_PCI_SLOT is not set 390# CONFIG_ACPI_PCI_SLOT is not set
373CONFIG_ACPI_SYSTEM=y
374CONFIG_X86_PM_TIMER=y 391CONFIG_X86_PM_TIMER=y
375CONFIG_ACPI_CONTAINER=y 392CONFIG_ACPI_CONTAINER=y
376# CONFIG_ACPI_SBS is not set 393# CONFIG_ACPI_SBS is not set
@@ -436,6 +453,7 @@ CONFIG_PCI_MSI=y
436# CONFIG_PCI_DEBUG is not set 453# CONFIG_PCI_DEBUG is not set
437# CONFIG_PCI_STUB is not set 454# CONFIG_PCI_STUB is not set
438CONFIG_HT_IRQ=y 455CONFIG_HT_IRQ=y
456# CONFIG_PCI_IOV is not set
439CONFIG_ISA_DMA_API=y 457CONFIG_ISA_DMA_API=y
440CONFIG_K8_NB=y 458CONFIG_K8_NB=y
441CONFIG_PCCARD=y 459CONFIG_PCCARD=y
@@ -481,7 +499,6 @@ CONFIG_NET=y
481# 499#
482# Networking options 500# Networking options
483# 501#
484CONFIG_COMPAT_NET_DEV_OPS=y
485CONFIG_PACKET=y 502CONFIG_PACKET=y
486CONFIG_PACKET_MMAP=y 503CONFIG_PACKET_MMAP=y
487CONFIG_UNIX=y 504CONFIG_UNIX=y
@@ -639,6 +656,7 @@ CONFIG_LLC=y
639# CONFIG_LAPB is not set 656# CONFIG_LAPB is not set
640# CONFIG_ECONET is not set 657# CONFIG_ECONET is not set
641# CONFIG_WAN_ROUTER is not set 658# CONFIG_WAN_ROUTER is not set
659# CONFIG_PHONET is not set
642CONFIG_NET_SCHED=y 660CONFIG_NET_SCHED=y
643 661
644# 662#
@@ -696,6 +714,7 @@ CONFIG_NET_SCH_FIFO=y
696# 714#
697# CONFIG_NET_PKTGEN is not set 715# CONFIG_NET_PKTGEN is not set
698# CONFIG_NET_TCPPROBE is not set 716# CONFIG_NET_TCPPROBE is not set
717# CONFIG_NET_DROP_MONITOR is not set
699CONFIG_HAMRADIO=y 718CONFIG_HAMRADIO=y
700 719
701# 720#
@@ -706,12 +725,10 @@ CONFIG_HAMRADIO=y
706# CONFIG_IRDA is not set 725# CONFIG_IRDA is not set
707# CONFIG_BT is not set 726# CONFIG_BT is not set
708# CONFIG_AF_RXRPC is not set 727# CONFIG_AF_RXRPC is not set
709# CONFIG_PHONET is not set
710CONFIG_FIB_RULES=y 728CONFIG_FIB_RULES=y
711CONFIG_WIRELESS=y 729CONFIG_WIRELESS=y
712CONFIG_CFG80211=y 730CONFIG_CFG80211=y
713# CONFIG_CFG80211_REG_DEBUG is not set 731# CONFIG_CFG80211_REG_DEBUG is not set
714CONFIG_NL80211=y
715CONFIG_WIRELESS_OLD_REGULATORY=y 732CONFIG_WIRELESS_OLD_REGULATORY=y
716CONFIG_WIRELESS_EXT=y 733CONFIG_WIRELESS_EXT=y
717CONFIG_WIRELESS_EXT_SYSFS=y 734CONFIG_WIRELESS_EXT_SYSFS=y
@@ -788,9 +805,8 @@ CONFIG_MISC_DEVICES=y
788# CONFIG_TIFM_CORE is not set 805# CONFIG_TIFM_CORE is not set
789# CONFIG_ICS932S401 is not set 806# CONFIG_ICS932S401 is not set
790# CONFIG_ENCLOSURE_SERVICES is not set 807# CONFIG_ENCLOSURE_SERVICES is not set
791# CONFIG_SGI_XP is not set
792# CONFIG_HP_ILO is not set 808# CONFIG_HP_ILO is not set
793# CONFIG_SGI_GRU is not set 809# CONFIG_ISL29003 is not set
794# CONFIG_C2PORT is not set 810# CONFIG_C2PORT is not set
795 811
796# 812#
@@ -844,6 +860,7 @@ CONFIG_SCSI_SPI_ATTRS=y
844# CONFIG_SCSI_LOWLEVEL is not set 860# CONFIG_SCSI_LOWLEVEL is not set
845# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set 861# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
846# CONFIG_SCSI_DH is not set 862# CONFIG_SCSI_DH is not set
863# CONFIG_SCSI_OSD_INITIATOR is not set
847CONFIG_ATA=y 864CONFIG_ATA=y
848# CONFIG_ATA_NONSTANDARD is not set 865# CONFIG_ATA_NONSTANDARD is not set
849CONFIG_ATA_ACPI=y 866CONFIG_ATA_ACPI=y
@@ -940,6 +957,7 @@ CONFIG_DM_ZERO=y
940CONFIG_MACINTOSH_DRIVERS=y 957CONFIG_MACINTOSH_DRIVERS=y
941CONFIG_MAC_EMUMOUSEBTN=y 958CONFIG_MAC_EMUMOUSEBTN=y
942CONFIG_NETDEVICES=y 959CONFIG_NETDEVICES=y
960CONFIG_COMPAT_NET_DEV_OPS=y
943# CONFIG_IFB is not set 961# CONFIG_IFB is not set
944# CONFIG_DUMMY is not set 962# CONFIG_DUMMY is not set
945# CONFIG_BONDING is not set 963# CONFIG_BONDING is not set
@@ -977,6 +995,8 @@ CONFIG_MII=y
977CONFIG_NET_VENDOR_3COM=y 995CONFIG_NET_VENDOR_3COM=y
978# CONFIG_VORTEX is not set 996# CONFIG_VORTEX is not set
979# CONFIG_TYPHOON is not set 997# CONFIG_TYPHOON is not set
998# CONFIG_ETHOC is not set
999# CONFIG_DNET is not set
980CONFIG_NET_TULIP=y 1000CONFIG_NET_TULIP=y
981# CONFIG_DE2104X is not set 1001# CONFIG_DE2104X is not set
982# CONFIG_TULIP is not set 1002# CONFIG_TULIP is not set
@@ -1026,6 +1046,7 @@ CONFIG_E1000=y
1026# CONFIG_E1000E is not set 1046# CONFIG_E1000E is not set
1027# CONFIG_IP1000 is not set 1047# CONFIG_IP1000 is not set
1028# CONFIG_IGB is not set 1048# CONFIG_IGB is not set
1049# CONFIG_IGBVF is not set
1029# CONFIG_NS83820 is not set 1050# CONFIG_NS83820 is not set
1030# CONFIG_HAMACHI is not set 1051# CONFIG_HAMACHI is not set
1031# CONFIG_YELLOWFIN is not set 1052# CONFIG_YELLOWFIN is not set
@@ -1040,6 +1061,7 @@ CONFIG_TIGON3=y
1040# CONFIG_QLA3XXX is not set 1061# CONFIG_QLA3XXX is not set
1041# CONFIG_ATL1 is not set 1062# CONFIG_ATL1 is not set
1042# CONFIG_ATL1E is not set 1063# CONFIG_ATL1E is not set
1064# CONFIG_ATL1C is not set
1043# CONFIG_JME is not set 1065# CONFIG_JME is not set
1044CONFIG_NETDEV_10000=y 1066CONFIG_NETDEV_10000=y
1045# CONFIG_CHELSIO_T1 is not set 1067# CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1071,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
1049# CONFIG_IXGBE is not set 1071# CONFIG_IXGBE is not set
1050# CONFIG_IXGB is not set 1072# CONFIG_IXGB is not set
1051# CONFIG_S2IO is not set 1073# CONFIG_S2IO is not set
1074# CONFIG_VXGE is not set
1052# CONFIG_MYRI10GE is not set 1075# CONFIG_MYRI10GE is not set
1053# CONFIG_NETXEN_NIC is not set 1076# CONFIG_NETXEN_NIC is not set
1054# CONFIG_NIU is not set 1077# CONFIG_NIU is not set
@@ -1058,6 +1081,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
1058# CONFIG_BNX2X is not set 1081# CONFIG_BNX2X is not set
1059# CONFIG_QLGE is not set 1082# CONFIG_QLGE is not set
1060# CONFIG_SFC is not set 1083# CONFIG_SFC is not set
1084# CONFIG_BE2NET is not set
1061CONFIG_TR=y 1085CONFIG_TR=y
1062# CONFIG_IBMOL is not set 1086# CONFIG_IBMOL is not set
1063# CONFIG_3C359 is not set 1087# CONFIG_3C359 is not set
@@ -1072,8 +1096,8 @@ CONFIG_WLAN_80211=y
1072# CONFIG_LIBERTAS is not set 1096# CONFIG_LIBERTAS is not set
1073# CONFIG_LIBERTAS_THINFIRM is not set 1097# CONFIG_LIBERTAS_THINFIRM is not set
1074# CONFIG_AIRO is not set 1098# CONFIG_AIRO is not set
1075# CONFIG_HERMES is not set
1076# CONFIG_ATMEL is not set 1099# CONFIG_ATMEL is not set
1100# CONFIG_AT76C50X_USB is not set
1077# CONFIG_AIRO_CS is not set 1101# CONFIG_AIRO_CS is not set
1078# CONFIG_PCMCIA_WL3501 is not set 1102# CONFIG_PCMCIA_WL3501 is not set
1079# CONFIG_PRISM54 is not set 1103# CONFIG_PRISM54 is not set
@@ -1083,21 +1107,21 @@ CONFIG_WLAN_80211=y
1083# CONFIG_RTL8187 is not set 1107# CONFIG_RTL8187 is not set
1084# CONFIG_ADM8211 is not set 1108# CONFIG_ADM8211 is not set
1085# CONFIG_MAC80211_HWSIM is not set 1109# CONFIG_MAC80211_HWSIM is not set
1110# CONFIG_MWL8K is not set
1086# CONFIG_P54_COMMON is not set 1111# CONFIG_P54_COMMON is not set
1087CONFIG_ATH5K=y 1112CONFIG_ATH5K=y
1088# CONFIG_ATH5K_DEBUG is not set 1113# CONFIG_ATH5K_DEBUG is not set
1089# CONFIG_ATH9K is not set 1114# CONFIG_ATH9K is not set
1115# CONFIG_AR9170_USB is not set
1090# CONFIG_IPW2100 is not set 1116# CONFIG_IPW2100 is not set
1091# CONFIG_IPW2200 is not set 1117# CONFIG_IPW2200 is not set
1092# CONFIG_IWLCORE is not set 1118# CONFIG_IWLWIFI is not set
1093# CONFIG_IWLWIFI_LEDS is not set
1094# CONFIG_IWLAGN is not set
1095# CONFIG_IWL3945 is not set
1096# CONFIG_HOSTAP is not set 1119# CONFIG_HOSTAP is not set
1097# CONFIG_B43 is not set 1120# CONFIG_B43 is not set
1098# CONFIG_B43LEGACY is not set 1121# CONFIG_B43LEGACY is not set
1099# CONFIG_ZD1211RW is not set 1122# CONFIG_ZD1211RW is not set
1100# CONFIG_RT2X00 is not set 1123# CONFIG_RT2X00 is not set
1124# CONFIG_HERMES is not set
1101 1125
1102# 1126#
1103# Enable WiMAX (Networking options) to see the WiMAX drivers 1127# Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1208,6 +1232,8 @@ CONFIG_INPUT_TABLET=y
1208# CONFIG_TABLET_USB_KBTAB is not set 1232# CONFIG_TABLET_USB_KBTAB is not set
1209# CONFIG_TABLET_USB_WACOM is not set 1233# CONFIG_TABLET_USB_WACOM is not set
1210CONFIG_INPUT_TOUCHSCREEN=y 1234CONFIG_INPUT_TOUCHSCREEN=y
1235# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
1236# CONFIG_TOUCHSCREEN_AD7879 is not set
1211# CONFIG_TOUCHSCREEN_FUJITSU is not set 1237# CONFIG_TOUCHSCREEN_FUJITSU is not set
1212# CONFIG_TOUCHSCREEN_GUNZE is not set 1238# CONFIG_TOUCHSCREEN_GUNZE is not set
1213# CONFIG_TOUCHSCREEN_ELO is not set 1239# CONFIG_TOUCHSCREEN_ELO is not set
@@ -1301,6 +1327,7 @@ CONFIG_UNIX98_PTYS=y
1301# CONFIG_LEGACY_PTYS is not set 1327# CONFIG_LEGACY_PTYS is not set
1302# CONFIG_IPMI_HANDLER is not set 1328# CONFIG_IPMI_HANDLER is not set
1303CONFIG_HW_RANDOM=y 1329CONFIG_HW_RANDOM=y
1330# CONFIG_HW_RANDOM_TIMERIOMEM is not set
1304# CONFIG_HW_RANDOM_INTEL is not set 1331# CONFIG_HW_RANDOM_INTEL is not set
1305# CONFIG_HW_RANDOM_AMD is not set 1332# CONFIG_HW_RANDOM_AMD is not set
1306CONFIG_NVRAM=y 1333CONFIG_NVRAM=y
@@ -1382,7 +1409,6 @@ CONFIG_I2C_I801=y
1382# CONFIG_SENSORS_PCF8574 is not set 1409# CONFIG_SENSORS_PCF8574 is not set
1383# CONFIG_PCF8575 is not set 1410# CONFIG_PCF8575 is not set
1384# CONFIG_SENSORS_PCA9539 is not set 1411# CONFIG_SENSORS_PCA9539 is not set
1385# CONFIG_SENSORS_PCF8591 is not set
1386# CONFIG_SENSORS_MAX6875 is not set 1412# CONFIG_SENSORS_MAX6875 is not set
1387# CONFIG_SENSORS_TSL2550 is not set 1413# CONFIG_SENSORS_TSL2550 is not set
1388# CONFIG_I2C_DEBUG_CORE is not set 1414# CONFIG_I2C_DEBUG_CORE is not set
@@ -1416,6 +1442,7 @@ CONFIG_HWMON=y
1416# CONFIG_SENSORS_ADT7475 is not set 1442# CONFIG_SENSORS_ADT7475 is not set
1417# CONFIG_SENSORS_K8TEMP is not set 1443# CONFIG_SENSORS_K8TEMP is not set
1418# CONFIG_SENSORS_ASB100 is not set 1444# CONFIG_SENSORS_ASB100 is not set
1445# CONFIG_SENSORS_ATK0110 is not set
1419# CONFIG_SENSORS_ATXP1 is not set 1446# CONFIG_SENSORS_ATXP1 is not set
1420# CONFIG_SENSORS_DS1621 is not set 1447# CONFIG_SENSORS_DS1621 is not set
1421# CONFIG_SENSORS_I5K_AMB is not set 1448# CONFIG_SENSORS_I5K_AMB is not set
@@ -1425,6 +1452,7 @@ CONFIG_HWMON=y
1425# CONFIG_SENSORS_FSCHER is not set 1452# CONFIG_SENSORS_FSCHER is not set
1426# CONFIG_SENSORS_FSCPOS is not set 1453# CONFIG_SENSORS_FSCPOS is not set
1427# CONFIG_SENSORS_FSCHMD is not set 1454# CONFIG_SENSORS_FSCHMD is not set
1455# CONFIG_SENSORS_G760A is not set
1428# CONFIG_SENSORS_GL518SM is not set 1456# CONFIG_SENSORS_GL518SM is not set
1429# CONFIG_SENSORS_GL520SM is not set 1457# CONFIG_SENSORS_GL520SM is not set
1430# CONFIG_SENSORS_CORETEMP is not set 1458# CONFIG_SENSORS_CORETEMP is not set
@@ -1440,11 +1468,14 @@ CONFIG_HWMON=y
1440# CONFIG_SENSORS_LM90 is not set 1468# CONFIG_SENSORS_LM90 is not set
1441# CONFIG_SENSORS_LM92 is not set 1469# CONFIG_SENSORS_LM92 is not set
1442# CONFIG_SENSORS_LM93 is not set 1470# CONFIG_SENSORS_LM93 is not set
1471# CONFIG_SENSORS_LTC4215 is not set
1443# CONFIG_SENSORS_LTC4245 is not set 1472# CONFIG_SENSORS_LTC4245 is not set
1473# CONFIG_SENSORS_LM95241 is not set
1444# CONFIG_SENSORS_MAX1619 is not set 1474# CONFIG_SENSORS_MAX1619 is not set
1445# CONFIG_SENSORS_MAX6650 is not set 1475# CONFIG_SENSORS_MAX6650 is not set
1446# CONFIG_SENSORS_PC87360 is not set 1476# CONFIG_SENSORS_PC87360 is not set
1447# CONFIG_SENSORS_PC87427 is not set 1477# CONFIG_SENSORS_PC87427 is not set
1478# CONFIG_SENSORS_PCF8591 is not set
1448# CONFIG_SENSORS_SIS5595 is not set 1479# CONFIG_SENSORS_SIS5595 is not set
1449# CONFIG_SENSORS_DME1737 is not set 1480# CONFIG_SENSORS_DME1737 is not set
1450# CONFIG_SENSORS_SMSC47M1 is not set 1481# CONFIG_SENSORS_SMSC47M1 is not set
@@ -1635,6 +1666,7 @@ CONFIG_FB_EFI=y
1635# CONFIG_FB_VIRTUAL is not set 1666# CONFIG_FB_VIRTUAL is not set
1636# CONFIG_FB_METRONOME is not set 1667# CONFIG_FB_METRONOME is not set
1637# CONFIG_FB_MB862XX is not set 1668# CONFIG_FB_MB862XX is not set
1669# CONFIG_FB_BROADSHEET is not set
1638CONFIG_BACKLIGHT_LCD_SUPPORT=y 1670CONFIG_BACKLIGHT_LCD_SUPPORT=y
1639# CONFIG_LCD_CLASS_DEVICE is not set 1671# CONFIG_LCD_CLASS_DEVICE is not set
1640CONFIG_BACKLIGHT_CLASS_DEVICE=y 1672CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1720,6 +1752,8 @@ CONFIG_SND_PCI=y
1720# CONFIG_SND_INDIGO is not set 1752# CONFIG_SND_INDIGO is not set
1721# CONFIG_SND_INDIGOIO is not set 1753# CONFIG_SND_INDIGOIO is not set
1722# CONFIG_SND_INDIGODJ is not set 1754# CONFIG_SND_INDIGODJ is not set
1755# CONFIG_SND_INDIGOIOX is not set
1756# CONFIG_SND_INDIGODJX is not set
1723# CONFIG_SND_EMU10K1 is not set 1757# CONFIG_SND_EMU10K1 is not set
1724# CONFIG_SND_EMU10K1X is not set 1758# CONFIG_SND_EMU10K1X is not set
1725# CONFIG_SND_ENS1370 is not set 1759# CONFIG_SND_ENS1370 is not set
@@ -1792,15 +1826,17 @@ CONFIG_USB_HIDDEV=y
1792# 1826#
1793# Special HID drivers 1827# Special HID drivers
1794# 1828#
1795CONFIG_HID_COMPAT=y
1796CONFIG_HID_A4TECH=y 1829CONFIG_HID_A4TECH=y
1797CONFIG_HID_APPLE=y 1830CONFIG_HID_APPLE=y
1798CONFIG_HID_BELKIN=y 1831CONFIG_HID_BELKIN=y
1799CONFIG_HID_CHERRY=y 1832CONFIG_HID_CHERRY=y
1800CONFIG_HID_CHICONY=y 1833CONFIG_HID_CHICONY=y
1801CONFIG_HID_CYPRESS=y 1834CONFIG_HID_CYPRESS=y
1835# CONFIG_DRAGONRISE_FF is not set
1802CONFIG_HID_EZKEY=y 1836CONFIG_HID_EZKEY=y
1837CONFIG_HID_KYE=y
1803CONFIG_HID_GYRATION=y 1838CONFIG_HID_GYRATION=y
1839CONFIG_HID_KENSINGTON=y
1804CONFIG_HID_LOGITECH=y 1840CONFIG_HID_LOGITECH=y
1805CONFIG_LOGITECH_FF=y 1841CONFIG_LOGITECH_FF=y
1806# CONFIG_LOGIRUMBLEPAD2_FF is not set 1842# CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1866,11 +1902,11 @@ CONFIG_USB_PRINTER=y
1866# CONFIG_USB_TMC is not set 1902# CONFIG_USB_TMC is not set
1867 1903
1868# 1904#
1869# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; 1905# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
1870# 1906#
1871 1907
1872# 1908#
1873# see USB_STORAGE Help for more information 1909# also be needed; see USB_STORAGE Help for more info
1874# 1910#
1875CONFIG_USB_STORAGE=y 1911CONFIG_USB_STORAGE=y
1876# CONFIG_USB_STORAGE_DEBUG is not set 1912# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1912,7 +1948,6 @@ CONFIG_USB_LIBUSUAL=y
1912# CONFIG_USB_LED is not set 1948# CONFIG_USB_LED is not set
1913# CONFIG_USB_CYPRESS_CY7C63 is not set 1949# CONFIG_USB_CYPRESS_CY7C63 is not set
1914# CONFIG_USB_CYTHERM is not set 1950# CONFIG_USB_CYTHERM is not set
1915# CONFIG_USB_PHIDGET is not set
1916# CONFIG_USB_IDMOUSE is not set 1951# CONFIG_USB_IDMOUSE is not set
1917# CONFIG_USB_FTDI_ELAN is not set 1952# CONFIG_USB_FTDI_ELAN is not set
1918# CONFIG_USB_APPLEDISPLAY is not set 1953# CONFIG_USB_APPLEDISPLAY is not set
@@ -1928,6 +1963,7 @@ CONFIG_USB_LIBUSUAL=y
1928# 1963#
1929# OTG and related infrastructure 1964# OTG and related infrastructure
1930# 1965#
1966# CONFIG_NOP_USB_XCEIV is not set
1931# CONFIG_UWB is not set 1967# CONFIG_UWB is not set
1932# CONFIG_MMC is not set 1968# CONFIG_MMC is not set
1933# CONFIG_MEMSTICK is not set 1969# CONFIG_MEMSTICK is not set
@@ -1939,8 +1975,10 @@ CONFIG_LEDS_CLASS=y
1939# 1975#
1940# CONFIG_LEDS_ALIX2 is not set 1976# CONFIG_LEDS_ALIX2 is not set
1941# CONFIG_LEDS_PCA9532 is not set 1977# CONFIG_LEDS_PCA9532 is not set
1978# CONFIG_LEDS_LP5521 is not set
1942# CONFIG_LEDS_CLEVO_MAIL is not set 1979# CONFIG_LEDS_CLEVO_MAIL is not set
1943# CONFIG_LEDS_PCA955X is not set 1980# CONFIG_LEDS_PCA955X is not set
1981# CONFIG_LEDS_BD2802 is not set
1944 1982
1945# 1983#
1946# LED Triggers 1984# LED Triggers
@@ -1950,6 +1988,10 @@ CONFIG_LEDS_TRIGGERS=y
1950# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set 1988# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
1951# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set 1989# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
1952# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set 1990# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
1991
1992#
1993# iptables trigger is under Netfilter config (LED target)
1994#
1953# CONFIG_ACCESSIBILITY is not set 1995# CONFIG_ACCESSIBILITY is not set
1954# CONFIG_INFINIBAND is not set 1996# CONFIG_INFINIBAND is not set
1955CONFIG_EDAC=y 1997CONFIG_EDAC=y
@@ -2018,6 +2060,7 @@ CONFIG_DMADEVICES=y
2018# DMA Devices 2060# DMA Devices
2019# 2061#
2020# CONFIG_INTEL_IOATDMA is not set 2062# CONFIG_INTEL_IOATDMA is not set
2063# CONFIG_AUXDISPLAY is not set
2021# CONFIG_UIO is not set 2064# CONFIG_UIO is not set
2022# CONFIG_STAGING is not set 2065# CONFIG_STAGING is not set
2023CONFIG_X86_PLATFORM_DEVICES=y 2066CONFIG_X86_PLATFORM_DEVICES=y
@@ -2051,6 +2094,7 @@ CONFIG_DMIID=y
2051# 2094#
2052# CONFIG_EXT2_FS is not set 2095# CONFIG_EXT2_FS is not set
2053CONFIG_EXT3_FS=y 2096CONFIG_EXT3_FS=y
2097# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
2054CONFIG_EXT3_FS_XATTR=y 2098CONFIG_EXT3_FS_XATTR=y
2055CONFIG_EXT3_FS_POSIX_ACL=y 2099CONFIG_EXT3_FS_POSIX_ACL=y
2056CONFIG_EXT3_FS_SECURITY=y 2100CONFIG_EXT3_FS_SECURITY=y
@@ -2082,6 +2126,11 @@ CONFIG_AUTOFS4_FS=y
2082CONFIG_GENERIC_ACL=y 2126CONFIG_GENERIC_ACL=y
2083 2127
2084# 2128#
2129# Caches
2130#
2131# CONFIG_FSCACHE is not set
2132
2133#
2085# CD-ROM/DVD Filesystems 2134# CD-ROM/DVD Filesystems
2086# 2135#
2087CONFIG_ISO9660_FS=y 2136CONFIG_ISO9660_FS=y
@@ -2132,6 +2181,7 @@ CONFIG_MISC_FILESYSTEMS=y
2132# CONFIG_ROMFS_FS is not set 2181# CONFIG_ROMFS_FS is not set
2133# CONFIG_SYSV_FS is not set 2182# CONFIG_SYSV_FS is not set
2134# CONFIG_UFS_FS is not set 2183# CONFIG_UFS_FS is not set
2184# CONFIG_NILFS2_FS is not set
2135CONFIG_NETWORK_FILESYSTEMS=y 2185CONFIG_NETWORK_FILESYSTEMS=y
2136CONFIG_NFS_FS=y 2186CONFIG_NFS_FS=y
2137CONFIG_NFS_V3=y 2187CONFIG_NFS_V3=y
@@ -2145,7 +2195,6 @@ CONFIG_NFS_ACL_SUPPORT=y
2145CONFIG_NFS_COMMON=y 2195CONFIG_NFS_COMMON=y
2146CONFIG_SUNRPC=y 2196CONFIG_SUNRPC=y
2147CONFIG_SUNRPC_GSS=y 2197CONFIG_SUNRPC_GSS=y
2148# CONFIG_SUNRPC_REGISTER_V4 is not set
2149CONFIG_RPCSEC_GSS_KRB5=y 2198CONFIG_RPCSEC_GSS_KRB5=y
2150# CONFIG_RPCSEC_GSS_SPKM3 is not set 2199# CONFIG_RPCSEC_GSS_SPKM3 is not set
2151# CONFIG_SMB_FS is not set 2200# CONFIG_SMB_FS is not set
@@ -2232,6 +2281,7 @@ CONFIG_DEBUG_FS=y
2232CONFIG_DEBUG_KERNEL=y 2281CONFIG_DEBUG_KERNEL=y
2233# CONFIG_DEBUG_SHIRQ is not set 2282# CONFIG_DEBUG_SHIRQ is not set
2234# CONFIG_DETECT_SOFTLOCKUP is not set 2283# CONFIG_DETECT_SOFTLOCKUP is not set
2284# CONFIG_DETECT_HUNG_TASK is not set
2235# CONFIG_SCHED_DEBUG is not set 2285# CONFIG_SCHED_DEBUG is not set
2236CONFIG_SCHEDSTATS=y 2286CONFIG_SCHEDSTATS=y
2237CONFIG_TIMER_STATS=y 2287CONFIG_TIMER_STATS=y
@@ -2247,6 +2297,7 @@ CONFIG_TIMER_STATS=y
2247# CONFIG_LOCK_STAT is not set 2297# CONFIG_LOCK_STAT is not set
2248# CONFIG_DEBUG_SPINLOCK_SLEEP is not set 2298# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
2249# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set 2299# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
2300CONFIG_STACKTRACE=y
2250# CONFIG_DEBUG_KOBJECT is not set 2301# CONFIG_DEBUG_KOBJECT is not set
2251CONFIG_DEBUG_BUGVERBOSE=y 2302CONFIG_DEBUG_BUGVERBOSE=y
2252# CONFIG_DEBUG_INFO is not set 2303# CONFIG_DEBUG_INFO is not set
@@ -2269,13 +2320,19 @@ CONFIG_FRAME_POINTER=y
2269# CONFIG_FAULT_INJECTION is not set 2320# CONFIG_FAULT_INJECTION is not set
2270# CONFIG_LATENCYTOP is not set 2321# CONFIG_LATENCYTOP is not set
2271CONFIG_SYSCTL_SYSCALL_CHECK=y 2322CONFIG_SYSCTL_SYSCALL_CHECK=y
2323# CONFIG_DEBUG_PAGEALLOC is not set
2272CONFIG_USER_STACKTRACE_SUPPORT=y 2324CONFIG_USER_STACKTRACE_SUPPORT=y
2325CONFIG_NOP_TRACER=y
2273CONFIG_HAVE_FUNCTION_TRACER=y 2326CONFIG_HAVE_FUNCTION_TRACER=y
2274CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y 2327CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
2275CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y 2328CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
2276CONFIG_HAVE_DYNAMIC_FTRACE=y 2329CONFIG_HAVE_DYNAMIC_FTRACE=y
2277CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y 2330CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
2278CONFIG_HAVE_HW_BRANCH_TRACER=y 2331CONFIG_HAVE_HW_BRANCH_TRACER=y
2332CONFIG_HAVE_FTRACE_SYSCALLS=y
2333CONFIG_RING_BUFFER=y
2334CONFIG_TRACING=y
2335CONFIG_TRACING_SUPPORT=y
2279 2336
2280# 2337#
2281# Tracers 2338# Tracers
@@ -2285,13 +2342,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y
2285# CONFIG_SYSPROF_TRACER is not set 2342# CONFIG_SYSPROF_TRACER is not set
2286# CONFIG_SCHED_TRACER is not set 2343# CONFIG_SCHED_TRACER is not set
2287# CONFIG_CONTEXT_SWITCH_TRACER is not set 2344# CONFIG_CONTEXT_SWITCH_TRACER is not set
2345# CONFIG_EVENT_TRACER is not set
2346# CONFIG_FTRACE_SYSCALLS is not set
2288# CONFIG_BOOT_TRACER is not set 2347# CONFIG_BOOT_TRACER is not set
2289# CONFIG_TRACE_BRANCH_PROFILING is not set 2348# CONFIG_TRACE_BRANCH_PROFILING is not set
2290# CONFIG_POWER_TRACER is not set 2349# CONFIG_POWER_TRACER is not set
2291# CONFIG_STACK_TRACER is not set 2350# CONFIG_STACK_TRACER is not set
2292# CONFIG_HW_BRANCH_TRACER is not set 2351# CONFIG_HW_BRANCH_TRACER is not set
2352# CONFIG_KMEMTRACE is not set
2353# CONFIG_WORKQUEUE_TRACER is not set
2354CONFIG_BLK_DEV_IO_TRACE=y
2355# CONFIG_FTRACE_STARTUP_TEST is not set
2356# CONFIG_MMIOTRACE is not set
2293CONFIG_PROVIDE_OHCI1394_DMA_INIT=y 2357CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
2294# CONFIG_DYNAMIC_PRINTK_DEBUG is not set 2358# CONFIG_DYNAMIC_DEBUG is not set
2359# CONFIG_DMA_API_DEBUG is not set
2295# CONFIG_SAMPLES is not set 2360# CONFIG_SAMPLES is not set
2296CONFIG_HAVE_ARCH_KGDB=y 2361CONFIG_HAVE_ARCH_KGDB=y
2297# CONFIG_KGDB is not set 2362# CONFIG_KGDB is not set
@@ -2301,14 +2366,13 @@ CONFIG_EARLY_PRINTK=y
2301CONFIG_EARLY_PRINTK_DBGP=y 2366CONFIG_EARLY_PRINTK_DBGP=y
2302CONFIG_DEBUG_STACKOVERFLOW=y 2367CONFIG_DEBUG_STACKOVERFLOW=y
2303CONFIG_DEBUG_STACK_USAGE=y 2368CONFIG_DEBUG_STACK_USAGE=y
2304# CONFIG_DEBUG_PAGEALLOC is not set
2305# CONFIG_DEBUG_PER_CPU_MAPS is not set 2369# CONFIG_DEBUG_PER_CPU_MAPS is not set
2306# CONFIG_X86_PTDUMP is not set 2370# CONFIG_X86_PTDUMP is not set
2307CONFIG_DEBUG_RODATA=y 2371CONFIG_DEBUG_RODATA=y
2308# CONFIG_DEBUG_RODATA_TEST is not set 2372# CONFIG_DEBUG_RODATA_TEST is not set
2309CONFIG_DEBUG_NX_TEST=m 2373CONFIG_DEBUG_NX_TEST=m
2310# CONFIG_IOMMU_DEBUG is not set 2374# CONFIG_IOMMU_DEBUG is not set
2311# CONFIG_MMIOTRACE is not set 2375CONFIG_HAVE_MMIOTRACE_SUPPORT=y
2312CONFIG_IO_DELAY_TYPE_0X80=0 2376CONFIG_IO_DELAY_TYPE_0X80=0
2313CONFIG_IO_DELAY_TYPE_0XED=1 2377CONFIG_IO_DELAY_TYPE_0XED=1
2314CONFIG_IO_DELAY_TYPE_UDELAY=2 2378CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2344,6 +2408,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y
2344CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 2408CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
2345# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set 2409# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
2346# CONFIG_SECURITY_SMACK is not set 2410# CONFIG_SECURITY_SMACK is not set
2411# CONFIG_SECURITY_TOMOYO is not set
2412# CONFIG_IMA is not set
2347CONFIG_CRYPTO=y 2413CONFIG_CRYPTO=y
2348 2414
2349# 2415#
@@ -2359,10 +2425,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y
2359CONFIG_CRYPTO_HASH=y 2425CONFIG_CRYPTO_HASH=y
2360CONFIG_CRYPTO_HASH2=y 2426CONFIG_CRYPTO_HASH2=y
2361CONFIG_CRYPTO_RNG2=y 2427CONFIG_CRYPTO_RNG2=y
2428CONFIG_CRYPTO_PCOMP=y
2362CONFIG_CRYPTO_MANAGER=y 2429CONFIG_CRYPTO_MANAGER=y
2363CONFIG_CRYPTO_MANAGER2=y 2430CONFIG_CRYPTO_MANAGER2=y
2364# CONFIG_CRYPTO_GF128MUL is not set 2431# CONFIG_CRYPTO_GF128MUL is not set
2365# CONFIG_CRYPTO_NULL is not set 2432# CONFIG_CRYPTO_NULL is not set
2433CONFIG_CRYPTO_WORKQUEUE=y
2366# CONFIG_CRYPTO_CRYPTD is not set 2434# CONFIG_CRYPTO_CRYPTD is not set
2367CONFIG_CRYPTO_AUTHENC=y 2435CONFIG_CRYPTO_AUTHENC=y
2368# CONFIG_CRYPTO_TEST is not set 2436# CONFIG_CRYPTO_TEST is not set
@@ -2414,6 +2482,7 @@ CONFIG_CRYPTO_SHA1=y
2414# 2482#
2415CONFIG_CRYPTO_AES=y 2483CONFIG_CRYPTO_AES=y
2416# CONFIG_CRYPTO_AES_X86_64 is not set 2484# CONFIG_CRYPTO_AES_X86_64 is not set
2485# CONFIG_CRYPTO_AES_NI_INTEL is not set
2417# CONFIG_CRYPTO_ANUBIS is not set 2486# CONFIG_CRYPTO_ANUBIS is not set
2418CONFIG_CRYPTO_ARC4=y 2487CONFIG_CRYPTO_ARC4=y
2419# CONFIG_CRYPTO_BLOWFISH is not set 2488# CONFIG_CRYPTO_BLOWFISH is not set
@@ -2435,6 +2504,7 @@ CONFIG_CRYPTO_DES=y
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
@@ -2444,10 +2514,12 @@ CONFIG_CRYPTO_DES=y
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
 CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
 CONFIG_VIRTUALIZATION=y
 # CONFIG_KVM is not set
 # CONFIG_VIRTIO_PCI is not set
 # CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
@@ -2464,7 +2536,10 @@ CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202086e8..dcef387ddc36 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -830,4 +830,5 @@ ia32_sys_call_table:
 	.quad sys_inotify_init1
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
+	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f6aa18eadf71..1a37bcdc8606 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/stddef.h>
+#include <linux/stringify.h>
 #include <asm/asm.h>
 
 /*
@@ -74,6 +75,22 @@ static inline void alternatives_smp_switch(int smp) {}
 
 const unsigned char *const *find_nop_table(void);
 
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+									\
+      "661:\n\t" oldinstr "\n662:\n"					\
+      ".section .altinstructions,\"a\"\n"				\
+      _ASM_ALIGN "\n"							\
+      _ASM_PTR "661b\n"				/* label           */	\
+      _ASM_PTR "663f\n"				/* new instruction */	\
+      " .byte " __stringify(feature) "\n"	/* feature bit     */	\
+      " .byte 662b-661b\n"			/* sourcelen       */	\
+      " .byte 664f-663f\n"			/* replacementlen  */	\
+      ".previous\n"							\
+      ".section .altinstr_replacement, \"ax\"\n"			\
+      "663:\n\t" newinstr "\n664:\n"		/* replacement     */	\
+      ".previous"
+
 /*
  * Alternative instructions for different CPU types or capabilities.
  *
@@ -87,18 +104,7 @@ const unsigned char *const *find_nop_table(void);
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, newinstr, feature)			\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      " .byte %c0\n"		/* feature bit */	\
-		      " .byte 662b-661b\n"	/* sourcelen */		\
-		      " .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"	\
-		      "663:\n\t" newinstr "\n664:\n"	/* replacement */ \
-		      ".previous" :: "i" (feature) : "memory")
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
 /*
  * Alternative inline assembly with input.
@@ -109,35 +115,16 @@ const unsigned char *const *find_nop_table(void);
  * Best is to use constraints that are fixed size (like (%1) ... "r")
  * If you use variable sized constraints like "m" or "g" in the
  * replacement make sure to pad to the worst case length.
+ * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      " .byte %c0\n"		/* feature bit */	\
-		      " .byte 662b-661b\n"	/* sourcelen */		\
-		      " .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"	\
-		      "663:\n\t" newinstr "\n664:\n"	/* replacement */ \
-		      ".previous" :: "i" (feature), ##input)
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
+		      : : "i" (0), ## input)
 
 /* Like alternative_input, but with a single output argument */
 #define alternative_io(oldinstr, newinstr, feature, output, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      " .byte %c[feat]\n"	/* feature bit */	\
-		      " .byte 662b-661b\n"	/* sourcelen */		\
-		      " .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"	\
-		      "663:\n\t" newinstr "\n664:\n"	/* replacement */ \
-		      ".previous" : output : [feat] "i" (feature), ##input)
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
+		      : output : "i" (0), ## input)
 
 /*
  * use this macro(s) if you need more than one output parameter
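
The three alternative*() wrappers above now expand one shared ALTERNATIVE() string instead of three hand-maintained copies of the .altinstructions boilerplate. For context, a minimal sketch of a typical caller, modeled on the 32-bit mb() barrier (illustrative here, not part of this patch): apply_alternatives() rewrites oldinstr with newinstr at boot on CPUs that have the feature bit set.

	/* Sketch: choose a barrier instruction by CPU feature. On SSE2-capable
	 * CPUs the locked add is patched into a single mfence at boot. */
	#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", \
				 X86_FEATURE_XMM2)
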
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index f712344329bc..262e02820049 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -27,6 +27,8 @@ extern int amd_iommu_init(void);
 extern int amd_iommu_init_dma_ops(void);
 extern void amd_iommu_detect(void);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_flush_all_domains(void);
+extern void amd_iommu_flush_all_devices(void);
 #else
 static inline int amd_iommu_init(void) { return -ENODEV; }
 static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9d22b5..0c878caaa0a2 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -194,6 +194,27 @@
 #define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
 #define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
 					      domain for an IOMMU */
+extern bool amd_iommu_dump;
+#define DUMP_printk(format, arg...)					\
+	do {								\
+		if (amd_iommu_dump)					\
+			printk(KERN_INFO "AMD IOMMU: " format, ## arg);	\
+	} while (0)
+
+/*
+ * Make iterating over all IOMMUs easier
+ */
+#define for_each_iommu(iommu) \
+	list_for_each_entry((iommu), &amd_iommu_list, list)
+#define for_each_iommu_safe(iommu, next) \
+	list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
+
+#define APERTURE_RANGE_SHIFT	27	/* 128 MB */
+#define APERTURE_RANGE_SIZE	(1ULL << APERTURE_RANGE_SHIFT)
+#define APERTURE_RANGE_PAGES	(APERTURE_RANGE_SIZE >> PAGE_SHIFT)
+#define APERTURE_MAX_RANGES	32	/* allows 4GB of DMA address space */
+#define APERTURE_RANGE_INDEX(a)	((a) >> APERTURE_RANGE_SHIFT)
+#define APERTURE_PAGE_INDEX(a)	(((a) >> 21) & 0x3fULL)
 
 /*
  * This structure contains generic data for IOMMU protection domains
@@ -210,6 +231,26 @@ struct protection_domain {
 };
 
 /*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+	/* address allocation bitmap */
+	unsigned long *bitmap;
+
+	/*
+	 * Array of PTE pages for the aperture. In this array we save all the
+	 * leaf pages of the domain page table used for the aperture. This way
+	 * we don't need to walk the page table to find a specific PTE. We can
+	 * just calculate its address in constant time.
+	 */
+	u64 *pte_pages[64];
+
+	unsigned long offset;
+};
+
+/*
  * Data container for a dma_ops specific protection domain
  */
 struct dma_ops_domain {
@@ -222,18 +263,10 @@ struct dma_ops_domain {
 	unsigned long aperture_size;
 
 	/* address we start to search for free addresses */
-	unsigned long next_bit;
-
-	/* address allocation bitmap */
-	unsigned long *bitmap;
+	unsigned long next_address;
 
-	/*
-	 * Array of PTE pages for the aperture. In this array we save all the
-	 * leaf pages of the domain page table used for the aperture. This way
-	 * we don't need to walk the page table to find a specific PTE. We can
-	 * just calculate its address in constant time.
-	 */
-	u64 **pte_pages;
+	/* address space relevant data */
+	struct aperture_range *aperture[APERTURE_MAX_RANGES];
 
 	/* This will be set to true when TLB needs to be flushed */
 	bool need_flush;
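
With APERTURE_RANGE_SHIFT of 27, the growing aperture is carved into 128 MB ranges, and APERTURE_PAGE_INDEX() selects one of the 64 two-megabyte leaf-page slots inside a range (64 x 2 MB = 128 MB, matching pte_pages[64] above). A standalone sketch of that decomposition, runnable as plain C (the address value is illustrative):

	#include <stdio.h>

	#define APERTURE_RANGE_SHIFT	27	/* 128 MB */
	#define APERTURE_RANGE_INDEX(a)	((a) >> APERTURE_RANGE_SHIFT)
	#define APERTURE_PAGE_INDEX(a)	(((a) >> 21) & 0x3fULL)

	int main(void)
	{
		unsigned long long dma = 0x0c300000ULL;

		/* prints "range 1, slot 33": aperture[1]->pte_pages[33] */
		printf("range %llu, slot %llu\n",
		       APERTURE_RANGE_INDEX(dma), APERTURE_PAGE_INDEX(dma));
		return 0;
	}
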
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 42f2f8377422..bb7d47925847 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void);
 extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
-#define EIM_8BIT_APIC_ID	0
-#define EIM_32BIT_APIC_ID	1
+extern int x2apic_mode;
 
 #ifdef CONFIG_X86_X2APIC
 /*
@@ -166,10 +165,9 @@ static inline u64 native_x2apic_icr_read(void)
 	return val;
 }
 
-extern int x2apic, x2apic_phys;
+extern int x2apic_phys;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
-extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
 static inline int x2apic_enabled(void)
 {
@@ -183,6 +181,8 @@ static inline int x2apic_enabled(void)
 		return 1;
 	return 0;
 }
+
+#define x2apic_supported()	(cpu_has_x2apic)
 #else
 static inline void check_x2apic(void)
 {
@@ -190,28 +190,20 @@ static inline void check_x2apic(void)
 static inline void enable_x2apic(void)
 {
 }
-static inline void enable_IR_x2apic(void)
-{
-}
 static inline int x2apic_enabled(void)
 {
 	return 0;
 }
 
-#define x2apic 0
-
+#define x2apic_preenabled 0
+#define x2apic_supported() 0
 #endif
 
-extern int get_physical_broadcast(void);
+extern void enable_IR_x2apic(void);
 
-#ifdef CONFIG_X86_X2APIC
-static inline void ack_x2APIC_irq(void)
-{
-	/* Docs say use 0 for future compatibility */
-	native_apic_msr_write(APIC_EOI, 0);
-}
-#endif
+extern int get_physical_broadcast(void);
 
+extern void apic_disable(void);
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void connect_bsp_APIC(void);
@@ -252,7 +244,7 @@ static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok		1
 static inline void init_apic_mappings(void) { }
 static inline void disable_local_APIC(void) { }
-
+static inline void apic_disable(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_64
@@ -410,7 +402,7 @@ static inline unsigned default_get_apic_id(unsigned long x)
 {
 	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
 
-	if (APIC_XAPIC(ver))
+	if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID))
 		return (x >> 24) & 0xFF;
 	else
 		return (x >> 24) & 0x0F;
@@ -478,6 +470,9 @@ static inline unsigned int read_apic_id(void)
 extern void default_setup_apic_routing(void);
 
 #ifdef CONFIG_X86_32
+
+extern struct apic apic_default;
+
 /*
  * Set up the logical destination ID.
  *
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index bc9514fb3b13..7ddb36ab933b 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -22,6 +22,7 @@
 # define APIC_INTEGRATED(x)	(1)
 #endif
 #define APIC_XAPIC(x)		((x) >= 0x14)
+#define APIC_EXT_SPACE(x)	((x) & 0x80000000)
 #define APIC_TASKPRI	0x80
 #define APIC_TPRI_MASK	0xFFu
 #define APIC_ARBPRI	0x90
@@ -116,7 +117,9 @@
 #define APIC_TDR_DIV_32	0x8
 #define APIC_TDR_DIV_64	0x9
 #define APIC_TDR_DIV_128	0xA
-#define APIC_EILVT0	0x500
+#define APIC_EFEAT	0x400
+#define APIC_ECTRL	0x410
+#define APIC_EILVTn(n)	(0x500 + 0x10 * n)
 #define APIC_EILVT_NR_AMD_K8	1	/* # of extended interrupts */
 #define APIC_EILVT_NR_AMD_10H	4
 #define APIC_EILVT_LVTOFF(x)	(((x) >> 4) & 0xF)
@@ -125,9 +128,6 @@
 #define APIC_EILVT_MSG_NMI	0x4
 #define APIC_EILVT_MSG_EXT	0x7
 #define APIC_EILVT_MASKED	(1 << 16)
-#define APIC_EILVT1	0x510
-#define APIC_EILVT2	0x520
-#define APIC_EILVT3	0x530
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 #define APIC_BASE_MSR	0x800
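
APIC_EILVTn() generates the register offsets that the removed APIC_EILVT1..3 constants spelled out one by one; the extended LVT registers sit 0x10 apart starting at 0x500. A quick compile-time check (C11 _Static_assert, illustrative only, not part of the patch):

	#define APIC_EILVTn(n) (0x500 + 0x10 * n)

	_Static_assert(APIC_EILVTn(1) == 0x510, "was APIC_EILVT1");
	_Static_assert(APIC_EILVTn(2) == 0x520, "was APIC_EILVT2");
	_Static_assert(APIC_EILVTn(3) == 0x530, "was APIC_EILVT3");
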
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6ba23dd9fc92..418e632d4a80 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -8,11 +8,26 @@
 
 #ifdef __KERNEL__
 
+#include <asm/page_types.h>
+
 /* Physical address where kernel should be loaded. */
 #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
 				+ (CONFIG_PHYSICAL_ALIGN - 1)) \
 				& ~(CONFIG_PHYSICAL_ALIGN - 1))
 
+/* Minimum kernel alignment, as a power of two */
+#ifdef CONFIG_X86_64
+#define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
+#else
+#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT+1)
+#endif
+#define MIN_KERNEL_ALIGN	(_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
+
+#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \
+	(CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2))
+#error "Invalid value for CONFIG_PHYSICAL_ALIGN"
+#endif
+
 #ifdef CONFIG_KERNEL_BZIP2
 #define BOOT_HEAP_SIZE	0x400000
 #else /* !CONFIG_KERNEL_BZIP2 */
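
The new preprocessor guard enforces two properties of CONFIG_PHYSICAL_ALIGN: it must be a power of two (the x & (x - 1) trick) and no smaller than MIN_KERNEL_ALIGN, i.e. 2 MB (PMD_SHIFT) on 64-bit and twice the 4 KB page size on 32-bit. The same checks as a small, runnable C sketch (the constants are examples):

	#include <assert.h>

	static int valid_phys_align(unsigned long align, unsigned long min)
	{
		int pow2 = align && (align & (align - 1)) == 0;
		return pow2 && align >= min;
	}

	int main(void)
	{
		assert(valid_phys_align(0x1000000, 0x200000));	/* 16 MB: ok */
		assert(!valid_phys_align(0x300000, 0x200000));	/* 3 MB: not 2^n */
		return 0;
	}
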
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 433adaebf9b6..1724e8de317c 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -50,7 +50,8 @@ struct setup_header {
 	__u32	ramdisk_size;
 	__u32	bootsect_kludge;
 	__u16	heap_end_ptr;
-	__u16	_pad1;
+	__u8	ext_loader_ver;
+	__u8	ext_loader_type;
 	__u32	cmd_line_ptr;
 	__u32	initrd_addr_max;
 	__u32	kernel_alignment;
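
The padding word becomes two extension fields for the bootloader ID. Per the boot protocol documentation also updated in this merge (Documentation/x86/boot.txt), they extend the packed nibbles in type_of_loader. A hedged sketch of the decoding a consumer could do (the helper is made up; the encoding follows the documented protocol 2.11 rules):

	struct loader_id { unsigned type; unsigned version; };

	static struct loader_id decode_loader(unsigned char type_of_loader,
					      unsigned char ext_loader_type,
					      unsigned char ext_loader_ver)
	{
		struct loader_id id;

		/* a top nibble of 0xE means "see ext_loader_type" */
		id.type = ((type_of_loader >> 4) == 0xE)
			? 0x10 + ext_loader_type
			: type_of_loader >> 4;
		id.version = (ext_loader_ver << 4) + (type_of_loader & 0x0f);
		return id;
	}
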
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
index 222802029fa6..d96c1ee3a95c 100644
--- a/arch/x86/include/asm/cpu_debug.h
+++ b/arch/x86/include/asm/cpu_debug.h
@@ -86,105 +86,7 @@ enum cpu_file_bit {
 	CPU_VALUE_BIT,				/* value */
 };
 
 #define CPU_FILE_VALUE		(1 << CPU_VALUE_BIT)
-
-/*
- * DisplayFamily_DisplayModel	Processor Families/Processor Number Series
- * --------------------------	------------------------------------------
- * 05_01, 05_02, 05_04		Pentium, Pentium with MMX
- *
- * 06_01			Pentium Pro
- * 06_03, 06_05			Pentium II Xeon, Pentium II
- * 06_07, 06_08, 06_0A, 06_0B	Pentium III Xeon, Pentum III
- *
- * 06_09, 060D			Pentium M
- *
- * 06_0E			Core Duo, Core Solo
- *
- * 06_0F			Xeon 3000, 3200, 5100, 5300, 7300 series,
- *				Core 2 Quad, Core 2 Extreme, Core 2 Duo,
- *				Pentium dual-core
- * 06_17			Xeon 5200, 5400 series, Core 2 Quad Q9650
- *
- * 06_1C			Atom
- *
- * 0F_00, 0F_01, 0F_02		Xeon, Xeon MP, Pentium 4
- * 0F_03, 0F_04			Xeon, Xeon MP, Pentium 4, Pentium D
- *
- * 0F_06			Xeon 7100, 5000 Series, Xeon MP,
- *				Pentium 4, Pentium D
- */
-
-/* Register processors bits */
-enum cpu_processor_bit {
-	CPU_NONE,
-/* Intel */
-	CPU_INTEL_PENTIUM_BIT,
-	CPU_INTEL_P6_BIT,
-	CPU_INTEL_PENTIUM_M_BIT,
-	CPU_INTEL_CORE_BIT,
-	CPU_INTEL_CORE2_BIT,
-	CPU_INTEL_ATOM_BIT,
-	CPU_INTEL_XEON_P4_BIT,
-	CPU_INTEL_XEON_MP_BIT,
-/* AMD */
-	CPU_AMD_K6_BIT,
-	CPU_AMD_K7_BIT,
-	CPU_AMD_K8_BIT,
-	CPU_AMD_0F_BIT,
-	CPU_AMD_10_BIT,
-	CPU_AMD_11_BIT,
-};
-
-#define CPU_INTEL_PENTIUM	(1 << CPU_INTEL_PENTIUM_BIT)
-#define CPU_INTEL_P6		(1 << CPU_INTEL_P6_BIT)
-#define CPU_INTEL_PENTIUM_M	(1 << CPU_INTEL_PENTIUM_M_BIT)
-#define CPU_INTEL_CORE		(1 << CPU_INTEL_CORE_BIT)
-#define CPU_INTEL_CORE2		(1 << CPU_INTEL_CORE2_BIT)
-#define CPU_INTEL_ATOM		(1 << CPU_INTEL_ATOM_BIT)
-#define CPU_INTEL_XEON_P4	(1 << CPU_INTEL_XEON_P4_BIT)
-#define CPU_INTEL_XEON_MP	(1 << CPU_INTEL_XEON_MP_BIT)
-
-#define CPU_INTEL_PX		(CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
-#define CPU_INTEL_COREX		(CPU_INTEL_CORE | CPU_INTEL_CORE2)
-#define CPU_INTEL_XEON		(CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
-#define CPU_CO_AT		(CPU_INTEL_CORE | CPU_INTEL_ATOM)
-#define CPU_C2_AT		(CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
-#define CPU_CX_AT		(CPU_INTEL_COREX | CPU_INTEL_ATOM)
-#define CPU_CX_XE		(CPU_INTEL_COREX | CPU_INTEL_XEON)
-#define CPU_P6_XE		(CPU_INTEL_P6 | CPU_INTEL_XEON)
-#define CPU_PM_CO_AT		(CPU_INTEL_PENTIUM_M | CPU_CO_AT)
-#define CPU_C2_AT_XE		(CPU_C2_AT | CPU_INTEL_XEON)
-#define CPU_CX_AT_XE		(CPU_CX_AT | CPU_INTEL_XEON)
-#define CPU_P6_CX_AT		(CPU_INTEL_P6 | CPU_CX_AT)
-#define CPU_P6_CX_XE		(CPU_P6_XE | CPU_INTEL_COREX)
-#define CPU_P6_CX_AT_XE		(CPU_INTEL_P6 | CPU_CX_AT_XE)
-#define CPU_PM_CX_AT_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
-#define CPU_PM_CX_AT		(CPU_INTEL_PENTIUM_M | CPU_CX_AT)
-#define CPU_PM_CX_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_XE)
-#define CPU_PX_CX_AT		(CPU_INTEL_PX | CPU_CX_AT)
-#define CPU_PX_CX_AT_XE		(CPU_INTEL_PX | CPU_CX_AT_XE)
-
-/* Select all supported Intel CPUs */
-#define CPU_INTEL_ALL		(CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
-
-#define CPU_AMD_K6		(1 << CPU_AMD_K6_BIT)
-#define CPU_AMD_K7		(1 << CPU_AMD_K7_BIT)
-#define CPU_AMD_K8		(1 << CPU_AMD_K8_BIT)
-#define CPU_AMD_0F		(1 << CPU_AMD_0F_BIT)
-#define CPU_AMD_10		(1 << CPU_AMD_10_BIT)
-#define CPU_AMD_11		(1 << CPU_AMD_11_BIT)
-
-#define CPU_K10_PLUS		(CPU_AMD_10 | CPU_AMD_11)
-#define CPU_K0F_PLUS		(CPU_AMD_0F | CPU_K10_PLUS)
-#define CPU_K8_PLUS		(CPU_AMD_K8 | CPU_K0F_PLUS)
-#define CPU_K7_PLUS		(CPU_AMD_K7 | CPU_K8_PLUS)
-
-/* Select all supported AMD CPUs */
-#define CPU_AMD_ALL		(CPU_AMD_K6 | CPU_K7_PLUS)
-
-/* Select all supported CPUs */
-#define CPU_ALL			(CPU_INTEL_ALL | CPU_AMD_ALL)
 
 #define MAX_CPU_FILES		512
 
@@ -220,7 +122,6 @@ struct cpu_debug_range {
 	unsigned min;		/* Register range min	*/
 	unsigned max;		/* Register range max	*/
 	unsigned flag;		/* Supported flags	*/
-	unsigned model;		/* Supported models	*/
 };
 
 #endif /* _ASM_X86_CPU_DEBUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb83b1c397aa..19af42138f78 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,7 +22,7 @@
 #define X86_FEATURE_TSC		(0*32+ 4) /* Time Stamp Counter */
 #define X86_FEATURE_MSR		(0*32+ 5) /* Model-Specific Registers */
 #define X86_FEATURE_PAE		(0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Exception */
 #define X86_FEATURE_CX8		(0*32+ 8) /* CMPXCHG8 instruction */
 #define X86_FEATURE_APIC	(0*32+ 9) /* Onboard APIC */
 #define X86_FEATURE_SEP		(0*32+11) /* SYSENTER/SYSEXIT */
@@ -94,6 +94,7 @@
 #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -192,11 +193,11 @@ extern const char * const x86_power_flags[32];
 #define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
 #define setup_clear_cpu_cap(bit) do { \
 	clear_cpu_cap(&boot_cpu_data, bit);	\
-	set_bit(bit, (unsigned long *)cleared_cpu_caps); \
+	set_bit(bit, (unsigned long *)cpu_caps_cleared); \
 } while (0)
 #define setup_force_cpu_cap(bit) do { \
 	set_cpu_cap(&boot_cpu_data, bit);	\
-	clear_bit(bit, (unsigned long *)cleared_cpu_caps); \
+	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
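
The single cleared_cpu_caps mask becomes a cpu_caps_cleared/cpu_caps_set pair, so a forced capability is now recorded positively and can be re-applied to CPUs brought up later, rather than merely being dropped from the cleared mask. A hedged sketch of how early quirk code typically uses the helpers (kernel context assumed; the quirk itself is invented):

	static void __init example_board_quirk(void)
	{
		/* hide SYSENTER, trust the TSC on this board */
		setup_clear_cpu_cap(X86_FEATURE_SEP);
		setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
	}
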
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a8f672ba100c..70dac199b093 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -15,8 +15,8 @@
  * - buffer allocation (memory accounting)
  *
  *
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
+ * Copyright (C) 2007-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
 #ifndef _ASM_X86_DS_H
@@ -83,8 +83,10 @@ enum ds_feature {
  * The interrupt threshold is independent from the overflow callback
  * to allow users to use their own overflow interrupt handling mechanism.
  *
- * task: the task to request recording for;
- *       NULL for per-cpu recording on the current cpu
+ * The function might sleep.
+ *
+ * task: the task to request recording for
+ * cpu:  the cpu to request recording for
  * base: the base pointer for the (non-pageable) buffer;
  * size: the size of the provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
@@ -93,19 +95,28 @@ enum ds_feature {
  *       -1 if no interrupt threshold is requested.
  * flags: a bit-mask of the above flags
  */
-extern struct bts_tracer *ds_request_bts(struct task_struct *task,
+extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
 					      void *base, size_t size,
 					      bts_ovfl_callback_t ovfl,
 					      size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-					   void *base, size_t size,
-					   pebs_ovfl_callback_t ovfl,
-					   size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+					     bts_ovfl_callback_t ovfl,
+					     size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+						void *base, size_t size,
+						pebs_ovfl_callback_t ovfl,
+						size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
+					       void *base, size_t size,
+					       pebs_ovfl_callback_t ovfl,
+					       size_t th, unsigned int flags);
 
 /*
  * Release BTS or PEBS resources
  * Suspend and resume BTS or PEBS tracing
  *
+ * Must be called with irq's enabled.
+ *
  * tracer: the tracer handle returned from ds_request_~()
  */
 extern void ds_release_bts(struct bts_tracer *tracer);
@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
 extern void ds_suspend_pebs(struct pebs_tracer *tracer);
 extern void ds_resume_pebs(struct pebs_tracer *tracer);
 
+/*
+ * Release BTS or PEBS resources
+ * Suspend and resume BTS or PEBS tracing
+ *
+ * Cpu tracers must call this on the traced cpu.
+ * Task tracers must call ds_release_~_noirq() for themselves.
+ *
+ * May be called with irq's disabled.
+ *
+ * Returns 0 if successful;
+ * -EPERM if the cpu tracer does not trace the current cpu.
+ * -EPERM if the task tracer does not trace itself.
+ *
+ * tracer: the tracer handle returned from ds_request_~()
+ */
+extern int ds_release_bts_noirq(struct bts_tracer *tracer);
+extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
+extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
+extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
+
 
 /*
  * The raw DS buffer state as it is used for BTS and PEBS recording.
@@ -170,9 +203,9 @@ struct bts_struct {
 	} lbr;
 	/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
 	struct {
-		__u64 jiffies;
+		__u64 clock;
 		pid_t pid;
-	} timestamp;
+	} event;
 	} variant;
 };
 
@@ -201,8 +234,12 @@ struct bts_trace {
 struct pebs_trace {
 	struct ds_trace ds;
 
-	/* the PEBS reset value */
-	unsigned long long reset_value;
+	/* the number of valid counters in the below array */
+	unsigned int counters;
+
+#define MAX_PEBS_COUNTERS 4
+	/* the counter reset value */
+	unsigned long long counter_reset[MAX_PEBS_COUNTERS];
 };
 
 
@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer);
  * Returns 0 on success; -Eerrno on error
  *
  * tracer: the tracer handle returned from ds_request_pebs()
+ * counter: the index of the counter
  * value: the new counter reset value
  */
-extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
+			     unsigned int counter, u64 value);
 
 /*
  * Initialization
@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
  */
 extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
 
-/*
- * Task clone/init and cleanup work
- */
-extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
-extern void ds_exit_thread(struct task_struct *tsk);
-
 #else /* CONFIG_X86_DS */
 
 struct cpuinfo_x86;
 static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 static inline void ds_switch_to(struct task_struct *prev,
 				struct task_struct *next) {}
-static inline void ds_copy_thread(struct task_struct *tsk,
-				  struct task_struct *father) {}
-static inline void ds_exit_thread(struct task_struct *tsk) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
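
The request side is now split into explicit _task/_cpu variants, and a _noirq release/suspend/resume family is added for callers running with interrupts off. A hedged sketch of the per-cpu BTS flow under the new names (kernel context assumed; the buffer size and the ERR_PTR error convention are assumptions, error paths abbreviated):

	static void *bts_buf;			/* non-pageable buffer */
	static struct bts_tracer *tracer;

	static int start_cpu_bts(int cpu)
	{
		bts_buf = kzalloc(64 * 1024, GFP_KERNEL);
		if (!bts_buf)
			return -ENOMEM;
		/* NULL ovfl callback; (size_t)-1: no interrupt threshold */
		tracer = ds_request_bts_cpu(cpu, bts_buf, 64 * 1024,
					    NULL, (size_t)-1, 0);
		return IS_ERR(tracer) ? PTR_ERR(tracer) : 0;
	}

	static void stop_cpu_bts(void)
	{
		ds_release_bts(tracer);	/* irqs must be enabled here */
		kfree(bts_buf);
	}
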
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b762ea49bd70..3bd1777a4c8b 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -63,7 +63,26 @@ extern unsigned long io_apic_irqs;
 extern void init_VISWS_APIC_irqs(void);
 extern void setup_IO_APIC(void);
 extern void disable_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+
+struct io_apic_irq_attr {
+	int ioapic;
+	int ioapic_pin;
+	int trigger;
+	int polarity;
+};
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+					int ioapic, int ioapic_pin,
+					int trigger, int polarity)
+{
+	irq_attr->ioapic	= ioapic;
+	irq_attr->ioapic_pin	= ioapic_pin;
+	irq_attr->trigger	= trigger;
+	irq_attr->polarity	= polarity;
+}
+
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
+				      struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);
 
 extern void enable_IO_APIC(void);
@@ -78,7 +97,11 @@ extern void eisa_set_level_irq(unsigned int irq);
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
 extern void smp_spurious_interrupt(struct pt_regs *);
+extern void smp_generic_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_IO_APIC
+extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
+#endif
 #ifdef CONFIG_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 71c9e5183982..175adf58dd4f 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -67,7 +67,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 		     ".previous\n"
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err)
-#if 0 /* See comment in __save_init_fpu() below. */
+#if 0 /* See comment in fxsave() below. */
 		     : [fx] "r" (fx), "m" (*fx), "0" (0));
 #else
 		     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
@@ -75,14 +75,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 	return err;
 }
 
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
-	if (task_thread_info(tsk)->status & TS_XSAVE)
-		return xrstor_checking(&tsk->thread.xstate->xsave);
-	else
-		return fxrstor_checking(&tsk->thread.xstate->fxsave);
-}
-
 /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
    is pending. Clear the x87 state here by setting it to fixed
    values. The kernel data segment can be sometimes 0 and sometimes
@@ -120,7 +112,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 		     ".previous\n"
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in __fxsave_clear() below. */
+#if 0 /* See comment in fxsave() below. */
 		     : [fx] "r" (fx), "0" (0));
 #else
 		     : [fx] "cdaSDb" (fx), "0" (0));
@@ -185,12 +177,9 @@ static inline void tolerant_fwait(void)
 	asm volatile("fnclex ; fwait");
 }
 
-static inline void restore_fpu(struct task_struct *tsk)
+/* perform fxrstor iff the processor has extended states, otherwise frstor */
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
-	if (task_thread_info(tsk)->status & TS_XSAVE) {
-		xrstor_checking(&tsk->thread.xstate->xsave);
-		return;
-	}
 	/*
 	 * The "nop" is needed to make the instructions the same
 	 * length.
@@ -199,7 +188,9 @@ static inline void restore_fpu(struct task_struct *tsk)
 		     "nop ; frstor %1",
 		     "fxrstor %1",
 		     X86_FEATURE_FXSR,
-		     "m" (tsk->thread.xstate->fxsave));
+		     "m" (*fx));
+
+	return 0;
 }
 
 /* We need a safe address that is cheap to find and that is already
@@ -262,6 +253,14 @@ end:
 
 #endif /* CONFIG_X86_64 */
 
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		return xrstor_checking(&tsk->thread.xstate->xsave);
+	else
+		return fxrstor_checking(&tsk->thread.xstate->fxsave);
+}
+
 /*
  * Signal frame handlers...
  */
@@ -305,18 +304,18 @@ static inline void kernel_fpu_end(void)
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context aswell. To prevent these kernel instructions
- * in interrupt context interact wrongly with other user/kernel fpu usage, we
+ * get used from interrupt context as well. To prevent these kernel instructions
+ * in interrupt context interacting wrongly with other user/kernel fpu usage, we
  * should use them only in the context of irq_ts_save/restore()
  */
 static inline int irq_ts_save(void)
 {
 	/*
-	 * If we are in process context, we are ok to take a spurious DNA fault.
-	 * Otherwise, doing clts() in process context require pre-emption to
-	 * be disabled or some heavy lifting like kernel_fpu_begin()
+	 * If in process context and not atomic, we can take a spurious DNA fault.
+	 * Otherwise, doing clts() in process context requires disabling preemption
+	 * or some heavy lifting like kernel_fpu_begin()
 	 */
-	if (!in_interrupt())
+	if (!in_atomic())
 		return 0;
 
 	if (read_cr0() & X86_CR0_TS) {
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 1a99e6c092af..58d7091eeb1f 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,8 +60,4 @@ extern struct irq_chip i8259A_chip;
 extern void mask_8259A(void);
 extern void unmask_8259A(void);
 
-#ifdef CONFIG_X86_32
-extern void init_ISA_irqs(void);
-#endif
-
 #endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9d826e436010..daf866ed0612 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -154,22 +154,19 @@ extern int timer_through_8259;
 extern int io_apic_get_unique_id(int ioapic, int apic_id);
 extern int io_apic_get_version(int ioapic);
 extern int io_apic_get_redir_entries(int ioapic);
-extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
-				   int edge_level, int active_high_low);
 #endif /* CONFIG_ACPI */
 
+struct io_apic_irq_attr;
+extern int io_apic_set_pci_routing(struct device *dev, int irq,
+				   struct io_apic_irq_attr *irq_attr);
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
-#ifdef CONFIG_X86_64
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
 extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
-					 struct IO_APIC_route_entry **ioapic_entries);
-#endif
 
 extern void probe_nr_irqs_gsi(void);
 
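
Together with the io_apic_irq_attr introduced in hw_irq.h above, pin lookup and routing setup now hand around one attribute block instead of loose trigger/polarity integers. A hedged sketch of a caller (kernel context assumed; pin 0 standing for INTA follows the usual convention of this interface):

	static int route_sketch(struct pci_dev *dev)
	{
		struct io_apic_irq_attr attr;
		int irq;

		irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, dev->devfn,
						 0 /* INTA */, &attr);
		if (irq < 0)
			return irq;
		return io_apic_set_pci_routing(&dev->dev, irq, &attr);
	}
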
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 86af26091d6c..0e9fe1d9d971 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -1,3 +1,6 @@
+#ifndef _ASM_X86_IOMAP_H
+#define _ASM_X86_IOMAP_H
+
 /*
  * Copyright © 2008 Ingo Molnar
  *
@@ -31,3 +34,5 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 
 void
 iounmap_atomic(void *kvaddr, enum km_type type);
+
+#endif /* _ASM_X86_IOMAP_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 0396760fccb8..f275e2244505 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,6 +1,6 @@
 #ifndef _ASM_X86_IRQ_REMAPPING_H
 #define _ASM_X86_IRQ_REMAPPING_H
 
-#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
 
 #endif /* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79bbb47c..910b5a3d6751 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -34,6 +34,7 @@
 
 #ifdef CONFIG_X86_32
 # define SYSCALL_VECTOR		0x80
+# define IA32_SYSCALL_VECTOR	0x80
 #else
 # define IA32_SYSCALL_VECTOR	0x80
 #endif
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h
index 54c8cc53b24d..c2d1f3b58e5f 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/k8.h
@@ -12,4 +12,17 @@ extern int cache_k8_northbridges(void);
 extern void k8_flush_garts(void);
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
+#ifdef CONFIG_K8_NB
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+	return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
+}
+#else
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+	return NULL;
+}
+#endif
+
+
 #endif /* _ASM_X86_K8_H */
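
node_to_k8_nb_misc() gives per-node access to the AMD northbridge's "misc" PCI function, with a NULL fallback when CONFIG_K8_NB is off or the node has no northbridge. A hedged sketch of a consumer (kernel context assumed; the register offset is illustrative):

	static u32 read_nb_reg(int node, int reg)
	{
		struct pci_dev *misc = node_to_k8_nb_misc(node);
		u32 val = 0;

		if (misc)
			pci_read_config_dword(misc, reg, &val);
		return val;	/* 0 when there is no K8 northbridge */
	}
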
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index c882664716c1..ef51b501e22a 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -9,20 +9,31 @@ struct cpu_signature {
 
 struct device;
 
+enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+
 struct microcode_ops {
-	int (*request_microcode_user) (int cpu, const void __user *buf, size_t size);
-	int (*request_microcode_fw) (int cpu, struct device *device);
+	enum ucode_state (*request_microcode_user) (int cpu,
+				const void __user *buf, size_t size);
 
-	void (*apply_microcode) (int cpu);
+	enum ucode_state (*request_microcode_fw) (int cpu,
+				struct device *device);
 
-	int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 	void (*microcode_fini_cpu) (int cpu);
+
+	/*
+	 * The generic 'microcode_core' part guarantees that
+	 * the callbacks below run on a target cpu when they
+	 * are being called.
+	 * See also the "Synchronization" section in microcode_core.c.
+	 */
+	int (*apply_microcode) (int cpu);
+	int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 };
 
 struct ucode_cpu_info {
 	struct cpu_signature cpu_sig;
 	int valid;
 	void *mc;
 };
 extern struct ucode_cpu_info ucode_cpu_info[];
 
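
request_microcode_* now report a tri-state enum ucode_state instead of a bare int, and apply_microcode()/collect_cpu_info() are documented to run on the target cpu. A hedged sketch of the shape a vendor driver takes against the reworked ops (all names here are invented, not from the patch):

	static enum ucode_state example_request_fw(int cpu, struct device *dev)
	{
		return UCODE_NFOUND;	/* no image for this cpu */
	}

	static int example_apply(int cpu)
	{
		/* microcode_core guarantees we are running on 'cpu' */
		return 0;
	}

	static struct microcode_ops example_ops = {
		.request_microcode_fw	= example_request_fw,
		.apply_microcode	= example_apply,
	};
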
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 642fc7fc8cdc..e2a1bb6d71ea 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -61,9 +61,11 @@ extern void get_smp_config(void);
 #ifdef CONFIG_X86_MPPARSE
 extern void find_smp_config(void);
 extern void early_reserve_e820_mpc_new(void);
+extern int enable_update_mptable;
 #else
 static inline void find_smp_config(void) { }
 static inline void early_reserve_e820_mpc_new(void) { }
+#define enable_update_mptable 0
 #endif
 
 void __cpuinit generic_processor_info(int apicid, int version);
@@ -72,20 +74,13 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
 extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
-extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+struct device;
+extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
+			   int active_high_low);
 extern int acpi_probe_gsi(void);
 #ifdef CONFIG_X86_IO_APIC
-extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-			      u32 gsi, int triggering, int polarity);
 extern int mp_find_ioapic(int gsi);
 extern int mp_find_ioapic_pin(int ioapic, int gsi);
-#else
-static inline int
-mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-		   u32 gsi, int triggering, int polarity)
-{
-	return 0;
-}
 #endif
 #else /* !CONFIG_ACPI: */
 static inline int acpi_probe_gsi(void)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec41fc16c167..4d58d04fca83 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,7 +121,6 @@
 #define MSR_K8_TOP_MEM1		0xc001001a
 #define MSR_K8_TOP_MEM2		0xc001001d
 #define MSR_K8_SYSCFG		0xc0010010
-#define MSR_K8_HWCR		0xc0010015
 #define MSR_K8_INT_PENDING_MSG	0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK	0x18000000
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c45a0a568dff..c97264409934 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -64,7 +64,7 @@ static inline int nmi_watchdog_active(void)
 	 * but since they are power of two we could use a
 	 * cheaper way --cvg
 	 */
-	return nmi_watchdog & 0x3;
+	return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
 }
 #endif
 
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 064ed6df4cbe..c4ae822e415f 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
@@ -27,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 				unsigned long end);
 
 #ifdef CONFIG_NUMA
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So dont recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 0f915ae649a7..6f1b7331313f 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE;
 extern int sysctl_legacy_va_layout;
 
 extern void find_low_pfn_range(void);
-extern unsigned long init_memory_mapping(unsigned long start,
-					 unsigned long end);
-extern void initmem_init(unsigned long, unsigned long);
-extern void free_initmem(void);
 extern void setup_bootmem_allocator(void);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index d38c91b70248..8d382d3abf38 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -32,22 +32,14 @@
  */
 #define __PAGE_OFFSET		_AC(0xffff880000000000, UL)
 
-#define __PHYSICAL_START	CONFIG_PHYSICAL_START
-#define __KERNEL_ALIGN		0x200000
-
-/*
- * Make sure kernel is aligned to 2MB address. Catching it at compile
- * time is better. Change your config file and compile the kernel
- * for a 2MB aligned address (CONFIG_PHYSICAL_START)
- */
-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-#endif
+#define __PHYSICAL_START	((CONFIG_PHYSICAL_START +	\
+				  (CONFIG_PHYSICAL_ALIGN - 1)) & \
+				 ~(CONFIG_PHYSICAL_ALIGN - 1))
 
 #define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
 #define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
 
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define __PHYSICAL_MASK_SHIFT	46
 #define __VIRTUAL_MASK_SHIFT	48
 
@@ -71,12 +63,6 @@ extern unsigned long __phys_addr(unsigned long);
 
 #define vmemmap ((struct page *)VMEMMAP_START)
 
-extern unsigned long init_memory_mapping(unsigned long start,
-					 unsigned long end);
-
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern void free_initmem(void);
-
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 826ad37006ab..6473f5ccff85 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern unsigned long init_memory_mapping(unsigned long start,
+					 unsigned long end);
+
+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PAGE_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a53da004e08e..4fb37c8a0832 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -56,6 +56,7 @@ struct desc_ptr;
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
+struct task_struct;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@ struct pv_cpu_ops {
 
 	void (*swapgs)(void);
 
-	struct pv_lazy_ops lazy_mode;
+	void (*start_context_switch)(struct task_struct *prev);
+	void (*end_context_switch)(struct task_struct *next);
 };
 
 struct pv_irq_ops {
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode {
1399}; 1401};
1400 1402
1401enum paravirt_lazy_mode paravirt_get_lazy_mode(void); 1403enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
1402void paravirt_enter_lazy_cpu(void); 1404void paravirt_start_context_switch(struct task_struct *prev);
1403void paravirt_leave_lazy_cpu(void); 1405void paravirt_end_context_switch(struct task_struct *next);
1406
1404void paravirt_enter_lazy_mmu(void); 1407void paravirt_enter_lazy_mmu(void);
1405void paravirt_leave_lazy_mmu(void); 1408void paravirt_leave_lazy_mmu(void);
1406void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
1407 1409
1408#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE 1410#define __HAVE_ARCH_START_CONTEXT_SWITCH
1409static inline void arch_enter_lazy_cpu_mode(void) 1411static inline void arch_start_context_switch(struct task_struct *prev)
1410{ 1412{
1411 PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); 1413 PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
1412} 1414}
1413 1415
1414static inline void arch_leave_lazy_cpu_mode(void) 1416static inline void arch_end_context_switch(struct task_struct *next)
1415{ 1417{
1416 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); 1418 PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
1417} 1419}
1418 1420
1419void arch_flush_lazy_cpu_mode(void);
1420
1421#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE 1421#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
1422static inline void arch_enter_lazy_mmu_mode(void) 1422static inline void arch_enter_lazy_mmu_mode(void)
1423{ 1423{
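The paravirt.h change above replaces the generic lazy-CPU mode with two explicit context-switch hooks. As a rough illustration of how a hypervisor backend might populate them (a sketch only: the hook and helper names come from this patch, while everything prefixed demo_ is a hypothetical assumption, not the actual Xen code):

/*
 * Hedged sketch: one plausible backend implementation of the new hooks.
 * The demo_* functions are illustrative, not from the kernel tree.
 */
static void demo_start_context_switch(struct task_struct *prev)
{
	/* open a batch so MMU updates made while switching away
	 * from prev can be queued instead of trapping one by one */
	paravirt_start_context_switch(prev);
}

static void demo_end_context_switch(struct task_struct *next)
{
	/* flush the queued updates before next starts running */
	paravirt_end_context_switch(next);
}

/* wired up in the backend's pv_cpu_ops initializer:
 *	.start_context_switch = demo_start_context_switch,
 *	.end_context_switch   = demo_end_context_switch,
 */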
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 29d96d168bc0..18ef7ebf2631 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
 #define pte_val(x)	native_pte_val(x)
 #define __pte(x)	native_make_pte(x)
 
+#define arch_end_context_switch(prev)	do {} while(0)
+
 #endif	/* CONFIG_PARAVIRT */
 
 /*
@@ -503,6 +505,8 @@ static inline int pgd_none(pgd_t pgd)
 
 #ifndef __ASSEMBLY__
 
+extern int direct_gbpages;
+
 /* local pte updates need not use xchg for locking */
 static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
 {
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 6b87bc6d5018..abde308fdb0f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -25,10 +25,6 @@ extern pgd_t init_level4_pgt[];
 
 extern void paging_init(void);
 
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-
 #define pte_ERROR(e)					\
 	printk("%s:%d: bad pte %p(%016lx).\n",		\
 		__FILE__, __LINE__, &(e), pte_val(e))
@@ -135,8 +131,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 
 #define update_mmu_cache(vma, address, pte) do { } while (0)
 
-extern int direct_gbpages;
-
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index fbf42b8e0383..766ea16fbbbd 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -51,11 +51,11 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
-
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define MAXMEM		 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START    _AC(0xffffc20000000000, UL)
-#define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
+#define VMALLOC_START    _AC(0xffffc90000000000, UL)
+#define VMALLOC_END      _AC(0xffffe8ffffffffff, UL)
+#define VMEMMAP_START	 _AC(0xffffea0000000000, UL)
 #define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
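The relocated constants above can be sanity-checked arithmetically. A small userspace sketch (not kernel code) of the sizes implied by the hunk:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* MAX_PHYSMEM_BITS grew to 46, so MAXMEM is now 64 TB */
	uint64_t maxmem = 1ULL << 46;
	assert(maxmem == 64ULL << 40);

	/* the relocated vmalloc area spans 32 TB */
	uint64_t vmalloc_start = 0xffffc90000000000ULL;
	uint64_t vmalloc_end   = 0xffffe8ffffffffffULL;
	assert(vmalloc_end - vmalloc_start + 1 == 32ULL << 40);
	return 0;
}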
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b8238dc8786d..4d258ad76a0f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -273,7 +273,6 @@ typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
 extern int nx_enabled;
-extern void set_nx(void);
 
 #define pgprot_writecombine	pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c2cceae709c8..c7768269b1cf 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -135,7 +135,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
 
 extern struct tss_struct doublefault_tss;
-extern __u32 cleared_cpu_caps[NCAPINTS];
+extern __u32 cpu_caps_cleared[NCAPINTS];
+extern __u32 cpu_caps_set[NCAPINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
@@ -409,9 +410,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary);
 extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
 
 struct thread_struct {
 	/* Cached TLS descriptors: */
@@ -427,8 +425,12 @@ struct thread_struct {
 	unsigned short		fsindex;
 	unsigned short		gsindex;
 #endif
+#ifdef CONFIG_X86_32
 	unsigned long		ip;
+#endif
+#ifdef CONFIG_X86_64
 	unsigned long		fs;
+#endif
 	unsigned long		gs;
 	/* Hardware debugging registers: */
 	unsigned long		debugreg0;
@@ -460,14 +462,8 @@ struct thread_struct {
 	unsigned		io_bitmap_max;
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
 	unsigned long	debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+	/* Debug Store context; see asm/ds.h */
 	struct ds_context	*ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-	unsigned int	bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -795,6 +791,21 @@ static inline unsigned long get_debugctlmsr(void)
 	return debugctlmsr;
 }
 
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+	u64 debugctlmsr = 0;
+	u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return 0;
+#endif
+	rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+	debugctlmsr = val1 | ((u64)val2 << 32);
+
+	return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -804,6 +815,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+static inline void update_debugctlmsr_on_cpu(int cpu,
+					     unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return;
+#endif
+	wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+		     (u32)((u64)debugctlmsr),
+		     (u32)((u64)debugctlmsr >> 32));
+}
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
@@ -814,6 +837,7 @@ extern unsigned int BIOS_revision;
 
 /* Boot loader type from the setup header: */
 extern int bootloader_type;
+extern int bootloader_version;
 
 extern char ignore_fpu_irq;
 
@@ -874,7 +898,6 @@ static inline void spin_lock_prefetch(const void *x)
 	.vm86_info		= NULL,					  \
 	.sysenter_cs		= __KERNEL_CS,				  \
 	.io_bitmap_ptr		= NULL,					  \
-	.fs			= __KERNEL_PERCPU,			  \
 }
 
 /*
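The new get/update_debugctlmsr_on_cpu helpers added above split the 64-bit DEBUGCTL value into the two 32-bit halves that the per-CPU MSR accessors operate on. A small userspace sketch of that packing (assert-based, purely illustrative):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t msr = 0x123456789abcdef0ULL;
	uint32_t lo = (uint32_t)msr;		/* low half, as passed to wrmsr_on_cpu */
	uint32_t hi = (uint32_t)(msr >> 32);	/* high half */

	/* rdmsr_on_cpu-style recombination recovers the original value */
	assert((((uint64_t)hi << 32) | lo) == msr);
	return 0;
}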
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 624f133943ed..0f0d908349aa 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -236,12 +236,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
-extern void x86_ptrace_untrace(struct task_struct *);
-extern void x86_ptrace_fork(struct task_struct *child,
-			    unsigned long clone_flags);
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void ptrace_bts_untrace(struct task_struct *tsk);
 
-#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
-#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index a4737dddfd58..64cf2d24fad1 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -48,9 +48,15 @@
 #endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE	0
-#define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
 #define NEED_PGE	0
+#else
+#define NEED_PSE	(1<<(X86_FEATURE_PSE & 31))
+#define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
+#endif
+#define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
 #define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
 #define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
 #define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
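Note the parenthesization in the non-paravirt NEED_PSE/NEED_PGE lines: the `& 31` must sit inside the shift, matching the surrounding NEED_* macros (the lines have been corrected accordingly above). Each X86_FEATURE_* number encodes a 32-bit capability word and a bit within it, so only the low five bits select the mask position. A small userspace sketch of the pitfall (the feature value is taken from the kernel's cpufeature definitions):

#include <assert.h>

#define X86_FEATURE_PGE (0*32 + 13)	/* word 0, bit 13 */

int main(void)
{
	/* correct: mask built from the low 5 bits of the feature number */
	unsigned int need_pge = 1u << (X86_FEATURE_PGE & 31);
	assert(need_pge == (1u << 13));

	/* misplaced parentheses: (1 << 13) & 31 == 0, so the required
	 * feature would silently drop out of the mask */
	assert(((1u << X86_FEATURE_PGE) & 31) == 0);
	return 0;
}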
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index bdc2ada05ae0..4093d1ed6db2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -33,7 +33,6 @@ struct x86_quirks {
 	int (*setup_ioapic_ids)(void);
 };
 
-extern void x86_quirk_pre_intr_init(void);
 extern void x86_quirk_intr_init(void);
 
 extern void x86_quirk_trap_init(void);
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19e0d88b966d..6a84ed166aec 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -180,7 +180,7 @@ extern int safe_smp_processor_id(void);
 static inline int logical_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+	return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
 #endif
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index e3cc3c063ec5..4517d6b93188 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,7 +27,7 @@
 #else /* CONFIG_X86_32 */
 # define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
 # define MAX_PHYSADDR_BITS	44
-# define MAX_PHYSMEM_BITS	44 /* Can be max 45 bits */
+# define MAX_PHYSMEM_BITS	46
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 7043408f6904..372b76edd63f 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -1,7 +1,7 @@
 /*
  * syscalls.h - Linux syscall interfaces (arch-specific)
  *
- * Copyright (c) 2008 Jaswinder Singh
+ * Copyright (c) 2008 Jaswinder Singh Rajput
 *
  * This file is released under the GPLv2.
  * See the file COPYING for more details.
@@ -12,50 +12,55 @@
 
 #include <linux/compiler.h>
 #include <linux/linkage.h>
-#include <linux/types.h>
 #include <linux/signal.h>
+#include <linux/types.h>
 
 /* Common in X86_32 and X86_64 */
 /* kernel/ioport.c */
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 
+/* kernel/process.c */
+int sys_fork(struct pt_regs *);
+int sys_vfork(struct pt_regs *);
+
 /* kernel/ldt.c */
 asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
+/* kernel/signal.c */
+long sys_rt_sigreturn(struct pt_regs *);
+
 /* kernel/tls.c */
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
 asmlinkage int sys_get_thread_area(struct user_desc __user *);
 
 /* X86_32 only */
 #ifdef CONFIG_X86_32
+/* kernel/ioport.c */
+long sys_iopl(struct pt_regs *);
+
 /* kernel/process_32.c */
-int sys_fork(struct pt_regs *);
 int sys_clone(struct pt_regs *);
-int sys_vfork(struct pt_regs *);
 int sys_execve(struct pt_regs *);
 
-/* kernel/signal_32.c */
+/* kernel/signal.c */
 asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
 asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 			     struct old_sigaction __user *);
 int sys_sigaltstack(struct pt_regs *);
 unsigned long sys_sigreturn(struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
-
-/* kernel/ioport.c */
-long sys_iopl(struct pt_regs *);
 
 /* kernel/sys_i386_32.c */
+struct mmap_arg_struct;
+struct sel_arg_struct;
+struct oldold_utsname;
+struct old_utsname;
+
 asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
 			  unsigned long, unsigned long, unsigned long);
-struct mmap_arg_struct;
 asmlinkage int old_mmap(struct mmap_arg_struct __user *);
-struct sel_arg_struct;
 asmlinkage int old_select(struct sel_arg_struct __user *);
 asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
-struct old_utsname;
 asmlinkage int sys_uname(struct old_utsname __user *);
-struct oldold_utsname;
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
 /* kernel/vm86_32.c */
@@ -65,29 +70,27 @@ int sys_vm86(struct pt_regs *);
 #else /* CONFIG_X86_32 */
 
 /* X86_64 only */
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
+
 /* kernel/process_64.c */
-asmlinkage long sys_fork(struct pt_regs *);
 asmlinkage long sys_clone(unsigned long, unsigned long,
 			  void __user *, void __user *,
 			  struct pt_regs *);
-asmlinkage long sys_vfork(struct pt_regs *);
 asmlinkage long sys_execve(char __user *, char __user * __user *,
 			   char __user * __user *,
 			   struct pt_regs *);
 long sys_arch_prctl(int, unsigned long);
 
-/* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
-
-/* kernel/signal_64.c */
+/* kernel/signal.c */
 asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
 				struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
 
 /* kernel/sys_x86_64.c */
+struct new_utsname;
+
 asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
 			 unsigned long, unsigned long, unsigned long);
-struct new_utsname;
 asmlinkage long sys_uname(struct new_utsname __user *);
 
 #endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8820a73ae090..602c769fc98c 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,7 +94,8 @@ struct thread_info {
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
 #define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
-#define TIF_SYSCALL_FTRACE	27	/* for ftrace syscall instrumentation */
+#define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
+#define TIF_SYSCALL_FTRACE	28	/* for ftrace syscall instrumentation */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -116,6 +117,7 @@ struct thread_info {
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
+#define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_FTRACE	(1 << TIF_SYSCALL_FTRACE)
 
 /* work to do in syscall_trace_enter() */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 16a5c84b0329..a5ecc9c33e92 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,7 +17,7 @@
 
 static inline void __native_flush_tlb(void)
 {
-	write_cr3(read_cr3());
+	native_write_cr3(native_read_cr3());
 }
 
 static inline void __native_flush_tlb_global(void)
@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void)
 	 */
 	raw_local_irq_save(flags);
 
-	cr4 = read_cr4();
+	cr4 = native_read_cr4();
 	/* clear PGE */
-	write_cr4(cr4 & ~X86_CR4_PGE);
+	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
-	write_cr4(cr4);
+	native_write_cr4(cr4);
 
 	raw_local_irq_restore(flags);
 }
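The tlbflush.h hunk only swaps in the native_* CR register accessors, but the surrounding logic is worth restating: global (PGE-marked) TLB entries survive an ordinary CR3 reload, so flushing them requires toggling CR4.PGE. A hedged restatement of the pattern, with read_cr4/write_cr4 standing in for the accessors used above:

/* Sketch, not the kernel function itself: toggle CR4.PGE to flush
 * global TLB entries; interrupts stay off so nothing runs between
 * the two writes while PGE is cleared. */
static inline void global_tlb_flush_sketch(void)
{
	unsigned long flags, cr4;

	raw_local_irq_save(flags);
	cr4 = read_cr4();		/* save the current value */
	write_cr4(cr4 & ~X86_CR4_PGE);	/* clearing PGE flushes everything */
	write_cr4(cr4);			/* restore PGE; flushes again */
	raw_local_irq_restore(flags);
}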
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index f44b49abca49..066ef590d7e0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -203,7 +203,8 @@ struct pci_bus;
 void x86_pci_root_bus_res_quirks(struct pci_bus *b);
 
 #ifdef CONFIG_SMP
-#define mc_capable()	(cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)
+#define mc_capable()	((boot_cpu_data.x86_max_cores > 1) && \
+			 (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids))
 #define smt_capable()			(smp_num_siblings > 1)
 #endif
 
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0d5342515b86..bfd74c032fca 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_TRAPS_H
 
 #include <asm/debugreg.h>
+#include <asm/siginfo.h>			/* TRAP_TRACE, ... */
 
 #ifdef CONFIG_X86_32
 #define dotraplinkage
@@ -13,6 +14,9 @@ asmlinkage void divide_error(void);
 asmlinkage void debug(void);
 asmlinkage void nmi(void);
 asmlinkage void int3(void);
+asmlinkage void xen_debug(void);
+asmlinkage void xen_int3(void);
+asmlinkage void xen_stack_segment(void);
 asmlinkage void overflow(void);
 asmlinkage void bounds(void);
 asmlinkage void invalid_op(void);
@@ -74,7 +78,6 @@ static inline int get_si_code(unsigned long condition)
 }
 
 extern int panic_on_unrecovered_nmi;
-extern int kstack_depth_to_print;
 
 void math_error(void __user *);
 void math_emulate(struct math_emu_info *);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74cf8dc..708dae61262d 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,7 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_rt_tgsigqueueinfo	335
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f81829462325..4e2b05404400 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 __SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev				296
 __SYSCALL(__NR_pwritev, sys_pwritev)
+#define __NR_rt_tgsigqueueinfo			297
+__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 
 
 #ifndef __NO_STUBS
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 9b0e61bf7a88..bddd44f2f0ab 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -37,7 +37,7 @@
 #define UV_CPUS_PER_ACT_STATUS		32
 #define UV_ACT_STATUS_MASK		0x3
 #define UV_ACT_STATUS_SIZE		2
-#define UV_ACTIVATION_DESCRIPTOR_SIZE	32
+#define UV_ADP_SIZE			32
 #define UV_DISTRIBUTION_SIZE		256
 #define UV_SW_ACK_NPENDING		8
 #define UV_NET_ENDPOINT_INTD		0x38
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index d3a98ea1062e..341070f7ad5c 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -133,6 +133,7 @@ struct uv_scir_s {
 struct uv_hub_info_s {
 	unsigned long		global_mmr_base;
 	unsigned long		gpa_mask;
+	unsigned int		gnode_extra;
 	unsigned long		gnode_upper;
 	unsigned long		lowmem_remap_top;
 	unsigned long		lowmem_remap_base;
@@ -159,7 +160,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
  *		p -  PNODE (local part of nsids, right shifted 1)
  */
 #define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
-#define UV_PNODE_TO_NASID(p)		(((p) << 1) | uv_hub_info->gnode_upper)
+#define UV_PNODE_TO_GNODE(p)		((p) | uv_hub_info->gnode_extra)
+#define UV_PNODE_TO_NASID(p)		(UV_PNODE_TO_GNODE(p) << 1)
 
 #define UV_LOCAL_MMR_BASE		0xf4000000UL
 #define UV_GLOBAL_MMR32_BASE		0xf8000000UL
@@ -173,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define UV_GLOBAL_MMR32_PNODE_BITS(p)	((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
 
 #define UV_GLOBAL_MMR64_PNODE_BITS(p)					\
-	((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+	((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
 
 #define UV_APIC_PNODE_SHIFT 6
 
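The uv_hub.h change reroutes the pnode-to-NASID conversion through a new "gnode" step: a pnode is first widened by OR-ing in gnode_extra, and the NASID is that gnode shifted left by one (NASIDs are even; the reverse UV_NASID_TO_PNODE drops the low bit with its >> 1). A hedged userspace sketch of the arithmetic, with an invented gnode_extra value:

#include <assert.h>

/* Hypothetical value for illustration; on real hardware gnode_extra
 * comes from the new uv_hub_info field. */
static const unsigned int gnode_extra = 0x10;

static unsigned int pnode_to_gnode(unsigned int p) { return p | gnode_extra; }
static unsigned int pnode_to_nasid(unsigned int p) { return pnode_to_gnode(p) << 1; }

int main(void)
{
	unsigned int nasid = pnode_to_nasid(0x3);
	assert(nasid == ((0x3 | 0x10) << 1));
	assert((nasid & 1) == 0);	/* NASIDs are even */
	return 0;
}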
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 88d1bfc847d3..4f78bd682125 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,7 +28,7 @@ CFLAGS_paravirt.o	:= $(nostackp)
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y			+= setup.o i8259.o irqinit_$(BITS).o
+obj-y			+= setup.o i8259.o irqinit.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
@@ -44,6 +44,7 @@ obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
 obj-$(CONFIG_X86_DS)		+= ds.o
+obj-$(CONFIG_X86_DS_SELFTEST)	+= ds_selftest.o
 obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 723989d7f802..631086159c53 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -33,6 +33,7 @@
 #include <linux/irq.h>
 #include <linux/bootmem.h>
 #include <linux/ioport.h>
+#include <linux/pci.h>
 
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
@@ -522,7 +523,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 {
 	unsigned int irq;
 	unsigned int plat_gsi = gsi;
@@ -532,14 +533,14 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
 	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
 	 */
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		if (triggering == ACPI_LEVEL_SENSITIVE)
+		if (trigger == ACPI_LEVEL_SENSITIVE)
 			eisa_set_level_irq(gsi);
 	}
 #endif
 
 #ifdef CONFIG_X86_IO_APIC
 	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
-		plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+		plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
 	}
 #endif
 	acpi_gsi_to_irq(plat_gsi, &irq);
@@ -903,10 +904,8 @@ extern int es7000_plat;
 #endif
 
 static struct {
-	int apic_id;
 	int gsi_base;
 	int gsi_end;
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } mp_ioapic_routing[MAX_IO_APICS];
 
 int mp_find_ioapic(int gsi)
@@ -986,16 +985,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
 	mp_ioapics[idx].apicid = uniq_ioapic_id(id);
-#ifdef CONFIG_X86_32
 	mp_ioapics[idx].apicver = io_apic_get_version(idx);
-#else
-	mp_ioapics[idx].apicver = 0;
-#endif
+
 	/*
 	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
-	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
 	mp_ioapic_routing[idx].gsi_base = gsi_base;
 	mp_ioapic_routing[idx].gsi_end = gsi_base +
 	    io_apic_get_redir_entries(idx);
@@ -1158,26 +1153,52 @@ void __init mp_config_acpi_legacy_irqs(void)
 	}
 }
 
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+			      int polarity)
 {
+#ifdef CONFIG_X86_MPPARSE
+	struct mpc_intsrc mp_irq;
+	struct pci_dev *pdev;
+	unsigned char number;
+	unsigned int devfn;
 	int ioapic;
-	int ioapic_pin;
-#ifdef CONFIG_X86_32
-#define MAX_GSI_NUM	4096
-#define IRQ_COMPRESSION_START	64
+	u8 pin;
 
-	static int pci_irq = IRQ_COMPRESSION_START;
-	/*
-	 * Mapping between Global System Interrupts, which
-	 * represent all possible interrupts, and IRQs
-	 * assigned to actual devices.
-	 */
-	static int gsi_to_irq[MAX_GSI_NUM];
-#else
+	if (!acpi_ioapic)
+		return 0;
+	if (!dev)
+		return 0;
+	if (dev->bus != &pci_bus_type)
+		return 0;
+
+	pdev = to_pci_dev(dev);
+	number = pdev->bus->number;
+	devfn = pdev->devfn;
+	pin = pdev->pin;
+	/* print the entry should happen on mptable identically */
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+			(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+	mp_irq.srcbus = number;
+	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	ioapic = mp_find_ioapic(gsi);
+	mp_irq.dstapic = mp_ioapics[ioapic].apicid;
+	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+	save_mp_irq(&mp_irq);
+#endif
+	return 0;
+}
+
+int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
+{
+	int ioapic;
+	int ioapic_pin;
+	struct io_apic_irq_attr irq_attr;
 
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return gsi;
-#endif
 
 	/* Don't set up the ACPI SCI because it's already set up */
 	if (acpi_gbl_FADT.sci_interrupt == gsi)
@@ -1196,93 +1217,22 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 		gsi = ioapic_renumber_irq(ioapic, gsi);
 #endif
 
-	/*
-	 * Avoid pin reprogramming.  PRTs typically include entries
-	 * with redundant pin->gsi mappings (but unique PCI devices);
-	 * we only program the IOAPIC on the first.
-	 */
 	if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
 		printk(KERN_ERR "Invalid reference to IOAPIC pin "
-		       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+		       "%d-%d\n", mp_ioapics[ioapic].apicid,
 		       ioapic_pin);
 		return gsi;
 	}
-	if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-		pr_debug("Pin %d-%d already programmed\n",
-			 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-#ifdef CONFIG_X86_32
-		return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
-#else
-		return gsi;
-#endif
-	}
-
-	set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
-#ifdef CONFIG_X86_32
-	/*
-	 * For GSI >= 64, use IRQ compression
-	 */
-	if ((gsi >= IRQ_COMPRESSION_START)
-	    && (triggering == ACPI_LEVEL_SENSITIVE)) {
-		/*
-		 * For PCI devices assign IRQs in order, avoiding gaps
-		 * due to unused I/O APIC pins.
-		 */
-		int irq = gsi;
-		if (gsi < MAX_GSI_NUM) {
-			/*
-			 * Retain the VIA chipset work-around (gsi > 15), but
-			 * avoid a problem where the 8254 timer (IRQ0) is setup
-			 * via an override (so it's not on pin 0 of the ioapic),
-			 * and at the same time, the pin 0 interrupt is a PCI
-			 * type.  The gsi > 15 test could cause these two pins
-			 * to be shared as IRQ0, and they are not shareable.
-			 * So test for this condition, and if necessary, avoid
-			 * the pin collision.
-			 */
-			gsi = pci_irq++;
-			/*
-			 * Don't assign IRQ used by ACPI SCI
-			 */
-			if (gsi == acpi_gbl_FADT.sci_interrupt)
-				gsi = pci_irq++;
-			gsi_to_irq[irq] = gsi;
-		} else {
-			printk(KERN_ERR "GSI %u is too high\n", gsi);
-			return gsi;
-		}
-	}
-#endif
-	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
-				triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
-				polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-	return gsi;
-}
 
-int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-			u32 gsi, int triggering, int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
-	struct mpc_intsrc mp_irq;
-	int ioapic;
+	if (enable_update_mptable)
+		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
 
-	if (!acpi_ioapic)
-		return 0;
+	set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
+			     trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
+			     polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+	io_apic_set_pci_routing(dev, gsi, &irq_attr);
 
-	/* print the entry should happen on mptable identically */
-	mp_irq.type = MP_INTSRC;
-	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
-			(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
-	mp_irq.srcbus = number;
-	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
-	ioapic = mp_find_ioapic(gsi);
-	mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
-	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
-	save_mp_irq(&mp_irq);
-#endif
-	return 0;
+	return gsi;
 }
 
 /*
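The reworked mp_config_acpi_gsi above encodes the ACPI trigger/polarity pair into the two low nibbles of the MPS INTSRC irqflag, and packs the PCI slot and INTx pin into srcbusirq. A small userspace sketch of those encodings (values mirror the expressions in the patch; the test inputs are made up):

#include <assert.h>

int main(void)
{
	/* MPS flags: low two bits polarity (1 = active high, 3 = active
	 * low), next two bits trigger (4 = edge, 0xc = level) */
	int edge = 1, active_high = 1;
	unsigned int irqflag = (edge ? 4 : 0x0c) | (active_high ? 1 : 3);
	assert(irqflag == 0x5);

	/* srcbusirq packs the PCI slot (devfn >> 3) and INTx pin (1..4) */
	unsigned int devfn = (0x1f << 3) | 7;	/* slot 31, function 7 */
	unsigned int pin = 1;			/* INTA */
	unsigned int srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
	assert(srcbusirq == (0x1f << 2));	/* function bits are dropped */
	return 0;
}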
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 1c31cc0e9def..167bc16ce0e5 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -9,7 +9,7 @@
 always		:= wakeup.bin
 targets		:= wakeup.elf wakeup.lds
 
-wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o
+wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o
 
 # The link order of the video-*.o modules can matter.  In particular,
 # video-vga.o *must* be listed first, followed by video-vesa.o.
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S
new file mode 100644
index 000000000000..f51eb0bb56ce
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/bioscall.S
@@ -0,0 +1 @@
+#include "../../../boot/bioscall.S"
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c
new file mode 100644
index 000000000000..6206033ba202
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/regs.c
@@ -0,0 +1 @@
+#include "../../../boot/regs.c"
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99dad52..1c60554537c3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -55,7 +55,16 @@ struct iommu_cmd {
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
 static struct dma_ops_domain *find_protection_domain(u16 devid);
+static u64* alloc_pte(struct protection_domain *dom,
+			unsigned long address, u64
+			**pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+				      unsigned long start_page,
+				      unsigned int pages);
 
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
 
 #ifdef CONFIG_AMD_IOMMU_STATS
 
@@ -213,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 {
 	struct amd_iommu *iommu;
 
-	list_for_each_entry(iommu, &amd_iommu_list, list)
+	for_each_iommu(iommu)
 		iommu_poll_events(iommu);
 
 	return IRQ_HANDLED;
@@ -440,7 +449,7 @@ static void iommu_flush_domain(u16 domid)
 	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 				      domid, 1, 1);
 
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
+	for_each_iommu(iommu) {
 		spin_lock_irqsave(&iommu->lock, flags);
 		__iommu_queue_command(iommu, &cmd);
 		__iommu_completion_wait(iommu);
@@ -449,6 +458,35 @@ static void iommu_flush_domain(u16 domid)
 	}
 }
 
+void amd_iommu_flush_all_domains(void)
+{
+	int i;
+
+	for (i = 1; i < MAX_DOMAIN_ID; ++i) {
+		if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+			continue;
+		iommu_flush_domain(i);
+	}
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+	struct amd_iommu *iommu;
+	int i;
+
+	for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+		if (amd_iommu_pd_table[i] == NULL)
+			continue;
+
+		iommu = amd_iommu_rlookup_table[i];
+		if (!iommu)
+			continue;
+
+		iommu_queue_inv_dev_entry(iommu, i);
+		iommu_completion_wait(iommu);
+	}
+}
+
 /****************************************************************************
  *
  * The functions below are used to create the page table mappings for
@@ -468,7 +506,7 @@ static int iommu_map_page(struct protection_domain *dom,
 			  unsigned long phys_addr,
 			  int prot)
 {
-	u64 __pte, *pte, *page;
+	u64 __pte, *pte;
 
 	bus_addr  = PAGE_ALIGN(bus_addr);
 	phys_addr = PAGE_ALIGN(phys_addr);
@@ -477,27 +515,7 @@ static int iommu_map_page(struct protection_domain *dom,
 	if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
 		return -EINVAL;
 
-	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
-
-	if (!IOMMU_PTE_PRESENT(*pte)) {
-		page = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-		*pte = IOMMU_L2_PDE(virt_to_phys(page));
-	}
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
-	if (!IOMMU_PTE_PRESENT(*pte)) {
-		page = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-		*pte = IOMMU_L1_PDE(virt_to_phys(page));
-	}
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
+	pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
 
 	if (IOMMU_PTE_PRESENT(*pte))
 		return -EBUSY;
@@ -595,7 +613,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 		 * as allocated in the aperture
 		 */
 		if (addr < dma_dom->aperture_size)
-			__set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
+			__set_bit(addr >> PAGE_SHIFT,
+				  dma_dom->aperture[0]->bitmap);
 	}
 
 	return 0;
@@ -632,42 +651,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
 ****************************************************************************/
 
 /*
- * The address allocator core function.
+ * The address allocator core functions.
 *
  * called with domain->lock held
  */
+
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+		      unsigned long address)
+{
+	u64 *pte;
+
+	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return NULL;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return NULL;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+	return pte;
+}
+
+/*
+ * This function is used to add a new aperture range to an existing
+ * aperture in case of dma_ops domain allocation or address allocation
+ * failure.
+ */
+static int alloc_new_range(struct amd_iommu *iommu,
+			   struct dma_ops_domain *dma_dom,
+			   bool populate, gfp_t gfp)
+{
+	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+	int i;
+
+#ifdef CONFIG_IOMMU_STRESS
+	populate = false;
+#endif
+
+	if (index >= APERTURE_MAX_RANGES)
+		return -ENOMEM;
+
+	dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
+	if (!dma_dom->aperture[index])
+		return -ENOMEM;
+
+	dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
+	if (!dma_dom->aperture[index]->bitmap)
+		goto out_free;
+
+	dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+
+	if (populate) {
+		unsigned long address = dma_dom->aperture_size;
+		int i, num_ptes = APERTURE_RANGE_PAGES / 512;
+		u64 *pte, *pte_page;
+
+		for (i = 0; i < num_ptes; ++i) {
+			pte = alloc_pte(&dma_dom->domain, address,
+					&pte_page, gfp);
+			if (!pte)
+				goto out_free;
+
+			dma_dom->aperture[index]->pte_pages[i] = pte_page;
+
+			address += APERTURE_RANGE_SIZE / 64;
+		}
+	}
+
+	dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+
+	/* Initialize the exclusion range if necessary */
+	if (iommu->exclusion_start &&
+	    iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+	    iommu->exclusion_start < dma_dom->aperture_size) {
+		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+		int pages = iommu_num_pages(iommu->exclusion_start,
+					    iommu->exclusion_length,
+					    PAGE_SIZE);
+		dma_ops_reserve_addresses(dma_dom, startpage, pages);
+	}
+
+	/*
+	 * Check for areas already mapped as present in the new aperture
+	 * range and mark those pages as reserved in the allocator. Such
+	 * mappings may already exist as a result of requested unity
+	 * mappings for devices.
+	 */
+	for (i = dma_dom->aperture[index]->offset;
+	     i < dma_dom->aperture_size;
+	     i += PAGE_SIZE) {
+		u64 *pte = fetch_pte(&dma_dom->domain, i);
+		if (!pte || !IOMMU_PTE_PRESENT(*pte))
+			continue;
+
+		dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
+	}
+
+	return 0;
+
+out_free:
+	free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+
+	kfree(dma_dom->aperture[index]);
+	dma_dom->aperture[index] = NULL;
+
+	return -ENOMEM;
+}
+
+static unsigned long dma_ops_area_alloc(struct device *dev,
+					struct dma_ops_domain *dom,
+					unsigned int pages,
+					unsigned long align_mask,
+					u64 dma_mask,
+					unsigned long start)
+{
+	unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
+	int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
+	int i = start >> APERTURE_RANGE_SHIFT;
+	unsigned long boundary_size;
+	unsigned long address = -1;
+	unsigned long limit;
+
+	next_bit >>= PAGE_SHIFT;
+
+	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+			PAGE_SIZE) >> PAGE_SHIFT;
+
+	for (;i < max_index; ++i) {
+		unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+
+		if (dom->aperture[i]->offset >= dma_mask)
+			break;
+
+		limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+					       dma_mask >> PAGE_SHIFT);
+
+		address = iommu_area_alloc(dom->aperture[i]->bitmap,
+					   limit, next_bit, pages, 0,
+					   boundary_size, align_mask);
+		if (address != -1) {
+			address = dom->aperture[i]->offset +
+				  (address << PAGE_SHIFT);
+			dom->next_address = address + (pages << PAGE_SHIFT);
+			break;
+		}
+
+		next_bit = 0;
+	}
+
+	return address;
+}
+
 static unsigned long dma_ops_alloc_addresses(struct device *dev,
 					     struct dma_ops_domain *dom,
 					     unsigned int pages,
 					     unsigned long align_mask,
 					     u64 dma_mask)
 {
-	unsigned long limit;
 	unsigned long address;
-	unsigned long boundary_size;
 
-	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-			PAGE_SIZE) >> PAGE_SHIFT;
-	limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
-				       dma_mask >> PAGE_SHIFT);
+#ifdef CONFIG_IOMMU_STRESS
+	dom->next_address = 0;
+	dom->need_flush = true;
+#endif
 
-	if (dom->next_bit >= limit) {
-		dom->next_bit = 0;
-		dom->need_flush = true;
-	}
+	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+				     dma_mask, dom->next_address);
 
-	address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
-				0 , boundary_size, align_mask);
 	if (address == -1) {
-		address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
-					0, boundary_size, align_mask);
+		dom->next_address = 0;
+		address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+					     dma_mask, 0);
 		dom->need_flush = true;
 	}
 
-	if (likely(address != -1)) {
-		dom->next_bit = address + pages;
-		address <<= PAGE_SHIFT;
-	} else
+	if (unlikely(address == -1))
 		address = bad_dma_address;
 
 	WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
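The allocator above now does a next-fit search starting at dom->next_address and retries once from the bottom of the aperture, forcing an IOTLB flush when it wraps. A hedged sketch of that retry pattern in isolation (area_alloc is a hypothetical stand-in for dma_ops_area_alloc with the device/mask arguments dropped):

/* Sketch only; area_alloc() is an assumed helper, not a kernel symbol. */
static unsigned long area_alloc(struct dma_ops_domain *dom,
				unsigned int pages, unsigned long start);

static unsigned long alloc_with_wrap(struct dma_ops_domain *dom,
				     unsigned int pages)
{
	unsigned long address;

	/* first pass: continue from where the last allocation ended */
	address = area_alloc(dom, pages, dom->next_address);
	if (address == -1) {
		/* wrap: addresses below next_address may have been freed
		 * in the meantime, so the IOTLB must be flushed before
		 * they are handed out again */
		dom->next_address = 0;
		address = area_alloc(dom, pages, 0);
		dom->need_flush = true;
	}
	return address;
}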
@@ -684,11 +852,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 					  unsigned long address,
 					  unsigned int pages)
 {
-	address >>= PAGE_SHIFT;
-	iommu_area_free(dom->bitmap, address, pages);
+	unsigned i = address >> APERTURE_RANGE_SHIFT;
+	struct aperture_range *range = dom->aperture[i];
 
-	if (address >= dom->next_bit)
+	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
+
+#ifdef CONFIG_IOMMU_STRESS
+	if (i < 4)
+		return;
+#endif
+
+	if (address >= dom->next_address)
 		dom->need_flush = true;
+
+	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
+
+	iommu_area_free(range->bitmap, address, pages);
+
 }
 
 /****************************************************************************
@@ -736,12 +916,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 					unsigned long start_page,
 					unsigned int pages)
 {
-	unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
+	unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
 
 	if (start_page + pages > last_page)
 		pages = last_page - start_page;
 
-	iommu_area_reserve(dom->bitmap, start_page, pages);
+	for (i = start_page; i < start_page + pages; ++i) {
+		int index = i / APERTURE_RANGE_PAGES;
+		int page  = i % APERTURE_RANGE_PAGES;
+		__set_bit(page, dom->aperture[index]->bitmap);
+	}
 }
 
 static void free_pagetable(struct protection_domain *domain)
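With the aperture split into fixed-size ranges, a global page index maps to an (aperture range, bit) pair by simple division and modulo, as the reworked dma_ops_reserve_addresses shows. A tiny userspace sketch of that index math (the APERTURE_RANGE_PAGES value here is illustrative: 128 MB per range at 4 KB pages):

#include <assert.h>

#define APERTURE_RANGE_PAGES ((128u * 1024 * 1024) / 4096)	/* assumed */

int main(void)
{
	unsigned int i = APERTURE_RANGE_PAGES + 5;	/* sixth page of range 1 */
	assert(i / APERTURE_RANGE_PAGES == 1);		/* dom->aperture[] index */
	assert(i % APERTURE_RANGE_PAGES == 5);		/* bit in that range's bitmap */
	return 0;
}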
@@ -780,14 +964,19 @@ static void free_pagetable(struct protection_domain *domain)
  */
 static void dma_ops_domain_free(struct dma_ops_domain *dom)
 {
+	int i;
+
 	if (!dom)
 		return;
 
 	free_pagetable(&dom->domain);
 
-	kfree(dom->pte_pages);
-
-	kfree(dom->bitmap);
+	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+		if (!dom->aperture[i])
+			continue;
+		free_page((unsigned long)dom->aperture[i]->bitmap);
+		kfree(dom->aperture[i]);
+	}
 
 	kfree(dom);
 }
@@ -797,19 +986,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
  * It also initializes the page table and the address allocator data
798 * structures required for the dma_ops interface 987 * structures required for the dma_ops interface
799 */ 988 */
800static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, 989static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
801 unsigned order)
802{ 990{
803 struct dma_ops_domain *dma_dom; 991 struct dma_ops_domain *dma_dom;
804 unsigned i, num_pte_pages;
805 u64 *l2_pde;
806 u64 address;
807
808 /*
809 * Currently the DMA aperture must be between 32 MB and 1GB in size
810 */
811 if ((order < 25) || (order > 30))
812 return NULL;
813 992
814 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); 993 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
815 if (!dma_dom) 994 if (!dma_dom)
@@ -826,55 +1005,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
826 dma_dom->domain.priv = dma_dom; 1005 dma_dom->domain.priv = dma_dom;
827 if (!dma_dom->domain.pt_root) 1006 if (!dma_dom->domain.pt_root)
828 goto free_dma_dom; 1007 goto free_dma_dom;
829 dma_dom->aperture_size = (1ULL << order);
830 dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
831 GFP_KERNEL);
832 if (!dma_dom->bitmap)
833 goto free_dma_dom;
834 /*
835 * mark the first page as allocated so we never return 0 as
836 * a valid dma-address. So we can use 0 as error value
837 */
838 dma_dom->bitmap[0] = 1;
839 dma_dom->next_bit = 0;
840 1008
841 dma_dom->need_flush = false; 1009 dma_dom->need_flush = false;
842 dma_dom->target_dev = 0xffff; 1010 dma_dom->target_dev = 0xffff;
843 1011
844 /* Intialize the exclusion range if necessary */ 1012 if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
845 if (iommu->exclusion_start && 1013 goto free_dma_dom;
846 iommu->exclusion_start < dma_dom->aperture_size) {
847 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
848 int pages = iommu_num_pages(iommu->exclusion_start,
849 iommu->exclusion_length,
850 PAGE_SIZE);
851 dma_ops_reserve_addresses(dma_dom, startpage, pages);
852 }
853 1014
854 /* 1015 /*
855 * At the last step, build the page tables so we don't need to 1016 * mark the first page as allocated so we never return 0 as
856 * allocate page table pages in the dma_ops mapping/unmapping 1017 * a valid dma-address. So we can use 0 as error value
857 * path.
858 */ 1018 */
859 num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); 1019 dma_dom->aperture[0]->bitmap[0] = 1;
860 dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), 1020 dma_dom->next_address = 0;
861 GFP_KERNEL);
862 if (!dma_dom->pte_pages)
863 goto free_dma_dom;
864
865 l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
866 if (l2_pde == NULL)
867 goto free_dma_dom;
868 1021
869 dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
870
871 for (i = 0; i < num_pte_pages; ++i) {
872 dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
873 if (!dma_dom->pte_pages[i])
874 goto free_dma_dom;
875 address = virt_to_phys(dma_dom->pte_pages[i]);
876 l2_pde[i] = IOMMU_L1_PDE(address);
877 }
878 1022
879 return dma_dom; 1023 return dma_dom;
880 1024
@@ -983,7 +1127,6 @@ static int device_change_notifier(struct notifier_block *nb,
983 struct protection_domain *domain; 1127 struct protection_domain *domain;
984 struct dma_ops_domain *dma_domain; 1128 struct dma_ops_domain *dma_domain;
985 struct amd_iommu *iommu; 1129 struct amd_iommu *iommu;
986 int order = amd_iommu_aperture_order;
987 unsigned long flags; 1130 unsigned long flags;
988 1131
989 if (devid > amd_iommu_last_bdf) 1132 if (devid > amd_iommu_last_bdf)
@@ -1002,17 +1145,7 @@ static int device_change_notifier(struct notifier_block *nb,
1002 "to a non-dma-ops domain\n", dev_name(dev)); 1145 "to a non-dma-ops domain\n", dev_name(dev));
1003 1146
1004 switch (action) { 1147 switch (action) {
1005 case BUS_NOTIFY_BOUND_DRIVER: 1148 case BUS_NOTIFY_UNBOUND_DRIVER:
1006 if (domain)
1007 goto out;
1008 dma_domain = find_protection_domain(devid);
1009 if (!dma_domain)
1010 dma_domain = iommu->default_dom;
1011 attach_device(iommu, &dma_domain->domain, devid);
1012 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
1013 "device %s\n", dma_domain->domain.id, dev_name(dev));
1014 break;
1015 case BUS_NOTIFY_UNBIND_DRIVER:
1016 if (!domain) 1149 if (!domain)
1017 goto out; 1150 goto out;
1018 detach_device(domain, devid); 1151 detach_device(domain, devid);
@@ -1022,7 +1155,7 @@ static int device_change_notifier(struct notifier_block *nb,
1022 dma_domain = find_protection_domain(devid); 1155 dma_domain = find_protection_domain(devid);
1023 if (dma_domain) 1156 if (dma_domain)
1024 goto out; 1157 goto out;
1025 dma_domain = dma_ops_domain_alloc(iommu, order); 1158 dma_domain = dma_ops_domain_alloc(iommu);
1026 if (!dma_domain) 1159 if (!dma_domain)
1027 goto out; 1160 goto out;
1028 dma_domain->target_dev = devid; 1161 dma_domain->target_dev = devid;
@@ -1133,8 +1266,8 @@ static int get_device_resources(struct device *dev,
1133 dma_dom = (*iommu)->default_dom; 1266 dma_dom = (*iommu)->default_dom;
1134 *domain = &dma_dom->domain; 1267 *domain = &dma_dom->domain;
1135 attach_device(*iommu, *domain, *bdf); 1268 attach_device(*iommu, *domain, *bdf);
1136 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1269 DUMP_printk("Using protection domain %d for device %s\n",
1137 "device %s\n", (*domain)->id, dev_name(dev)); 1270 (*domain)->id, dev_name(dev));
1138 } 1271 }
1139 1272
1140 if (domain_for_device(_bdf) == NULL) 1273 if (domain_for_device(_bdf) == NULL)
@@ -1144,6 +1277,66 @@ static int get_device_resources(struct device *dev,
1144} 1277}
1145 1278
1146/* 1279/*
1280 * This function is called when the pte_page is not yet allocated
1281 */
1282static u64* alloc_pte(struct protection_domain *dom,
1283 unsigned long address, u64 **pte_page, gfp_t gfp)
1284{
1285 u64 *pte, *page;
1286
1287 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
1288
1289 if (!IOMMU_PTE_PRESENT(*pte)) {
1290 page = (u64 *)get_zeroed_page(gfp);
1291 if (!page)
1292 return NULL;
1293 *pte = IOMMU_L2_PDE(virt_to_phys(page));
1294 }
1295
1296 pte = IOMMU_PTE_PAGE(*pte);
1297 pte = &pte[IOMMU_PTE_L1_INDEX(address)];
1298
1299 if (!IOMMU_PTE_PRESENT(*pte)) {
1300 page = (u64 *)get_zeroed_page(gfp);
1301 if (!page)
1302 return NULL;
1303 *pte = IOMMU_L1_PDE(virt_to_phys(page));
1304 }
1305
1306 pte = IOMMU_PTE_PAGE(*pte);
1307
1308 if (pte_page)
1309 *pte_page = pte;
1310
1311 pte = &pte[IOMMU_PTE_L0_INDEX(address)];
1312
1313 return pte;
1314}
1315
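alloc_pte() above walks a three-level page table, allocating any missing intermediate level on the way down. The IOMMU_PTE_L{2,1,0}_INDEX helpers live in a header outside this diff; assuming 4 KB pages and 512-entry levels, they plausibly slice the DMA address like this (a sketch, not the in-tree definitions):

    #include <stdio.h>
    #include <stdint.h>

    /* assumed layout: 12-bit page offset, 9 index bits per level */
    #define L0_INDEX(a) (((uint64_t)(a) >> 12) & 0x1ff)
    #define L1_INDEX(a) (((uint64_t)(a) >> 21) & 0x1ff)
    #define L2_INDEX(a) (((uint64_t)(a) >> 30) & 0x1ff)

    int main(void)
    {
        uint64_t addr = 0x40201000ULL;
        printf("L2=%llu L1=%llu L0=%llu\n",   /* prints L2=1 L1=1 L0=1 */
               (unsigned long long)L2_INDEX(addr),
               (unsigned long long)L1_INDEX(addr),
               (unsigned long long)L0_INDEX(addr));
        return 0;
    }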
1316/*
1317 * This function fetches the PTE for a given address in the aperture
1318 */
1319static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1320 unsigned long address)
1321{
1322 struct aperture_range *aperture;
1323 u64 *pte, *pte_page;
1324
1325 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1326 if (!aperture)
1327 return NULL;
1328
1329 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1330 if (!pte) {
1331 pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
1332 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1333 } else
1334 pte += IOMMU_PTE_L0_INDEX(address);
1335
1336 return pte;
1337}
1338
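dma_ops_get_pte() first selects the aperture range covering the address, then the pte page within it. APERTURE_RANGE_INDEX and APERTURE_PAGE_INDEX are not shown in this diff; given that the aperture grows in 128 MB steps (see the __map_single retry path below), a plausible shape for them, offered purely as an assumption:

    #include <stdio.h>
    #include <stdint.h>

    /* assumed: 128 MB per range, 64 pte pages per range (2 MB of DMA space each) */
    #define APERTURE_RANGE_SHIFT    27
    #define APERTURE_RANGE_INDEX(a) ((uint64_t)(a) >> APERTURE_RANGE_SHIFT)
    #define APERTURE_PAGE_INDEX(a)  (((uint64_t)(a) >> 21) & 0x3f)

    int main(void)
    {
        uint64_t addr = (1ULL << 27) + (3ULL << 21);   /* second range, 4th pte page */
        printf("range=%llu page=%llu\n",               /* prints range=1 page=3 */
               (unsigned long long)APERTURE_RANGE_INDEX(addr),
               (unsigned long long)APERTURE_PAGE_INDEX(addr));
        return 0;
    }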
1339/*
1147 * This is the generic map function. It maps one 4kb page at paddr to 1340 * This is the generic map function. It maps one 4kb page at paddr to
1148 * the given address in the DMA address space for the domain. 1341 * the given address in the DMA address space for the domain.
1149 */ 1342 */
@@ -1159,8 +1352,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
1159 1352
1160 paddr &= PAGE_MASK; 1353 paddr &= PAGE_MASK;
1161 1354
1162 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; 1355 pte = dma_ops_get_pte(dom, address);
1163 pte += IOMMU_PTE_L0_INDEX(address); 1356 if (!pte)
1357 return bad_dma_address;
1164 1358
1165 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; 1359 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1166 1360
@@ -1185,14 +1379,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
1185 struct dma_ops_domain *dom, 1379 struct dma_ops_domain *dom,
1186 unsigned long address) 1380 unsigned long address)
1187{ 1381{
1382 struct aperture_range *aperture;
1188 u64 *pte; 1383 u64 *pte;
1189 1384
1190 if (address >= dom->aperture_size) 1385 if (address >= dom->aperture_size)
1191 return; 1386 return;
1192 1387
1193 WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); 1388 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1389 if (!aperture)
1390 return;
1391
1392 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1393 if (!pte)
1394 return;
1194 1395
1195 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
1196 pte += IOMMU_PTE_L0_INDEX(address); 1396 pte += IOMMU_PTE_L0_INDEX(address);
1197 1397
1198 WARN_ON(!*pte); 1398 WARN_ON(!*pte);
@@ -1216,7 +1416,7 @@ static dma_addr_t __map_single(struct device *dev,
1216 u64 dma_mask) 1416 u64 dma_mask)
1217{ 1417{
1218 dma_addr_t offset = paddr & ~PAGE_MASK; 1418 dma_addr_t offset = paddr & ~PAGE_MASK;
1219 dma_addr_t address, start; 1419 dma_addr_t address, start, ret;
1220 unsigned int pages; 1420 unsigned int pages;
1221 unsigned long align_mask = 0; 1421 unsigned long align_mask = 0;
1222 int i; 1422 int i;
@@ -1232,14 +1432,33 @@ static dma_addr_t __map_single(struct device *dev,
1232 if (align) 1432 if (align)
1233 align_mask = (1UL << get_order(size)) - 1; 1433 align_mask = (1UL << get_order(size)) - 1;
1234 1434
1435retry:
1235 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, 1436 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
1236 dma_mask); 1437 dma_mask);
1237 if (unlikely(address == bad_dma_address)) 1438 if (unlikely(address == bad_dma_address)) {
1238 goto out; 1439 /*
1440 * setting next_address here will let the address
1441 * allocator only scan the new allocated range in the
1442 * first run. This is a small optimization.
1443 */
1444 dma_dom->next_address = dma_dom->aperture_size;
1445
1446 if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
1447 goto out;
1448
1449 /*
1450 * aperture was successfully enlarged by 128 MB, try
1451 * allocation again
1452 */
1453 goto retry;
1454 }
1239 1455
1240 start = address; 1456 start = address;
1241 for (i = 0; i < pages; ++i) { 1457 for (i = 0; i < pages; ++i) {
1242 dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); 1458 ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
1459 if (ret == bad_dma_address)
1460 goto out_unmap;
1461
1243 paddr += PAGE_SIZE; 1462 paddr += PAGE_SIZE;
1244 start += PAGE_SIZE; 1463 start += PAGE_SIZE;
1245 } 1464 }
@@ -1255,6 +1474,17 @@ static dma_addr_t __map_single(struct device *dev,
1255 1474
1256out: 1475out:
1257 return address; 1476 return address;
1477
1478out_unmap:
1479
1480 for (--i; i >= 0; --i) {
1481 start -= PAGE_SIZE;
1482 dma_ops_domain_unmap(iommu, dma_dom, start);
1483 }
1484
1485 dma_ops_free_addresses(dma_dom, address, pages);
1486
1487 return bad_dma_address;
1258} 1488}
1259 1489
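The new retry path in __map_single() turns address-space exhaustion into a grow-and-retry loop: on failure it bumps next_address past the old aperture so the allocator scans only the fresh range, asks alloc_new_range() for another 128 MB, and tries again. A hedged user-space model of that control flow, with alloc_addr() and grow() standing in for the kernel functions:

    #include <stdio.h>

    #define BAD_ADDR (~0UL)

    static unsigned long capacity = 4, used;

    static unsigned long alloc_addr(void)      /* stand-in for dma_ops_alloc_addresses() */
    {
        return used < capacity ? used++ : BAD_ADDR;
    }

    static int grow(void)                      /* stand-in for alloc_new_range() */
    {
        capacity += 4;                         /* the kernel grows by 128 MB per step */
        return 0;                              /* 0 on success, as in the hunk above */
    }

    static unsigned long map_one(void)
    {
        unsigned long a;
    retry:
        a = alloc_addr();
        if (a == BAD_ADDR) {
            if (grow())
                return BAD_ADDR;               /* give up only if growing failed */
            goto retry;
        }
        return a;
    }

    int main(void)
    {
        for (int i = 0; i < 6; ++i)
            printf("%lu\n", map_one());        /* prints 0..5; growth happens at 4 */
        return 0;
    }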
1260/* 1490/*
@@ -1537,8 +1767,10 @@ static void *alloc_coherent(struct device *dev, size_t size,
1537 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1767 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1538 size, DMA_BIDIRECTIONAL, true, dma_mask); 1768 size, DMA_BIDIRECTIONAL, true, dma_mask);
1539 1769
1540 if (*dma_addr == bad_dma_address) 1770 if (*dma_addr == bad_dma_address) {
1771 spin_unlock_irqrestore(&domain->lock, flags);
1541 goto out_free; 1772 goto out_free;
1773 }
1542 1774
1543 iommu_completion_wait(iommu); 1775 iommu_completion_wait(iommu);
1544 1776
@@ -1625,7 +1857,6 @@ static void prealloc_protection_domains(void)
1625 struct pci_dev *dev = NULL; 1857 struct pci_dev *dev = NULL;
1626 struct dma_ops_domain *dma_dom; 1858 struct dma_ops_domain *dma_dom;
1627 struct amd_iommu *iommu; 1859 struct amd_iommu *iommu;
1628 int order = amd_iommu_aperture_order;
1629 u16 devid; 1860 u16 devid;
1630 1861
1631 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1862 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1638,7 +1869,7 @@ static void prealloc_protection_domains(void)
1638 iommu = amd_iommu_rlookup_table[devid]; 1869 iommu = amd_iommu_rlookup_table[devid];
1639 if (!iommu) 1870 if (!iommu)
1640 continue; 1871 continue;
1641 dma_dom = dma_ops_domain_alloc(iommu, order); 1872 dma_dom = dma_ops_domain_alloc(iommu);
1642 if (!dma_dom) 1873 if (!dma_dom)
1643 continue; 1874 continue;
1644 init_unity_mappings_for_device(dma_dom, devid); 1875 init_unity_mappings_for_device(dma_dom, devid);
@@ -1664,7 +1895,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
1664int __init amd_iommu_init_dma_ops(void) 1895int __init amd_iommu_init_dma_ops(void)
1665{ 1896{
1666 struct amd_iommu *iommu; 1897 struct amd_iommu *iommu;
1667 int order = amd_iommu_aperture_order;
1668 int ret; 1898 int ret;
1669 1899
1670 /* 1900 /*
@@ -1672,8 +1902,8 @@ int __init amd_iommu_init_dma_ops(void)
1672 * found in the system. Devices not assigned to any other 1902 * found in the system. Devices not assigned to any other
1673 * protection domain will be assigned to the default one. 1903 * protection domain will be assigned to the default one.
1674 */ 1904 */
1675 list_for_each_entry(iommu, &amd_iommu_list, list) { 1905 for_each_iommu(iommu) {
1676 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1906 iommu->default_dom = dma_ops_domain_alloc(iommu);
1677 if (iommu->default_dom == NULL) 1907 if (iommu->default_dom == NULL)
1678 return -ENOMEM; 1908 return -ENOMEM;
1679 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 1909 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1710,7 +1940,7 @@ int __init amd_iommu_init_dma_ops(void)
1710 1940
1711free_domains: 1941free_domains:
1712 1942
1713 list_for_each_entry(iommu, &amd_iommu_list, list) { 1943 for_each_iommu(iommu) {
1714 if (iommu->default_dom) 1944 if (iommu->default_dom)
1715 dma_ops_domain_free(iommu->default_dom); 1945 dma_ops_domain_free(iommu->default_dom);
1716 } 1946 }
@@ -1842,7 +2072,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
1842 2072
1843 old_domain = domain_for_device(devid); 2073 old_domain = domain_for_device(devid);
1844 if (old_domain) 2074 if (old_domain)
1845 return -EBUSY; 2075 detach_device(old_domain, devid);
1846 2076
1847 attach_device(iommu, domain, devid); 2077 attach_device(iommu, domain, devid);
1848 2078
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be0902dac..238989ec077d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -115,15 +115,21 @@ struct ivmd_header {
115 u64 range_length; 115 u64 range_length;
116} __attribute__((packed)); 116} __attribute__((packed));
117 117
118bool amd_iommu_dump;
119
118static int __initdata amd_iommu_detected; 120static int __initdata amd_iommu_detected;
119 121
120u16 amd_iommu_last_bdf; /* largest PCI device id we have 122u16 amd_iommu_last_bdf; /* largest PCI device id we have
121 to handle */ 123 to handle */
122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 124LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
123 we find in ACPI */ 125 we find in ACPI */
124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 126#ifdef CONFIG_IOMMU_STRESS
127bool amd_iommu_isolate = false;
128#else
125bool amd_iommu_isolate = true; /* if true, device isolation is 129bool amd_iommu_isolate = true; /* if true, device isolation is
126 enabled */ 130 enabled */
131#endif
132
127bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 133bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
128 134
129LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 135LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -175,7 +181,7 @@ static inline void update_last_devid(u16 devid)
175static inline unsigned long tbl_size(int entry_size) 181static inline unsigned long tbl_size(int entry_size)
176{ 182{
177 unsigned shift = PAGE_SHIFT + 183 unsigned shift = PAGE_SHIFT +
178 get_order(amd_iommu_last_bdf * entry_size); 184 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
179 185
180 return 1UL << shift; 186 return 1UL << shift;
181} 187}
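The tbl_size() change fixes an off-by-one: device IDs run from 0 to amd_iommu_last_bdf inclusive, so the tables need last_bdf + 1 entries. With a last BDF of 0x80 and 32-byte entries, the old formula sizes the table at exactly one page and the last entry falls off the end. A quick user-space check, with get_order() reimplemented to mirror the kernel helper (an approximation, assuming 4 KB pages):

    #include <stdio.h>

    /* approximates the kernel's get_order(): log2 of the page count, rounded up */
    static int get_order(unsigned long size)
    {
        unsigned long pages = (size + 4095) / 4096;
        int order = 0;
        while ((1UL << order) < pages)
            order++;
        return order;
    }

    int main(void)
    {
        unsigned long last_bdf = 0x80, entry_size = 32;
        printf("old order %d, new order %d\n",   /* prints "old order 0, new order 1" */
               get_order(last_bdf * entry_size),
               get_order((last_bdf + 1) * entry_size));
        return 0;
    }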
@@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size)
193 * This function set the exclusion range in the IOMMU. DMA accesses to the 199 * This function set the exclusion range in the IOMMU. DMA accesses to the
194 * exclusion range are passed through untranslated 200 * exclusion range are passed through untranslated
195 */ 201 */
196static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) 202static void iommu_set_exclusion_range(struct amd_iommu *iommu)
197{ 203{
198 u64 start = iommu->exclusion_start & PAGE_MASK; 204 u64 start = iommu->exclusion_start & PAGE_MASK;
199 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; 205 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
225} 231}
226 232
227/* Generic functions to enable/disable certain features of the IOMMU. */ 233/* Generic functions to enable/disable certain features of the IOMMU. */
228static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 234static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
229{ 235{
230 u32 ctrl; 236 u32 ctrl;
231 237
@@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
244} 250}
245 251
246/* Function to enable the hardware */ 252/* Function to enable the hardware */
247static void __init iommu_enable(struct amd_iommu *iommu) 253static void iommu_enable(struct amd_iommu *iommu)
248{ 254{
249 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", 255 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
250 dev_name(&iommu->dev->dev), iommu->cap_ptr); 256 dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -252,11 +258,9 @@ static void __init iommu_enable(struct amd_iommu *iommu)
252 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 258 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
253} 259}
254 260
255/* Function to enable IOMMU event logging and event interrupts */ 261static void iommu_disable(struct amd_iommu *iommu)
256static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
257{ 262{
258 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 263 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
259 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
260} 264}
261 265
262/* 266/*
@@ -413,25 +417,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
413{ 417{
414 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 418 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
415 get_order(CMD_BUFFER_SIZE)); 419 get_order(CMD_BUFFER_SIZE));
416 u64 entry;
417 420
418 if (cmd_buf == NULL) 421 if (cmd_buf == NULL)
419 return NULL; 422 return NULL;
420 423
421 iommu->cmd_buf_size = CMD_BUFFER_SIZE; 424 iommu->cmd_buf_size = CMD_BUFFER_SIZE;
422 425
423 entry = (u64)virt_to_phys(cmd_buf); 426 return cmd_buf;
427}
428
429/*
430 * This function writes the command buffer address to the hardware and
431 * enables it.
432 */
433static void iommu_enable_command_buffer(struct amd_iommu *iommu)
434{
435 u64 entry;
436
437 BUG_ON(iommu->cmd_buf == NULL);
438
439 entry = (u64)virt_to_phys(iommu->cmd_buf);
424 entry |= MMIO_CMD_SIZE_512; 440 entry |= MMIO_CMD_SIZE_512;
441
425 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 442 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
426 &entry, sizeof(entry)); 443 &entry, sizeof(entry));
427 444
428 /* set head and tail to zero manually */ 445 /* set head and tail to zero manually */
429 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 446 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
430 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 447 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
431 448
432 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 449 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
433
434 return cmd_buf;
435} 450}
436 451
437static void __init free_command_buffer(struct amd_iommu *iommu) 452static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +458,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
443/* allocates the memory where the IOMMU will log its events to */ 458/* allocates the memory where the IOMMU will log its events to */
444static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) 459static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
445{ 460{
446 u64 entry;
447 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 461 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
448 get_order(EVT_BUFFER_SIZE)); 462 get_order(EVT_BUFFER_SIZE));
449 463
450 if (iommu->evt_buf == NULL) 464 if (iommu->evt_buf == NULL)
451 return NULL; 465 return NULL;
452 466
467 return iommu->evt_buf;
468}
469
470static void iommu_enable_event_buffer(struct amd_iommu *iommu)
471{
472 u64 entry;
473
474 BUG_ON(iommu->evt_buf == NULL);
475
453 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; 476 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
477
454 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 478 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
455 &entry, sizeof(entry)); 479 &entry, sizeof(entry));
456 480
457 iommu->evt_buf_size = EVT_BUFFER_SIZE; 481 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
458
459 return iommu->evt_buf;
460} 482}
461 483
462static void __init free_event_buffer(struct amd_iommu *iommu) 484static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -596,32 +618,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
596 p += sizeof(struct ivhd_header); 618 p += sizeof(struct ivhd_header);
597 end += h->length; 619 end += h->length;
598 620
621
599 while (p < end) { 622 while (p < end) {
600 e = (struct ivhd_entry *)p; 623 e = (struct ivhd_entry *)p;
601 switch (e->type) { 624 switch (e->type) {
602 case IVHD_DEV_ALL: 625 case IVHD_DEV_ALL:
626
627 DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
628 " last devid: %02x:%02x.%x flags: %02x\n",
629 PCI_BUS(iommu->first_device),
630 PCI_SLOT(iommu->first_device),
631 PCI_FUNC(iommu->first_device),
632 PCI_BUS(iommu->last_device),
633 PCI_SLOT(iommu->last_device),
634 PCI_FUNC(iommu->last_device),
635 e->flags);
636
603 for (dev_i = iommu->first_device; 637 for (dev_i = iommu->first_device;
604 dev_i <= iommu->last_device; ++dev_i) 638 dev_i <= iommu->last_device; ++dev_i)
605 set_dev_entry_from_acpi(iommu, dev_i, 639 set_dev_entry_from_acpi(iommu, dev_i,
606 e->flags, 0); 640 e->flags, 0);
607 break; 641 break;
608 case IVHD_DEV_SELECT: 642 case IVHD_DEV_SELECT:
643
644 DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
645 "flags: %02x\n",
646 PCI_BUS(e->devid),
647 PCI_SLOT(e->devid),
648 PCI_FUNC(e->devid),
649 e->flags);
650
609 devid = e->devid; 651 devid = e->devid;
610 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 652 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
611 break; 653 break;
612 case IVHD_DEV_SELECT_RANGE_START: 654 case IVHD_DEV_SELECT_RANGE_START:
655
656 DUMP_printk(" DEV_SELECT_RANGE_START\t "
657 "devid: %02x:%02x.%x flags: %02x\n",
658 PCI_BUS(e->devid),
659 PCI_SLOT(e->devid),
660 PCI_FUNC(e->devid),
661 e->flags);
662
613 devid_start = e->devid; 663 devid_start = e->devid;
614 flags = e->flags; 664 flags = e->flags;
615 ext_flags = 0; 665 ext_flags = 0;
616 alias = false; 666 alias = false;
617 break; 667 break;
618 case IVHD_DEV_ALIAS: 668 case IVHD_DEV_ALIAS:
669
670 DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
671 "flags: %02x devid_to: %02x:%02x.%x\n",
672 PCI_BUS(e->devid),
673 PCI_SLOT(e->devid),
674 PCI_FUNC(e->devid),
675 e->flags,
676 PCI_BUS(e->ext >> 8),
677 PCI_SLOT(e->ext >> 8),
678 PCI_FUNC(e->ext >> 8));
679
619 devid = e->devid; 680 devid = e->devid;
620 devid_to = e->ext >> 8; 681 devid_to = e->ext >> 8;
621 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 682 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
622 amd_iommu_alias_table[devid] = devid_to; 683 amd_iommu_alias_table[devid] = devid_to;
623 break; 684 break;
624 case IVHD_DEV_ALIAS_RANGE: 685 case IVHD_DEV_ALIAS_RANGE:
686
687 DUMP_printk(" DEV_ALIAS_RANGE\t\t "
688 "devid: %02x:%02x.%x flags: %02x "
689 "devid_to: %02x:%02x.%x\n",
690 PCI_BUS(e->devid),
691 PCI_SLOT(e->devid),
692 PCI_FUNC(e->devid),
693 e->flags,
694 PCI_BUS(e->ext >> 8),
695 PCI_SLOT(e->ext >> 8),
696 PCI_FUNC(e->ext >> 8));
697
625 devid_start = e->devid; 698 devid_start = e->devid;
626 flags = e->flags; 699 flags = e->flags;
627 devid_to = e->ext >> 8; 700 devid_to = e->ext >> 8;
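The DUMP_printk lines added above render a 16-bit IVHD device ID as bus:slot.function. The PCI_BUS/PCI_SLOT/PCI_FUNC helpers are not part of this hunk; they presumably apply the standard BDF split (bus in bits 15:8, slot in bits 7:3, function in bits 2:0), as in this sketch:

    #include <stdio.h>
    #include <stdint.h>

    /* standard PCI bus/device/function split of a 16-bit device id */
    #define PCI_BUS(x)  (((x) >> 8) & 0xff)
    #define PCI_SLOT(x) (((x) >> 3) & 0x1f)
    #define PCI_FUNC(x) ((x) & 0x07)

    int main(void)
    {
        uint16_t devid = 0x0a10;   /* bus 0x0a, slot 0x02, function 0 */
        printf("%02x:%02x.%x\n", PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid));
        return 0;                  /* prints 0a:02.0 */
    }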
@@ -629,17 +702,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
629 alias = true; 702 alias = true;
630 break; 703 break;
631 case IVHD_DEV_EXT_SELECT: 704 case IVHD_DEV_EXT_SELECT:
705
706 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
707 "flags: %02x ext: %08x\n",
708 PCI_BUS(e->devid),
709 PCI_SLOT(e->devid),
710 PCI_FUNC(e->devid),
711 e->flags, e->ext);
712
632 devid = e->devid; 713 devid = e->devid;
633 set_dev_entry_from_acpi(iommu, devid, e->flags, 714 set_dev_entry_from_acpi(iommu, devid, e->flags,
634 e->ext); 715 e->ext);
635 break; 716 break;
636 case IVHD_DEV_EXT_SELECT_RANGE: 717 case IVHD_DEV_EXT_SELECT_RANGE:
718
719 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
720 "%02x:%02x.%x flags: %02x ext: %08x\n",
721 PCI_BUS(e->devid),
722 PCI_SLOT(e->devid),
723 PCI_FUNC(e->devid),
724 e->flags, e->ext);
725
637 devid_start = e->devid; 726 devid_start = e->devid;
638 flags = e->flags; 727 flags = e->flags;
639 ext_flags = e->ext; 728 ext_flags = e->ext;
640 alias = false; 729 alias = false;
641 break; 730 break;
642 case IVHD_DEV_RANGE_END: 731 case IVHD_DEV_RANGE_END:
732
733 DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
734 PCI_BUS(e->devid),
735 PCI_SLOT(e->devid),
736 PCI_FUNC(e->devid));
737
643 devid = e->devid; 738 devid = e->devid;
644 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 739 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
645 if (alias) 740 if (alias)
@@ -679,7 +774,7 @@ static void __init free_iommu_all(void)
679{ 774{
680 struct amd_iommu *iommu, *next; 775 struct amd_iommu *iommu, *next;
681 776
682 list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { 777 for_each_iommu_safe(iommu, next) {
683 list_del(&iommu->list); 778 list_del(&iommu->list);
684 free_iommu_one(iommu); 779 free_iommu_one(iommu);
685 kfree(iommu); 780 kfree(iommu);
@@ -710,7 +805,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
710 if (!iommu->mmio_base) 805 if (!iommu->mmio_base)
711 return -ENOMEM; 806 return -ENOMEM;
712 807
713 iommu_set_device_table(iommu);
714 iommu->cmd_buf = alloc_command_buffer(iommu); 808 iommu->cmd_buf = alloc_command_buffer(iommu);
715 if (!iommu->cmd_buf) 809 if (!iommu->cmd_buf)
716 return -ENOMEM; 810 return -ENOMEM;
@@ -746,6 +840,15 @@ static int __init init_iommu_all(struct acpi_table_header *table)
746 h = (struct ivhd_header *)p; 840 h = (struct ivhd_header *)p;
747 switch (*p) { 841 switch (*p) {
748 case ACPI_IVHD_TYPE: 842 case ACPI_IVHD_TYPE:
843
844 DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
845 "seg: %d flags: %01x info %04x\n",
846 PCI_BUS(h->devid), PCI_SLOT(h->devid),
847 PCI_FUNC(h->devid), h->cap_ptr,
848 h->pci_seg, h->flags, h->info);
849 DUMP_printk(" mmio-addr: %016llx\n",
850 h->mmio_phys);
851
749 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 852 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
750 if (iommu == NULL) 853 if (iommu == NULL)
751 return -ENOMEM; 854 return -ENOMEM;
@@ -773,56 +876,9 @@ static int __init init_iommu_all(struct acpi_table_header *table)
773 * 876 *
774 ****************************************************************************/ 877 ****************************************************************************/
775 878
776static int __init iommu_setup_msix(struct amd_iommu *iommu)
777{
778 struct amd_iommu *curr;
779 struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
780 int nvec = 0, i;
781
782 list_for_each_entry(curr, &amd_iommu_list, list) {
783 if (curr->dev == iommu->dev) {
784 entries[nvec].entry = curr->evt_msi_num;
785 entries[nvec].vector = 0;
786 curr->int_enabled = true;
787 nvec++;
788 }
789 }
790
791 if (pci_enable_msix(iommu->dev, entries, nvec)) {
792 pci_disable_msix(iommu->dev);
793 return 1;
794 }
795
796 for (i = 0; i < nvec; ++i) {
797 int r = request_irq(entries->vector, amd_iommu_int_handler,
798 IRQF_SAMPLE_RANDOM,
799 "AMD IOMMU",
800 NULL);
801 if (r)
802 goto out_free;
803 }
804
805 return 0;
806
807out_free:
808 for (i -= 1; i >= 0; --i)
809 free_irq(entries->vector, NULL);
810
811 pci_disable_msix(iommu->dev);
812
813 return 1;
814}
815
816static int __init iommu_setup_msi(struct amd_iommu *iommu) 879static int __init iommu_setup_msi(struct amd_iommu *iommu)
817{ 880{
818 int r; 881 int r;
819 struct amd_iommu *curr;
820
821 list_for_each_entry(curr, &amd_iommu_list, list) {
822 if (curr->dev == iommu->dev)
823 curr->int_enabled = true;
824 }
825
826 882
827 if (pci_enable_msi(iommu->dev)) 883 if (pci_enable_msi(iommu->dev))
828 return 1; 884 return 1;
@@ -837,17 +893,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
837 return 1; 893 return 1;
838 } 894 }
839 895
896 iommu->int_enabled = true;
897 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
898
840 return 0; 899 return 0;
841} 900}
842 901
843static int __init iommu_init_msi(struct amd_iommu *iommu) 902static int iommu_init_msi(struct amd_iommu *iommu)
844{ 903{
845 if (iommu->int_enabled) 904 if (iommu->int_enabled)
846 return 0; 905 return 0;
847 906
848 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) 907 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
849 return iommu_setup_msix(iommu);
850 else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
851 return iommu_setup_msi(iommu); 908 return iommu_setup_msi(iommu);
852 909
853 return 1; 910 return 1;
@@ -899,6 +956,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
899static int __init init_unity_map_range(struct ivmd_header *m) 956static int __init init_unity_map_range(struct ivmd_header *m)
900{ 957{
901 struct unity_map_entry *e = 0; 958 struct unity_map_entry *e = 0;
959 char *s;
902 960
903 e = kzalloc(sizeof(*e), GFP_KERNEL); 961 e = kzalloc(sizeof(*e), GFP_KERNEL);
904 if (e == NULL) 962 if (e == NULL)
@@ -906,14 +964,19 @@ static int __init init_unity_map_range(struct ivmd_header *m)
906 964
907 switch (m->type) { 965 switch (m->type) {
908 default: 966 default:
967 kfree(e);
968 return 0;
909 case ACPI_IVMD_TYPE: 969 case ACPI_IVMD_TYPE:
970 s = "IVMD_TYPE\t\t\t";
910 e->devid_start = e->devid_end = m->devid; 971 e->devid_start = e->devid_end = m->devid;
911 break; 972 break;
912 case ACPI_IVMD_TYPE_ALL: 973 case ACPI_IVMD_TYPE_ALL:
974 s = "IVMD_TYPE_ALL\t\t";
913 e->devid_start = 0; 975 e->devid_start = 0;
914 e->devid_end = amd_iommu_last_bdf; 976 e->devid_end = amd_iommu_last_bdf;
915 break; 977 break;
916 case ACPI_IVMD_TYPE_RANGE: 978 case ACPI_IVMD_TYPE_RANGE:
979 s = "IVMD_TYPE_RANGE\t\t";
917 e->devid_start = m->devid; 980 e->devid_start = m->devid;
918 e->devid_end = m->aux; 981 e->devid_end = m->aux;
919 break; 982 break;
@@ -922,6 +985,13 @@ static int __init init_unity_map_range(struct ivmd_header *m)
922 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 985 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
923 e->prot = m->flags >> 1; 986 e->prot = m->flags >> 1;
924 987
988 DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
989 " range_start: %016llx range_end: %016llx flags: %x\n", s,
990 PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
991 PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
992 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
993 e->address_start, e->address_end, m->flags);
994
925 list_add_tail(&e->list, &amd_iommu_unity_map); 995 list_add_tail(&e->list, &amd_iommu_unity_map);
926 996
927 return 0; 997 return 0;
@@ -967,18 +1037,28 @@ static void init_device_table(void)
967 * This function finally enables all IOMMUs found in the system after 1037 * This function finally enables all IOMMUs found in the system after
968 * they have been initialized 1038 * they have been initialized
969 */ 1039 */
970static void __init enable_iommus(void) 1040static void enable_iommus(void)
971{ 1041{
972 struct amd_iommu *iommu; 1042 struct amd_iommu *iommu;
973 1043
974 list_for_each_entry(iommu, &amd_iommu_list, list) { 1044 for_each_iommu(iommu) {
1045 iommu_set_device_table(iommu);
1046 iommu_enable_command_buffer(iommu);
1047 iommu_enable_event_buffer(iommu);
975 iommu_set_exclusion_range(iommu); 1048 iommu_set_exclusion_range(iommu);
976 iommu_init_msi(iommu); 1049 iommu_init_msi(iommu);
977 iommu_enable_event_logging(iommu);
978 iommu_enable(iommu); 1050 iommu_enable(iommu);
979 } 1051 }
980} 1052}
981 1053
1054static void disable_iommus(void)
1055{
1056 struct amd_iommu *iommu;
1057
1058 for_each_iommu(iommu)
1059 iommu_disable(iommu);
1060}
1061
982/* 1062/*
983 * Suspend/Resume support 1063 * Suspend/Resume support
984 * disable suspend until real resume implemented 1064 * disable suspend until real resume implemented
@@ -986,12 +1066,31 @@ static void __init enable_iommus(void)
986 1066
987static int amd_iommu_resume(struct sys_device *dev) 1067static int amd_iommu_resume(struct sys_device *dev)
988{ 1068{
1069 /*
1070 * Disable IOMMUs before reprogramming the hardware registers.
1071 * IOMMU is still enabled from the resume kernel.
1072 */
1073 disable_iommus();
1074
1075 /* re-load the hardware */
1076 enable_iommus();
1077
1078 /*
1079 * we have to flush after the IOMMUs are enabled because a
1080 * disabled IOMMU will never execute the commands we send
1081 */
1082 amd_iommu_flush_all_domains();
1083 amd_iommu_flush_all_devices();
1084
989 return 0; 1085 return 0;
990} 1086}
991 1087
992static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) 1088static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
993{ 1089{
994 return -EINVAL; 1090 /* disable IOMMUs to go out of the way for BIOS */
1091 disable_iommus();
1092
1093 return 0;
995} 1094}
996 1095
997static struct sysdev_class amd_iommu_sysdev_class = { 1096static struct sysdev_class amd_iommu_sysdev_class = {
@@ -1137,9 +1236,6 @@ int __init amd_iommu_init(void)
1137 1236
1138 enable_iommus(); 1237 enable_iommus();
1139 1238
1140 printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
1141 (1 << (amd_iommu_aperture_order-20)));
1142
1143 printk(KERN_INFO "AMD IOMMU: device isolation "); 1239 printk(KERN_INFO "AMD IOMMU: device isolation ");
1144 if (amd_iommu_isolate) 1240 if (amd_iommu_isolate)
1145 printk("enabled\n"); 1241 printk("enabled\n");
@@ -1211,6 +1307,13 @@ void __init amd_iommu_detect(void)
1211 * 1307 *
1212 ****************************************************************************/ 1308 ****************************************************************************/
1213 1309
1310static int __init parse_amd_iommu_dump(char *str)
1311{
1312 amd_iommu_dump = true;
1313
1314 return 1;
1315}
1316
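parse_amd_iommu_dump() above just sets the flag behind the new amd_iommu_dump boot parameter; the DUMP_printk calls sprinkled through the ACPI parser presumably gate their output on it. The in-tree macro is defined in a header this diff does not touch; a user-space analog of the likely shape (using the GNU ##__VA_ARGS__ extension, as the kernel itself does):

    #include <stdbool.h>
    #include <stdio.h>

    static bool amd_iommu_dump;   /* set from the boot command line in the kernel */

    /* analog of the (unshown) DUMP_printk macro: print only when dumping is on */
    #define DUMP_printk(fmt, ...) \
        do { if (amd_iommu_dump) printf("AMD IOMMU: " fmt, ##__VA_ARGS__); } while (0)

    int main(void)
    {
        DUMP_printk("suppressed\n");           /* flag off: prints nothing */
        amd_iommu_dump = true;
        DUMP_printk("devid: %02x\n", 0x10);    /* flag on: prints */
        return 0;
    }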
1214static int __init parse_amd_iommu_options(char *str) 1317static int __init parse_amd_iommu_options(char *str)
1215{ 1318{
1216 for (; *str; ++str) { 1319 for (; *str; ++str) {
@@ -1225,15 +1328,5 @@ static int __init parse_amd_iommu_options(char *str)
1225 return 1; 1328 return 1;
1226} 1329}
1227 1330
1228static int __init parse_amd_iommu_size_options(char *str) 1331__setup("amd_iommu_dump", parse_amd_iommu_dump);
1229{
1230 unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
1231
1232 if ((order > 24) && (order < 31))
1233 amd_iommu_aperture_order = order;
1234
1235 return 1;
1236}
1237
1238__setup("amd_iommu=", parse_amd_iommu_options); 1332__setup("amd_iommu=", parse_amd_iommu_options);
1239__setup("amd_iommu_size=", parse_amd_iommu_size_options);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f2870920f246..a4c9cf0bf70b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -98,6 +98,29 @@ early_param("lapic", parse_lapic);
98/* Local APIC was disabled by the BIOS and enabled by the kernel */ 98/* Local APIC was disabled by the BIOS and enabled by the kernel */
99static int enabled_via_apicbase; 99static int enabled_via_apicbase;
100 100
101/*
102 * Handle interrupt mode configuration register (IMCR).
103 * This register controls whether the interrupt signals
104 * that reach the BSP come from the master PIC or from the
105 * local APIC. Before entering Symmetric I/O Mode, either
106 * the BIOS or the operating system must switch out of
107 * PIC Mode by changing the IMCR.
108 */
109static inline void imcr_pic_to_apic(void)
110{
111 /* select IMCR register */
112 outb(0x70, 0x22);
113 /* NMI and 8259 INTR go through APIC */
114 outb(0x01, 0x23);
115}
116
117static inline void imcr_apic_to_pic(void)
118{
119 /* select IMCR register */
120 outb(0x70, 0x22);
121 /* NMI and 8259 INTR go directly to BSP */
122 outb(0x00, 0x23);
123}
101#endif 124#endif
102 125
103#ifdef CONFIG_X86_64 126#ifdef CONFIG_X86_64
@@ -111,13 +134,19 @@ static __init int setup_apicpmtimer(char *s)
111__setup("apicpmtimer", setup_apicpmtimer); 134__setup("apicpmtimer", setup_apicpmtimer);
112#endif 135#endif
113 136
137int x2apic_mode;
114#ifdef CONFIG_X86_X2APIC 138#ifdef CONFIG_X86_X2APIC
115int x2apic;
116/* x2apic enabled before OS handover */ 139/* x2apic enabled before OS handover */
117static int x2apic_preenabled; 140static int x2apic_preenabled;
118static int disable_x2apic; 141static int disable_x2apic;
119static __init int setup_nox2apic(char *str) 142static __init int setup_nox2apic(char *str)
120{ 143{
144 if (x2apic_enabled()) {
145 pr_warning("Bios already enabled x2apic, "
146 "can't enforce nox2apic");
147 return 0;
148 }
149
121 disable_x2apic = 1; 150 disable_x2apic = 1;
122 setup_clear_cpu_cap(X86_FEATURE_X2APIC); 151 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
123 return 0; 152 return 0;
@@ -209,6 +238,31 @@ static int modern_apic(void)
209 return lapic_get_version() >= 0x14; 238 return lapic_get_version() >= 0x14;
210} 239}
211 240
241/*
242 * bare function to substitute for the write operation
243 * and it's _that_ fast :)
244 */
245static void native_apic_write_dummy(u32 reg, u32 v)
246{
247 WARN_ON_ONCE((cpu_has_apic && !disable_apic));
248}
249
250static u32 native_apic_read_dummy(u32 reg)
251{
252 WARN_ON_ONCE((cpu_has_apic && !disable_apic));
253 return 0;
254}
255
256/*
257 * right after this call apic->read/write don't do anything
258 * note that there is no restore operation; it works one way
259 */
260void apic_disable(void)
261{
262 apic->read = native_apic_read_dummy;
263 apic->write = native_apic_write_dummy;
264}
265
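apic_disable() above swaps the APIC accessors for warn-and-do-nothing stubs, one way, with no restore path. A reduced user-space model of that function-pointer substitution (all names here are invented for illustration):

    #include <stdio.h>
    #include <stdint.h>

    struct apic_ops {
        uint32_t (*read)(uint32_t reg);
        void (*write)(uint32_t reg, uint32_t v);
    };

    static uint32_t real_read(uint32_t reg)           { (void)reg; return 0x14; }
    static void real_write(uint32_t reg, uint32_t v)  { printf("write %x=%x\n", reg, v); }
    static uint32_t dummy_read(uint32_t reg)          { (void)reg; return 0; }
    static void dummy_write(uint32_t reg, uint32_t v) { (void)reg; (void)v; }

    static struct apic_ops apic = { real_read, real_write };

    static void apic_disable_model(void)
    {
        /* mirrors apic_disable(): install the dummies; there is no way back */
        apic.read  = dummy_read;
        apic.write = dummy_write;
    }

    int main(void)
    {
        apic.write(0x80, 1);                   /* reaches the real method */
        apic_disable_model();
        apic.write(0x80, 1);                   /* silently ignored now */
        printf("read after disable: %x\n", apic.read(0x80));
        return 0;
    }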
212void native_apic_wait_icr_idle(void) 266void native_apic_wait_icr_idle(void)
213{ 267{
214 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 268 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
@@ -348,7 +402,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
348 402
349static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) 403static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
350{ 404{
351 unsigned long reg = (lvt_off << 4) + APIC_EILVT0; 405 unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
352 unsigned int v = (mask << 16) | (msg_type << 8) | vector; 406 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
353 407
354 apic_write(reg, v); 408 apic_write(reg, v);
@@ -815,7 +869,7 @@ void clear_local_APIC(void)
815 u32 v; 869 u32 v;
816 870
817 /* APIC hasn't been mapped yet */ 871 /* APIC hasn't been mapped yet */
818 if (!x2apic && !apic_phys) 872 if (!x2apic_mode && !apic_phys)
819 return; 873 return;
820 874
821 maxlvt = lapic_get_maxlvt(); 875 maxlvt = lapic_get_maxlvt();
@@ -1287,7 +1341,7 @@ void check_x2apic(void)
1287{ 1341{
1288 if (x2apic_enabled()) { 1342 if (x2apic_enabled()) {
1289 pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); 1343 pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
1290 x2apic_preenabled = x2apic = 1; 1344 x2apic_preenabled = x2apic_mode = 1;
1291 } 1345 }
1292} 1346}
1293 1347
@@ -1295,7 +1349,7 @@ void enable_x2apic(void)
1295{ 1349{
1296 int msr, msr2; 1350 int msr, msr2;
1297 1351
1298 if (!x2apic) 1352 if (!x2apic_mode)
1299 return; 1353 return;
1300 1354
1301 rdmsr(MSR_IA32_APICBASE, msr, msr2); 1355 rdmsr(MSR_IA32_APICBASE, msr, msr2);
@@ -1304,6 +1358,7 @@ void enable_x2apic(void)
1304 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); 1358 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1305 } 1359 }
1306} 1360}
1361#endif /* CONFIG_X86_X2APIC */
1307 1362
1308void __init enable_IR_x2apic(void) 1363void __init enable_IR_x2apic(void)
1309{ 1364{
@@ -1312,32 +1367,21 @@ void __init enable_IR_x2apic(void)
1312 unsigned long flags; 1367 unsigned long flags;
1313 struct IO_APIC_route_entry **ioapic_entries = NULL; 1368 struct IO_APIC_route_entry **ioapic_entries = NULL;
1314 1369
1315 if (!cpu_has_x2apic) 1370 ret = dmar_table_init();
1316 return; 1371 if (ret) {
1317 1372 pr_debug("dmar_table_init() failed with %d:\n", ret);
1318 if (!x2apic_preenabled && disable_x2apic) { 1373 goto ir_failed;
1319 pr_info("Skipped enabling x2apic and Interrupt-remapping "
1320 "because of nox2apic\n");
1321 return;
1322 } 1374 }
1323 1375
1324 if (x2apic_preenabled && disable_x2apic) 1376 if (!intr_remapping_supported()) {
1325 panic("Bios already enabled x2apic, can't enforce nox2apic"); 1377 pr_debug("intr-remapping not supported\n");
1326 1378 goto ir_failed;
1327 if (!x2apic_preenabled && skip_ioapic_setup) {
1328 pr_info("Skipped enabling x2apic and Interrupt-remapping "
1329 "because of skipping io-apic setup\n");
1330 return;
1331 } 1379 }
1332 1380
1333 ret = dmar_table_init();
1334 if (ret) {
1335 pr_info("dmar_table_init() failed with %d:\n", ret);
1336 1381
1337 if (x2apic_preenabled) 1382 if (!x2apic_preenabled && skip_ioapic_setup) {
1338 panic("x2apic enabled by bios. But IR enabling failed"); 1383 pr_info("Skipped enabling intr-remap because of skipping "
1339 else 1384 "io-apic setup\n");
1340 pr_info("Not enabling x2apic,Intr-remapping\n");
1341 return; 1385 return;
1342 } 1386 }
1343 1387
@@ -1357,19 +1401,16 @@ void __init enable_IR_x2apic(void)
1357 mask_IO_APIC_setup(ioapic_entries); 1401 mask_IO_APIC_setup(ioapic_entries);
1358 mask_8259A(); 1402 mask_8259A();
1359 1403
1360 ret = enable_intr_remapping(EIM_32BIT_APIC_ID); 1404 ret = enable_intr_remapping(x2apic_supported());
1361
1362 if (ret && x2apic_preenabled) {
1363 local_irq_restore(flags);
1364 panic("x2apic enabled by bios. But IR enabling failed");
1365 }
1366
1367 if (ret) 1405 if (ret)
1368 goto end_restore; 1406 goto end_restore;
1369 1407
1370 if (!x2apic) { 1408 pr_info("Enabled Interrupt-remapping\n");
1371 x2apic = 1; 1409
1410 if (x2apic_supported() && !x2apic_mode) {
1411 x2apic_mode = 1;
1372 enable_x2apic(); 1412 enable_x2apic();
1413 pr_info("Enabled x2apic\n");
1373 } 1414 }
1374 1415
1375end_restore: 1416end_restore:
@@ -1378,37 +1419,34 @@ end_restore:
1378 * IR enabling failed 1419 * IR enabling failed
1379 */ 1420 */
1380 restore_IO_APIC_setup(ioapic_entries); 1421 restore_IO_APIC_setup(ioapic_entries);
1381 else
1382 reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
1383 1422
1384 unmask_8259A(); 1423 unmask_8259A();
1385 local_irq_restore(flags); 1424 local_irq_restore(flags);
1386 1425
1387end: 1426end:
1388 if (!ret) {
1389 if (!x2apic_preenabled)
1390 pr_info("Enabled x2apic and interrupt-remapping\n");
1391 else
1392 pr_info("Enabled Interrupt-remapping\n");
1393 } else
1394 pr_err("Failed to enable Interrupt-remapping and x2apic\n");
1395 if (ioapic_entries) 1427 if (ioapic_entries)
1396 free_ioapic_entries(ioapic_entries); 1428 free_ioapic_entries(ioapic_entries);
1429
1430 if (!ret)
1431 return;
1432
1433ir_failed:
1434 if (x2apic_preenabled)
1435 panic("x2apic enabled by bios. But IR enabling failed");
1436 else if (cpu_has_x2apic)
1437 pr_info("Not enabling x2apic, Intr-remapping\n");
1397#else 1438#else
1398 if (!cpu_has_x2apic) 1439 if (!cpu_has_x2apic)
1399 return; 1440 return;
1400 1441
1401 if (x2apic_preenabled) 1442 if (x2apic_preenabled)
1402 panic("x2apic enabled prior to OS handover," 1443 panic("x2apic enabled prior to OS handover,"
1403 " enable CONFIG_INTR_REMAP"); 1444 " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
1404
1405 pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1406 " and x2apic\n");
1407#endif 1445#endif
1408 1446
1409 return; 1447 return;
1410} 1448}
1411#endif /* CONFIG_X86_X2APIC */ 1449
1412 1450
1413#ifdef CONFIG_X86_64 1451#ifdef CONFIG_X86_64
1414/* 1452/*
@@ -1425,7 +1463,6 @@ static int __init detect_init_APIC(void)
1425 } 1463 }
1426 1464
1427 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; 1465 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1428 boot_cpu_physical_apicid = 0;
1429 return 0; 1466 return 0;
1430} 1467}
1431#else 1468#else
@@ -1539,32 +1576,49 @@ void __init early_init_lapic_mapping(void)
1539 */ 1576 */
1540void __init init_apic_mappings(void) 1577void __init init_apic_mappings(void)
1541{ 1578{
1542 if (x2apic) { 1579 unsigned int new_apicid;
1580
1581 if (x2apic_mode) {
1543 boot_cpu_physical_apicid = read_apic_id(); 1582 boot_cpu_physical_apicid = read_apic_id();
1544 return; 1583 return;
1545 } 1584 }
1546 1585
1547 /* 1586 /* If no local APIC can be found return early */
1548 * If no local APIC can be found then set up a fake all
1549 * zeroes page to simulate the local APIC and another
1550 * one for the IO-APIC.
1551 */
1552 if (!smp_found_config && detect_init_APIC()) { 1587 if (!smp_found_config && detect_init_APIC()) {
1553 apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); 1588 /* lets NOP'ify apic operations */
1554 apic_phys = __pa(apic_phys); 1589 pr_info("APIC: disable apic facility\n");
1555 } else 1590 apic_disable();
1591 } else {
1556 apic_phys = mp_lapic_addr; 1592 apic_phys = mp_lapic_addr;
1557 1593
1558 set_fixmap_nocache(FIX_APIC_BASE, apic_phys); 1594 /*
1559 apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", 1595 * acpi lapic path already maps that address in
1560 APIC_BASE, apic_phys); 1596 * acpi_register_lapic_address()
1597 */
1598 if (!acpi_lapic)
1599 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
1600
1601 apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
1602 APIC_BASE, apic_phys);
1603 }
1561 1604
1562 /* 1605 /*
1563 * Fetch the APIC ID of the BSP in case we have a 1606 * Fetch the APIC ID of the BSP in case we have a
1564 * default configuration (or the MP table is broken). 1607 * default configuration (or the MP table is broken).
1565 */ 1608 */
1566 if (boot_cpu_physical_apicid == -1U) 1609 new_apicid = read_apic_id();
1567 boot_cpu_physical_apicid = read_apic_id(); 1610 if (boot_cpu_physical_apicid != new_apicid) {
1611 boot_cpu_physical_apicid = new_apicid;
1612 /*
1613 * we lie about apic_version in case the apic
1614 * was disabled via a boot option, but it's not
1615 * a problem for an SMP-compiled kernel since
1616 * smp_sanity_check is prepared for such a case
1617 * and disables smp mode
1618 */
1619 apic_version[new_apicid] =
1620 GET_APIC_VERSION(apic_read(APIC_LVR));
1621 }
1568} 1622}
1569 1623
1570/* 1624/*
@@ -1733,8 +1787,7 @@ void __init connect_bsp_APIC(void)
1733 */ 1787 */
1734 apic_printk(APIC_VERBOSE, "leaving PIC mode, " 1788 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1735 "enabling APIC mode.\n"); 1789 "enabling APIC mode.\n");
1736 outb(0x70, 0x22); 1790 imcr_pic_to_apic();
1737 outb(0x01, 0x23);
1738 } 1791 }
1739#endif 1792#endif
1740 if (apic->enable_apic_mode) 1793 if (apic->enable_apic_mode)
@@ -1762,8 +1815,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1762 */ 1815 */
1763 apic_printk(APIC_VERBOSE, "disabling APIC mode, " 1816 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1764 "entering PIC mode.\n"); 1817 "entering PIC mode.\n");
1765 outb(0x70, 0x22); 1818 imcr_apic_to_pic();
1766 outb(0x00, 0x23);
1767 return; 1819 return;
1768 } 1820 }
1769#endif 1821#endif
@@ -1969,10 +2021,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1969 2021
1970 local_irq_save(flags); 2022 local_irq_save(flags);
1971 disable_local_APIC(); 2023 disable_local_APIC();
1972#ifdef CONFIG_INTR_REMAP 2024
1973 if (intr_remapping_enabled) 2025 if (intr_remapping_enabled)
1974 disable_intr_remapping(); 2026 disable_intr_remapping();
1975#endif 2027
1976 local_irq_restore(flags); 2028 local_irq_restore(flags);
1977 return 0; 2029 return 0;
1978} 2030}
@@ -1982,42 +2034,34 @@ static int lapic_resume(struct sys_device *dev)
1982 unsigned int l, h; 2034 unsigned int l, h;
1983 unsigned long flags; 2035 unsigned long flags;
1984 int maxlvt; 2036 int maxlvt;
1985 2037 int ret = 0;
1986#ifdef CONFIG_INTR_REMAP
1987 int ret;
1988 struct IO_APIC_route_entry **ioapic_entries = NULL; 2038 struct IO_APIC_route_entry **ioapic_entries = NULL;
1989 2039
1990 if (!apic_pm_state.active) 2040 if (!apic_pm_state.active)
1991 return 0; 2041 return 0;
1992 2042
1993 local_irq_save(flags); 2043 local_irq_save(flags);
1994 if (x2apic) { 2044 if (intr_remapping_enabled) {
1995 ioapic_entries = alloc_ioapic_entries(); 2045 ioapic_entries = alloc_ioapic_entries();
1996 if (!ioapic_entries) { 2046 if (!ioapic_entries) {
1997 WARN(1, "Alloc ioapic_entries in lapic resume failed."); 2047 WARN(1, "Alloc ioapic_entries in lapic resume failed.");
1998 return -ENOMEM; 2048 ret = -ENOMEM;
2049 goto restore;
1999 } 2050 }
2000 2051
2001 ret = save_IO_APIC_setup(ioapic_entries); 2052 ret = save_IO_APIC_setup(ioapic_entries);
2002 if (ret) { 2053 if (ret) {
2003 WARN(1, "Saving IO-APIC state failed: %d\n", ret); 2054 WARN(1, "Saving IO-APIC state failed: %d\n", ret);
2004 free_ioapic_entries(ioapic_entries); 2055 free_ioapic_entries(ioapic_entries);
2005 return ret; 2056 goto restore;
2006 } 2057 }
2007 2058
2008 mask_IO_APIC_setup(ioapic_entries); 2059 mask_IO_APIC_setup(ioapic_entries);
2009 mask_8259A(); 2060 mask_8259A();
2010 enable_x2apic();
2011 } 2061 }
2012#else
2013 if (!apic_pm_state.active)
2014 return 0;
2015 2062
2016 local_irq_save(flags); 2063 if (x2apic_mode)
2017 if (x2apic)
2018 enable_x2apic(); 2064 enable_x2apic();
2019#endif
2020
2021 else { 2065 else {
2022 /* 2066 /*
2023 * Make sure the APICBASE points to the right address 2067 * Make sure the APICBASE points to the right address
@@ -2055,21 +2099,16 @@ static int lapic_resume(struct sys_device *dev)
2055 apic_write(APIC_ESR, 0); 2099 apic_write(APIC_ESR, 0);
2056 apic_read(APIC_ESR); 2100 apic_read(APIC_ESR);
2057 2101
2058#ifdef CONFIG_INTR_REMAP 2102 if (intr_remapping_enabled) {
2059 if (intr_remapping_enabled) 2103 reenable_intr_remapping(x2apic_mode);
2060 reenable_intr_remapping(EIM_32BIT_APIC_ID);
2061
2062 if (x2apic) {
2063 unmask_8259A(); 2104 unmask_8259A();
2064 restore_IO_APIC_setup(ioapic_entries); 2105 restore_IO_APIC_setup(ioapic_entries);
2065 free_ioapic_entries(ioapic_entries); 2106 free_ioapic_entries(ioapic_entries);
2066 } 2107 }
2067#endif 2108restore:
2068
2069 local_irq_restore(flags); 2109 local_irq_restore(flags);
2070 2110
2071 2111 return ret;
2072 return 0;
2073} 2112}
2074 2113
2075/* 2114/*
@@ -2117,31 +2156,14 @@ static void apic_pm_activate(void) { }
2117#endif /* CONFIG_PM */ 2156#endif /* CONFIG_PM */
2118 2157
2119#ifdef CONFIG_X86_64 2158#ifdef CONFIG_X86_64
2120/* 2159
2121 * apic_is_clustered_box() -- Check if we can expect good TSC 2160static int __cpuinit apic_cluster_num(void)
2122 *
2123 * Thus far, the major user of this is IBM's Summit2 series:
2124 *
2125 * Clustered boxes may have unsynced TSC problems if they are
2126 * multi-chassis. Use available data to take a good guess.
2127 * If in doubt, go HPET.
2128 */
2129__cpuinit int apic_is_clustered_box(void)
2130{ 2161{
2131 int i, clusters, zeros; 2162 int i, clusters, zeros;
2132 unsigned id; 2163 unsigned id;
2133 u16 *bios_cpu_apicid; 2164 u16 *bios_cpu_apicid;
2134 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); 2165 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
2135 2166
2136 /*
2137 * there is not this kind of box with AMD CPU yet.
2138 * Some AMD box with quadcore cpu and 8 sockets apicid
2139 * will be [4, 0x23] or [8, 0x27] could be thought to
2140 * vsmp box still need checking...
2141 */
2142 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
2143 return 0;
2144
2145 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); 2167 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
2146 bitmap_zero(clustermap, NUM_APIC_CLUSTERS); 2168 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
2147 2169
@@ -2177,18 +2199,67 @@ __cpuinit int apic_is_clustered_box(void)
2177 ++zeros; 2199 ++zeros;
2178 } 2200 }
2179 2201
2180 /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are 2202 return clusters;
2181 * not guaranteed to be synced between boards 2203}
2182 */ 2204
2183 if (is_vsmp_box() && clusters > 1) 2205static int __cpuinitdata multi_checked;
2206static int __cpuinitdata multi;
2207
2208static int __cpuinit set_multi(const struct dmi_system_id *d)
2209{
2210 if (multi)
2211 return 0;
2212 pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2213 multi = 1;
2214 return 0;
2215}
2216
2217static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
2218 {
2219 .callback = set_multi,
2220 .ident = "IBM System Summit2",
2221 .matches = {
2222 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
2223 DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
2224 },
2225 },
2226 {}
2227};
2228
2229static void __cpuinit dmi_check_multi(void)
2230{
2231 if (multi_checked)
2232 return;
2233
2234 dmi_check_system(multi_dmi_table);
2235 multi_checked = 1;
2236}
2237
2238/*
2239 * apic_is_clustered_box() -- Check if we can expect good TSC
2240 *
2241 * Thus far, the major user of this is IBM's Summit2 series:
2242 * Clustered boxes may have unsynced TSC problems if they are
2243 * multi-chassis.
2244 * Use DMI to check them
2245 */
2246__cpuinit int apic_is_clustered_box(void)
2247{
2248 dmi_check_multi();
2249 if (multi)
2184 return 1; 2250 return 1;
2185 2251
2252 if (!is_vsmp_box())
2253 return 0;
2254
2186 /* 2255 /*
2187 * If clusters > 2, then should be multi-chassis. 2256 * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
2188 * May have to revisit this when multi-core + hyperthreaded CPUs come 2257 * not guaranteed to be synced between boards
2189 * out, but AFAIK this will work even for them.
2190 */ 2258 */
2191 return (clusters > 2); 2259 if (apic_cluster_num() > 1)
2260 return 1;
2261
2262 return 0;
2192} 2263}
2193#endif 2264#endif
2194 2265
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 306e5e88fb6f..d0c99abc26c3 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -161,7 +161,7 @@ static int flat_apic_id_registered(void)
161 161
162static int flat_phys_pkg_id(int initial_apic_id, int index_msb) 162static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
163{ 163{
164 return hard_smp_processor_id() >> index_msb; 164 return initial_apic_id >> index_msb;
165} 165}
166 166
167struct apic apic_flat = { 167struct apic apic_flat = {
@@ -235,7 +235,7 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
235 * regardless of how many processors are present (x86_64 ES7000 235 * regardless of how many processors are present (x86_64 ES7000
236 * is an example). 236 * is an example).
237 */ 237 */
238 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && 238 if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
239 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { 239 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
240 printk(KERN_DEBUG "system APIC only can use physical flat"); 240 printk(KERN_DEBUG "system APIC only can use physical flat");
241 return 1; 241 return 1;
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 302947775575..69328ac8de9c 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi)
145 return gsi; 145 return gsi;
146} 146}
147 147
148static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) 148static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
149{ 149{
150 unsigned long vect = 0, psaival = 0; 150 unsigned long vect = 0, psaival = 0;
151 151
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 30da617d18e4..1946fac42ab3 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -59,6 +59,7 @@
59#include <asm/setup.h> 59#include <asm/setup.h>
60#include <asm/irq_remapping.h> 60#include <asm/irq_remapping.h>
61#include <asm/hpet.h> 61#include <asm/hpet.h>
62#include <asm/hw_irq.h>
62#include <asm/uv/uv_hub.h> 63#include <asm/uv/uv_hub.h>
63#include <asm/uv/uv_irq.h> 64#include <asm/uv/uv_irq.h>
64 65
@@ -129,12 +130,9 @@ struct irq_pin_list {
129 struct irq_pin_list *next; 130 struct irq_pin_list *next;
130}; 131};
131 132
132static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) 133static struct irq_pin_list *get_one_free_irq_2_pin(int node)
133{ 134{
134 struct irq_pin_list *pin; 135 struct irq_pin_list *pin;
135 int node;
136
137 node = cpu_to_node(cpu);
138 136
139 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); 137 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
140 138
@@ -148,9 +146,6 @@ struct irq_cfg {
148 unsigned move_cleanup_count; 146 unsigned move_cleanup_count;
149 u8 vector; 147 u8 vector;
150 u8 move_in_progress : 1; 148 u8 move_in_progress : 1;
151#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
152 u8 move_desc_pending : 1;
153#endif
154}; 149};
155 150
156/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 151/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -212,12 +207,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
212 return cfg; 207 return cfg;
213} 208}
214 209
215static struct irq_cfg *get_one_free_irq_cfg(int cpu) 210static struct irq_cfg *get_one_free_irq_cfg(int node)
216{ 211{
217 struct irq_cfg *cfg; 212 struct irq_cfg *cfg;
218 int node;
219
220 node = cpu_to_node(cpu);
221 213
222 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); 214 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
223 if (cfg) { 215 if (cfg) {
@@ -238,13 +230,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
238 return cfg; 230 return cfg;
239} 231}
240 232
241int arch_init_chip_data(struct irq_desc *desc, int cpu) 233int arch_init_chip_data(struct irq_desc *desc, int node)
242{ 234{
243 struct irq_cfg *cfg; 235 struct irq_cfg *cfg;
244 236
245 cfg = desc->chip_data; 237 cfg = desc->chip_data;
246 if (!cfg) { 238 if (!cfg) {
247 desc->chip_data = get_one_free_irq_cfg(cpu); 239 desc->chip_data = get_one_free_irq_cfg(node);
248 if (!desc->chip_data) { 240 if (!desc->chip_data) {
249 printk(KERN_ERR "can not alloc irq_cfg\n"); 241 printk(KERN_ERR "can not alloc irq_cfg\n");
250 BUG_ON(1); 242 BUG_ON(1);
@@ -254,10 +246,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
254 return 0; 246 return 0;
255} 247}
256 248
257#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC 249/* for move_irq_desc */
258
259static void 250static void
260init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) 251init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
261{ 252{
262 struct irq_pin_list *old_entry, *head, *tail, *entry; 253 struct irq_pin_list *old_entry, *head, *tail, *entry;
263 254
@@ -266,7 +257,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
266 if (!old_entry) 257 if (!old_entry)
267 return; 258 return;
268 259
269 entry = get_one_free_irq_2_pin(cpu); 260 entry = get_one_free_irq_2_pin(node);
270 if (!entry) 261 if (!entry)
271 return; 262 return;
272 263
@@ -276,7 +267,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
276 tail = entry; 267 tail = entry;
277 old_entry = old_entry->next; 268 old_entry = old_entry->next;
278 while (old_entry) { 269 while (old_entry) {
279 entry = get_one_free_irq_2_pin(cpu); 270 entry = get_one_free_irq_2_pin(node);
280 if (!entry) { 271 if (!entry) {
281 entry = head; 272 entry = head;
282 while (entry) { 273 while (entry) {
@@ -316,12 +307,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
316} 307}
317 308
318void arch_init_copy_chip_data(struct irq_desc *old_desc, 309void arch_init_copy_chip_data(struct irq_desc *old_desc,
319 struct irq_desc *desc, int cpu) 310 struct irq_desc *desc, int node)
320{ 311{
321 struct irq_cfg *cfg; 312 struct irq_cfg *cfg;
322 struct irq_cfg *old_cfg; 313 struct irq_cfg *old_cfg;
323 314
324 cfg = get_one_free_irq_cfg(cpu); 315 cfg = get_one_free_irq_cfg(node);
325 316
326 if (!cfg) 317 if (!cfg)
327 return; 318 return;
@@ -332,7 +323,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
332 323
333 memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); 324 memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
334 325
335 init_copy_irq_2_pin(old_cfg, cfg, cpu); 326 init_copy_irq_2_pin(old_cfg, cfg, node);
336} 327}
337 328
338static void free_irq_cfg(struct irq_cfg *old_cfg) 329static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -356,19 +347,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
356 old_desc->chip_data = NULL; 347 old_desc->chip_data = NULL;
357 } 348 }
358} 349}
359 350/* end for move_irq_desc */
360static void
361set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
362{
363 struct irq_cfg *cfg = desc->chip_data;
364
365 if (!cfg->move_in_progress) {
366 /* it means that domain is not changed */
367 if (!cpumask_intersects(desc->affinity, mask))
368 cfg->move_desc_pending = 1;
369 }
370}
371#endif
372 351
373#else 352#else
374static struct irq_cfg *irq_cfg(unsigned int irq) 353static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -378,13 +357,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
378 357
379#endif 358#endif
380 359
381#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
382static inline void
383set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
384{
385}
386#endif
387
388struct io_apic { 360struct io_apic {
389 unsigned int index; 361 unsigned int index;
390 unsigned int unused[3]; 362 unsigned int unused[3];
@@ -518,132 +490,18 @@ static void ioapic_mask_entry(int apic, int pin)
518 spin_unlock_irqrestore(&ioapic_lock, flags); 490 spin_unlock_irqrestore(&ioapic_lock, flags);
519} 491}
520 492
521#ifdef CONFIG_SMP
522static void send_cleanup_vector(struct irq_cfg *cfg)
523{
524 cpumask_var_t cleanup_mask;
525
526 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
527 unsigned int i;
528 cfg->move_cleanup_count = 0;
529 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
530 cfg->move_cleanup_count++;
531 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
532 apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
533 } else {
534 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
535 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
536 apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
537 free_cpumask_var(cleanup_mask);
538 }
539 cfg->move_in_progress = 0;
540}
541
542static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
543{
544 int apic, pin;
545 struct irq_pin_list *entry;
546 u8 vector = cfg->vector;
547
548 entry = cfg->irq_2_pin;
549 for (;;) {
550 unsigned int reg;
551
552 if (!entry)
553 break;
554
555 apic = entry->apic;
556 pin = entry->pin;
557 /*
558 * With interrupt-remapping, destination information comes
559 * from interrupt-remapping table entry.
560 */
561 if (!irq_remapped(irq))
562 io_apic_write(apic, 0x11 + pin*2, dest);
563 reg = io_apic_read(apic, 0x10 + pin*2);
564 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
565 reg |= vector;
566 io_apic_modify(apic, 0x10 + pin*2, reg);
567 if (!entry->next)
568 break;
569 entry = entry->next;
570 }
571}
572
573static int
574assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
575
576/*
577 * Either sets desc->affinity to a valid value, and returns
578 * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
579 * leaves desc->affinity untouched.
580 */
581static unsigned int
582set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
583{
584 struct irq_cfg *cfg;
585 unsigned int irq;
586
587 if (!cpumask_intersects(mask, cpu_online_mask))
588 return BAD_APICID;
589
590 irq = desc->irq;
591 cfg = desc->chip_data;
592 if (assign_irq_vector(irq, cfg, mask))
593 return BAD_APICID;
594
 595		/* check that before desc->affinity gets updated */
596 set_extra_move_desc(desc, mask);
597
598 cpumask_copy(desc->affinity, mask);
599
600 return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
601}
602
603static void
604set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
605{
606 struct irq_cfg *cfg;
607 unsigned long flags;
608 unsigned int dest;
609 unsigned int irq;
610
611 irq = desc->irq;
612 cfg = desc->chip_data;
613
614 spin_lock_irqsave(&ioapic_lock, flags);
615 dest = set_desc_affinity(desc, mask);
616 if (dest != BAD_APICID) {
617 /* Only the high 8 bits are valid. */
618 dest = SET_APIC_LOGICAL_ID(dest);
619 __target_IO_APIC_irq(irq, dest, cfg);
620 }
621 spin_unlock_irqrestore(&ioapic_lock, flags);
622}
623
624static void
625set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
626{
627 struct irq_desc *desc;
628
629 desc = irq_to_desc(irq);
630
631 set_ioapic_affinity_irq_desc(desc, mask);
632}
633#endif /* CONFIG_SMP */
634
635/* 493/*
636 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 494 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
637 * shared ISA-space IRQs, so we have to support them. We are super 495 * shared ISA-space IRQs, so we have to support them. We are super
638 * fast in the common case, and fast for shared ISA-space IRQs. 496 * fast in the common case, and fast for shared ISA-space IRQs.
639 */ 497 */
640static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) 498static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
641{ 499{
642 struct irq_pin_list *entry; 500 struct irq_pin_list *entry;
643 501
644 entry = cfg->irq_2_pin; 502 entry = cfg->irq_2_pin;
645 if (!entry) { 503 if (!entry) {
646 entry = get_one_free_irq_2_pin(cpu); 504 entry = get_one_free_irq_2_pin(node);
647 if (!entry) { 505 if (!entry) {
648 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", 506 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
649 apic, pin); 507 apic, pin);
@@ -663,7 +521,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
663 entry = entry->next; 521 entry = entry->next;
664 } 522 }
665 523
666 entry->next = get_one_free_irq_2_pin(cpu); 524 entry->next = get_one_free_irq_2_pin(node);
667 entry = entry->next; 525 entry = entry->next;
668 entry->apic = apic; 526 entry->apic = apic;
669 entry->pin = pin; 527 entry->pin = pin;
@@ -672,7 +530,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
672/* 530/*
673 * Reroute an IRQ to a different pin. 531 * Reroute an IRQ to a different pin.
674 */ 532 */
675static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, 533static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
676 int oldapic, int oldpin, 534 int oldapic, int oldpin,
677 int newapic, int newpin) 535 int newapic, int newpin)
678{ 536{
@@ -692,7 +550,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
692 550
693 /* why? call replace before add? */ 551 /* why? call replace before add? */
694 if (!replaced) 552 if (!replaced)
695 add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); 553 add_pin_to_irq_node(cfg, node, newapic, newpin);
696} 554}
697 555
698static inline void io_apic_modify_irq(struct irq_cfg *cfg, 556static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -850,7 +708,6 @@ static int __init ioapic_pirq_setup(char *str)
850__setup("pirq=", ioapic_pirq_setup); 708__setup("pirq=", ioapic_pirq_setup);
851#endif /* CONFIG_X86_32 */ 709#endif /* CONFIG_X86_32 */
852 710
853#ifdef CONFIG_INTR_REMAP
854struct IO_APIC_route_entry **alloc_ioapic_entries(void) 711struct IO_APIC_route_entry **alloc_ioapic_entries(void)
855{ 712{
856 int apic; 713 int apic;
@@ -948,20 +805,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
948 return 0; 805 return 0;
949} 806}
950 807
951void reinit_intr_remapped_IO_APIC(int intr_remapping,
952 struct IO_APIC_route_entry **ioapic_entries)
953
954{
955 /*
956 * for now plain restore of previous settings.
957 * TBD: In the case of OS enabling interrupt-remapping,
958 * IO-APIC RTE's need to be setup to point to interrupt-remapping
959 * table entries. for now, do a plain restore, and wait for
960 * the setup_IO_APIC_irqs() to do proper initialization.
961 */
962 restore_IO_APIC_setup(ioapic_entries);
963}
964
965void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) 808void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
966{ 809{
967 int apic; 810 int apic;
@@ -971,7 +814,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
971 814
972 kfree(ioapic_entries); 815 kfree(ioapic_entries);
973} 816}
974#endif
975 817
976/* 818/*
977 * Find the IRQ entry number of a certain pin. 819 * Find the IRQ entry number of a certain pin.
@@ -1032,54 +874,6 @@ static int __init find_isa_irq_apic(int irq, int type)
1032 return -1; 874 return -1;
1033} 875}
1034 876
1035/*
1036 * Find a specific PCI IRQ entry.
1037 * Not an __init, possibly needed by modules
1038 */
1039static int pin_2_irq(int idx, int apic, int pin);
1040
1041int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1042{
1043 int apic, i, best_guess = -1;
1044
1045 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
1046 bus, slot, pin);
1047 if (test_bit(bus, mp_bus_not_pci)) {
1048 apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
1049 return -1;
1050 }
1051 for (i = 0; i < mp_irq_entries; i++) {
1052 int lbus = mp_irqs[i].srcbus;
1053
1054 for (apic = 0; apic < nr_ioapics; apic++)
1055 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1056 mp_irqs[i].dstapic == MP_APIC_ALL)
1057 break;
1058
1059 if (!test_bit(lbus, mp_bus_not_pci) &&
1060 !mp_irqs[i].irqtype &&
1061 (bus == lbus) &&
1062 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1063 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1064
1065 if (!(apic || IO_APIC_IRQ(irq)))
1066 continue;
1067
1068 if (pin == (mp_irqs[i].srcbusirq & 3))
1069 return irq;
1070 /*
1071 * Use the first all-but-pin matching entry as a
1072 * best-guess fuzzy result for broken mptables.
1073 */
1074 if (best_guess < 0)
1075 best_guess = irq;
1076 }
1077 }
1078 return best_guess;
1079}
1080
1081EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
1082
1083#if defined(CONFIG_EISA) || defined(CONFIG_MCA) 877#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1084/* 878/*
1085 * EISA Edge/Level control register, ELCR 879 * EISA Edge/Level control register, ELCR
@@ -1298,6 +1092,64 @@ static int pin_2_irq(int idx, int apic, int pin)
1298 return irq; 1092 return irq;
1299} 1093}
1300 1094
1095/*
1096 * Find a specific PCI IRQ entry.
1097 * Not an __init, possibly needed by modules
1098 */
1099int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
1100 struct io_apic_irq_attr *irq_attr)
1101{
1102 int apic, i, best_guess = -1;
1103
1104 apic_printk(APIC_DEBUG,
1105 "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
1106 bus, slot, pin);
1107 if (test_bit(bus, mp_bus_not_pci)) {
1108 apic_printk(APIC_VERBOSE,
1109 "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
1110 return -1;
1111 }
1112 for (i = 0; i < mp_irq_entries; i++) {
1113 int lbus = mp_irqs[i].srcbus;
1114
1115 for (apic = 0; apic < nr_ioapics; apic++)
1116 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1117 mp_irqs[i].dstapic == MP_APIC_ALL)
1118 break;
1119
1120 if (!test_bit(lbus, mp_bus_not_pci) &&
1121 !mp_irqs[i].irqtype &&
1122 (bus == lbus) &&
1123 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1124 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1125
1126 if (!(apic || IO_APIC_IRQ(irq)))
1127 continue;
1128
1129 if (pin == (mp_irqs[i].srcbusirq & 3)) {
1130 set_io_apic_irq_attr(irq_attr, apic,
1131 mp_irqs[i].dstirq,
1132 irq_trigger(i),
1133 irq_polarity(i));
1134 return irq;
1135 }
1136 /*
1137 * Use the first all-but-pin matching entry as a
1138 * best-guess fuzzy result for broken mptables.
1139 */
1140 if (best_guess < 0) {
1141 set_io_apic_irq_attr(irq_attr, apic,
1142 mp_irqs[i].dstirq,
1143 irq_trigger(i),
1144 irq_polarity(i));
1145 best_guess = irq;
1146 }
1147 }
1148 }
1149 return best_guess;
1150}
1151EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
1152
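Aside, a minimal standalone sketch of the mptable matching that IO_APIC_get_PCI_irq_vector() performs above: an exact (bus, slot, pin) hit wins, otherwise the first all-but-pin match is kept as a best guess for broken mptables. The two-entry table and its values are invented for illustration; only the srcbusirq bit layout (slot in bits 2..6, pin in bits 0..1) follows the code.

#include <stdio.h>

/* Invented stand-in for one mptable interrupt-source entry. */
struct mp_irq_model {
	int srcbus;
	int srcbusirq;	/* (slot << 2) | pin, as decoded above */
	int irq;
};

static const struct mp_irq_model table[] = {
	{ 0, (3 << 2) | 0, 16 },	/* bus 0, slot 3, INTA -> IRQ 16 */
	{ 0, (3 << 2) | 1, 17 },	/* bus 0, slot 3, INTB -> IRQ 17 */
};

static int find_pci_irq(int bus, int slot, int pin)
{
	int best_guess = -1;
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		if (table[i].srcbus != bus ||
		    ((table[i].srcbusirq >> 2) & 0x1f) != slot)
			continue;
		if ((table[i].srcbusirq & 3) == pin)
			return table[i].irq;		/* exact match */
		if (best_guess < 0)
			best_guess = table[i].irq;	/* fuzzy fallback */
	}
	return best_guess;
}

int main(void)
{
	printf("exact: %d\n", find_pci_irq(0, 3, 1));	/* 17 */
	printf("fuzzy: %d\n", find_pci_irq(0, 3, 2));	/* 16 */
	printf("none:  %d\n", find_pci_irq(1, 3, 0));	/* -1 */
	return 0;
}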
1301void lock_vector_lock(void) 1153void lock_vector_lock(void)
1302{ 1154{
1303	/* Used so that the online set of cpus does not change	1155
@@ -1628,58 +1480,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1628 ioapic_write_entry(apic_id, pin, entry); 1480 ioapic_write_entry(apic_id, pin, entry);
1629} 1481}
1630 1482
1483static struct {
1484 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
1485} mp_ioapic_routing[MAX_IO_APICS];
1486
1631static void __init setup_IO_APIC_irqs(void) 1487static void __init setup_IO_APIC_irqs(void)
1632{ 1488{
1633 int apic_id, pin, idx, irq; 1489 int apic_id = 0, pin, idx, irq;
1634 int notcon = 0; 1490 int notcon = 0;
1635 struct irq_desc *desc; 1491 struct irq_desc *desc;
1636 struct irq_cfg *cfg; 1492 struct irq_cfg *cfg;
1637 int cpu = boot_cpu_id; 1493 int node = cpu_to_node(boot_cpu_id);
1638 1494
1639 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1495 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1640 1496
1641 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { 1497#ifdef CONFIG_ACPI
1642 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { 1498 if (!acpi_disabled && acpi_ioapic) {
1643 1499 apic_id = mp_find_ioapic(0);
1644 idx = find_irq_entry(apic_id, pin, mp_INT); 1500 if (apic_id < 0)
1645 if (idx == -1) { 1501 apic_id = 0;
1646 if (!notcon) { 1502 }
1647 notcon = 1; 1503#endif
1648 apic_printk(APIC_VERBOSE,
1649 KERN_DEBUG " %d-%d",
1650 mp_ioapics[apic_id].apicid, pin);
1651 } else
1652 apic_printk(APIC_VERBOSE, " %d-%d",
1653 mp_ioapics[apic_id].apicid, pin);
1654 continue;
1655 }
1656 if (notcon) {
1657 apic_printk(APIC_VERBOSE,
1658 " (apicid-pin) not connected\n");
1659 notcon = 0;
1660 }
1661 1504
1662 irq = pin_2_irq(idx, apic_id, pin); 1505 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1506 idx = find_irq_entry(apic_id, pin, mp_INT);
1507 if (idx == -1) {
1508 if (!notcon) {
1509 notcon = 1;
1510 apic_printk(APIC_VERBOSE,
1511 KERN_DEBUG " %d-%d",
1512 mp_ioapics[apic_id].apicid, pin);
1513 } else
1514 apic_printk(APIC_VERBOSE, " %d-%d",
1515 mp_ioapics[apic_id].apicid, pin);
1516 continue;
1517 }
1518 if (notcon) {
1519 apic_printk(APIC_VERBOSE,
1520 " (apicid-pin) not connected\n");
1521 notcon = 0;
1522 }
1663 1523
1664 /* 1524 irq = pin_2_irq(idx, apic_id, pin);
1665 * Skip the timer IRQ if there's a quirk handler
1666 * installed and if it returns 1:
1667 */
1668 if (apic->multi_timer_check &&
1669 apic->multi_timer_check(apic_id, irq))
1670 continue;
1671 1525
1672 desc = irq_to_desc_alloc_cpu(irq, cpu); 1526 /*
1673 if (!desc) { 1527 * Skip the timer IRQ if there's a quirk handler
1674 printk(KERN_INFO "can not get irq_desc for %d\n", irq); 1528 * installed and if it returns 1:
1675 continue; 1529 */
1676 } 1530 if (apic->multi_timer_check &&
1677 cfg = desc->chip_data; 1531 apic->multi_timer_check(apic_id, irq))
1678 add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); 1532 continue;
1679 1533
1680 setup_IO_APIC_irq(apic_id, pin, irq, desc, 1534 desc = irq_to_desc_alloc_node(irq, node);
1681 irq_trigger(idx), irq_polarity(idx)); 1535 if (!desc) {
1536 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1537 continue;
1682 } 1538 }
1539 cfg = desc->chip_data;
1540 add_pin_to_irq_node(cfg, node, apic_id, pin);
1541 /*
1542 * don't mark it in pin_programmed, so later acpi could
1543 * set it correctly when irq < 16
1544 */
1545 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1546 irq_trigger(idx), irq_polarity(idx));
1683 } 1547 }
1684 1548
1685 if (notcon) 1549 if (notcon)
@@ -1869,7 +1733,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
1869 1733
1870__apicdebuginit(void) print_local_APIC(void *dummy) 1734__apicdebuginit(void) print_local_APIC(void *dummy)
1871{ 1735{
1872 unsigned int v, ver, maxlvt; 1736 unsigned int i, v, ver, maxlvt;
1873 u64 icr; 1737 u64 icr;
1874 1738
1875 if (apic_verbosity == APIC_QUIET) 1739 if (apic_verbosity == APIC_QUIET)
@@ -1957,6 +1821,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1957 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); 1821 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
1958 v = apic_read(APIC_TDCR); 1822 v = apic_read(APIC_TDCR);
1959 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); 1823 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
1824
1825 if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
1826 v = apic_read(APIC_EFEAT);
1827 maxlvt = (v >> 16) & 0xff;
1828 printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
1829 v = apic_read(APIC_ECTRL);
1830 printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
1831 for (i = 0; i < maxlvt; i++) {
1832 v = apic_read(APIC_EILVTn(i));
1833 printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
1834 }
1835 }
1960 printk("\n"); 1836 printk("\n");
1961} 1837}
1962 1838
@@ -2005,6 +1881,11 @@ __apicdebuginit(void) print_PIC(void)
2005__apicdebuginit(int) print_all_ICs(void) 1881__apicdebuginit(int) print_all_ICs(void)
2006{ 1882{
2007 print_PIC(); 1883 print_PIC();
1884
1885 /* don't print out if apic is not there */
1886 if (!cpu_has_apic || disable_apic)
1887 return 0;
1888
2008 print_all_local_APICs(); 1889 print_all_local_APICs();
2009 print_IO_APIC(); 1890 print_IO_APIC();
2010 1891
@@ -2360,6 +2241,118 @@ static int ioapic_retrigger_irq(unsigned int irq)
2360 */ 2241 */
2361 2242
2362#ifdef CONFIG_SMP 2243#ifdef CONFIG_SMP
2244static void send_cleanup_vector(struct irq_cfg *cfg)
2245{
2246 cpumask_var_t cleanup_mask;
2247
2248 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
2249 unsigned int i;
2250 cfg->move_cleanup_count = 0;
2251 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
2252 cfg->move_cleanup_count++;
2253 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
2254 apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
2255 } else {
2256 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
2257 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
2258 apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2259 free_cpumask_var(cleanup_mask);
2260 }
2261 cfg->move_in_progress = 0;
2262}
2263
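Aside, send_cleanup_vector() above (moved here unchanged) has two paths: when a temporary cpumask can be allocated, one IPI covers (old_domain & online) at once; when the GFP_ATOMIC allocation fails, the same set is walked CPU by CPU. A model with plain bitmasks in place of struct cpumask, mask values invented:

#include <stdio.h>

static int popcount(unsigned long m)
{
	int n = 0;

	for (; m; m &= m - 1)
		n++;
	return n;
}

static void send_cleanup(unsigned long old_domain, unsigned long online,
			 int have_tmp_mask)
{
	unsigned long targets = old_domain & online;

	if (!have_tmp_mask) {
		int cpu;

		for (cpu = 0; cpu < (int)(8 * sizeof(targets)); cpu++)
			if (targets & (1UL << cpu))
				printf("IPI -> cpu %d\n", cpu);
	} else {
		printf("one IPI -> mask %#lx (%d cpus)\n",
		       targets, popcount(targets));
	}
}

int main(void)
{
	send_cleanup(0x0f, 0x0d, 1);	/* cpus {0,2,3} covered in one IPI */
	send_cleanup(0x0f, 0x0d, 0);	/* same set, per-cpu fallback path */
	return 0;
}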
2264static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
2265{
2266 int apic, pin;
2267 struct irq_pin_list *entry;
2268 u8 vector = cfg->vector;
2269
2270 entry = cfg->irq_2_pin;
2271 for (;;) {
2272 unsigned int reg;
2273
2274 if (!entry)
2275 break;
2276
2277 apic = entry->apic;
2278 pin = entry->pin;
2279 /*
2280 * With interrupt-remapping, destination information comes
2281 * from interrupt-remapping table entry.
2282 */
2283 if (!irq_remapped(irq))
2284 io_apic_write(apic, 0x11 + pin*2, dest);
2285 reg = io_apic_read(apic, 0x10 + pin*2);
2286 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2287 reg |= vector;
2288 io_apic_modify(apic, 0x10 + pin*2, reg);
2289 if (!entry->next)
2290 break;
2291 entry = entry->next;
2292 }
2293}
2294
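Aside, a sketch of the register dance in __target_IO_APIC_irq() just above: each redirection table entry is accessed as two 32-bit halves, the destination going into register 0x11 + 2*pin and the vector into the low byte of 0x10 + 2*pin. An array stands in for the real io_apic_read/modify/write accessors; the values in main() are invented.

#include <stdio.h>

#define IO_APIC_REDIR_VECTOR_MASK 0xff

static unsigned int rte[2];	/* [0] ~ reg 0x10+2*pin, [1] ~ reg 0x11+2*pin */

static void retarget(unsigned char vector, unsigned int dest_shifted)
{
	rte[1] = dest_shifted;			/* io_apic_write(): new destination */
	rte[0] &= ~IO_APIC_REDIR_VECTOR_MASK;	/* io_apic_modify(): swap in vector */
	rte[0] |= vector;
}

int main(void)
{
	retarget(0x31, 0x02u << 24);	/* logical dest 0x02, vector 0x31 */
	printf("low %08x high %08x\n", rte[0], rte[1]);
	return 0;
}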
2295static int
2296assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
2297
2298/*
2299 * Either sets desc->affinity to a valid value, and returns
2300 * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
2301 * leaves desc->affinity untouched.
2302 */
2303static unsigned int
2304set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
2305{
2306 struct irq_cfg *cfg;
2307 unsigned int irq;
2308
2309 if (!cpumask_intersects(mask, cpu_online_mask))
2310 return BAD_APICID;
2311
2312 irq = desc->irq;
2313 cfg = desc->chip_data;
2314 if (assign_irq_vector(irq, cfg, mask))
2315 return BAD_APICID;
2316
2317 cpumask_copy(desc->affinity, mask);
2318
2319 return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
2320}
2321
2322static int
2323set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2324{
2325 struct irq_cfg *cfg;
2326 unsigned long flags;
2327 unsigned int dest;
2328 unsigned int irq;
2329 int ret = -1;
2330
2331 irq = desc->irq;
2332 cfg = desc->chip_data;
2333
2334 spin_lock_irqsave(&ioapic_lock, flags);
2335 dest = set_desc_affinity(desc, mask);
2336 if (dest != BAD_APICID) {
2337 /* Only the high 8 bits are valid. */
2338 dest = SET_APIC_LOGICAL_ID(dest);
2339 __target_IO_APIC_irq(irq, dest, cfg);
2340 ret = 0;
2341 }
2342 spin_unlock_irqrestore(&ioapic_lock, flags);
2343
2344 return ret;
2345}
2346
2347static int
2348set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
2349{
2350 struct irq_desc *desc;
2351
2352 desc = irq_to_desc(irq);
2353
2354 return set_ioapic_affinity_irq_desc(desc, mask);
2355}
2363 2356
2364#ifdef CONFIG_INTR_REMAP 2357#ifdef CONFIG_INTR_REMAP
2365 2358
@@ -2374,26 +2367,25 @@ static int ioapic_retrigger_irq(unsigned int irq)
2374 * Real vector that is used for interrupting cpu will be coming from 2367 * Real vector that is used for interrupting cpu will be coming from
2375 * the interrupt-remapping table entry. 2368 * the interrupt-remapping table entry.
2376 */ 2369 */
2377static void 2370static int
2378migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2371migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2379{ 2372{
2380 struct irq_cfg *cfg; 2373 struct irq_cfg *cfg;
2381 struct irte irte; 2374 struct irte irte;
2382 unsigned int dest; 2375 unsigned int dest;
2383 unsigned int irq; 2376 unsigned int irq;
2377 int ret = -1;
2384 2378
2385 if (!cpumask_intersects(mask, cpu_online_mask)) 2379 if (!cpumask_intersects(mask, cpu_online_mask))
2386 return; 2380 return ret;
2387 2381
2388 irq = desc->irq; 2382 irq = desc->irq;
2389 if (get_irte(irq, &irte)) 2383 if (get_irte(irq, &irte))
2390 return; 2384 return ret;
2391 2385
2392 cfg = desc->chip_data; 2386 cfg = desc->chip_data;
2393 if (assign_irq_vector(irq, cfg, mask)) 2387 if (assign_irq_vector(irq, cfg, mask))
2394 return; 2388 return ret;
2395
2396 set_extra_move_desc(desc, mask);
2397 2389
2398 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); 2390 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2399 2391
@@ -2409,27 +2401,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2409 send_cleanup_vector(cfg); 2401 send_cleanup_vector(cfg);
2410 2402
2411 cpumask_copy(desc->affinity, mask); 2403 cpumask_copy(desc->affinity, mask);
2404
2405 return 0;
2412} 2406}
2413 2407
2414/* 2408/*
2415 * Migrates the IRQ destination in the process context. 2409 * Migrates the IRQ destination in the process context.
2416 */ 2410 */
2417static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2411static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2418 const struct cpumask *mask) 2412 const struct cpumask *mask)
2419{ 2413{
2420 migrate_ioapic_irq_desc(desc, mask); 2414 return migrate_ioapic_irq_desc(desc, mask);
2421} 2415}
2422static void set_ir_ioapic_affinity_irq(unsigned int irq, 2416static int set_ir_ioapic_affinity_irq(unsigned int irq,
2423 const struct cpumask *mask) 2417 const struct cpumask *mask)
2424{ 2418{
2425 struct irq_desc *desc = irq_to_desc(irq); 2419 struct irq_desc *desc = irq_to_desc(irq);
2426 2420
2427 set_ir_ioapic_affinity_irq_desc(desc, mask); 2421 return set_ir_ioapic_affinity_irq_desc(desc, mask);
2428} 2422}
2429#else 2423#else
2430static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2424static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2431 const struct cpumask *mask) 2425 const struct cpumask *mask)
2432{ 2426{
2427 return 0;
2433} 2428}
2434#endif 2429#endif
2435 2430
@@ -2491,86 +2486,19 @@ static void irq_complete_move(struct irq_desc **descp)
2491 struct irq_cfg *cfg = desc->chip_data; 2486 struct irq_cfg *cfg = desc->chip_data;
2492 unsigned vector, me; 2487 unsigned vector, me;
2493 2488
2494 if (likely(!cfg->move_in_progress)) { 2489 if (likely(!cfg->move_in_progress))
2495#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2496 if (likely(!cfg->move_desc_pending))
2497 return;
2498
2499 /* domain has not changed, but affinity did */
2500 me = smp_processor_id();
2501 if (cpumask_test_cpu(me, desc->affinity)) {
2502 *descp = desc = move_irq_desc(desc, me);
2503 /* get the new one */
2504 cfg = desc->chip_data;
2505 cfg->move_desc_pending = 0;
2506 }
2507#endif
2508 return; 2490 return;
2509 }
2510 2491
2511 vector = ~get_irq_regs()->orig_ax; 2492 vector = ~get_irq_regs()->orig_ax;
2512 me = smp_processor_id(); 2493 me = smp_processor_id();
2513 2494
2514 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { 2495 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2515#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2516 *descp = desc = move_irq_desc(desc, me);
2517 /* get the new one */
2518 cfg = desc->chip_data;
2519#endif
2520 send_cleanup_vector(cfg); 2496 send_cleanup_vector(cfg);
2521 }
2522} 2497}
2523#else 2498#else
2524static inline void irq_complete_move(struct irq_desc **descp) {} 2499static inline void irq_complete_move(struct irq_desc **descp) {}
2525#endif 2500#endif
2526 2501
2527static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2528{
2529 int apic, pin;
2530 struct irq_pin_list *entry;
2531
2532 entry = cfg->irq_2_pin;
2533 for (;;) {
2534
2535 if (!entry)
2536 break;
2537
2538 apic = entry->apic;
2539 pin = entry->pin;
2540 io_apic_eoi(apic, pin);
2541 entry = entry->next;
2542 }
2543}
2544
2545static void
2546eoi_ioapic_irq(struct irq_desc *desc)
2547{
2548 struct irq_cfg *cfg;
2549 unsigned long flags;
2550 unsigned int irq;
2551
2552 irq = desc->irq;
2553 cfg = desc->chip_data;
2554
2555 spin_lock_irqsave(&ioapic_lock, flags);
2556 __eoi_ioapic_irq(irq, cfg);
2557 spin_unlock_irqrestore(&ioapic_lock, flags);
2558}
2559
2560#ifdef CONFIG_X86_X2APIC
2561static void ack_x2apic_level(unsigned int irq)
2562{
2563 struct irq_desc *desc = irq_to_desc(irq);
2564 ack_x2APIC_irq();
2565 eoi_ioapic_irq(desc);
2566}
2567
2568static void ack_x2apic_edge(unsigned int irq)
2569{
2570 ack_x2APIC_irq();
2571}
2572#endif
2573
2574static void ack_apic_edge(unsigned int irq) 2502static void ack_apic_edge(unsigned int irq)
2575{ 2503{
2576 struct irq_desc *desc = irq_to_desc(irq); 2504 struct irq_desc *desc = irq_to_desc(irq);
@@ -2634,9 +2562,6 @@ static void ack_apic_level(unsigned int irq)
2634 */ 2562 */
2635 ack_APIC_irq(); 2563 ack_APIC_irq();
2636 2564
2637 if (irq_remapped(irq))
2638 eoi_ioapic_irq(desc);
2639
2640	/* Now we can move and re-enable the irq */	2565
2641 if (unlikely(do_unmask_irq)) { 2566 if (unlikely(do_unmask_irq)) {
2642 /* Only migrate the irq if the ack has been received. 2567 /* Only migrate the irq if the ack has been received.
@@ -2683,22 +2608,50 @@ static void ack_apic_level(unsigned int irq)
2683} 2608}
2684 2609
2685#ifdef CONFIG_INTR_REMAP 2610#ifdef CONFIG_INTR_REMAP
2611static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2612{
2613 int apic, pin;
2614 struct irq_pin_list *entry;
2615
2616 entry = cfg->irq_2_pin;
2617 for (;;) {
2618
2619 if (!entry)
2620 break;
2621
2622 apic = entry->apic;
2623 pin = entry->pin;
2624 io_apic_eoi(apic, pin);
2625 entry = entry->next;
2626 }
2627}
2628
2629static void
2630eoi_ioapic_irq(struct irq_desc *desc)
2631{
2632 struct irq_cfg *cfg;
2633 unsigned long flags;
2634 unsigned int irq;
2635
2636 irq = desc->irq;
2637 cfg = desc->chip_data;
2638
2639 spin_lock_irqsave(&ioapic_lock, flags);
2640 __eoi_ioapic_irq(irq, cfg);
2641 spin_unlock_irqrestore(&ioapic_lock, flags);
2642}
2643
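Aside, the irq_2_pin walk that __eoi_ioapic_irq() above performs, sketched standalone: one IRQ can be wired to several (apic, pin) pairs kept on a singly linked list, and the directed EOI is replayed at each of them. The two entries in main() are invented.

#include <stdio.h>

struct pin_entry {
	int apic, pin;
	struct pin_entry *next;
};

static void eoi_all(const struct pin_entry *entry)
{
	for (; entry; entry = entry->next)
		printf("io_apic_eoi(%d, %d)\n", entry->apic, entry->pin);
}

int main(void)
{
	struct pin_entry b = { 1, 19, NULL };
	struct pin_entry a = { 0, 19, &b };	/* shared ISA-space IRQ */

	eoi_all(&a);
	return 0;
}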
2686static void ir_ack_apic_edge(unsigned int irq) 2644static void ir_ack_apic_edge(unsigned int irq)
2687{ 2645{
2688#ifdef CONFIG_X86_X2APIC 2646 ack_APIC_irq();
2689 if (x2apic_enabled())
2690 return ack_x2apic_edge(irq);
2691#endif
2692 return ack_apic_edge(irq);
2693} 2647}
2694 2648
2695static void ir_ack_apic_level(unsigned int irq) 2649static void ir_ack_apic_level(unsigned int irq)
2696{ 2650{
2697#ifdef CONFIG_X86_X2APIC 2651 struct irq_desc *desc = irq_to_desc(irq);
2698 if (x2apic_enabled()) 2652
2699 return ack_x2apic_level(irq); 2653 ack_APIC_irq();
2700#endif 2654 eoi_ioapic_irq(desc);
2701 return ack_apic_level(irq);
2702} 2655}
2703#endif /* CONFIG_INTR_REMAP */ 2656#endif /* CONFIG_INTR_REMAP */
2704 2657
@@ -2903,7 +2856,7 @@ static inline void __init check_timer(void)
2903{ 2856{
2904 struct irq_desc *desc = irq_to_desc(0); 2857 struct irq_desc *desc = irq_to_desc(0);
2905 struct irq_cfg *cfg = desc->chip_data; 2858 struct irq_cfg *cfg = desc->chip_data;
2906 int cpu = boot_cpu_id; 2859 int node = cpu_to_node(boot_cpu_id);
2907 int apic1, pin1, apic2, pin2; 2860 int apic1, pin1, apic2, pin2;
2908 unsigned long flags; 2861 unsigned long flags;
2909 int no_pin1 = 0; 2862 int no_pin1 = 0;
@@ -2969,7 +2922,7 @@ static inline void __init check_timer(void)
2969 * Ok, does IRQ0 through the IOAPIC work? 2922 * Ok, does IRQ0 through the IOAPIC work?
2970 */ 2923 */
2971 if (no_pin1) { 2924 if (no_pin1) {
2972 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); 2925 add_pin_to_irq_node(cfg, node, apic1, pin1);
2973 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2926 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2974 } else { 2927 } else {
2975 /* for edge trigger, setup_IO_APIC_irq already 2928 /* for edge trigger, setup_IO_APIC_irq already
@@ -3006,7 +2959,7 @@ static inline void __init check_timer(void)
3006 /* 2959 /*
3007 * legacy devices should be connected to IO APIC #0 2960 * legacy devices should be connected to IO APIC #0
3008 */ 2961 */
3009 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); 2962 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
3010 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2963 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
3011 enable_8259A_irq(0); 2964 enable_8259A_irq(0);
3012 if (timer_irq_works()) { 2965 if (timer_irq_works()) {
@@ -3218,14 +3171,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
3218/* 3171/*
3219 * Dynamic irq allocate and deallocation 3172 * Dynamic irq allocate and deallocation
3220 */ 3173 */
3221unsigned int create_irq_nr(unsigned int irq_want) 3174unsigned int create_irq_nr(unsigned int irq_want, int node)
3222{ 3175{
3223 /* Allocate an unused irq */ 3176 /* Allocate an unused irq */
3224 unsigned int irq; 3177 unsigned int irq;
3225 unsigned int new; 3178 unsigned int new;
3226 unsigned long flags; 3179 unsigned long flags;
3227 struct irq_cfg *cfg_new = NULL; 3180 struct irq_cfg *cfg_new = NULL;
3228 int cpu = boot_cpu_id;
3229 struct irq_desc *desc_new = NULL; 3181 struct irq_desc *desc_new = NULL;
3230 3182
3231 irq = 0; 3183 irq = 0;
@@ -3234,7 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
3234 3186
3235 spin_lock_irqsave(&vector_lock, flags); 3187 spin_lock_irqsave(&vector_lock, flags);
3236 for (new = irq_want; new < nr_irqs; new++) { 3188 for (new = irq_want; new < nr_irqs; new++) {
3237 desc_new = irq_to_desc_alloc_cpu(new, cpu); 3189 desc_new = irq_to_desc_alloc_node(new, node);
3238 if (!desc_new) { 3190 if (!desc_new) {
3239 printk(KERN_INFO "can not get irq_desc for %d\n", new); 3191 printk(KERN_INFO "can not get irq_desc for %d\n", new);
3240 continue; 3192 continue;
@@ -3243,6 +3195,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
3243 3195
3244 if (cfg_new->vector != 0) 3196 if (cfg_new->vector != 0)
3245 continue; 3197 continue;
3198
3199 desc_new = move_irq_desc(desc_new, node);
3200
3246 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) 3201 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
3247 irq = new; 3202 irq = new;
3248 break; 3203 break;
@@ -3260,11 +3215,12 @@ unsigned int create_irq_nr(unsigned int irq_want)
3260 3215
3261int create_irq(void) 3216int create_irq(void)
3262{ 3217{
3218 int node = cpu_to_node(boot_cpu_id);
3263 unsigned int irq_want; 3219 unsigned int irq_want;
3264 int irq; 3220 int irq;
3265 3221
3266 irq_want = nr_irqs_gsi; 3222 irq_want = nr_irqs_gsi;
3267 irq = create_irq_nr(irq_want); 3223 irq = create_irq_nr(irq_want, node);
3268 3224
3269 if (irq == 0) 3225 if (irq == 0)
3270 irq = -1; 3226 irq = -1;
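Aside, a sketch of the create_irq_nr() scan after this change: starting at irq_want, take the first descriptor whose vector is still 0, with 0 doubling as the "nothing found" return. NR_IRQS_MODEL and the vector_used table are invented; the real code also migrates the descriptor to the requested node via move_irq_desc() before assigning a vector.

#include <stdio.h>
#include <string.h>

#define NR_IRQS_MODEL 32

static unsigned char vector_used[NR_IRQS_MODEL];

static unsigned int create_irq_nr_model(unsigned int irq_want)
{
	unsigned int new_irq;

	for (new_irq = irq_want; new_irq < NR_IRQS_MODEL; new_irq++) {
		if (vector_used[new_irq])
			continue;		/* cfg->vector != 0 */
		vector_used[new_irq] = 1;	/* __assign_irq_vector() ok */
		return new_irq;
	}
	return 0;	/* like the kernel, 0 means no irq was found */
}

int main(void)
{
	memset(vector_used, 1, 16);	/* legacy IRQs 0..15 already taken */
	printf("first free: %u\n", create_irq_nr_model(16));	/* 16 */
	printf("next free:  %u\n", create_irq_nr_model(16));	/* 17 */
	return 0;
}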
@@ -3366,7 +3322,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3366} 3322}
3367 3323
3368#ifdef CONFIG_SMP 3324#ifdef CONFIG_SMP
3369static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3325static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3370{ 3326{
3371 struct irq_desc *desc = irq_to_desc(irq); 3327 struct irq_desc *desc = irq_to_desc(irq);
3372 struct irq_cfg *cfg; 3328 struct irq_cfg *cfg;
@@ -3375,7 +3331,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3375 3331
3376 dest = set_desc_affinity(desc, mask); 3332 dest = set_desc_affinity(desc, mask);
3377 if (dest == BAD_APICID) 3333 if (dest == BAD_APICID)
3378 return; 3334 return -1;
3379 3335
3380 cfg = desc->chip_data; 3336 cfg = desc->chip_data;
3381 3337
@@ -3387,13 +3343,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3387 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3343 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3388 3344
3389 write_msi_msg_desc(desc, &msg); 3345 write_msi_msg_desc(desc, &msg);
3346
3347 return 0;
3390} 3348}
3391#ifdef CONFIG_INTR_REMAP 3349#ifdef CONFIG_INTR_REMAP
3392/* 3350/*
3393 * Migrate the MSI irq to another cpumask. This migration is 3351 * Migrate the MSI irq to another cpumask. This migration is
3394 * done in the process context using interrupt-remapping hardware. 3352 * done in the process context using interrupt-remapping hardware.
3395 */ 3353 */
3396static void 3354static int
3397ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3355ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3398{ 3356{
3399 struct irq_desc *desc = irq_to_desc(irq); 3357 struct irq_desc *desc = irq_to_desc(irq);
@@ -3402,11 +3360,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3402 struct irte irte; 3360 struct irte irte;
3403 3361
3404 if (get_irte(irq, &irte)) 3362 if (get_irte(irq, &irte))
3405 return; 3363 return -1;
3406 3364
3407 dest = set_desc_affinity(desc, mask); 3365 dest = set_desc_affinity(desc, mask);
3408 if (dest == BAD_APICID) 3366 if (dest == BAD_APICID)
3409 return; 3367 return -1;
3410 3368
3411 irte.vector = cfg->vector; 3369 irte.vector = cfg->vector;
3412 irte.dest_id = IRTE_DEST(dest); 3370 irte.dest_id = IRTE_DEST(dest);
@@ -3423,6 +3381,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3423 */ 3381 */
3424 if (cfg->move_in_progress) 3382 if (cfg->move_in_progress)
3425 send_cleanup_vector(cfg); 3383 send_cleanup_vector(cfg);
3384
3385 return 0;
3426} 3386}
3427 3387
3428#endif 3388#endif
@@ -3518,15 +3478,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3518 unsigned int irq_want; 3478 unsigned int irq_want;
3519 struct intel_iommu *iommu = NULL; 3479 struct intel_iommu *iommu = NULL;
3520 int index = 0; 3480 int index = 0;
3481 int node;
3521 3482
3522 /* x86 doesn't support multiple MSI yet */ 3483 /* x86 doesn't support multiple MSI yet */
3523 if (type == PCI_CAP_ID_MSI && nvec > 1) 3484 if (type == PCI_CAP_ID_MSI && nvec > 1)
3524 return 1; 3485 return 1;
3525 3486
3487 node = dev_to_node(&dev->dev);
3526 irq_want = nr_irqs_gsi; 3488 irq_want = nr_irqs_gsi;
3527 sub_handle = 0; 3489 sub_handle = 0;
3528 list_for_each_entry(msidesc, &dev->msi_list, list) { 3490 list_for_each_entry(msidesc, &dev->msi_list, list) {
3529 irq = create_irq_nr(irq_want); 3491 irq = create_irq_nr(irq_want, node);
3530 if (irq == 0) 3492 if (irq == 0)
3531 return -1; 3493 return -1;
3532 irq_want = irq + 1; 3494 irq_want = irq + 1;
@@ -3576,7 +3538,7 @@ void arch_teardown_msi_irq(unsigned int irq)
3576 3538
3577#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) 3539#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
3578#ifdef CONFIG_SMP 3540#ifdef CONFIG_SMP
3579static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3541static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3580{ 3542{
3581 struct irq_desc *desc = irq_to_desc(irq); 3543 struct irq_desc *desc = irq_to_desc(irq);
3582 struct irq_cfg *cfg; 3544 struct irq_cfg *cfg;
@@ -3585,7 +3547,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3585 3547
3586 dest = set_desc_affinity(desc, mask); 3548 dest = set_desc_affinity(desc, mask);
3587 if (dest == BAD_APICID) 3549 if (dest == BAD_APICID)
3588 return; 3550 return -1;
3589 3551
3590 cfg = desc->chip_data; 3552 cfg = desc->chip_data;
3591 3553
@@ -3597,6 +3559,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3597 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3559 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3598 3560
3599 dmar_msi_write(irq, &msg); 3561 dmar_msi_write(irq, &msg);
3562
3563 return 0;
3600} 3564}
3601 3565
3602#endif /* CONFIG_SMP */ 3566#endif /* CONFIG_SMP */
@@ -3630,7 +3594,7 @@ int arch_setup_dmar_msi(unsigned int irq)
3630#ifdef CONFIG_HPET_TIMER 3594#ifdef CONFIG_HPET_TIMER
3631 3595
3632#ifdef CONFIG_SMP 3596#ifdef CONFIG_SMP
3633static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3597static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3634{ 3598{
3635 struct irq_desc *desc = irq_to_desc(irq); 3599 struct irq_desc *desc = irq_to_desc(irq);
3636 struct irq_cfg *cfg; 3600 struct irq_cfg *cfg;
@@ -3639,7 +3603,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3639 3603
3640 dest = set_desc_affinity(desc, mask); 3604 dest = set_desc_affinity(desc, mask);
3641 if (dest == BAD_APICID) 3605 if (dest == BAD_APICID)
3642 return; 3606 return -1;
3643 3607
3644 cfg = desc->chip_data; 3608 cfg = desc->chip_data;
3645 3609
@@ -3651,6 +3615,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3651 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3615 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3652 3616
3653 hpet_msi_write(irq, &msg); 3617 hpet_msi_write(irq, &msg);
3618
3619 return 0;
3654} 3620}
3655 3621
3656#endif /* CONFIG_SMP */ 3622#endif /* CONFIG_SMP */
@@ -3707,7 +3673,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3707 write_ht_irq_msg(irq, &msg); 3673 write_ht_irq_msg(irq, &msg);
3708} 3674}
3709 3675
3710static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) 3676static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3711{ 3677{
3712 struct irq_desc *desc = irq_to_desc(irq); 3678 struct irq_desc *desc = irq_to_desc(irq);
3713 struct irq_cfg *cfg; 3679 struct irq_cfg *cfg;
@@ -3715,11 +3681,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3715 3681
3716 dest = set_desc_affinity(desc, mask); 3682 dest = set_desc_affinity(desc, mask);
3717 if (dest == BAD_APICID) 3683 if (dest == BAD_APICID)
3718 return; 3684 return -1;
3719 3685
3720 cfg = desc->chip_data; 3686 cfg = desc->chip_data;
3721 3687
3722 target_ht_irq(irq, dest, cfg->vector); 3688 target_ht_irq(irq, dest, cfg->vector);
3689
3690 return 0;
3723} 3691}
3724 3692
3725#endif 3693#endif
@@ -3794,6 +3762,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3794 unsigned long flags; 3762 unsigned long flags;
3795 int err; 3763 int err;
3796 3764
3765 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3766
3797 cfg = irq_cfg(irq); 3767 cfg = irq_cfg(irq);
3798 3768
3799 err = assign_irq_vector(irq, cfg, eligible_cpu); 3769 err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3807,15 +3777,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3807 3777
3808 mmr_value = 0; 3778 mmr_value = 0;
3809 entry = (struct uv_IO_APIC_route_entry *)&mmr_value; 3779 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3810 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3780 entry->vector = cfg->vector;
3811 3781 entry->delivery_mode = apic->irq_delivery_mode;
3812 entry->vector = cfg->vector; 3782 entry->dest_mode = apic->irq_dest_mode;
3813 entry->delivery_mode = apic->irq_delivery_mode; 3783 entry->polarity = 0;
3814 entry->dest_mode = apic->irq_dest_mode; 3784 entry->trigger = 0;
3815 entry->polarity = 0; 3785 entry->mask = 0;
3816 entry->trigger = 0; 3786 entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
3817 entry->mask = 0;
3818 entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
3819 3787
3820 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3788 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3821 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3789 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3833,10 +3801,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
3833 struct uv_IO_APIC_route_entry *entry; 3801 struct uv_IO_APIC_route_entry *entry;
3834 int mmr_pnode; 3802 int mmr_pnode;
3835 3803
3804 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3805
3836 mmr_value = 0; 3806 mmr_value = 0;
3837 entry = (struct uv_IO_APIC_route_entry *)&mmr_value; 3807 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3838 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3839
3840 entry->mask = 1; 3808 entry->mask = 1;
3841 3809
3842 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3810 mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -3900,6 +3868,71 @@ int __init arch_probe_nr_irqs(void)
3900} 3868}
3901#endif 3869#endif
3902 3870
3871static int __io_apic_set_pci_routing(struct device *dev, int irq,
3872 struct io_apic_irq_attr *irq_attr)
3873{
3874 struct irq_desc *desc;
3875 struct irq_cfg *cfg;
3876 int node;
3877 int ioapic, pin;
3878 int trigger, polarity;
3879
3880 ioapic = irq_attr->ioapic;
3881 if (!IO_APIC_IRQ(irq)) {
3883		apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3883 ioapic);
3884 return -EINVAL;
3885 }
3886
3887 if (dev)
3888 node = dev_to_node(dev);
3889 else
3890 node = cpu_to_node(boot_cpu_id);
3891
3892 desc = irq_to_desc_alloc_node(irq, node);
3893 if (!desc) {
3894 printk(KERN_INFO "can not get irq_desc %d\n", irq);
3895 return 0;
3896 }
3897
3898 pin = irq_attr->ioapic_pin;
3899 trigger = irq_attr->trigger;
3900 polarity = irq_attr->polarity;
3901
3902 /*
3903 * IRQs < 16 are already in the irq_2_pin[] map
3904 */
3905 if (irq >= NR_IRQS_LEGACY) {
3906 cfg = desc->chip_data;
3907 add_pin_to_irq_node(cfg, node, ioapic, pin);
3908 }
3909
3910 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
3911
3912 return 0;
3913}
3914
3915int io_apic_set_pci_routing(struct device *dev, int irq,
3916 struct io_apic_irq_attr *irq_attr)
3917{
3918 int ioapic, pin;
3919 /*
3920 * Avoid pin reprogramming. PRTs typically include entries
3921 * with redundant pin->gsi mappings (but unique PCI devices);
3922 * we only program the IOAPIC on the first.
3923 */
3924 ioapic = irq_attr->ioapic;
3925 pin = irq_attr->ioapic_pin;
3926 if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
3927 pr_debug("Pin %d-%d already programmed\n",
3928 mp_ioapics[ioapic].apicid, pin);
3929 return 0;
3930 }
3931 set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
3932
3933 return __io_apic_set_pci_routing(dev, irq, irq_attr);
3934}
3935
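Aside, the pin_programmed bitmap introduced above deduplicates PRT entries that repeat the same ioapic/pin pair, so only the first caller actually programs the pin. A model with one unsigned long per IOAPIC standing in for DECLARE_BITMAP; the array size is invented:

#include <stdio.h>

#define MAX_IOAPICS_MODEL 4

static unsigned long pin_programmed[MAX_IOAPICS_MODEL];

static int set_pci_routing_model(int ioapic, int pin)
{
	if (pin_programmed[ioapic] & (1UL << pin)) {
		printf("pin %d-%d already programmed\n", ioapic, pin);
		return 0;		/* redundant PRT entry, skip */
	}
	pin_programmed[ioapic] |= 1UL << pin;
	printf("programming pin %d-%d\n", ioapic, pin);
	return 0;
}

int main(void)
{
	set_pci_routing_model(0, 9);	/* first request programs the pin */
	set_pci_routing_model(0, 9);	/* second one is a no-op */
	return 0;
}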
3903/* -------------------------------------------------------------------------- 3936/* --------------------------------------------------------------------------
3904 ACPI-based IOAPIC Configuration 3937 ACPI-based IOAPIC Configuration
3905 -------------------------------------------------------------------------- */ 3938 -------------------------------------------------------------------------- */
@@ -3980,6 +4013,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3980 4013
3981 return apic_id; 4014 return apic_id;
3982} 4015}
4016#endif
3983 4017
3984int __init io_apic_get_version(int ioapic) 4018int __init io_apic_get_version(int ioapic)
3985{ 4019{
@@ -3992,39 +4026,6 @@ int __init io_apic_get_version(int ioapic)
3992 4026
3993 return reg_01.bits.version; 4027 return reg_01.bits.version;
3994} 4028}
3995#endif
3996
3997int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
3998{
3999 struct irq_desc *desc;
4000 struct irq_cfg *cfg;
4001 int cpu = boot_cpu_id;
4002
4003 if (!IO_APIC_IRQ(irq)) {
4004 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
4005 ioapic);
4006 return -EINVAL;
4007 }
4008
4009 desc = irq_to_desc_alloc_cpu(irq, cpu);
4010 if (!desc) {
4011 printk(KERN_INFO "can not get irq_desc %d\n", irq);
4012 return 0;
4013 }
4014
4015 /*
4016 * IRQs < 16 are already in the irq_2_pin[] map
4017 */
4018 if (irq >= NR_IRQS_LEGACY) {
4019 cfg = desc->chip_data;
4020 add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
4021 }
4022
4023 setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
4024
4025 return 0;
4026}
4027
4028 4029
4029int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) 4030int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4030{ 4031{
@@ -4055,51 +4056,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4055#ifdef CONFIG_SMP 4056#ifdef CONFIG_SMP
4056void __init setup_ioapic_dest(void) 4057void __init setup_ioapic_dest(void)
4057{ 4058{
4058 int pin, ioapic, irq, irq_entry; 4059 int pin, ioapic = 0, irq, irq_entry;
4059 struct irq_desc *desc; 4060 struct irq_desc *desc;
4060 struct irq_cfg *cfg;
4061 const struct cpumask *mask; 4061 const struct cpumask *mask;
4062 4062
4063 if (skip_ioapic_setup == 1) 4063 if (skip_ioapic_setup == 1)
4064 return; 4064 return;
4065 4065
4066 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { 4066#ifdef CONFIG_ACPI
4067 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { 4067 if (!acpi_disabled && acpi_ioapic) {
4068 irq_entry = find_irq_entry(ioapic, pin, mp_INT); 4068 ioapic = mp_find_ioapic(0);
4069 if (irq_entry == -1) 4069 if (ioapic < 0)
4070 continue; 4070 ioapic = 0;
4071 irq = pin_2_irq(irq_entry, ioapic, pin); 4071 }
4072 4072#endif
4073 /* setup_IO_APIC_irqs could fail to get vector for some device
4074 * when you have too many devices, because at that time only boot
4075 * cpu is online.
4076 */
4077 desc = irq_to_desc(irq);
4078 cfg = desc->chip_data;
4079 if (!cfg->vector) {
4080 setup_IO_APIC_irq(ioapic, pin, irq, desc,
4081 irq_trigger(irq_entry),
4082 irq_polarity(irq_entry));
4083 continue;
4084 4073
4085 } 4074 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
4075 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
4076 if (irq_entry == -1)
4077 continue;
4078 irq = pin_2_irq(irq_entry, ioapic, pin);
4086 4079
4087 /* 4080 desc = irq_to_desc(irq);
4088 * Honour affinities which have been set in early boot
4089 */
4090 if (desc->status &
4091 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4092 mask = desc->affinity;
4093 else
4094 mask = apic->target_cpus();
4095 4081
4096 if (intr_remapping_enabled) 4082 /*
4097 set_ir_ioapic_affinity_irq_desc(desc, mask); 4083 * Honour affinities which have been set in early boot
4098 else 4084 */
4099 set_ioapic_affinity_irq_desc(desc, mask); 4085 if (desc->status &
4100 } 4086 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4087 mask = desc->affinity;
4088 else
4089 mask = apic->target_cpus();
4101 4090
4091 if (intr_remapping_enabled)
4092 set_ir_ioapic_affinity_irq_desc(desc, mask);
4093 else
4094 set_ioapic_affinity_irq_desc(desc, mask);
4102 } 4095 }
4096
4103} 4097}
4104#endif 4098#endif
4105 4099
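Aside, the affinity choice that setup_ioapic_dest() keeps making after this rework, in isolation: IRQs flagged IRQ_NO_BALANCING or IRQ_AFFINITY_SET honour the mask set in early boot, all others fall back to the apic driver's target_cpus(). The flag values and masks below are invented:

#include <stdio.h>

#define IRQ_NO_BALANCING	0x1
#define IRQ_AFFINITY_SET	0x2

static unsigned long pick_mask(unsigned int status,
			       unsigned long affinity,
			       unsigned long target_cpus)
{
	if (status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
		return affinity;	/* honour early-boot setting */
	return target_cpus;
}

int main(void)
{
	printf("%#lx\n", pick_mask(IRQ_AFFINITY_SET, 0x4, 0xff));	/* 0x4 */
	printf("%#lx\n", pick_mask(0, 0x4, 0xff));			/* 0xff */
	return 0;
}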
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index ce4fbfa315a1..a691302dc3ff 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -104,7 +104,7 @@ static __init void nmi_cpu_busy(void *data)
104} 104}
105#endif 105#endif
106 106
107static void report_broken_nmi(int cpu, int *prev_nmi_count) 107static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
108{ 108{
109 printk(KERN_CONT "\n"); 109 printk(KERN_CONT "\n");
110 110
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 01eda2ac65e4..440a8bccd91a 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -160,7 +160,6 @@ extern struct apic apic_summit;
160extern struct apic apic_bigsmp; 160extern struct apic apic_bigsmp;
161extern struct apic apic_es7000; 161extern struct apic apic_es7000;
162extern struct apic apic_es7000_cluster; 162extern struct apic apic_es7000_cluster;
163extern struct apic apic_default;
164 163
165struct apic *apic = &apic_default; 164struct apic *apic = &apic_default;
166EXPORT_SYMBOL_GPL(apic); 165EXPORT_SYMBOL_GPL(apic);
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 1783652bb0e5..bc3e880f9b82 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = {
50void __init default_setup_apic_routing(void) 50void __init default_setup_apic_routing(void)
51{ 51{
52#ifdef CONFIG_X86_X2APIC 52#ifdef CONFIG_X86_X2APIC
53 if (x2apic && (apic != &apic_x2apic_phys && 53 if (x2apic_mode && (apic != &apic_x2apic_phys &&
54#ifdef CONFIG_X86_UV 54#ifdef CONFIG_X86_UV
55 apic != &apic_x2apic_uv_x && 55 apic != &apic_x2apic_uv_x &&
56#endif 56#endif
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9cfe1f415d81..344eee4ac0a4 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -173,13 +173,6 @@ static inline int is_WPEG(struct rio_detail *rio){
173 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); 173 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
174} 174}
175 175
176
177/* In clustered mode, the high nibble of APIC ID is a cluster number.
178 * The low nibble is a 4-bit bitmap. */
179#define XAPIC_DEST_CPUS_SHIFT 4
180#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
181#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
182
183#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) 176#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
184 177
185static const struct cpumask *summit_target_cpus(void) 178static const struct cpumask *summit_target_cpus(void)
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 4a903e2f0d17..8e4cbb255c38 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -10,7 +10,7 @@
10#include <asm/apic.h> 10#include <asm/apic.h>
11#include <asm/ipi.h> 11#include <asm/ipi.h>
12 12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); 13static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14 14
15static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 15static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{ 16{
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 2bda69352976..ef0ae207a7c8 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -105,7 +105,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
105 cpumask_set_cpu(cpu, retmask); 105 cpumask_set_cpu(cpu, retmask);
106} 106}
107 107
108static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) 108static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
109{ 109{
110#ifdef CONFIG_SMP 110#ifdef CONFIG_SMP
111 unsigned long val; 111 unsigned long val;
@@ -562,7 +562,7 @@ void __init uv_system_init(void)
562 union uvh_node_id_u node_id; 562 union uvh_node_id_u node_id;
563 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 563 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
564 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 564 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
565 int max_pnode = 0; 565 int gnode_extra, max_pnode = 0;
566 unsigned long mmr_base, present, paddr; 566 unsigned long mmr_base, present, paddr;
567 unsigned short pnode_mask; 567 unsigned short pnode_mask;
568 568
@@ -574,6 +574,13 @@ void __init uv_system_init(void)
574 mmr_base = 574 mmr_base =
575 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 575 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
576 ~UV_MMR_ENABLE; 576 ~UV_MMR_ENABLE;
577 pnode_mask = (1 << n_val) - 1;
578 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
579 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
580 gnode_upper = ((unsigned long)gnode_extra << m_val);
581 printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
582 n_val, m_val, gnode_upper, gnode_extra);
583
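Aside, the gnode arithmetic added above in isolation: the low n_val bits of UVH_NODE_ID are the pnode, the remaining bits shifted right once become gnode_extra, and gnode_upper re-shifts that by m_val address bits. The node_id/m_val/n_val inputs below are invented:

#include <stdio.h>

int main(void)
{
	unsigned long node_id = 0x25;	/* pretend UVH_NODE_ID contents */
	int m_val = 6, n_val = 4;
	unsigned long pnode_mask = (1UL << n_val) - 1;
	int gnode_extra = (int)((node_id & ~pnode_mask) >> 1);
	unsigned long gnode_upper = (unsigned long)gnode_extra << m_val;

	printf("pnode %lu gnode_extra %#x gnode_upper %#lx\n",
	       node_id & pnode_mask, gnode_extra, gnode_upper);
	return 0;
}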
577 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); 584 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
578 585
579 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) 586 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
@@ -583,15 +590,18 @@ void __init uv_system_init(void)
583 590
584 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 591 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
585 uv_blade_info = kmalloc(bytes, GFP_KERNEL); 592 uv_blade_info = kmalloc(bytes, GFP_KERNEL);
593 BUG_ON(!uv_blade_info);
586 594
587 get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); 595 get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
588 596
589 bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); 597 bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
590 uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); 598 uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
599 BUG_ON(!uv_node_to_blade);
591 memset(uv_node_to_blade, 255, bytes); 600 memset(uv_node_to_blade, 255, bytes);
592 601
593 bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); 602 bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
594 uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); 603 uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
604 BUG_ON(!uv_cpu_to_blade);
595 memset(uv_cpu_to_blade, 255, bytes); 605 memset(uv_cpu_to_blade, 255, bytes);
596 606
597 blade = 0; 607 blade = 0;
@@ -607,11 +617,6 @@ void __init uv_system_init(void)
607 } 617 }
608 } 618 }
609 619
610 pnode_mask = (1 << n_val) - 1;
611 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
612 gnode_upper = (((unsigned long)node_id.s.node_id) &
613 ~((1 << n_val) - 1)) << m_val;
614
615 uv_bios_init(); 620 uv_bios_init();
616 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, 621 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
617 &sn_coherency_id, &sn_region_size); 622 &sn_coherency_id, &sn_region_size);
@@ -634,6 +639,7 @@ void __init uv_system_init(void)
634 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; 639 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
635 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 640 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
636 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 641 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
642 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
637 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 643 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
638 uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; 644 uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
639 uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; 645 uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 5a6aa1c1162f..1a830cbd7015 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -146,4 +146,5 @@ void foo(void)
146 OFFSET(BP_loadflags, boot_params, hdr.loadflags); 146 OFFSET(BP_loadflags, boot_params, hdr.loadflags);
147 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); 147 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
148 OFFSET(BP_version, boot_params, hdr.version); 148 OFFSET(BP_version, boot_params, hdr.version);
149 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
149} 150}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index e72f062fb4b5..898ecc47e129 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -125,6 +125,7 @@ int main(void)
 	OFFSET(BP_loadflags, boot_params, hdr.loadflags);
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
+	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 
 	BLANK();
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7e4a459daa64..e5b27d8f1b47 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -6,6 +6,7 @@
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
+#include <asm/pci-direct.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/numa_64.h>
@@ -272,7 +273,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	int cpu = smp_processor_id();
 	int node;
-	unsigned apicid = hard_smp_processor_id();
+	unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	node = c->phys_proc_id;
 	if (apicid_to_node[apicid] != NUMA_NO_NODE)
@@ -351,6 +352,15 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 	    (c->x86_model == 8 && c->x86_mask >= 8))
 		set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+	/* check CPU config space for extended APIC ID */
+	if (c->x86 >= 0xf) {
+		unsigned int val;
+		val = read_pci_config(0, 24, 0, 0x68);
+		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+	}
+#endif
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
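
For reference: the early_init_amd() hunk above only advertises X86_FEATURE_EXTD_APICID when bits 17 and 18 of northbridge config register 0x68 (bus 0, device 24, function 0) are both set, so the comparison is against the combined mask rather than a plain non-zero test. A small stand-alone sketch of that check; the register value below is invented for illustration:

#include <stdio.h>

#define EXT_APIC_BITS	((1u << 17) | (1u << 18))	/* both must be set */

int main(void)
{
	unsigned int val = 0x00060000;	/* hypothetical D18F0x68 contents */

	/* (val & mask) == mask is true only when every mask bit is set */
	if ((val & EXT_APIC_BITS) == EXT_APIC_BITS)
		printf("extended APIC ID usable\n");
	else
		printf("extended APIC ID not enabled\n");
	return 0;
}
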
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c1caefc82e62..b0517aa2bd3b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -114,6 +114,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
+static int __init x86_xsave_setup(char *s)
+{
+	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+	return 1;
+}
+__setup("noxsave", x86_xsave_setup);
+
 #ifdef CONFIG_X86_32
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_serial_nr __cpuinitdata = 1;
@@ -292,7 +299,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
 	return NULL;		/* Not found */
 }
 
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
 
 void load_percpu_segment(int cpu)
 {
@@ -761,6 +769,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	if (this_cpu->c_identify)
 		this_cpu->c_identify(c);
 
+	/* Clear/Set all flags overridden by options, after probe */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 #ifdef CONFIG_X86_64
 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 #endif
@@ -806,6 +820,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #endif
 
 	init_hypervisor(c);
+
+	/*
+	 * Clear/Set all flags overridden by options; this needs to
+	 * happen before the SMP all-CPUs capability AND below.
+	 */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
 	 * all CPUs; so make sure that we indicate which features are
@@ -818,10 +842,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 		boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
-	/* Clear all flags overriden by options */
-	for (i = 0; i < NCAPINTS; i++)
-		c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
 #ifdef CONFIG_X86_MCE
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
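
The cpu_caps_cleared/cpu_caps_set pair introduced above is applied with clear-before-set ordering, so a capability bit named in both masks ends up set. A toy sketch of that masking step (NCAPINTS shrunk and the mask values invented for illustration):

#include <stdio.h>

#define NCAPINTS 2	/* the real kernel uses more capability words */

static unsigned int cpu_caps_cleared[NCAPINTS] = { 0x4, 0x0 };
static unsigned int cpu_caps_set[NCAPINTS]     = { 0x0, 0x1 };

int main(void)
{
	unsigned int x86_capability[NCAPINTS] = { 0x7, 0x0 };
	int i;

	/* same order as identify_cpu(): clear first, then force-set */
	for (i = 0; i < NCAPINTS; i++) {
		x86_capability[i] &= ~cpu_caps_cleared[i];
		x86_capability[i] |= cpu_caps_set[i];
	}

	for (i = 0; i < NCAPINTS; i++)
		printf("word %d: 0x%x\n", i, x86_capability[i]);
	return 0;	/* prints 0x3 and 0x1 */
}
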
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 46e29ab96c6a..6b2a52dd0403 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -32,9 +32,7 @@
 
 static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
 static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
-static DEFINE_PER_CPU(unsigned, cpu_modelflag);
 static DEFINE_PER_CPU(int, cpu_priv_count);
-static DEFINE_PER_CPU(unsigned, cpu_model);
 
 static DEFINE_MUTEX(cpu_debug_lock);
 
@@ -80,302 +78,102 @@ static struct cpu_file_base cpu_file[] = {
80 { "value", CPU_REG_ALL, 1 }, 78 { "value", CPU_REG_ALL, 1 },
81}; 79};
82 80
83/* Intel Registers Range */ 81/* CPU Registers Range */
84static struct cpu_debug_range cpu_intel_range[] = { 82static struct cpu_debug_range cpu_reg_range[] = {
85 { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL }, 83 { 0x00000000, 0x00000001, CPU_MC, },
86 { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE }, 84 { 0x00000006, 0x00000007, CPU_MONITOR, },
87 { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL }, 85 { 0x00000010, 0x00000010, CPU_TIME, },
88 { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM }, 86 { 0x00000011, 0x00000013, CPU_PMC, },
89 { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE }, 87 { 0x00000017, 0x00000017, CPU_PLATFORM, },
90 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE }, 88 { 0x0000001B, 0x0000001B, CPU_APIC, },
91 89 { 0x0000002A, 0x0000002B, CPU_POWERON, },
92 { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE }, 90 { 0x0000002C, 0x0000002C, CPU_FREQ, },
93 { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON }, 91 { 0x0000003A, 0x0000003A, CPU_CONTROL, },
94 { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON }, 92 { 0x00000040, 0x00000047, CPU_LBRANCH, },
95 { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE }, 93 { 0x00000060, 0x00000067, CPU_LBRANCH, },
96 94 { 0x00000079, 0x00000079, CPU_BIOS, },
97 { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE }, 95 { 0x00000088, 0x0000008A, CPU_CACHE, },
98 { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT }, 96 { 0x0000008B, 0x0000008B, CPU_BIOS, },
99 { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT }, 97 { 0x0000009B, 0x0000009B, CPU_MONITOR, },
100 { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM }, 98 { 0x000000C1, 0x000000C4, CPU_PMC, },
101 99 { 0x000000CD, 0x000000CD, CPU_FREQ, },
102 { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE }, 100 { 0x000000E7, 0x000000E8, CPU_PERF, },
103 { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 }, 101 { 0x000000FE, 0x000000FE, CPU_MTRR, },
104 { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE }, 102
105 { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON }, 103 { 0x00000116, 0x0000011E, CPU_CACHE, },
106 104 { 0x00000174, 0x00000176, CPU_SYSENTER, },
107 { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT }, 105 { 0x00000179, 0x0000017B, CPU_MC, },
108 { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT }, 106 { 0x00000186, 0x00000189, CPU_PMC, },
109 { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT }, 107 { 0x00000198, 0x00000199, CPU_PERF, },
110 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE }, 108 { 0x0000019A, 0x0000019A, CPU_TIME, },
111 109 { 0x0000019B, 0x0000019D, CPU_THERM, },
112 { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 }, 110 { 0x000001A0, 0x000001A0, CPU_MISC, },
113 { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 }, 111 { 0x000001C9, 0x000001C9, CPU_LBRANCH, },
114 { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX }, 112 { 0x000001D7, 0x000001D8, CPU_LBRANCH, },
115 { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 }, 113 { 0x000001D9, 0x000001D9, CPU_DEBUG, },
116 { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT }, 114 { 0x000001DA, 0x000001E0, CPU_LBRANCH, },
117 115
118 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE }, 116 { 0x00000200, 0x0000020F, CPU_MTRR, },
119 { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE }, 117 { 0x00000250, 0x00000250, CPU_MTRR, },
120 { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE }, 118 { 0x00000258, 0x00000259, CPU_MTRR, },
121 { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT }, 119 { 0x00000268, 0x0000026F, CPU_MTRR, },
122 { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE }, 120 { 0x00000277, 0x00000277, CPU_PAT, },
123 { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE }, 121 { 0x000002FF, 0x000002FF, CPU_MTRR, },
124 { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE }, 122
125 { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE }, 123 { 0x00000300, 0x00000311, CPU_PMC, },
126 124 { 0x00000345, 0x00000345, CPU_PMC, },
127 { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT }, 125 { 0x00000360, 0x00000371, CPU_PMC, },
128 { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON }, 126 { 0x0000038D, 0x00000390, CPU_PMC, },
129 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE }, 127 { 0x000003A0, 0x000003BE, CPU_PMC, },
130 { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON }, 128 { 0x000003C0, 0x000003CD, CPU_PMC, },
131 { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE }, 129 { 0x000003E0, 0x000003E1, CPU_PMC, },
132 { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 }, 130 { 0x000003F0, 0x000003F2, CPU_PMC, },
133 { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE }, 131
134 { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 }, 132 { 0x00000400, 0x00000417, CPU_MC, },
135 133 { 0x00000480, 0x0000048B, CPU_VMX, },
136 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE }, 134
137 { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE }, 135 { 0x00000600, 0x00000600, CPU_DEBUG, },
138 { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE }, 136 { 0x00000680, 0x0000068F, CPU_LBRANCH, },
139 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE }, 137 { 0x000006C0, 0x000006CF, CPU_LBRANCH, },
140 { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE }, 138
141 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE }, 139 { 0x000107CC, 0x000107D3, CPU_PMC, },
142 140
143 { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON }, 141 { 0xC0000080, 0xC0000080, CPU_FEATURES, },
144 { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE }, 142 { 0xC0000081, 0xC0000084, CPU_CALL, },
145 { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON }, 143 { 0xC0000100, 0xC0000102, CPU_BASE, },
146 { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT }, 144 { 0xC0000103, 0xC0000103, CPU_TIME, },
147 { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON }, 145
148 { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT }, 146 { 0xC0010000, 0xC0010007, CPU_PMC, },
149 { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON }, 147 { 0xC0010010, 0xC0010010, CPU_CONF, },
150 { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON }, 148 { 0xC0010015, 0xC0010015, CPU_CONF, },
151 { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON }, 149 { 0xC0010016, 0xC001001A, CPU_MTRR, },
152 { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON }, 150 { 0xC001001D, 0xC001001D, CPU_MTRR, },
153 { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE }, 151 { 0xC001001F, 0xC001001F, CPU_CONF, },
154 { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON }, 152 { 0xC0010030, 0xC0010035, CPU_BIOS, },
155 153 { 0xC0010044, 0xC0010048, CPU_MC, },
156 { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE }, 154 { 0xC0010050, 0xC0010056, CPU_SMM, },
157 { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON }, 155 { 0xC0010058, 0xC0010058, CPU_CONF, },
158 { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE }, 156 { 0xC0010060, 0xC0010060, CPU_CACHE, },
159 { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON }, 157 { 0xC0010061, 0xC0010068, CPU_SMM, },
160 { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE }, 158 { 0xC0010069, 0xC001006B, CPU_SMM, },
161 { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON }, 159 { 0xC0010070, 0xC0010071, CPU_SMM, },
162 { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE }, 160 { 0xC0010111, 0xC0010113, CPU_SMM, },
163 { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON }, 161 { 0xC0010114, 0xC0010118, CPU_SVM, },
164 { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE }, 162 { 0xC0010140, 0xC0010141, CPU_OSVM, },
165 { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE }, 163 { 0xC0011022, 0xC0011023, CPU_CONF, },
166 { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE },
167
168 { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE },
169 { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON },
170 { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON },
171
172 { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP },
173
174 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON },
175 { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON },
176 { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON },
177 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON },
178}; 164};
179 165
180/* AMD Registers Range */
181static struct cpu_debug_range cpu_amd_range[] = {
182 { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, },
183 { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, },
184 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, },
185 { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS },
186 { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS },
187 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, },
188
189 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, },
190 { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, },
191 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, },
192 { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, },
193
194 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, },
195 { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, },
196 { 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, },
197 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, },
198 { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, },
199 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, },
200
201 { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, },
202
203 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, },
204 { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, },
205 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, },
206 { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, },
207
208 { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, },
209 { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, },
210 { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, },
211 { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, },
212 { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, },
213 { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, },
214 { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, },
215 { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, },
216 { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, },
217 { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, },
218 { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, },
219 { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, },
220 { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, },
221 { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, },
222 { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, },
223 { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, },
224 { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, },
225 { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, },
226};
227
228
229/* Intel */
230static int get_intel_modelflag(unsigned model)
231{
232 int flag;
233
234 switch (model) {
235 case 0x0501:
236 case 0x0502:
237 case 0x0504:
238 flag = CPU_INTEL_PENTIUM;
239 break;
240 case 0x0601:
241 case 0x0603:
242 case 0x0605:
243 case 0x0607:
244 case 0x0608:
245 case 0x060A:
246 case 0x060B:
247 flag = CPU_INTEL_P6;
248 break;
249 case 0x0609:
250 case 0x060D:
251 flag = CPU_INTEL_PENTIUM_M;
252 break;
253 case 0x060E:
254 flag = CPU_INTEL_CORE;
255 break;
256 case 0x060F:
257 case 0x0617:
258 flag = CPU_INTEL_CORE2;
259 break;
260 case 0x061C:
261 flag = CPU_INTEL_ATOM;
262 break;
263 case 0x0F00:
264 case 0x0F01:
265 case 0x0F02:
266 case 0x0F03:
267 case 0x0F04:
268 flag = CPU_INTEL_XEON_P4;
269 break;
270 case 0x0F06:
271 flag = CPU_INTEL_XEON_MP;
272 break;
273 default:
274 flag = CPU_NONE;
275 break;
276 }
277
278 return flag;
279}
280
281/* AMD */
282static int get_amd_modelflag(unsigned model)
283{
284 int flag;
285
286 switch (model >> 8) {
287 case 0x6:
288 flag = CPU_AMD_K6;
289 break;
290 case 0x7:
291 flag = CPU_AMD_K7;
292 break;
293 case 0x8:
294 flag = CPU_AMD_K8;
295 break;
296 case 0xf:
297 flag = CPU_AMD_0F;
298 break;
299 case 0x10:
300 flag = CPU_AMD_10;
301 break;
302 case 0x11:
303 flag = CPU_AMD_11;
304 break;
305 default:
306 flag = CPU_NONE;
307 break;
308 }
309
310 return flag;
311}
312
313static int get_cpu_modelflag(unsigned cpu)
314{
315 int flag;
316
317 flag = per_cpu(cpu_model, cpu);
318
319 switch (flag >> 16) {
320 case X86_VENDOR_INTEL:
321 flag = get_intel_modelflag(flag);
322 break;
323 case X86_VENDOR_AMD:
324 flag = get_amd_modelflag(flag & 0xffff);
325 break;
326 default:
327 flag = CPU_NONE;
328 break;
329 }
330
331 return flag;
332}
333
334static int get_cpu_range_count(unsigned cpu)
335{
336 int index;
337
338 switch (per_cpu(cpu_model, cpu) >> 16) {
339 case X86_VENDOR_INTEL:
340 index = ARRAY_SIZE(cpu_intel_range);
341 break;
342 case X86_VENDOR_AMD:
343 index = ARRAY_SIZE(cpu_amd_range);
344 break;
345 default:
346 index = 0;
347 break;
348 }
349
350 return index;
351}
352
353static int is_typeflag_valid(unsigned cpu, unsigned flag) 166static int is_typeflag_valid(unsigned cpu, unsigned flag)
354{ 167{
355 unsigned vendor, modelflag; 168 int i;
356 int i, index;
357 169
358 /* Standard Registers should be always valid */ 170 /* Standard Registers should be always valid */
359 if (flag >= CPU_TSS) 171 if (flag >= CPU_TSS)
360 return 1; 172 return 1;
361 173
362 modelflag = per_cpu(cpu_modelflag, cpu); 174 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
363 vendor = per_cpu(cpu_model, cpu) >> 16; 175 if (cpu_reg_range[i].flag == flag)
364 index = get_cpu_range_count(cpu); 176 return 1;
365
366 for (i = 0; i < index; i++) {
367 switch (vendor) {
368 case X86_VENDOR_INTEL:
369 if ((cpu_intel_range[i].model & modelflag) &&
370 (cpu_intel_range[i].flag & flag))
371 return 1;
372 break;
373 case X86_VENDOR_AMD:
374 if ((cpu_amd_range[i].model & modelflag) &&
375 (cpu_amd_range[i].flag & flag))
376 return 1;
377 break;
378 }
379 } 177 }
380 178
381 /* Invalid */ 179 /* Invalid */
@@ -385,26 +183,11 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag)
 static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
 			      int index, unsigned flag)
 {
-	unsigned modelflag;
-
-	modelflag = per_cpu(cpu_modelflag, cpu);
-	*max = 0;
-	switch (per_cpu(cpu_model, cpu) >> 16) {
-	case X86_VENDOR_INTEL:
-		if ((cpu_intel_range[index].model & modelflag) &&
-		    (cpu_intel_range[index].flag & flag)) {
-			*min = cpu_intel_range[index].min;
-			*max = cpu_intel_range[index].max;
-		}
-		break;
-	case X86_VENDOR_AMD:
-		if ((cpu_amd_range[index].model & modelflag) &&
-		    (cpu_amd_range[index].flag & flag)) {
-			*min = cpu_amd_range[index].min;
-			*max = cpu_amd_range[index].max;
-		}
-		break;
-	}
+	if (cpu_reg_range[index].flag == flag) {
+		*min = cpu_reg_range[index].min;
+		*max = cpu_reg_range[index].max;
+	} else
+		*max = 0;
 
 	return *max;
 }
@@ -434,7 +217,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
 	unsigned msr, msr_min, msr_max;
 	struct cpu_private *priv;
 	u32 low, high;
-	int i, range;
+	int i;
 
 	if (seq) {
 		priv = seq->private;
@@ -446,9 +229,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
 		}
 	}
 
-	range = get_cpu_range_count(cpu);
-
-	for (i = 0; i < range; i++) {
+	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
 		if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
 			continue;
 
@@ -588,8 +369,20 @@ static void print_apic(void *arg)
 	seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
 	seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
 	seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
-#endif /* CONFIG_X86_LOCAL_APIC */
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		unsigned int i, v, maxeilvt;
+
+		v = apic_read(APIC_EFEAT);
+		maxeilvt = (v >> 16) & 0xff;
+		seq_printf(seq, " EFEAT\t\t: %08x\n", v);
+		seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
 
+		for (i = 0; i < maxeilvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
+		}
+	}
+#endif /* CONFIG_X86_LOCAL_APIC */
 	seq_printf(seq, "\n MSR\t:\n");
 }
 
@@ -788,13 +581,11 @@ static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
 {
 	struct dentry *cpu_dentry = NULL;
 	unsigned reg, reg_min, reg_max;
-	int i, range, err = 0;
+	int i, err = 0;
 	char reg_dir[12];
 	u32 low, high;
 
-	range = get_cpu_range_count(cpu);
-
-	for (i = 0; i < range; i++) {
+	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
 		if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
 				   cpu_base[type].flag))
 			continue;
@@ -850,10 +641,6 @@ static int cpu_init_cpu(void)
 		cpui = &cpu_data(cpu);
 		if (!cpu_has(cpui, X86_FEATURE_MSR))
 			continue;
-		per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
-					   (cpui->x86 << 8) |
-					   (cpui->x86_model));
-		per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
 
 		sprintf(cpu_dir, "cpu%d", cpu);
 		cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
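
In the new print_apic() code above, the count of extended LVT registers lives in bits 23:16 of APIC_EFEAT, which is what the (v >> 16) & 0xff extraction pulls out before the EILVTn dump loop. A worked example of that field decode, using an invented register value:

#include <stdio.h>

int main(void)
{
	unsigned int v = 0x00040007;	/* hypothetical APIC_EFEAT value */
	unsigned int maxeilvt = (v >> 16) & 0xff;

	/* bits 23:16 = 0x04, so four EILVT registers would be printed */
	printf("maxeilvt = %u\n", maxeilvt);
	return 0;
}
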
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 52c839875478..f138c6c389b9 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -220,11 +220,14 @@ config X86_LONGHAUL
 	  If in doubt, say N.
 
 config X86_E_POWERSAVER
-	tristate "VIA C7 Enhanced PowerSaver"
+	tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
 	select CPU_FREQ_TABLE
-	depends on X86_32
+	depends on X86_32 && EXPERIMENTAL
 	help
-	  This adds the CPUFreq driver for VIA C7 processors.
+	  This adds the CPUFreq driver for VIA C7 processors.  However, this driver
+	  does not have any safeguards to prevent operating the CPU out of spec
+	  and is thus considered dangerous.  Please use the regular ACPI cpufreq
+	  driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
 
 	  If in doubt, say N.
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 208ecf6643df..ae9b503220ca 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -90,11 +90,7 @@ static int check_est_cpu(unsigned int cpuid)
 {
 	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
 
-	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
-	    !cpu_has(cpu, X86_FEATURE_EST))
-		return 0;
-
-	return 1;
+	return cpu_has(cpu, X86_FEATURE_EST);
 }
 
 static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
@@ -550,7 +546,7 @@ static int __init acpi_cpufreq_early_init(void)
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
-		if (!alloc_cpumask_var_node(
+		if (!zalloc_cpumask_var_node(
 			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
 			GFP_KERNEL, cpu_to_node(i))) {
 
@@ -693,8 +689,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
 	    policy->cpuinfo.transition_latency > 20 * 1000) {
 		policy->cpuinfo.transition_latency = 20 * 1000;
-		printk_once(KERN_INFO "Capping off P-state tranision"
-			    " latency at 20 uS\n");
+		printk_once(KERN_INFO
+			    "P-state transition latency capped at 20 uS\n");
 	}
 
 	/* table init */
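
This and several later hunks swap alloc_cpumask_var*() calls for their zalloc_*() variants, which hand back a mask that is already zeroed instead of uninitialized heap memory when CONFIG_CPUMASK_OFFSTACK is set. The substitution is roughly equivalent to this sketch, which assumes only the long-standing cpumask API:

#include <linux/cpumask.h>
#include <linux/slab.h>

/*
 * Sketch only: zalloc_cpumask_var(&mask, flags) behaves like this
 * open-coded pair, returning a cleared mask on success.
 */
static bool alloc_cleared_cpumask(cpumask_var_t *mask, gfp_t flags)
{
	if (!alloc_cpumask_var(mask, flags))
		return false;
	cpumask_clear(*mask);
	return true;
}
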
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 6ac55bd341ae..869615193720 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -168,6 +168,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 	case 0x0E: /* Core */
 	case 0x0F: /* Core Duo */
 	case 0x16: /* Celeron Core */
+	case 0x1C: /* Atom */
 		p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 		return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
 	case 0x0D: /* Pentium M (Dothan) */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 3c28ccd49742..d47c775eb0ab 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -168,10 +168,12 @@ static int check_powernow(void)
 	return 1;
 }
 
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
 static void invalidate_entry(unsigned int entry)
 {
 	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
 }
+#endif
 
 static int get_ranges(unsigned char *pst)
 {
@@ -320,7 +322,7 @@ static int powernow_acpi_init(void)
 		goto err0;
 	}
 
-	if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+	if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
 								GFP_KERNEL)) {
 		retval = -ENOMEM;
 		goto err05;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 4709ead2db52..cf52215d9eb1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -649,6 +649,20 @@ static void print_basics(struct powernow_k8_data *data)
 		data->batps);
 }
 
+static u32 freq_from_fid_did(u32 fid, u32 did)
+{
+	u32 mhz = 0;
+
+	if (boot_cpu_data.x86 == 0x10)
+		mhz = (100 * (fid + 0x10)) >> did;
+	else if (boot_cpu_data.x86 == 0x11)
+		mhz = (100 * (fid + 8)) >> did;
+	else
+		BUG();
+
+	return mhz * 1000;
+}
+
 static int fill_powernow_table(struct powernow_k8_data *data,
 		struct pst_s *pst, u8 maxvid)
 {
@@ -821,7 +835,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
 	int ret_val = -ENODEV;
-	acpi_integer space_id;
+	acpi_integer control, status;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
@@ -834,12 +848,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 		goto err_out;
 	}
 
-	space_id = data->acpi_data.control_register.space_id;
-	if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-	    (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+	control = data->acpi_data.control_register.space_id;
+	status = data->acpi_data.status_register.space_id;
+
+	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
 		dprintk("Invalid control/status registers (%x - %x)\n",
-			data->acpi_data.control_register.space_id,
-			space_id);
+			control, status);
 		goto err_out;
 	}
 
@@ -872,7 +887,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
-	if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
 		printk(KERN_ERR PFX
 				"unable to alloc powernow_k8_data cpumask\n");
 		ret_val = -ENOMEM;
@@ -923,8 +938,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
 
 		powernow_table[i].index = index;
 
-		powernow_table[i].frequency =
-			data->acpi_data.states[i].core_frequency * 1000;
+		/* Frequency may be rounded for these */
+		if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) {
+			powernow_table[i].frequency =
+				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
+		} else
+			powernow_table[i].frequency =
+				data->acpi_data.states[i].core_frequency * 1000;
 	}
 	return 0;
 }
@@ -1215,13 +1235,16 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
 	return cpufreq_frequency_table_verify(pol, data->powernow_table);
 }
 
+static const char ACPI_PSS_BIOS_BUG_MSG[] =
+	KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
+	KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
+
 /* per CPU init entry point to the driver */
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask;
 	int rc;
-	static int print_once;
 
 	if (!cpu_online(pol->cpu))
 		return -ENODEV;
@@ -1244,19 +1267,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	 * an UP version, and is deprecated by AMD.
 	 */
 	if (num_online_cpus() != 1) {
-		/*
-		 * Replace this one with print_once as soon as such a
-		 * thing gets introduced
-		 */
-		if (!print_once) {
-			WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
-				"does not provide ACPI _PSS objects "
-				"in a way that Linux understands. "
-				"Please report this to the Linux ACPI"
-				" maintainers and complain to your "
-				"BIOS vendor.\n");
-			print_once++;
-		}
+		printk_once(ACPI_PSS_BIOS_BUG_MSG);
 		goto err_out;
 	}
 	if (pol->cpu != 0) {
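
freq_from_fid_did() above implements the family 0x10/0x11 P-state formula: core MHz = (100 * (fid + offset)) >> did, with offset 0x10 for family 0x10 and 8 for family 0x11, returned in kHz. For instance, fid = 0x10 and did = 1 on family 0x10 give (100 * 0x20) >> 1 = 1600 MHz. A stand-alone sketch of the same arithmetic with invented inputs:

#include <stdio.h>

/* Mirrors freq_from_fid_did() for family 0x10 only; returns kHz. */
static unsigned int khz_from_fid_did_fam10(unsigned int fid, unsigned int did)
{
	return ((100 * (fid + 0x10)) >> did) * 1000;
}

int main(void)
{
	/* hypothetical P-state encoding: fid = 0x10, did = 1 */
	printf("%u kHz\n", khz_from_fid_did_fam10(0x10, 1));	/* 1600000 */
	return 0;
}
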
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index c9f1fdc02830..55c831ed71ce 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -471,7 +471,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 
 	if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
 		return -ENOMEM;
-	if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
+	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
 		free_cpumask_var(saved_mask);
 		return -ENOMEM;
 	}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7437fa133c02..daed39ba2614 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -229,12 +229,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 }
 #endif
 
-static void __cpuinit srat_detect_node(void)
+static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = hard_smp_processor_id();
+	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
@@ -400,7 +400,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	}
 
 	/* Work around errata */
-	srat_detect_node();
+	srat_detect_node(c);
 
 	if (cpu_has(c, X86_FEATURE_VMX))
 		detect_vmx_virtcap(c);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 483eda96e102..789efe217e1a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -17,6 +17,7 @@
 
 #include <asm/processor.h>
 #include <asm/smp.h>
+#include <asm/k8.h>
 
 #define LVL_1_INST	1
 #define LVL_1_DATA	2
@@ -159,14 +160,6 @@ struct _cpuid4_info_regs {
 	unsigned long can_disable;
 };
 
-#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
-static struct pci_device_id k8_nb_id[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
-	{}
-};
-#endif
-
 unsigned short num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -207,10 +200,17 @@ union l3_cache {
 };
 
 static const unsigned short __cpuinitconst assocs[] = {
-	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
-	[8] = 16, [0xa] = 32, [0xb] = 48,
+	[1] = 1,
+	[2] = 2,
+	[4] = 4,
+	[6] = 8,
+	[8] = 16,
+	[0xa] = 32,
+	[0xb] = 48,
 	[0xc] = 64,
-	[0xf] = 0xffff // ??
+	[0xd] = 96,
+	[0xe] = 128,
+	[0xf] = 0xffff /* fully associative - no way to show this currently */
 };
 
 static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
@@ -271,7 +271,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 	eax->split.type = types[leaf];
 	eax->split.level = levels[leaf];
 	if (leaf == 3)
-		eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
+		eax->split.num_threads_sharing =
+			current_cpu_data.x86_max_cores - 1;
 	else
 		eax->split.num_threads_sharing = 0;
 	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
@@ -291,6 +292,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 {
 	if (index < 3)
 		return;
+
+	if (boot_cpu_data.x86 == 0x11)
+		return;
+
+	/* see erratum #382 */
+	if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+		return;
+
 	this_leaf->can_disable = 1;
 }
 
@@ -696,97 +705,75 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
 #define to_object(k)	container_of(k, struct _index_kobject, kobj)
 #define to_attr(a)	container_of(a, struct _cache_attr, attr)
 
-#ifdef CONFIG_PCI
-static struct pci_dev *get_k8_northbridge(int node)
-{
-	struct pci_dev *dev = NULL;
-	int i;
-
-	for (i = 0; i <= node; i++) {
-		do {
-			dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
-			if (!dev)
-				break;
-		} while (!pci_match_id(&k8_nb_id[0], dev));
-		if (!dev)
-			break;
-	}
-	return dev;
-}
-#else
-static struct pci_dev *get_k8_northbridge(int node)
-{
-	return NULL;
-}
-#endif
-
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+				  unsigned int index)
 {
-	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-	int node = cpu_to_node(cpumask_first(mask));
-	struct pci_dev *dev = NULL;
-	ssize_t ret = 0;
-	int i;
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int reg = 0;
 
 	if (!this_leaf->can_disable)
-		return sprintf(buf, "Feature not enabled\n");
-
-	dev = get_k8_northbridge(node);
-	if (!dev) {
-		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
 		return -EINVAL;
-	}
 
-	for (i = 0; i < 2; i++) {
-		unsigned int reg;
+	if (!dev)
+		return -EINVAL;
 
-		pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+	return sprintf(buf, "%x\n", reg);
+}
 
-		ret += sprintf(buf, "%sEntry: %d\n", buf, i);
-		ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
-			buf,
-			reg & 0x80000000 ? "Disabled" : "Allowed",
-			reg & 0x40000000 ? "Disabled" : "Allowed");
-		ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
-			buf, (reg & 0x30000) >> 16, reg & 0xfff);
-	}
-	return ret;
+#define SHOW_CACHE_DISABLE(index) \
+static ssize_t \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+	return show_cache_disable(this_leaf, buf, index); \
 }
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
 
-static ssize_t
-store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
-		    size_t count)
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
+	const char *buf, size_t count, unsigned int index)
 {
-	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-	int node = cpu_to_node(cpumask_first(mask));
-	struct pci_dev *dev = NULL;
-	unsigned int ret, index, val;
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned long val = 0;
+	unsigned int scrubber = 0;
 
 	if (!this_leaf->can_disable)
-		return 0;
-
-	if (strlen(buf) > 15)
 		return -EINVAL;
 
-	ret = sscanf(buf, "%x %x", &index, &val);
-	if (ret != 2)
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!dev)
 		return -EINVAL;
-	if (index > 1)
+
+	if (strict_strtoul(buf, 10, &val) < 0)
 		return -EINVAL;
 
 	val |= 0xc0000000;
-	dev = get_k8_northbridge(node);
-	if (!dev) {
-		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
-		return -EINVAL;
-	}
+
+	pci_read_config_dword(dev, 0x58, &scrubber);
+	scrubber &= ~0x1f000000;
+	pci_write_config_dword(dev, 0x58, scrubber);
 
 	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
 	wbinvd();
 	pci_write_config_dword(dev, 0x1BC + index * 4, val);
+	return count;
+}
 
-	return 1;
+#define STORE_CACHE_DISABLE(index) \
+static ssize_t \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
+			    const char *buf, size_t count) \
+{ \
+	return store_cache_disable(this_leaf, buf, count, index); \
 }
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
 
 struct _cache_attr {
 	struct attribute attr;
@@ -808,7 +795,10 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+		show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+		show_cache_disable_1, store_cache_disable_1);
 
 static struct attribute * default_attrs[] = {
 	&type.attr,
@@ -820,7 +810,8 @@ static struct attribute * default_attrs[] = {
 	&size.attr,
 	&shared_cpu_map.attr,
 	&shared_cpu_list.attr,
-	&cache_disable.attr,
+	&cache_disable_0.attr,
+	&cache_disable_1.attr,
 	NULL
 };
 
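
With the split attributes, each L3 index-disable slot becomes its own sysfs file, and store_cache_disable() now parses one decimal value via strict_strtoul() and requires CAP_SYS_ADMIN. Assuming the usual per-cpu cache directory layout (the path and value below are illustrative only), a write from user space could look like this sketch:

#include <stdio.h>

int main(void)
{
	/* hypothetical index/value; needs CAP_SYS_ADMIN, per the hunk */
	const char *path =
		"/sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%lu\n", 12UL);	/* cache index to disable, decimal */
	fclose(f);
	return 0;
}
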
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 6fb0b359d2a5..09dd1d414fc3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -1163,7 +1163,7 @@ static __init int mce_init_device(void)
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+	zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
 
 	err = mce_init_banks();
 	if (err)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index cef3ee30744b..65a0fceedcd7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -15,7 +15,6 @@
 #include <asm/hw_irq.h>
 #include <asm/idle.h>
 #include <asm/therm_throt.h>
-#include <asm/apic.h>
 
 asmlinkage void smp_thermal_interrupt(void)
 {
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ce0fe4b5c04f..1d584a18a50d 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -808,7 +808,7 @@ int __init mtrr_cleanup(unsigned address_bits)
 
 	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
 		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
+	rdmsr(MSR_MTRRdefType, def, dummy);
 	def &= 0xff;
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
@@ -1003,7 +1003,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 	 */
 	if (!is_cpu(INTEL) || disable_mtrr_trim)
 		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
+	rdmsr(MSR_MTRRdefType, def, dummy);
 	def &= 0xff;
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index d21d4fb161f7..0543f69f0b27 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -20,9 +20,9 @@ struct fixed_range_block {
 };
 
 static struct fixed_range_block fixed_range_blocks[] = {
-	{ MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */
-	{ MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */
-	{ MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */
+	{ MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
+	{ MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
+	{ MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
 	{}
 };
 
@@ -194,12 +194,12 @@ get_fixed_ranges(mtrr_type * frs)
 
 	k8_check_syscfg_dram_mod_en();
 
-	rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+	rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]);
 
 	for (i = 0; i < 2; i++)
-		rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
+		rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
 	for (i = 0; i < 8; i++)
-		rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
+		rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
 }
 
 void mtrr_save_fixed_ranges(void *info)
@@ -310,7 +310,7 @@ void __init get_mtrr_state(void)
 
 	vrs = mtrr_state.var_ranges;
 
-	rdmsr(MTRRcap_MSR, lo, dummy);
+	rdmsr(MSR_MTRRcap, lo, dummy);
 	mtrr_state.have_fixed = (lo >> 8) & 1;
 
 	for (i = 0; i < num_var_ranges; i++)
@@ -318,7 +318,7 @@ void __init get_mtrr_state(void)
 	if (mtrr_state.have_fixed)
 		get_fixed_ranges(mtrr_state.fixed_ranges);
 
-	rdmsr(MTRRdefType_MSR, lo, dummy);
+	rdmsr(MSR_MTRRdefType, lo, dummy);
 	mtrr_state.def_type = (lo & 0xff);
 	mtrr_state.enabled = (lo & 0xc00) >> 10;
 
@@ -583,10 +583,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 	__flush_tlb();
 
 	/* Save MTRR state */
-	rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+	rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
 
 	/* Disable MTRRs, and set the default type to uncached */
-	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
 }
 
 static void post_set(void) __releases(set_atomicity_lock)
@@ -595,7 +595,7 @@ static void post_set(void) __releases(set_atomicity_lock)
 	__flush_tlb();
 
 	/* Intel (P6) standard MTRRs */
-	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
 
 	/* Enable caches */
 	write_cr0(read_cr0() & 0xbfffffff);
@@ -707,7 +707,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
 static int generic_have_wrcomb(void)
 {
 	unsigned long config, dummy;
-	rdmsr(MTRRcap_MSR, config, dummy);
+	rdmsr(MSR_MTRRcap, config, dummy);
 	return (config & (1 << 10));
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 03cda01f57c7..8fc248b5aeaf 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -104,7 +104,7 @@ static void __init set_num_var_ranges(void)
 	unsigned long config = 0, dummy;
 
 	if (use_intel()) {
-		rdmsr(MTRRcap_MSR, config, dummy);
+		rdmsr(MSR_MTRRcap, config, dummy);
 	} else if (is_cpu(AMD))
 		config = 2;
 	else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 77f67f7b347a..7538b767f206 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -5,21 +5,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 
-#define MTRRcap_MSR     0x0fe
-#define MTRRdefType_MSR 0x2ff
-
-#define MTRRfix64K_00000_MSR 0x250
-#define MTRRfix16K_80000_MSR 0x258
-#define MTRRfix16K_A0000_MSR 0x259
-#define MTRRfix4K_C0000_MSR 0x268
-#define MTRRfix4K_C8000_MSR 0x269
-#define MTRRfix4K_D0000_MSR 0x26a
-#define MTRRfix4K_D8000_MSR 0x26b
-#define MTRRfix4K_E0000_MSR 0x26c
-#define MTRRfix4K_E8000_MSR 0x26d
-#define MTRRfix4K_F0000_MSR 0x26e
-#define MTRRfix4K_F8000_MSR 0x26f
-
 #define MTRR_CHANGE_MASK_FIXED     0x01
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 7f7e2753685b..1f5fb1588d1f 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -35,7 +35,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
 
 	if (use_intel())
 		/* Save MTRR state */
-		rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+		rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
 	else
 		/* Cyrix ARRs - everything else were excluded at the top */
 		ctxt->ccr3 = getCx86(CX86_CCR3);
@@ -46,7 +46,7 @@ void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
 {
 	if (use_intel())
 		/* Disable MTRRs, and set the default type to uncached */
-		mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
+		mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
 			   ctxt->deftype_hi);
 	else if (is_cpu(CYRIX))
 		/* Cyrix ARRs - everything else were excluded at the top */
@@ -64,7 +64,7 @@ void set_mtrr_done(struct set_mtrr_context *ctxt)
 	/* Restore MTRRdefType */
 	if (use_intel())
 		/* Intel (P6) standard MTRRs */
-		mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+		mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
 	else
 		/* Cyrix ARRs - everything else was excluded at the top */
 		setCx86(CX86_CCR3, ctxt->ccr3);
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 87b67e3a765a..48bfe1386038 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -19,45 +19,61 @@
  * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
-
-#include <asm/ds.h>
-
-#include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/slab.h>
+#include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/kernel.h>
+#include <linux/trace_clock.h>
+
+#include <asm/ds.h>
 
+#include "ds_selftest.h"
 
 /*
- * The configuration for a particular DS hardware implementation.
+ * The configuration for a particular DS hardware implementation:
  */
 struct ds_configuration {
-	/* the name of the configuration */
-	const char *name;
-	/* the size of one pointer-typed field in the DS structure and
-	   in the BTS and PEBS buffers in bytes;
-	   this covers the first 8 DS fields related to buffer management. */
-	unsigned char sizeof_field;
-	/* the size of a BTS/PEBS record in bytes */
-	unsigned char sizeof_rec[2];
-	/* a series of bit-masks to control various features indexed
-	 * by enum ds_feature */
-	unsigned long ctl[dsf_ctl_max];
+	/* The name of the configuration: */
+	const char *name;
+
+	/* The size of pointer-typed fields in DS, BTS, and PEBS: */
+	unsigned char sizeof_ptr_field;
+
+	/* The size of a BTS/PEBS record in bytes: */
+	unsigned char sizeof_rec[2];
+
+	/* The number of pebs counter reset values in the DS structure. */
+	unsigned char nr_counter_reset;
+
+	/* Control bit-masks indexed by enum ds_feature: */
+	unsigned long ctl[dsf_ctl_max];
 };
-static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+static struct ds_configuration ds_cfg __read_mostly;
+
+
+/* Maximal size of a DS configuration: */
+#define MAX_SIZEOF_DS		0x80
 
-#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
+/* Maximal size of a BTS record: */
+#define MAX_SIZEOF_BTS		(3 * 8)
 
-#define MAX_SIZEOF_DS (12 * 8)	/* maximal size of a DS configuration */
-#define MAX_SIZEOF_BTS (3 * 8)	/* maximal size of a BTS record */
-#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+/* BTS and PEBS buffer alignment: */
+#define DS_ALIGNMENT		(1 << 3)
 
-#define BTS_CONTROL \
- (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
-  ds_cfg.ctl[dsf_bts_overflow])
+/* Number of buffer pointers in DS: */
+#define NUM_DS_PTR_FIELDS	8
 
+/* Size of a pebs reset value in DS: */
+#define PEBS_RESET_FIELD_SIZE	8
+
+/* Mask of control bits in the DS MSR register: */
+#define BTS_CONTROL \
+	( ds_cfg.ctl[dsf_bts] | \
+	  ds_cfg.ctl[dsf_bts_kernel] | \
+	  ds_cfg.ctl[dsf_bts_user] | \
+	  ds_cfg.ctl[dsf_bts_overflow] )
 
 /*
  * A BTS or PEBS tracer.
@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
66 * to identify tracers. 82 * to identify tracers.
67 */ 83 */
68struct ds_tracer { 84struct ds_tracer {
69 /* the DS context (partially) owned by this tracer */ 85 /* The DS context (partially) owned by this tracer. */
70 struct ds_context *context; 86 struct ds_context *context;
71 /* the buffer provided on ds_request() and its size in bytes */ 87 /* The buffer provided on ds_request() and its size in bytes. */
72 void *buffer; 88 void *buffer;
73 size_t size; 89 size_t size;
74}; 90};
75 91
76struct bts_tracer { 92struct bts_tracer {
77 /* the common DS part */ 93 /* The common DS part: */
78 struct ds_tracer ds; 94 struct ds_tracer ds;
79 /* the trace including the DS configuration */ 95
80 struct bts_trace trace; 96 /* The trace including the DS configuration: */
81 /* buffer overflow notification function */ 97 struct bts_trace trace;
82 bts_ovfl_callback_t ovfl; 98
99 /* Buffer overflow notification function: */
100 bts_ovfl_callback_t ovfl;
101
102 /* Active flags affecting trace collection. */
103 unsigned int flags;
83}; 104};
84 105
85struct pebs_tracer { 106struct pebs_tracer {
86 /* the common DS part */ 107 /* The common DS part: */
87 struct ds_tracer ds; 108 struct ds_tracer ds;
88 /* the trace including the DS configuration */ 109
89 struct pebs_trace trace; 110 /* The trace including the DS configuration: */
90 /* buffer overflow notification function */ 111 struct pebs_trace trace;
91 pebs_ovfl_callback_t ovfl; 112
113 /* Buffer overflow notification function: */
114 pebs_ovfl_callback_t ovfl;
92}; 115};
93 116
94/* 117/*
@@ -97,6 +120,7 @@ struct pebs_tracer {
97 * 120 *
98 * The DS configuration consists of the following fields; different 121 * The DS configuration consists of the following fields; different
 99 * architectures vary in the size of those fields. 122
123 *
100 * - double-word aligned base linear address of the BTS buffer 124 * - double-word aligned base linear address of the BTS buffer
101 * - write pointer into the BTS buffer 125 * - write pointer into the BTS buffer
102 * - end linear address of the BTS buffer (one byte beyond the end of 126 * - end linear address of the BTS buffer (one byte beyond the end of
@@ -135,21 +159,22 @@ enum ds_field {
135}; 159};
136 160
137enum ds_qualifier { 161enum ds_qualifier {
138 ds_bts = 0, 162 ds_bts = 0,
139 ds_pebs 163 ds_pebs
140}; 164};
141 165
142static inline unsigned long ds_get(const unsigned char *base, 166static inline unsigned long
143 enum ds_qualifier qual, enum ds_field field) 167ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
144{ 168{
145 base += (ds_cfg.sizeof_field * (field + (4 * qual))); 169 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
146 return *(unsigned long *)base; 170 return *(unsigned long *)base;
147} 171}
148 172
149static inline void ds_set(unsigned char *base, enum ds_qualifier qual, 173static inline void
150 enum ds_field field, unsigned long value) 174ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
175 unsigned long value)
151{ 176{
152 base += (ds_cfg.sizeof_field * (field + (4 * qual))); 177 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
153 (*(unsigned long *)base) = value; 178 (*(unsigned long *)base) = value;
154} 179}
155 180
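An aside on the accessor change above: the rename from sizeof_field to sizeof_ptr_field keeps the layout arithmetic intact. Each qualifier (BTS first, then PEBS) owns four pointer-sized management fields, so a field's byte offset is sizeof_ptr_field * (field + 4 * qual). A minimal standalone sketch of that arithmetic, assuming the 8-byte field size of a DTES64-capable cpu (the enum values mirror ds.c):

#include <stdio.h>

/* Field and qualifier values mirror enum ds_field / enum ds_qualifier. */
enum { ds_buffer_base = 0, ds_index, ds_absolute_maximum, ds_interrupt_threshold };
enum { ds_bts = 0, ds_pebs };

int main(void)
{
        unsigned int sizeof_ptr_field = 8;      /* assumed: DTES64-capable cpu */
        int qual, field;

        for (qual = ds_bts; qual <= ds_pebs; qual++)
                for (field = ds_buffer_base; field <= ds_interrupt_threshold; field++)
                        printf("qual %d, field %d -> byte offset %u\n",
                               qual, field,
                               sizeof_ptr_field * (field + (4 * qual)));
        return 0;
}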
@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
159 */ 184 */
160static DEFINE_SPINLOCK(ds_lock); 185static DEFINE_SPINLOCK(ds_lock);
161 186
162
163/* 187/*
164 * We either support (system-wide) per-cpu or per-thread allocation. 188 * We either support (system-wide) per-cpu or per-thread allocation.
165 * We distinguish the two based on the task_struct pointer, where a 189 * We distinguish the two based on the task_struct pointer, where a
@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock);
178 */ 202 */
179static atomic_t tracers = ATOMIC_INIT(0); 203static atomic_t tracers = ATOMIC_INIT(0);
180 204
181static inline void get_tracer(struct task_struct *task) 205static inline int get_tracer(struct task_struct *task)
182{ 206{
183 if (task) 207 int error;
208
209 spin_lock_irq(&ds_lock);
210
211 if (task) {
212 error = -EPERM;
213 if (atomic_read(&tracers) < 0)
214 goto out;
184 atomic_inc(&tracers); 215 atomic_inc(&tracers);
185 else 216 } else {
217 error = -EPERM;
218 if (atomic_read(&tracers) > 0)
219 goto out;
186 atomic_dec(&tracers); 220 atomic_dec(&tracers);
221 }
222
223 error = 0;
224out:
225 spin_unlock_irq(&ds_lock);
226 return error;
187} 227}
188 228
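The rewritten get_tracer() above folds the old check_tracer() test into the same critical section: the signed counter stays positive while task tracers exist and negative while cpu tracers exist, so a request of the other kind fails with -EPERM. A standalone sketch of that sign convention (locking omitted; illustration only, not the kernel code):

#include <stdio.h>

static int tracers;     /* >0: task tracers active, <0: cpu tracers active */

static int get_tracer_sketch(int is_task_tracer)
{
        if (is_task_tracer) {
                if (tracers < 0)
                        return -1;      /* cpu tracing active: refuse */
                tracers++;
        } else {
                if (tracers > 0)
                        return -1;      /* task tracing active: refuse */
                tracers--;
        }
        return 0;
}

int main(void)
{
        printf("task request: %d\n", get_tracer_sketch(1));     /* succeeds */
        printf("cpu request:  %d\n", get_tracer_sketch(0));     /* refused */
        return 0;
}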
189static inline void put_tracer(struct task_struct *task) 229static inline void put_tracer(struct task_struct *task)
@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task)
194 atomic_inc(&tracers); 234 atomic_inc(&tracers);
195} 235}
196 236
197static inline int check_tracer(struct task_struct *task)
198{
199 return task ?
200 (atomic_read(&tracers) >= 0) :
201 (atomic_read(&tracers) <= 0);
202}
203
204
205/* 237/*
206 * The DS context is either attached to a thread or to a cpu: 238 * The DS context is either attached to a thread or to a cpu:
207 * - in the former case, the thread_struct contains a pointer to the 239 * - in the former case, the thread_struct contains a pointer to the
@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task)
213 * deallocated when the last user puts the context. 245 * deallocated when the last user puts the context.
214 */ 246 */
215struct ds_context { 247struct ds_context {
216 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ 248 /* The DS configuration; goes into MSR_IA32_DS_AREA: */
217 unsigned char ds[MAX_SIZEOF_DS]; 249 unsigned char ds[MAX_SIZEOF_DS];
218 /* the owner of the BTS and PEBS configuration, respectively */ 250
219 struct bts_tracer *bts_master; 251 /* The owner of the BTS and PEBS configuration, respectively: */
220 struct pebs_tracer *pebs_master; 252 struct bts_tracer *bts_master;
221 /* use count */ 253 struct pebs_tracer *pebs_master;
222 unsigned long count;
223 /* a pointer to the context location inside the thread_struct
224 * or the per_cpu context array */
225 struct ds_context **this;
226 /* a pointer to the task owning this context, or NULL, if the
227 * context is owned by a cpu */
228 struct task_struct *task;
229};
230 254
231static DEFINE_PER_CPU(struct ds_context *, system_context_array); 255 /* Use count: */
256 unsigned long count;
232 257
233#define system_context per_cpu(system_context_array, smp_processor_id()) 258 /* Pointer to the context pointer field: */
259 struct ds_context **this;
260
261 /* The traced task; NULL for cpu tracing: */
262 struct task_struct *task;
263
264 /* The traced cpu; only valid if task is NULL: */
265 int cpu;
266};
234 267
268static DEFINE_PER_CPU(struct ds_context *, cpu_context);
235 269
236static inline struct ds_context *ds_get_context(struct task_struct *task) 270
271static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
237{ 272{
238 struct ds_context **p_context = 273 struct ds_context **p_context =
239 (task ? &task->thread.ds_ctx : &system_context); 274 (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
240 struct ds_context *context = NULL; 275 struct ds_context *context = NULL;
241 struct ds_context *new_context = NULL; 276 struct ds_context *new_context = NULL;
242 unsigned long irq;
243 277
244 /* Chances are small that we already have a context. */ 278 /* Chances are small that we already have a context. */
245 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); 279 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
246 if (!new_context) 280 if (!new_context)
247 return NULL; 281 return NULL;
248 282
249 spin_lock_irqsave(&ds_lock, irq); 283 spin_lock_irq(&ds_lock);
250 284
251 context = *p_context; 285 context = *p_context;
252 if (!context) { 286 if (likely(!context)) {
253 context = new_context; 287 context = new_context;
254 288
255 context->this = p_context; 289 context->this = p_context;
256 context->task = task; 290 context->task = task;
291 context->cpu = cpu;
257 context->count = 0; 292 context->count = 0;
258 293
259 if (task)
260 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
261
262 if (!task || (task == current))
263 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
264
265 *p_context = context; 294 *p_context = context;
266 } 295 }
267 296
268 context->count++; 297 context->count++;
269 298
270 spin_unlock_irqrestore(&ds_lock, irq); 299 spin_unlock_irq(&ds_lock);
271 300
272 if (context != new_context) 301 if (context != new_context)
273 kfree(new_context); 302 kfree(new_context);
@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
275 return context; 304 return context;
276} 305}
277 306
278static inline void ds_put_context(struct ds_context *context) 307static void ds_put_context(struct ds_context *context)
279{ 308{
309 struct task_struct *task;
280 unsigned long irq; 310 unsigned long irq;
281 311
282 if (!context) 312 if (!context)
@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context)
291 321
292 *(context->this) = NULL; 322 *(context->this) = NULL;
293 323
294 if (context->task) 324 task = context->task;
295 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); 325
326 if (task)
327 clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
296 328
297 if (!context->task || (context->task == current)) 329 /*
298 wrmsrl(MSR_IA32_DS_AREA, 0); 330 * We leave the (now dangling) pointer to the DS configuration in
331 * the DS_AREA msr. This is as good or as bad as replacing it with
332 * NULL - the hardware would crash if we enabled tracing.
333 *
334 * This saves us some problems with having to write an msr on a
335 * different cpu while preventing others from doing the same for the
336 * next context for that same cpu.
337 */
299 338
300 spin_unlock_irqrestore(&ds_lock, irq); 339 spin_unlock_irqrestore(&ds_lock, irq);
301 340
341 /* The context might still be in use for context switching. */
342 if (task && (task != current))
343 wait_task_context_switch(task);
344
302 kfree(context); 345 kfree(context);
303} 346}
304 347
348static void ds_install_ds_area(struct ds_context *context)
349{
350 unsigned long ds;
351
352 ds = (unsigned long)context->ds;
353
354 /*
355 * There is a race between the bts master and the pebs master.
356 *
357 * The thread/cpu access is synchronized via get/put_cpu() for
358 * task tracing and via wrmsr_on_cpu for cpu tracing.
359 *
360 * If bts and pebs are collected for the same task or same cpu,
 361 * the same configuration is written twice.
362 */
363 if (context->task) {
364 get_cpu();
365 if (context->task == current)
366 wrmsrl(MSR_IA32_DS_AREA, ds);
367 set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
368 put_cpu();
369 } else
370 wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
371 (u32)((u64)ds), (u32)((u64)ds >> 32));
372}
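For the cpu-tracing branch above, ds_install_ds_area() programs MSR_IA32_DS_AREA on a remote cpu, and wrmsr_on_cpu() takes the 64-bit linear address as two 32-bit halves. A tiny sketch of that split (the address is a made-up example):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t ds = 0x0000123456789ab8ULL;    /* hypothetical DS area address */
        uint32_t lo = (uint32_t)ds;
        uint32_t hi = (uint32_t)(ds >> 32);

        printf("wrmsr halves: lo=0x%08x hi=0x%08x\n",
               (unsigned)lo, (unsigned)hi);
        return 0;
}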
305 373
306/* 374/*
307 * Call the tracer's callback on a buffer overflow. 375 * Call the tracer's callback on a buffer overflow.
@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
332 * The remainder of any partially written record is zeroed out. 400 * The remainder of any partially written record is zeroed out.
333 * 401 *
334 * context: the DS context 402 * context: the DS context
335 * qual: the buffer type 403 * qual: the buffer type
336 * record: the data to write 404 * record: the data to write
337 * size: the size of the data 405 * size: the size of the data
338 */ 406 */
339static int ds_write(struct ds_context *context, enum ds_qualifier qual, 407static int ds_write(struct ds_context *context, enum ds_qualifier qual,
340 const void *record, size_t size) 408 const void *record, size_t size)
@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
349 unsigned long write_size, adj_write_size; 417 unsigned long write_size, adj_write_size;
350 418
351 /* 419 /*
352 * write as much as possible without producing an 420 * Write as much as possible without producing an
353 * overflow interrupt. 421 * overflow interrupt.
354 * 422 *
355 * interrupt_threshold must either be 423 * Interrupt_threshold must either be
356 * - bigger than absolute_maximum or 424 * - bigger than absolute_maximum or
357 * - point to a record between buffer_base and absolute_maximum 425 * - point to a record between buffer_base and absolute_maximum
358 * 426 *
359 * index points to a valid record. 427 * Index points to a valid record.
360 */ 428 */
361 base = ds_get(context->ds, qual, ds_buffer_base); 429 base = ds_get(context->ds, qual, ds_buffer_base);
362 index = ds_get(context->ds, qual, ds_index); 430 index = ds_get(context->ds, qual, ds_index);
@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
365 433
366 write_end = min(end, int_th); 434 write_end = min(end, int_th);
367 435
368 /* if we are already beyond the interrupt threshold, 436 /*
369 * we fill the entire buffer */ 437 * If we are already beyond the interrupt threshold,
438 * we fill the entire buffer.
439 */
370 if (write_end <= index) 440 if (write_end <= index)
371 write_end = end; 441 write_end = end;
372 442
@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
383 adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; 453 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
384 adj_write_size *= ds_cfg.sizeof_rec[qual]; 454 adj_write_size *= ds_cfg.sizeof_rec[qual];
385 455
386 /* zero out trailing bytes */ 456 /* Zero out trailing bytes. */
387 memset((char *)index + write_size, 0, 457 memset((char *)index + write_size, 0,
388 adj_write_size - write_size); 458 adj_write_size - write_size);
389 index += adj_write_size; 459 index += adj_write_size;
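The write path above only advances the index in units of whole records: the byte count that fits is snapped down to record granularity before the index moves. A standalone sketch of that arithmetic, assuming a 24-byte BTS record (the sizes are made-up examples):

#include <stdio.h>

int main(void)
{
        unsigned long sizeof_rec = 24;          /* assumed: 64-bit bts record */
        unsigned long write_size = 100;         /* bytes that fit in the buffer */
        unsigned long adj_write_size;

        adj_write_size = write_size / sizeof_rec;       /* whole records */
        adj_write_size *= sizeof_rec;

        printf("%lu bytes -> %lu whole-record bytes (%lu records)\n",
               write_size, adj_write_size, adj_write_size / sizeof_rec);
        return 0;
}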
@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
410 * Later architectures use 64bit pointers throughout, whereas earlier 480 * Later architectures use 64bit pointers throughout, whereas earlier
411 * architectures use 32bit pointers in 32bit mode. 481 * architectures use 32bit pointers in 32bit mode.
412 * 482 *
413 * We compute the base address for the first 8 fields based on: 483 * We compute the base address for the fields based on:
414 * - the field size stored in the DS configuration 484 * - the field size stored in the DS configuration
415 * - the relative field position 485 * - the relative field position
416 * 486 *
@@ -431,23 +501,23 @@ enum bts_field {
431 bts_to, 501 bts_to,
432 bts_flags, 502 bts_flags,
433 503
434 bts_qual = bts_from, 504 bts_qual = bts_from,
435 bts_jiffies = bts_to, 505 bts_clock = bts_to,
436 bts_pid = bts_flags, 506 bts_pid = bts_flags,
437 507
438 bts_qual_mask = (bts_qual_max - 1), 508 bts_qual_mask = (bts_qual_max - 1),
439 bts_escape = ((unsigned long)-1 & ~bts_qual_mask) 509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
440}; 510};
441 511
442static inline unsigned long bts_get(const char *base, enum bts_field field) 512static inline unsigned long bts_get(const char *base, enum bts_field field)
443{ 513{
444 base += (ds_cfg.sizeof_field * field); 514 base += (ds_cfg.sizeof_ptr_field * field);
445 return *(unsigned long *)base; 515 return *(unsigned long *)base;
446} 516}
447 517
448static inline void bts_set(char *base, enum bts_field field, unsigned long val) 518static inline void bts_set(char *base, enum bts_field field, unsigned long val)
449{ 519{
 450 base += (ds_cfg.sizeof_field * field); 520 base += (ds_cfg.sizeof_ptr_field * field);
451 (*(unsigned long *)base) = val; 521 (*(unsigned long *)base) = val;
452} 522}
453 523
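The renamed bts_clock field only changes meaning inside "escape" records: a record whose first field carries the bts_escape bit pattern stores an event (clock and pid) rather than a branch (from and to). A standalone sketch of the escape test, assuming bts_qual_max = 4 so the qualifier mask is 0x3 (both values are assumptions for illustration):

#include <stdio.h>

int main(void)
{
        unsigned long bts_qual_mask = 0x3;      /* assumed: bts_qual_max = 4 */
        unsigned long bts_escape = (unsigned long)-1 & ~bts_qual_mask;
        unsigned long qualifier = 2;            /* hypothetical event qualifier */
        unsigned long from = bts_escape | qualifier;

        if ((from & ~bts_qual_mask) == bts_escape)
                printf("escape record, qualifier %lu\n", from & bts_qual_mask);
        else
                printf("branch record, from 0x%lx\n", from);
        return 0;
}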
@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val)
463 * 533 *
464 * return: bytes read/written on success; -Eerrno, otherwise 534 * return: bytes read/written on success; -Eerrno, otherwise
465 */ 535 */
466static int bts_read(struct bts_tracer *tracer, const void *at, 536static int
467 struct bts_struct *out) 537bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
468{ 538{
469 if (!tracer) 539 if (!tracer)
470 return -EINVAL; 540 return -EINVAL;
@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at,
478 memset(out, 0, sizeof(*out)); 548 memset(out, 0, sizeof(*out));
479 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { 549 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
480 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); 550 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
481 out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); 551 out->variant.event.clock = bts_get(at, bts_clock);
482 out->variant.timestamp.pid = bts_get(at, bts_pid); 552 out->variant.event.pid = bts_get(at, bts_pid);
483 } else { 553 } else {
484 out->qualifier = bts_branch; 554 out->qualifier = bts_branch;
485 out->variant.lbr.from = bts_get(at, bts_from); 555 out->variant.lbr.from = bts_get(at, bts_from);
@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
516 case bts_task_arrives: 586 case bts_task_arrives:
517 case bts_task_departs: 587 case bts_task_departs:
518 bts_set(raw, bts_qual, (bts_escape | in->qualifier)); 588 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
519 bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); 589 bts_set(raw, bts_clock, in->variant.event.clock);
520 bts_set(raw, bts_pid, in->variant.timestamp.pid); 590 bts_set(raw, bts_pid, in->variant.event.pid);
521 break; 591 break;
522 default: 592 default:
523 return -EINVAL; 593 return -EINVAL;
@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
555 unsigned int flags) { 625 unsigned int flags) {
556 unsigned long buffer, adj; 626 unsigned long buffer, adj;
557 627
558 /* adjust the buffer address and size to meet alignment 628 /*
629 * Adjust the buffer address and size to meet alignment
559 * constraints: 630 * constraints:
560 * - buffer is double-word aligned 631 * - buffer is double-word aligned
561 * - size is multiple of record size 632 * - size is multiple of record size
@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
577 trace->begin = (void *)buffer; 648 trace->begin = (void *)buffer;
578 trace->top = trace->begin; 649 trace->top = trace->begin;
579 trace->end = (void *)(buffer + size); 650 trace->end = (void *)(buffer + size);
580 /* The value for 'no threshold' is -1, which will set the 651 /*
652 * The value for 'no threshold' is -1, which will set the
581 * threshold outside of the buffer, just like we want it. 653 * threshold outside of the buffer, just like we want it.
582 */ 654 */
655 ith *= ds_cfg.sizeof_rec[qual];
583 trace->ith = (void *)(buffer + size - ith); 656 trace->ith = (void *)(buffer + size - ith);
584 657
585 trace->flags = flags; 658 trace->flags = flags;
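With the fix above, the interrupt threshold is given in records and scaled to bytes before being subtracted from the buffer end; the "no threshold" value of -1 still lands outside the buffer, as the comment notes. A worked sketch of the pointer arithmetic, assuming a 24-byte record and a made-up buffer address:

#include <stdio.h>

int main(void)
{
        unsigned long buffer = 0x1000;          /* hypothetical aligned buffer */
        unsigned long size = 480;               /* 20 records of 24 bytes */
        unsigned long sizeof_rec = 24;
        unsigned long ith = 2;                  /* threshold: 2 records from the end */

        ith *= sizeof_rec;
        printf("ith at 0x%lx (end at 0x%lx)\n",
               buffer + size - ith, buffer + size);
        return 0;
}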
@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
588 661
589static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, 662static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
590 enum ds_qualifier qual, struct task_struct *task, 663 enum ds_qualifier qual, struct task_struct *task,
591 void *base, size_t size, size_t th, unsigned int flags) 664 int cpu, void *base, size_t size, size_t th)
592{ 665{
593 struct ds_context *context; 666 struct ds_context *context;
594 int error; 667 int error;
668 size_t req_size;
669
670 error = -EOPNOTSUPP;
671 if (!ds_cfg.sizeof_rec[qual])
672 goto out;
595 673
596 error = -EINVAL; 674 error = -EINVAL;
597 if (!base) 675 if (!base)
598 goto out; 676 goto out;
599 677
600 /* we require some space to do alignment adjustments below */ 678 req_size = ds_cfg.sizeof_rec[qual];
679 /* We might need space for alignment adjustments. */
680 if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
681 req_size += DS_ALIGNMENT;
682
601 error = -EINVAL; 683 error = -EINVAL;
602 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) 684 if (size < req_size)
603 goto out; 685 goto out;
604 686
605 if (th != (size_t)-1) { 687 if (th != (size_t)-1) {
@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
614 tracer->size = size; 696 tracer->size = size;
615 697
616 error = -ENOMEM; 698 error = -ENOMEM;
617 context = ds_get_context(task); 699 context = ds_get_context(task, cpu);
618 if (!context) 700 if (!context)
619 goto out; 701 goto out;
620 tracer->context = context; 702 tracer->context = context;
621 703
622 ds_init_ds_trace(trace, qual, base, size, th, flags); 704 /*
705 * Defer any tracer-specific initialization work for the context until
706 * context ownership has been clarified.
707 */
623 708
624 error = 0; 709 error = 0;
625 out: 710 out:
626 return error; 711 return error;
627} 712}
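The new req_size check above replaces the old blanket DS_ALIGNMENT + sizeof_rec minimum: alignment slack is only demanded when the caller's buffer is actually misaligned. A standalone sketch of the check, assuming a 24-byte record and a deliberately misaligned base address:

#include <stdio.h>
#include <stdint.h>

#define DS_ALIGNMENT (1 << 3)

int main(void)
{
        uintptr_t base = 0x1003;                /* hypothetical, misaligned */
        size_t sizeof_rec = 24;                 /* assumed record size */
        size_t req_size = sizeof_rec;

        if (base & (DS_ALIGNMENT - 1))          /* mirrors !IS_ALIGNED() */
                req_size += DS_ALIGNMENT;

        printf("buffer must hold at least %zu bytes\n", req_size);
        return 0;
}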
628 713
629struct bts_tracer *ds_request_bts(struct task_struct *task, 714static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
630 void *base, size_t size, 715 void *base, size_t size,
631 bts_ovfl_callback_t ovfl, size_t th, 716 bts_ovfl_callback_t ovfl, size_t th,
632 unsigned int flags) 717 unsigned int flags)
633{ 718{
634 struct bts_tracer *tracer; 719 struct bts_tracer *tracer;
635 unsigned long irq;
636 int error; 720 int error;
637 721
722 /* Buffer overflow notification is not yet implemented. */
638 error = -EOPNOTSUPP; 723 error = -EOPNOTSUPP;
639 if (!ds_cfg.ctl[dsf_bts]) 724 if (ovfl)
640 goto out; 725 goto out;
641 726
642 /* buffer overflow notification is not yet implemented */ 727 error = get_tracer(task);
643 error = -EOPNOTSUPP; 728 if (error < 0)
644 if (ovfl)
645 goto out; 729 goto out;
646 730
647 error = -ENOMEM; 731 error = -ENOMEM;
648 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); 732 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
649 if (!tracer) 733 if (!tracer)
650 goto out; 734 goto out_put_tracer;
651 tracer->ovfl = ovfl; 735 tracer->ovfl = ovfl;
652 736
737 /* Do some more error checking and acquire a tracing context. */
653 error = ds_request(&tracer->ds, &tracer->trace.ds, 738 error = ds_request(&tracer->ds, &tracer->trace.ds,
654 ds_bts, task, base, size, th, flags); 739 ds_bts, task, cpu, base, size, th);
655 if (error < 0) 740 if (error < 0)
656 goto out_tracer; 741 goto out_tracer;
657 742
658 743 /* Claim the bts part of the tracing context we acquired above. */
659 spin_lock_irqsave(&ds_lock, irq); 744 spin_lock_irq(&ds_lock);
660
661 error = -EPERM;
662 if (!check_tracer(task))
663 goto out_unlock;
664 get_tracer(task);
665 745
666 error = -EPERM; 746 error = -EPERM;
667 if (tracer->ds.context->bts_master) 747 if (tracer->ds.context->bts_master)
668 goto out_put_tracer; 748 goto out_unlock;
669 tracer->ds.context->bts_master = tracer; 749 tracer->ds.context->bts_master = tracer;
670 750
671 spin_unlock_irqrestore(&ds_lock, irq); 751 spin_unlock_irq(&ds_lock);
672 752
753 /*
754 * Now that we own the bts part of the context, let's complete the
755 * initialization for that part.
756 */
757 ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
758 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
759 ds_install_ds_area(tracer->ds.context);
673 760
674 tracer->trace.read = bts_read; 761 tracer->trace.read = bts_read;
675 tracer->trace.write = bts_write; 762 tracer->trace.write = bts_write;
676 763
677 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); 764 /* Start tracing. */
678 ds_resume_bts(tracer); 765 ds_resume_bts(tracer);
679 766
680 return tracer; 767 return tracer;
681 768
682 out_put_tracer:
683 put_tracer(task);
684 out_unlock: 769 out_unlock:
685 spin_unlock_irqrestore(&ds_lock, irq); 770 spin_unlock_irq(&ds_lock);
686 ds_put_context(tracer->ds.context); 771 ds_put_context(tracer->ds.context);
687 out_tracer: 772 out_tracer:
688 kfree(tracer); 773 kfree(tracer);
774 out_put_tracer:
775 put_tracer(task);
689 out: 776 out:
690 return ERR_PTR(error); 777 return ERR_PTR(error);
691} 778}
692 779
693struct pebs_tracer *ds_request_pebs(struct task_struct *task, 780struct bts_tracer *ds_request_bts_task(struct task_struct *task,
694 void *base, size_t size, 781 void *base, size_t size,
695 pebs_ovfl_callback_t ovfl, size_t th, 782 bts_ovfl_callback_t ovfl,
696 unsigned int flags) 783 size_t th, unsigned int flags)
784{
785 return ds_request_bts(task, 0, base, size, ovfl, th, flags);
786}
787
788struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
789 bts_ovfl_callback_t ovfl,
790 size_t th, unsigned int flags)
791{
792 return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
793}
794
795static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
796 void *base, size_t size,
797 pebs_ovfl_callback_t ovfl, size_t th,
798 unsigned int flags)
697{ 799{
698 struct pebs_tracer *tracer; 800 struct pebs_tracer *tracer;
699 unsigned long irq;
700 int error; 801 int error;
701 802
702 /* buffer overflow notification is not yet implemented */ 803 /* Buffer overflow notification is not yet implemented. */
703 error = -EOPNOTSUPP; 804 error = -EOPNOTSUPP;
704 if (ovfl) 805 if (ovfl)
705 goto out; 806 goto out;
706 807
808 error = get_tracer(task);
809 if (error < 0)
810 goto out;
811
707 error = -ENOMEM; 812 error = -ENOMEM;
708 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); 813 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
709 if (!tracer) 814 if (!tracer)
710 goto out; 815 goto out_put_tracer;
711 tracer->ovfl = ovfl; 816 tracer->ovfl = ovfl;
712 817
818 /* Do some more error checking and acquire a tracing context. */
713 error = ds_request(&tracer->ds, &tracer->trace.ds, 819 error = ds_request(&tracer->ds, &tracer->trace.ds,
714 ds_pebs, task, base, size, th, flags); 820 ds_pebs, task, cpu, base, size, th);
715 if (error < 0) 821 if (error < 0)
716 goto out_tracer; 822 goto out_tracer;
717 823
718 spin_lock_irqsave(&ds_lock, irq); 824 /* Claim the pebs part of the tracing context we acquired above. */
719 825 spin_lock_irq(&ds_lock);
720 error = -EPERM;
721 if (!check_tracer(task))
722 goto out_unlock;
723 get_tracer(task);
724 826
725 error = -EPERM; 827 error = -EPERM;
726 if (tracer->ds.context->pebs_master) 828 if (tracer->ds.context->pebs_master)
727 goto out_put_tracer; 829 goto out_unlock;
728 tracer->ds.context->pebs_master = tracer; 830 tracer->ds.context->pebs_master = tracer;
729 831
730 spin_unlock_irqrestore(&ds_lock, irq); 832 spin_unlock_irq(&ds_lock);
731 833
834 /*
835 * Now that we own the pebs part of the context, let's complete the
836 * initialization for that part.
837 */
838 ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
732 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); 839 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
840 ds_install_ds_area(tracer->ds.context);
841
842 /* Start tracing. */
733 ds_resume_pebs(tracer); 843 ds_resume_pebs(tracer);
734 844
735 return tracer; 845 return tracer;
736 846
737 out_put_tracer:
738 put_tracer(task);
739 out_unlock: 847 out_unlock:
740 spin_unlock_irqrestore(&ds_lock, irq); 848 spin_unlock_irq(&ds_lock);
741 ds_put_context(tracer->ds.context); 849 ds_put_context(tracer->ds.context);
742 out_tracer: 850 out_tracer:
743 kfree(tracer); 851 kfree(tracer);
852 out_put_tracer:
853 put_tracer(task);
744 out: 854 out:
745 return ERR_PTR(error); 855 return ERR_PTR(error);
746} 856}
747 857
748void ds_release_bts(struct bts_tracer *tracer) 858struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
859 void *base, size_t size,
860 pebs_ovfl_callback_t ovfl,
861 size_t th, unsigned int flags)
749{ 862{
750 if (!tracer) 863 return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
751 return; 864}
752 865
753 ds_suspend_bts(tracer); 866struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
867 pebs_ovfl_callback_t ovfl,
868 size_t th, unsigned int flags)
869{
870 return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
871}
872
873static void ds_free_bts(struct bts_tracer *tracer)
874{
875 struct task_struct *task;
876
877 task = tracer->ds.context->task;
754 878
755 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); 879 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
756 tracer->ds.context->bts_master = NULL; 880 tracer->ds.context->bts_master = NULL;
757 881
758 put_tracer(tracer->ds.context->task); 882 /* Make sure tracing stopped and the tracer is not in use. */
883 if (task && (task != current))
884 wait_task_context_switch(task);
885
759 ds_put_context(tracer->ds.context); 886 ds_put_context(tracer->ds.context);
887 put_tracer(task);
760 888
761 kfree(tracer); 889 kfree(tracer);
762} 890}
763 891
892void ds_release_bts(struct bts_tracer *tracer)
893{
894 might_sleep();
895
896 if (!tracer)
897 return;
898
899 ds_suspend_bts(tracer);
900 ds_free_bts(tracer);
901}
902
903int ds_release_bts_noirq(struct bts_tracer *tracer)
904{
905 struct task_struct *task;
906 unsigned long irq;
907 int error;
908
909 if (!tracer)
910 return 0;
911
912 task = tracer->ds.context->task;
913
914 local_irq_save(irq);
915
916 error = -EPERM;
917 if (!task &&
918 (tracer->ds.context->cpu != smp_processor_id()))
919 goto out;
920
921 error = -EPERM;
922 if (task && (task != current))
923 goto out;
924
925 ds_suspend_bts_noirq(tracer);
926 ds_free_bts(tracer);
927
928 error = 0;
929 out:
930 local_irq_restore(irq);
931 return error;
932}
933
934static void update_task_debugctlmsr(struct task_struct *task,
935 unsigned long debugctlmsr)
936{
937 task->thread.debugctlmsr = debugctlmsr;
938
939 get_cpu();
940 if (task == current)
941 update_debugctlmsr(debugctlmsr);
942 put_cpu();
943}
944
764void ds_suspend_bts(struct bts_tracer *tracer) 945void ds_suspend_bts(struct bts_tracer *tracer)
765{ 946{
766 struct task_struct *task; 947 struct task_struct *task;
948 unsigned long debugctlmsr;
949 int cpu;
767 950
768 if (!tracer) 951 if (!tracer)
769 return; 952 return;
770 953
954 tracer->flags = 0;
955
771 task = tracer->ds.context->task; 956 task = tracer->ds.context->task;
957 cpu = tracer->ds.context->cpu;
772 958
773 if (!task || (task == current)) 959 WARN_ON(!task && irqs_disabled());
774 update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
775 960
776 if (task) { 961 debugctlmsr = (task ?
777 task->thread.debugctlmsr &= ~BTS_CONTROL; 962 task->thread.debugctlmsr :
963 get_debugctlmsr_on_cpu(cpu));
964 debugctlmsr &= ~BTS_CONTROL;
778 965
779 if (!task->thread.debugctlmsr) 966 if (task)
780 clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); 967 update_task_debugctlmsr(task, debugctlmsr);
781 } 968 else
969 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
782} 970}
783 971
784void ds_resume_bts(struct bts_tracer *tracer) 972int ds_suspend_bts_noirq(struct bts_tracer *tracer)
785{ 973{
786 struct task_struct *task; 974 struct task_struct *task;
787 unsigned long control; 975 unsigned long debugctlmsr, irq;
976 int cpu, error = 0;
788 977
789 if (!tracer) 978 if (!tracer)
790 return; 979 return 0;
980
981 tracer->flags = 0;
791 982
792 task = tracer->ds.context->task; 983 task = tracer->ds.context->task;
984 cpu = tracer->ds.context->cpu;
985
986 local_irq_save(irq);
987
988 error = -EPERM;
989 if (!task && (cpu != smp_processor_id()))
990 goto out;
991
992 debugctlmsr = (task ?
993 task->thread.debugctlmsr :
994 get_debugctlmsr());
995 debugctlmsr &= ~BTS_CONTROL;
996
997 if (task)
998 update_task_debugctlmsr(task, debugctlmsr);
999 else
1000 update_debugctlmsr(debugctlmsr);
1001
1002 error = 0;
1003 out:
1004 local_irq_restore(irq);
1005 return error;
1006}
1007
1008static unsigned long ds_bts_control(struct bts_tracer *tracer)
1009{
1010 unsigned long control;
793 1011
794 control = ds_cfg.ctl[dsf_bts]; 1012 control = ds_cfg.ctl[dsf_bts];
795 if (!(tracer->trace.ds.flags & BTS_KERNEL)) 1013 if (!(tracer->trace.ds.flags & BTS_KERNEL))
@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer)
797 if (!(tracer->trace.ds.flags & BTS_USER)) 1015 if (!(tracer->trace.ds.flags & BTS_USER))
798 control |= ds_cfg.ctl[dsf_bts_user]; 1016 control |= ds_cfg.ctl[dsf_bts_user];
799 1017
800 if (task) { 1018 return control;
801 task->thread.debugctlmsr |= control;
802 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
803 }
804
805 if (!task || (task == current))
806 update_debugctlmsr(get_debugctlmsr() | control);
807} 1019}
808 1020
809void ds_release_pebs(struct pebs_tracer *tracer) 1021void ds_resume_bts(struct bts_tracer *tracer)
810{ 1022{
1023 struct task_struct *task;
1024 unsigned long debugctlmsr;
1025 int cpu;
1026
811 if (!tracer) 1027 if (!tracer)
812 return; 1028 return;
813 1029
814 ds_suspend_pebs(tracer); 1030 tracer->flags = tracer->trace.ds.flags;
1031
1032 task = tracer->ds.context->task;
1033 cpu = tracer->ds.context->cpu;
1034
1035 WARN_ON(!task && irqs_disabled());
1036
1037 debugctlmsr = (task ?
1038 task->thread.debugctlmsr :
1039 get_debugctlmsr_on_cpu(cpu));
1040 debugctlmsr |= ds_bts_control(tracer);
1041
1042 if (task)
1043 update_task_debugctlmsr(task, debugctlmsr);
1044 else
1045 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
1046}
1047
1048int ds_resume_bts_noirq(struct bts_tracer *tracer)
1049{
1050 struct task_struct *task;
1051 unsigned long debugctlmsr, irq;
1052 int cpu, error = 0;
1053
1054 if (!tracer)
1055 return 0;
1056
1057 tracer->flags = tracer->trace.ds.flags;
1058
1059 task = tracer->ds.context->task;
1060 cpu = tracer->ds.context->cpu;
1061
1062 local_irq_save(irq);
1063
1064 error = -EPERM;
1065 if (!task && (cpu != smp_processor_id()))
1066 goto out;
1067
1068 debugctlmsr = (task ?
1069 task->thread.debugctlmsr :
1070 get_debugctlmsr());
1071 debugctlmsr |= ds_bts_control(tracer);
1072
1073 if (task)
1074 update_task_debugctlmsr(task, debugctlmsr);
1075 else
1076 update_debugctlmsr(debugctlmsr);
1077
1078 error = 0;
1079 out:
1080 local_irq_restore(irq);
1081 return error;
1082}
1083
1084static void ds_free_pebs(struct pebs_tracer *tracer)
1085{
1086 struct task_struct *task;
1087
1088 task = tracer->ds.context->task;
815 1089
816 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); 1090 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
817 tracer->ds.context->pebs_master = NULL; 1091 tracer->ds.context->pebs_master = NULL;
818 1092
819 put_tracer(tracer->ds.context->task);
820 ds_put_context(tracer->ds.context); 1093 ds_put_context(tracer->ds.context);
1094 put_tracer(task);
821 1095
822 kfree(tracer); 1096 kfree(tracer);
823} 1097}
824 1098
1099void ds_release_pebs(struct pebs_tracer *tracer)
1100{
1101 might_sleep();
1102
1103 if (!tracer)
1104 return;
1105
1106 ds_suspend_pebs(tracer);
1107 ds_free_pebs(tracer);
1108}
1109
1110int ds_release_pebs_noirq(struct pebs_tracer *tracer)
1111{
1112 struct task_struct *task;
1113 unsigned long irq;
1114 int error;
1115
1116 if (!tracer)
1117 return 0;
1118
1119 task = tracer->ds.context->task;
1120
1121 local_irq_save(irq);
1122
1123 error = -EPERM;
1124 if (!task &&
1125 (tracer->ds.context->cpu != smp_processor_id()))
1126 goto out;
1127
1128 error = -EPERM;
1129 if (task && (task != current))
1130 goto out;
1131
1132 ds_suspend_pebs_noirq(tracer);
1133 ds_free_pebs(tracer);
1134
1135 error = 0;
1136 out:
1137 local_irq_restore(irq);
1138 return error;
1139}
1140
825void ds_suspend_pebs(struct pebs_tracer *tracer) 1141void ds_suspend_pebs(struct pebs_tracer *tracer)
826{ 1142{
827 1143
828} 1144}
829 1145
1146int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
1147{
1148 return 0;
1149}
1150
830void ds_resume_pebs(struct pebs_tracer *tracer) 1151void ds_resume_pebs(struct pebs_tracer *tracer)
831{ 1152{
832 1153
833} 1154}
834 1155
1156int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
1157{
1158 return 0;
1159}
1160
835const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) 1161const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
836{ 1162{
837 if (!tracer) 1163 if (!tracer)
@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
847 return NULL; 1173 return NULL;
848 1174
849 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); 1175 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
850 tracer->trace.reset_value = 1176
851 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); 1177 tracer->trace.counters = ds_cfg.nr_counter_reset;
1178 memcpy(tracer->trace.counter_reset,
1179 tracer->ds.context->ds +
1180 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
1181 ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
852 1182
853 return &tracer->trace; 1183 return &tracer->trace;
854} 1184}
@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer)
873 1203
874 tracer->trace.ds.top = tracer->trace.ds.begin; 1204 tracer->trace.ds.top = tracer->trace.ds.begin;
875 1205
876 ds_set(tracer->ds.context->ds, ds_bts, ds_index, 1206 ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
877 (unsigned long)tracer->trace.ds.top); 1207 (unsigned long)tracer->trace.ds.top);
878 1208
879 return 0; 1209 return 0;
880} 1210}
881 1211
882int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) 1212int ds_set_pebs_reset(struct pebs_tracer *tracer,
1213 unsigned int counter, u64 value)
883{ 1214{
884 if (!tracer) 1215 if (!tracer)
885 return -EINVAL; 1216 return -EINVAL;
886 1217
887 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; 1218 if (ds_cfg.nr_counter_reset < counter)
1219 return -EINVAL;
1220
1221 *(u64 *)(tracer->ds.context->ds +
1222 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
1223 (counter * PEBS_RESET_FIELD_SIZE)) = value;
888 1224
889 return 0; 1225 return 0;
890} 1226}
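The reset values follow the eight buffer-management pointers in the DS area, one 8-byte slot per counter, which is what both ds_read_pebs() and ds_set_pebs_reset() now index. A sketch of the offset computation, assuming 8-byte pointer fields and the Core i7 count of four reset slots:

#include <stdio.h>

#define NUM_DS_PTR_FIELDS       8
#define PEBS_RESET_FIELD_SIZE   8

int main(void)
{
        unsigned int sizeof_ptr_field = 8;      /* assumed: DTES64-capable cpu */
        unsigned int counter;

        for (counter = 0; counter < 4; counter++)
                printf("counter %u reset value at DS offset %u\n", counter,
                       NUM_DS_PTR_FIELDS * sizeof_ptr_field +
                       counter * PEBS_RESET_FIELD_SIZE);
        return 0;
}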
@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = {
894 .ctl[dsf_bts] = (1 << 2) | (1 << 3), 1230 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
895 .ctl[dsf_bts_kernel] = (1 << 5), 1231 .ctl[dsf_bts_kernel] = (1 << 5),
896 .ctl[dsf_bts_user] = (1 << 6), 1232 .ctl[dsf_bts_user] = (1 << 6),
897 1233 .nr_counter_reset = 1,
898 .sizeof_field = sizeof(long),
899 .sizeof_rec[ds_bts] = sizeof(long) * 3,
900#ifdef __i386__
901 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
902#else
903 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
904#endif
905}; 1234};
906static const struct ds_configuration ds_cfg_pentium_m = { 1235static const struct ds_configuration ds_cfg_pentium_m = {
907 .name = "Pentium M", 1236 .name = "Pentium M",
908 .ctl[dsf_bts] = (1 << 6) | (1 << 7), 1237 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
909 1238 .nr_counter_reset = 1,
910 .sizeof_field = sizeof(long),
911 .sizeof_rec[ds_bts] = sizeof(long) * 3,
912#ifdef __i386__
913 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
914#else
915 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
916#endif
917}; 1239};
918static const struct ds_configuration ds_cfg_core2_atom = { 1240static const struct ds_configuration ds_cfg_core2_atom = {
919 .name = "Core 2/Atom", 1241 .name = "Core 2/Atom",
920 .ctl[dsf_bts] = (1 << 6) | (1 << 7), 1242 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
921 .ctl[dsf_bts_kernel] = (1 << 9), 1243 .ctl[dsf_bts_kernel] = (1 << 9),
922 .ctl[dsf_bts_user] = (1 << 10), 1244 .ctl[dsf_bts_user] = (1 << 10),
923 1245 .nr_counter_reset = 1,
924 .sizeof_field = 8, 1246};
925 .sizeof_rec[ds_bts] = 8 * 3, 1247static const struct ds_configuration ds_cfg_core_i7 = {
926 .sizeof_rec[ds_pebs] = 8 * 18, 1248 .name = "Core i7",
1249 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
1250 .ctl[dsf_bts_kernel] = (1 << 9),
1251 .ctl[dsf_bts_user] = (1 << 10),
1252 .nr_counter_reset = 4,
927}; 1253};
928 1254
929static void 1255static void
930ds_configure(const struct ds_configuration *cfg) 1256ds_configure(const struct ds_configuration *cfg,
1257 struct cpuinfo_x86 *cpu)
931{ 1258{
1259 unsigned long nr_pebs_fields = 0;
1260
1261 printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
1262
1263#ifdef __i386__
1264 nr_pebs_fields = 10;
1265#else
1266 nr_pebs_fields = 18;
1267#endif
1268
1269 /*
1270 * Starting with version 2, architectural performance
1271 * monitoring supports a format specifier.
1272 */
1273 if ((cpuid_eax(0xa) & 0xff) > 1) {
1274 unsigned long perf_capabilities, format;
1275
1276 rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
1277
1278 format = (perf_capabilities >> 8) & 0xf;
1279
1280 switch (format) {
1281 case 0:
1282 nr_pebs_fields = 18;
1283 break;
1284 case 1:
1285 nr_pebs_fields = 22;
1286 break;
1287 default:
1288 printk(KERN_INFO
1289 "[ds] unknown PEBS format: %lu\n", format);
1290 nr_pebs_fields = 0;
1291 break;
1292 }
1293 }
1294
932 memset(&ds_cfg, 0, sizeof(ds_cfg)); 1295 memset(&ds_cfg, 0, sizeof(ds_cfg));
933 ds_cfg = *cfg; 1296 ds_cfg = *cfg;
934 1297
935 printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); 1298 ds_cfg.sizeof_ptr_field =
1299 (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
1300
1301 ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
1302 ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
936 1303
937 if (!cpu_has_bts) { 1304 if (!cpu_has(cpu, X86_FEATURE_BTS)) {
938 ds_cfg.ctl[dsf_bts] = 0; 1305 ds_cfg.sizeof_rec[ds_bts] = 0;
939 printk(KERN_INFO "[ds] bts not available\n"); 1306 printk(KERN_INFO "[ds] bts not available\n");
940 } 1307 }
941 if (!cpu_has_pebs) 1308 if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
1309 ds_cfg.sizeof_rec[ds_pebs] = 0;
942 printk(KERN_INFO "[ds] pebs not available\n"); 1310 printk(KERN_INFO "[ds] pebs not available\n");
1311 }
1312
1313 printk(KERN_INFO "[ds] sizes: address: %u bit, ",
1314 8 * ds_cfg.sizeof_ptr_field);
1315 printk("bts/pebs record: %u/%u bytes\n",
1316 ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
943 1317
944 WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); 1318 WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
945} 1319}
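The PEBS record size is no longer hard-coded per model in the hunk above: the pointer width comes from the DTES64 feature bit and the field count from the PEBS format in IA32_PERF_CAPABILITIES bits 11:8 once version 2 architectural perfmon is present. A sketch of the resulting record-size computation (the capability value is a made-up example):

#include <stdio.h>

int main(void)
{
        unsigned long perf_capabilities = 0x100;        /* hypothetical: format 1 */
        unsigned long format = (perf_capabilities >> 8) & 0xf;
        unsigned long nr_pebs_fields = (format == 1) ? 22 : 18;
        unsigned int sizeof_ptr_field = 8;              /* assumed: DTES64 set */

        printf("pebs record: %lu bytes, bts record: %u bytes\n",
               sizeof_ptr_field * nr_pebs_fields, sizeof_ptr_field * 3);
        return 0;
}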
946 1320
947void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 1321void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
948{ 1322{
1323 /* Only configure the first cpu. Others are identical. */
1324 if (ds_cfg.name)
1325 return;
1326
949 switch (c->x86) { 1327 switch (c->x86) {
950 case 0x6: 1328 case 0x6:
951 switch (c->x86_model) { 1329 switch (c->x86_model) {
952 case 0x9: 1330 case 0x9:
953 case 0xd: /* Pentium M */ 1331 case 0xd: /* Pentium M */
954 ds_configure(&ds_cfg_pentium_m); 1332 ds_configure(&ds_cfg_pentium_m, c);
955 break; 1333 break;
956 case 0xf: 1334 case 0xf:
957 case 0x17: /* Core2 */ 1335 case 0x17: /* Core2 */
958 case 0x1c: /* Atom */ 1336 case 0x1c: /* Atom */
959 ds_configure(&ds_cfg_core2_atom); 1337 ds_configure(&ds_cfg_core2_atom, c);
1338 break;
1339 case 0x1a: /* Core i7 */
1340 ds_configure(&ds_cfg_core_i7, c);
960 break; 1341 break;
961 case 0x1a: /* i7 */
962 default: 1342 default:
963 /* sorry, don't know about them */ 1343 /* Sorry, don't know about them. */
964 break; 1344 break;
965 } 1345 }
966 break; 1346 break;
@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
969 case 0x0: 1349 case 0x0:
970 case 0x1: 1350 case 0x1:
971 case 0x2: /* Netburst */ 1351 case 0x2: /* Netburst */
972 ds_configure(&ds_cfg_netburst); 1352 ds_configure(&ds_cfg_netburst, c);
973 break; 1353 break;
974 default: 1354 default:
975 /* sorry, don't know about them */ 1355 /* Sorry, don't know about them. */
976 break; 1356 break;
977 } 1357 }
978 break; 1358 break;
979 default: 1359 default:
980 /* sorry, don't know about them */ 1360 /* Sorry, don't know about them. */
981 break; 1361 break;
982 } 1362 }
983} 1363}
984 1364
1365static inline void ds_take_timestamp(struct ds_context *context,
1366 enum bts_qualifier qualifier,
1367 struct task_struct *task)
1368{
1369 struct bts_tracer *tracer = context->bts_master;
1370 struct bts_struct ts;
1371
1372 /* Prevent compilers from reading the tracer pointer twice. */
1373 barrier();
1374
1375 if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
1376 return;
1377
1378 memset(&ts, 0, sizeof(ts));
1379 ts.qualifier = qualifier;
1380 ts.variant.event.clock = trace_clock_global();
1381 ts.variant.event.pid = task->pid;
1382
1383 bts_write(tracer, &ts);
1384}
1385
985/* 1386/*
986 * Change the DS configuration from tracing prev to tracing next. 1387 * Change the DS configuration from tracing prev to tracing next.
987 */ 1388 */
988void ds_switch_to(struct task_struct *prev, struct task_struct *next) 1389void ds_switch_to(struct task_struct *prev, struct task_struct *next)
989{ 1390{
990 struct ds_context *prev_ctx = prev->thread.ds_ctx; 1391 struct ds_context *prev_ctx = prev->thread.ds_ctx;
991 struct ds_context *next_ctx = next->thread.ds_ctx; 1392 struct ds_context *next_ctx = next->thread.ds_ctx;
1393 unsigned long debugctlmsr = next->thread.debugctlmsr;
1394
1395 /* Make sure all data is read before we start. */
1396 barrier();
992 1397
993 if (prev_ctx) { 1398 if (prev_ctx) {
994 update_debugctlmsr(0); 1399 update_debugctlmsr(0);
995 1400
996 if (prev_ctx->bts_master && 1401 ds_take_timestamp(prev_ctx, bts_task_departs, prev);
997 (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
998 struct bts_struct ts = {
999 .qualifier = bts_task_departs,
1000 .variant.timestamp.jiffies = jiffies_64,
1001 .variant.timestamp.pid = prev->pid
1002 };
1003 bts_write(prev_ctx->bts_master, &ts);
1004 }
1005 } 1402 }
1006 1403
1007 if (next_ctx) { 1404 if (next_ctx) {
1008 if (next_ctx->bts_master && 1405 ds_take_timestamp(next_ctx, bts_task_arrives, next);
1009 (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1010 struct bts_struct ts = {
1011 .qualifier = bts_task_arrives,
1012 .variant.timestamp.jiffies = jiffies_64,
1013 .variant.timestamp.pid = next->pid
1014 };
1015 bts_write(next_ctx->bts_master, &ts);
1016 }
1017 1406
1018 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); 1407 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1019 } 1408 }
1020 1409
1021 update_debugctlmsr(next->thread.debugctlmsr); 1410 update_debugctlmsr(debugctlmsr);
1022} 1411}
1023 1412
1024void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) 1413static __init int ds_selftest(void)
1025{ 1414{
1026 clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); 1415 if (ds_cfg.sizeof_rec[ds_bts]) {
1027 tsk->thread.ds_ctx = NULL; 1416 int error;
1028}
1029 1417
1030void ds_exit_thread(struct task_struct *tsk) 1418 error = ds_selftest_bts();
1031{ 1419 if (error) {
1420 WARN(1, "[ds] selftest failed. disabling bts.\n");
1421 ds_cfg.sizeof_rec[ds_bts] = 0;
1422 }
1423 }
1424
1425 if (ds_cfg.sizeof_rec[ds_pebs]) {
1426 int error;
1427
1428 error = ds_selftest_pebs();
1429 if (error) {
1430 WARN(1, "[ds] selftest failed. disabling pebs.\n");
1431 ds_cfg.sizeof_rec[ds_pebs] = 0;
1432 }
1433 }
1434
1435 return 0;
1032} 1436}
1437device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
new file mode 100644
index 000000000000..6bc7c199ab99
--- /dev/null
+++ b/arch/x86/kernel/ds_selftest.c
@@ -0,0 +1,408 @@
1/*
2 * Debug Store support - selftest
3 *
4 *
5 * Copyright (C) 2009 Intel Corporation.
6 * Markus Metzger <markus.t.metzger@intel.com>, 2009
7 */
8
9#include "ds_selftest.h"
10
11#include <linux/kernel.h>
12#include <linux/string.h>
13#include <linux/smp.h>
14#include <linux/cpu.h>
15
16#include <asm/ds.h>
17
18
19#define BUFFER_SIZE 521 /* Intentionally chosen odd size. */
20#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */
21
22struct ds_selftest_bts_conf {
23 struct bts_tracer *tracer;
24 int error;
25 int (*suspend)(struct bts_tracer *);
26 int (*resume)(struct bts_tracer *);
27};
28
29static int ds_selftest_bts_consistency(const struct bts_trace *trace)
30{
31 int error = 0;
32
33 if (!trace) {
34 printk(KERN_CONT "failed to access trace...");
35 /* Bail out. Other tests are pointless. */
36 return -1;
37 }
38
39 if (!trace->read) {
40 printk(KERN_CONT "bts read not available...");
41 error = -1;
42 }
43
44 /* Do some sanity checks on the trace configuration. */
45 if (!trace->ds.n) {
46 printk(KERN_CONT "empty bts buffer...");
47 error = -1;
48 }
49 if (!trace->ds.size) {
50 printk(KERN_CONT "bad bts trace setup...");
51 error = -1;
52 }
53 if (trace->ds.end !=
54 (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
55 printk(KERN_CONT "bad bts buffer setup...");
56 error = -1;
57 }
58 /*
 59 * We allow top in [begin; end], since it's not clear when the
60 * overflow adjustment happens: after the increment or before the
61 * write.
62 */
63 if ((trace->ds.top < trace->ds.begin) ||
64 (trace->ds.end < trace->ds.top)) {
65 printk(KERN_CONT "bts top out of bounds...");
66 error = -1;
67 }
68
69 return error;
70}
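The consistency check above boils down to two invariants on the trace layout: end must equal begin plus n records of size bytes, and top must lie in [begin; end]. A standalone sketch of the same checks (structure and values are illustrative, not the kernel types):

#include <stdio.h>
#include <stddef.h>

struct trace_sketch {
        char *begin, *top, *end;
        size_t n, size;
};

static int consistent(const struct trace_sketch *t)
{
        if (t->end != t->begin + (t->n * t->size))
                return 0;                       /* bad buffer setup */
        if ((t->top < t->begin) || (t->end < t->top))
                return 0;                       /* top out of bounds */
        return 1;
}

int main(void)
{
        char buf[504];                          /* 21 records of 24 bytes */
        struct trace_sketch t = {
                .begin = buf, .top = buf + 48, .end = buf + sizeof(buf),
                .n = 21, .size = 24,
        };

        printf("consistent: %d\n", consistent(&t));
        return 0;
}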
71
72static int ds_selftest_bts_read(struct bts_tracer *tracer,
73 const struct bts_trace *trace,
74 const void *from, const void *to)
75{
76 const unsigned char *at;
77
78 /*
79 * Check a few things which do not belong to this test.
80 * They should be covered by other tests.
81 */
82 if (!trace)
83 return -1;
84
85 if (!trace->read)
86 return -1;
87
88 if (to < from)
89 return -1;
90
91 if (from < trace->ds.begin)
92 return -1;
93
94 if (trace->ds.end < to)
95 return -1;
96
97 if (!trace->ds.size)
98 return -1;
99
100 /* Now to the test itself. */
101 for (at = from; (void *)at < to; at += trace->ds.size) {
102 struct bts_struct bts;
103 unsigned long index;
104 int error;
105
106 if (((void *)at - trace->ds.begin) % trace->ds.size) {
107 printk(KERN_CONT
108 "read from non-integer index...");
109 return -1;
110 }
111 index = ((void *)at - trace->ds.begin) / trace->ds.size;
112
113 memset(&bts, 0, sizeof(bts));
114 error = trace->read(tracer, at, &bts);
115 if (error < 0) {
116 printk(KERN_CONT
117 "error reading bts trace at [%lu] (0x%p)...",
118 index, at);
119 return error;
120 }
121
122 switch (bts.qualifier) {
123 case BTS_BRANCH:
124 break;
125 default:
126 printk(KERN_CONT
127 "unexpected bts entry %llu at [%lu] (0x%p)...",
128 bts.qualifier, index, at);
129 return -1;
130 }
131 }
132
133 return 0;
134}
135
136static void ds_selftest_bts_cpu(void *arg)
137{
138 struct ds_selftest_bts_conf *conf = arg;
139 const struct bts_trace *trace;
140 void *top;
141
142 if (IS_ERR(conf->tracer)) {
143 conf->error = PTR_ERR(conf->tracer);
144 conf->tracer = NULL;
145
146 printk(KERN_CONT
147 "initialization failed (err: %d)...", conf->error);
148 return;
149 }
150
 151 /* We should have collected enough trace by now. */
152 conf->error = conf->suspend(conf->tracer);
153 if (conf->error < 0)
154 return;
155
156 /* Let's see if we can access the trace. */
157 trace = ds_read_bts(conf->tracer);
158
159 conf->error = ds_selftest_bts_consistency(trace);
160 if (conf->error < 0)
161 return;
162
163 /* If everything went well, we should have a few trace entries. */
164 if (trace->ds.top == trace->ds.begin) {
165 /*
166 * It is possible but highly unlikely that we got a
167 * buffer overflow and end up at exactly the same
168 * position we started from.
169 * Let's issue a warning, but continue.
170 */
171 printk(KERN_CONT "no trace/overflow...");
172 }
173
174 /* Let's try to read the trace we collected. */
175 conf->error =
176 ds_selftest_bts_read(conf->tracer, trace,
177 trace->ds.begin, trace->ds.top);
178 if (conf->error < 0)
179 return;
180
181 /*
182 * Let's read the trace again.
183 * Since we suspended tracing, we should get the same result.
184 */
185 top = trace->ds.top;
186
187 trace = ds_read_bts(conf->tracer);
188 conf->error = ds_selftest_bts_consistency(trace);
189 if (conf->error < 0)
190 return;
191
192 if (top != trace->ds.top) {
193 printk(KERN_CONT "suspend not working...");
194 conf->error = -1;
195 return;
196 }
197
198 /* Let's collect some more trace - see if resume is working. */
199 conf->error = conf->resume(conf->tracer);
200 if (conf->error < 0)
201 return;
202
203 conf->error = conf->suspend(conf->tracer);
204 if (conf->error < 0)
205 return;
206
207 trace = ds_read_bts(conf->tracer);
208
209 conf->error = ds_selftest_bts_consistency(trace);
210 if (conf->error < 0)
211 return;
212
213 if (trace->ds.top == top) {
214 /*
215 * It is possible but highly unlikely that we got a
216 * buffer overflow and end up at exactly the same
217 * position we started from.
218 * Let's issue a warning and check the full trace.
219 */
220 printk(KERN_CONT
221 "no resume progress/overflow...");
222
223 conf->error =
224 ds_selftest_bts_read(conf->tracer, trace,
225 trace->ds.begin, trace->ds.end);
226 } else if (trace->ds.top < top) {
227 /*
228 * We had a buffer overflow - the entire buffer should
229 * contain trace records.
230 */
231 conf->error =
232 ds_selftest_bts_read(conf->tracer, trace,
233 trace->ds.begin, trace->ds.end);
234 } else {
235 /*
236 * It is quite likely that the buffer did not overflow.
237 * Let's just check the delta trace.
238 */
239 conf->error =
240 ds_selftest_bts_read(conf->tracer, trace, top,
241 trace->ds.top);
242 }
243 if (conf->error < 0)
244 return;
245
246 conf->error = 0;
247}
248
249static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
250{
251 ds_suspend_bts(tracer);
252 return 0;
253}
254
255static int ds_resume_bts_wrap(struct bts_tracer *tracer)
256{
257 ds_resume_bts(tracer);
258 return 0;
259}
260
261static void ds_release_bts_noirq_wrap(void *tracer)
262{
263 (void)ds_release_bts_noirq(tracer);
264}
265
266static int ds_selftest_bts_bad_release_noirq(int cpu,
267 struct bts_tracer *tracer)
268{
269 int error = -EPERM;
270
271 /* Try to release the tracer on the wrong cpu. */
272 get_cpu();
273 if (cpu != smp_processor_id()) {
274 error = ds_release_bts_noirq(tracer);
275 if (error != -EPERM)
276 printk(KERN_CONT "release on wrong cpu...");
277 }
278 put_cpu();
279
280 return error ? 0 : -1;
281}
282
283static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
284{
285 struct bts_tracer *tracer;
286 int error;
287
288 /* Try to request cpu tracing while task tracing is active. */
289 tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
290 (size_t)-1, BTS_KERNEL);
291 error = PTR_ERR(tracer);
292 if (!IS_ERR(tracer)) {
293 ds_release_bts(tracer);
294 error = 0;
295 }
296
297 if (error != -EPERM)
298 printk(KERN_CONT "cpu/task tracing overlap...");
299
300 return error ? 0 : -1;
301}
302
303static int ds_selftest_bts_bad_request_task(void *buffer)
304{
305 struct bts_tracer *tracer;
306 int error;
307
 308 /* Try to request task tracing while cpu tracing is active. */
+	tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
+				     (size_t)-1, BTS_KERNEL);
+	error = PTR_ERR(tracer);
+	if (!IS_ERR(tracer)) {
+		error = 0;
+		ds_release_bts(tracer);
+	}
+
+	if (error != -EPERM)
+		printk(KERN_CONT "task/cpu tracing overlap...");
+
+	return error ? 0 : -1;
+}
+
+int ds_selftest_bts(void)
+{
+	struct ds_selftest_bts_conf conf;
+	unsigned char buffer[BUFFER_SIZE], *small_buffer;
+	unsigned long irq;
+	int cpu;
+
+	printk(KERN_INFO "[ds] bts selftest...");
+	conf.error = 0;
+
+	small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		conf.suspend = ds_suspend_bts_wrap;
+		conf.resume = ds_resume_bts_wrap;
+		conf.tracer =
+			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+					   NULL, (size_t)-1, BTS_KERNEL);
+		ds_selftest_bts_cpu(&conf);
+		if (conf.error >= 0)
+			conf.error = ds_selftest_bts_bad_request_task(buffer);
+		ds_release_bts(conf.tracer);
+		if (conf.error < 0)
+			goto out;
+
+		conf.suspend = ds_suspend_bts_noirq;
+		conf.resume = ds_resume_bts_noirq;
+		conf.tracer =
+			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+					   NULL, (size_t)-1, BTS_KERNEL);
+		smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
+		if (conf.error >= 0) {
+			conf.error =
+				ds_selftest_bts_bad_release_noirq(cpu,
+								  conf.tracer);
+			/* We must not release the tracer twice. */
+			if (conf.error < 0)
+				conf.tracer = NULL;
+		}
+		if (conf.error >= 0)
+			conf.error = ds_selftest_bts_bad_request_task(buffer);
+		smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
+					 conf.tracer, 1);
+		if (conf.error < 0)
+			goto out;
+	}
+
+	conf.suspend = ds_suspend_bts_wrap;
+	conf.resume = ds_resume_bts_wrap;
+	conf.tracer =
+		ds_request_bts_task(current, buffer, BUFFER_SIZE,
+				    NULL, (size_t)-1, BTS_KERNEL);
+	ds_selftest_bts_cpu(&conf);
+	if (conf.error >= 0)
+		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+	ds_release_bts(conf.tracer);
+	if (conf.error < 0)
+		goto out;
+
+	conf.suspend = ds_suspend_bts_noirq;
+	conf.resume = ds_resume_bts_noirq;
+	conf.tracer =
+		ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
+				    NULL, (size_t)-1, BTS_KERNEL);
+	local_irq_save(irq);
+	ds_selftest_bts_cpu(&conf);
+	if (conf.error >= 0)
+		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+	ds_release_bts_noirq(conf.tracer);
+	local_irq_restore(irq);
+	if (conf.error < 0)
+		goto out;
+
+	conf.error = 0;
+ out:
+	put_online_cpus();
+	printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
+
+	return conf.error;
+}
+
+int ds_selftest_pebs(void)
+{
+	return 0;
+}
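
The bad-request helpers above read error = PTR_ERR(tracer) before testing IS_ERR(tracer), which only works because of the kernel's ERR_PTR convention: a small negative errno is encoded directly in the pointer value, in the last page of the address space. A minimal, freestanding sketch of that convention (illustrative only, not copied from the kernel headers):

	#include <stdio.h>

	#define MAX_ERRNO	4095

	static inline void *ERR_PTR(long error) { return (void *)error; }
	static inline long PTR_ERR(const void *ptr) { return (long)ptr; }

	/* an "error pointer" lives in the top MAX_ERRNO bytes of the address space */
	static inline int IS_ERR(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	int main(void)
	{
		void *tracer = ERR_PTR(-1);	/* -EPERM on common ABIs */

		/* mirrors the selftest: error = PTR_ERR(); if (!IS_ERR()) ... */
		if (IS_ERR(tracer))
			printf("request failed: %ld\n", PTR_ERR(tracer));
		return 0;
	}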
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
new file mode 100644
index 000000000000..2ba8745c6663
--- /dev/null
+++ b/arch/x86/kernel/ds_selftest.h
@@ -0,0 +1,15 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#ifdef CONFIG_X86_DS_SELFTEST
+extern int ds_selftest_bts(void);
+extern int ds_selftest_pebs(void);
+#else
+static inline int ds_selftest_bts(void) { return 0; }
+static inline int ds_selftest_pebs(void) { return 0; }
+#endif
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index da87590b8698..81086c227ab7 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -29,7 +29,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		   unsigned long *sp, unsigned long bp, char *log_lvl);
 
 extern unsigned int code_bytes;
-extern int kstack_depth_to_print;
 
 /* The form of the top of the frame on the stack */
 struct stack_frame {
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 006281302925..7271fa33d791 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -617,7 +617,7 @@ __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
  */
 __init void e820_setup_gap(void)
 {
-	unsigned long gapstart, gapsize, round;
+	unsigned long gapstart, gapsize;
 	int found;
 
 	gapstart = 0x10000000;
@@ -635,14 +635,9 @@ __init void e820_setup_gap(void)
 #endif
 
 	/*
-	 * See how much we want to round up: start off with
-	 * rounding to the next 1MB area.
+	 * e820_reserve_resources_late() protects stolen RAM already
 	 */
-	round = 0x100000;
-	while ((gapsize >> 4) > round)
-		round += round;
-	/* Fun with two's complement */
-	pci_mem_start = (gapstart + round) & -round;
+	pci_mem_start = gapstart;
 
 	printk(KERN_INFO
 	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
@@ -1371,6 +1366,23 @@ void __init e820_reserve_resources(void)
 	}
 }
 
+/* How much should we pad RAM ending depending on where it is? */
+static unsigned long ram_alignment(resource_size_t pos)
+{
+	unsigned long mb = pos >> 20;
+
+	/* To 64kB in the first megabyte */
+	if (!mb)
+		return 64*1024;
+
+	/* To 1MB in the first 16MB */
+	if (mb < 16)
+		return 1024*1024;
+
+	/* To 32MB for anything above that */
+	return 32*1024*1024;
+}
+
 void __init e820_reserve_resources_late(void)
 {
 	int i;
@@ -1382,6 +1394,24 @@ void __init e820_reserve_resources_late(void)
 		insert_resource_expand_to_fit(&iomem_resource, res);
 		res++;
 	}
+
+	/*
+	 * Try to bump up RAM regions to reasonable boundaries to
+	 * avoid stolen RAM:
+	 */
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *entry = &e820_saved.map[i];
+		resource_size_t start, end;
+
+		if (entry->type != E820_RAM)
+			continue;
+		start = entry->addr + entry->size;
+		end = round_up(start, ram_alignment(start));
+		if (start == end)
+			continue;
+		reserve_region_with_split(&iomem_resource, start,
+					  end - 1, "RAM buffer");
+	}
 }
 
 char *__init default_machine_specific_memory_setup(void)
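
To make the RAM-buffer hunk above concrete: ram_alignment() picks a power-of-two pad size based on where a RAM entry ends, and round_up() stretches the end out to that boundary; the stretched-over gap is then reserved as "RAM buffer". A standalone userspace sketch of the same arithmetic (the example address is ours, not from the patch):

	#include <stdio.h>

	/* round x up to the next multiple of the power-of-two y */
	#define round_up(x, y)	((((x) - 1) | ((y) - 1)) + 1)

	static unsigned long ram_alignment(unsigned long long pos)
	{
		unsigned long mb = pos >> 20;

		if (!mb)
			return 64 * 1024;		/* below 1MB: 64kB */
		if (mb < 16)
			return 1024 * 1024;		/* below 16MB: 1MB */
		return 32 * 1024 * 1024;		/* above that: 32MB */
	}

	int main(void)
	{
		unsigned long long start = 0x9fc00;	/* classic 639k low-RAM end */
		unsigned long long end = round_up(start, ram_alignment(start));

		/* prints 0x9fc00 -> 0xa0000; that gap becomes the "RAM buffer" */
		printf("%#llx -> %#llx\n", start, end);
		return 0;
	}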
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd953dee..ebdb85cf2686 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -97,6 +97,7 @@ static void __init nvidia_bugs(int num, int slot, int func)
 }
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
+#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
 static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
 {
 	u32 d;
@@ -114,6 +115,7 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
 	d &= 0xff;
 	return d;
 }
+#endif
 
 static void __init ati_bugs(int num, int slot, int func)
 {
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38946c6e8433..1c17d7c751a4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -147,27 +147,14 @@ END(ftrace_graph_caller)
 GLOBAL(return_to_handler)
 	subq  $80, %rsp
 
+	/* Save the return values */
 	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-	movq %r10, 56(%rsp)
-	movq %r11, 64(%rsp)
+	movq %rdx, 8(%rsp)
 
 	call ftrace_return_to_handler
 
 	movq %rax, 72(%rsp)
-	movq 64(%rsp), %r11
-	movq 56(%rsp), %r10
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
+	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $72, %rsp
 	retq
@@ -1379,6 +1366,11 @@ END(xen_failsafe_callback)
 paranoidzeroentry_ist debug do_debug DEBUG_STACK
 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 paranoiderrorentry stack_segment do_stack_segment
+#ifdef CONFIG_XEN
+zeroentry xen_debug do_debug
+zeroentry xen_int3 do_int3
+errorentry xen_stack_segment do_stack_segment
+#endif
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
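
The return_to_handler diet above works because, in the x86-64 SysV calling convention, %rax and %rdx are the only registers that can carry a function's integer return value (up to 128 bits), so they are all that must survive the interposed call to ftrace_return_to_handler. A compilable sketch of that ABI fact (our own example, not part of the patch; build with gcc -O2 and inspect the assembly):

	struct pair {
		long lo;	/* first eightbyte, returned in %rax */
		long hi;	/* second eightbyte, returned in %rdx */
	};

	struct pair make_pair(long a, long b)
	{
		struct pair p = { a, b };
		return p;	/* compiles to: a -> %rax, b -> %rdx, ret */
	}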
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 30683883e0cd..dc5ed4bdd88d 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -608,13 +608,6 @@ ignore_int:
 ENTRY(initial_code)
 	.long i386_start_kernel
 
-.section .text
-/*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
 /*
  * BSS section
  */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c3fe010d74c8..9a391bbb8ba8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -12,6 +12,7 @@
 #include <asm/io_apic.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/hw_irq.h>
 
 atomic_t irq_err_count;
 
@@ -24,9 +25,9 @@ void (*generic_interrupt_extension)(void) = NULL;
  */
 void ack_bad_irq(unsigned int irq)
 {
-	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+	if (printk_ratelimit())
+		pr_err("unexpected IRQ trap at vector %02x\n", irq);
 
-#ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * Currently unexpected vectors happen only on SMP and APIC.
 	 * We _must_ ack these because every local APIC has only N
@@ -36,9 +37,7 @@ void ack_bad_irq(unsigned int irq)
 	 * completely.
 	 * But only ack when the APIC is enabled -AK
 	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
-#endif
+	ack_APIC_irq();
 }
 
 #define irq_stats(x)		(&per_cpu(irq_stat, x))
@@ -178,7 +177,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_thermal_count;
 # ifdef CONFIG_X86_64
 	sum += irq_stats(cpu)->irq_threshold_count;
-#endif
+# endif
 #endif
 	return sum;
 }
@@ -213,14 +212,11 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	irq = __get_cpu_var(vector_irq)[vector];
 
 	if (!handle_irq(irq, regs)) {
-#ifdef CONFIG_X86_64
-		if (!disable_apic)
-			ack_APIC_irq();
-#endif
+		ack_APIC_irq();
 
 		if (printk_ratelimit())
-			printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
+			pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
 				__func__, smp_processor_id(), vector, irq);
 	}
 
 	irq_exit();
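
Both irq.c hunks above converge on the same defensive-logging idiom: a pr_*() helper guarded by printk_ratelimit(), so a flood of bad vectors cannot spam the log. A minimal kernel-context sketch of that shape (the wrapper name is ours, not from the patch):

	#include <linux/kernel.h>

	/* silently drops messages once the default printk ratelimit is exceeded */
	static void report_bad_vector(unsigned int vector)
	{
		if (printk_ratelimit())
			pr_err("unexpected IRQ trap at vector %02x\n", vector);
	}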
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit.c
index 368b0a8836f9..2e08b10ad51a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit.c
@@ -1,20 +1,25 @@
+#include <linux/linkage.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/timex.h>
 #include <linux/slab.h>
 #include <linux/random.h>
+#include <linux/kprobes.h>
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/bitops.h>
+#include <linux/acpi.h>
 #include <linux/io.h>
 #include <linux/delay.h>
 
 #include <asm/atomic.h>
 #include <asm/system.h>
 #include <asm/timer.h>
+#include <asm/hw_irq.h>
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
@@ -22,7 +27,23 @@
 #include <asm/i8259.h>
 #include <asm/traps.h>
 
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
+
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
 
+#ifdef CONFIG_X86_32
 /*
  * Note that on a 486, we don't want to do a SIGFPE on an irq13
  * as the irq is unreliable, and exception 16 works correctly
@@ -52,30 +73,7 @@ static struct irqaction fpu_irq = {
 	.handler = math_error_irq,
 	.name = "fpu",
 };
-
-void __init init_ISA_irqs(void)
-{
-	int i;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	init_bsp_APIC();
 #endif
-	init_8259A(0);
-
-	/*
-	 * 16 old-style INTA-cycle interrupts:
-	 */
-	for (i = 0; i < NR_IRQS_LEGACY; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = NULL;
-		desc->depth = 1;
-
-		set_irq_chip_and_handler_name(i, &i8259A_chip,
-					      handle_level_irq, "XT");
-	}
-}
 
 /*
  * IRQ2 is cascade interrupt to second interrupt controller
@@ -118,29 +116,37 @@ int vector_used_by_percpu_irq(unsigned int vector)
 	return 0;
 }
 
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-void __init native_init_IRQ(void)
+static void __init init_ISA_irqs(void)
 {
 	int i;
 
-	/* Execute any quirks before the call gates are initialised: */
-	x86_quirk_pre_intr_init();
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	init_bsp_APIC();
+#endif
+	init_8259A(0);
 
 	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
+	 * 16 old-style INTA-cycle interrupts:
 	 */
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+	for (i = 0; i < NR_IRQS_LEGACY; i++) {
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
+
+		set_irq_chip_and_handler_name(i, &i8259A_chip,
+					      handle_level_irq, "XT");
 	}
+}
 
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
+static void __init smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -160,16 +166,27 @@ void __init native_init_IRQ(void)
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
-	/* IPI for single call function */
+	/* IPI for generic single function call */
 	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
 			call_function_single_interrupt);
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
+#endif /* CONFIG_SMP */
+}
+
+static void __init apic_intr_init(void)
+{
+	smp_intr_init();
+
+#ifdef CONFIG_X86_64
+	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+#endif
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -179,16 +196,67 @@ void __init native_init_IRQ(void)
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+	/* Performance monitoring interrupts: */
+# ifdef CONFIG_PERF_COUNTERS
+	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+# endif
+
 #endif
 
+#ifdef CONFIG_X86_32
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
 	/* thermal monitor LVT interrupt */
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
+#endif
+}
+
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *	Perform any necessary interrupt initialisation prior to setting up
+ *	the "ordinary" interrupt call gates. For legacy reasons, the ISA
+ *	interrupts should be initialised here if the machine emulates a PC
+ *	in any way.
+ **/
+static void __init x86_quirk_pre_intr_init(void)
+{
+#ifdef CONFIG_X86_32
+	if (x86_quirks->arch_pre_intr_init) {
+		if (x86_quirks->arch_pre_intr_init())
+			return;
+	}
+#endif
+	init_ISA_irqs();
+}
+
+void __init native_init_IRQ(void)
+{
+	int i;
+
+	/* Execute any quirks before the call gates are initialised: */
+	x86_quirk_pre_intr_init();
+
+	apic_intr_init();
+
+	/*
+	 * Cover the whole vector space, no vector can escape
+	 * us. (some of these will be overridden and become
+	 * 'special' SMP interrupts)
+	 */
+	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
+		if (!test_bit(i, used_vectors))
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+	}
 
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Call quirks after call gates are initialised (usually add in
 	 * the architecture specific gates):
@@ -203,4 +271,5 @@ void __init native_init_IRQ(void)
 	setup_irq(FPU_IRQ, &fpu_irq);
 
 	irq_ctx_init(smp_processor_id());
+#endif
 }
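
One construct worth calling out in the unified file: init_IRQ() is declared as a weak alias of native_init_IRQ(), so a paravirt build can supply a strong init_IRQ() that silently wins at link time. The same GCC feature in a self-contained sketch (names are ours):

	#include <stdio.h>

	void native_init(void)
	{
		puts("native path");
	}

	/* weak alias: callers bind to native_init unless a strong init() is linked in */
	void init(void) __attribute__((weak, alias("native_init")));

	int main(void)
	{
		init();		/* prints "native path" in this build */
		return 0;
	}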
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
deleted file mode 100644
index 8cd10537fd46..000000000000
--- a/arch/x86/kernel/irqinit_64.c
+++ /dev/null
@@ -1,177 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/bitops.h>
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/hw_irq.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
-
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-
-static struct irqaction irq2 = {
-	.handler = no_action,
-	.name = "cascade",
-};
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
-	[0 ... IRQ0_VECTOR - 1] = -1,
-	[IRQ0_VECTOR] = 0,
-	[IRQ1_VECTOR] = 1,
-	[IRQ2_VECTOR] = 2,
-	[IRQ3_VECTOR] = 3,
-	[IRQ4_VECTOR] = 4,
-	[IRQ5_VECTOR] = 5,
-	[IRQ6_VECTOR] = 6,
-	[IRQ7_VECTOR] = 7,
-	[IRQ8_VECTOR] = 8,
-	[IRQ9_VECTOR] = 9,
-	[IRQ10_VECTOR] = 10,
-	[IRQ11_VECTOR] = 11,
-	[IRQ12_VECTOR] = 12,
-	[IRQ13_VECTOR] = 13,
-	[IRQ14_VECTOR] = 14,
-	[IRQ15_VECTOR] = 15,
-	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
-};
-
-int vector_used_by_percpu_irq(unsigned int vector)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (per_cpu(vector_irq, cpu)[vector] != -1)
-			return 1;
-	}
-
-	return 0;
-}
-
-static void __init init_ISA_irqs(void)
-{
-	int i;
-
-	init_bsp_APIC();
-	init_8259A(0);
-
-	for (i = 0; i < NR_IRQS_LEGACY; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = NULL;
-		desc->depth = 1;
-
-		/*
-		 * 16 old-style INTA-cycle interrupts:
-		 */
-		set_irq_chip_and_handler_name(i, &i8259A_chip,
-					      handle_level_irq, "XT");
-	}
-}
-
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-	/*
-	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-	 * IPI, driven by wakeup.
-	 */
-	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-	/* IPIs for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
-	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
-	/* IPI for generic single function call */
-	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-			call_function_single_interrupt);
-
-	/* Low priority IPI to cleanup after moving an irq */
-	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-#endif
-}
-
-static void __init apic_intr_init(void)
-{
-	smp_intr_init();
-
-	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-
-	/* self generated IPI for local APIC timer */
-	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
-	/* generic IPI for platform specific use */
-	alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
-
-	/* IPI vectors for APIC spurious and error interrupts */
-	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-}
-
-void __init native_init_IRQ(void)
-{
-	int i;
-
-	init_ISA_irqs();
-	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
-	 */
-	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
-		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (vector != IA32_SYSCALL_VECTOR)
-			set_intr_gate(vector, interrupt[i]);
-	}
-
-	apic_intr_init();
-
-	if (!acpi_ioapic)
-		setup_irq(2, &irq2);
-}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index b1f4dffb919e..8d82a77a3f3b 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -142,7 +142,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 	gdb_regs32[GDB_PS]	= *(unsigned long *)(p->thread.sp + 8);
 	gdb_regs32[GDB_CS]	= __KERNEL_CS;
 	gdb_regs32[GDB_SS]	= __KERNEL_DS;
-	gdb_regs[GDB_PC]	= p->thread.ip;
+	gdb_regs[GDB_PC]	= 0;
 	gdb_regs[GDB_R8]	= 0;
 	gdb_regs[GDB_R9]	= 0;
 	gdb_regs[GDB_R10]	= 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 33019ddb56b4..6551dedee20c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -195,7 +195,7 @@ static void kvm_leave_lazy_mmu(void)
 	struct kvm_para_state *state = kvm_para_state();
 
 	mmu_queue_flush(state);
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
+	paravirt_leave_lazy_mmu();
 	state->mode = paravirt_get_lazy_mode();
 }
 
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 453b5795a5c6..366baa179913 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -13,25 +13,13 @@
  * Licensed under the terms of the GNU General Public
  * License version 2.  See file COPYING for details.
  */
-#include <linux/platform_device.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
 #include <linux/firmware.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
 #include <linux/pci_ids.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
 #include <linux/pci.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -79,9 +67,6 @@ struct microcode_amd {
 #define UCODE_CONTAINER_SECTION_HDR	8
 #define UCODE_CONTAINER_HEADER_SIZE	12
 
-/* serialize access to the physical write */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
 static struct equiv_cpu_entry *equiv_cpu_table;
 
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
@@ -144,9 +129,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 	return 1;
 }
 
-static void apply_microcode_amd(int cpu)
+static int apply_microcode_amd(int cpu)
 {
-	unsigned long flags;
 	u32 rev, dummy;
 	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
@@ -156,25 +140,25 @@ static void apply_microcode_amd(int cpu)
 	BUG_ON(cpu_num != cpu);
 
 	if (mc_amd == NULL)
-		return;
+		return 0;
 
-	spin_lock_irqsave(&microcode_update_lock, flags);
 	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
 	/* get patch id after patching */
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-	spin_unlock_irqrestore(&microcode_update_lock, flags);
 
 	/* check current patch id and patch's id for match */
 	if (rev != mc_amd->hdr.patch_id) {
 		printk(KERN_ERR "microcode: CPU%d: update failed "
 		       "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
-		return;
+		return -1;
 	}
 
 	printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
 	       cpu, rev);
 
 	uci->cpu_sig.rev = rev;
+
+	return 0;
 }
 
 static int get_ucode_data(void *to, const u8 *from, size_t n)
@@ -257,13 +241,12 @@ static int install_equiv_cpu_table(const u8 *buf)
 
 static void free_equiv_cpu_table(void)
 {
-	if (equiv_cpu_table) {
-		vfree(equiv_cpu_table);
-		equiv_cpu_table = NULL;
-	}
+	vfree(equiv_cpu_table);
+	equiv_cpu_table = NULL;
 }
 
-static int generic_load_microcode(int cpu, const u8 *data, size_t size)
+static enum ucode_state
+generic_load_microcode(int cpu, const u8 *data, size_t size)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	const u8 *ucode_ptr = data;
@@ -272,12 +255,13 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover;
 	unsigned long offset;
+	enum ucode_state state = UCODE_OK;
 
 	offset = install_equiv_cpu_table(ucode_ptr);
 	if (!offset) {
 		printk(KERN_ERR "microcode: failed to create "
 		       "equivalent cpu table\n");
-		return -EINVAL;
+		return UCODE_ERROR;
 	}
 
 	ucode_ptr += offset;
@@ -293,8 +277,7 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 
 		mc_header = (struct microcode_header_amd *)mc;
 		if (get_matching_microcode(cpu, mc, new_rev)) {
-			if (new_mc)
-				vfree(new_mc);
+			vfree(new_mc);
 			new_rev = mc_header->patch_id;
 			new_mc  = mc;
 		} else
@@ -306,34 +289,32 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 
 	if (new_mc) {
 		if (!leftover) {
-			if (uci->mc)
-				vfree(uci->mc);
+			vfree(uci->mc);
 			uci->mc = new_mc;
 			pr_debug("microcode: CPU%d found a matching microcode "
 				 "update with version 0x%x (current=0x%x)\n",
 				 cpu, new_rev, uci->cpu_sig.rev);
-		} else
+		} else {
 			vfree(new_mc);
-	}
+			state = UCODE_ERROR;
+		}
+	} else
+		state = UCODE_NFOUND;
 
 	free_equiv_cpu_table();
 
-	return (int)leftover;
+	return state;
 }
 
-static int request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device)
 {
 	const char *fw_name = "amd-ucode/microcode_amd.bin";
 	const struct firmware *firmware;
-	int ret;
-
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
+	enum ucode_state ret;
 
-	ret = request_firmware(&firmware, fw_name, device);
-	if (ret) {
+	if (request_firmware(&firmware, fw_name, device)) {
 		printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
-		return ret;
+		return UCODE_NFOUND;
 	}
 
 	ret = generic_load_microcode(cpu, firmware->data, firmware->size);
@@ -343,11 +324,12 @@ static int request_microcode_fw(int cpu, struct device *device)
 	return ret;
 }
 
-static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
 	printk(KERN_INFO "microcode: AMD microcode update via "
 	       "/dev/cpu/microcode not supported\n");
-	return -1;
+	return UCODE_ERROR;
 }
 
 static void microcode_fini_cpu_amd(int cpu)
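
The AMD driver's methods now report UCODE_OK / UCODE_NFOUND / UCODE_ERROR instead of overloading an int with both errnos and leftover byte counts. A minimal standalone rendering of the pattern (enumerator order and the helper are illustrative, not copied from microcode.h):

	#include <stdio.h>

	enum ucode_state { UCODE_OK, UCODE_NFOUND, UCODE_ERROR };

	static enum ucode_state load(int have_file, int parse_ok)
	{
		if (!have_file)
			return UCODE_NFOUND;	/* nothing to apply, not fatal */
		if (!parse_ok)
			return UCODE_ERROR;	/* corrupt data, abort the update */
		return UCODE_OK;		/* caller may now apply */
	}

	int main(void)
	{
		/* call sites branch on intent, not on magic -1/0/+n values */
		if (load(1, 1) == UCODE_OK)
			puts("apply microcode");
		return 0;
	}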
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 98c470c069d1..9c4461501fcb 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -71,27 +71,18 @@
  *	Thanks to Stuart Swales for pointing out this bug.
  */
 #include <linux/platform_device.h>
-#include <linux/capability.h>
 #include <linux/miscdevice.h>
-#include <linux/firmware.h>
+#include <linux/capability.h>
 #include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 
 MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -101,36 +92,110 @@ MODULE_LICENSE("GPL");
 
 static struct microcode_ops	*microcode_ops;
 
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+/*
+ * Synchronization.
+ *
+ * All non cpu-hotplug-callback call sites use:
+ *
+ * - microcode_mutex to synchronize with each other;
+ * - get/put_online_cpus() to synchronize with
+ *   the cpu-hotplug-callback call sites.
+ *
+ * We guarantee that only a single cpu is being
+ * updated at any particular moment of time.
+ */
 static DEFINE_MUTEX(microcode_mutex);
 
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
+/*
+ * Operations that are run on a target cpu:
+ */
+
+struct cpu_info_ctx {
+	struct cpu_signature	*cpu_sig;
+	int			err;
+};
+
+static void collect_cpu_info_local(void *arg)
+{
+	struct cpu_info_ctx *ctx = arg;
+
+	ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(),
+						   ctx->cpu_sig);
+}
+
+static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig)
+{
+	struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 };
+	int ret;
+
+	ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1);
+	if (!ret)
+		ret = ctx.err;
+
+	return ret;
+}
+
+static int collect_cpu_info(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int ret;
+
+	memset(uci, 0, sizeof(*uci));
+
+	ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig);
+	if (!ret)
+		uci->valid = 1;
+
+	return ret;
+}
+
+struct apply_microcode_ctx {
+	int err;
+};
+
+static void apply_microcode_local(void *arg)
+{
+	struct apply_microcode_ctx *ctx = arg;
+
+	ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+}
+
+static int apply_microcode_on_target(int cpu)
+{
+	struct apply_microcode_ctx ctx = { .err = 0 };
+	int ret;
+
+	ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
+	if (!ret)
+		ret = ctx.err;
+
+	return ret;
+}
+
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
 static int do_microcode_update(const void __user *buf, size_t size)
 {
-	cpumask_t old;
 	int error = 0;
 	int cpu;
 
-	old = current->cpus_allowed;
-
 	for_each_online_cpu(cpu) {
 		struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+		enum ucode_state ustate;
 
 		if (!uci->valid)
 			continue;
 
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-		error = microcode_ops->request_microcode_user(cpu, buf, size);
-		if (error < 0)
-			goto out;
-		if (!error)
-			microcode_ops->apply_microcode(cpu);
+		ustate = microcode_ops->request_microcode_user(cpu, buf, size);
+		if (ustate == UCODE_ERROR) {
+			error = -1;
+			break;
+		} else if (ustate == UCODE_OK)
+			apply_microcode_on_target(cpu);
 	}
-out:
-	set_cpus_allowed_ptr(current, &old);
+
 	return error;
 }
 
@@ -143,19 +208,17 @@ static int microcode_open(struct inode *unused1, struct file *unused2)
 static ssize_t microcode_write(struct file *file, const char __user *buf,
 			       size_t len, loff_t *ppos)
 {
-	ssize_t ret;
+	ssize_t ret = -EINVAL;
 
 	if ((len >> PAGE_SHIFT) > num_physpages) {
-		printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
-		       num_physpages);
-		return -EINVAL;
+		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+		return ret;
 	}
 
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
 
-	ret = do_microcode_update(buf, len);
-	if (!ret)
+	if (do_microcode_update(buf, len) == 0)
 		ret = (ssize_t)len;
 
 	mutex_unlock(&microcode_mutex);
@@ -165,15 +228,15 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
 }
 
 static const struct file_operations microcode_fops = {
 	.owner			= THIS_MODULE,
 	.write			= microcode_write,
 	.open			= microcode_open,
 };
 
 static struct miscdevice microcode_dev = {
 	.minor			= MICROCODE_MINOR,
 	.name			= "microcode",
 	.fops			= &microcode_fops,
 };
 
 static int __init microcode_dev_init(void)
@@ -182,9 +245,7 @@ static int __init microcode_dev_init(void)
 
 	error = misc_register(&microcode_dev);
 	if (error) {
-		printk(KERN_ERR
-			"microcode: can't misc_register on minor=%d\n",
-			MICROCODE_MINOR);
+		pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
 		return error;
 	}
 
@@ -205,42 +266,51 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 /* fake device for request_firmware */
 static struct platform_device	*microcode_pdev;
 
-static long reload_for_cpu(void *unused)
+static int reload_for_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	int err = 0;
 
 	mutex_lock(&microcode_mutex);
 	if (uci->valid) {
-		err = microcode_ops->request_microcode_fw(smp_processor_id(),
-							  &microcode_pdev->dev);
-		if (!err)
-			microcode_ops->apply_microcode(smp_processor_id());
+		enum ucode_state ustate;
+
+		ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+		if (ustate == UCODE_OK)
+			apply_microcode_on_target(cpu);
+		else
+			if (ustate == UCODE_ERROR)
+				err = -EINVAL;
 	}
 	mutex_unlock(&microcode_mutex);
+
 	return err;
 }
 
 static ssize_t reload_store(struct sys_device *dev,
 			    struct sysdev_attribute *attr,
-			    const char *buf, size_t sz)
+			    const char *buf, size_t size)
 {
-	char *end;
-	unsigned long val = simple_strtoul(buf, &end, 0);
-	int err = 0;
+	unsigned long val;
 	int cpu = dev->id;
+	int ret = 0;
+	char *end;
 
+	val = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
+
 	if (val == 1) {
 		get_online_cpus();
 		if (cpu_online(cpu))
-			err = work_on_cpu(cpu, reload_for_cpu, NULL);
+			ret = reload_for_cpu(cpu);
 		put_online_cpus();
 	}
-	if (err)
-		return err;
-	return sz;
+
+	if (!ret)
+		ret = size;
+
+	return ret;
 }
 
 static ssize_t version_show(struct sys_device *dev,
@@ -271,11 +341,11 @@ static struct attribute *mc_default_attrs[] = {
 };
 
 static struct attribute_group mc_attr_group = {
 	.attrs			= mc_default_attrs,
 	.name			= "microcode",
 };
 
-static void __microcode_fini_cpu(int cpu)
+static void microcode_fini_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
@@ -283,103 +353,68 @@ static void __microcode_fini_cpu(int cpu)
 	uci->valid = 0;
 }
 
-static void microcode_fini_cpu(int cpu)
-{
-	mutex_lock(&microcode_mutex);
-	__microcode_fini_cpu(cpu);
-	mutex_unlock(&microcode_mutex);
-}
-
-static void collect_cpu_info(int cpu)
+static enum ucode_state microcode_resume_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	memset(uci, 0, sizeof(*uci));
-	if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig))
-		uci->valid = 1;
+	if (!uci->mc)
+		return UCODE_NFOUND;
+
+	pr_debug("microcode: CPU%d updated upon resume\n", cpu);
+	apply_microcode_on_target(cpu);
+
+	return UCODE_OK;
 }
 
-static int microcode_resume_cpu(int cpu)
+static enum ucode_state microcode_init_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct cpu_signature nsig;
+	enum ucode_state ustate;
 
-	pr_debug("microcode: CPU%d resumed\n", cpu);
+	if (collect_cpu_info(cpu))
+		return UCODE_ERROR;
 
-	if (!uci->mc)
-		return 1;
+	/* --dimm. Trigger a delayed update? */
+	if (system_state != SYSTEM_RUNNING)
+		return UCODE_NFOUND;
 
-	/*
-	 * Let's verify that the 'cached' ucode does belong
-	 * to this cpu (a bit of paranoia):
-	 */
-	if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
-		__microcode_fini_cpu(cpu);
-		printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n",
-			cpu);
-		return -1;
-	}
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
 
-	if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) {
-		__microcode_fini_cpu(cpu);
-		printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n",
-			cpu);
-		/* Should we look for a new ucode here? */
-		return 1;
+	if (ustate == UCODE_OK) {
+		pr_debug("microcode: CPU%d updated upon init\n", cpu);
+		apply_microcode_on_target(cpu);
 	}
 
-	return 0;
+	return ustate;
 }
 
-static long microcode_update_cpu(void *unused)
+static enum ucode_state microcode_update_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
-	int err = 0;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	enum ucode_state ustate;
 
-	/*
-	 * Check if the system resume is in progress (uci->valid != NULL),
-	 * otherwise just request a firmware:
-	 */
-	if (uci->valid) {
-		err = microcode_resume_cpu(smp_processor_id());
-	} else {
-		collect_cpu_info(smp_processor_id());
-		if (uci->valid && system_state == SYSTEM_RUNNING)
-			err = microcode_ops->request_microcode_fw(
-					smp_processor_id(),
-					&microcode_pdev->dev);
-	}
-	if (!err)
-		microcode_ops->apply_microcode(smp_processor_id());
-	return err;
-}
+	if (uci->valid)
+		ustate = microcode_resume_cpu(cpu);
+	else
+		ustate = microcode_init_cpu(cpu);
 
-static int microcode_init_cpu(int cpu)
-{
-	int err;
-	mutex_lock(&microcode_mutex);
-	err = work_on_cpu(cpu, microcode_update_cpu, NULL);
-	mutex_unlock(&microcode_mutex);
-
-	return err;
+	return ustate;
 }
 
 static int mc_sysdev_add(struct sys_device *sys_dev)
 {
 	int err, cpu = sys_dev->id;
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	if (!cpu_online(cpu))
 		return 0;
 
 	pr_debug("microcode: CPU%d added\n", cpu);
-	memset(uci, 0, sizeof(*uci));
 
 	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
 	if (err)
 		return err;
 
-	err = microcode_init_cpu(cpu);
+	if (microcode_init_cpu(cpu) == UCODE_ERROR)
+		err = -EINVAL;
 
 	return err;
 }
@@ -400,19 +435,30 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 static int mc_sysdev_resume(struct sys_device *dev)
 {
 	int cpu = dev->id;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	if (!cpu_online(cpu))
 		return 0;
 
-	/* only CPU 0 will apply ucode here */
-	microcode_update_cpu(NULL);
+	/*
+	 * All non-bootup cpus are still disabled,
+	 * so only CPU 0 will apply ucode here.
+	 *
+	 * Moreover, there can be no concurrent
+	 * updates from any other places at this point.
+	 */
+	WARN_ON(cpu != 0);
+
+	if (uci->valid && uci->mc)
+		microcode_ops->apply_microcode(cpu);
+
 	return 0;
 }
 
 static struct sysdev_driver mc_sysdev_driver = {
 	.add			= mc_sysdev_add,
 	.remove			= mc_sysdev_remove,
 	.resume			= mc_sysdev_resume,
 };
 
 static __cpuinit int
@@ -425,15 +471,12 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		if (microcode_init_cpu(cpu))
-			printk(KERN_ERR "microcode: failed to init CPU%d\n",
-			       cpu);
+		microcode_update_cpu(cpu);
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 		pr_debug("microcode: CPU%d added\n", cpu);
 		if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
-			printk(KERN_ERR "microcode: Failed to create the sysfs "
-				"group for CPU%d\n", cpu);
+			pr_err("microcode: Failed to create group for CPU%d\n", cpu);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
@@ -465,13 +508,10 @@ static int __init microcode_init(void)
 		microcode_ops = init_amd_microcode();
 
 	if (!microcode_ops) {
-		printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+		pr_err("microcode: no support for this CPU vendor\n");
 		return -ENODEV;
 	}
 
-	error = microcode_dev_init();
-	if (error)
-		return error;
 	microcode_pdev = platform_device_register_simple("microcode", -1,
 							 NULL, 0);
 	if (IS_ERR(microcode_pdev)) {
@@ -480,23 +520,31 @@ static int __init microcode_init(void)
 	}
 
 	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
 	error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
+
+	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
+
 	if (error) {
-		microcode_dev_exit();
 		platform_device_unregister(microcode_pdev);
 		return error;
 	}
 
+	error = microcode_dev_init();
+	if (error)
+		return error;
+
 	register_hotcpu_notifier(&mc_cpu_notifier);
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
 	       " <tigran@aivazian.fsnet.co.uk>,"
 	       " Peter Oruba\n");
 
 	return 0;
 }
+module_init(microcode_init);
 
 static void __exit microcode_exit(void)
 {
@@ -505,16 +553,17 @@ static void __exit microcode_exit(void)
 	unregister_hotcpu_notifier(&mc_cpu_notifier);
 
 	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
 	sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+
+	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
 	platform_device_unregister(microcode_pdev);
 
 	microcode_ops = NULL;
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }
-
-module_init(microcode_init);
 module_exit(microcode_exit);
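
The core rewrite above replaces the old "migrate the caller with set_cpus_allowed_ptr()/work_on_cpu()" approach with one fixed pattern: wrap the arguments in a small context struct and run the operation on the target CPU via smp_call_function_single(). A condensed kernel-context sketch of that pattern (do_op_on() stands in for the real per-CPU method and is ours, not the driver's):

	#include <linux/smp.h>

	struct op_ctx {
		int	arg;
		int	err;
	};

	static void op_local(void *info)
	{
		struct op_ctx *ctx = info;

		/* runs on the target CPU in IPI context: keep it short */
		ctx->err = do_op_on(smp_processor_id(), ctx->arg);
	}

	static int op_on_target(int cpu, int arg)
	{
		struct op_ctx ctx = { .arg = arg, .err = 0 };
		int ret;

		/* wait=1: block until the remote handler has completed */
		ret = smp_call_function_single(cpu, op_local, &ctx, 1);
		if (!ret)
			ret = ctx.err;

		return ret;
	}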
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 149b9ec7c1ab..0d334ddd0a96 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,24 +70,11 @@
  *	Fix sigmatch() macro to handle old CPUs with pf == 0.
  *	Thanks to Stuart Swales for pointing out this bug.
  */
-#include <linux/platform_device.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
 #include <linux/firmware.h>
-#include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
 #include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -150,13 +137,9 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
-/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
 static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-	unsigned long flags;
 	unsigned int val[2];
 
 	memset(csig, 0, sizeof(*csig));
@@ -176,18 +159,14 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 		csig->pf = 1 << ((val[1] >> 18) & 7);
 	}
 
-	/* serialize access to the physical write to MSR 0x79 */
-	spin_lock_irqsave(&microcode_update_lock, flags);
-
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 	/* see notes above for revision 1.07.  Apparent chip bug */
 	sync_core();
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
-	spin_unlock_irqrestore(&microcode_update_lock, flags);
 
-	pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
-			csig->sig, csig->pf, csig->rev);
+	printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
+			cpu_num, csig->sig, csig->pf, csig->rev);
 
 	return 0;
 }
@@ -318,11 +297,10 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
 	return 0;
 }
 
-static void apply_microcode(int cpu)
+static int apply_microcode(int cpu)
 {
 	struct microcode_intel *mc_intel;
 	struct ucode_cpu_info *uci;
-	unsigned long flags;
 	unsigned int val[2];
 	int cpu_num;
 
@@ -334,10 +312,7 @@ static void apply_microcode(int cpu)
334 BUG_ON(cpu_num != cpu); 312 BUG_ON(cpu_num != cpu);
335 313
336 if (mc_intel == NULL) 314 if (mc_intel == NULL)
337 return; 315 return 0;
338
339 /* serialize access to the physical write to MSR 0x79 */
340 spin_lock_irqsave(&microcode_update_lock, flags);
341 316
342 /* write microcode via MSR 0x79 */ 317 /* write microcode via MSR 0x79 */
343 wrmsr(MSR_IA32_UCODE_WRITE, 318 wrmsr(MSR_IA32_UCODE_WRITE,
@@ -351,30 +326,32 @@ static void apply_microcode(int cpu)
351 /* get the current revision from MSR 0x8B */ 326 /* get the current revision from MSR 0x8B */
352 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); 327 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
353 328
354 spin_unlock_irqrestore(&microcode_update_lock, flags);
355 if (val[1] != mc_intel->hdr.rev) { 329 if (val[1] != mc_intel->hdr.rev) {
356 printk(KERN_ERR "microcode: CPU%d update from revision " 330 printk(KERN_ERR "microcode: CPU%d update "
357 "0x%x to 0x%x failed\n", 331 "to revision 0x%x failed\n",
358 cpu_num, uci->cpu_sig.rev, val[1]); 332 cpu_num, mc_intel->hdr.rev);
359 return; 333 return -1;
360 } 334 }
361 printk(KERN_INFO "microcode: CPU%d updated from revision " 335 printk(KERN_INFO "microcode: CPU%d updated to revision "
362 "0x%x to 0x%x, date = %04x-%02x-%02x \n", 336 "0x%x, date = %04x-%02x-%02x \n",
363 cpu_num, uci->cpu_sig.rev, val[1], 337 cpu_num, val[1],
364 mc_intel->hdr.date & 0xffff, 338 mc_intel->hdr.date & 0xffff,
365 mc_intel->hdr.date >> 24, 339 mc_intel->hdr.date >> 24,
366 (mc_intel->hdr.date >> 16) & 0xff); 340 (mc_intel->hdr.date >> 16) & 0xff);
367 341
368 uci->cpu_sig.rev = val[1]; 342 uci->cpu_sig.rev = val[1];
343
344 return 0;
369} 345}
370 346
371static int generic_load_microcode(int cpu, void *data, size_t size, 347static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
372 int (*get_ucode_data)(void *, const void *, size_t)) 348 int (*get_ucode_data)(void *, const void *, size_t))
373{ 349{
374 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 350 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
375 u8 *ucode_ptr = data, *new_mc = NULL, *mc; 351 u8 *ucode_ptr = data, *new_mc = NULL, *mc;
376 int new_rev = uci->cpu_sig.rev; 352 int new_rev = uci->cpu_sig.rev;
377 unsigned int leftover = size; 353 unsigned int leftover = size;
354 enum ucode_state state = UCODE_OK;
378 355
379 while (leftover) { 356 while (leftover) {
380 struct microcode_header_intel mc_header; 357 struct microcode_header_intel mc_header;
@@ -412,11 +389,15 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
412 leftover -= mc_size; 389 leftover -= mc_size;
413 } 390 }
414 391
415 if (!new_mc) 392 if (leftover) {
393 if (new_mc)
394 vfree(new_mc);
395 state = UCODE_ERROR;
416 goto out; 396 goto out;
397 }
417 398
418 if (leftover) { 399 if (!new_mc) {
419 vfree(new_mc); 400 state = UCODE_NFOUND;
420 goto out; 401 goto out;
421 } 402 }
422 403
@@ -427,9 +408,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
427 pr_debug("microcode: CPU%d found a matching microcode update with" 408 pr_debug("microcode: CPU%d found a matching microcode update with"
428 " version 0x%x (current=0x%x)\n", 409 " version 0x%x (current=0x%x)\n",
429 cpu, new_rev, uci->cpu_sig.rev); 410 cpu, new_rev, uci->cpu_sig.rev);
430 411out:
431 out: 412 return state;
432 return (int)leftover;
433} 413}
434 414
435static int get_ucode_fw(void *to, const void *from, size_t n) 415static int get_ucode_fw(void *to, const void *from, size_t n)
@@ -438,21 +418,19 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
438 return 0; 418 return 0;
439} 419}
440 420
441static int request_microcode_fw(int cpu, struct device *device) 421static enum ucode_state request_microcode_fw(int cpu, struct device *device)
442{ 422{
443 char name[30]; 423 char name[30];
444 struct cpuinfo_x86 *c = &cpu_data(cpu); 424 struct cpuinfo_x86 *c = &cpu_data(cpu);
445 const struct firmware *firmware; 425 const struct firmware *firmware;
446 int ret; 426 enum ucode_state ret;
447 427
448 /* We should bind the task to the CPU */
449 BUG_ON(cpu != raw_smp_processor_id());
450 sprintf(name, "intel-ucode/%02x-%02x-%02x", 428 sprintf(name, "intel-ucode/%02x-%02x-%02x",
451 c->x86, c->x86_model, c->x86_mask); 429 c->x86, c->x86_model, c->x86_mask);
452 ret = request_firmware(&firmware, name, device); 430
453 if (ret) { 431 if (request_firmware(&firmware, name, device)) {
454 pr_debug("microcode: data file %s load failed\n", name); 432 pr_debug("microcode: data file %s load failed\n", name);
455 return ret; 433 return UCODE_NFOUND;
456 } 434 }
457 435
458 ret = generic_load_microcode(cpu, (void *)firmware->data, 436 ret = generic_load_microcode(cpu, (void *)firmware->data,
@@ -468,11 +446,9 @@ static int get_ucode_user(void *to, const void *from, size_t n)
468 return copy_from_user(to, from, n); 446 return copy_from_user(to, from, n);
469} 447}
470 448
471static int request_microcode_user(int cpu, const void __user *buf, size_t size) 449static enum ucode_state
450request_microcode_user(int cpu, const void __user *buf, size_t size)
472{ 451{
473 /* We should bind the task to the CPU */
474 BUG_ON(cpu != raw_smp_processor_id());
475
476 return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); 452 return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
477} 453}
478 454
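
The microcode_intel.c changes above replace ad-hoc integer returns with a tri-state result, so callers can tell "no applicable update" (UCODE_NFOUND) apart from a corrupt image (UCODE_ERROR). A minimal userspace model of that control flow, with a local enum standing in for the kernel's ucode_state:

#include <stdio.h>
#include <string.h>

enum ucode_state { UCODE_OK, UCODE_NFOUND, UCODE_ERROR };

/* Parse a fake update blob of 4-byte revision records: trailing
 * garbage is an error, "nothing newer than cur_rev" is NFOUND. */
static enum ucode_state load(const unsigned char *data, size_t size,
                             unsigned int cur_rev)
{
        int found_newer = 0;

        while (size >= 4) {
                unsigned int rev;

                memcpy(&rev, data, 4);
                if (rev > cur_rev)
                        found_newer = 1;
                data += 4;
                size -= 4;
        }
        if (size)
                return UCODE_ERROR;     /* truncated record */
        if (!found_newer)
                return UCODE_NFOUND;    /* no applicable update */
        return UCODE_OK;
}

int main(void)
{
        unsigned char blob[8] = { 5, 0, 0, 0, 7, 0, 0, 0 };

        printf("%d\n", load(blob, sizeof(blob), 6));    /* 0: UCODE_OK */
        printf("%d\n", load(blob, 7, 6));               /* 2: UCODE_ERROR */
        return 0;
}
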
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70fd7e414c15..651c93b28862 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -17,6 +17,7 @@
17#include <linux/acpi.h> 17#include <linux/acpi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/pci.h>
20 21
21#include <asm/mtrr.h> 22#include <asm/mtrr.h>
22#include <asm/mpspec.h> 23#include <asm/mpspec.h>
@@ -870,24 +871,17 @@ static
870inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} 871inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
871#endif /* CONFIG_X86_IO_APIC */ 872#endif /* CONFIG_X86_IO_APIC */
872 873
873static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, 874static int
874 int count) 875check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
875{ 876{
876 if (!mpc_new_phys) { 877 int ret = 0;
877 pr_info("No spare slots, try to append...take your risk, " 878
878 "new mpc_length %x\n", count); 879 if (!mpc_new_phys || count <= mpc_new_length) {
879 } else { 880 WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
880 if (count <= mpc_new_length) 881 return -1;
881 pr_info("No spare slots, try to append..., "
882 "new mpc_length %x\n", count);
883 else {
884 pr_err("mpc_new_length %lx is too small\n",
885 mpc_new_length);
886 return -1;
887 }
888 } 882 }
889 883
890 return 0; 884 return ret;
891} 885}
892 886
893static int __init replace_intsrc_all(struct mpc_table *mpc, 887static int __init replace_intsrc_all(struct mpc_table *mpc,
@@ -946,7 +940,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
946 } else { 940 } else {
947 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 941 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
948 count += sizeof(struct mpc_intsrc); 942 count += sizeof(struct mpc_intsrc);
949 if (!check_slot(mpc_new_phys, mpc_new_length, count)) 943 if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
950 goto out; 944 goto out;
951 assign_to_mpc_intsrc(&mp_irqs[i], m); 945 assign_to_mpc_intsrc(&mp_irqs[i], m);
952 mpc->length = count; 946 mpc->length = count;
@@ -963,11 +957,14 @@ out:
963 return 0; 957 return 0;
964} 958}
965 959
966static int __initdata enable_update_mptable; 960int enable_update_mptable;
967 961
968static int __init update_mptable_setup(char *str) 962static int __init update_mptable_setup(char *str)
969{ 963{
970 enable_update_mptable = 1; 964 enable_update_mptable = 1;
965#ifdef CONFIG_PCI
966 pci_routeirq = 1;
967#endif
971 return 0; 968 return 0;
972} 969}
973early_param("update_mptable", update_mptable_setup); 970early_param("update_mptable", update_mptable_setup);
@@ -980,6 +977,9 @@ static int __initdata alloc_mptable;
980static int __init parse_alloc_mptable_opt(char *p) 977static int __init parse_alloc_mptable_opt(char *p)
981{ 978{
982 enable_update_mptable = 1; 979 enable_update_mptable = 1;
980#ifdef CONFIG_PCI
981 pci_routeirq = 1;
982#endif
983 alloc_mptable = 1; 983 alloc_mptable = 1;
984 if (!p) 984 if (!p)
985 return 0; 985 return 0;
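
check_slot() above is collapsed into a single guard whose negative return makes replace_intsrc_all() abandon the append. A userspace sketch of that contract, written in the "record does not fit" sense the caller reads the result as; names are local stand-ins for the mptable fields:

#include <stdio.h>

static int check_slot(unsigned long new_phys, unsigned long capacity,
                      int new_length)
{
        /* No backing buffer, or the grown table exceeds it: bail. */
        if (!new_phys || (unsigned long)new_length > capacity) {
                fprintf(stderr, "no room for mptable entry (length %x)\n",
                        new_length);
                return -1;
        }
        return 0;
}

int main(void)
{
        printf("%d\n", check_slot(0x1000, 4096, 128));  /* 0: fits */
        printf("%d\n", check_slot(0x1000, 64, 128));    /* -1: too big */
        printf("%d\n", check_slot(0, 4096, 128));       /* -1: no buffer */
        return 0;
}
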
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 9faf43bea336..70ec9b951d76 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -248,18 +248,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
248 248
249static inline void enter_lazy(enum paravirt_lazy_mode mode) 249static inline void enter_lazy(enum paravirt_lazy_mode mode)
250{ 250{
251 BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); 251 BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
252 BUG_ON(preemptible());
253 252
254 __get_cpu_var(paravirt_lazy_mode) = mode; 253 percpu_write(paravirt_lazy_mode, mode);
255} 254}
256 255
257void paravirt_leave_lazy(enum paravirt_lazy_mode mode) 256static void leave_lazy(enum paravirt_lazy_mode mode)
258{ 257{
259 BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); 258 BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
260 BUG_ON(preemptible());
261 259
262 __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; 260 percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
263} 261}
264 262
265void paravirt_enter_lazy_mmu(void) 263void paravirt_enter_lazy_mmu(void)
@@ -269,22 +267,36 @@ void paravirt_enter_lazy_mmu(void)
269 267
270void paravirt_leave_lazy_mmu(void) 268void paravirt_leave_lazy_mmu(void)
271{ 269{
272 paravirt_leave_lazy(PARAVIRT_LAZY_MMU); 270 leave_lazy(PARAVIRT_LAZY_MMU);
273} 271}
274 272
275void paravirt_enter_lazy_cpu(void) 273void paravirt_start_context_switch(struct task_struct *prev)
276{ 274{
275 BUG_ON(preemptible());
276
277 if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
278 arch_leave_lazy_mmu_mode();
279 set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
280 }
277 enter_lazy(PARAVIRT_LAZY_CPU); 281 enter_lazy(PARAVIRT_LAZY_CPU);
278} 282}
279 283
280void paravirt_leave_lazy_cpu(void) 284void paravirt_end_context_switch(struct task_struct *next)
281{ 285{
282 paravirt_leave_lazy(PARAVIRT_LAZY_CPU); 286 BUG_ON(preemptible());
287
288 leave_lazy(PARAVIRT_LAZY_CPU);
289
290 if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
291 arch_enter_lazy_mmu_mode();
283} 292}
284 293
285enum paravirt_lazy_mode paravirt_get_lazy_mode(void) 294enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
286{ 295{
287 return __get_cpu_var(paravirt_lazy_mode); 296 if (in_interrupt())
297 return PARAVIRT_LAZY_NONE;
298
299 return percpu_read(paravirt_lazy_mode);
288} 300}
289 301
290void arch_flush_lazy_mmu_mode(void) 302void arch_flush_lazy_mmu_mode(void)
@@ -292,7 +304,6 @@ void arch_flush_lazy_mmu_mode(void)
292 preempt_disable(); 304 preempt_disable();
293 305
294 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { 306 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
295 WARN_ON(preempt_count() == 1);
296 arch_leave_lazy_mmu_mode(); 307 arch_leave_lazy_mmu_mode();
297 arch_enter_lazy_mmu_mode(); 308 arch_enter_lazy_mmu_mode();
298 } 309 }
@@ -300,19 +311,6 @@ void arch_flush_lazy_mmu_mode(void)
300 preempt_enable(); 311 preempt_enable();
301} 312}
302 313
303void arch_flush_lazy_cpu_mode(void)
304{
305 preempt_disable();
306
307 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
308 WARN_ON(preempt_count() == 1);
309 arch_leave_lazy_cpu_mode();
310 arch_enter_lazy_cpu_mode();
311 }
312
313 preempt_enable();
314}
315
316struct pv_info pv_info = { 314struct pv_info pv_info = {
317 .name = "bare hardware", 315 .name = "bare hardware",
318 .paravirt_enabled = 0, 316 .paravirt_enabled = 0,
@@ -404,10 +402,8 @@ struct pv_cpu_ops pv_cpu_ops = {
404 .set_iopl_mask = native_set_iopl_mask, 402 .set_iopl_mask = native_set_iopl_mask,
405 .io_delay = native_io_delay, 403 .io_delay = native_io_delay,
406 404
407 .lazy_mode = { 405 .start_context_switch = paravirt_nop,
408 .enter = paravirt_nop, 406 .end_context_switch = paravirt_nop,
409 .leave = paravirt_nop,
410 },
411}; 407};
412 408
413struct pv_apic_ops pv_apic_ops = { 409struct pv_apic_ops pv_apic_ops = {
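
The paravirt hunk replaces the bare lazy-CPU enter/leave pair with start/end_context_switch hooks: if the outgoing task was mid-way through a lazy MMU batch, the batch is flushed and a thread flag (TIF_LAZY_MMU_UPDATES) remembers to re-enter the mode when that task runs again. A compilable userspace model of this hand-off; the types are stand-ins for the kernel's per-cpu and per-thread state:

#include <stdio.h>

enum lazy_mode { LAZY_NONE, LAZY_MMU, LAZY_CPU };

struct task { int lazy_mmu_pending; };

static enum lazy_mode cpu_mode = LAZY_NONE;

static void start_context_switch(struct task *prev)
{
        if (cpu_mode == LAZY_MMU) {
                cpu_mode = LAZY_NONE;           /* arch_leave_lazy_mmu_mode() */
                prev->lazy_mmu_pending = 1;     /* TIF_LAZY_MMU_UPDATES */
        }
        cpu_mode = LAZY_CPU;                    /* batch CPU state updates */
}

static void end_context_switch(struct task *next)
{
        cpu_mode = LAZY_NONE;                   /* leave_lazy(LAZY_CPU) */
        if (next->lazy_mmu_pending) {
                next->lazy_mmu_pending = 0;
                cpu_mode = LAZY_MMU;            /* arch_enter_lazy_mmu_mode() */
        }
}

int main(void)
{
        struct task a = { 0 }, b = { 0 };

        cpu_mode = LAZY_MMU;                    /* a was batching PTE updates */
        start_context_switch(&a);               /* switch a -> b */
        end_context_switch(&b);
        start_context_switch(&b);               /* switch b -> a */
        end_context_switch(&a);                 /* a resumes its MMU batch */
        printf("mode=%d (1 == LAZY_MMU)\n", cpu_mode);
        return 0;
}
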
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 755c21e906f3..971a3bec47a8 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = {
186 186
187static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; 187static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
188 188
189/* enable this to stress test the chip's TCE cache */
190#ifdef CONFIG_IOMMU_DEBUG
191static int debugging = 1;
192
193static inline unsigned long verify_bit_range(unsigned long* bitmap,
194 int expected, unsigned long start, unsigned long end)
195{
196 unsigned long idx = start;
197
198 BUG_ON(start >= end);
199
200 while (idx < end) {
201 if (!!test_bit(idx, bitmap) != expected)
202 return idx;
203 ++idx;
204 }
205
206 /* all bits have the expected value */
207 return ~0UL;
208}
209#else /* debugging is disabled */
210static int debugging;
211
212static inline unsigned long verify_bit_range(unsigned long* bitmap,
213 int expected, unsigned long start, unsigned long end)
214{
215 return ~0UL;
216}
217
218#endif /* CONFIG_IOMMU_DEBUG */
219
220static inline int translation_enabled(struct iommu_table *tbl) 189static inline int translation_enabled(struct iommu_table *tbl)
221{ 190{
222 /* only PHBs with translation enabled have an IOMMU table */ 191 /* only PHBs with translation enabled have an IOMMU table */
@@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
228{ 197{
229 unsigned long index; 198 unsigned long index;
230 unsigned long end; 199 unsigned long end;
231 unsigned long badbit;
232 unsigned long flags; 200 unsigned long flags;
233 201
234 index = start_addr >> PAGE_SHIFT; 202 index = start_addr >> PAGE_SHIFT;
@@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
243 211
244 spin_lock_irqsave(&tbl->it_lock, flags); 212 spin_lock_irqsave(&tbl->it_lock, flags);
245 213
246 badbit = verify_bit_range(tbl->it_map, 0, index, end);
247 if (badbit != ~0UL) {
248 if (printk_ratelimit())
249 printk(KERN_ERR "Calgary: entry already allocated at "
250 "0x%lx tbl %p dma 0x%lx npages %u\n",
251 badbit, tbl, start_addr, npages);
252 }
253
254 iommu_area_reserve(tbl->it_map, index, npages); 214 iommu_area_reserve(tbl->it_map, index, npages);
255 215
256 spin_unlock_irqrestore(&tbl->it_lock, flags); 216 spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
326 unsigned int npages) 286 unsigned int npages)
327{ 287{
328 unsigned long entry; 288 unsigned long entry;
329 unsigned long badbit;
330 unsigned long badend; 289 unsigned long badend;
331 unsigned long flags; 290 unsigned long flags;
332 291
@@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
346 305
347 spin_lock_irqsave(&tbl->it_lock, flags); 306 spin_lock_irqsave(&tbl->it_lock, flags);
348 307
349 badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
350 if (badbit != ~0UL) {
351 if (printk_ratelimit())
352 printk(KERN_ERR "Calgary: bit is off at 0x%lx "
353 "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
354 badbit, tbl, dma_addr, entry, npages);
355 }
356
357 iommu_area_free(tbl->it_map, entry, npages); 308 iommu_area_free(tbl->it_map, entry, npages);
358 309
359 spin_unlock_irqrestore(&tbl->it_lock, flags); 310 spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -1488,9 +1439,8 @@ void __init detect_calgary(void)
1488 iommu_detected = 1; 1439 iommu_detected = 1;
1489 calgary_detected = 1; 1440 calgary_detected = 1;
1490 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); 1441 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
1491 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " 1442 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
1492 "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, 1443 specified_table_size);
1493 debugging ? "enabled" : "disabled");
1494 1444
1495 /* swiotlb for devices that aren't behind the Calgary. */ 1445 /* swiotlb for devices that aren't behind the Calgary. */
1496 if (max_pfn > MAX_DMA32_PFN) 1446 if (max_pfn > MAX_DMA32_PFN)
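
The Calgary changes drop the driver's private bitmap checker in favour of the generic DMA debugging code. For reference, a userspace re-creation of the removed verify_bit_range() (returning -1 rather than ~0UL for the all-good case):

#include <stdio.h>

/* Scan [start, end) and report the first bit whose value differs
 * from 'expected'; this is the double-alloc/double-free check the
 * generic DMA-debug layer now performs centrally. */
static long verify_bit_range(const unsigned long *bitmap, int expected,
                             unsigned long start, unsigned long end)
{
        const unsigned long bits = 8 * sizeof(unsigned long);
        unsigned long idx;

        for (idx = start; idx < end; idx++) {
                int bit = (bitmap[idx / bits] >> (idx % bits)) & 1;

                if (bit != expected)
                        return (long)idx;
        }
        return -1;      /* all bits as expected */
}

int main(void)
{
        unsigned long map[1] = { 0x5 };         /* bits 0 and 2 set */

        printf("%ld\n", verify_bit_range(map, 1, 0, 3)); /* 1: bit 1 clear */
        return 0;
}
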
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b284b58c035c..cfd9f9063896 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -144,48 +144,21 @@ static void flush_gart(void)
144} 144}
145 145
146#ifdef CONFIG_IOMMU_LEAK 146#ifdef CONFIG_IOMMU_LEAK
147
148#define SET_LEAK(x) \
149 do { \
150 if (iommu_leak_tab) \
151 iommu_leak_tab[x] = __builtin_return_address(0);\
152 } while (0)
153
154#define CLEAR_LEAK(x) \
155 do { \
156 if (iommu_leak_tab) \
157 iommu_leak_tab[x] = NULL; \
158 } while (0)
159
160/* Debugging aid for drivers that don't free their IOMMU tables */ 147/* Debugging aid for drivers that don't free their IOMMU tables */
161static void **iommu_leak_tab;
162static int leak_trace; 148static int leak_trace;
163static int iommu_leak_pages = 20; 149static int iommu_leak_pages = 20;
164 150
165static void dump_leak(void) 151static void dump_leak(void)
166{ 152{
167 int i;
168 static int dump; 153 static int dump;
169 154
170 if (dump || !iommu_leak_tab) 155 if (dump)
171 return; 156 return;
172 dump = 1; 157 dump = 1;
173 show_stack(NULL, NULL);
174 158
175 /* Very crude. dump some from the end of the table too */ 159 show_stack(NULL, NULL);
176 printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", 160 debug_dma_dump_mappings(NULL);
177 iommu_leak_pages);
178 for (i = 0; i < iommu_leak_pages; i += 2) {
179 printk(KERN_DEBUG "%lu: ", iommu_pages-i);
180 printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
181 0);
182 printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
183 }
184 printk(KERN_DEBUG "\n");
185} 161}
186#else
187# define SET_LEAK(x)
188# define CLEAR_LEAK(x)
189#endif 162#endif
190 163
191static void iommu_full(struct device *dev, size_t size, int dir) 164static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
248 221
249 for (i = 0; i < npages; i++) { 222 for (i = 0; i < npages; i++) {
250 iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); 223 iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
251 SET_LEAK(iommu_page + i);
252 phys_mem += PAGE_SIZE; 224 phys_mem += PAGE_SIZE;
253 } 225 }
254 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); 226 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
294 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 266 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
295 for (i = 0; i < npages; i++) { 267 for (i = 0; i < npages; i++) {
296 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; 268 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
297 CLEAR_LEAK(iommu_page + i);
298 } 269 }
299 free_iommu(iommu_page, npages); 270 free_iommu(iommu_page, npages);
300} 271}
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
377 pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); 348 pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
378 while (pages--) { 349 while (pages--) {
379 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); 350 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
380 SET_LEAK(iommu_page);
381 addr += PAGE_SIZE; 351 addr += PAGE_SIZE;
382 iommu_page++; 352 iommu_page++;
383 } 353 }
@@ -688,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
688 658
689 agp_gatt_table = gatt; 659 agp_gatt_table = gatt;
690 660
691 enable_gart_translations();
692
693 error = sysdev_class_register(&gart_sysdev_class); 661 error = sysdev_class_register(&gart_sysdev_class);
694 if (!error) 662 if (!error)
695 error = sysdev_register(&device_gart); 663 error = sysdev_register(&device_gart);
@@ -801,11 +769,12 @@ void __init gart_iommu_init(void)
801 769
802#ifdef CONFIG_IOMMU_LEAK 770#ifdef CONFIG_IOMMU_LEAK
803 if (leak_trace) { 771 if (leak_trace) {
804 iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 772 int ret;
805 get_order(iommu_pages*sizeof(void *))); 773
806 if (!iommu_leak_tab) 774 ret = dma_debug_resize_entries(iommu_pages);
775 if (ret)
807 printk(KERN_DEBUG 776 printk(KERN_DEBUG
808 "PCI-DMA: Cannot allocate leak trace area\n"); 777 "PCI-DMA: Cannot trace all the entries\n");
809 } 778 }
810#endif 779#endif
811 780
@@ -845,6 +814,14 @@ void __init gart_iommu_init(void)
845 * the pages as Not-Present: 814 * the pages as Not-Present:
846 */ 815 */
847 wbinvd(); 816 wbinvd();
817
818 /*
819 * Now all caches are flushed and we can safely enable
820 * GART hardware. Doing it early leaves the possibility
821 * of stale cache entries that can lead to GART PTE
822 * errors.
823 */
824 enable_gart_translations();
848 825
849 /* 826 /*
850 * Try to workaround a bug (thanks to BenH): 827 * Try to workaround a bug (thanks to BenH):
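
The GART hunks make two changes: leak tracking now reuses the generic DMA-debug entries (dma_debug_resize_entries), and enable_gart_translations() moves from init_k8_gatt() to after the wbinvd() in gart_iommu_init(), since enabling translation while stale dirty cache lines still cover the aperture can produce GART PTE errors. A stub sketch of the resulting ordering; the helper names here are stand-ins, not the kernel functions:

#include <stdio.h>

static void map_pages_not_present(void) { puts("aperture unmapped"); }
static void flush_caches(void)          { puts("wbinvd: caches flushed"); }
static void enable_gart(void)           { puts("GART translation enabled"); }

int main(void)
{
        /* Enabling the GART before the flush risks stale dirty lines
         * being written back into the now-translated aperture range;
         * hence this strict order. */
        map_pages_not_present();
        flush_caches();
        enable_gart();
        return 0;
}
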
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 221a3853e268..a1712f2b50f1 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
28 return paddr; 28 return paddr;
29} 29}
30 30
31phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) 31phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
32{ 32{
33 return baddr; 33 return baddr;
34} 34}
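
swiotlb_bus_to_phys() gains a struct device * so platforms whose bus addresses carry a per-device offset can translate correctly; on x86 the body stays an identity mapping. A hypothetical example of the kind of translation the extra parameter enables:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical device with a per-device DMA offset, the case the
 * 'hwdev' parameter exists to serve. */
struct device { uint64_t dma_offset; };

static uint64_t bus_to_phys(struct device *hwdev, uint64_t baddr)
{
        return baddr + (hwdev ? hwdev->dma_offset : 0);
}

int main(void)
{
        struct device d = { .dma_offset = 0x80000000ull };

        printf("0x%llx\n", (unsigned long long)bus_to_phys(&d, 0x1000));
        return 0;
}
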
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ca989158e847..3bb2be1649bd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,12 +8,15 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/pm.h> 9#include <linux/pm.h>
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h>
11#include <trace/power.h> 12#include <trace/power.h>
12#include <asm/system.h> 13#include <asm/system.h>
13#include <asm/apic.h> 14#include <asm/apic.h>
15#include <asm/syscalls.h>
14#include <asm/idle.h> 16#include <asm/idle.h>
15#include <asm/uaccess.h> 17#include <asm/uaccess.h>
16#include <asm/i387.h> 18#include <asm/i387.h>
19#include <asm/ds.h>
17 20
18unsigned long idle_halt; 21unsigned long idle_halt;
19EXPORT_SYMBOL(idle_halt); 22EXPORT_SYMBOL(idle_halt);
@@ -45,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk)
45 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); 48 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
46 tsk->thread.xstate = NULL; 49 tsk->thread.xstate = NULL;
47 } 50 }
51
52 WARN(tsk->thread.ds_ctx, "leaking DS context\n");
48} 53}
49 54
50void free_thread_info(struct thread_info *ti) 55void free_thread_info(struct thread_info *ti)
@@ -83,8 +88,6 @@ void exit_thread(void)
83 put_cpu(); 88 put_cpu();
84 kfree(bp); 89 kfree(bp);
85 } 90 }
86
87 ds_exit_thread(current);
88} 91}
89 92
90void flush_thread(void) 93void flush_thread(void)
@@ -613,3 +616,16 @@ static int __init idle_setup(char *str)
613} 616}
614early_param("idle", idle_setup); 617early_param("idle", idle_setup);
615 618
619unsigned long arch_align_stack(unsigned long sp)
620{
621 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
622 sp -= get_random_int() % 8192;
623 return sp & ~0xf;
624}
625
626unsigned long arch_randomize_brk(struct mm_struct *mm)
627{
628 unsigned long range_end = mm->brk + 0x02000000;
629 return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
630}
631
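
arch_align_stack() and arch_randomize_brk() move from the 32- and 64-bit files into the common process.c. The stack math subtracts up to 8192 bytes and then re-aligns down to 16 bytes, i.e. 512 distinct stack tops. The same arithmetic, runnable with a fixed stand-in for get_random_int():

#include <stdio.h>

static unsigned long align_stack(unsigned long sp, unsigned int rnd)
{
        sp -= rnd % 8192;       /* up to 8 KiB of offset */
        return sp & ~0xfUL;     /* keep 16-byte alignment */
}

int main(void)
{
        unsigned long sp = 0x7fffffffe000UL;

        printf("%#lx\n", align_stack(sp, 0));           /* unchanged */
        printf("%#lx\n", align_stack(sp, 8191));        /* lowest top */
        printf("%d distinct offsets\n", 8192 / 16);     /* 512 */
        return 0;
}
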
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 76f8f84043a2..59f4524984af 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -9,8 +9,6 @@
9 * This file handles the architecture-dependent parts of process handling.. 9 * This file handles the architecture-dependent parts of process handling..
10 */ 10 */
11 11
12#include <stdarg.h>
13
14#include <linux/stackprotector.h> 12#include <linux/stackprotector.h>
15#include <linux/cpu.h> 13#include <linux/cpu.h>
16#include <linux/errno.h> 14#include <linux/errno.h>
@@ -33,7 +31,6 @@
33#include <linux/module.h> 31#include <linux/module.h>
34#include <linux/kallsyms.h> 32#include <linux/kallsyms.h>
35#include <linux/ptrace.h> 33#include <linux/ptrace.h>
36#include <linux/random.h>
37#include <linux/personality.h> 34#include <linux/personality.h>
38#include <linux/tick.h> 35#include <linux/tick.h>
39#include <linux/percpu.h> 36#include <linux/percpu.h>
@@ -290,7 +287,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
290 p->thread.io_bitmap_max = 0; 287 p->thread.io_bitmap_max = 0;
291 } 288 }
292 289
293 ds_copy_thread(p, current); 290 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
291 p->thread.ds_ctx = NULL;
294 292
295 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); 293 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
296 p->thread.debugctlmsr = 0; 294 p->thread.debugctlmsr = 0;
@@ -407,7 +405,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
407 * done before math_state_restore, so the TS bit is up 405 * done before math_state_restore, so the TS bit is up
408 * to date. 406 * to date.
409 */ 407 */
410 arch_leave_lazy_cpu_mode(); 408 arch_end_context_switch(next_p);
411 409
412 /* If the task has used fpu the last 5 timeslices, just do a full 410 /* If the task has used fpu the last 5 timeslices, just do a full
413 * restore of the math state immediately to avoid the trap; the 411 * restore of the math state immediately to avoid the trap; the
@@ -497,15 +495,3 @@ unsigned long get_wchan(struct task_struct *p)
497 return 0; 495 return 0;
498} 496}
499 497
500unsigned long arch_align_stack(unsigned long sp)
501{
502 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
503 sp -= get_random_int() % 8192;
504 return sp & ~0xf;
505}
506
507unsigned long arch_randomize_brk(struct mm_struct *mm)
508{
509 unsigned long range_end = mm->brk + 0x02000000;
510 return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
511}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b751a41392b1..ebefb5407b9d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -14,8 +14,6 @@
14 * This file handles the architecture-dependent parts of process handling.. 14 * This file handles the architecture-dependent parts of process handling..
15 */ 15 */
16 16
17#include <stdarg.h>
18
19#include <linux/stackprotector.h> 17#include <linux/stackprotector.h>
20#include <linux/cpu.h> 18#include <linux/cpu.h>
21#include <linux/errno.h> 19#include <linux/errno.h>
@@ -32,7 +30,6 @@
32#include <linux/delay.h> 30#include <linux/delay.h>
33#include <linux/module.h> 31#include <linux/module.h>
34#include <linux/ptrace.h> 32#include <linux/ptrace.h>
35#include <linux/random.h>
36#include <linux/notifier.h> 33#include <linux/notifier.h>
37#include <linux/kprobes.h> 34#include <linux/kprobes.h>
38#include <linux/kdebug.h> 35#include <linux/kdebug.h>
@@ -335,7 +332,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
335 goto out; 332 goto out;
336 } 333 }
337 334
338 ds_copy_thread(p, me); 335 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
336 p->thread.ds_ctx = NULL;
339 337
340 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); 338 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
341 p->thread.debugctlmsr = 0; 339 p->thread.debugctlmsr = 0;
@@ -428,7 +426,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
428 * done before math_state_restore, so the TS bit is up 426 * done before math_state_restore, so the TS bit is up
429 * to date. 427 * to date.
430 */ 428 */
431 arch_leave_lazy_cpu_mode(); 429 arch_end_context_switch(next_p);
432 430
433 /* 431 /*
434 * Switch FS and GS. 432 * Switch FS and GS.
@@ -660,15 +658,3 @@ long sys_arch_prctl(int code, unsigned long addr)
660 return do_arch_prctl(current, code, addr); 658 return do_arch_prctl(current, code, addr);
661} 659}
662 660
663unsigned long arch_align_stack(unsigned long sp)
664{
665 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
666 sp -= get_random_int() % 8192;
667 return sp & ~0xf;
668}
669
670unsigned long arch_randomize_brk(struct mm_struct *mm)
671{
672 unsigned long range_end = mm->brk + 0x02000000;
673 return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
674}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 23b7c8f017e2..09ecbde91c13 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/seccomp.h> 22#include <linux/seccomp.h>
23#include <linux/signal.h> 23#include <linux/signal.h>
24#include <linux/workqueue.h>
24 25
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26#include <asm/pgtable.h> 27#include <asm/pgtable.h>
@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target,
578} 579}
579 580
580#ifdef CONFIG_X86_PTRACE_BTS 581#ifdef CONFIG_X86_PTRACE_BTS
582/*
583 * A branch trace store context.
584 *
585 * Contexts may only be installed by ptrace_bts_config() and only for
586 * ptraced tasks.
587 *
588 * Contexts are destroyed when the tracee is detached from the tracer.
589 * The actual destruction work requires interrupts enabled, so the
590 * work is deferred and will be scheduled during __ptrace_unlink().
591 *
592 * Contexts hold an additional task_struct reference on the traced
593 * task, as well as a reference on the tracer's mm.
594 *
595 * Ptrace already holds a task_struct for the duration of ptrace operations,
596 * but since destruction is deferred, it may be executed after both
597 * tracer and tracee exited.
598 */
599struct bts_context {
600 /* The branch trace handle. */
601 struct bts_tracer *tracer;
602
603 /* The buffer used to store the branch trace and its size. */
604 void *buffer;
605 unsigned int size;
606
607 /* The mm that paid for the above buffer. */
608 struct mm_struct *mm;
609
610 /* The task this context belongs to. */
611 struct task_struct *task;
612
613 /* The signal to send on a bts buffer overflow. */
614 unsigned int bts_ovfl_signal;
615
616 /* The work struct to destroy a context. */
617 struct work_struct work;
618};
619
620static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
621{
622 void *buffer = NULL;
623 int err = -ENOMEM;
624
625 err = account_locked_memory(current->mm, current->signal->rlim, size);
626 if (err < 0)
627 return err;
628
629 buffer = kzalloc(size, GFP_KERNEL);
630 if (!buffer)
631 goto out_refund;
632
633 context->buffer = buffer;
634 context->size = size;
635 context->mm = get_task_mm(current);
636
637 return 0;
638
639 out_refund:
640 refund_locked_memory(current->mm, size);
641 return err;
642}
643
644static inline void free_bts_buffer(struct bts_context *context)
645{
646 if (!context->buffer)
647 return;
648
649 kfree(context->buffer);
650 context->buffer = NULL;
651
652 refund_locked_memory(context->mm, context->size);
653 context->size = 0;
654
655 mmput(context->mm);
656 context->mm = NULL;
657}
658
659static void free_bts_context_work(struct work_struct *w)
660{
661 struct bts_context *context;
662
663 context = container_of(w, struct bts_context, work);
664
665 ds_release_bts(context->tracer);
666 put_task_struct(context->task);
667 free_bts_buffer(context);
668 kfree(context);
669}
670
671static inline void free_bts_context(struct bts_context *context)
672{
673 INIT_WORK(&context->work, free_bts_context_work);
674 schedule_work(&context->work);
675}
676
677static inline struct bts_context *alloc_bts_context(struct task_struct *task)
678{
679 struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
680 if (context) {
681 context->task = task;
682 task->bts = context;
683
684 get_task_struct(task);
685 }
686
687 return context;
688}
689
581static int ptrace_bts_read_record(struct task_struct *child, size_t index, 690static int ptrace_bts_read_record(struct task_struct *child, size_t index,
582 struct bts_struct __user *out) 691 struct bts_struct __user *out)
583{ 692{
693 struct bts_context *context;
584 const struct bts_trace *trace; 694 const struct bts_trace *trace;
585 struct bts_struct bts; 695 struct bts_struct bts;
586 const unsigned char *at; 696 const unsigned char *at;
587 int error; 697 int error;
588 698
589 trace = ds_read_bts(child->bts); 699 context = child->bts;
700 if (!context)
701 return -ESRCH;
702
703 trace = ds_read_bts(context->tracer);
590 if (!trace) 704 if (!trace)
591 return -EPERM; 705 return -ESRCH;
592 706
593 at = trace->ds.top - ((index + 1) * trace->ds.size); 707 at = trace->ds.top - ((index + 1) * trace->ds.size);
594 if ((void *)at < trace->ds.begin) 708 if ((void *)at < trace->ds.begin)
@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
597 if (!trace->read) 711 if (!trace->read)
598 return -EOPNOTSUPP; 712 return -EOPNOTSUPP;
599 713
600 error = trace->read(child->bts, at, &bts); 714 error = trace->read(context->tracer, at, &bts);
601 if (error < 0) 715 if (error < 0)
602 return error; 716 return error;
603 717
@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child,
611 long size, 725 long size,
612 struct bts_struct __user *out) 726 struct bts_struct __user *out)
613{ 727{
728 struct bts_context *context;
614 const struct bts_trace *trace; 729 const struct bts_trace *trace;
615 const unsigned char *at; 730 const unsigned char *at;
616 int error, drained = 0; 731 int error, drained = 0;
617 732
618 trace = ds_read_bts(child->bts); 733 context = child->bts;
734 if (!context)
735 return -ESRCH;
736
737 trace = ds_read_bts(context->tracer);
619 if (!trace) 738 if (!trace)
620 return -EPERM; 739 return -ESRCH;
621 740
622 if (!trace->read) 741 if (!trace->read)
623 return -EOPNOTSUPP; 742 return -EOPNOTSUPP;
@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child,
628 for (at = trace->ds.begin; (void *)at < trace->ds.top; 747 for (at = trace->ds.begin; (void *)at < trace->ds.top;
629 out++, drained++, at += trace->ds.size) { 748 out++, drained++, at += trace->ds.size) {
630 struct bts_struct bts; 749 struct bts_struct bts;
631 int error;
632 750
633 error = trace->read(child->bts, at, &bts); 751 error = trace->read(context->tracer, at, &bts);
634 if (error < 0) 752 if (error < 0)
635 return error; 753 return error;
636 754
@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child,
640 758
641 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); 759 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
642 760
643 error = ds_reset_bts(child->bts); 761 error = ds_reset_bts(context->tracer);
644 if (error < 0) 762 if (error < 0)
645 return error; 763 return error;
646 764
647 return drained; 765 return drained;
648} 766}
649 767
650static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
651{
652 child->bts_buffer = alloc_locked_buffer(size);
653 if (!child->bts_buffer)
654 return -ENOMEM;
655
656 child->bts_size = size;
657
658 return 0;
659}
660
661static void ptrace_bts_free_buffer(struct task_struct *child)
662{
663 free_locked_buffer(child->bts_buffer, child->bts_size);
664 child->bts_buffer = NULL;
665 child->bts_size = 0;
666}
667
668static int ptrace_bts_config(struct task_struct *child, 768static int ptrace_bts_config(struct task_struct *child,
669 long cfg_size, 769 long cfg_size,
670 const struct ptrace_bts_config __user *ucfg) 770 const struct ptrace_bts_config __user *ucfg)
671{ 771{
772 struct bts_context *context;
672 struct ptrace_bts_config cfg; 773 struct ptrace_bts_config cfg;
673 unsigned int flags = 0; 774 unsigned int flags = 0;
674 775
@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child,
678 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 779 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
679 return -EFAULT; 780 return -EFAULT;
680 781
681 if (child->bts) { 782 context = child->bts;
682 ds_release_bts(child->bts); 783 if (!context)
683 child->bts = NULL; 784 context = alloc_bts_context(child);
684 } 785 if (!context)
786 return -ENOMEM;
685 787
686 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 788 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
687 if (!cfg.signal) 789 if (!cfg.signal)
688 return -EINVAL; 790 return -EINVAL;
689 791
690 child->thread.bts_ovfl_signal = cfg.signal;
691 return -EOPNOTSUPP; 792 return -EOPNOTSUPP;
793 context->bts_ovfl_signal = cfg.signal;
692 } 794 }
693 795
694 if ((cfg.flags & PTRACE_BTS_O_ALLOC) && 796 ds_release_bts(context->tracer);
695 (cfg.size != child->bts_size)) { 797 context->tracer = NULL;
696 int error;
697 798
698 ptrace_bts_free_buffer(child); 799 if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
800 int err;
699 801
700 error = ptrace_bts_allocate_buffer(child, cfg.size); 802 free_bts_buffer(context);
701 if (error < 0) 803 if (!cfg.size)
702 return error; 804 return 0;
805
806 err = alloc_bts_buffer(context, cfg.size);
807 if (err < 0)
808 return err;
703 } 809 }
704 810
705 if (cfg.flags & PTRACE_BTS_O_TRACE) 811 if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child,
708 if (cfg.flags & PTRACE_BTS_O_SCHED) 814 if (cfg.flags & PTRACE_BTS_O_SCHED)
709 flags |= BTS_TIMESTAMPS; 815 flags |= BTS_TIMESTAMPS;
710 816
711 child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, 817 context->tracer =
712 /* ovfl = */ NULL, /* th = */ (size_t)-1, 818 ds_request_bts_task(child, context->buffer, context->size,
713 flags); 819 NULL, (size_t)-1, flags);
714 if (IS_ERR(child->bts)) { 820 if (unlikely(IS_ERR(context->tracer))) {
715 int error = PTR_ERR(child->bts); 821 int error = PTR_ERR(context->tracer);
716
717 ptrace_bts_free_buffer(child);
718 child->bts = NULL;
719 822
823 free_bts_buffer(context);
824 context->tracer = NULL;
720 return error; 825 return error;
721 } 826 }
722 827
@@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child,
727 long cfg_size, 832 long cfg_size,
728 struct ptrace_bts_config __user *ucfg) 833 struct ptrace_bts_config __user *ucfg)
729{ 834{
835 struct bts_context *context;
730 const struct bts_trace *trace; 836 const struct bts_trace *trace;
731 struct ptrace_bts_config cfg; 837 struct ptrace_bts_config cfg;
732 838
839 context = child->bts;
840 if (!context)
841 return -ESRCH;
842
733 if (cfg_size < sizeof(cfg)) 843 if (cfg_size < sizeof(cfg))
734 return -EIO; 844 return -EIO;
735 845
736 trace = ds_read_bts(child->bts); 846 trace = ds_read_bts(context->tracer);
737 if (!trace) 847 if (!trace)
738 return -EPERM; 848 return -ESRCH;
739 849
740 memset(&cfg, 0, sizeof(cfg)); 850 memset(&cfg, 0, sizeof(cfg));
741 cfg.size = trace->ds.end - trace->ds.begin; 851 cfg.size = trace->ds.end - trace->ds.begin;
742 cfg.signal = child->thread.bts_ovfl_signal; 852 cfg.signal = context->bts_ovfl_signal;
743 cfg.bts_size = sizeof(struct bts_struct); 853 cfg.bts_size = sizeof(struct bts_struct);
744 854
745 if (cfg.signal) 855 if (cfg.signal)
746 cfg.flags |= PTRACE_BTS_O_SIGNAL; 856 cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child,
759 869
760static int ptrace_bts_clear(struct task_struct *child) 870static int ptrace_bts_clear(struct task_struct *child)
761{ 871{
872 struct bts_context *context;
762 const struct bts_trace *trace; 873 const struct bts_trace *trace;
763 874
764 trace = ds_read_bts(child->bts); 875 context = child->bts;
876 if (!context)
877 return -ESRCH;
878
879 trace = ds_read_bts(context->tracer);
765 if (!trace) 880 if (!trace)
766 return -EPERM; 881 return -ESRCH;
767 882
768 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); 883 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
769 884
770 return ds_reset_bts(child->bts); 885 return ds_reset_bts(context->tracer);
771} 886}
772 887
773static int ptrace_bts_size(struct task_struct *child) 888static int ptrace_bts_size(struct task_struct *child)
774{ 889{
890 struct bts_context *context;
775 const struct bts_trace *trace; 891 const struct bts_trace *trace;
776 892
777 trace = ds_read_bts(child->bts); 893 context = child->bts;
894 if (!context)
895 return -ESRCH;
896
897 trace = ds_read_bts(context->tracer);
778 if (!trace) 898 if (!trace)
779 return -EPERM; 899 return -ESRCH;
780 900
781 return (trace->ds.top - trace->ds.begin) / trace->ds.size; 901 return (trace->ds.top - trace->ds.begin) / trace->ds.size;
782} 902}
783 903
784static void ptrace_bts_fork(struct task_struct *tsk) 904/*
785{ 905 * Called from __ptrace_unlink() after the child has been moved back
786 tsk->bts = NULL; 906 * to its original parent.
787 tsk->bts_buffer = NULL; 907 */
788 tsk->bts_size = 0; 908void ptrace_bts_untrace(struct task_struct *child)
789 tsk->thread.bts_ovfl_signal = 0;
790}
791
792static void ptrace_bts_untrace(struct task_struct *child)
793{ 909{
794 if (unlikely(child->bts)) { 910 if (unlikely(child->bts)) {
795 ds_release_bts(child->bts); 911 free_bts_context(child->bts);
796 child->bts = NULL; 912 child->bts = NULL;
797
798 /* We cannot update total_vm and locked_vm since
799 child's mm is already gone. But we can reclaim the
800 memory. */
801 kfree(child->bts_buffer);
802 child->bts_buffer = NULL;
803 child->bts_size = 0;
804 } 913 }
805} 914}
806
807static void ptrace_bts_detach(struct task_struct *child)
808{
809 /*
810 * Ptrace_detach() races with ptrace_untrace() in case
811 * the child dies and is reaped by another thread.
812 *
813 * We only do the memory accounting at this point and
814 * leave the buffer deallocation and the bts tracer
815 * release to ptrace_bts_untrace() which will be called
816 * later on with tasklist_lock held.
817 */
818 release_locked_buffer(child->bts_buffer, child->bts_size);
819}
820#else
821static inline void ptrace_bts_fork(struct task_struct *tsk) {}
822static inline void ptrace_bts_detach(struct task_struct *child) {}
823static inline void ptrace_bts_untrace(struct task_struct *child) {}
824#endif /* CONFIG_X86_PTRACE_BTS */ 915#endif /* CONFIG_X86_PTRACE_BTS */
825 916
826void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
827{
828 ptrace_bts_fork(child);
829}
830
831void x86_ptrace_untrace(struct task_struct *child)
832{
833 ptrace_bts_untrace(child);
834}
835
836/* 917/*
837 * Called by kernel/ptrace.c when detaching.. 918 * Called by kernel/ptrace.c when detaching..
838 * 919 *
@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child)
844#ifdef TIF_SYSCALL_EMU 925#ifdef TIF_SYSCALL_EMU
845 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 926 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
846#endif 927#endif
847 ptrace_bts_detach(child);
848} 928}
849 929
850#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 930#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
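
The new bts_context embeds its own work_struct because, as the comment block above explains, teardown needs interrupts enabled and may run after both tracer and tracee have exited; free_bts_context() therefore only schedules the work. A minimal userspace model of that deferred-teardown pattern (the queue and types are stand-ins for the kernel's workqueue machinery):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct work { void (*fn)(struct work *); struct work *next; };
static struct work *queue;

struct context { char *buffer; struct work work; };

static void free_context_work(struct work *w)
{
        /* container_of() by hand */
        struct context *c = (struct context *)
                ((char *)w - offsetof(struct context, work));

        free(c->buffer);
        free(c);
        puts("context destroyed");
}

static void free_context(struct context *c)
{
        c->work.fn = free_context_work;         /* INIT_WORK()     */
        c->work.next = queue;                   /* schedule_work() */
        queue = &c->work;
}

int main(void)
{
        struct context *c = calloc(1, sizeof(*c));

        c->buffer = malloc(16);
        free_context(c);                /* returns immediately */
        while (queue) {                 /* the "worker" runs later */
                struct work *w = queue;

                queue = w->next;
                w->fn(w);
        }
        return 0;
}
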
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 7563b31b4f03..af71d06624bf 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -491,5 +491,42 @@ void force_hpet_resume(void)
491 break; 491 break;
492 } 492 }
493} 493}
494#endif
495
496#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
497/* Set correct numa_node information for AMD NB functions */
498static void __init quirk_amd_nb_node(struct pci_dev *dev)
499{
500 struct pci_dev *nb_ht;
501 unsigned int devfn;
502 u32 val;
503
504 devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
505 nb_ht = pci_get_slot(dev->bus, devfn);
506 if (!nb_ht)
507 return;
508
509 pci_read_config_dword(nb_ht, 0x60, &val);
510 set_dev_node(&dev->dev, val & 7);
511 pci_dev_put(dev);
512}
494 513
514DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
515 quirk_amd_nb_node);
516DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
517 quirk_amd_nb_node);
518DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
519 quirk_amd_nb_node);
520DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC,
521 quirk_amd_nb_node);
522DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT,
523 quirk_amd_nb_node);
524DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP,
525 quirk_amd_nb_node);
526DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM,
527 quirk_amd_nb_node);
528DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
529 quirk_amd_nb_node);
530DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
531 quirk_amd_nb_node);
495#endif 532#endif
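
quirk_amd_nb_node() looks up function 0 of the same slot (the HT config function) and takes the NUMA node from the low three bits of config register 0x60, so at most eight nodes are representable this way. The extraction in isolation, with a made-up register value:

#include <stdio.h>

int main(void)
{
        unsigned int val = 0x00e1f3a2;  /* pretend pci_read_config_dword result */

        printf("numa node = %u\n", val & 7);    /* -> 2 */
        return 0;
}
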
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1340dad417f4..d2d1ce8170f0 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -192,6 +192,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
192 DMI_MATCH(DMI_BOARD_NAME, "0KP561"), 192 DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
193 }, 193 },
194 }, 194 },
195 { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
196 .callback = set_bios_reboot,
197 .ident = "Dell OptiPlex 360",
198 .matches = {
199 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
200 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"),
201 DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
202 },
203 },
195 { /* Handle problems with rebooting on Dell 2400's */ 204 { /* Handle problems with rebooting on Dell 2400's */
196 .callback = set_bios_reboot, 205 .callback = set_bios_reboot,
197 .ident = "Dell PowerEdge 2400", 206 .ident = "Dell PowerEdge 2400",
@@ -232,6 +241,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
232 DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"), 241 DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
233 }, 242 },
234 }, 243 },
244 { /* Handle problems with rebooting on Sony VGN-Z540N */
245 .callback = set_bios_reboot,
246 .ident = "Sony VGN-Z540N",
247 .matches = {
248 DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
249 DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
250 },
251 },
235 { } 252 { }
236}; 253};
237 254
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b4158439bf63..d1c636bf31a7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,14 @@
112#define ARCH_SETUP 112#define ARCH_SETUP
113#endif 113#endif
114 114
115/*
116 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
117 * The direct mapping extends to max_pfn_mapped, so that we can directly access
118 * apertures, ACPI and other tables without having to play with fixmaps.
119 */
120unsigned long max_low_pfn_mapped;
121unsigned long max_pfn_mapped;
122
115RESERVE_BRK(dmi_alloc, 65536); 123RESERVE_BRK(dmi_alloc, 65536);
116 124
117unsigned int boot_cpu_id __read_mostly; 125unsigned int boot_cpu_id __read_mostly;
@@ -214,8 +222,8 @@ unsigned long mmu_cr4_features;
214unsigned long mmu_cr4_features = X86_CR4_PAE; 222unsigned long mmu_cr4_features = X86_CR4_PAE;
215#endif 223#endif
216 224
217/* Boot loader ID as an integer, for the benefit of proc_dointvec */ 225/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
218int bootloader_type; 226int bootloader_type, bootloader_version;
219 227
220/* 228/*
221 * Setup options 229 * Setup options
@@ -706,6 +714,12 @@ void __init setup_arch(char **cmdline_p)
706#endif 714#endif
707 saved_video_mode = boot_params.hdr.vid_mode; 715 saved_video_mode = boot_params.hdr.vid_mode;
708 bootloader_type = boot_params.hdr.type_of_loader; 716 bootloader_type = boot_params.hdr.type_of_loader;
717 if ((bootloader_type >> 4) == 0xe) {
718 bootloader_type &= 0xf;
719 bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
720 }
721 bootloader_version = bootloader_type & 0xf;
722 bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
709 723
710#ifdef CONFIG_BLK_DEV_RAM 724#ifdef CONFIG_BLK_DEV_RAM
711 rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; 725 rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
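
Boot-protocol decoding: a type_of_loader with 0xE in the high nibble means the real loader ID is 0x10 + ext_loader_type, and the version is the low nibble of type_of_loader extended by ext_loader_ver. A worked example with invented header values:

#include <stdio.h>

int main(void)
{
        unsigned int type_of_loader = 0xE4;     /* extended id, minor ver 4 */
        unsigned int ext_loader_type = 0x02;
        unsigned int ext_loader_ver  = 0x03;

        int bootloader_type = type_of_loader;
        int bootloader_version;

        if ((bootloader_type >> 4) == 0xe) {
                bootloader_type &= 0xf;
                bootloader_type |= (ext_loader_type + 0x10) << 4;
        }
        bootloader_version  = bootloader_type & 0xf;
        bootloader_version |= ext_loader_ver << 4;

        printf("id=%#x version=%#x\n", bootloader_type >> 4,
               bootloader_version);             /* id=0x12 version=0x34 */
        return 0;
}
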
@@ -854,12 +868,16 @@ void __init setup_arch(char **cmdline_p)
854 max_low_pfn = max_pfn; 868 max_low_pfn = max_pfn;
855 869
856 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; 870 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
871 max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
857#endif 872#endif
858 873
859#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION 874#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
860 setup_bios_corruption_check(); 875 setup_bios_corruption_check();
861#endif 876#endif
862 877
878 printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
879 max_pfn_mapped<<PAGE_SHIFT);
880
863 reserve_brk(); 881 reserve_brk();
864 882
865 /* max_pfn_mapped is updated here */ 883 /* max_pfn_mapped is updated here */
@@ -997,24 +1015,6 @@ void __init setup_arch(char **cmdline_p)
997#ifdef CONFIG_X86_32 1015#ifdef CONFIG_X86_32
998 1016
999/** 1017/**
1000 * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
1001 *
1002 * Description:
1003 * Perform any necessary interrupt initialisation prior to setting up
1004 * the "ordinary" interrupt call gates. For legacy reasons, the ISA
1005 * interrupts should be initialised here if the machine emulates a PC
1006 * in any way.
1007 **/
1008void __init x86_quirk_pre_intr_init(void)
1009{
1010 if (x86_quirks->arch_pre_intr_init) {
1011 if (x86_quirks->arch_pre_intr_init())
1012 return;
1013 }
1014 init_ISA_irqs();
1015}
1016
1017/**
1018 * x86_quirk_intr_init - post gate setup interrupt initialisation 1018 * x86_quirk_intr_init - post gate setup interrupt initialisation
1019 * 1019 *
1020 * Description: 1020 * Description:
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 3a97a4cf1872..9c3f0823e6aa 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,8 +160,10 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
160 /* 160 /*
161 * If large page isn't supported, there's no benefit in doing 161 * If large page isn't supported, there's no benefit in doing
162 * this. Also, on non-NUMA, embedding is better. 162 * this. Also, on non-NUMA, embedding is better.
163 *
164 * NOTE: disabled for now.
163 */ 165 */
164 if (!cpu_has_pse || !pcpu_need_numa()) 166 if (true || !cpu_has_pse || !pcpu_need_numa())
165 return -EINVAL; 167 return -EINVAL;
166 168
167 /* 169 /*
@@ -423,6 +425,14 @@ void __init setup_per_cpu_areas(void)
423 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; 425 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
424#endif 426#endif
425 427
428#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
429 /*
430 * make sure boot cpu node_number is right, when boot cpu is on the
431 * node that doesn't have mem installed
432 */
433 per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
434#endif
435
426 /* Setup node to cpumask map */ 436 /* Setup node to cpumask map */
427 setup_node_to_cpumask_map(); 437 setup_node_to_cpumask_map();
428 438
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 13f33ea8ccaa..f6db48c405b8 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -193,19 +193,19 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
193} 193}
194 194
195struct smp_ops smp_ops = { 195struct smp_ops smp_ops = {
196 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, 196 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
197 .smp_prepare_cpus = native_smp_prepare_cpus, 197 .smp_prepare_cpus = native_smp_prepare_cpus,
198 .smp_cpus_done = native_smp_cpus_done, 198 .smp_cpus_done = native_smp_cpus_done,
199 199
200 .smp_send_stop = native_smp_send_stop, 200 .smp_send_stop = native_smp_send_stop,
201 .smp_send_reschedule = native_smp_send_reschedule, 201 .smp_send_reschedule = native_smp_send_reschedule,
202 202
203 .cpu_up = native_cpu_up, 203 .cpu_up = native_cpu_up,
204 .cpu_die = native_cpu_die, 204 .cpu_die = native_cpu_die,
205 .cpu_disable = native_cpu_disable, 205 .cpu_disable = native_cpu_disable,
206 .play_dead = native_play_dead, 206 .play_dead = native_play_dead,
207 207
208 .send_call_func_ipi = native_send_call_func_ipi, 208 .send_call_func_ipi = native_send_call_func_ipi,
209 .send_call_func_single_ipi = native_send_call_func_single_ipi, 209 .send_call_func_single_ipi = native_send_call_func_single_ipi,
210}; 210};
211EXPORT_SYMBOL_GPL(smp_ops); 211EXPORT_SYMBOL_GPL(smp_ops);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 58d24ef917d8..7c80007ea5f7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -504,7 +504,7 @@ void __inquire_remote_apic(int apicid)
504 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 504 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
505 * won't ... remember to clear down the APIC, etc later. 505 * won't ... remember to clear down the APIC, etc later.
506 */ 506 */
507int __devinit 507int __cpuinit
508wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) 508wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
509{ 509{
510 unsigned long send_status, accept_status = 0; 510 unsigned long send_status, accept_status = 0;
@@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
538 return (send_status | accept_status); 538 return (send_status | accept_status);
539} 539}
540 540
541int __devinit 541static int __cpuinit
542wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) 542wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
543{ 543{
544 unsigned long send_status, accept_status = 0; 544 unsigned long send_status, accept_status = 0;
@@ -822,10 +822,12 @@ do_rest:
822 /* mark "stuck" area as not stuck */ 822 /* mark "stuck" area as not stuck */
823 *((volatile unsigned long *)trampoline_base) = 0; 823 *((volatile unsigned long *)trampoline_base) = 0;
824 824
825 /* 825 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
826 * Cleanup possible dangling ends... 826 /*
827 */ 827 * Cleanup possible dangling ends...
828 smpboot_restore_warm_reset_vector(); 828 */
829 smpboot_restore_warm_reset_vector();
830 }
829 831
830 return boot_error; 832 return boot_error;
831} 833}
@@ -990,10 +992,12 @@ static int __init smp_sanity_check(unsigned max_cpus)
990 */ 992 */
991 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && 993 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
992 !cpu_has_apic) { 994 !cpu_has_apic) {
993 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", 995 if (!disable_apic) {
994 boot_cpu_physical_apicid); 996 pr_err("BIOS bug, local APIC #%d not detected!...\n",
995 printk(KERN_ERR "... forcing use of dummy APIC emulation." 997 boot_cpu_physical_apicid);
998 pr_err("... forcing use of dummy APIC emulation."
996 "(tell your hw vendor)\n"); 999 "(tell your hw vendor)\n");
1000 }
997 smpboot_clear_io_apic(); 1001 smpboot_clear_io_apic();
998 arch_disable_smp_support(); 1002 arch_disable_smp_support();
999 return -1; 1003 return -1;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index f7bddc2e37d1..4aaf7e48394f 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -20,7 +20,7 @@ save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
20 20
21static int save_stack_stack(void *data, char *name) 21static int save_stack_stack(void *data, char *name)
22{ 22{
23 return -1; 23 return 0;
24} 24}
25 25
26static void save_stack_address(void *data, unsigned long addr, int reliable) 26static void save_stack_address(void *data, unsigned long addr, int reliable)
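
The one-line stacktrace.c change matters because the stack walker stops as soon as the stack-boundary callback returns non-zero; returning 0 lets saved traces continue across IRQ/exception stack switches instead of being truncated at the first boundary. A small model of that callback contract (stacks and frames invented):

#include <stdio.h>

static int boundary_stop(void *data, char *name) { return -1; }
static int boundary_cont(void *data, char *name) { return 0; }

static void walk(int (*cb)(void *, char *))
{
        const char *stacks[] = { "irq", "task" };
        int i;

        for (i = 0; i < 2; i++) {
                if (cb(NULL, (char *)stacks[i]))
                        return;         /* walk aborted at boundary */
                printf("frames from %s stack\n", stacks[i]);
        }
}

int main(void)
{
        walk(boundary_stop);    /* prints nothing */
        walk(boundary_cont);    /* prints both stacks */
        return 0;
}
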
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c8736b491..734f92c02dde 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,4 @@ ENTRY(sys_call_table)
334 .long sys_inotify_init1 334 .long sys_inotify_init1
335 .long sys_preadv 335 .long sys_preadv
336 .long sys_pwritev 336 .long sys_pwritev
337 .long sys_rt_tgsigqueueinfo /* 335 */
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index ed0c33761e6d..124d40c575df 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -715,7 +715,12 @@ uv_activation_descriptor_init(int node, int pnode)
715 struct bau_desc *adp; 715 struct bau_desc *adp;
716 struct bau_desc *ad2; 716 struct bau_desc *ad2;
717 717
718 adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node); 718 /*
719 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
720 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade
721 */
722 adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
723 UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
719 BUG_ON(!adp); 724 BUG_ON(!adp);
720 725
721 pa = uv_gpa(adp); /* need the real nasid*/ 726 pa = uv_gpa(adp); /* need the real nasid*/
@@ -729,7 +734,13 @@ uv_activation_descriptor_init(int node, int pnode)
729 (n << UV_DESC_BASE_PNODE_SHIFT | m)); 734 (n << UV_DESC_BASE_PNODE_SHIFT | m));
730 } 735 }
731 736
732 for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { 737 /*
738 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
739 * cpu even though we only use the first one; one descriptor can
740 * describe a broadcast to 256 nodes.
741 */
742 for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
743 i++, ad2++) {
733 memset(ad2, 0, sizeof(struct bau_desc)); 744 memset(ad2, 0, sizeof(struct bau_desc));
734 ad2->header.sw_ack_flag = 1; 745 ad2->header.sw_ack_flag = 1;
735 /* 746 /*
@@ -832,7 +843,7 @@ static int __init uv_bau_init(void)
832 return 0; 843 return 0;
833 844
834 for_each_possible_cpu(cur_cpu) 845 for_each_possible_cpu(cur_cpu)
835 alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), 846 zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
836 GFP_KERNEL, cpu_to_node(cur_cpu)); 847 GFP_KERNEL, cpu_to_node(cur_cpu));
837 848
838 uv_bau_retry_limit = 1; 849 uv_bau_retry_limit = 1;
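The comment's arithmetic checks out: 64 * 8 * 32 = 16384, the old magic number. A compile-time sketch with the constants as stand-ins (sizeof(struct bau_desc) taken on trust from the comment):

#include <assert.h>

/* Stand-ins for the kernel constants named in the hunk (assumed). */
#define BAU_DESC_BYTES		64	/* sizeof(struct bau_desc), per comment */
#define UV_ITEMS_PER_DESCRIPTOR	 8	/* descriptors per cpu */
#define UV_ADP_SIZE		32	/* cpus per blade */

int main(void)
{
	/* The new expression equals the old magic 16384, but stays
	 * correct if any of the constants ever change. */
	assert(BAU_DESC_BYTES * UV_ITEMS_PER_DESCRIPTOR * UV_ADP_SIZE == 16384);
	return 0;
}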
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d288327ff0..ede024531f8f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -839,9 +839,6 @@ asmlinkage void math_state_restore(void)
839 } 839 }
840 840
841 clts(); /* Allow maths ops (or we recurse) */ 841 clts(); /* Allow maths ops (or we recurse) */
842#ifdef CONFIG_X86_32
843 restore_fpu(tsk);
844#else
845 /* 842 /*
846 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 843 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
847 */ 844 */
@@ -850,7 +847,7 @@ asmlinkage void math_state_restore(void)
850 force_sig(SIGSEGV, tsk); 847 force_sig(SIGSEGV, tsk);
851 return; 848 return;
852 } 849 }
853#endif 850
854 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 851 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
855 tsk->fpu_counter++; 852 tsk->fpu_counter++;
856} 853}
@@ -969,11 +966,8 @@ void __init trap_init(void)
969 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 966 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
970 set_bit(i, used_vectors); 967 set_bit(i, used_vectors);
971 968
972#ifdef CONFIG_X86_64
973 set_bit(IA32_SYSCALL_VECTOR, used_vectors); 969 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
974#else 970
975 set_bit(SYSCALL_VECTOR, used_vectors);
976#endif
977 /* 971 /*
978 * Should be a barrier for any external CPU state: 972 * Should be a barrier for any external CPU state:
979 */ 973 */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index d57de05dc430..3e1c057e98fe 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -384,13 +384,13 @@ unsigned long native_calibrate_tsc(void)
384{ 384{
385 u64 tsc1, tsc2, delta, ref1, ref2; 385 u64 tsc1, tsc2, delta, ref1, ref2;
386 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; 386 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
387 unsigned long flags, latch, ms, fast_calibrate, tsc_khz; 387 unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz;
388 int hpet = is_hpet_enabled(), i, loopmin; 388 int hpet = is_hpet_enabled(), i, loopmin;
389 389
390 tsc_khz = get_hypervisor_tsc_freq(); 390 hv_tsc_khz = get_hypervisor_tsc_freq();
391 if (tsc_khz) { 391 if (hv_tsc_khz) {
392 printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); 392 printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
393 return tsc_khz; 393 return hv_tsc_khz;
394 } 394 }
395 395
396 local_irq_save(flags); 396 local_irq_save(flags);
@@ -710,7 +710,16 @@ static cycle_t read_tsc(struct clocksource *cs)
710#ifdef CONFIG_X86_64 710#ifdef CONFIG_X86_64
711static cycle_t __vsyscall_fn vread_tsc(void) 711static cycle_t __vsyscall_fn vread_tsc(void)
712{ 712{
713 cycle_t ret = (cycle_t)vget_cycles(); 713 cycle_t ret;
714
715 /*
716 * Surround the RDTSC by barriers, to make sure it's not
717 * speculated to outside the seqlock critical section and
718 * does not cause time warps:
719 */
720 rdtsc_barrier();
721 ret = (cycle_t)vget_cycles();
722 rdtsc_barrier();
714 723
715 return ret >= __vsyscall_gtod_data.clock.cycle_last ? 724 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
716 ret : __vsyscall_gtod_data.clock.cycle_last; 725 ret : __vsyscall_gtod_data.clock.cycle_last;
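The barrier placement above is the whole fix; a standalone C sketch of the pattern (LFENCE chosen for illustration — the kernel's rdtsc_barrier() selects MFENCE or LFENCE per CPU feature):

#include <stdint.h>
#include <stdio.h>

static inline uint64_t fenced_rdtsc(void)
{
	uint32_t lo, hi;

	/* Fences on both sides keep RDTSC from being speculated out of
	 * the surrounding critical section (the cause of time warps). */
	__asm__ __volatile__("lfence" ::: "memory");
	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
	__asm__ __volatile__("lfence" ::: "memory");
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)fenced_rdtsc());
	return 0;
}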
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index bf36328f6ef9..027b5b498993 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -34,6 +34,7 @@ static __cpuinitdata atomic_t stop_count;
34 * of a critical section, to be able to prove TSC time-warps: 34 * of a critical section, to be able to prove TSC time-warps:
35 */ 35 */
36static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; 36static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
37
37static __cpuinitdata cycles_t last_tsc; 38static __cpuinitdata cycles_t last_tsc;
38static __cpuinitdata cycles_t max_warp; 39static __cpuinitdata cycles_t max_warp;
39static __cpuinitdata int nr_warps; 40static __cpuinitdata int nr_warps;
@@ -113,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
113 return; 114 return;
114 115
115 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { 116 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
116 printk(KERN_INFO 117 pr_info("Skipping synchronization checks as TSC is reliable.\n");
117 "Skipping synchronization checks as TSC is reliable.\n");
118 return; 118 return;
119 } 119 }
120 120
121 printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", 121 pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:",
122 smp_processor_id(), cpu); 122 smp_processor_id(), cpu);
123 123
124 /* 124 /*
125 * Reset it - in case this is a second bootup: 125 * Reset it - in case this is a second bootup:
@@ -143,8 +143,8 @@ void __cpuinit check_tsc_sync_source(int cpu)
143 143
144 if (nr_warps) { 144 if (nr_warps) {
145 printk("\n"); 145 printk("\n");
146 printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," 146 pr_warning("Measured %Ld cycles TSC warp between CPUs, "
147 " turning off TSC clock.\n", max_warp); 147 "turning off TSC clock.\n", max_warp);
148 mark_tsc_unstable("check_tsc_sync_source failed"); 148 mark_tsc_unstable("check_tsc_sync_source failed");
149 } else { 149 } else {
150 printk(" passed.\n"); 150 printk(" passed.\n");
@@ -195,5 +195,3 @@ void __cpuinit check_tsc_sync_target(void)
195 while (atomic_read(&stop_count) != cpus) 195 while (atomic_read(&stop_count) != cpus)
196 cpu_relax(); 196 cpu_relax();
197} 197}
198#undef NR_LOOPS
199
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index d7ac84e7fc1c..9c4e62539058 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -287,10 +287,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
287 info->regs.pt.ds = 0; 287 info->regs.pt.ds = 0;
288 info->regs.pt.es = 0; 288 info->regs.pt.es = 0;
289 info->regs.pt.fs = 0; 289 info->regs.pt.fs = 0;
290 290#ifndef CONFIG_X86_32_LAZY_GS
291/* we are clearing gs later just before "jmp resume_userspace", 291 info->regs.pt.gs = 0;
292 * because it is not saved/restored. 292#endif
293 */
294 293
295/* 294/*
296 * The flags register is also special: we cannot trust that the user 295 * The flags register is also special: we cannot trust that the user
@@ -318,9 +317,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
318 } 317 }
319 318
320/* 319/*
321 * Save old state, set default return value (%ax) to 0 320 * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
322 */ 321 */
323 info->regs32->ax = 0; 322 info->regs32->ax = VM86_SIGNAL;
324 tsk->thread.saved_sp0 = tsk->thread.sp0; 323 tsk->thread.saved_sp0 = tsk->thread.sp0;
325 tsk->thread.saved_fs = info->regs32->fs; 324 tsk->thread.saved_fs = info->regs32->fs;
326 tsk->thread.saved_gs = get_user_gs(info->regs32); 325 tsk->thread.saved_gs = get_user_gs(info->regs32);
@@ -343,7 +342,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
343 __asm__ __volatile__( 342 __asm__ __volatile__(
344 "movl %0,%%esp\n\t" 343 "movl %0,%%esp\n\t"
345 "movl %1,%%ebp\n\t" 344 "movl %1,%%ebp\n\t"
345#ifdef CONFIG_X86_32_LAZY_GS
346 "mov %2, %%gs\n\t" 346 "mov %2, %%gs\n\t"
347#endif
347 "jmp resume_userspace" 348 "jmp resume_userspace"
348 : /* no outputs */ 349 : /* no outputs */
349 :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); 350 :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 95deb9f2211e..b263423fbe2a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
462} 462}
463#endif 463#endif
464 464
465static void vmi_enter_lazy_cpu(void) 465static void vmi_start_context_switch(struct task_struct *prev)
466{ 466{
467 paravirt_enter_lazy_cpu(); 467 paravirt_start_context_switch(prev);
468 vmi_ops.set_lazy_mode(2); 468 vmi_ops.set_lazy_mode(2);
469} 469}
470 470
471static void vmi_end_context_switch(struct task_struct *next)
472{
473 vmi_ops.set_lazy_mode(0);
474 paravirt_end_context_switch(next);
475}
476
471static void vmi_enter_lazy_mmu(void) 477static void vmi_enter_lazy_mmu(void)
472{ 478{
473 paravirt_enter_lazy_mmu(); 479 paravirt_enter_lazy_mmu();
474 vmi_ops.set_lazy_mode(1); 480 vmi_ops.set_lazy_mode(1);
475} 481}
476 482
477static void vmi_leave_lazy(void) 483static void vmi_leave_lazy_mmu(void)
478{ 484{
479 paravirt_leave_lazy(paravirt_get_lazy_mode());
480 vmi_ops.set_lazy_mode(0); 485 vmi_ops.set_lazy_mode(0);
486 paravirt_leave_lazy_mmu();
481} 487}
482 488
483static inline int __init check_vmi_rom(struct vrom_header *rom) 489static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -711,14 +717,14 @@ static inline int __init activate_vmi(void)
711 para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); 717 para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
712 para_fill(pv_cpu_ops.io_delay, IODelay); 718 para_fill(pv_cpu_ops.io_delay, IODelay);
713 719
714 para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, 720 para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
715 set_lazy_mode, SetLazyMode); 721 set_lazy_mode, SetLazyMode);
716 para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, 722 para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
717 set_lazy_mode, SetLazyMode); 723 set_lazy_mode, SetLazyMode);
718 724
719 para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, 725 para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
720 set_lazy_mode, SetLazyMode); 726 set_lazy_mode, SetLazyMode);
721 para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, 727 para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
722 set_lazy_mode, SetLazyMode); 728 set_lazy_mode, SetLazyMode);
723 729
724 /* user and kernel flush are just handled with different flags to FlushTLB */ 730 /* user and kernel flush are just handled with different flags to FlushTLB */
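A structural sketch of the pairing this rename establishes (types and call site simplified, not the kernel's actual definitions): the generic context switch brackets its state updates so a paravirt backend can batch them.

struct task_struct;

/* Assumed shape of the pv hooks after this patch. */
struct pv_cpu_hooks {
	void (*start_context_switch)(struct task_struct *prev);
	void (*end_context_switch)(struct task_struct *next);
};

void context_switch_sketch(struct pv_cpu_hooks *pv,
			   struct task_struct *prev,
			   struct task_struct *next)
{
	pv->start_context_switch(prev);	/* begin batching (lazy mode 2) */
	/* ... update segment registers, CR3, TLS, FPU state ... */
	pv->end_context_switch(next);	/* flush batched state (mode 0) */
}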
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 849ee611f013..4c85b2e2bb65 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -1,5 +1,431 @@
1/*
2 * ld script for the x86 kernel
3 *
4 * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
5 *
6 * Modernisation, unification and other changes and fixes:
7 * Copyright (C) 2007-2009 Sam Ravnborg <sam@ravnborg.org>
8 *
9 *
 10 * Don't define absolute symbols until and unless you know that the symbol
 11 * value should remain constant even if the kernel image is relocated
 12 * at run time. Absolute symbols are not relocated. If a symbol's value should
 13 * change when the kernel is relocated, make the symbol section relative and
14 * put it inside the section definition.
15 */
16
1#ifdef CONFIG_X86_32 17#ifdef CONFIG_X86_32
2# include "vmlinux_32.lds.S" 18#define LOAD_OFFSET __PAGE_OFFSET
3#else 19#else
4# include "vmlinux_64.lds.S" 20#define LOAD_OFFSET __START_KERNEL_map
5#endif 21#endif
22
23#include <asm-generic/vmlinux.lds.h>
24#include <asm/asm-offsets.h>
25#include <asm/thread_info.h>
26#include <asm/page_types.h>
27#include <asm/cache.h>
28#include <asm/boot.h>
29
30#undef i386 /* in case the preprocessor is a 32bit one */
31
32OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
33
34#ifdef CONFIG_X86_32
35OUTPUT_ARCH(i386)
36ENTRY(phys_startup_32)
37jiffies = jiffies_64;
38#else
39OUTPUT_ARCH(i386:x86-64)
40ENTRY(phys_startup_64)
41jiffies_64 = jiffies;
42#endif
43
44PHDRS {
45 text PT_LOAD FLAGS(5); /* R_E */
46 data PT_LOAD FLAGS(7); /* RWE */
47#ifdef CONFIG_X86_64
48 user PT_LOAD FLAGS(7); /* RWE */
49 data.init PT_LOAD FLAGS(7); /* RWE */
50#ifdef CONFIG_SMP
51 percpu PT_LOAD FLAGS(7); /* RWE */
52#endif
53 data.init2 PT_LOAD FLAGS(7); /* RWE */
54#endif
55 note PT_NOTE FLAGS(0); /* ___ */
56}
57
58SECTIONS
59{
60#ifdef CONFIG_X86_32
61 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
62 phys_startup_32 = startup_32 - LOAD_OFFSET;
63#else
64 . = __START_KERNEL;
65 phys_startup_64 = startup_64 - LOAD_OFFSET;
66#endif
67
68 /* Text and read-only data */
69
70 /* bootstrapping code */
71 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
72 _text = .;
73 *(.text.head)
74 } :text = 0x9090
75
76 /* The rest of the text */
77 .text : AT(ADDR(.text) - LOAD_OFFSET) {
78#ifdef CONFIG_X86_32
79 /* not really needed, already page aligned */
80 . = ALIGN(PAGE_SIZE);
81 *(.text.page_aligned)
82#endif
83 . = ALIGN(8);
84 _stext = .;
85 TEXT_TEXT
86 SCHED_TEXT
87 LOCK_TEXT
88 KPROBES_TEXT
89 IRQENTRY_TEXT
90 *(.fixup)
91 *(.gnu.warning)
92 /* End of text section */
93 _etext = .;
94 } :text = 0x9090
95
96 NOTES :text :note
97
98 /* Exception table */
99 . = ALIGN(16);
100 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
101 __start___ex_table = .;
102 *(__ex_table)
103 __stop___ex_table = .;
104 } :text = 0x9090
105
106 RODATA
107
108 /* Data */
109 . = ALIGN(PAGE_SIZE);
110 .data : AT(ADDR(.data) - LOAD_OFFSET) {
111 DATA_DATA
112 CONSTRUCTORS
113
114#ifdef CONFIG_X86_64
115 /* End of data section */
116 _edata = .;
117#endif
118 } :data
119
120#ifdef CONFIG_X86_32
121 /* 32 bit has nosave before _edata */
122 . = ALIGN(PAGE_SIZE);
123 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
124 __nosave_begin = .;
125 *(.data.nosave)
126 . = ALIGN(PAGE_SIZE);
127 __nosave_end = .;
128 }
129#endif
130
131 . = ALIGN(PAGE_SIZE);
132 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
133 *(.data.page_aligned)
134 *(.data.idt)
135 }
136
137#ifdef CONFIG_X86_32
138 . = ALIGN(32);
139#else
140 . = ALIGN(PAGE_SIZE);
141 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
142#endif
143 .data.cacheline_aligned :
144 AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
145 *(.data.cacheline_aligned)
146 }
147
148 /* rarely changed data like cpu maps */
149#ifdef CONFIG_X86_32
150 . = ALIGN(32);
151#else
152 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
153#endif
154 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
155 *(.data.read_mostly)
156
157#ifdef CONFIG_X86_32
158 /* End of data section */
159 _edata = .;
160#endif
161 }
162
163#ifdef CONFIG_X86_64
164
165#define VSYSCALL_ADDR (-10*1024*1024)
166#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
167 SIZEOF(.data.read_mostly) + 4095) & ~(4095))
168#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \
169 SIZEOF(.data.read_mostly) + 4095) & ~(4095))
170
171#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
172#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
173
174#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
175#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
176
177 . = VSYSCALL_ADDR;
178 .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
179 *(.vsyscall_0)
180 } :user
181
182 __vsyscall_0 = VSYSCALL_VIRT_ADDR;
183
184 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
185 .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
186 *(.vsyscall_fn)
187 }
188
189 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
190 .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
191 *(.vsyscall_gtod_data)
192 }
193
194 vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
195 .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
196 *(.vsyscall_clock)
197 }
198 vsyscall_clock = VVIRT(.vsyscall_clock);
199
200
201 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
202 *(.vsyscall_1)
203 }
204 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
205 *(.vsyscall_2)
206 }
207
208 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
209 *(.vgetcpu_mode)
210 }
211 vgetcpu_mode = VVIRT(.vgetcpu_mode);
212
213 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
214 .jiffies : AT(VLOAD(.jiffies)) {
215 *(.jiffies)
216 }
217 jiffies = VVIRT(.jiffies);
218
219 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
220 *(.vsyscall_3)
221 }
222
223 . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
224
225#undef VSYSCALL_ADDR
226#undef VSYSCALL_PHYS_ADDR
227#undef VSYSCALL_VIRT_ADDR
228#undef VLOAD_OFFSET
229#undef VLOAD
230#undef VVIRT_OFFSET
231#undef VVIRT
232
233#endif /* CONFIG_X86_64 */
234
235 /* init_task */
236 . = ALIGN(THREAD_SIZE);
237 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
238 *(.data.init_task)
239 }
240#ifdef CONFIG_X86_64
241 :data.init
242#endif
243
244 /*
245 * smp_locks might be freed after init
246 * start/end must be page aligned
247 */
248 . = ALIGN(PAGE_SIZE);
249 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
250 __smp_locks = .;
251 *(.smp_locks)
252 __smp_locks_end = .;
253 . = ALIGN(PAGE_SIZE);
254 }
255
256 /* Init code and data - will be freed after init */
257 . = ALIGN(PAGE_SIZE);
258 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
259 __init_begin = .; /* paired with __init_end */
260 _sinittext = .;
261 INIT_TEXT
262 _einittext = .;
263 }
264
265 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
266 INIT_DATA
267 }
268
269 . = ALIGN(16);
270 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
271 __setup_start = .;
272 *(.init.setup)
273 __setup_end = .;
274 }
275 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
276 __initcall_start = .;
277 INITCALLS
278 __initcall_end = .;
279 }
280
281 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
282 __con_initcall_start = .;
283 *(.con_initcall.init)
284 __con_initcall_end = .;
285 }
286
287 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
288 __x86_cpu_dev_start = .;
289 *(.x86_cpu_dev.init)
290 __x86_cpu_dev_end = .;
291 }
292
293 SECURITY_INIT
294
295 . = ALIGN(8);
296 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
297 __parainstructions = .;
298 *(.parainstructions)
299 __parainstructions_end = .;
300 }
301
302 . = ALIGN(8);
303 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
304 __alt_instructions = .;
305 *(.altinstructions)
306 __alt_instructions_end = .;
307 }
308
309 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
310 *(.altinstr_replacement)
311 }
312
313 /*
 314 * .exit.text is discarded at runtime, not link time, to deal with
315 * references from .altinstructions and .eh_frame
316 */
317 .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
318 EXIT_TEXT
319 }
320
321 .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
322 EXIT_DATA
323 }
324
325#ifdef CONFIG_BLK_DEV_INITRD
326 . = ALIGN(PAGE_SIZE);
327 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
328 __initramfs_start = .;
329 *(.init.ramfs)
330 __initramfs_end = .;
331 }
332#endif
333
334#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
335 /*
336 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
337 * output PHDR, so the next output section - __data_nosave - should
338 * start another section data.init2. Also, pda should be at the head of
339 * percpu area. Preallocate it and define the percpu offset symbol
340 * so that it can be accessed as a percpu variable.
341 */
342 . = ALIGN(PAGE_SIZE);
343 PERCPU_VADDR(0, :percpu)
344#else
345 PERCPU(PAGE_SIZE)
346#endif
347
348 . = ALIGN(PAGE_SIZE);
349
350 /* freed after init ends here */
351 .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {
352 __init_end = .;
353 }
354
355#ifdef CONFIG_X86_64
356 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
357 . = ALIGN(PAGE_SIZE);
358 __nosave_begin = .;
359 *(.data.nosave)
360 . = ALIGN(PAGE_SIZE);
361 __nosave_end = .;
362 } :data.init2
363 /* use another section data.init2, see PERCPU_VADDR() above */
364#endif
365
366 /* BSS */
367 . = ALIGN(PAGE_SIZE);
368 .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
369 __bss_start = .;
370 *(.bss.page_aligned)
371 *(.bss)
372 . = ALIGN(4);
373 __bss_stop = .;
374 }
375
376 . = ALIGN(PAGE_SIZE);
377 .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
378 __brk_base = .;
379 . += 64 * 1024; /* 64k alignment slop space */
380 *(.brk_reservation) /* areas brk users have reserved */
381 __brk_limit = .;
382 }
383
384 .end : AT(ADDR(.end) - LOAD_OFFSET) {
385 _end = .;
386 }
387
388 /* Sections to be discarded */
389 /DISCARD/ : {
390 *(.exitcall.exit)
391 *(.eh_frame)
392 *(.discard)
393 }
394
395 STABS_DEBUG
396 DWARF_DEBUG
397}
398
399
400#ifdef CONFIG_X86_32
401ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
402 "kernel image bigger than KERNEL_IMAGE_SIZE")
403#else
404/*
405 * Per-cpu symbols which need to be offset from __per_cpu_load
406 * for the boot processor.
407 */
408#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
409INIT_PER_CPU(gdt_page);
410INIT_PER_CPU(irq_stack_union);
411
412/*
413 * Build-time check on the image size:
414 */
415ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
416 "kernel image bigger than KERNEL_IMAGE_SIZE")
417
418#ifdef CONFIG_SMP
419ASSERT((per_cpu__irq_stack_union == 0),
420 "irq_stack_union is not at start of per-cpu area");
421#endif
422
423#endif /* CONFIG_X86_32 */
424
425#ifdef CONFIG_KEXEC
426#include <asm/kexec.h>
427
428ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
429 "kexec control code size is too big")
430#endif
431
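A small sketch of the AT() arithmetic the unified script leans on: a section is linked at its virtual address and loaded at virtual minus LOAD_OFFSET. The constant is x86-64's __START_KERNEL_map; the section address is hypothetical.

#include <stdint.h>
#include <stdio.h>

#define LOAD_OFFSET 0xffffffff80000000ULL	/* __START_KERNEL_map, x86-64 */

int main(void)
{
	uint64_t virt = 0xffffffff81000000ULL;	/* hypothetical section VMA */

	/* AT(ADDR(section) - LOAD_OFFSET) == its load (physical) address */
	printf("load address: %#llx\n",
	       (unsigned long long)(virt - LOAD_OFFSET));
	return 0;
}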
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
deleted file mode 100644
index 62ad500d55f3..000000000000
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ /dev/null
@@ -1,229 +0,0 @@
1/* ld script to make i386 Linux kernel
2 * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
3 *
4 * Don't define absolute symbols until and unless you know that symbol
 5 * value should remain constant even if kernel image is relocated
6 * at run time. Absolute symbols are not relocated. If symbol value should
7 * change if kernel is relocated, make the symbol section relative and
8 * put it inside the section definition.
9 */
10
11#define LOAD_OFFSET __PAGE_OFFSET
12
13#include <asm-generic/vmlinux.lds.h>
14#include <asm/thread_info.h>
15#include <asm/page_types.h>
16#include <asm/cache.h>
17#include <asm/boot.h>
18
19OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
20OUTPUT_ARCH(i386)
21ENTRY(phys_startup_32)
22jiffies = jiffies_64;
23
24PHDRS {
25 text PT_LOAD FLAGS(5); /* R_E */
26 data PT_LOAD FLAGS(7); /* RWE */
27 note PT_NOTE FLAGS(0); /* ___ */
28}
29SECTIONS
30{
31 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
32 phys_startup_32 = startup_32 - LOAD_OFFSET;
33
34 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
35 _text = .; /* Text and read-only data */
36 *(.text.head)
37 } :text = 0x9090
38
39 /* read-only */
40 .text : AT(ADDR(.text) - LOAD_OFFSET) {
41 . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */
42 *(.text.page_aligned)
43 TEXT_TEXT
44 SCHED_TEXT
45 LOCK_TEXT
46 KPROBES_TEXT
47 IRQENTRY_TEXT
48 *(.fixup)
49 *(.gnu.warning)
50 _etext = .; /* End of text section */
51 } :text = 0x9090
52
53 NOTES :text :note
54
55 . = ALIGN(16); /* Exception table */
56 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
57 __start___ex_table = .;
58 *(__ex_table)
59 __stop___ex_table = .;
60 } :text = 0x9090
61
62 RODATA
63
64 /* writeable */
65 . = ALIGN(PAGE_SIZE);
66 .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
67 DATA_DATA
68 CONSTRUCTORS
69 } :data
70
71 . = ALIGN(PAGE_SIZE);
72 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
73 __nosave_begin = .;
74 *(.data.nosave)
75 . = ALIGN(PAGE_SIZE);
76 __nosave_end = .;
77 }
78
79 . = ALIGN(PAGE_SIZE);
80 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
81 *(.data.page_aligned)
82 *(.data.idt)
83 }
84
85 . = ALIGN(32);
86 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
87 *(.data.cacheline_aligned)
88 }
89
90 /* rarely changed data like cpu maps */
91 . = ALIGN(32);
92 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
93 *(.data.read_mostly)
94 _edata = .; /* End of data section */
95 }
96
97 . = ALIGN(THREAD_SIZE); /* init_task */
98 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
99 *(.data.init_task)
100 }
101
102 /* might get freed after init */
103 . = ALIGN(PAGE_SIZE);
104 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
105 __smp_locks = .;
106 *(.smp_locks)
107 __smp_locks_end = .;
108 }
109 /* will be freed after init
110 * Following ALIGN() is required to make sure no other data falls on the
111 * same page where __smp_alt_end is pointing as that page might be freed
112 * after boot. Always make sure that ALIGN() directive is present after
113 * the section which contains __smp_alt_end.
114 */
115 . = ALIGN(PAGE_SIZE);
116
117 /* will be freed after init */
118 . = ALIGN(PAGE_SIZE); /* Init code and data */
119 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
120 __init_begin = .;
121 _sinittext = .;
122 INIT_TEXT
123 _einittext = .;
124 }
125 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
126 INIT_DATA
127 }
128 . = ALIGN(16);
129 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
130 __setup_start = .;
131 *(.init.setup)
132 __setup_end = .;
133 }
134 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
135 __initcall_start = .;
136 INITCALLS
137 __initcall_end = .;
138 }
139 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
140 __con_initcall_start = .;
141 *(.con_initcall.init)
142 __con_initcall_end = .;
143 }
144 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
145 __x86_cpu_dev_start = .;
146 *(.x86_cpu_dev.init)
147 __x86_cpu_dev_end = .;
148 }
149 SECURITY_INIT
150 . = ALIGN(4);
151 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
152 __alt_instructions = .;
153 *(.altinstructions)
154 __alt_instructions_end = .;
155 }
156 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
157 *(.altinstr_replacement)
158 }
159 . = ALIGN(4);
160 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
161 __parainstructions = .;
162 *(.parainstructions)
163 __parainstructions_end = .;
164 }
 165 /* .exit.text is discarded at runtime, not link time, to deal with references
166 from .altinstructions and .eh_frame */
167 .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
168 EXIT_TEXT
169 }
170 .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
171 EXIT_DATA
172 }
173#if defined(CONFIG_BLK_DEV_INITRD)
174 . = ALIGN(PAGE_SIZE);
175 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
176 __initramfs_start = .;
177 *(.init.ramfs)
178 __initramfs_end = .;
179 }
180#endif
181 PERCPU(PAGE_SIZE)
182 . = ALIGN(PAGE_SIZE);
183 /* freed after init ends here */
184
185 .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
186 __init_end = .;
187 __bss_start = .; /* BSS */
188 *(.bss.page_aligned)
189 *(.bss)
190 . = ALIGN(4);
191 __bss_stop = .;
192 }
193
194 .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
195 . = ALIGN(PAGE_SIZE);
196 __brk_base = . ;
197 . += 64 * 1024 ; /* 64k alignment slop space */
198 *(.brk_reservation) /* areas brk users have reserved */
199 __brk_limit = . ;
200 }
201
202 .end : AT(ADDR(.end) - LOAD_OFFSET) {
203 _end = . ;
204 }
205
206 /* Sections to be discarded */
207 /DISCARD/ : {
208 *(.exitcall.exit)
209 *(.discard)
210 }
211
212 STABS_DEBUG
213
214 DWARF_DEBUG
215}
216
217/*
218 * Build-time check on the image size:
219 */
220ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
221 "kernel image bigger than KERNEL_IMAGE_SIZE")
222
223#ifdef CONFIG_KEXEC
224/* Link time checks */
225#include <asm/kexec.h>
226
227ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
228 "kexec control code size is too big")
229#endif
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
deleted file mode 100644
index c8742507b030..000000000000
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ /dev/null
@@ -1,298 +0,0 @@
1/* ld script to make x86-64 Linux kernel
2 * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
3 */
4
5#define LOAD_OFFSET __START_KERNEL_map
6
7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h>
9#include <asm/page_types.h>
10
11#undef i386 /* in case the preprocessor is a 32bit one */
12
13OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
14OUTPUT_ARCH(i386:x86-64)
15ENTRY(phys_startup_64)
16jiffies_64 = jiffies;
17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
25 data.init2 PT_LOAD FLAGS(7); /* RWE */
26 note PT_NOTE FLAGS(0); /* ___ */
27}
28SECTIONS
29{
30 . = __START_KERNEL;
31 phys_startup_64 = startup_64 - LOAD_OFFSET;
32 .text : AT(ADDR(.text) - LOAD_OFFSET) {
33 _text = .; /* Text and read-only data */
34 /* First the code that has to be first for bootstrapping */
35 *(.text.head)
36 _stext = .;
37 /* Then the rest */
38 TEXT_TEXT
39 SCHED_TEXT
40 LOCK_TEXT
41 KPROBES_TEXT
42 IRQENTRY_TEXT
43 *(.fixup)
44 *(.gnu.warning)
45 _etext = .; /* End of text section */
46 } :text = 0x9090
47
48 NOTES :text :note
49
50 . = ALIGN(16); /* Exception table */
51 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
52 __start___ex_table = .;
53 *(__ex_table)
54 __stop___ex_table = .;
55 } :text = 0x9090
56
57 RODATA
58
59 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
60 /* Data */
61 .data : AT(ADDR(.data) - LOAD_OFFSET) {
62 DATA_DATA
63 CONSTRUCTORS
64 _edata = .; /* End of data section */
65 } :data
66
67
68 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
69 . = ALIGN(PAGE_SIZE);
70 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
71 *(.data.cacheline_aligned)
72 }
73 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
74 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
75 *(.data.read_mostly)
76 }
77
78#define VSYSCALL_ADDR (-10*1024*1024)
79#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
80#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
81
82#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
83#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
84
85#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
86#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
87
88 . = VSYSCALL_ADDR;
89 .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
90 __vsyscall_0 = VSYSCALL_VIRT_ADDR;
91
92 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
93 .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) }
94 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
95 .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data))
96 { *(.vsyscall_gtod_data) }
97 vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
98 .vsyscall_clock : AT(VLOAD(.vsyscall_clock))
99 { *(.vsyscall_clock) }
100 vsyscall_clock = VVIRT(.vsyscall_clock);
101
102
103 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
104 { *(.vsyscall_1) }
105 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
106 { *(.vsyscall_2) }
107
108 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
109 vgetcpu_mode = VVIRT(.vgetcpu_mode);
110
111 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
112 .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
113 jiffies = VVIRT(.jiffies);
114
115 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
116 { *(.vsyscall_3) }
117
118 . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
119
120#undef VSYSCALL_ADDR
121#undef VSYSCALL_PHYS_ADDR
122#undef VSYSCALL_VIRT_ADDR
123#undef VLOAD_OFFSET
124#undef VLOAD
125#undef VVIRT_OFFSET
126#undef VVIRT
127
128 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
129 . = ALIGN(THREAD_SIZE); /* init_task */
130 *(.data.init_task)
131 }:data.init
132
133 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
134 . = ALIGN(PAGE_SIZE);
135 *(.data.page_aligned)
136 }
137
138 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
139 /* might get freed after init */
140 . = ALIGN(PAGE_SIZE);
141 __smp_alt_begin = .;
142 __smp_locks = .;
143 *(.smp_locks)
144 __smp_locks_end = .;
145 . = ALIGN(PAGE_SIZE);
146 __smp_alt_end = .;
147 }
148
149 . = ALIGN(PAGE_SIZE); /* Init code and data */
150 __init_begin = .; /* paired with __init_end */
151 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
152 _sinittext = .;
153 INIT_TEXT
154 _einittext = .;
155 }
156 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
157 __initdata_begin = .;
158 INIT_DATA
159 __initdata_end = .;
160 }
161
162 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
163 . = ALIGN(16);
164 __setup_start = .;
165 *(.init.setup)
166 __setup_end = .;
167 }
168 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
169 __initcall_start = .;
170 INITCALLS
171 __initcall_end = .;
172 }
173 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
174 __con_initcall_start = .;
175 *(.con_initcall.init)
176 __con_initcall_end = .;
177 }
178 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
179 __x86_cpu_dev_start = .;
180 *(.x86_cpu_dev.init)
181 __x86_cpu_dev_end = .;
182 }
183 SECURITY_INIT
184
185 . = ALIGN(8);
186 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
187 __parainstructions = .;
188 *(.parainstructions)
189 __parainstructions_end = .;
190 }
191
192 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
193 . = ALIGN(8);
194 __alt_instructions = .;
195 *(.altinstructions)
196 __alt_instructions_end = .;
197 }
198 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
199 *(.altinstr_replacement)
200 }
 201 /* .exit.text is discarded at runtime, not link time, to deal with references
202 from .altinstructions and .eh_frame */
203 .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
204 EXIT_TEXT
205 }
206 .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
207 EXIT_DATA
208 }
209
210#ifdef CONFIG_BLK_DEV_INITRD
211 . = ALIGN(PAGE_SIZE);
212 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
213 __initramfs_start = .;
214 *(.init.ramfs)
215 __initramfs_end = .;
216 }
217#endif
218
219#ifdef CONFIG_SMP
220 /*
221 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
222 * output PHDR, so the next output section - __data_nosave - should
223 * start another section data.init2. Also, pda should be at the head of
224 * percpu area. Preallocate it and define the percpu offset symbol
225 * so that it can be accessed as a percpu variable.
226 */
227 . = ALIGN(PAGE_SIZE);
228 PERCPU_VADDR(0, :percpu)
229#else
230 PERCPU(PAGE_SIZE)
231#endif
232
233 . = ALIGN(PAGE_SIZE);
234 __init_end = .;
235
236 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
237 . = ALIGN(PAGE_SIZE);
238 __nosave_begin = .;
239 *(.data.nosave)
240 . = ALIGN(PAGE_SIZE);
241 __nosave_end = .;
242 } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
243
244 .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
245 . = ALIGN(PAGE_SIZE);
246 __bss_start = .; /* BSS */
247 *(.bss.page_aligned)
248 *(.bss)
249 __bss_stop = .;
250 }
251
252 .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
253 . = ALIGN(PAGE_SIZE);
254 __brk_base = . ;
255 . += 64 * 1024 ; /* 64k alignment slop space */
256 *(.brk_reservation) /* areas brk users have reserved */
257 __brk_limit = . ;
258 }
259
260 _end = . ;
261
262 /* Sections to be discarded */
263 /DISCARD/ : {
264 *(.exitcall.exit)
265 *(.eh_frame)
266 *(.discard)
267 }
268
269 STABS_DEBUG
270
271 DWARF_DEBUG
272}
273
274 /*
275 * Per-cpu symbols which need to be offset from __per_cpu_load
276 * for the boot processor.
277 */
278#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
279INIT_PER_CPU(gdt_page);
280INIT_PER_CPU(irq_stack_union);
281
282/*
283 * Build-time check on the image size:
284 */
285ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
286 "kernel image bigger than KERNEL_IMAGE_SIZE")
287
288#ifdef CONFIG_SMP
289ASSERT((per_cpu__irq_stack_union == 0),
290 "irq_stack_union is not at start of per-cpu area");
291#endif
292
293#ifdef CONFIG_KEXEC
294#include <asm/kexec.h>
295
296ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
297 "kexec control code size is too big")
298#endif
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 44153afc9067..25ee06a80aad 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -132,15 +132,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
132 return; 132 return;
133 } 133 }
134 134
135 /*
136 * Surround the RDTSC by barriers, to make sure it's not
137 * speculated to outside the seqlock critical section and
138 * does not cause time warps:
139 */
140 rdtsc_barrier();
141 now = vread(); 135 now = vread();
142 rdtsc_barrier();
143
144 base = __vsyscall_gtod_data.clock.cycle_last; 136 base = __vsyscall_gtod_data.clock.cycle_last;
145 mask = __vsyscall_gtod_data.clock.mask; 137 mask = __vsyscall_gtod_data.clock.mask;
146 mult = __vsyscall_gtod_data.clock.mult; 138 mult = __vsyscall_gtod_data.clock.mult;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b6caf1329b1b..32cf11e5728a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2897,8 +2897,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
2897 2897
2898static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) 2898static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
2899{ 2899{
2900 kvm_x86_ops->tlb_flush(vcpu); 2900 kvm_set_cr3(vcpu, vcpu->arch.cr3);
2901 set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
2902 return 1; 2901 return 1;
2903} 2902}
2904 2903
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 49079a46687b..3944e917e794 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -338,6 +338,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
338 338
339void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 339void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
340{ 340{
341 unsigned long old_cr4 = vcpu->arch.cr4;
342 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
343
341 if (cr4 & CR4_RESERVED_BITS) { 344 if (cr4 & CR4_RESERVED_BITS) {
342 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); 345 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
343 kvm_inject_gp(vcpu, 0); 346 kvm_inject_gp(vcpu, 0);
@@ -351,7 +354,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
351 kvm_inject_gp(vcpu, 0); 354 kvm_inject_gp(vcpu, 0);
352 return; 355 return;
353 } 356 }
354 } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) 357 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
358 && ((cr4 ^ old_cr4) & pdptr_bits)
355 && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 359 && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
356 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); 360 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
357 kvm_inject_gp(vcpu, 0); 361 kvm_inject_gp(vcpu, 0);
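A sketch of the reload predicate this hunk introduces (CR4 bit masks written out for illustration): PDPTEs are re-read only when PAE paging is in effect and a paging-structure bit actually toggled, rather than on every CR4 write.

#include <stdbool.h>

#define X86_CR4_PSE (1UL << 4)
#define X86_CR4_PAE (1UL << 5)
#define X86_CR4_PGE (1UL << 7)

bool pdptrs_need_reload(unsigned long old_cr4, unsigned long new_cr4,
			bool paging)
{
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;

	/* Only an actual change to a bit that affects the paging
	 * structures forces the PDPTEs to be re-read. */
	return paging && (new_cr4 & X86_CR4_PAE) &&
	       ((new_cr4 ^ old_cr4) & pdptr_bits);
}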
diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile
index 27f0c9ed7f60..94e0e54056a9 100644
--- a/arch/x86/lguest/Makefile
+++ b/arch/x86/lguest/Makefile
@@ -1 +1,2 @@
1obj-y := i386_head.o boot.o 1obj-y := i386_head.o boot.o
2CFLAGS_boot.o := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ca7ec44bafc3..4e0c26559395 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -67,6 +67,7 @@
67#include <asm/mce.h> 67#include <asm/mce.h>
68#include <asm/io.h> 68#include <asm/io.h>
69#include <asm/i387.h> 69#include <asm/i387.h>
70#include <asm/stackprotector.h>
70#include <asm/reboot.h> /* for struct machine_ops */ 71#include <asm/reboot.h> /* for struct machine_ops */
71 72
72/*G:010 Welcome to the Guest! 73/*G:010 Welcome to the Guest!
@@ -166,10 +167,16 @@ static void lazy_hcall3(unsigned long call,
166 167
167/* When lazy mode is turned off reset the per-cpu lazy mode variable and then 168/* When lazy mode is turned off reset the per-cpu lazy mode variable and then
168 * issue the do-nothing hypercall to flush any stored calls. */ 169 * issue the do-nothing hypercall to flush any stored calls. */
169static void lguest_leave_lazy_mode(void) 170static void lguest_leave_lazy_mmu_mode(void)
170{ 171{
171 paravirt_leave_lazy(paravirt_get_lazy_mode());
172 kvm_hypercall0(LHCALL_FLUSH_ASYNC); 172 kvm_hypercall0(LHCALL_FLUSH_ASYNC);
173 paravirt_leave_lazy_mmu();
174}
175
176static void lguest_end_context_switch(struct task_struct *next)
177{
178 kvm_hypercall0(LHCALL_FLUSH_ASYNC);
179 paravirt_end_context_switch(next);
173} 180}
174 181
175/*G:033 182/*G:033
@@ -636,7 +643,7 @@ static void __init lguest_init_IRQ(void)
636 643
637void lguest_setup_irq(unsigned int irq) 644void lguest_setup_irq(unsigned int irq)
638{ 645{
639 irq_to_desc_alloc_cpu(irq, 0); 646 irq_to_desc_alloc_node(irq, 0);
640 set_irq_chip_and_handler_name(irq, &lguest_irq_controller, 647 set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
641 handle_level_irq, "level"); 648 handle_level_irq, "level");
642} 649}
@@ -1053,8 +1060,8 @@ __init void lguest_init(void)
1053 pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; 1060 pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
1054 pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; 1061 pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
1055 pv_cpu_ops.wbinvd = lguest_wbinvd; 1062 pv_cpu_ops.wbinvd = lguest_wbinvd;
1056 pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; 1063 pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
1057 pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; 1064 pv_cpu_ops.end_context_switch = lguest_end_context_switch;
1058 1065
1059 /* pagetable management */ 1066 /* pagetable management */
1060 pv_mmu_ops.write_cr3 = lguest_write_cr3; 1067 pv_mmu_ops.write_cr3 = lguest_write_cr3;
@@ -1067,7 +1074,7 @@ __init void lguest_init(void)
1067 pv_mmu_ops.read_cr2 = lguest_read_cr2; 1074 pv_mmu_ops.read_cr2 = lguest_read_cr2;
1068 pv_mmu_ops.read_cr3 = lguest_read_cr3; 1075 pv_mmu_ops.read_cr3 = lguest_read_cr3;
1069 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; 1076 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
1070 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; 1077 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
1071 pv_mmu_ops.pte_update = lguest_pte_update; 1078 pv_mmu_ops.pte_update = lguest_pte_update;
1072 pv_mmu_ops.pte_update_defer = lguest_pte_update; 1079 pv_mmu_ops.pte_update_defer = lguest_pte_update;
1073 1080
@@ -1088,13 +1095,21 @@ __init void lguest_init(void)
1088 * lguest_init() where the rest of the fairly chaotic boot setup 1095 * lguest_init() where the rest of the fairly chaotic boot setup
1089 * occurs. */ 1096 * occurs. */
1090 1097
1098 /* The stack protector is a weird thing where gcc places a canary
1099 * value on the stack and then checks it on return. This file is
 1100 * compiled with -fno-stack-protector, so we got this far without
1101 * problems. The value of the canary is kept at offset 20 from the
1102 * %gs register, so we need to set that up before calling C functions
1103 * in other files. */
1104 setup_stack_canary_segment(0);
1105 /* We could just call load_stack_canary_segment(), but we might as
 1106 * well call switch_to_new_gdt(), which loads the whole table and sets up
1107 * the per-cpu segment descriptor register %fs as well. */
1108 switch_to_new_gdt(0);
1109
1091 /* As described in head_32.S, we map the first 128M of memory. */ 1110 /* As described in head_32.S, we map the first 128M of memory. */
1092 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; 1111 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
1093 1112
1094 /* Load the %fs segment register (the per-cpu segment register) with
1095 * the normal data segment to get through booting. */
1096 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
1097
1098 /* The Host<->Guest Switcher lives at the top of our address space, and 1113 /* The Host<->Guest Switcher lives at the top of our address space, and
1099 * the Host told us how big it is when we made LGUEST_INIT hypercall: 1114 * the Host told us how big it is when we made LGUEST_INIT hypercall:
1100 * it put the answer in lguest_data.reserve_mem */ 1115 * it put the answer in lguest_data.reserve_mem */
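A sketch of the layout the canary comment requires (struct name invented for illustration): i386 gcc with stack protection compares the on-stack canary against %gs:20, so whatever %gs points at must keep the canary at byte offset 20.

#include <stddef.h>
#include <stdint.h>

struct stack_canary_seg {		/* hypothetical name */
	uint8_t  pad[20];		/* i386 gcc hard-codes %gs:20 */
	uint32_t canary;		/* compared in the function epilogue */
};

/* Compile-time check mirroring the requirement. */
_Static_assert(offsetof(struct stack_canary_seg, canary) == 20,
	       "canary must sit at offset 20 from the %gs base");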
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e7277cbcfb40..a725b7f760ae 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st,
161 st->current_address >= st->marker[1].start_address) { 161 st->current_address >= st->marker[1].start_address) {
162 const char *unit = units; 162 const char *unit = units;
163 unsigned long delta; 163 unsigned long delta;
164 int width = sizeof(unsigned long) * 2;
164 165
165 /* 166 /*
166 * Now print the actual finished series 167 * Now print the actual finished series
167 */ 168 */
168 seq_printf(m, "0x%p-0x%p ", 169 seq_printf(m, "0x%0*lx-0x%0*lx ",
169 (void *)st->start_address, 170 width, st->start_address,
170 (void *)st->current_address); 171 width, st->current_address);
171 172
172 delta = (st->current_address - st->start_address) >> 10; 173 delta = (st->current_address - st->start_address) >> 10;
173 while (!(delta & 1023) && unit[1]) { 174 while (!(delta & 1023) && unit[1]) {
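The switch to "0x%0*lx" sizes the hex field to the build's pointer width with no #ifdef; a standalone sketch:

#include <stdio.h>

int main(void)
{
	int width = (int)(sizeof(unsigned long) * 2);	/* 8 or 16 digits */
	unsigned long start = 0xc0000000UL, end = 0xc0400000UL;

	/* Same format the hunk switches to: zero-padded to pointer width. */
	printf("0x%0*lx-0x%0*lx\n", width, start, width, end);
	return 0;
}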
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b7279efa0..5ec7ae366615 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -3,40 +3,16 @@
3 * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. 3 * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
4 * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar 4 * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
5 */ 5 */
6#include <linux/interrupt.h> 6#include <linux/magic.h> /* STACK_END_MAGIC */
7#include <linux/mmiotrace.h> 7#include <linux/sched.h> /* test_thread_flag(), ... */
8#include <linux/bootmem.h> 8#include <linux/kdebug.h> /* oops_begin/end, ... */
9#include <linux/compiler.h> 9#include <linux/module.h> /* search_exception_table */
10#include <linux/highmem.h> 10#include <linux/bootmem.h> /* max_low_pfn */
11#include <linux/kprobes.h> 11#include <linux/kprobes.h> /* __kprobes, ... */
12#include <linux/uaccess.h> 12#include <linux/mmiotrace.h> /* kmmio_handler, ... */
13#include <linux/vmalloc.h> 13
14#include <linux/vt_kern.h> 14#include <asm/traps.h> /* dotraplinkage, ... */
15#include <linux/signal.h> 15#include <asm/pgalloc.h> /* pgd_*(), ... */
16#include <linux/kernel.h>
17#include <linux/ptrace.h>
18#include <linux/string.h>
19#include <linux/module.h>
20#include <linux/kdebug.h>
21#include <linux/errno.h>
22#include <linux/magic.h>
23#include <linux/sched.h>
24#include <linux/types.h>
25#include <linux/init.h>
26#include <linux/mman.h>
27#include <linux/tty.h>
28#include <linux/smp.h>
29#include <linux/mm.h>
30
31#include <asm-generic/sections.h>
32
33#include <asm/tlbflush.h>
34#include <asm/pgalloc.h>
35#include <asm/segment.h>
36#include <asm/system.h>
37#include <asm/proto.h>
38#include <asm/traps.h>
39#include <asm/desc.h>
40 16
41/* 17/*
42 * Page fault error code bits: 18 * Page fault error code bits:
@@ -225,12 +201,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
225 if (!pmd_present(*pmd_k)) 201 if (!pmd_present(*pmd_k))
226 return NULL; 202 return NULL;
227 203
228 if (!pmd_present(*pmd)) { 204 if (!pmd_present(*pmd))
229 set_pmd(pmd, *pmd_k); 205 set_pmd(pmd, *pmd_k);
230 arch_flush_lazy_mmu_mode(); 206 else
231 } else {
232 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); 207 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
233 }
234 208
235 return pmd_k; 209 return pmd_k;
236} 210}
@@ -538,8 +512,6 @@ bad:
538static int is_errata93(struct pt_regs *regs, unsigned long address) 512static int is_errata93(struct pt_regs *regs, unsigned long address)
539{ 513{
540#ifdef CONFIG_X86_64 514#ifdef CONFIG_X86_64
541 static int once;
542
543 if (address != regs->ip) 515 if (address != regs->ip)
544 return 0; 516 return 0;
545 517
@@ -549,10 +521,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
549 address |= 0xffffffffUL << 32; 521 address |= 0xffffffffUL << 32;
550 if ((address >= (u64)_stext && address <= (u64)_etext) || 522 if ((address >= (u64)_stext && address <= (u64)_etext) ||
551 (address >= MODULES_VADDR && address <= MODULES_END)) { 523 (address >= MODULES_VADDR && address <= MODULES_END)) {
552 if (!once) { 524 printk_once(errata93_warning);
553 printk(errata93_warning);
554 once = 1;
555 }
556 regs->ip = address; 525 regs->ip = address;
557 return 1; 526 return 1;
558 } 527 }
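printk_once() packages the static-flag idiom the removed lines open-coded; a userspace approximation (the macro name is made up, and printf stands in for printk):

#include <stdio.h>

/* One-shot print: a function-local static guards repeat emission,
 * exactly what each call site used to hand-roll with 'once'. */
#define printf_once(fmt, ...)				\
	do {						\
		static int __done;			\
		if (!__done) {				\
			__done = 1;			\
			printf(fmt, ##__VA_ARGS__);	\
		}					\
	} while (0)

int main(void)
{
	int i;

	for (i = 0; i < 3; i++)
		printf_once("printed once despite %d iterations\n", 3);
	return 0;
}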
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 8126e8d1a2a4..58f621e81919 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
45 BUG_ON(!pte_none(*(kmap_pte-idx))); 45 BUG_ON(!pte_none(*(kmap_pte-idx)));
46 set_pte(kmap_pte-idx, mk_pte(page, prot)); 46 set_pte(kmap_pte-idx, mk_pte(page, prot));
47 arch_flush_lazy_mmu_mode();
48 47
49 return (void *)vaddr; 48 return (void *)vaddr;
50} 49}
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
74#endif 73#endif
75 } 74 }
76 75
77 arch_flush_lazy_mmu_mode();
78 pagefault_enable(); 76 pagefault_enable();
79} 77}
80 78
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8f307d914c2e..f46c340727b8 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -26,12 +26,16 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
26 unsigned long sbase = saddr & PUD_MASK; 26 unsigned long sbase = saddr & PUD_MASK;
27 unsigned long s_end = sbase + PUD_SIZE; 27 unsigned long s_end = sbase + PUD_SIZE;
28 28
29 /* Allow segments to share if only one is marked locked */
30 unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
31 unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
32
29 /* 33 /*
30 * match the virtual addresses, permission and the alignment of the 34 * match the virtual addresses, permission and the alignment of the
31 * page table page. 35 * page table page.
32 */ 36 */
33 if (pmd_index(addr) != pmd_index(saddr) || 37 if (pmd_index(addr) != pmd_index(saddr) ||
34 vma->vm_flags != svma->vm_flags || 38 vm_flags != svm_flags ||
35 sbase < svma->vm_start || svma->vm_end < s_end) 39 sbase < svma->vm_start || svma->vm_end < s_end)
36 return 0; 40 return 0;
37 41
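A sketch of the masked comparison added above (the VM_LOCKED bit value is illustrative): drop the mlock bit from both sides so one locked mapping doesn't defeat page-table sharing.

#include <stdbool.h>

#define VM_LOCKED 0x00002000UL	/* illustrative bit value */

bool hugetlb_flags_match(unsigned long a, unsigned long b)
{
	/* Locking one mapping should not prevent sharing its page table. */
	return (a & ~VM_LOCKED) == (b & ~VM_LOCKED);
}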
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ae4f7b5d7104..34c1bfb64f1c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,3 +1,4 @@
1#include <linux/initrd.h>
1#include <linux/ioport.h> 2#include <linux/ioport.h>
2#include <linux/swap.h> 3#include <linux/swap.h>
3 4
@@ -10,6 +11,9 @@
10#include <asm/setup.h> 11#include <asm/setup.h>
11#include <asm/system.h> 12#include <asm/system.h>
12#include <asm/tlbflush.h> 13#include <asm/tlbflush.h>
14#include <asm/tlb.h>
15
16DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
13 17
14unsigned long __initdata e820_table_start; 18unsigned long __initdata e820_table_start;
15unsigned long __meminitdata e820_table_end; 19unsigned long __meminitdata e820_table_end;
@@ -23,6 +27,69 @@ int direct_gbpages
23#endif 27#endif
24; 28;
25 29
30int nx_enabled;
31
32#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
33static int disable_nx __cpuinitdata;
34
35/*
36 * noexec = on|off
37 *
38 * Control non-executable mappings for processes.
39 *
40 * on Enable
41 * off Disable
42 */
43static int __init noexec_setup(char *str)
44{
45 if (!str)
46 return -EINVAL;
47 if (!strncmp(str, "on", 2)) {
48 __supported_pte_mask |= _PAGE_NX;
49 disable_nx = 0;
50 } else if (!strncmp(str, "off", 3)) {
51 disable_nx = 1;
52 __supported_pte_mask &= ~_PAGE_NX;
53 }
54 return 0;
55}
56early_param("noexec", noexec_setup);
57#endif
58
59#ifdef CONFIG_X86_PAE
60static void __init set_nx(void)
61{
62 unsigned int v[4], l, h;
63
64 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
65 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
66
67 if ((v[3] & (1 << 20)) && !disable_nx) {
68 rdmsr(MSR_EFER, l, h);
69 l |= EFER_NX;
70 wrmsr(MSR_EFER, l, h);
71 nx_enabled = 1;
72 __supported_pte_mask |= _PAGE_NX;
73 }
74 }
75}
76#else
77static inline void set_nx(void)
78{
79}
80#endif
81
82#ifdef CONFIG_X86_64
83void __cpuinit check_efer(void)
84{
85 unsigned long efer;
86
87 rdmsrl(MSR_EFER, efer);
88 if (!(efer & EFER_NX) || disable_nx)
89 __supported_pte_mask &= ~_PAGE_NX;
90}
91#endif
92
26static void __init find_early_table_space(unsigned long end, int use_pse, 93static void __init find_early_table_space(unsigned long end, int use_pse,
27 int use_gbpages) 94 int use_gbpages)
28{ 95{
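The new set_nx() keys off CPUID leaf 0x80000001; a userspace sketch of the same probe (cpuid.h's __get_cpuid() is a GCC/Clang helper, assumed available, and it performs the max-leaf check the kernel does by hand):

#include <cpuid.h>	/* __get_cpuid(): GCC/Clang helper, assumed */
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0x80000001, EDX bit 20 advertises Execute Disable (NX). */
	if (__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) &&
	    (edx & (1u << 20)))
		puts("NX supported");
	else
		puts("NX not reported");
	return 0;
}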
@@ -66,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
66 */ 133 */
67#ifdef CONFIG_X86_32 134#ifdef CONFIG_X86_32
68 start = 0x7000; 135 start = 0x7000;
69 e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, 136#else
70 tables, PAGE_SIZE);
71#else /* CONFIG_X86_64 */
72 start = 0x8000; 137 start = 0x8000;
73 e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
74#endif 138#endif
139 e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
140 tables, PAGE_SIZE);
75 if (e820_table_start == -1UL) 141 if (e820_table_start == -1UL)
76 panic("Cannot find space for the kernel page tables"); 142 panic("Cannot find space for the kernel page tables");
77 143
@@ -159,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
159 use_gbpages = direct_gbpages; 225 use_gbpages = direct_gbpages;
160#endif 226#endif
161 227
162#ifdef CONFIG_X86_32
163#ifdef CONFIG_X86_PAE
164 set_nx(); 228 set_nx();
165 if (nx_enabled) 229 if (nx_enabled)
166 printk(KERN_INFO "NX (Execute Disable) protection: active\n"); 230 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
167#endif
168 231
169 /* Enable PSE if available */ 232 /* Enable PSE if available */
170 if (cpu_has_pse) 233 if (cpu_has_pse)
@@ -175,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
175 set_in_cr4(X86_CR4_PGE); 238 set_in_cr4(X86_CR4_PGE);
176 __supported_pte_mask |= _PAGE_GLOBAL; 239 __supported_pte_mask |= _PAGE_GLOBAL;
177 } 240 }
178#endif
179 241
180 if (use_gbpages) 242 if (use_gbpages)
181 page_size_mask |= 1 << PG_LEVEL_1G; 243 page_size_mask |= 1 << PG_LEVEL_1G;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 749559ed80f5..949708d7a481 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,12 +49,9 @@
49#include <asm/paravirt.h> 49#include <asm/paravirt.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/page_types.h>
52#include <asm/init.h> 53#include <asm/init.h>
53 54
54unsigned long max_low_pfn_mapped;
55unsigned long max_pfn_mapped;
56
57DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
58unsigned long highstart_pfn, highend_pfn; 55unsigned long highstart_pfn, highend_pfn;
59 56
60static noinline int do_test_wp_bit(void); 57static noinline int do_test_wp_bit(void);
@@ -587,61 +584,9 @@ void zap_low_mappings(void)
587 flush_tlb_all(); 584 flush_tlb_all();
588} 585}
589 586
590int nx_enabled;
591
592pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); 587pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
593EXPORT_SYMBOL_GPL(__supported_pte_mask); 588EXPORT_SYMBOL_GPL(__supported_pte_mask);
594 589
595#ifdef CONFIG_X86_PAE
596
597static int disable_nx __initdata;
598
599/*
600 * noexec = on|off
601 *
602 * Control non executable mappings.
603 *
604 * on Enable
605 * off Disable
606 */
607static int __init noexec_setup(char *str)
608{
609 if (!str || !strcmp(str, "on")) {
610 if (cpu_has_nx) {
611 __supported_pte_mask |= _PAGE_NX;
612 disable_nx = 0;
613 }
614 } else {
615 if (!strcmp(str, "off")) {
616 disable_nx = 1;
617 __supported_pte_mask &= ~_PAGE_NX;
618 } else {
619 return -EINVAL;
620 }
621 }
622
623 return 0;
624}
625early_param("noexec", noexec_setup);
626
627void __init set_nx(void)
628{
629 unsigned int v[4], l, h;
630
631 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
632 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
633
634 if ((v[3] & (1 << 20)) && !disable_nx) {
635 rdmsr(MSR_EFER, l, h);
636 l |= EFER_NX;
637 wrmsr(MSR_EFER, l, h);
638 nx_enabled = 1;
639 __supported_pte_mask |= _PAGE_NX;
640 }
641 }
642}
643#endif
644
645/* user-defined highmem size */ 590/* user-defined highmem size */
646static unsigned int highmem_pages = -1; 591static unsigned int highmem_pages = -1;
647 592
@@ -761,15 +706,15 @@ void __init initmem_init(unsigned long start_pfn,
761 highstart_pfn = highend_pfn = max_pfn; 706 highstart_pfn = highend_pfn = max_pfn;
762 if (max_pfn > max_low_pfn) 707 if (max_pfn > max_low_pfn)
763 highstart_pfn = max_low_pfn; 708 highstart_pfn = max_low_pfn;
764 memory_present(0, 0, highend_pfn);
765 e820_register_active_regions(0, 0, highend_pfn); 709 e820_register_active_regions(0, 0, highend_pfn);
710 sparse_memory_present_with_active_regions(0);
766 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 711 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
767 pages_to_mb(highend_pfn - highstart_pfn)); 712 pages_to_mb(highend_pfn - highstart_pfn));
768 num_physpages = highend_pfn; 713 num_physpages = highend_pfn;
769 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 714 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
770#else 715#else
771 memory_present(0, 0, max_low_pfn);
772 e820_register_active_regions(0, 0, max_low_pfn); 716 e820_register_active_regions(0, 0, max_low_pfn);
717 sparse_memory_present_with_active_regions(0);
773 num_physpages = max_low_pfn; 718 num_physpages = max_low_pfn;
774 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 719 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
775#endif 720#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1753e8020df6..52bb9519bb86 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -50,18 +50,8 @@
50#include <asm/cacheflush.h> 50#include <asm/cacheflush.h>
51#include <asm/init.h> 51#include <asm/init.h>
52 52
53/*
54 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
55 * The direct mapping extends to max_pfn_mapped, so that we can directly access
56 * apertures, ACPI and other tables without having to play with fixmaps.
57 */
58unsigned long max_low_pfn_mapped;
59unsigned long max_pfn_mapped;
60
61static unsigned long dma_reserve __initdata; 53static unsigned long dma_reserve __initdata;
62 54
63DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
64
65static int __init parse_direct_gbpages_off(char *arg) 55static int __init parse_direct_gbpages_off(char *arg)
66{ 56{
67 direct_gbpages = 0; 57 direct_gbpages = 0;
@@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on);
85pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; 75pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
86EXPORT_SYMBOL_GPL(__supported_pte_mask); 76EXPORT_SYMBOL_GPL(__supported_pte_mask);
87 77
88static int disable_nx __cpuinitdata;
89
90/*
91 * noexec=on|off
92 * Control non-executable mappings for 64-bit processes.
93 *
94 * on Enable (default)
95 * off Disable
96 */
97static int __init nonx_setup(char *str)
98{
99 if (!str)
100 return -EINVAL;
101 if (!strncmp(str, "on", 2)) {
102 __supported_pte_mask |= _PAGE_NX;
103 disable_nx = 0;
104 } else if (!strncmp(str, "off", 3)) {
105 disable_nx = 1;
106 __supported_pte_mask &= ~_PAGE_NX;
107 }
108 return 0;
109}
110early_param("noexec", nonx_setup);
111
112void __cpuinit check_efer(void)
113{
114 unsigned long efer;
115
116 rdmsrl(MSR_EFER, efer);
117 if (!(efer & EFER_NX) || disable_nx)
118 __supported_pte_mask &= ~_PAGE_NX;
119}
120
121int force_personality32; 78int force_personality32;
122 79
123/* 80/*
@@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
628 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); 585 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
629 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); 586 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
630} 587}
588#endif
631 589
632void __init paging_init(void) 590void __init paging_init(void)
633{ 591{
@@ -638,11 +596,10 @@ void __init paging_init(void)
638 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; 596 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
639 max_zone_pfns[ZONE_NORMAL] = max_pfn; 597 max_zone_pfns[ZONE_NORMAL] = max_pfn;
640 598
641 memory_present(0, 0, max_pfn); 599 sparse_memory_present_with_active_regions(MAX_NUMNODES);
642 sparse_init(); 600 sparse_init();
643 free_area_init_nodes(max_zone_pfns); 601 free_area_init_nodes(max_zone_pfns);
644} 602}
645#endif
646 603
647/* 604/*
648 * Memory hotplug specific functions 605 * Memory hotplug specific functions
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 8056545e2d39..fe6f84ca121e 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
82 if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) 82 if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
83 kpte_clear_flush(kmap_pte-idx, vaddr); 83 kpte_clear_flush(kmap_pte-idx, vaddr);
84 84
85 arch_flush_lazy_mmu_mode();
86 pagefault_enable(); 85 pagefault_enable();
87} 86}
88EXPORT_SYMBOL_GPL(iounmap_atomic); 87EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 50dc802a1c46..16ccbd77917f 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,7 +32,7 @@ struct kmmio_fault_page {
32 struct list_head list; 32 struct list_head list;
33 struct kmmio_fault_page *release_next; 33 struct kmmio_fault_page *release_next;
34 unsigned long page; /* location of the fault page */ 34 unsigned long page; /* location of the fault page */
35 bool old_presence; /* page presence prior to arming */ 35 pteval_t old_presence; /* page presence prior to arming */
36 bool armed; 36 bool armed;
37 37
38 /* 38 /*
@@ -97,60 +97,62 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
97static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) 97static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
98{ 98{
99 struct list_head *head; 99 struct list_head *head;
100 struct kmmio_fault_page *p; 100 struct kmmio_fault_page *f;
101 101
102 page &= PAGE_MASK; 102 page &= PAGE_MASK;
103 head = kmmio_page_list(page); 103 head = kmmio_page_list(page);
104 list_for_each_entry_rcu(p, head, list) { 104 list_for_each_entry_rcu(f, head, list) {
105 if (p->page == page) 105 if (f->page == page)
106 return p; 106 return f;
107 } 107 }
108 return NULL; 108 return NULL;
109} 109}
110 110
111static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) 111static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
112{ 112{
113 pmdval_t v = pmd_val(*pmd); 113 pmdval_t v = pmd_val(*pmd);
114 *old = !!(v & _PAGE_PRESENT); 114 if (clear) {
115 v &= ~_PAGE_PRESENT; 115 *old = v & _PAGE_PRESENT;
116 if (present) 116 v &= ~_PAGE_PRESENT;
117 v |= _PAGE_PRESENT; 117 } else /* presume this has been called with clear==true previously */
118 v |= *old;
118 set_pmd(pmd, __pmd(v)); 119 set_pmd(pmd, __pmd(v));
119} 120}
120 121
121static void set_pte_presence(pte_t *pte, bool present, bool *old) 122static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
122{ 123{
123 pteval_t v = pte_val(*pte); 124 pteval_t v = pte_val(*pte);
124 *old = !!(v & _PAGE_PRESENT); 125 if (clear) {
125 v &= ~_PAGE_PRESENT; 126 *old = v & _PAGE_PRESENT;
126 if (present) 127 v &= ~_PAGE_PRESENT;
127 v |= _PAGE_PRESENT; 128 } else /* presume this has been called with clear==true previously */
129 v |= *old;
128 set_pte_atomic(pte, __pte(v)); 130 set_pte_atomic(pte, __pte(v));
129} 131}
130 132
131static int set_page_presence(unsigned long addr, bool present, bool *old) 133static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
132{ 134{
133 unsigned int level; 135 unsigned int level;
134 pte_t *pte = lookup_address(addr, &level); 136 pte_t *pte = lookup_address(f->page, &level);
135 137
136 if (!pte) { 138 if (!pte) {
137 pr_err("kmmio: no pte for page 0x%08lx\n", addr); 139 pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
138 return -1; 140 return -1;
139 } 141 }
140 142
141 switch (level) { 143 switch (level) {
142 case PG_LEVEL_2M: 144 case PG_LEVEL_2M:
143 set_pmd_presence((pmd_t *)pte, present, old); 145 clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
144 break; 146 break;
145 case PG_LEVEL_4K: 147 case PG_LEVEL_4K:
146 set_pte_presence(pte, present, old); 148 clear_pte_presence(pte, clear, &f->old_presence);
147 break; 149 break;
148 default: 150 default:
149 pr_err("kmmio: unexpected page level 0x%x.\n", level); 151 pr_err("kmmio: unexpected page level 0x%x.\n", level);
150 return -1; 152 return -1;
151 } 153 }
152 154
153 __flush_tlb_one(addr); 155 __flush_tlb_one(f->page);
154 return 0; 156 return 0;
155} 157}
156 158
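The refactor above stores the raw _PAGE_PRESENT bits of the entry (a pteval_t) rather than a bool, so disarming is a plain OR of the saved value: clear_page_presence(f, true) arms the page, clear_page_presence(f, false) restores it. A self-contained sketch of that save/restore, assuming a 64-bit entry with the present bit at bit 0:

#include <stdio.h>
#include <stdint.h>

#define PAGE_PRESENT 0x1ULL

/* Arm: save the present bit and clear it so the next access faults.
 * Disarm: OR the saved value back in -- the shape of the new
 * clear_pte_presence() with f->old_presence. */
static uint64_t toggle_presence(uint64_t pte, int clear, uint64_t *old)
{
        if (clear) {
                *old = pte & PAGE_PRESENT;
                pte &= ~PAGE_PRESENT;
        } else {
                pte |= *old;
        }
        return pte;
}

int main(void)
{
        uint64_t saved, pte = 0xdeadb000 | PAGE_PRESENT;

        pte = toggle_presence(pte, 1, &saved);   /* arm    */
        pte = toggle_presence(pte, 0, &saved);   /* disarm */
        printf("present restored: %d\n", (int)(pte & PAGE_PRESENT));
        return 0;
}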
@@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
171 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); 173 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
172 if (f->armed) { 174 if (f->armed) {
173 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", 175 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
174 f->page, f->count, f->old_presence); 176 f->page, f->count, !!f->old_presence);
175 } 177 }
176 ret = set_page_presence(f->page, false, &f->old_presence); 178 ret = clear_page_presence(f, true);
177 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); 179 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
178 f->armed = true; 180 f->armed = true;
179 return ret; 181 return ret;
@@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
182/** Restore the given page to saved presence state. */ 184/** Restore the given page to saved presence state. */
183static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) 185static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
184{ 186{
185 bool tmp; 187 int ret = clear_page_presence(f, false);
186 int ret = set_page_presence(f->page, f->old_presence, &tmp);
187 WARN_ONCE(ret < 0, 188 WARN_ONCE(ret < 0,
188 KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); 189 KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
189 f->armed = false; 190 f->armed = false;
@@ -310,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 311 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
311 312
312 if (!ctx->active) { 313 if (!ctx->active) {
313 pr_debug("kmmio: spurious debug trap on CPU %d.\n", 314 /*
315 * debug traps without an active context are due to either
 316 * something external causing them (e.g. using a debugger while
 317 * mmio tracing is enabled), or erroneous behaviour
318 */
319 pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
314 smp_processor_id()); 320 smp_processor_id());
315 goto out; 321 goto out;
316 } 322 }
@@ -439,12 +445,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
439 head, 445 head,
440 struct kmmio_delayed_release, 446 struct kmmio_delayed_release,
441 rcu); 447 rcu);
442 struct kmmio_fault_page *p = dr->release_list; 448 struct kmmio_fault_page *f = dr->release_list;
443 while (p) { 449 while (f) {
444 struct kmmio_fault_page *next = p->release_next; 450 struct kmmio_fault_page *next = f->release_next;
445 BUG_ON(p->count); 451 BUG_ON(f->count);
446 kfree(p); 452 kfree(f);
447 p = next; 453 f = next;
448 } 454 }
449 kfree(dr); 455 kfree(dr);
450} 456}
@@ -453,19 +459,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
453{ 459{
454 struct kmmio_delayed_release *dr = 460 struct kmmio_delayed_release *dr =
455 container_of(head, struct kmmio_delayed_release, rcu); 461 container_of(head, struct kmmio_delayed_release, rcu);
456 struct kmmio_fault_page *p = dr->release_list; 462 struct kmmio_fault_page *f = dr->release_list;
457 struct kmmio_fault_page **prevp = &dr->release_list; 463 struct kmmio_fault_page **prevp = &dr->release_list;
458 unsigned long flags; 464 unsigned long flags;
459 465
460 spin_lock_irqsave(&kmmio_lock, flags); 466 spin_lock_irqsave(&kmmio_lock, flags);
461 while (p) { 467 while (f) {
462 if (!p->count) { 468 if (!f->count) {
463 list_del_rcu(&p->list); 469 list_del_rcu(&f->list);
464 prevp = &p->release_next; 470 prevp = &f->release_next;
465 } else { 471 } else {
466 *prevp = p->release_next; 472 *prevp = f->release_next;
467 } 473 }
468 p = p->release_next; 474 f = f->release_next;
469 } 475 }
470 spin_unlock_irqrestore(&kmmio_lock, flags); 476 spin_unlock_irqrestore(&kmmio_lock, flags);
471 477
@@ -528,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
528} 534}
529EXPORT_SYMBOL(unregister_kmmio_probe); 535EXPORT_SYMBOL(unregister_kmmio_probe);
530 536
531static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, 537static int
532 void *args) 538kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
533{ 539{
534 struct die_args *arg = args; 540 struct die_args *arg = args;
535 541
@@ -544,11 +550,23 @@ static struct notifier_block nb_die = {
544 .notifier_call = kmmio_die_notifier 550 .notifier_call = kmmio_die_notifier
545}; 551};
546 552
547static int __init init_kmmio(void) 553int kmmio_init(void)
548{ 554{
549 int i; 555 int i;
556
550 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) 557 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
551 INIT_LIST_HEAD(&kmmio_page_table[i]); 558 INIT_LIST_HEAD(&kmmio_page_table[i]);
559
552 return register_die_notifier(&nb_die); 560 return register_die_notifier(&nb_die);
553} 561}
554fs_initcall(init_kmmio); /* should be before device_initcall() */ 562
563void kmmio_cleanup(void)
564{
565 int i;
566
567 unregister_die_notifier(&nb_die);
568 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
569 WARN_ONCE(!list_empty(&kmmio_page_table[i]),
570 KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
571 }
572}
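With fs_initcall() gone, the die notifier and page-table setup now live only for the lifetime of a trace: enable_mmiotrace() calls kmmio_init() and disable_mmiotrace() calls kmmio_cleanup(), which warns about entries that would otherwise leak (see the mmio-mod.c hunks below). A tiny sketch of that paired lifecycle, with an illustrative bucket table standing in for kmmio_page_table:

#include <stdio.h>

#define TABLE_SIZE 4
static int bucket_used[TABLE_SIZE];

static void trace_init(void)
{
        for (int i = 0; i < TABLE_SIZE; i++)
                bucket_used[i] = 0;
}

static void trace_cleanup(void)
{
        /* Mirrors kmmio_cleanup(): entries still present here could
         * never be freed later, so warn about the leak. */
        for (int i = 0; i < TABLE_SIZE; i++)
                if (bucket_used[i])
                        fprintf(stderr, "bucket %d not empty at cleanup\n", i);
}

int main(void)
{
        trace_init();      /* enable_mmiotrace()  -> kmmio_init()    */
        trace_cleanup();   /* disable_mmiotrace() -> kmmio_cleanup() */
        return 0;
}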
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 605c8be06217..c0bedcd10f97 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -40,23 +40,23 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
40 40
41static void __init memtest(u64 pattern, u64 start_phys, u64 size) 41static void __init memtest(u64 pattern, u64 start_phys, u64 size)
42{ 42{
43 u64 i, count; 43 u64 *p;
44 u64 *start; 44 void *start, *end;
45 u64 start_bad, last_bad; 45 u64 start_bad, last_bad;
46 u64 start_phys_aligned; 46 u64 start_phys_aligned;
47 size_t incr; 47 size_t incr;
48 48
49 incr = sizeof(pattern); 49 incr = sizeof(pattern);
50 start_phys_aligned = ALIGN(start_phys, incr); 50 start_phys_aligned = ALIGN(start_phys, incr);
51 count = (size - (start_phys_aligned - start_phys))/incr;
52 start = __va(start_phys_aligned); 51 start = __va(start_phys_aligned);
52 end = start + size - (start_phys_aligned - start_phys);
53 start_bad = 0; 53 start_bad = 0;
54 last_bad = 0; 54 last_bad = 0;
55 55
56 for (i = 0; i < count; i++) 56 for (p = start; p < end; p++)
57 start[i] = pattern; 57 *p = pattern;
58 for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { 58 for (p = start; p < end; p++, start_phys_aligned += incr) {
59 if (*start == pattern) 59 if (*p == pattern)
60 continue; 60 continue;
61 if (start_phys_aligned == last_bad + incr) { 61 if (start_phys_aligned == last_bad + incr) {
62 last_bad += incr; 62 last_bad += incr;
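The memtest rewrite above drops the index/count bookkeeping in favour of plain pointer iteration over [start, end). A compilable illustration of the same fill-then-verify pattern; the buffer size and test pattern here are arbitrary:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        static uint64_t buf[1024];
        const uint64_t pattern = 0xaa55aa55aa55aa55ULL;
        uint64_t *p, *start = buf, *end = buf + 1024;   /* end is exclusive */

        for (p = start; p < end; p++)           /* fill   */
                *p = pattern;
        for (p = start; p < end; p++)           /* verify */
                if (*p != pattern)
                        printf("bad word at offset %td\n", p - start);
        return 0;
}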
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index c9342ed8b402..132772a8ec57 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -451,6 +451,7 @@ void enable_mmiotrace(void)
451 451
452 if (nommiotrace) 452 if (nommiotrace)
453 pr_info(NAME "MMIO tracing disabled.\n"); 453 pr_info(NAME "MMIO tracing disabled.\n");
454 kmmio_init();
454 enter_uniprocessor(); 455 enter_uniprocessor();
455 spin_lock_irq(&trace_lock); 456 spin_lock_irq(&trace_lock);
456 atomic_inc(&mmiotrace_enabled); 457 atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@ void disable_mmiotrace(void)
473 474
474 clear_trace_list(); /* guarantees: no more kmmio callbacks */ 475 clear_trace_list(); /* guarantees: no more kmmio callbacks */
475 leave_uniprocessor(); 476 leave_uniprocessor();
477 kmmio_cleanup();
476 pr_info(NAME "disabled.\n"); 478 pr_info(NAME "disabled.\n");
477out: 479out:
478 mutex_unlock(&mmiotrace_mutex); 480 mutex_unlock(&mmiotrace_mutex);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2d05a12029dc..459913beac71 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
179} 179}
180 180
181/* Initialize bootmem allocator for a node */ 181/* Initialize bootmem allocator for a node */
182void __init setup_node_bootmem(int nodeid, unsigned long start, 182void __init
183 unsigned long end) 183setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
184{ 184{
185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
186 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
186 unsigned long bootmap_start, nodedata_phys; 187 unsigned long bootmap_start, nodedata_phys;
187 void *bootmap; 188 void *bootmap;
188 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
189 int nid; 189 int nid;
190 190
191 if (!end) 191 if (!end)
192 return; 192 return;
193 193
194 /*
195 * Don't confuse VM with a node that doesn't have the
196 * minimum amount of memory:
197 */
198 if (end && (end - start) < NODE_MIN_SIZE)
199 return;
200
194 start = roundup(start, ZONE_ALIGN); 201 start = roundup(start, ZONE_ALIGN);
195 202
196 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 203 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
@@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
272 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, 279 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
273 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); 280 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
274 281
275#ifdef CONFIG_ACPI_NUMA
276 srat_reserve_add_area(nodeid);
277#endif
278 node_set_online(nodeid); 282 node_set_online(nodeid);
279} 283}
280 284
@@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void)
578 return pages; 582 return pages;
579} 583}
580 584
581void __init paging_init(void)
582{
583 unsigned long max_zone_pfns[MAX_NR_ZONES];
584
585 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
586 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
587 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
588 max_zone_pfns[ZONE_NORMAL] = max_pfn;
589
590 sparse_memory_present_with_active_regions(MAX_NUMNODES);
591 sparse_init();
592
593 free_area_init_nodes(max_zone_pfns);
594}
595
596static __init int numa_setup(char *opt) 585static __init int numa_setup(char *opt)
597{ 586{
598 if (!opt) 587 if (!opt)
@@ -606,8 +595,6 @@ static __init int numa_setup(char *opt)
606#ifdef CONFIG_ACPI_NUMA 595#ifdef CONFIG_ACPI_NUMA
607 if (!strncmp(opt, "noacpi", 6)) 596 if (!strncmp(opt, "noacpi", 6))
608 acpi_numa = -1; 597 acpi_numa = -1;
609 if (!strncmp(opt, "hotadd=", 7))
610 hotadd_percent = simple_strtoul(opt+7, NULL, 10);
611#endif 598#endif
612 return 0; 599 return 0;
613} 600}
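setup_node_bootmem() now refuses undersized nodes up front, replacing the unparse_node() cleanup that srat_64.c used to do after the fact. With the kernel's 4 MB NODE_MIN_SIZE threshold, a 2 MB SRAT node is silently skipped while an 8 MB one proceeds; a minimal sketch of the check:

#include <stdio.h>

#define NODE_MIN_SIZE (4UL * 1024 * 1024)   /* 4 MB minimum per node */

/* Same early-out the patch adds to setup_node_bootmem(). */
static int node_too_small(unsigned long start, unsigned long end)
{
        return end && (end - start) < NODE_MIN_SIZE;
}

int main(void)
{
        printf("%d\n", node_too_small(0x100000, 0x300000));  /* 2 MB -> 1 */
        printf("%d\n", node_too_small(0x100000, 0x900000));  /* 8 MB -> 0 */
        return 0;
}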
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 797f9f107cb6..6ce9518fe2ac 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -153,7 +153,7 @@ static void __cpa_flush_all(void *arg)
153 */ 153 */
154 __flush_tlb_all(); 154 __flush_tlb_all();
155 155
156 if (cache && boot_cpu_data.x86_model >= 4) 156 if (cache && boot_cpu_data.x86 >= 4)
157 wbinvd(); 157 wbinvd();
158} 158}
159 159
@@ -208,20 +208,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
208 int in_flags, struct page **pages) 208 int in_flags, struct page **pages)
209{ 209{
210 unsigned int i, level; 210 unsigned int i, level;
211 unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
211 212
212 BUG_ON(irqs_disabled()); 213 BUG_ON(irqs_disabled());
213 214
214 on_each_cpu(__cpa_flush_range, NULL, 1); 215 on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
215 216
216 if (!cache) 217 if (!cache || do_wbinvd)
217 return; 218 return;
218 219
219 /* 4M threshold */
220 if (numpages >= 1024) {
221 if (boot_cpu_data.x86_model >= 4)
222 wbinvd();
223 return;
224 }
225 /* 220 /*
226 * We only need to flush on one CPU, 221 * We only need to flush on one CPU,
227 * clflush is a MESI-coherent instruction that 222 * clflush is a MESI-coherent instruction that
@@ -844,13 +839,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
844 839
845 vm_unmap_aliases(); 840 vm_unmap_aliases();
846 841
847 /*
848 * If we're called with lazy mmu updates enabled, the
849 * in-memory pte state may be stale. Flush pending updates to
850 * bring them up to date.
851 */
852 arch_flush_lazy_mmu_mode();
853
854 cpa.vaddr = addr; 842 cpa.vaddr = addr;
855 cpa.pages = pages; 843 cpa.pages = pages;
856 cpa.numpages = numpages; 844 cpa.numpages = numpages;
@@ -895,13 +883,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
895 } else 883 } else
896 cpa_flush_all(cache); 884 cpa_flush_all(cache);
897 885
898 /*
899 * If we've been called with lazy mmu updates enabled, then
900 * make sure that everything gets flushed out before we
901 * return.
902 */
903 arch_flush_lazy_mmu_mode();
904
905out: 886out:
906 return ret; 887 return ret;
907} 888}
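The cpa_flush_array() change above computes the wbinvd decision once and hands it to __cpa_flush_all() on every CPU: at 1024 pages x 4 KB = 4 MB, invalidating the whole cache beats clflushing each page individually. The cutoff in sketch form:

#include <stdio.h>

/* The 4 MB threshold from cpa_flush_array(): 1024 pages * 4 KB = 4 MB. */
static int should_wbinvd(int cache, int numpages)
{
        return cache && numpages >= 1024;
}

int main(void)
{
        printf("512 pages (2 MB):  wbinvd=%d\n", should_wbinvd(1, 512));
        printf("2048 pages (8 MB): wbinvd=%d\n", should_wbinvd(1, 2048));
        return 0;
}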
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 01765955baaf..2dfcbf9df2ae 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata;
31static nodemask_t cpu_nodes_parsed __initdata; 31static nodemask_t cpu_nodes_parsed __initdata;
32static struct bootnode nodes[MAX_NUMNODES] __initdata; 32static struct bootnode nodes[MAX_NUMNODES] __initdata;
33static struct bootnode nodes_add[MAX_NUMNODES]; 33static struct bootnode nodes_add[MAX_NUMNODES];
34static int found_add_area __initdata;
35int hotadd_percent __initdata = 0;
36 34
37static int num_node_memblks __initdata; 35static int num_node_memblks __initdata;
38static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; 36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
39static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; 37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
40 38
41/* Too small nodes confuse the VM badly. Usually they result
42 from BIOS bugs. */
43#define NODE_MIN_SIZE (4*1024*1024)
44
45static __init int setup_node(int pxm) 39static __init int setup_node(int pxm)
46{ 40{
47 return acpi_map_pxm_to_node(pxm); 41 return acpi_map_pxm_to_node(pxm);
@@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
66{ 60{
67 struct bootnode *nd = &nodes[i]; 61 struct bootnode *nd = &nodes[i];
68 62
69 if (found_add_area)
70 return;
71
72 if (nd->start < start) { 63 if (nd->start < start) {
73 nd->start = start; 64 nd->start = start;
74 if (nd->end < nd->start) 65 if (nd->end < nd->start)
@@ -86,7 +77,6 @@ static __init void bad_srat(void)
86 int i; 77 int i;
87 printk(KERN_ERR "SRAT: SRAT not used.\n"); 78 printk(KERN_ERR "SRAT: SRAT not used.\n");
88 acpi_numa = -1; 79 acpi_numa = -1;
89 found_add_area = 0;
90 for (i = 0; i < MAX_LOCAL_APIC; i++) 80 for (i = 0; i < MAX_LOCAL_APIC; i++)
91 apicid_to_node[i] = NUMA_NO_NODE; 81 apicid_to_node[i] = NUMA_NO_NODE;
92 for (i = 0; i < MAX_NUMNODES; i++) 82 for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
182 pxm, apic_id, node); 172 pxm, apic_id, node);
183} 173}
184 174
185static int update_end_of_memory(unsigned long end) {return -1;}
186static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
187#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 175#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
188static inline int save_add_info(void) {return 1;} 176static inline int save_add_info(void) {return 1;}
189#else 177#else
190static inline int save_add_info(void) {return 0;} 178static inline int save_add_info(void) {return 0;}
191#endif 179#endif
192/* 180/*
193 * Update nodes_add and decide if to include add are in the zone. 181 * Update nodes_add[]
194 * Both SPARSE and RESERVE need nodes_add information. 182 * This code supports one contiguous hot add area per node
195 * This code supports one contiguous hot add area per node.
196 */ 183 */
197static int __init 184static void __init
198reserve_hotadd(int node, unsigned long start, unsigned long end) 185update_nodes_add(int node, unsigned long start, unsigned long end)
199{ 186{
200 unsigned long s_pfn = start >> PAGE_SHIFT; 187 unsigned long s_pfn = start >> PAGE_SHIFT;
201 unsigned long e_pfn = end >> PAGE_SHIFT; 188 unsigned long e_pfn = end >> PAGE_SHIFT;
202 int ret = 0, changed = 0; 189 int changed = 0;
203 struct bootnode *nd = &nodes_add[node]; 190 struct bootnode *nd = &nodes_add[node];
204 191
205 /* I had some trouble with strange memory hotadd regions breaking 192 /* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
210 mistakes */ 197 mistakes */
211 if ((signed long)(end - start) < NODE_MIN_SIZE) { 198 if ((signed long)(end - start) < NODE_MIN_SIZE) {
212 printk(KERN_ERR "SRAT: Hotplug area too small\n"); 199 printk(KERN_ERR "SRAT: Hotplug area too small\n");
213 return -1; 200 return;
214 } 201 }
215 202
216 /* This check might be a bit too strict, but I'm keeping it for now. */ 203 /* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
218 printk(KERN_ERR 205 printk(KERN_ERR
219 "SRAT: Hotplug area %lu -> %lu has existing memory\n", 206 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
220 s_pfn, e_pfn); 207 s_pfn, e_pfn);
221 return -1; 208 return;
222 }
223
224 if (!hotadd_enough_memory(&nodes_add[node])) {
225 printk(KERN_ERR "SRAT: Hotplug area too large\n");
226 return -1;
227 } 209 }
228 210
229 /* Looks good */ 211 /* Looks good */
@@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
245 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); 227 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
246 } 228 }
247 229
248 ret = update_end_of_memory(nd->end);
249
250 if (changed) 230 if (changed)
251 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); 231 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
252 return ret; 232 nd->start, nd->end);
253} 233}
254 234
255/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ 235/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
310 start, end); 290 start, end);
311 e820_register_active_regions(node, start >> PAGE_SHIFT, 291 e820_register_active_regions(node, start >> PAGE_SHIFT,
312 end >> PAGE_SHIFT); 292 end >> PAGE_SHIFT);
313 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
314 nd->end >> PAGE_SHIFT);
315 293
316 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && 294 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
317 (reserve_hotadd(node, start, end) < 0)) { 295 update_nodes_add(node, start, end);
318 /* Ignore hotadd region. Undo damage */ 296 /* restore nodes[node] */
319 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
320 *nd = oldnode; 297 *nd = oldnode;
321 if ((nd->start | nd->end) == 0) 298 if ((nd->start | nd->end) == 0)
322 node_clear(node, nodes_parsed); 299 node_clear(node, nodes_parsed);
@@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
345 pxmram = 0; 322 pxmram = 0;
346 } 323 }
347 324
348 e820ram = max_pfn - absent_pages_in_range(0, max_pfn); 325 e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
349 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ 326 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
350 if ((long)(e820ram - pxmram) >= 1*1024*1024) { 327 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
351 printk(KERN_ERR 328 printk(KERN_ERR
352 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", 329 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
353 (pxmram << PAGE_SHIFT) >> 20, 330 (pxmram << PAGE_SHIFT) >> 20,
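The new slack term reads less obviously than the old literal: 1 << (20 - PAGE_SHIFT) is the number of pages in 1 MB, so with 4 KB pages (PAGE_SHIFT = 12) the SRAT coverage check now tolerates 256 pages of discrepancy. A one-liner to confirm the arithmetic:

#include <stdio.h>

#define PAGE_SHIFT 12   /* 4 KB pages on x86 */

int main(void)
{
        unsigned long slack_pages = 1UL << (20 - PAGE_SHIFT);

        /* 1 << (20 - 12) = 256 pages; 256 * 4096 bytes = 1024 KB = 1 MB */
        printf("slack: %lu pages = %lu KB\n",
               slack_pages, slack_pages << PAGE_SHIFT >> 10);
        return 0;
}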
@@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
357 return 1; 334 return 1;
358} 335}
359 336
360static void __init unparse_node(int node)
361{
362 int i;
363 node_clear(node, nodes_parsed);
364 node_clear(node, cpu_nodes_parsed);
365 for (i = 0; i < MAX_LOCAL_APIC; i++) {
366 if (apicid_to_node[i] == node)
367 apicid_to_node[i] = NUMA_NO_NODE;
368 }
369}
370
371void __init acpi_numa_arch_fixup(void) {} 337void __init acpi_numa_arch_fixup(void) {}
372 338
373/* Use the information discovered above to actually set up the nodes. */ 339/* Use the information discovered above to actually set up the nodes. */
@@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
379 return -1; 345 return -1;
380 346
381 /* First clean up the node list */ 347 /* First clean up the node list */
382 for (i = 0; i < MAX_NUMNODES; i++) { 348 for (i = 0; i < MAX_NUMNODES; i++)
383 cutoff_node(i, start, end); 349 cutoff_node(i, start, end);
384 /*
385 * don't confuse VM with a node that doesn't have the
386 * minimum memory.
387 */
388 if (nodes[i].end &&
389 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
390 unparse_node(i);
391 node_set_offline(i);
392 }
393 }
394 350
395 if (!nodes_cover_memory(nodes)) { 351 if (!nodes_cover_memory(nodes)) {
396 bad_srat(); 352 bad_srat();
@@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
423 379
424 if (node == NUMA_NO_NODE) 380 if (node == NUMA_NO_NODE)
425 continue; 381 continue;
426 if (!node_isset(node, node_possible_map)) 382 if (!node_online(node))
427 numa_clear_node(i); 383 numa_clear_node(i);
428 } 384 }
429 numa_init_array(); 385 numa_init_array();
@@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b)
510} 466}
511#endif /* CONFIG_NUMA_EMU */ 467#endif /* CONFIG_NUMA_EMU */
512 468
513void __init srat_reserve_add_area(int nodeid)
514{
515 if (found_add_area && nodes_add[nodeid].end) {
516 u64 total_mb;
517
518 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
519 "for node %d at %Lx-%Lx\n",
520 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
521 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
522 >> PAGE_SHIFT;
523 total_mb *= sizeof(struct page);
524 total_mb >>= 20;
525 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
526 "pre-allocated memory.\n", (unsigned long long)total_mb);
527 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
528 nodes_add[nodeid].end - nodes_add[nodeid].start,
529 BOOTMEM_DEFAULT);
530 }
531}
532
533int __node_distance(int a, int b) 469int __node_distance(int a, int b)
534{ 470{
535 int index; 471 int index;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 202864ad49a7..3b285e656e27 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -356,14 +356,11 @@ static void exit_sysfs(void)
356#define exit_sysfs() do { } while (0) 356#define exit_sysfs() do { } while (0)
357#endif /* CONFIG_PM */ 357#endif /* CONFIG_PM */
358 358
359static int p4force;
360module_param(p4force, int, 0);
361
362static int __init p4_init(char **cpu_type) 359static int __init p4_init(char **cpu_type)
363{ 360{
364 __u8 cpu_model = boot_cpu_data.x86_model; 361 __u8 cpu_model = boot_cpu_data.x86_model;
365 362
366 if (!p4force && (cpu_model > 6 || cpu_model == 5)) 363 if (cpu_model > 6 || cpu_model == 5)
367 return 0; 364 return 0;
368 365
369#ifndef CONFIG_SMP 366#ifndef CONFIG_SMP
@@ -389,10 +386,25 @@ static int __init p4_init(char **cpu_type)
389 return 0; 386 return 0;
390} 387}
391 388
389static int force_arch_perfmon;
390static int force_cpu_type(const char *str, struct kernel_param *kp)
391{
392 if (!strcmp(str, "archperfmon")) {
393 force_arch_perfmon = 1;
394 printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
395 }
396
397 return 0;
398}
399module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
400
392static int __init ppro_init(char **cpu_type) 401static int __init ppro_init(char **cpu_type)
393{ 402{
394 __u8 cpu_model = boot_cpu_data.x86_model; 403 __u8 cpu_model = boot_cpu_data.x86_model;
395 404
405 if (force_arch_perfmon && cpu_has_arch_perfmon)
406 return 0;
407
396 switch (cpu_model) { 408 switch (cpu_model) {
397 case 0 ... 2: 409 case 0 ... 2:
398 *cpu_type = "i386/ppro"; 410 *cpu_type = "i386/ppro";
@@ -414,6 +426,13 @@ static int __init ppro_init(char **cpu_type)
414 case 15: case 23: 426 case 15: case 23:
415 *cpu_type = "i386/core_2"; 427 *cpu_type = "i386/core_2";
416 break; 428 break;
429 case 26:
430 arch_perfmon_setup_counters();
431 *cpu_type = "i386/core_i7";
432 break;
433 case 28:
434 *cpu_type = "i386/atom";
435 break;
417 default: 436 default:
418 /* Unknown */ 437 /* Unknown */
419 return 0; 438 return 0;
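In place of the removed p4force knob, the driver gains oprofile.cpu_type=archperfmon, which makes ppro_init() step aside so the architectural-perfmon fallback can bind on CPUs whose model-specific driver misbehaves. The string handling in a userspace sketch (the handler shape mirrors the module_param_call() callback above):

#include <stdio.h>
#include <string.h>

static int force_arch_perfmon;

static int force_cpu_type(const char *str)
{
        if (!strcmp(str, "archperfmon")) {
                force_arch_perfmon = 1;
                printf("oprofile: forcing architectural perfmon\n");
        }
        return 0;
}

int main(void)
{
        force_cpu_type("archperfmon");   /* oprofile.cpu_type=archperfmon */
        printf("forced=%d\n", force_arch_perfmon);
        return 0;
}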
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index fecbce6e7d7c..0696d506c4ad 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -889,6 +889,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
889 return 0; 889 return 0;
890 } 890 }
891 891
892 if (io_apic_assign_pci_irqs)
893 return 0;
894
892 /* Find IRQ routing entry */ 895 /* Find IRQ routing entry */
893 896
894 if (!pirq_table) 897 if (!pirq_table)
@@ -1039,56 +1042,15 @@ static void __init pcibios_fixup_irqs(void)
1039 pirq_penalty[dev->irq]++; 1042 pirq_penalty[dev->irq]++;
1040 } 1043 }
1041 1044
1045 if (io_apic_assign_pci_irqs)
1046 return;
1047
1042 dev = NULL; 1048 dev = NULL;
1043 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1049 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1044 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); 1050 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
1045 if (!pin) 1051 if (!pin)
1046 continue; 1052 continue;
1047 1053
1048#ifdef CONFIG_X86_IO_APIC
1049 /*
1050 * Recalculate IRQ numbers if we use the I/O APIC.
1051 */
1052 if (io_apic_assign_pci_irqs) {
1053 int irq;
1054
1055 /*
1056 * interrupt pins are numbered starting from 1
1057 */
1058 irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
1059 PCI_SLOT(dev->devfn), pin - 1);
1060 /*
1061 * Busses behind bridges are typically not listed in the
1062 * MP-table. In this case we have to look up the IRQ
1063 * based on the parent bus, parent slot, and pin number.
1064 * The SMP code detects such bridged busses itself so we
1065 * should get into this branch reliably.
1066 */
1067 if (irq < 0 && dev->bus->parent) {
1068 /* go back to the bridge */
1069 struct pci_dev *bridge = dev->bus->self;
1070 int bus;
1071
1072 pin = pci_swizzle_interrupt_pin(dev, pin);
1073 bus = bridge->bus->number;
1074 irq = IO_APIC_get_PCI_irq_vector(bus,
1075 PCI_SLOT(bridge->devfn), pin - 1);
1076 if (irq >= 0)
1077 dev_warn(&dev->dev,
1078 "using bridge %s INT %c to "
1079 "get IRQ %d\n",
1080 pci_name(bridge),
1081 'A' + pin - 1, irq);
1082 }
1083 if (irq >= 0) {
1084 dev_info(&dev->dev,
1085 "PCI->APIC IRQ transform: INT %c "
1086 "-> IRQ %d\n",
1087 'A' + pin - 1, irq);
1088 dev->irq = irq;
1089 }
1090 }
1091#endif
1092 /* 1054 /*
1093 * Still no IRQ? Try to lookup one... 1055 * Still no IRQ? Try to lookup one...
1094 */ 1056 */
@@ -1183,6 +1145,19 @@ int __init pcibios_irq_init(void)
1183 pcibios_enable_irq = pirq_enable_irq; 1145 pcibios_enable_irq = pirq_enable_irq;
1184 1146
1185 pcibios_fixup_irqs(); 1147 pcibios_fixup_irqs();
1148
1149 if (io_apic_assign_pci_irqs && pci_routeirq) {
1150 struct pci_dev *dev = NULL;
1151 /*
1152 * PCI IRQ routing is set up by pci_enable_device(), but we
1153 * also do it here in case there are still broken drivers that
1154 * don't use pci_enable_device().
1155 */
 1156 printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" was specified\n");
1157 for_each_pci_dev(dev)
1158 pirq_enable_irq(dev);
1159 }
1160
1186 return 0; 1161 return 0;
1187} 1162}
1188 1163
@@ -1213,16 +1188,23 @@ void pcibios_penalize_isa_irq(int irq, int active)
1213static int pirq_enable_irq(struct pci_dev *dev) 1188static int pirq_enable_irq(struct pci_dev *dev)
1214{ 1189{
1215 u8 pin; 1190 u8 pin;
1216 struct pci_dev *temp_dev;
1217 1191
1218 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); 1192 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
1219 if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { 1193 if (pin && !pcibios_lookup_irq(dev, 1)) {
1220 char *msg = ""; 1194 char *msg = "";
1221 1195
1196 if (!io_apic_assign_pci_irqs && dev->irq)
1197 return 0;
1198
1222 if (io_apic_assign_pci_irqs) { 1199 if (io_apic_assign_pci_irqs) {
1200#ifdef CONFIG_X86_IO_APIC
1201 struct pci_dev *temp_dev;
1223 int irq; 1202 int irq;
1203 struct io_apic_irq_attr irq_attr;
1224 1204
1225 irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1); 1205 irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
1206 PCI_SLOT(dev->devfn),
1207 pin - 1, &irq_attr);
1226 /* 1208 /*
1227 * Busses behind bridges are typically not listed in the MP-table. 1209 * Busses behind bridges are typically not listed in the MP-table.
1228 * In this case we have to look up the IRQ based on the parent bus, 1210 * In this case we have to look up the IRQ based on the parent bus,
@@ -1235,7 +1217,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
1235 1217
1236 pin = pci_swizzle_interrupt_pin(dev, pin); 1218 pin = pci_swizzle_interrupt_pin(dev, pin);
1237 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 1219 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
1238 PCI_SLOT(bridge->devfn), pin - 1); 1220 PCI_SLOT(bridge->devfn),
1221 pin - 1, &irq_attr);
1239 if (irq >= 0) 1222 if (irq >= 0)
1240 dev_warn(&dev->dev, "using bridge %s " 1223 dev_warn(&dev->dev, "using bridge %s "
1241 "INT %c to get IRQ %d\n", 1224 "INT %c to get IRQ %d\n",
@@ -1245,12 +1228,15 @@ static int pirq_enable_irq(struct pci_dev *dev)
1245 } 1228 }
1246 dev = temp_dev; 1229 dev = temp_dev;
1247 if (irq >= 0) { 1230 if (irq >= 0) {
1231 io_apic_set_pci_routing(&dev->dev, irq,
1232 &irq_attr);
1233 dev->irq = irq;
1248 dev_info(&dev->dev, "PCI->APIC IRQ transform: " 1234 dev_info(&dev->dev, "PCI->APIC IRQ transform: "
1249 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); 1235 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
1250 dev->irq = irq;
1251 return 0; 1236 return 0;
1252 } else 1237 } else
1253 msg = "; probably buggy MP table"; 1238 msg = "; probably buggy MP table";
1239#endif
1254 } else if (pci_probe & PCI_BIOS_IRQ_SCAN) 1240 } else if (pci_probe & PCI_BIOS_IRQ_SCAN)
1255 msg = ""; 1241 msg = "";
1256 else 1242 else
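When a device sits behind a bridge that the MP-table does not list, the lookup walks up to the bridge after swizzling the interrupt pin; pci_swizzle_interrupt_pin() implements the conventional rotation of INTA..INTD by the device's slot number. A compilable sketch of that rotation:

#include <stdio.h>

/* Conventional PCI pin swizzle: rotate pins 1..4 (INTA..INTD) by the
 * device's slot number, as pci_swizzle_interrupt_pin() computes. */
static int swizzle_pin(int slot, int pin)
{
        return ((pin - 1 + slot) % 4) + 1;
}

int main(void)
{
        /* A device in slot 2 asserting INTA appears as INTC at the bridge. */
        printf("INT%c\n", 'A' + swizzle_pin(2, 1) - 1);
        return 0;
}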
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 5fa10bb9604f..8766b0e216c5 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -375,7 +375,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res,
375 if (!fixmem32) 375 if (!fixmem32)
376 return AE_OK; 376 return AE_OK;
377 if ((mcfg_res->start >= fixmem32->address) && 377 if ((mcfg_res->start >= fixmem32->address) &&
378 (mcfg_res->end <= (fixmem32->address + 378 (mcfg_res->end < (fixmem32->address +
379 fixmem32->address_length))) { 379 fixmem32->address_length))) {
380 mcfg_res->flags = 1; 380 mcfg_res->flags = 1;
381 return AE_CTRL_TERMINATE; 381 return AE_CTRL_TERMINATE;
@@ -392,7 +392,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res,
392 return AE_OK; 392 return AE_OK;
393 393
394 if ((mcfg_res->start >= address.minimum) && 394 if ((mcfg_res->start >= address.minimum) &&
395 (mcfg_res->end <= (address.minimum + address.address_length))) { 395 (mcfg_res->end < (address.minimum + address.address_length))) {
396 mcfg_res->flags = 1; 396 mcfg_res->flags = 1;
397 return AE_CTRL_TERMINATE; 397 return AE_CTRL_TERMINATE;
398 } 398 }
@@ -418,7 +418,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
418 struct resource mcfg_res; 418 struct resource mcfg_res;
419 419
420 mcfg_res.start = start; 420 mcfg_res.start = start;
421 mcfg_res.end = end; 421 mcfg_res.end = end - 1;
422 mcfg_res.flags = 0; 422 mcfg_res.flags = 0;
423 423
424 acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL); 424 acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL);
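The mmconfig hunks fix an off-by-one: struct resource carries an inclusive end, while the ACPI window's address + address_length is exclusive, so containment must be tested with '<' and the probe must set end = start + length - 1. A compilable illustration:

#include <stdio.h>
#include <stdint.h>

/* A claimed region [start, end] with an INCLUSIVE end, tested against a
 * window [base, base + len) whose end is EXCLUSIVE: the containment test
 * needs '<', which is exactly what the fix changes. */
static int inside(uint64_t start, uint64_t end, uint64_t base, uint64_t len)
{
        return start >= base && end < base + len;
}

int main(void)
{
        /* A 0x100-byte window; a region that fills it exactly is inside... */
        printf("%d\n", inside(0xf0000000, 0xf00000ff, 0xf0000000, 0x100));
        /* ...but one that ends one byte past it is not. */
        printf("%d\n", inside(0xf0000000, 0xf0000100, 0xf0000000, 0x100));
        return 0;
}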
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7133cdf9098b..cac083386e03 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -8,6 +8,7 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/random.h> 10#include <linux/random.h>
11#include <linux/elf.h>
11#include <asm/vsyscall.h> 12#include <asm/vsyscall.h>
12#include <asm/vgtod.h> 13#include <asm/vgtod.h>
13#include <asm/proto.h> 14#include <asm/proto.h>
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09e8c36ee80..0a1700a2be9c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -20,6 +20,7 @@
20#include <linux/delay.h> 20#include <linux/delay.h>
21#include <linux/start_kernel.h> 21#include <linux/start_kernel.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/kprobes.h>
23#include <linux/bootmem.h> 24#include <linux/bootmem.h>
24#include <linux/module.h> 25#include <linux/module.h>
25#include <linux/mm.h> 26#include <linux/mm.h>
@@ -44,6 +45,7 @@
44#include <asm/processor.h> 45#include <asm/processor.h>
45#include <asm/proto.h> 46#include <asm/proto.h>
46#include <asm/msr-index.h> 47#include <asm/msr-index.h>
48#include <asm/traps.h>
47#include <asm/setup.h> 49#include <asm/setup.h>
48#include <asm/desc.h> 50#include <asm/desc.h>
49#include <asm/pgtable.h> 51#include <asm/pgtable.h>
@@ -240,10 +242,10 @@ static unsigned long xen_get_debugreg(int reg)
240 return HYPERVISOR_get_debugreg(reg); 242 return HYPERVISOR_get_debugreg(reg);
241} 243}
242 244
243void xen_leave_lazy(void) 245static void xen_end_context_switch(struct task_struct *next)
244{ 246{
245 paravirt_leave_lazy(paravirt_get_lazy_mode());
246 xen_mc_flush(); 247 xen_mc_flush();
248 paravirt_end_context_switch(next);
247} 249}
248 250
249static unsigned long xen_store_tr(void) 251static unsigned long xen_store_tr(void)
@@ -428,11 +430,44 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
428static int cvt_gate_to_trap(int vector, const gate_desc *val, 430static int cvt_gate_to_trap(int vector, const gate_desc *val,
429 struct trap_info *info) 431 struct trap_info *info)
430{ 432{
433 unsigned long addr;
434
431 if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) 435 if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
432 return 0; 436 return 0;
433 437
434 info->vector = vector; 438 info->vector = vector;
435 info->address = gate_offset(*val); 439
440 addr = gate_offset(*val);
441#ifdef CONFIG_X86_64
442 /*
443 * Look for known traps using IST, and substitute them
444 * appropriately. The debugger ones are the only ones we care
445 * about. Xen will handle faults like double_fault and
446 * machine_check, so we should never see them. Warn if
447 * there's an unexpected IST-using fault handler.
448 */
449 if (addr == (unsigned long)debug)
450 addr = (unsigned long)xen_debug;
451 else if (addr == (unsigned long)int3)
452 addr = (unsigned long)xen_int3;
453 else if (addr == (unsigned long)stack_segment)
454 addr = (unsigned long)xen_stack_segment;
455 else if (addr == (unsigned long)double_fault ||
456 addr == (unsigned long)nmi) {
457 /* Don't need to handle these */
458 return 0;
459#ifdef CONFIG_X86_MCE
460 } else if (addr == (unsigned long)machine_check) {
461 return 0;
462#endif
463 } else {
464 /* Some other trap using IST? */
465 if (WARN_ON(val->ist != 0))
466 return 0;
467 }
468#endif /* CONFIG_X86_64 */
469 info->address = addr;
470
436 info->cs = gate_segment(*val); 471 info->cs = gate_segment(*val);
437 info->flags = val->dpl; 472 info->flags = val->dpl;
438 /* interrupt gates clear IF */ 473 /* interrupt gates clear IF */
@@ -623,10 +658,26 @@ static void xen_clts(void)
623 xen_mc_issue(PARAVIRT_LAZY_CPU); 658 xen_mc_issue(PARAVIRT_LAZY_CPU);
624} 659}
625 660
661static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
662
663static unsigned long xen_read_cr0(void)
664{
665 unsigned long cr0 = percpu_read(xen_cr0_value);
666
667 if (unlikely(cr0 == 0)) {
668 cr0 = native_read_cr0();
669 percpu_write(xen_cr0_value, cr0);
670 }
671
672 return cr0;
673}
674
626static void xen_write_cr0(unsigned long cr0) 675static void xen_write_cr0(unsigned long cr0)
627{ 676{
628 struct multicall_space mcs; 677 struct multicall_space mcs;
629 678
679 percpu_write(xen_cr0_value, cr0);
680
630 /* Only pay attention to cr0.TS; everything else is 681 /* Only pay attention to cr0.TS; everything else is
631 ignored. */ 682 ignored. */
632 mcs = xen_mc_entry(0); 683 mcs = xen_mc_entry(0);
@@ -812,7 +863,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
812 863
813 .clts = xen_clts, 864 .clts = xen_clts,
814 865
815 .read_cr0 = native_read_cr0, 866 .read_cr0 = xen_read_cr0,
816 .write_cr0 = xen_write_cr0, 867 .write_cr0 = xen_write_cr0,
817 868
818 .read_cr4 = native_read_cr4, 869 .read_cr4 = native_read_cr4,
@@ -860,10 +911,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
860 /* Xen takes care of %gs when switching to usermode for us */ 911 /* Xen takes care of %gs when switching to usermode for us */
861 .swapgs = paravirt_nop, 912 .swapgs = paravirt_nop,
862 913
863 .lazy_mode = { 914 .start_context_switch = paravirt_start_context_switch,
864 .enter = paravirt_enter_lazy_cpu, 915 .end_context_switch = xen_end_context_switch,
865 .leave = xen_leave_lazy,
866 },
867}; 916};
868 917
869static const struct pv_apic_ops xen_apic_ops __initdata = { 918static const struct pv_apic_ops xen_apic_ops __initdata = {
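Reading CR0 under Xen used to go through the hypervisor-mediated path on every call; the hunks above shadow it per CPU instead, with xen_write_cr0() updating the shadow and xen_read_cr0() lazily filling it on first use. Zero is a safe "unset" sentinel because a running kernel always has at least CR0.PE set. The caching shape, with stand-in values:

#include <stdio.h>

static unsigned long cr0_shadow;     /* per-CPU in the real code */

static unsigned long hw_read_cr0(void)
{
        puts("expensive read");      /* stands in for the real register read */
        return 0x80050033UL;         /* a typical protected-mode CR0 value   */
}

static unsigned long read_cr0(void)
{
        if (cr0_shadow == 0)         /* lazily populate the shadow copy */
                cr0_shadow = hw_read_cr0();
        return cr0_shadow;
}

int main(void)
{
        read_cr0();                  /* slow path, fills the cache */
        read_cr0();                  /* served from the shadow     */
        return 0;
}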
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fba55b1a4021..4ceb28581652 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -452,10 +452,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
452void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 452void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
453 pte_t *ptep, pte_t pteval) 453 pte_t *ptep, pte_t pteval)
454{ 454{
455 /* updates to init_mm may be done without lock */
456 if (mm == &init_mm)
457 preempt_disable();
458
459 ADD_STATS(set_pte_at, 1); 455 ADD_STATS(set_pte_at, 1);
460// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); 456// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
461 ADD_STATS(set_pte_at_current, mm == current->mm); 457 ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -476,9 +472,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
476 } 472 }
477 xen_set_pte(ptep, pteval); 473 xen_set_pte(ptep, pteval);
478 474
479out: 475out: return;
480 if (mm == &init_mm)
481 preempt_enable();
482} 476}
483 477
484pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, 478pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -1152,10 +1146,8 @@ static void drop_other_mm_ref(void *info)
1152 1146
1153 /* If this cpu still has a stale cr3 reference, then make sure 1147 /* If this cpu still has a stale cr3 reference, then make sure
1154 it has been flushed. */ 1148 it has been flushed. */
1155 if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { 1149 if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
1156 load_cr3(swapper_pg_dir); 1150 load_cr3(swapper_pg_dir);
1157 arch_flush_lazy_cpu_mode();
1158 }
1159} 1151}
1160 1152
1161static void xen_drop_mm_ref(struct mm_struct *mm) 1153static void xen_drop_mm_ref(struct mm_struct *mm)
@@ -1168,7 +1160,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1168 load_cr3(swapper_pg_dir); 1160 load_cr3(swapper_pg_dir);
1169 else 1161 else
1170 leave_mm(smp_processor_id()); 1162 leave_mm(smp_processor_id());
1171 arch_flush_lazy_cpu_mode();
1172 } 1163 }
1173 1164
1174 /* Get the "official" set of cpus referring to our pagetable. */ 1165 /* Get the "official" set of cpus referring to our pagetable. */
@@ -1876,6 +1867,14 @@ __init void xen_post_allocator_init(void)
1876 xen_mark_init_mm_pinned(); 1867 xen_mark_init_mm_pinned();
1877} 1868}
1878 1869
1870static void xen_leave_lazy_mmu(void)
1871{
1872 preempt_disable();
1873 xen_mc_flush();
1874 paravirt_leave_lazy_mmu();
1875 preempt_enable();
1876}
1877
1879const struct pv_mmu_ops xen_mmu_ops __initdata = { 1878const struct pv_mmu_ops xen_mmu_ops __initdata = {
1880 .pagetable_setup_start = xen_pagetable_setup_start, 1879 .pagetable_setup_start = xen_pagetable_setup_start,
1881 .pagetable_setup_done = xen_pagetable_setup_done, 1880 .pagetable_setup_done = xen_pagetable_setup_done,
@@ -1949,7 +1948,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
1949 1948
1950 .lazy_mode = { 1949 .lazy_mode = {
1951 .enter = paravirt_enter_lazy_mmu, 1950 .enter = paravirt_enter_lazy_mmu,
1952 .leave = xen_leave_lazy, 1951 .leave = xen_leave_lazy_mmu,
1953 }, 1952 },
1954 1953
1955 .set_fixmap = xen_set_fixmap, 1954 .set_fixmap = xen_set_fixmap,
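xen_leave_lazy() is split in two: the CPU side now rides the generic start/end_context_switch hooks, and the MMU side gets xen_leave_lazy_mmu() (shown above), which flushes the multicall queue with preemption disabled so the flush happens on the CPU that queued the batch. The shape of that fix, with no-op stand-ins for the kernel primitives:

#include <stdio.h>

static void preempt_disable(void) { /* stand-in */ }
static void preempt_enable(void)  { /* stand-in */ }
static void mc_flush(void)        { puts("flush queued hypercalls"); }
static void leave_lazy(void)      { puts("leave lazy MMU mode"); }

/* Both steps under one preemption-off section, so the batch is
 * flushed on the same CPU that built it. */
static void leave_lazy_mmu(void)
{
        preempt_disable();
        mc_flush();
        leave_lazy();
        preempt_enable();
}

int main(void)
{
        leave_lazy_mmu();
        return 0;
}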
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 15c6c68db6a2..ad0047f47cd4 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -61,9 +61,9 @@ char * __init xen_memory_setup(void)
61 * - xen_start_info 61 * - xen_start_info
62 * See comment above "struct start_info" in <xen/interface/xen.h> 62 * See comment above "struct start_info" in <xen/interface/xen.h>
63 */ 63 */
64 e820_add_region(__pa(xen_start_info->mfn_list), 64 reserve_early(__pa(xen_start_info->mfn_list),
65 xen_start_info->pt_base - xen_start_info->mfn_list, 65 __pa(xen_start_info->pt_base),
66 E820_RESERVED); 66 "XEN START INFO");
67 67
68 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 68 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
69 69
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index ca6596b05d53..22494fd4c9b5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
30void xen_ident_map_ISA(void); 30void xen_ident_map_ISA(void);
31void xen_reserve_top(void); 31void xen_reserve_top(void);
32 32
33void xen_leave_lazy(void);
34void xen_post_allocator_init(void); 33void xen_post_allocator_init(void);
35 34
36char * __init xen_memory_setup(void); 35char * __init xen_memory_setup(void);
diff --git a/block/blk-core.c b/block/blk-core.c
index c89883be8737..9475bf99b891 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,22 +28,14 @@
28#include <linux/task_io_accounting_ops.h> 28#include <linux/task_io_accounting_ops.h>
29#include <linux/blktrace_api.h> 29#include <linux/blktrace_api.h>
30#include <linux/fault-inject.h> 30#include <linux/fault-inject.h>
31#include <trace/block.h> 31
32#define CREATE_TRACE_POINTS
33#include <trace/events/block.h>
32 34
33#include "blk.h" 35#include "blk.h"
34 36
35DEFINE_TRACE(block_plug);
36DEFINE_TRACE(block_unplug_io);
37DEFINE_TRACE(block_unplug_timer);
38DEFINE_TRACE(block_getrq);
39DEFINE_TRACE(block_sleeprq);
40DEFINE_TRACE(block_rq_requeue);
41DEFINE_TRACE(block_bio_backmerge);
42DEFINE_TRACE(block_bio_frontmerge);
43DEFINE_TRACE(block_bio_queue);
44DEFINE_TRACE(block_rq_complete);
45DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
46EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); 37EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
38EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
47 39
48static int __make_request(struct request_queue *q, struct bio *bio); 40static int __make_request(struct request_queue *q, struct bio *bio);
49 41
@@ -1277,7 +1269,7 @@ static inline void blk_partition_remap(struct bio *bio)
1277 bio->bi_bdev = bdev->bd_contains; 1269 bio->bi_bdev = bdev->bd_contains;
1278 1270
1279 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, 1271 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1280 bdev->bd_dev, bio->bi_sector, 1272 bdev->bd_dev,
1281 bio->bi_sector - p->start_sect); 1273 bio->bi_sector - p->start_sect);
1282 } 1274 }
1283} 1275}
@@ -1446,8 +1438,7 @@ static inline void __generic_make_request(struct bio *bio)
1446 goto end_io; 1438 goto end_io;
1447 1439
1448 if (old_sector != -1) 1440 if (old_sector != -1)
1449 trace_block_remap(q, bio, old_dev, bio->bi_sector, 1441 trace_block_remap(q, bio, old_dev, old_sector);
1450 old_sector);
1451 1442
1452 trace_block_bio_queue(q, bio); 1443 trace_block_bio_queue(q, bio);
1453 1444
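The tracepoint boilerplate removal above relies on the ftrace events idiom: exactly one translation unit defines CREATE_TRACE_POINTS before including the events header, which expands the declarations into definitions once; every other user includes the header plainly. In outline (a build-context sketch, not a standalone program):

/* block/blk-core.c -- the one translation unit that instantiates
 * the block tracepoints: */
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

/* block/elevator.c and every other user -- declarations only: */
#include <trace/events/block.h>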
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba3379a..26f9ec28f56c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -383,16 +383,21 @@ struct kobj_type blk_queue_ktype = {
383int blk_register_queue(struct gendisk *disk) 383int blk_register_queue(struct gendisk *disk)
384{ 384{
385 int ret; 385 int ret;
386 struct device *dev = disk_to_dev(disk);
386 387
387 struct request_queue *q = disk->queue; 388 struct request_queue *q = disk->queue;
388 389
389 if (WARN_ON(!q)) 390 if (WARN_ON(!q))
390 return -ENXIO; 391 return -ENXIO;
391 392
393 ret = blk_trace_init_sysfs(dev);
394 if (ret)
395 return ret;
396
392 if (!q->request_fn) 397 if (!q->request_fn)
393 return 0; 398 return 0;
394 399
395 ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), 400 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj),
396 "%s", "queue"); 401 "%s", "queue");
397 if (ret < 0) 402 if (ret < 0)
398 return ret; 403 return ret;
diff --git a/block/bsg.c b/block/bsg.c
index 206060e795da..dd81be455e00 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -315,6 +315,7 @@ out:
315 blk_put_request(rq); 315 blk_put_request(rq);
316 if (next_rq) { 316 if (next_rq) {
317 blk_rq_unmap_user(next_rq->bio); 317 blk_rq_unmap_user(next_rq->bio);
318 next_rq->bio = NULL;
318 blk_put_request(next_rq); 319 blk_put_request(next_rq);
319 } 320 }
320 return ERR_PTR(ret); 321 return ERR_PTR(ret);
@@ -448,6 +449,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
448 hdr->dout_resid = rq->data_len; 449 hdr->dout_resid = rq->data_len;
449 hdr->din_resid = rq->next_rq->data_len; 450 hdr->din_resid = rq->next_rq->data_len;
450 blk_rq_unmap_user(bidi_bio); 451 blk_rq_unmap_user(bidi_bio);
452 rq->next_rq->bio = NULL;
451 blk_put_request(rq->next_rq); 453 blk_put_request(rq->next_rq);
452 } else if (rq_data_dir(rq) == READ) 454 } else if (rq_data_dir(rq) == READ)
453 hdr->din_resid = rq->data_len; 455 hdr->din_resid = rq->data_len;
@@ -466,6 +468,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
466 blk_rq_unmap_user(bio); 468 blk_rq_unmap_user(bio);
467 if (rq->cmd != rq->__cmd) 469 if (rq->cmd != rq->__cmd)
468 kfree(rq->cmd); 470 kfree(rq->cmd);
471 rq->bio = NULL;
469 blk_put_request(rq); 472 blk_put_request(rq);
470 473
471 return ret; 474 return ret;
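Both bsg fixes are the same defensive pattern: once blk_rq_unmap_user() has released a bio, the request must not keep a stale pointer to it, or the generic teardown in blk_put_request() would touch it a second time. Free-then-NULL in a self-contained sketch:

#include <stdio.h>
#include <stdlib.h>

struct request { void *bio; };

static void put_request(struct request *rq)
{
        free(rq->bio);     /* teardown frees whatever is still attached */
}

int main(void)
{
        struct request rq = { .bio = malloc(32) };

        free(rq.bio);      /* the unmap path released the buffer...     */
        rq.bio = NULL;     /* ...so detach it, or put_request() would
                              free it a second time */
        put_request(&rq);  /* free(NULL) is a safe no-op */
        return 0;
}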
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f87615dea46b..f8c218cd08e1 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
568 memcpy(&buts.name, &cbuts.name, 32); 568 memcpy(&buts.name, &cbuts.name, 32);
569 569
570 mutex_lock(&bdev->bd_mutex); 570 mutex_lock(&bdev->bd_mutex);
571 ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); 571 ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
572 mutex_unlock(&bdev->bd_mutex); 572 mutex_unlock(&bdev->bd_mutex);
573 if (ret) 573 if (ret)
574 return ret; 574 return ret;
diff --git a/block/elevator.c b/block/elevator.c
index 7073a9072577..e220f0c543e3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,17 +33,16 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
-DEFINE_TRACE(block_rq_abort);
-
 /*
  * Merge hash stuff.
  */
@@ -55,9 +54,6 @@ static const int elv_hash_shift = 6;
 #define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
-DEFINE_TRACE(block_rq_insert);
-DEFINE_TRACE(block_rq_issue);
-
 /*
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
diff --git a/crypto/ahash.c b/crypto/ahash.c
index b2d1ee32cfe8..f3476374f764 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -82,10 +82,11 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err)
 	if (err)
 		return err;
 
-	walk->offset = 0;
-
-	if (nbytes)
+	if (nbytes) {
+		walk->offset = 0;
+		walk->pg++;
 		return hash_walk_next(walk);
+	}
 
 	if (!walk->total)
 		return 0;
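
The reordered exit in crypto_hash_walk_done() above resets the page offset and advances walk->pg only while the transfer still has bytes left (nbytes != 0); the old code cleared the offset unconditionally and did not advance the page here. A self-contained sketch of that advance rule, with a toy 16-byte page and a struct walk standing in for the crypto walk state; walk_done() is an illustrative reduction, not the kernel function:

	#include <stdio.h>

	#define PAGE 16				/* toy page size */

	struct walk { int pg; int offset; int total; };

	/* One completion step: account for the processed bytes, then move
	 * to the next page only if the transfer still has bytes left. */
	static int walk_done(struct walk *w, int processed)
	{
		int nbytes;

		w->total -= processed;
		nbytes = w->total > PAGE ? PAGE : w->total;
		if (nbytes) {
			w->offset = 0;		/* a fresh page starts at offset 0 */
			w->pg++;		/* hash_walk_next() would map it */
		}
		return nbytes;
	}

	int main(void)
	{
		struct walk w = { .pg = 0, .offset = 3, .total = 40 };
		int chunk = PAGE - w.offset;	/* 13 bytes fit on the first page */

		while ((chunk = walk_done(&w, chunk)))
			printf("page %d: %d bytes\n", w.pg, chunk);
		return 0;	/* pages 1 and 2 carry 16 and 11 bytes */
	}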
diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index 95650f83ce2e..bc46de3d967f 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -116,9 +116,6 @@ int acpi_pci_bind(struct acpi_device *device)
 	struct acpi_pci_data *pdata;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	acpi_handle handle;
-	struct pci_dev *dev;
-	struct pci_bus *bus;
-
 
 	if (!device || !device->parent)
 		return -EINVAL;
@@ -176,20 +173,9 @@ int acpi_pci_bind(struct acpi_device *device)
 	 * Locate matching device in PCI namespace.  If it doesn't exist
 	 * this typically means that the device isn't currently inserted
 	 * (e.g. docking station, port replicator, etc.).
-	 * We cannot simply search the global pci device list, since
-	 * PCI devices are added to the global pci list when the root
-	 * bridge start ops are run, which may not have happened yet.
 	 */
-	bus = pci_find_bus(data->id.segment, data->id.bus);
-	if (bus) {
-		list_for_each_entry(dev, &bus->devices, bus_list) {
-			if (dev->devfn == PCI_DEVFN(data->id.device,
-						    data->id.function)) {
-				data->dev = dev;
-				break;
-			}
-		}
-	}
+	data->dev = pci_get_slot(pdata->bus,
+				PCI_DEVFN(data->id.device, data->id.function));
 	if (!data->dev) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 			"Device %04x:%02x:%02x.%d not present in PCI namespace\n",
@@ -259,9 +245,10 @@ int acpi_pci_bind(struct acpi_device *device)
 
       end:
 	kfree(buffer.pointer);
-	if (result)
+	if (result) {
+		pci_dev_put(data->dev);
 		kfree(data);
-
+	}
 	return result;
 }
 
@@ -303,6 +290,7 @@ static int acpi_pci_unbind(struct acpi_device *device)
 	if (data->dev->subordinate) {
 		acpi_pci_irq_del_prt(data->id.segment, data->bus->number);
 	}
+	pci_dev_put(data->dev);
 	kfree(data);
 
       end:
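
Unlike the removed open-coded bus walk, pci_get_slot() returns its struct pci_dev with the reference count raised, and the two pci_dev_put() calls added above are what keep that count balanced: one on the error exit of acpi_pci_bind(), one when acpi_pci_unbind() tears the binding down. A toy refcount sketch of the pairing; struct dev, dev_get() and dev_put() are illustrative, not the PCI core:

	#include <assert.h>
	#include <stdio.h>

	/* Toy refcounted object standing in for struct pci_dev. */
	struct dev { int refcount; };

	static struct dev global_dev = { .refcount = 1 };

	static struct dev *dev_get(void)	/* models pci_get_slot() */
	{
		global_dev.refcount++;
		return &global_dev;
	}

	static void dev_put(struct dev *d)	/* models pci_dev_put() */
	{
		assert(d->refcount > 0);
		d->refcount--;
	}

	static int bind(int fail)
	{
		struct dev *d = dev_get();

		if (fail) {
			dev_put(d);	/* error path must drop the reference */
			return -1;
		}
		dev_put(d);		/* models the put in acpi_pci_unbind() */
		return 0;
	}

	int main(void)
	{
		bind(1);
		bind(0);
		printf("refcount back to %d\n", global_dev.refcount);	/* 1 */
		return 0;
	}

The general rule: a function documented to return a referenced object obliges every exit path of its caller, error paths included, to drop exactly one reference.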
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 51b9f8280f88..2faa9e2ac893 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -401,7 +401,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		/* Interrupt Line values above 0xF are forbidden */
 		if (dev->irq > 0 && (dev->irq <= 0xF)) {
 			printk(" - using IRQ %d\n", dev->irq);
-			acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE,
+			acpi_register_gsi(&dev->dev, dev->irq,
+					  ACPI_LEVEL_SENSITIVE,
 					  ACPI_ACTIVE_LOW);
 			return 0;
 		} else {
@@ -410,7 +411,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		}
 	}
 
-	rc = acpi_register_gsi(gsi, triggering, polarity);
+	rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
 	if (rc < 0) {
 		dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n",
 			 pin_name(pin));
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 45ad3288c5ff..23f0fb84f1c1 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -844,7 +844,7 @@ static int acpi_processor_add(struct acpi_device *device)
 	if (!pr)
 		return -ENOMEM;
 
-	if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
 		kfree(pr);
 		return -ENOMEM;
 	}
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 72069ba5f1ed..10a2d913635a 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -148,6 +148,9 @@ static void acpi_timer_check_state(int state, struct acpi_processor *pr,
 	if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
 		return;
 
+	if (boot_cpu_has(X86_FEATURE_AMDC1E))
+		type = ACPI_STATE_C1;
+
 	/*
 	 * Check, if one of the previous states already marked the lapic
 	 * unstable
@@ -611,6 +614,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
 		switch (cx->type) {
 		case ACPI_STATE_C1:
 			cx->valid = 1;
+			acpi_timer_check_state(i, pr, cx);
 			break;
 
 		case ACPI_STATE_C2:
@@ -830,11 +834,12 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 
 	/* Do not access any ACPI IO ports in suspend path */
 	if (acpi_idle_suspend) {
-		acpi_safe_halt();
 		local_irq_enable();
+		cpu_relax();
 		return 0;
 	}
 
+	acpi_state_timer_broadcast(pr, cx, 1);
 	kt1 = ktime_get_real();
 	acpi_idle_do_entry(cx);
 	kt2 = ktime_get_real();
@@ -842,6 +847,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 
 	local_irq_enable();
 	cx->usage++;
+	acpi_state_timer_broadcast(pr, cx, 0);
 
 	return idle_time;
 }
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index cafb41000f6b..60e543d3234e 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -309,9 +309,15 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 				  (u32) px->bus_master_latency,
 				  (u32) px->control, (u32) px->status));
 
-		if (!px->core_frequency) {
-			printk(KERN_ERR PREFIX
-				    "Invalid _PSS data: freq is zero\n");
+		/*
+		 * Check that ACPI's u64 MHz will be valid as u32 KHz in cpufreq
+		 */
+		if (!px->core_frequency ||
+		    ((u32)(px->core_frequency * 1000) !=
+		     (px->core_frequency * 1000))) {
+			printk(KERN_ERR FW_BUG PREFIX
+			       "Invalid BIOS _PSS frequency: 0x%llx MHz\n",
+			       px->core_frequency);
 			result = -EFAULT;
 			kfree(pr->performance->states);
 			goto end;
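
The widened _PSS check above guards the unit conversion cpufreq performs later: ACPI hands over core_frequency as a 64-bit MHz value, cpufreq stores it as a 32-bit kHz value, so a zero frequency, or anything whose kHz form no longer fits in 32 bits (roughly 4294967 MHz and up), is firmware garbage and is now rejected with an FW_BUG tag. The round-trip test is easy to exercise in isolation; pss_mhz_valid() is an illustrative wrapper:

	#include <stdint.h>
	#include <stdio.h>

	/* Mirrors the _PSS sanity check: a u64 MHz value must survive
	 * conversion to a u32 kHz value unchanged. */
	static int pss_mhz_valid(uint64_t mhz)
	{
		if (!mhz)
			return 0;
		return (uint32_t)(mhz * 1000) == mhz * 1000;
	}

	int main(void)
	{
		printf("%d\n", pss_mhz_valid(2400));	/* 1: 2.4 GHz is fine */
		printf("%d\n", pss_mhz_valid(0));	/* 0: zero is rejected */
		printf("%d\n", pss_mhz_valid(5000000));	/* 0: 5e6 MHz overflows u32 kHz */
		return 0;
	}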
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index 7f16f5f8e7d3..227543789ba9 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -840,7 +840,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
 	state = acpi_get_throttling_state(pr, value);
 	if (state == -1) {
 		ACPI_WARNING((AE_INFO,
-			"Invalid throttling state, reset\n"));
+			"Invalid throttling state, reset"));
 		state = 0;
 		ret = acpi_processor_set_throttling(pr, state);
 		if (ret)
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 810cca90ca7f..1bdfb37377e3 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -570,6 +570,22 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5710Z"),
 		},
 	},
+	{
+	 .callback = video_set_bqc_offset,
+	 .ident = "eMachines E510",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_VENDOR, "EMACHINES"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "eMachines E510"),
+		},
+	},
+	{
+	 .callback = video_set_bqc_offset,
+	 .ident = "Acer Aspire 5315",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_VENDOR, "Acer"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5315"),
+		},
+	},
 	{}
 };
 
@@ -2334,7 +2350,7 @@ static int __init acpi_video_init(void)
 	return acpi_video_register();
 }
 
-void __exit acpi_video_exit(void)
+void acpi_video_exit(void)
 {
 
 	acpi_bus_unregister_driver(&acpi_video_bus);
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 08186ecbaf8d..6b91c26a4635 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -220,6 +220,7 @@ enum {
 	AHCI_HFLAG_NO_HOTPLUG	= (1 << 7), /* ignore PxSERR.DIAG.N */
 	AHCI_HFLAG_SECT255	= (1 << 8), /* max 255 sectors */
 	AHCI_HFLAG_YES_NCQ	= (1 << 9), /* force NCQ cap on */
+	AHCI_HFLAG_NO_SUSPEND	= (1 << 10), /* don't suspend */
 
 	/* ap->flags bits */
 
@@ -2316,9 +2317,17 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg)
 static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
 	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	struct ahci_host_priv *hpriv = host->private_data;
 	void __iomem *mmio = host->iomap[AHCI_PCI_BAR];
 	u32 ctl;
 
+	if (mesg.event & PM_EVENT_SUSPEND &&
+	    hpriv->flags & AHCI_HFLAG_NO_SUSPEND) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "BIOS update required for suspend/resume\n");
+		return -EIO;
+	}
+
 	if (mesg.event & PM_EVENT_SLEEP) {
 		/* AHCI spec rev1.1 section 8.3.3:
 		 * Software must disable interrupts prior to requesting a
@@ -2610,6 +2619,63 @@ static bool ahci_broken_system_poweroff(struct pci_dev *pdev)
 	return false;
 }
 
+static bool ahci_broken_suspend(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		/*
+		 * On HP dv[4-6] and HDX18 with earlier BIOSen, link
+		 * to the harddisk doesn't become online after
+		 * resuming from STR.  Warn and fail suspend.
+		 */
+		{
+			.ident = "dv4",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv4 Notebook PC"),
+			},
+			.driver_data = "F.30", /* cutoff BIOS version */
+		},
+		{
+			.ident = "dv5",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv5 Notebook PC"),
+			},
+			.driver_data = "F.16", /* cutoff BIOS version */
+		},
+		{
+			.ident = "dv6",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv6 Notebook PC"),
+			},
+			.driver_data = "F.21", /* cutoff BIOS version */
+		},
+		{
+			.ident = "HDX18",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP HDX18 Notebook PC"),
+			},
+			.driver_data = "F.23", /* cutoff BIOS version */
+		},
+		{ } /* terminate list */
+	};
+	const struct dmi_system_id *dmi = dmi_first_match(sysids);
+	const char *ver;
+
+	if (!dmi || pdev->bus->number || pdev->devfn != PCI_DEVFN(0x1f, 2))
+		return false;
+
+	ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+	return !ver || strcmp(ver, dmi->driver_data) < 0;
+}
+
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	static int printed_version;
@@ -2715,6 +2781,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			   "quirky BIOS, skipping spindown on poweroff\n");
 	}
 
+	if (ahci_broken_suspend(pdev)) {
+		hpriv->flags |= AHCI_HFLAG_NO_SUSPEND;
+		dev_printk(KERN_WARNING, &pdev->dev,
+			   "BIOS update required for suspend/resume\n");
+	}
+
 	/* CAP.NP sometimes indicate the index of the last enabled
 	 * port, at other times, that of the last possible port, so
 	 * determining the maximum port number requires looking at
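
ahci_broken_suspend() drives the new AHCI_HFLAG_NO_SUSPEND quirk entirely from DMI data: each table entry stores the first fixed BIOS release in driver_data, and a machine counts as broken only while its DMI_BIOS_VERSION sorts below that cutoff, which works because HP's "F.xx" numbering orders correctly under plain strcmp(). The comparison, lifted into a standalone program; bios_older_than() is an illustrative name:

	#include <stdio.h>
	#include <string.h>

	/* HP-style "F.xx" BIOS versions sort correctly under strcmp(),
	 * which is what the cutoff test in ahci_broken_suspend() relies on. */
	static int bios_older_than(const char *ver, const char *cutoff)
	{
		return !ver || strcmp(ver, cutoff) < 0;
	}

	int main(void)
	{
		printf("%d\n", bios_older_than("F.16", "F.30"));	/* 1: needs update */
		printf("%d\n", bios_older_than("F.30", "F.30"));	/* 0: fixed BIOS */
		printf("%d\n", bios_older_than(NULL, "F.30"));		/* 1: unknown, assume broken */
		return 0;
	}

Note the deliberate bias: an unreadable BIOS version is treated as broken, so the worst case is a refused suspend rather than a disk that never comes back.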
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index d51a17c0f59b..1aeb7082b0c4 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -1455,6 +1455,15 @@ static bool piix_broken_system_poweroff(struct pci_dev *pdev)
 			/* PCI slot number of the controller */
 			.driver_data = (void *)0x1FUL,
 		},
+		{
+			.ident = "HP Compaq nc6000",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6000"),
+			},
+			/* PCI slot number of the controller */
+			.driver_data = (void *)0x1FUL,
+		},
 
 		{ } /* terminate list */
 	};
diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index 751b7ea4816c..fc9c5d6d7d80 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -497,14 +497,16 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0x20 added DMA */
 	static const struct ata_port_info info_20 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+						ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.port_ops = &ali_20_port_ops
 	};
 	/* Revision 0x20 with support logic added UDMA */
 	static const struct ata_port_info info_20_udma = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+						ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA2,
@@ -512,7 +514,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0xC2 adds UDMA66 */
 	static const struct ata_port_info info_c2 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+						ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA4,
@@ -520,7 +523,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0xC3 is UDMA66 for now */
 	static const struct ata_port_info info_c3 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+						ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA4,
@@ -528,7 +532,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0xC4 is UDMA100 */
 	static const struct ata_port_info info_c4 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+						ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA5,
@@ -536,7 +541,7 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0xC5 is UDMA133 with LBA48 DMA */
 	static const struct ata_port_info info_c5 = {
-		.flags = ATA_FLAG_SLAVE_POSS,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c
index 2085e0a3a05a..2a6412f5d117 100644
--- a/drivers/ata/pata_efar.c
+++ b/drivers/ata/pata_efar.c
@@ -22,7 +22,7 @@
 #include <linux/ata.h>
 
 #define DRV_NAME	"pata_efar"
-#define DRV_VERSION	"0.4.4"
+#define DRV_VERSION	"0.4.5"
 
 /**
  *	efar_pre_reset	-	Enable bits
@@ -98,18 +98,17 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev)
 			    { 2, 1 },
 			    { 2, 3 }, };
 
-	if (pio > 2)
-		control |= 1;	/* TIME1 enable */
+	if (pio > 1)
+		control |= 1;	/* TIME */
 	if (ata_pio_need_iordy(adev))	/* PIO 3/4 require IORDY */
-		control |= 2;	/* IE enable */
-	/* Intel specifies that the PPE functionality is for disk only */
+		control |= 2;	/* IE */
+	/* Intel specifies that the prefetch/posting is for disk only */
 	if (adev->class == ATA_DEV_ATA)
-		control |= 4;	/* PPE enable */
+		control |= 4;	/* PPE */
 
 	pci_read_config_word(dev, idetm_port, &idetm_data);
 
-	/* Enable PPE, IE and TIME as appropriate */
-
+	/* Set PPE, IE, and TIME as appropriate */
 	if (adev->devno == 0) {
 		idetm_data &= 0xCCF0;
 		idetm_data |= control;
@@ -129,7 +128,7 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev)
 		pci_write_config_byte(dev, 0x44, slave_data);
 	}
 
-	idetm_data |= 0x4000;	/* Ensure SITRE is enabled */
+	idetm_data |= 0x4000;	/* Ensure SITRE is set */
 	pci_write_config_word(dev, idetm_port, idetm_data);
 }
 
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index f72c6c5b820f..6932e56d179c 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -48,6 +48,7 @@
  *
  */
 
+#include <linux/async.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -1028,6 +1029,7 @@ static __init int legacy_init_one(struct legacy_probe *probe)
 				&legacy_sht);
 	if (ret)
 		goto fail;
+	async_synchronize_full();
 	ld->platform_dev = pdev;
 
 	/* Nothing found means we drop the port as its probably not there */
diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c
index bdb236957cb9..f0d52f72f5bb 100644
--- a/drivers/ata/pata_netcell.c
+++ b/drivers/ata/pata_netcell.c
@@ -20,13 +20,24 @@
 
 /* No PIO or DMA methods needed for this device */
 
+static unsigned int netcell_read_id(struct ata_device *adev,
+				    struct ata_taskfile *tf, u16 *id)
+{
+	unsigned int err_mask = ata_do_dev_read_id(adev, tf, id);
+	/* Firmware forgets to mark words 85-87 valid */
+	if (err_mask == 0)
+		id[ATA_ID_CSF_DEFAULT] |= 0x4000;
+	return err_mask;
+}
+
 static struct scsi_host_template netcell_sht = {
 	ATA_BMDMA_SHT(DRV_NAME),
 };
 
 static struct ata_port_operations netcell_ops = {
 	.inherits	= &ata_bmdma_port_ops,
 	.cable_detect	= ata_cable_80wire,
+	.read_id	= netcell_read_id,
 };
 
 
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index dc030f1f00f1..c6599618523e 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -700,8 +700,10 @@ int bus_add_driver(struct device_driver *drv)
 	}
 
 	kobject_uevent(&priv->kobj, KOBJ_ADD);
-	return error;
+	return 0;
 out_unregister:
+	kfree(drv->p);
+	drv->p = NULL;
 	kobject_put(&priv->kobj);
 out_put_bus:
 	bus_put(bus);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4aa527b8a913..1977d4beb89e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -879,7 +879,7 @@ int device_add(struct device *dev)
 	}
 
 	if (!dev_name(dev))
-		goto done;
+		goto name_error;
 
 	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
 
@@ -978,6 +978,9 @@ done:
 	cleanup_device_parent(dev);
 	if (parent)
 		put_device(parent);
+name_error:
+	kfree(dev->p);
+	dev->p = NULL;
 	goto done;
 }
 
983 986
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index c51f11bb29ae..8ae0f63602e0 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -257,6 +257,10 @@ EXPORT_SYMBOL_GPL(driver_register);
  */
 void driver_unregister(struct device_driver *drv)
 {
+	if (!drv || !drv->p) {
+		WARN(1, "Unexpected driver unregister!\n");
+		return;
+	}
 	driver_remove_groups(drv, drv->groups);
 	bus_remove_driver(drv);
 }
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 69b4ddb7de3b..3e4bc699bc0f 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -357,6 +357,7 @@ static void dpm_power_up(pm_message_t state)
 {
 	struct device *dev;
 
+	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry(dev, &dpm_list, power.entry)
 		if (dev->power.status > DPM_OFF) {
 			int error;
@@ -366,6 +367,7 @@ static void dpm_power_up(pm_message_t state)
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
+	mutex_unlock(&dpm_list_mtx);
 }
 
 /**
@@ -614,6 +616,7 @@ int device_power_down(pm_message_t state)
 	int error = 0;
 
 	suspend_device_irqs();
+	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
 		error = suspend_device_noirq(dev, state);
 		if (error) {
@@ -622,6 +625,7 @@ int device_power_down(pm_message_t state)
 		}
 		dev->power.status = DPM_OFF_IRQ;
 	}
+	mutex_unlock(&dpm_list_mtx);
 	if (error)
 		device_power_up(resume_event(state));
 	return error;
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 340ba4f9dc54..4a9f3492b921 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -224,7 +224,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
 			break;
 	}
 
-	gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE,
+	gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE,
 				ACPI_ACTIVE_LOW);
 	if (gsi > 0)
 		break;
@@ -939,7 +939,7 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 		irqp = &res->data.extended_irq;
 
 		for (i = 0; i < irqp->interrupt_count; i++) {
-			irq = acpi_register_gsi(irqp->interrupts[i],
+			irq = acpi_register_gsi(NULL, irqp->interrupts[i],
 				      irqp->triggering, irqp->polarity);
 			if (irq < 0)
 				return AE_ERROR;
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index aa83a0865ec1..09050797c76a 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -2856,6 +2856,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers,
 		/* Assume a single IPMB channel at zero. */
 		intf->channels[0].medium = IPMI_CHANNEL_MEDIUM_IPMB;
 		intf->channels[0].protocol = IPMI_CHANNEL_PROTOCOL_IPMB;
+		intf->curr_channel = IPMI_MAX_CHANNELS;
 	}
 
 	if (rv == 0)
@@ -3648,13 +3649,13 @@ static int handle_new_recv_msg(ipmi_smi_t intf,
 	}
 
 	/*
-	 ** We need to make sure the channels have been initialized.
-	 ** The channel_handler routine will set the "curr_channel"
-	 ** equal to or greater than IPMI_MAX_CHANNELS when all the
-	 ** channels for this interface have been initialized.
+	 * We need to make sure the channels have been initialized.
+	 * The channel_handler routine will set the "curr_channel"
+	 * equal to or greater than IPMI_MAX_CHANNELS when all the
+	 * channels for this interface have been initialized.
 	 */
 	if (intf->curr_channel < IPMI_MAX_CHANNELS) {
-		requeue = 1;     /* Just put the message back for now */
+		requeue = 0; /* Throw the message away */
 		goto out;
 	}
 
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 8f05c38c2f06..f96d0bef855e 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -694,6 +694,8 @@ static ssize_t read_zero(struct file * file, char __user * buf,
694 written += chunk - unwritten; 694 written += chunk - unwritten;
695 if (unwritten) 695 if (unwritten)
696 break; 696 break;
697 if (signal_pending(current))
698 return written ? written : -ERESTARTSYS;
697 buf += chunk; 699 buf += chunk;
698 count -= chunk; 700 count -= chunk;
699 cond_resched(); 701 cond_resched();
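
The signal check added to read_zero() follows the usual convention for long copy loops: when a signal arrives mid-transfer, return the bytes already delivered if there are any, and -ERESTARTSYS only if nothing has been produced yet, so user space observes a short read instead of a lost one. A sketch of that partial-progress rule; chunked_fill() and the local ERESTARTSYS constant are illustrative, and the real check sits between clear_user() chunks:

	#include <stdio.h>

	#define ERESTARTSYS 512		/* illustrative stand-in for the errno */

	/* Returns bytes produced, or -ERESTARTSYS when interrupted before
	 * any byte was delivered; partial progress is always reported. */
	static long chunked_fill(long total, long chunk, long signal_at)
	{
		long written = 0;

		while (written < total) {
			if (written >= signal_at)	/* models signal_pending() */
				return written ? written : -ERESTARTSYS;
			written += chunk;		/* produce one chunk */
		}
		return written;
	}

	int main(void)
	{
		printf("%ld\n", chunked_fill(4096, 512, 1024));	/* 1024: short read */
		printf("%ld\n", chunked_fill(4096, 512, 0));	/* -512: restart */
		return 0;
	}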
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index a420e8d437dd..13f8871e5b21 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -2711,7 +2711,7 @@ static int __init mxser_module_init(void)
 				continue;
 
 			brd = &mxser_boards[m];
-			retval = mxser_get_ISA_conf(!ioaddr[b], brd);
+			retval = mxser_get_ISA_conf(ioaddr[b], brd);
 			if (retval <= 0) {
 				brd->info = NULL;
 				continue;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index d270e8eb3e67..6e2ec0b18948 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -808,7 +808,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 		ret = -ENOMEM;
 		goto nomem_out;
 	}
-	if (!alloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
 		free_cpumask_var(policy->cpus);
 		kfree(policy);
 		ret = -ENOMEM;
@@ -1070,11 +1070,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 #endif
 
+	unlock_policy_rwsem_write(cpu);
+
 	if (cpufreq_driver->target)
 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
 
-	unlock_policy_rwsem_write(cpu);
-
 	kobject_put(&data->kobj);
 
 	/* we need to make sure that the underlying kobj is actually
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 2ecd95e4ab1a..7a74d175287b 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -91,6 +91,9 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
  * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
  * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
  * is recursive for the same process. -Venki
+ * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it
+ * would deadlock with cancel_delayed_work_sync(), which is needed for proper
+ * raceless workqueue teardown.
  */
 static DEFINE_MUTEX(dbs_mutex);
 
@@ -542,7 +545,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
 {
 	dbs_info->enable = 0;
-	cancel_delayed_work(&dbs_info->work);
+	cancel_delayed_work_sync(&dbs_info->work);
 }
 
 static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 338f428a15b7..e741c339df76 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -98,6 +98,9 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
  * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
  * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
  * is recursive for the same process. -Venki
+ * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it
+ * would deadlock with cancel_delayed_work_sync(), which is needed for proper
+ * raceless workqueue teardown.
  */
 static DEFINE_MUTEX(dbs_mutex);
 
@@ -562,7 +565,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
 {
 	dbs_info->enable = 0;
-	cancel_delayed_work(&dbs_info->work);
+	cancel_delayed_work_sync(&dbs_info->work);
 }
 
 static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
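
Both governors gain the same two-part fix: dbs_timer_exit() switches to cancel_delayed_work_sync(), which waits for a running do_dbs_timer() to finish, and the new comment records the price, namely that do_dbs_timer() must never take dbs_mutex, or the teardown thread (holding dbs_mutex and waiting for the work) and the work (waiting for dbs_mutex) would deadlock. A self-contained pthreads rendering of the safe ordering; compile with -lpthread, the names mirror the kernel's but the primitives are plain POSIX:

	#include <pthread.h>
	#include <stdio.h>

	/* Stand-ins for dbs_mutex and the self-rearming governor work. */
	static pthread_mutex_t dbs_mutex = PTHREAD_MUTEX_INITIALIZER;

	static void *do_dbs_timer(void *arg)
	{
		/* Safe pattern: the work runs WITHOUT taking dbs_mutex.
		 * If it locked dbs_mutex here, the teardown thread below,
		 * which joins while holding the mutex, would deadlock. */
		(void)arg;
		return NULL;
	}

	int main(void)
	{
		pthread_t work;

		pthread_create(&work, NULL, do_dbs_timer, NULL);

		/* Governor teardown: mutex held, then wait for the work;
		 * pthread_join() models cancel_delayed_work_sync(). */
		pthread_mutex_lock(&dbs_mutex);
		pthread_join(work, NULL);
		pthread_mutex_unlock(&dbs_mutex);

		puts("teardown completed without deadlock");
		return 0;
	}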
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index da8a8ed9e411..f18d1bde0439 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -179,9 +179,14 @@ static void dma_halt(struct fsl_dma_chan *fsl_chan)
 static void set_ld_eol(struct fsl_dma_chan *fsl_chan,
 			struct fsl_desc_sw *desc)
 {
+	u64 snoop_bits;
+
+	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+		? FSL_DMA_SNEN : 0;
+
 	desc->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
-		DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL,
-		64);
+		DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
+			| snoop_bits, 64);
 }
 
 static void append_ld_queue(struct fsl_dma_chan *fsl_chan,
@@ -313,8 +318,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
 
 static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
-	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
 	struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
+	struct fsl_desc_sw *desc;
 	unsigned long flags;
 	dma_cookie_t cookie;
 
@@ -322,14 +327,17 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
 
 	cookie = fsl_chan->common.cookie;
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	desc->async_tx.cookie = cookie;
-	fsl_chan->common.cookie = desc->async_tx.cookie;
+	list_for_each_entry(desc, &tx->tx_list, node) {
+		cookie++;
+		if (cookie < 0)
+			cookie = 1;
 
-	append_ld_queue(fsl_chan, desc);
-	list_splice_init(&desc->async_tx.tx_list, fsl_chan->ld_queue.prev);
+		desc->async_tx.cookie = cookie;
+	}
+
+	fsl_chan->common.cookie = cookie;
+	append_ld_queue(fsl_chan, tx_to_fsl_desc(tx));
+	list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev);
 
 	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 
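
The rewritten fsl_dma_tx_submit() above walks every descriptor on tx->tx_list and hands each one its own cookie, keeping the dmaengine convention that cookies stay positive and wrap back to 1 rather than through 0 or a negative value. A sketch of just the wrap rule; next_cookie() is illustrative and checks INT_MAX explicitly, where the kernel relies on dma_cookie_t wrapping negative and testing cookie < 0:

	#include <limits.h>
	#include <stdio.h>

	/* DMA cookies are positive; past INT_MAX they restart at 1, never 0. */
	static int next_cookie(int cookie)
	{
		if (cookie >= INT_MAX)
			return 1;
		return cookie + 1;
	}

	int main(void)
	{
		int cookie = INT_MAX - 1;
		int i;

		for (i = 0; i < 3; i++) {
			cookie = next_cookie(cookie);
			printf("desc %d -> cookie %d\n", i, cookie);
		}
		return 0;	/* 2147483647, then 1, then 2 */
	}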
@@ -454,8 +462,8 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 {
 	struct fsl_dma_chan *fsl_chan;
 	struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
+	struct list_head *list;
 	size_t copy;
-	LIST_HEAD(link_chain);
 
 	if (!chan)
 		return NULL;
@@ -472,7 +480,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 		if (!new) {
 			dev_err(fsl_chan->dev,
 					"No free memory for link descriptor\n");
-			return NULL;
+			goto fail;
 		}
 #ifdef FSL_DMA_LD_DEBUG
 		dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
@@ -507,7 +515,19 @@
 	/* Set End-of-link to the last link descriptor of new list*/
 	set_ld_eol(fsl_chan, new);
 
-	return first ? &first->async_tx : NULL;
+	return &first->async_tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	list = &first->async_tx.tx_list;
+	list_for_each_entry_safe_reverse(new, prev, list, node) {
+		list_del(&new->node);
+		dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
+	}
+
+	return NULL;
 }
 
 /**
@@ -598,15 +618,16 @@ static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
 	dma_addr_t next_dest_addr;
 	unsigned long flags;
 
+	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
 	if (!dma_is_idle(fsl_chan))
-		return;
+		goto out_unlock;
 
 	dma_halt(fsl_chan);
 
 	/* If there are some link descriptors
 	 * not transfered in queue. We need to start it.
 	 */
-	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
 
 	/* Find the first un-transfer desciptor */
 	for (ld_node = fsl_chan->ld_queue.next;
@@ -617,19 +638,20 @@ static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
 				fsl_chan->common.cookie) == DMA_SUCCESS);
 			ld_node = ld_node->next);
 
-	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
-
 	if (ld_node != &fsl_chan->ld_queue) {
 		/* Get the ld start address from ld_queue */
 		next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys;
-		dev_dbg(fsl_chan->dev, "xfer LDs staring from %p\n",
-				(void *)next_dest_addr);
+		dev_dbg(fsl_chan->dev, "xfer LDs staring from 0x%llx\n",
+				(unsigned long long)next_dest_addr);
 		set_cdar(fsl_chan, next_dest_addr);
 		dma_start(fsl_chan);
 	} else {
 		set_cdar(fsl_chan, 0);
 		set_ndar(fsl_chan, 0);
 	}
+
+out_unlock:
+	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 }
 
 /**
@@ -734,8 +756,9 @@ static irqreturn_t fsl_dma_chan_do_interrupt(int irq, void *data)
 	 */
 	if (stat & FSL_DMA_SR_EOSI) {
 		dev_dbg(fsl_chan->dev, "event: End-of-segments INT\n");
-		dev_dbg(fsl_chan->dev, "event: clndar %p, nlndar %p\n",
-			(void *)get_cdar(fsl_chan), (void *)get_ndar(fsl_chan));
+		dev_dbg(fsl_chan->dev, "event: clndar 0x%llx, nlndar 0x%llx\n",
+			(unsigned long long)get_cdar(fsl_chan),
+			(unsigned long long)get_ndar(fsl_chan));
 		stat &= ~FSL_DMA_SR_EOSI;
 		update_cookie = 1;
 	}
@@ -830,7 +853,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
 			new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
 
 	new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
-	if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
+	if (new_fsl_chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
 		dev_err(fdev->dev, "There is no %d channel!\n",
 			new_fsl_chan->id);
 		err = -EINVAL;
@@ -925,8 +948,8 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 	}
 
 	dev_info(&dev->dev, "Probe the Freescale DMA driver for %s "
-			"controller at %p...\n",
-			match->compatible, (void *)fdev->reg.start);
+			"controller at 0x%llx...\n",
+			match->compatible, (unsigned long long)fdev->reg.start);
 	fdev->reg_base = ioremap(fdev->reg.start, fdev->reg.end
 			- fdev->reg.start + 1);
 
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 1955ee8d6d20..a600fc0f7962 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -173,7 +173,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
 	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
 
 #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
-	if (i7300_idle_platform_probe(NULL, NULL) == 0) {
+	if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
 		device->common.chancnt--;
 	}
 #endif
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index e5f5c5a8ba6c..956982f8739b 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -192,16 +192,20 @@ config EDAC_PPC4XX
 
 config EDAC_AMD8131
 	tristate "AMD8131 HyperTransport PCI-X Tunnel"
-	depends on EDAC_MM_EDAC && PCI
+	depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
 	help
 	  Support for error detection and correction on the
 	  AMD8131 HyperTransport PCI-X Tunnel chip.
+	  Note, add more Kconfig dependency if it's adopted
+	  on some machine other than Maple.
 
 config EDAC_AMD8111
 	tristate "AMD8111 HyperTransport I/O Hub"
-	depends on EDAC_MM_EDAC && PCI
+	depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
 	help
 	  Support for error detection and correction on the
 	  AMD8111 HyperTransport I/O Hub chip.
+	  Note, add more Kconfig dependency if it's adopted
+	  on some machine other than Maple.
 
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index a5fdcf02f591..59076819135d 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -35,3 +35,5 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac.o
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
 obj-$(CONFIG_EDAC_CELL)			+= cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)		+= ppc4xx_edac.o
+obj-$(CONFIG_EDAC_AMD8111)		+= amd8111_edac.o
+obj-$(CONFIG_EDAC_AMD8131)		+= amd8131_edac.o
diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c
index 614692181120..2cb58ef743e0 100644
--- a/drivers/edac/amd8111_edac.c
+++ b/drivers/edac/amd8111_edac.c
@@ -389,7 +389,7 @@ static int amd8111_dev_probe(struct pci_dev *dev,
 	dev_info->edac_dev->dev = &dev_info->dev->dev;
 	dev_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR;
 	dev_info->edac_dev->ctl_name = dev_info->ctl_name;
-	dev_info->edac_dev->dev_name = dev_info->dev->dev.bus_id;
+	dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev);
 
 	if (edac_op_state == EDAC_OPSTATE_POLL)
 		dev_info->edac_dev->edac_check = dev_info->check;
@@ -473,7 +473,7 @@ static int amd8111_pci_probe(struct pci_dev *dev,
 	pci_info->edac_dev->dev = &pci_info->dev->dev;
 	pci_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR;
 	pci_info->edac_dev->ctl_name = pci_info->ctl_name;
-	pci_info->edac_dev->dev_name = pci_info->dev->dev.bus_id;
+	pci_info->edac_dev->dev_name = dev_name(&pci_info->dev->dev);
 
 	if (edac_op_state == EDAC_OPSTATE_POLL)
 		pci_info->edac_dev->edac_check = pci_info->check;
diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c
index c083b31cac5a..b432d60c622a 100644
--- a/drivers/edac/amd8131_edac.c
+++ b/drivers/edac/amd8131_edac.c
@@ -287,7 +287,7 @@ static int amd8131_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	dev_info->edac_dev->dev = &dev_info->dev->dev;
 	dev_info->edac_dev->mod_name = AMD8131_EDAC_MOD_STR;
 	dev_info->edac_dev->ctl_name = dev_info->ctl_name;
-	dev_info->edac_dev->dev_name = dev_info->dev->dev.bus_id;
+	dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev);
 
 	if (edac_op_state == EDAC_OPSTATE_POLL)
 		dev_info->edac_dev->edac_check = amd8131_chipset.check;
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 4cd35d8fd799..f5d46e7199d4 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -67,12 +67,18 @@ config DRM_I830
67 will load the correct one. 67 will load the correct one.
68 68
69config DRM_I915 69config DRM_I915
70 tristate "i915 driver"
70 select FB_CFB_FILLRECT 71 select FB_CFB_FILLRECT
71 select FB_CFB_COPYAREA 72 select FB_CFB_COPYAREA
72 select FB_CFB_IMAGEBLIT 73 select FB_CFB_IMAGEBLIT
73 select FB 74 select FB
74 select FRAMEBUFFER_CONSOLE if !EMBEDDED 75 select FRAMEBUFFER_CONSOLE if !EMBEDDED
75 tristate "i915 driver" 76 # i915 depends on ACPI_VIDEO when ACPI is enabled
77 # but for select to work, need to select ACPI_VIDEO's dependencies, ick
78 select VIDEO_OUTPUT_CONTROL if ACPI
79 select BACKLIGHT_CLASS_DEVICE if ACPI
80 select INPUT if ACPI
81 select ACPI_VIDEO if ACPI
76 help 82 help
77 Choose this option if you have a system that has Intel 830M, 845G, 83 Choose this option if you have a system that has Intel 830M, 845G,
78 852GM, 855GM 865G or 915G integrated graphics. If M is selected, the 84 852GM, 855GM 865G or 915G integrated graphics. If M is selected, the
@@ -84,12 +90,6 @@ config DRM_I915
84config DRM_I915_KMS 90config DRM_I915_KMS
85 bool "Enable modesetting on intel by default" 91 bool "Enable modesetting on intel by default"
86 depends on DRM_I915 92 depends on DRM_I915
87 # i915 KMS depends on ACPI_VIDEO when ACPI is enabled
88 # but for select to work, need to select ACPI_VIDEO's dependencies, ick
89 select VIDEO_OUTPUT_CONTROL if ACPI
90 select BACKLIGHT_CLASS_DEVICE if ACPI
91 select INPUT if ACPI
92 select ACPI_VIDEO if ACPI
93 help 93 help
94 Choose this option if you want kernel modesetting enabled by default, 94 Choose this option if you want kernel modesetting enabled by default,
95 and you have a new enough userspace to support this. Running old 95 and you have a new enough userspace to support this. Running old
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 94a768871734..8fab7890a363 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -2294,7 +2294,12 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
 		}
 	}
 
-	if (connector->funcs->set_property)
+	/* Do DPMS ourselves */
+	if (property == connector->dev->mode_config.dpms_property) {
+		if (connector->funcs->dpms)
+			(*connector->funcs->dpms)(connector, (int) out_resp->value);
+		ret = 0;
+	} else if (connector->funcs->set_property)
 		ret = connector->funcs->set_property(connector, property, out_resp->value);
 
 	/* store the property value if succesful */
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 45890447feec..a6f73f1e99d9 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -199,6 +199,29 @@ static void drm_helper_add_std_modes(struct drm_device *dev,
 }
 
 /**
+ * drm_helper_encoder_in_use - check if a given encoder is in use
+ * @encoder: encoder to check
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Walk @encoders's DRM device's mode_config and see if it's in use.
+ *
+ * RETURNS:
+ * True if @encoder is part of the mode_config, false otherwise.
+ */
+bool drm_helper_encoder_in_use(struct drm_encoder *encoder)
+{
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder)
+			return true;
+	return false;
+}
+EXPORT_SYMBOL(drm_helper_encoder_in_use);
+
+/**
  * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config
  * @crtc: CRTC to check
  *
@@ -216,7 +239,7 @@ bool drm_helper_crtc_in_use(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	/* FIXME: Locking around list access? */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
-		if (encoder->crtc == crtc)
+		if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder))
 			return true;
 	return false;
 }
@@ -240,7 +263,7 @@ void drm_helper_disable_unused_functions(struct drm_device *dev)
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		encoder_funcs = encoder->helper_private;
-		if (!encoder->crtc)
+		if (!drm_helper_encoder_in_use(encoder))
 			(*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF);
 	}
 
@@ -935,6 +958,88 @@ bool drm_helper_initial_config(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_helper_initial_config);
 
+static int drm_helper_choose_encoder_dpms(struct drm_encoder *encoder)
+{
+	int dpms = DRM_MODE_DPMS_OFF;
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder)
+			if (connector->dpms < dpms)
+				dpms = connector->dpms;
+	return dpms;
+}
+
+static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc)
+{
+	int dpms = DRM_MODE_DPMS_OFF;
+	struct drm_connector *connector;
+	struct drm_device *dev = crtc->dev;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder && connector->encoder->crtc == crtc)
+			if (connector->dpms < dpms)
+				dpms = connector->dpms;
+	return dpms;
+}
+
+/**
+ * drm_helper_connector_dpms
+ * @connector affected connector
+ * @mode DPMS mode
+ *
+ * Calls the low-level connector DPMS function, then
+ * calls appropriate encoder and crtc DPMS functions as well
+ */
+void drm_helper_connector_dpms(struct drm_connector *connector, int mode)
+{
+	struct drm_encoder *encoder = connector->encoder;
+	struct drm_crtc *crtc = encoder ? encoder->crtc : NULL;
+	int old_dpms;
+
+	if (mode == connector->dpms)
+		return;
+
+	old_dpms = connector->dpms;
+	connector->dpms = mode;
+
+	/* from off to on, do crtc then encoder */
+	if (mode < old_dpms) {
+		if (crtc) {
+			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+			if (crtc_funcs->dpms)
+				(*crtc_funcs->dpms) (crtc,
+						     drm_helper_choose_crtc_dpms(crtc));
+		}
+		if (encoder) {
+			struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+			if (encoder_funcs->dpms)
+				(*encoder_funcs->dpms) (encoder,
+							drm_helper_choose_encoder_dpms(encoder));
+		}
+	}
+
+	/* from on to off, do encoder then crtc */
+	if (mode > old_dpms) {
+		if (encoder) {
+			struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+			if (encoder_funcs->dpms)
+				(*encoder_funcs->dpms) (encoder,
+							drm_helper_choose_encoder_dpms(encoder));
+		}
+		if (crtc) {
+			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+			if (crtc_funcs->dpms)
+				(*crtc_funcs->dpms) (crtc,
+						     drm_helper_choose_crtc_dpms(crtc));
+		}
+	}
+
+	return;
+}
+EXPORT_SYMBOL(drm_helper_connector_dpms);
+
 /**
  * drm_hotplug_stage_two
  * @dev DRM device
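
The new DPMS helpers above encode two policies: a crtc or encoder shared by several connectors runs at the most-awake level any of them requests, which is the numeric minimum because DRM_MODE_DPMS_ON is 0 and DRM_MODE_DPMS_OFF is 3, and transitions are ordered crtc-then-encoder when waking but encoder-then-crtc when sleeping. The min-scan is small enough to check standalone; choose_dpms() is an illustrative reduction of drm_helper_choose_crtc_dpms():

	#include <stdio.h>

	enum { DPMS_ON = 0, DPMS_STANDBY = 1, DPMS_SUSPEND = 2, DPMS_OFF = 3 };

	/* Effective mode for a shared resource: the most-awake (smallest)
	 * state requested by any connector still attached to it. */
	static int choose_dpms(const int *connector_dpms, int n)
	{
		int dpms = DPMS_OFF;
		int i;

		for (i = 0; i < n; i++)
			if (connector_dpms[i] < dpms)
				dpms = connector_dpms[i];
		return dpms;
	}

	int main(void)
	{
		int cloned[2] = { DPMS_OFF, DPMS_ON };	/* clone: one output still on */

		printf("%d\n", choose_dpms(cloned, 2));	/* 0: the shared crtc stays on */
		return 0;
	}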
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index ca9c61656714..6f6b26479d82 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -289,6 +289,11 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev,
289 struct drm_display_mode *mode; 289 struct drm_display_mode *mode;
290 struct detailed_pixel_timing *pt = &timing->data.pixel_data; 290 struct detailed_pixel_timing *pt = &timing->data.pixel_data;
291 291
292 /* ignore tiny modes */
293 if (((pt->hactive_hi << 8) | pt->hactive_lo) < 64 ||
294 ((pt->vactive_hi << 8) | pt->hactive_lo) < 64)
295 return NULL;
296
292 if (pt->stereo) { 297 if (pt->stereo) {
293 printk(KERN_WARNING "stereo mode not supported\n"); 298 printk(KERN_WARNING "stereo mode not supported\n");
294 return NULL; 299 return NULL;
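The hi/lo fields of a detailed timing descriptor split each active count into
a low byte plus a high part shifted up by 8, so the active width is
(hactive_hi << 8) | hactive_lo and likewise for the vertical count. A minimal
sketch of the same sanity floor, assuming the struct detailed_pixel_timing
layout used above:

    /* Reject detailed modes smaller than 64x64; real panels never
     * advertise them and they usually indicate EDID corruption. */
    static int edid_detailed_mode_is_sane(const struct detailed_pixel_timing *pt)
    {
    	int hactive = (pt->hactive_hi << 8) | pt->hactive_lo;
    	int vactive = (pt->vactive_hi << 8) | pt->vactive_lo;

    	return hactive >= 64 && vactive >= 64;
    }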
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 93e677a481f5..fc8e5acd9d9a 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -196,6 +196,7 @@ int drm_irq_install(struct drm_device *dev)
196{ 196{
197 int ret = 0; 197 int ret = 0;
198 unsigned long sh_flags = 0; 198 unsigned long sh_flags = 0;
199 char *irqname;
199 200
200 if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) 201 if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
201 return -EINVAL; 202 return -EINVAL;
@@ -227,8 +228,13 @@ int drm_irq_install(struct drm_device *dev)
227 if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED)) 228 if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED))
228 sh_flags = IRQF_SHARED; 229 sh_flags = IRQF_SHARED;
229 230
231 if (dev->devname)
232 irqname = dev->devname;
233 else
234 irqname = dev->driver->name;
235
230 ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler, 236 ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler,
231 sh_flags, dev->devname, dev); 237 sh_flags, irqname, dev);
232 238
233 if (ret < 0) { 239 if (ret < 0) {
234 mutex_lock(&dev->struct_mutex); 240 mutex_lock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 8f9372921f82..9987ab880835 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -147,7 +147,7 @@ static ssize_t status_show(struct device *device,
147 enum drm_connector_status status; 147 enum drm_connector_status status;
148 148
149 status = connector->funcs->detect(connector); 149 status = connector->funcs->detect(connector);
150 return snprintf(buf, PAGE_SIZE, "%s", 150 return snprintf(buf, PAGE_SIZE, "%s\n",
151 drm_get_connector_status_name(status)); 151 drm_get_connector_status_name(status));
152} 152}
153 153
@@ -166,7 +166,7 @@ static ssize_t dpms_show(struct device *device,
166 if (ret) 166 if (ret)
167 return 0; 167 return 0;
168 168
169 return snprintf(buf, PAGE_SIZE, "%s", 169 return snprintf(buf, PAGE_SIZE, "%s\n",
170 drm_get_dpms_name((int)dpms_status)); 170 drm_get_dpms_name((int)dpms_status));
171} 171}
172 172
@@ -176,7 +176,7 @@ static ssize_t enabled_show(struct device *device,
176{ 176{
177 struct drm_connector *connector = to_drm_connector(device); 177 struct drm_connector *connector = to_drm_connector(device);
178 178
179 return snprintf(buf, PAGE_SIZE, connector->encoder ? "enabled" : 179 return snprintf(buf, PAGE_SIZE, "%s\n", connector->encoder ? "enabled" :
180 "disabled"); 180 "disabled");
181} 181}
182 182
@@ -317,6 +317,7 @@ static struct device_attribute connector_attrs_opt1[] = {
317 317
318static struct bin_attribute edid_attr = { 318static struct bin_attribute edid_attr = {
319 .attr.name = "edid", 319 .attr.name = "edid",
320 .attr.mode = 0444,
320 .size = 128, 321 .size = 128,
321 .read = edid_show, 322 .read = edid_show,
322}; 323};
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 53d544552625..0ccb63ee50ee 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -987,12 +987,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
987 int fb_bar = IS_I9XX(dev) ? 2 : 0; 987 int fb_bar = IS_I9XX(dev) ? 2 : 0;
988 int ret = 0; 988 int ret = 0;
989 989
990 dev->devname = kstrdup(DRIVER_NAME, GFP_KERNEL);
991 if (!dev->devname) {
992 ret = -ENOMEM;
993 goto out;
994 }
995
996 dev->mode_config.fb_base = drm_get_resource_start(dev, fb_bar) & 990 dev->mode_config.fb_base = drm_get_resource_start(dev, fb_bar) &
997 0xff000000; 991 0xff000000;
998 992
@@ -1006,7 +1000,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
1006 1000
1007 ret = i915_probe_agp(dev, &agp_size, &prealloc_size); 1001 ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
1008 if (ret) 1002 if (ret)
1009 goto kfree_devname; 1003 goto out;
1010 1004
1011 /* Basic memrange allocator for stolen space (aka vram) */ 1005 /* Basic memrange allocator for stolen space (aka vram) */
1012 drm_mm_init(&dev_priv->vram, 0, prealloc_size); 1006 drm_mm_init(&dev_priv->vram, 0, prealloc_size);
@@ -1024,7 +1018,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
1024 1018
1025 ret = i915_gem_init_ringbuffer(dev); 1019 ret = i915_gem_init_ringbuffer(dev);
1026 if (ret) 1020 if (ret)
1027 goto kfree_devname; 1021 goto out;
1028 1022
1029 /* Allow hardware batchbuffers unless told otherwise. 1023 /* Allow hardware batchbuffers unless told otherwise.
1030 */ 1024 */
@@ -1056,8 +1050,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
1056 1050
1057destroy_ringbuffer: 1051destroy_ringbuffer:
1058 i915_gem_cleanup_ringbuffer(dev); 1052 i915_gem_cleanup_ringbuffer(dev);
1059kfree_devname:
1060 kfree(dev->devname);
1061out: 1053out:
1062 return ret; 1054 return ret;
1063} 1055}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9b149fe824c3..c431fa54bbb5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -180,7 +180,8 @@ typedef struct drm_i915_private {
180 int backlight_duty_cycle; /* restore backlight to this value */ 180 int backlight_duty_cycle; /* restore backlight to this value */
181 bool panel_wants_dither; 181 bool panel_wants_dither;
182 struct drm_display_mode *panel_fixed_mode; 182 struct drm_display_mode *panel_fixed_mode;
183 struct drm_display_mode *vbt_mode; /* if any */ 183 struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
184 struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
184 185
185 /* Feature bits from the VBIOS */ 186 /* Feature bits from the VBIOS */
186 unsigned int int_tv_support:1; 187 unsigned int int_tv_support:1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b189b49c7602..39f5c658ef5e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -349,7 +349,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
349 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 349 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
350 num_pages = last_data_page - first_data_page + 1; 350 num_pages = last_data_page - first_data_page + 1;
351 351
352 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); 352 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
353 if (user_pages == NULL) 353 if (user_pages == NULL)
354 return -ENOMEM; 354 return -ENOMEM;
355 355
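The kcalloc() calls above draw from the slab pools, which cap a single
allocation (typically at 128KB), while these page-pointer arrays scale with a
user-supplied buffer size and can exceed that. drm_calloc_large() and
drm_free_large() avoid the cap by falling back to vmalloc for big arrays; a
sketch of the shape of such a pair, assuming a vmalloc fallback (the real
helpers live in drmP.h):

    static inline void *example_calloc_large(size_t nmemb, size_t size)
    {
    	if (size != 0 && nmemb > ULONG_MAX / size)
    		return NULL;                   /* overflow check */

    	if (size * nmemb <= PAGE_SIZE)         /* small: slab is fine */
    		return kcalloc(nmemb, size, GFP_KERNEL);

    	/* large: bypass the slab limit via vmalloc */
    	return __vmalloc(size * nmemb,
    			 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
    			 PAGE_KERNEL);
    }

    static inline void example_free_large(void *ptr)
    {
    	if (is_vmalloc_addr(ptr))
    		vfree(ptr);
    	else
    		kfree(ptr);
    }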
@@ -429,7 +429,7 @@ fail_put_user_pages:
429 SetPageDirty(user_pages[i]); 429 SetPageDirty(user_pages[i]);
430 page_cache_release(user_pages[i]); 430 page_cache_release(user_pages[i]);
431 } 431 }
432 kfree(user_pages); 432 drm_free_large(user_pages);
433 433
434 return ret; 434 return ret;
435} 435}
@@ -649,7 +649,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
649 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 649 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
650 num_pages = last_data_page - first_data_page + 1; 650 num_pages = last_data_page - first_data_page + 1;
651 651
652 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); 652 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
653 if (user_pages == NULL) 653 if (user_pages == NULL)
654 return -ENOMEM; 654 return -ENOMEM;
655 655
@@ -719,7 +719,7 @@ out_unlock:
719out_unpin_pages: 719out_unpin_pages:
720 for (i = 0; i < pinned_pages; i++) 720 for (i = 0; i < pinned_pages; i++)
721 page_cache_release(user_pages[i]); 721 page_cache_release(user_pages[i]);
722 kfree(user_pages); 722 drm_free_large(user_pages);
723 723
724 return ret; 724 return ret;
725} 725}
@@ -824,7 +824,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
824 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 824 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
825 num_pages = last_data_page - first_data_page + 1; 825 num_pages = last_data_page - first_data_page + 1;
826 826
827 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); 827 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
828 if (user_pages == NULL) 828 if (user_pages == NULL)
829 return -ENOMEM; 829 return -ENOMEM;
830 830
@@ -902,7 +902,7 @@ fail_unlock:
902fail_put_user_pages: 902fail_put_user_pages:
903 for (i = 0; i < pinned_pages; i++) 903 for (i = 0; i < pinned_pages; i++)
904 page_cache_release(user_pages[i]); 904 page_cache_release(user_pages[i]);
905 kfree(user_pages); 905 drm_free_large(user_pages);
906 906
907 return ret; 907 return ret;
908} 908}
@@ -1145,7 +1145,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1145 mutex_unlock(&dev->struct_mutex); 1145 mutex_unlock(&dev->struct_mutex);
1146 return VM_FAULT_SIGBUS; 1146 return VM_FAULT_SIGBUS;
1147 } 1147 }
1148 list_add(&obj_priv->list, &dev_priv->mm.inactive_list); 1148
1149 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1150 if (ret) {
1151 mutex_unlock(&dev->struct_mutex);
1152 return VM_FAULT_SIGBUS;
1153 }
1154
1155 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1149 } 1156 }
1150 1157
1151 /* Need a new fence register? */ 1158 /* Need a new fence register? */
@@ -1375,7 +1382,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1375 mutex_unlock(&dev->struct_mutex); 1382 mutex_unlock(&dev->struct_mutex);
1376 return ret; 1383 return ret;
1377 } 1384 }
1378 list_add(&obj_priv->list, &dev_priv->mm.inactive_list); 1385 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1379 } 1386 }
1380 1387
1381 drm_gem_object_unreference(obj); 1388 drm_gem_object_unreference(obj);
@@ -1408,9 +1415,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
1408 } 1415 }
1409 obj_priv->dirty = 0; 1416 obj_priv->dirty = 0;
1410 1417
1411 drm_free(obj_priv->pages, 1418 drm_free_large(obj_priv->pages);
1412 page_count * sizeof(struct page *),
1413 DRM_MEM_DRIVER);
1414 obj_priv->pages = NULL; 1419 obj_priv->pages = NULL;
1415} 1420}
1416 1421
@@ -2024,8 +2029,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
2024 */ 2029 */
2025 page_count = obj->size / PAGE_SIZE; 2030 page_count = obj->size / PAGE_SIZE;
2026 BUG_ON(obj_priv->pages != NULL); 2031 BUG_ON(obj_priv->pages != NULL);
2027 obj_priv->pages = drm_calloc(page_count, sizeof(struct page *), 2032 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2028 DRM_MEM_DRIVER);
2029 if (obj_priv->pages == NULL) { 2033 if (obj_priv->pages == NULL) {
2030 DRM_ERROR("Failed to allocate page list\n"); 2034 DRM_ERROR("Failed to allocate page list\n");
2031 obj_priv->pages_refcount--; 2035 obj_priv->pages_refcount--;
@@ -2131,8 +2135,10 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2131 return; 2135 return;
2132 } 2136 }
2133 2137
2134 pitch_val = (obj_priv->stride / 128) - 1; 2138 pitch_val = obj_priv->stride / 128;
2135 WARN_ON(pitch_val & ~0x0000000f); 2139 pitch_val = ffs(pitch_val) - 1;
2140 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2141
2136 val = obj_priv->gtt_offset; 2142 val = obj_priv->gtt_offset;
2137 if (obj_priv->tiling_mode == I915_TILING_Y) 2143 if (obj_priv->tiling_mode == I915_TILING_Y)
2138 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2144 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
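The fence registers on these parts store the pitch as a power-of-two exponent
rather than a raw tile count, which is why the old `(stride / 128) - 1`
encoding was wrong. For a power-of-two stride, ffs() of the tile count minus
one is exactly log2; a worked sketch:

    /* Sketch: log2 pitch encoding for i830-class fences, assuming the
     * stride is a power-of-two multiple of the 128-byte tile width. */
    static uint32_t i830_fence_pitch_bits(uint32_t stride)
    {
    	uint32_t tiles = stride / 128;	/* e.g. stride 512 -> 4 tiles */
    	return ffs(tiles) - 1;		/* ffs(4) - 1 == 2 == log2(4) */
    }

With this encoding the redefined I830_FENCE_MAX_PITCH_VAL of 6 corresponds to
a maximum stride of 128 << 6 = 8192 bytes.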
@@ -2254,9 +2260,6 @@ try_again:
2254 goto try_again; 2260 goto try_again;
2255 } 2261 }
2256 2262
2257 BUG_ON(old_obj_priv->active ||
2258 (reg->obj->write_domain & I915_GEM_GPU_DOMAINS));
2259
2260 /* 2263 /*
2261 * Zap this virtual mapping so we can set up a fence again 2264 * Zap this virtual mapping so we can set up a fence again
2262 * for this object next time we need it. 2265 * for this object next time we need it.
@@ -2424,6 +2427,16 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
2424 if (obj_priv->pages == NULL) 2427 if (obj_priv->pages == NULL)
2425 return; 2428 return;
2426 2429
2430 /* XXX: The 865 in particular appears to be weird in how it handles
2431 * cache flushing. We haven't figured it out, but the
2432 * clflush+agp_chipset_flush doesn't appear to successfully get the
2433 * data visible to the GPU, while wbinvd + agp_chipset_flush does.
2434 */
2435 if (IS_I865G(obj->dev)) {
2436 wbinvd();
2437 return;
2438 }
2439
2427 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2440 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2428} 2441}
2429 2442
@@ -3111,7 +3124,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
3111 reloc_count += exec_list[i].relocation_count; 3124 reloc_count += exec_list[i].relocation_count;
3112 } 3125 }
3113 3126
3114 *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER); 3127 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3115 if (*relocs == NULL) 3128 if (*relocs == NULL)
3116 return -ENOMEM; 3129 return -ENOMEM;
3117 3130
@@ -3125,8 +3138,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
3125 exec_list[i].relocation_count * 3138 exec_list[i].relocation_count *
3126 sizeof(**relocs)); 3139 sizeof(**relocs));
3127 if (ret != 0) { 3140 if (ret != 0) {
3128 drm_free(*relocs, reloc_count * sizeof(**relocs), 3141 drm_free_large(*relocs);
3129 DRM_MEM_DRIVER);
3130 *relocs = NULL; 3142 *relocs = NULL;
3131 return -EFAULT; 3143 return -EFAULT;
3132 } 3144 }
@@ -3165,7 +3177,7 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
3165 } 3177 }
3166 3178
3167err: 3179err:
3168 drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); 3180 drm_free_large(relocs);
3169 3181
3170 return ret; 3182 return ret;
3171} 3183}
@@ -3198,10 +3210,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
3198 return -EINVAL; 3210 return -EINVAL;
3199 } 3211 }
3200 /* Copy in the exec list from userland */ 3212 /* Copy in the exec list from userland */
3201 exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count, 3213 exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count);
3202 DRM_MEM_DRIVER); 3214 object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count);
3203 object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
3204 DRM_MEM_DRIVER);
3205 if (exec_list == NULL || object_list == NULL) { 3215 if (exec_list == NULL || object_list == NULL) {
3206 DRM_ERROR("Failed to allocate exec or object list " 3216 DRM_ERROR("Failed to allocate exec or object list "
3207 "for %d buffers\n", 3217 "for %d buffers\n",
@@ -3462,10 +3472,8 @@ err:
3462 } 3472 }
3463 3473
3464pre_mutex_err: 3474pre_mutex_err:
3465 drm_free(object_list, sizeof(*object_list) * args->buffer_count, 3475 drm_free_large(object_list);
3466 DRM_MEM_DRIVER); 3476 drm_free_large(exec_list);
3467 drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
3468 DRM_MEM_DRIVER);
3469 drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects, 3477 drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects,
3470 DRM_MEM_DRIVER); 3478 DRM_MEM_DRIVER);
3471 3479
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 52a059354e83..540dd336e6ec 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -213,7 +213,8 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
213 if (tiling_mode == I915_TILING_NONE) 213 if (tiling_mode == I915_TILING_NONE)
214 return true; 214 return true;
215 215
216 if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 216 if (!IS_I9XX(dev) ||
217 (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
217 tile_width = 128; 218 tile_width = 128;
218 else 219 else
219 tile_width = 512; 220 tile_width = 512;
@@ -225,11 +226,18 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
225 if (stride / 128 > I965_FENCE_MAX_PITCH_VAL) 226 if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
226 return false; 227 return false;
227 } else if (IS_I9XX(dev)) { 228 } else if (IS_I9XX(dev)) {
228 if (stride / tile_width > I830_FENCE_MAX_PITCH_VAL || 229 uint32_t pitch_val = ffs(stride / tile_width) - 1;
230
231 /* XXX: For Y tiling, FENCE_MAX_PITCH_VAL is actually 6 (8KB)
232 * instead of 4 (2KB) on 945s.
233 */
234 if (pitch_val > I915_FENCE_MAX_PITCH_VAL ||
229 size > (I830_FENCE_MAX_SIZE_VAL << 20)) 235 size > (I830_FENCE_MAX_SIZE_VAL << 20))
230 return false; 236 return false;
231 } else { 237 } else {
232 if (stride / 128 > I830_FENCE_MAX_PITCH_VAL || 238 uint32_t pitch_val = ffs(stride / tile_width) - 1;
239
240 if (pitch_val > I830_FENCE_MAX_PITCH_VAL ||
233 size > (I830_FENCE_MAX_SIZE_VAL << 19)) 241 size > (I830_FENCE_MAX_SIZE_VAL << 19))
234 return false; 242 return false;
235 } 243 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 15da44cf21b1..375569d01d01 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -190,7 +190,8 @@
190#define I830_FENCE_SIZE_BITS(size) ((ffs((size) >> 19) - 1) << 8) 190#define I830_FENCE_SIZE_BITS(size) ((ffs((size) >> 19) - 1) << 8)
191#define I830_FENCE_PITCH_SHIFT 4 191#define I830_FENCE_PITCH_SHIFT 4
192#define I830_FENCE_REG_VALID (1<<0) 192#define I830_FENCE_REG_VALID (1<<0)
193#define I830_FENCE_MAX_PITCH_VAL 0x10 193#define I915_FENCE_MAX_PITCH_VAL 0x10
194#define I830_FENCE_MAX_PITCH_VAL 6
194#define I830_FENCE_MAX_SIZE_VAL (1<<8) 195#define I830_FENCE_MAX_SIZE_VAL (1<<8)
195 196
196#define I915_FENCE_START_MASK 0x0ff00000 197#define I915_FENCE_START_MASK 0x0ff00000
@@ -1410,9 +1411,25 @@
1410 1411
1411/* Cursor A & B regs */ 1412/* Cursor A & B regs */
1412#define CURACNTR 0x70080 1413#define CURACNTR 0x70080
1414/* Old style CUR*CNTR flags (desktop 8xx) */
1415#define CURSOR_ENABLE 0x80000000
1416#define CURSOR_GAMMA_ENABLE 0x40000000
1417#define CURSOR_STRIDE_MASK 0x30000000
1418#define CURSOR_FORMAT_SHIFT 24
1419#define CURSOR_FORMAT_MASK (0x07 << CURSOR_FORMAT_SHIFT)
1420#define CURSOR_FORMAT_2C (0x00 << CURSOR_FORMAT_SHIFT)
1421#define CURSOR_FORMAT_3C (0x01 << CURSOR_FORMAT_SHIFT)
1422#define CURSOR_FORMAT_4C (0x02 << CURSOR_FORMAT_SHIFT)
1423#define CURSOR_FORMAT_ARGB (0x04 << CURSOR_FORMAT_SHIFT)
1424#define CURSOR_FORMAT_XRGB (0x05 << CURSOR_FORMAT_SHIFT)
1425/* New style CUR*CNTR flags */
1426#define CURSOR_MODE 0x27
1413#define CURSOR_MODE_DISABLE 0x00 1427#define CURSOR_MODE_DISABLE 0x00
1414#define CURSOR_MODE_64_32B_AX 0x07 1428#define CURSOR_MODE_64_32B_AX 0x07
1415#define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX) 1429#define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX)
1430#define MCURSOR_PIPE_SELECT (1 << 28)
1431#define MCURSOR_PIPE_A 0x00
1432#define MCURSOR_PIPE_B (1 << 28)
1416#define MCURSOR_GAMMA_ENABLE (1 << 26) 1433#define MCURSOR_GAMMA_ENABLE (1 << 26)
1417#define CURABASE 0x70084 1434#define CURABASE 0x70084
1418#define CURAPOS 0x70088 1435#define CURAPOS 0x70088
@@ -1420,6 +1437,7 @@
1420#define CURSOR_POS_SIGN 0x8000 1437#define CURSOR_POS_SIGN 0x8000
1421#define CURSOR_X_SHIFT 0 1438#define CURSOR_X_SHIFT 0
1422#define CURSOR_Y_SHIFT 16 1439#define CURSOR_Y_SHIFT 16
1440#define CURSIZE 0x700a0
1423#define CURBCNTR 0x700c0 1441#define CURBCNTR 0x700c0
1424#define CURBBASE 0x700c4 1442#define CURBBASE 0x700c4
1425#define CURBPOS 0x700c8 1443#define CURBPOS 0x700c8
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index fc28e2bbd542..9d78cff33b24 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -57,9 +57,43 @@ find_section(struct bdb_header *bdb, int section_id)
57 return NULL; 57 return NULL;
58} 58}
59 59
60/* Try to find panel data */
61static void 60static void
62parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) 61fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode,
62 struct lvds_dvo_timing *dvo_timing)
63{
64 panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) |
65 dvo_timing->hactive_lo;
66 panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay +
67 ((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo);
68 panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start +
69 dvo_timing->hsync_pulse_width;
70 panel_fixed_mode->htotal = panel_fixed_mode->hdisplay +
71 ((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo);
72
73 panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) |
74 dvo_timing->vactive_lo;
75 panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay +
76 dvo_timing->vsync_off;
77 panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start +
78 dvo_timing->vsync_pulse_width;
79 panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay +
80 ((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo);
81 panel_fixed_mode->clock = dvo_timing->clock * 10;
82 panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED;
83
84 /* Some VBTs have bogus h/vtotal values */
85 if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal)
86 panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1;
87 if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal)
88 panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1;
89
90 drm_mode_set_name(panel_fixed_mode);
91}
92
93/* Try to find integrated panel data */
94static void
95parse_lfp_panel_data(struct drm_i915_private *dev_priv,
96 struct bdb_header *bdb)
63{ 97{
64 struct bdb_lvds_options *lvds_options; 98 struct bdb_lvds_options *lvds_options;
65 struct bdb_lvds_lfp_data *lvds_lfp_data; 99 struct bdb_lvds_lfp_data *lvds_lfp_data;
@@ -91,38 +125,45 @@ parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
91 panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode), 125 panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode),
92 DRM_MEM_DRIVER); 126 DRM_MEM_DRIVER);
93 127
94 panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) | 128 fill_detail_timing_data(panel_fixed_mode, dvo_timing);
95 dvo_timing->hactive_lo;
96 panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay +
97 ((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo);
98 panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start +
99 dvo_timing->hsync_pulse_width;
100 panel_fixed_mode->htotal = panel_fixed_mode->hdisplay +
101 ((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo);
102 129
103 panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) | 130 dev_priv->lfp_lvds_vbt_mode = panel_fixed_mode;
104 dvo_timing->vactive_lo;
105 panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay +
106 dvo_timing->vsync_off;
107 panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start +
108 dvo_timing->vsync_pulse_width;
109 panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay +
110 ((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo);
111 panel_fixed_mode->clock = dvo_timing->clock * 10;
112 panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED;
113 131
114 /* Some VBTs have bogus h/vtotal values */ 132 DRM_DEBUG("Found panel mode in BIOS VBT tables:\n");
115 if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) 133 drm_mode_debug_printmodeline(panel_fixed_mode);
116 panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1;
117 if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal)
118 panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1;
119 134
120 drm_mode_set_name(panel_fixed_mode); 135 return;
136}
137
138/* Try to find sdvo panel data */
139static void
140parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
141 struct bdb_header *bdb)
142{
143 struct bdb_sdvo_lvds_options *sdvo_lvds_options;
144 struct lvds_dvo_timing *dvo_timing;
145 struct drm_display_mode *panel_fixed_mode;
121 146
122 dev_priv->vbt_mode = panel_fixed_mode; 147 dev_priv->sdvo_lvds_vbt_mode = NULL;
123 148
124 DRM_DEBUG("Found panel mode in BIOS VBT tables:\n"); 149 sdvo_lvds_options = find_section(bdb, BDB_SDVO_LVDS_OPTIONS);
125 drm_mode_debug_printmodeline(panel_fixed_mode); 150 if (!sdvo_lvds_options)
151 return;
152
153 dvo_timing = find_section(bdb, BDB_SDVO_PANEL_DTDS);
154 if (!dvo_timing)
155 return;
156
157 panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode),
158 DRM_MEM_DRIVER);
159
160 if (!panel_fixed_mode)
161 return;
162
163 fill_detail_timing_data(panel_fixed_mode,
164 dvo_timing + sdvo_lvds_options->panel_type);
165
166 dev_priv->sdvo_lvds_vbt_mode = panel_fixed_mode;
126 167
127 return; 168 return;
128} 169}
@@ -199,7 +240,8 @@ intel_init_bios(struct drm_device *dev)
199 240
200 /* Grab useful general definitions */ 241 /* Grab useful general definitions */
201 parse_general_features(dev_priv, bdb); 242 parse_general_features(dev_priv, bdb);
202 parse_panel_data(dev_priv, bdb); 243 parse_lfp_panel_data(dev_priv, bdb);
244 parse_sdvo_panel_data(dev_priv, bdb);
203 245
204 pci_unmap_rom(pdev, bios); 246 pci_unmap_rom(pdev, bios);
205 247
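fill_detail_timing_data() recombines fields the VBT stores as split hi/lo byte
pairs, and the DVO clock is kept in 10 kHz units while drm_display_mode wants
kHz. A worked example with hypothetical panel values:

    /*
     * hactive_hi = 0x05, hactive_lo = 0x00  ->  (0x05 << 8) | 0x00 = 1280
     * vactive_hi = 0x03, vactive_lo = 0x20  ->  (0x03 << 8) | 0x20 = 800
     * clock      = 7105 (10 kHz units)      ->  7105 * 10 = 71050 kHz
     */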
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index de621aad85b5..8ca2cde15804 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -279,6 +279,23 @@ struct vch_bdb_22 {
279 struct vch_panel_data panels[16]; 279 struct vch_panel_data panels[16];
280} __attribute__((packed)); 280} __attribute__((packed));
281 281
282struct bdb_sdvo_lvds_options {
283 u8 panel_backlight;
284 u8 h40_set_panel_type;
285 u8 panel_type;
286 u8 ssc_clk_freq;
287 u16 als_low_trip;
288 u16 als_high_trip;
289 u8 sclalarcoeff_tab_row_num;
290 u8 sclalarcoeff_tab_row_size;
291 u8 coefficient[8];
292 u8 panel_misc_bits_1;
293 u8 panel_misc_bits_2;
294 u8 panel_misc_bits_3;
295 u8 panel_misc_bits_4;
296} __attribute__((packed));
297
298
282bool intel_init_bios(struct drm_device *dev); 299bool intel_init_bios(struct drm_device *dev);
283 300
284/* 301/*
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 19148c3df637..79acc4f4c1f8 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -198,9 +198,142 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector)
198 return intel_ddc_probe(intel_output); 198 return intel_ddc_probe(intel_output);
199} 199}
200 200
201static enum drm_connector_status
202intel_crt_load_detect(struct drm_crtc *crtc, struct intel_output *intel_output)
203{
204 struct drm_encoder *encoder = &intel_output->enc;
205 struct drm_device *dev = encoder->dev;
206 struct drm_i915_private *dev_priv = dev->dev_private;
207 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
208 uint32_t pipe = intel_crtc->pipe;
209 uint32_t save_bclrpat;
210 uint32_t save_vtotal;
211 uint32_t vtotal, vactive;
212 uint32_t vsample;
213 uint32_t vblank, vblank_start, vblank_end;
214 uint32_t dsl;
215 uint32_t bclrpat_reg;
216 uint32_t vtotal_reg;
217 uint32_t vblank_reg;
218 uint32_t vsync_reg;
219 uint32_t pipeconf_reg;
220 uint32_t pipe_dsl_reg;
221 uint8_t st00;
222 enum drm_connector_status status;
223
224 if (pipe == 0) {
225 bclrpat_reg = BCLRPAT_A;
226 vtotal_reg = VTOTAL_A;
227 vblank_reg = VBLANK_A;
228 vsync_reg = VSYNC_A;
229 pipeconf_reg = PIPEACONF;
230 pipe_dsl_reg = PIPEADSL;
231 } else {
232 bclrpat_reg = BCLRPAT_B;
233 vtotal_reg = VTOTAL_B;
234 vblank_reg = VBLANK_B;
235 vsync_reg = VSYNC_B;
236 pipeconf_reg = PIPEBCONF;
237 pipe_dsl_reg = PIPEBDSL;
238 }
239
240 save_bclrpat = I915_READ(bclrpat_reg);
241 save_vtotal = I915_READ(vtotal_reg);
242 vblank = I915_READ(vblank_reg);
243
244 vtotal = ((save_vtotal >> 16) & 0xfff) + 1;
245 vactive = (save_vtotal & 0x7ff) + 1;
246
247 vblank_start = (vblank & 0xfff) + 1;
248 vblank_end = ((vblank >> 16) & 0xfff) + 1;
249
250 /* Set the border color to purple. */
251 I915_WRITE(bclrpat_reg, 0x500050);
252
253 if (IS_I9XX(dev)) {
254 uint32_t pipeconf = I915_READ(pipeconf_reg);
255 I915_WRITE(pipeconf_reg, pipeconf | PIPECONF_FORCE_BORDER);
256 /* Wait for the next vblank to substitute
257 * the border color for color info */
258 intel_wait_for_vblank(dev);
259 st00 = I915_READ8(VGA_MSR_WRITE);
260 status = ((st00 & (1 << 4)) != 0) ?
261 connector_status_connected :
262 connector_status_disconnected;
263
264 I915_WRITE(pipeconf_reg, pipeconf);
265 } else {
266 bool restore_vblank = false;
267 int count, detect;
268
269 /*
270 * If there isn't any border, add some.
271 * Yes, this will flicker
272 */
273 if (vblank_start <= vactive && vblank_end >= vtotal) {
274 uint32_t vsync = I915_READ(vsync_reg);
275 uint32_t vsync_start = (vsync & 0xffff) + 1;
276
277 vblank_start = vsync_start;
278 I915_WRITE(vblank_reg,
279 (vblank_start - 1) |
280 ((vblank_end - 1) << 16));
281 restore_vblank = true;
282 }
283 /* sample in the vertical border, selecting the larger one */
284 if (vblank_start - vactive >= vtotal - vblank_end)
285 vsample = (vblank_start + vactive) >> 1;
286 else
287 vsample = (vtotal + vblank_end) >> 1;
288
289 /*
290 * Wait for the border to be displayed
291 */
292 while (I915_READ(pipe_dsl_reg) >= vactive)
293 ;
294 while ((dsl = I915_READ(pipe_dsl_reg)) <= vsample)
295 ;
296 /*
297 * Watch ST00 for an entire scanline
298 */
299 detect = 0;
300 count = 0;
301 do {
302 count++;
303 /* Read the ST00 VGA status register */
304 st00 = I915_READ8(VGA_MSR_WRITE);
305 if (st00 & (1 << 4))
306 detect++;
307 } while ((I915_READ(pipe_dsl_reg) == dsl));
308
309 /* restore vblank if necessary */
310 if (restore_vblank)
311 I915_WRITE(vblank_reg, vblank);
312 /*
313 * If more than 3/4 of the scanline detected a monitor,
314 * then it is assumed to be present. This works even on i830,
315 * where there isn't any way to force the border color across
316 * the screen
317 */
318 status = detect * 4 > count * 3 ?
319 connector_status_connected :
320 connector_status_disconnected;
321 }
322
323 /* Restore previous settings */
324 I915_WRITE(bclrpat_reg, save_bclrpat);
325
326 return status;
327}
328
201static enum drm_connector_status intel_crt_detect(struct drm_connector *connector) 329static enum drm_connector_status intel_crt_detect(struct drm_connector *connector)
202{ 330{
203 struct drm_device *dev = connector->dev; 331 struct drm_device *dev = connector->dev;
332 struct intel_output *intel_output = to_intel_output(connector);
333 struct drm_encoder *encoder = &intel_output->enc;
334 struct drm_crtc *crtc;
335 int dpms_mode;
336 enum drm_connector_status status;
204 337
205 if (IS_I9XX(dev) && !IS_I915G(dev) && !IS_I915GM(dev)) { 338 if (IS_I9XX(dev) && !IS_I915G(dev) && !IS_I915GM(dev)) {
206 if (intel_crt_detect_hotplug(connector)) 339 if (intel_crt_detect_hotplug(connector))
@@ -212,8 +345,20 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto
212 if (intel_crt_detect_ddc(connector)) 345 if (intel_crt_detect_ddc(connector))
213 return connector_status_connected; 346 return connector_status_connected;
214 347
215 /* TODO use load detect */ 348 /* for pre-945g platforms use load detect */
216 return connector_status_unknown; 349 if (encoder->crtc && encoder->crtc->enabled) {
350 status = intel_crt_load_detect(encoder->crtc, intel_output);
351 } else {
352 crtc = intel_get_load_detect_pipe(intel_output,
353 NULL, &dpms_mode);
354 if (crtc) {
355 status = intel_crt_load_detect(crtc, intel_output);
356 intel_release_load_detect_pipe(intel_output, dpms_mode);
357 } else
358 status = connector_status_unknown;
359 }
360
361 return status;
217} 362}
218 363
219static void intel_crt_destroy(struct drm_connector *connector) 364static void intel_crt_destroy(struct drm_connector *connector)
@@ -236,11 +381,6 @@ static int intel_crt_set_property(struct drm_connector *connector,
236 struct drm_property *property, 381 struct drm_property *property,
237 uint64_t value) 382 uint64_t value)
238{ 383{
239 struct drm_device *dev = connector->dev;
240
241 if (property == dev->mode_config.dpms_property && connector->encoder)
242 intel_crt_dpms(connector->encoder, (uint32_t)(value & 0xf));
243
244 return 0; 384 return 0;
245} 385}
246 386
@@ -257,6 +397,7 @@ static const struct drm_encoder_helper_funcs intel_crt_helper_funcs = {
257}; 397};
258 398
259static const struct drm_connector_funcs intel_crt_connector_funcs = { 399static const struct drm_connector_funcs intel_crt_connector_funcs = {
400 .dpms = drm_helper_connector_dpms,
260 .detect = intel_crt_detect, 401 .detect = intel_crt_detect,
261 .fill_modes = drm_helper_probe_single_connector_modes, 402 .fill_modes = drm_helper_probe_single_connector_modes,
262 .destroy = intel_crt_destroy, 403 .destroy = intel_crt_destroy,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3387cf32f385..c9d6f10ba92e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1357,7 +1357,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
1357 int pipe = intel_crtc->pipe; 1357 int pipe = intel_crtc->pipe;
1358 uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR; 1358 uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
1359 uint32_t base = (pipe == 0) ? CURABASE : CURBBASE; 1359 uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
1360 uint32_t temp; 1360 uint32_t temp = I915_READ(control);
1361 size_t addr; 1361 size_t addr;
1362 int ret; 1362 int ret;
1363 1363
@@ -1366,7 +1366,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
1366 /* if we want to turn off the cursor ignore width and height */ 1366 /* if we want to turn off the cursor ignore width and height */
1367 if (!handle) { 1367 if (!handle) {
1368 DRM_DEBUG("cursor off\n"); 1368 DRM_DEBUG("cursor off\n");
1369 temp = CURSOR_MODE_DISABLE; 1369 if (IS_MOBILE(dev) || IS_I9XX(dev)) {
1370 temp &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE);
1371 temp |= CURSOR_MODE_DISABLE;
1372 } else {
1373 temp &= ~(CURSOR_ENABLE | CURSOR_GAMMA_ENABLE);
1374 }
1370 addr = 0; 1375 addr = 0;
1371 bo = NULL; 1376 bo = NULL;
1372 mutex_lock(&dev->struct_mutex); 1377 mutex_lock(&dev->struct_mutex);
@@ -1409,10 +1414,19 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
1409 addr = obj_priv->phys_obj->handle->busaddr; 1414 addr = obj_priv->phys_obj->handle->busaddr;
1410 } 1415 }
1411 1416
1412 temp = 0; 1417 if (!IS_I9XX(dev))
1413 /* set the pipe for the cursor */ 1418 I915_WRITE(CURSIZE, (height << 12) | width);
1414 temp |= (pipe << 28); 1419
1415 temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE; 1420 /* Hooray for CUR*CNTR differences */
1421 if (IS_MOBILE(dev) || IS_I9XX(dev)) {
1422 temp &= ~(CURSOR_MODE | MCURSOR_PIPE_SELECT);
1423 temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
1424 temp |= (pipe << 28); /* Connect to correct pipe */
1425 } else {
1426 temp &= ~(CURSOR_FORMAT_MASK);
1427 temp |= CURSOR_ENABLE;
1428 temp |= CURSOR_FORMAT_ARGB | CURSOR_GAMMA_ENABLE;
1429 }
1416 1430
1417 finish: 1431 finish:
1418 I915_WRITE(control, temp); 1432 I915_WRITE(control, temp);
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 8b8d6e65cd3f..1ee3007d6ec0 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -316,6 +316,7 @@ static const struct drm_encoder_helper_funcs intel_dvo_helper_funcs = {
316}; 316};
317 317
318static const struct drm_connector_funcs intel_dvo_connector_funcs = { 318static const struct drm_connector_funcs intel_dvo_connector_funcs = {
319 .dpms = drm_helper_connector_dpms,
319 .save = intel_dvo_save, 320 .save = intel_dvo_save,
320 .restore = intel_dvo_restore, 321 .restore = intel_dvo_restore,
321 .detect = intel_dvo_detect, 322 .detect = intel_dvo_detect,
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index d0983bb93a18..7d6bdd705326 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -219,6 +219,7 @@ static const struct drm_encoder_helper_funcs intel_hdmi_helper_funcs = {
219}; 219};
220 220
221static const struct drm_connector_funcs intel_hdmi_connector_funcs = { 221static const struct drm_connector_funcs intel_hdmi_connector_funcs = {
222 .dpms = drm_helper_connector_dpms,
222 .save = intel_hdmi_save, 223 .save = intel_hdmi_save,
223 .restore = intel_hdmi_restore, 224 .restore = intel_hdmi_restore,
224 .detect = intel_hdmi_detect, 225 .detect = intel_hdmi_detect,
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 439a86514993..53cccfa58b95 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -343,11 +343,6 @@ static int intel_lvds_set_property(struct drm_connector *connector,
343 struct drm_property *property, 343 struct drm_property *property,
344 uint64_t value) 344 uint64_t value)
345{ 345{
346 struct drm_device *dev = connector->dev;
347
348 if (property == dev->mode_config.dpms_property && connector->encoder)
349 intel_lvds_dpms(connector->encoder, (uint32_t)(value & 0xf));
350
351 return 0; 346 return 0;
352} 347}
353 348
@@ -366,6 +361,7 @@ static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs
366}; 361};
367 362
368static const struct drm_connector_funcs intel_lvds_connector_funcs = { 363static const struct drm_connector_funcs intel_lvds_connector_funcs = {
364 .dpms = drm_helper_connector_dpms,
369 .save = intel_lvds_save, 365 .save = intel_lvds_save,
370 .restore = intel_lvds_restore, 366 .restore = intel_lvds_restore,
371 .detect = intel_lvds_detect, 367 .detect = intel_lvds_detect,
@@ -391,7 +387,7 @@ static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id)
391} 387}
392 388
393/* These systems claim to have LVDS, but really don't */ 389/* These systems claim to have LVDS, but really don't */
394static const struct dmi_system_id __initdata intel_no_lvds[] = { 390static const struct dmi_system_id intel_no_lvds[] = {
395 { 391 {
396 .callback = intel_no_lvds_dmi_callback, 392 .callback = intel_no_lvds_dmi_callback,
397 .ident = "Apple Mac Mini (Core series)", 393 .ident = "Apple Mac Mini (Core series)",
@@ -511,10 +507,10 @@ void intel_lvds_init(struct drm_device *dev)
511 } 507 }
512 508
513 /* Failed to get EDID, what about VBT? */ 509 /* Failed to get EDID, what about VBT? */
514 if (dev_priv->vbt_mode) { 510 if (dev_priv->lfp_lvds_vbt_mode) {
515 mutex_lock(&dev->mode_config.mutex); 511 mutex_lock(&dev->mode_config.mutex);
516 dev_priv->panel_fixed_mode = 512 dev_priv->panel_fixed_mode =
517 drm_mode_duplicate(dev, dev_priv->vbt_mode); 513 drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
518 mutex_unlock(&dev->mode_config.mutex); 514 mutex_unlock(&dev->mode_config.mutex);
519 if (dev_priv->panel_fixed_mode) { 515 if (dev_priv->panel_fixed_mode) {
520 dev_priv->panel_fixed_mode->type |= 516 dev_priv->panel_fixed_mode->type |=
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 9913651c1e17..3093b4d4a4dd 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -69,6 +69,10 @@ struct intel_sdvo_priv {
69 * This is set if we treat the device as HDMI, instead of DVI. 69 * This is set if we treat the device as HDMI, instead of DVI.
70 */ 70 */
71 bool is_hdmi; 71 bool is_hdmi;
72 /**
73 * This is set if we detect output of sdvo device as LVDS.
74 */
75 bool is_lvds;
72 76
73 /** 77 /**
74 * Returned SDTV resolutions allowed for the current format, if the 78 * Returned SDTV resolutions allowed for the current format, if the
@@ -1398,10 +1402,8 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect
1398static void intel_sdvo_get_ddc_modes(struct drm_connector *connector) 1402static void intel_sdvo_get_ddc_modes(struct drm_connector *connector)
1399{ 1403{
1400 struct intel_output *intel_output = to_intel_output(connector); 1404 struct intel_output *intel_output = to_intel_output(connector);
1401 struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
1402 1405
1403 /* set the bus switch and get the modes */ 1406 /* set the bus switch and get the modes */
1404 intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus);
1405 intel_ddc_get_modes(intel_output); 1407 intel_ddc_get_modes(intel_output);
1406 1408
1407#if 0 1409#if 0
@@ -1543,6 +1545,37 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector)
1543 } 1545 }
1544} 1546}
1545 1547
1548static void intel_sdvo_get_lvds_modes(struct drm_connector *connector)
1549{
1550 struct intel_output *intel_output = to_intel_output(connector);
1551 struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
1552 struct drm_i915_private *dev_priv = connector->dev->dev_private;
1553
1554 /*
1555 * Attempt to get the mode list from DDC.
1556 * Assume that the preferred modes are
1557 * arranged in priority order.
1558 */
1559 /* set the bus switch and get the modes */
1560 intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus);
1561 intel_ddc_get_modes(intel_output);
1562 if (!list_empty(&connector->probed_modes))
1563 return;
1564
1565 /* Fetch modes from VBT */
1566 if (dev_priv->sdvo_lvds_vbt_mode != NULL) {
1567 struct drm_display_mode *newmode;
1568 newmode = drm_mode_duplicate(connector->dev,
1569 dev_priv->sdvo_lvds_vbt_mode);
1570 if (newmode != NULL) {
1571 /* Guarantee the mode is preferred */
1572 newmode->type = (DRM_MODE_TYPE_PREFERRED |
1573 DRM_MODE_TYPE_DRIVER);
1574 drm_mode_probed_add(connector, newmode);
1575 }
1576 }
1577}
1578
1546static int intel_sdvo_get_modes(struct drm_connector *connector) 1579static int intel_sdvo_get_modes(struct drm_connector *connector)
1547{ 1580{
1548 struct intel_output *output = to_intel_output(connector); 1581 struct intel_output *output = to_intel_output(connector);
@@ -1550,6 +1583,8 @@ static int intel_sdvo_get_modes(struct drm_connector *connector)
1550 1583
1551 if (sdvo_priv->is_tv) 1584 if (sdvo_priv->is_tv)
1552 intel_sdvo_get_tv_modes(connector); 1585 intel_sdvo_get_tv_modes(connector);
1586 else if (sdvo_priv->is_lvds)
1587 intel_sdvo_get_lvds_modes(connector);
1553 else 1588 else
1554 intel_sdvo_get_ddc_modes(connector); 1589 intel_sdvo_get_ddc_modes(connector);
1555 1590
@@ -1564,6 +1599,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector)
1564 1599
1565 if (intel_output->i2c_bus) 1600 if (intel_output->i2c_bus)
1566 intel_i2c_destroy(intel_output->i2c_bus); 1601 intel_i2c_destroy(intel_output->i2c_bus);
1602 if (intel_output->ddc_bus)
1603 intel_i2c_destroy(intel_output->ddc_bus);
1604
1567 drm_sysfs_connector_remove(connector); 1605 drm_sysfs_connector_remove(connector);
1568 drm_connector_cleanup(connector); 1606 drm_connector_cleanup(connector);
1569 kfree(intel_output); 1607 kfree(intel_output);
@@ -1578,6 +1616,7 @@ static const struct drm_encoder_helper_funcs intel_sdvo_helper_funcs = {
1578}; 1616};
1579 1617
1580static const struct drm_connector_funcs intel_sdvo_connector_funcs = { 1618static const struct drm_connector_funcs intel_sdvo_connector_funcs = {
1619 .dpms = drm_helper_connector_dpms,
1581 .save = intel_sdvo_save, 1620 .save = intel_sdvo_save,
1582 .restore = intel_sdvo_restore, 1621 .restore = intel_sdvo_restore,
1583 .detect = intel_sdvo_detect, 1622 .detect = intel_sdvo_detect,
@@ -1660,12 +1699,56 @@ intel_sdvo_get_digital_encoding_mode(struct intel_output *output)
1660 return true; 1699 return true;
1661} 1700}
1662 1701
1702static struct intel_output *
1703intel_sdvo_chan_to_intel_output(struct intel_i2c_chan *chan)
1704{
1705 struct drm_device *dev = chan->drm_dev;
1706 struct drm_connector *connector;
1707 struct intel_output *intel_output = NULL;
1708
1709 list_for_each_entry(connector,
1710 &dev->mode_config.connector_list, head) {
1711 if (to_intel_output(connector)->ddc_bus == chan) {
1712 intel_output = to_intel_output(connector);
1713 break;
1714 }
1715 }
1716 return intel_output;
1717}
1718
1719static int intel_sdvo_master_xfer(struct i2c_adapter *i2c_adap,
1720 struct i2c_msg msgs[], int num)
1721{
1722 struct intel_output *intel_output;
1723 struct intel_sdvo_priv *sdvo_priv;
1724 struct i2c_algo_bit_data *algo_data;
1725 struct i2c_algorithm *algo;
1726
1727 algo_data = (struct i2c_algo_bit_data *)i2c_adap->algo_data;
1728 intel_output =
1729 intel_sdvo_chan_to_intel_output(
1730 (struct intel_i2c_chan *)(algo_data->data));
1731 if (intel_output == NULL)
1732 return -EINVAL;
1733
1734 sdvo_priv = intel_output->dev_priv;
1735 algo = (struct i2c_algorithm *)intel_output->i2c_bus->adapter.algo;
1736
1737 intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus);
1738 return algo->master_xfer(i2c_adap, msgs, num);
1739}
1740
1741static struct i2c_algorithm intel_sdvo_i2c_bit_algo = {
1742 .master_xfer = intel_sdvo_master_xfer,
1743};
1744
1663bool intel_sdvo_init(struct drm_device *dev, int output_device) 1745bool intel_sdvo_init(struct drm_device *dev, int output_device)
1664{ 1746{
1665 struct drm_connector *connector; 1747 struct drm_connector *connector;
1666 struct intel_output *intel_output; 1748 struct intel_output *intel_output;
1667 struct intel_sdvo_priv *sdvo_priv; 1749 struct intel_sdvo_priv *sdvo_priv;
1668 struct intel_i2c_chan *i2cbus = NULL; 1750 struct intel_i2c_chan *i2cbus = NULL;
1751 struct intel_i2c_chan *ddcbus = NULL;
1669 int connector_type; 1752 int connector_type;
1670 u8 ch[0x40]; 1753 u8 ch[0x40];
1671 int i; 1754 int i;
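The DDC fix above hinges on interposing on the i2c algorithm:
intel_sdvo_master_xfer() first points the SDVO control-bus mux at the DDC bus,
then delegates the transfer to the original bit-banging algorithm, so every
EDID read goes through the switch automatically. The shape of the pattern,
reduced to its essentials (the lookup helper is hypothetical):

    /* Interposed transfer: route the SDVO mux to DDC, then delegate. */
    static int example_ddc_xfer(struct i2c_adapter *adap,
    			    struct i2c_msg msgs[], int num)
    {
    	struct intel_output *out = example_lookup_output(adap); /* hypothetical */
    	struct intel_sdvo_priv *priv = out->dev_priv;

    	intel_sdvo_set_control_bus_switch(out, priv->ddc_bus);
    	return out->i2c_bus->adapter.algo->master_xfer(adap, msgs, num);
    }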
@@ -1676,17 +1759,9 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1676 return false; 1759 return false;
1677 } 1760 }
1678 1761
1679 connector = &intel_output->base;
1680
1681 drm_connector_init(dev, connector, &intel_sdvo_connector_funcs,
1682 DRM_MODE_CONNECTOR_Unknown);
1683 drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs);
1684 sdvo_priv = (struct intel_sdvo_priv *)(intel_output + 1); 1762 sdvo_priv = (struct intel_sdvo_priv *)(intel_output + 1);
1685 intel_output->type = INTEL_OUTPUT_SDVO; 1763 intel_output->type = INTEL_OUTPUT_SDVO;
1686 1764
1687 connector->interlace_allowed = 0;
1688 connector->doublescan_allowed = 0;
1689
1690 /* setup the DDC bus. */ 1765 /* setup the DDC bus. */
1691 if (output_device == SDVOB) 1766 if (output_device == SDVOB)
1692 i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOB"); 1767 i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOB");
@@ -1694,7 +1769,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1694 i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOC"); 1769 i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOC");
1695 1770
1696 if (!i2cbus) 1771 if (!i2cbus)
1697 goto err_connector; 1772 goto err_inteloutput;
1698 1773
1699 sdvo_priv->i2c_bus = i2cbus; 1774 sdvo_priv->i2c_bus = i2cbus;
1700 1775
@@ -1710,7 +1785,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1710 intel_output->i2c_bus = i2cbus; 1785 intel_output->i2c_bus = i2cbus;
1711 intel_output->dev_priv = sdvo_priv; 1786 intel_output->dev_priv = sdvo_priv;
1712 1787
1713
1714 /* Read the regs to test if we can talk to the device */ 1788 /* Read the regs to test if we can talk to the device */
1715 for (i = 0; i < 0x40; i++) { 1789 for (i = 0; i < 0x40; i++) {
1716 if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) { 1790 if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) {
@@ -1720,6 +1794,22 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1720 } 1794 }
1721 } 1795 }
1722 1796
1797 /* setup the DDC bus. */
1798 if (output_device == SDVOB)
1799 ddcbus = intel_i2c_create(dev, GPIOE, "SDVOB DDC BUS");
1800 else
1801 ddcbus = intel_i2c_create(dev, GPIOE, "SDVOC DDC BUS");
1802
1803 if (ddcbus == NULL)
1804 goto err_i2c;
1805
1806 intel_sdvo_i2c_bit_algo.functionality =
1807 intel_output->i2c_bus->adapter.algo->functionality;
1808 ddcbus->adapter.algo = &intel_sdvo_i2c_bit_algo;
1809 intel_output->ddc_bus = ddcbus;
1810
1811 /* In the default case, SDVO LVDS is false */
1812 sdvo_priv->is_lvds = false;
1723 intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps); 1813 intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps);
1724 1814
1725 if (sdvo_priv->caps.output_flags & 1815 if (sdvo_priv->caps.output_flags &
@@ -1729,7 +1819,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1729 else 1819 else
1730 sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS1; 1820 sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS1;
1731 1821
1732 connector->display_info.subpixel_order = SubPixelHorizontalRGB;
1733 encoder_type = DRM_MODE_ENCODER_TMDS; 1822 encoder_type = DRM_MODE_ENCODER_TMDS;
1734 connector_type = DRM_MODE_CONNECTOR_DVID; 1823 connector_type = DRM_MODE_CONNECTOR_DVID;
1735 1824
@@ -1747,7 +1836,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1747 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_SVID0) 1836 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_SVID0)
1748 { 1837 {
1749 sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0; 1838 sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0;
1750 connector->display_info.subpixel_order = SubPixelHorizontalRGB;
1751 encoder_type = DRM_MODE_ENCODER_TVDAC; 1839 encoder_type = DRM_MODE_ENCODER_TVDAC;
1752 connector_type = DRM_MODE_CONNECTOR_SVIDEO; 1840 connector_type = DRM_MODE_CONNECTOR_SVIDEO;
1753 sdvo_priv->is_tv = true; 1841 sdvo_priv->is_tv = true;
@@ -1756,30 +1844,28 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1756 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0) 1844 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0)
1757 { 1845 {
1758 sdvo_priv->controlled_output = SDVO_OUTPUT_RGB0; 1846 sdvo_priv->controlled_output = SDVO_OUTPUT_RGB0;
1759 connector->display_info.subpixel_order = SubPixelHorizontalRGB;
1760 encoder_type = DRM_MODE_ENCODER_DAC; 1847 encoder_type = DRM_MODE_ENCODER_DAC;
1761 connector_type = DRM_MODE_CONNECTOR_VGA; 1848 connector_type = DRM_MODE_CONNECTOR_VGA;
1762 } 1849 }
1763 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB1) 1850 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB1)
1764 { 1851 {
1765 sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; 1852 sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1;
1766 connector->display_info.subpixel_order = SubPixelHorizontalRGB;
1767 encoder_type = DRM_MODE_ENCODER_DAC; 1853 encoder_type = DRM_MODE_ENCODER_DAC;
1768 connector_type = DRM_MODE_CONNECTOR_VGA; 1854 connector_type = DRM_MODE_CONNECTOR_VGA;
1769 } 1855 }
1770 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS0) 1856 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS0)
1771 { 1857 {
1772 sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; 1858 sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0;
1773 connector->display_info.subpixel_order = SubPixelHorizontalRGB;
1774 encoder_type = DRM_MODE_ENCODER_LVDS; 1859 encoder_type = DRM_MODE_ENCODER_LVDS;
1775 connector_type = DRM_MODE_CONNECTOR_LVDS; 1860 connector_type = DRM_MODE_CONNECTOR_LVDS;
1861 sdvo_priv->is_lvds = true;
1776 } 1862 }
1777 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1) 1863 else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1)
1778 { 1864 {
1779 sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS1; 1865 sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS1;
1780 connector->display_info.subpixel_order = SubPixelHorizontalRGB;
1781 encoder_type = DRM_MODE_ENCODER_LVDS; 1866 encoder_type = DRM_MODE_ENCODER_LVDS;
1782 connector_type = DRM_MODE_CONNECTOR_LVDS; 1867 connector_type = DRM_MODE_CONNECTOR_LVDS;
1868 sdvo_priv->is_lvds = true;
1783 } 1869 }
1784 else 1870 else
1785 { 1871 {
@@ -1795,9 +1881,16 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1795 goto err_i2c; 1881 goto err_i2c;
1796 } 1882 }
1797 1883
1884 connector = &intel_output->base;
1885 drm_connector_init(dev, connector, &intel_sdvo_connector_funcs,
1886 connector_type);
1887 drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs);
1888 connector->interlace_allowed = 0;
1889 connector->doublescan_allowed = 0;
1890 connector->display_info.subpixel_order = SubPixelHorizontalRGB;
1891
1798 drm_encoder_init(dev, &intel_output->enc, &intel_sdvo_enc_funcs, encoder_type); 1892 drm_encoder_init(dev, &intel_output->enc, &intel_sdvo_enc_funcs, encoder_type);
1799 drm_encoder_helper_add(&intel_output->enc, &intel_sdvo_helper_funcs); 1893 drm_encoder_helper_add(&intel_output->enc, &intel_sdvo_helper_funcs);
1800 connector->connector_type = connector_type;
1801 1894
1802 drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc); 1895 drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
1803 drm_sysfs_connector_add(connector); 1896 drm_sysfs_connector_add(connector);
@@ -1829,14 +1922,13 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
1829 sdvo_priv->caps.output_flags & 1922 sdvo_priv->caps.output_flags &
1830 (SDVO_OUTPUT_TMDS1 | SDVO_OUTPUT_RGB1) ? 'Y' : 'N'); 1923 (SDVO_OUTPUT_TMDS1 | SDVO_OUTPUT_RGB1) ? 'Y' : 'N');
1831 1924
1832 intel_output->ddc_bus = i2cbus;
1833
1834 return true; 1925 return true;
1835 1926
1836err_i2c: 1927err_i2c:
1928 if (ddcbus != NULL)
1929 intel_i2c_destroy(intel_output->ddc_bus);
1837 intel_i2c_destroy(intel_output->i2c_bus); 1930 intel_i2c_destroy(intel_output->i2c_bus);
1838err_connector: 1931err_inteloutput:
1839 drm_connector_cleanup(connector);
1840 kfree(intel_output); 1932 kfree(intel_output);
1841 1933
1842 return false; 1934 return false;
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index d2c32983242d..98ac0546b7bd 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1626,6 +1626,7 @@ static const struct drm_encoder_helper_funcs intel_tv_helper_funcs = {
1626}; 1626};
1627 1627
1628static const struct drm_connector_funcs intel_tv_connector_funcs = { 1628static const struct drm_connector_funcs intel_tv_connector_funcs = {
1629 .dpms = drm_helper_connector_dpms,
1629 .save = intel_tv_save, 1630 .save = intel_tv_save,
1630 .restore = intel_tv_restore, 1631 .restore = intel_tv_restore,
1631 .detect = intel_tv_detect, 1632 .detect = intel_tv_detect,
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 77a7a4d84650..aff90bb96488 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -2185,9 +2185,9 @@ void radeon_commit_ring(drm_radeon_private_t *dev_priv)
2185 2185
2186 /* check if the ring is padded out to 16-dword alignment */ 2186 /* check if the ring is padded out to 16-dword alignment */
2187 2187
2188 tail_aligned = dev_priv->ring.tail & 0xf; 2188 tail_aligned = dev_priv->ring.tail & (RADEON_RING_ALIGN-1);
2189 if (tail_aligned) { 2189 if (tail_aligned) {
2190 int num_p2 = 16 - tail_aligned; 2190 int num_p2 = RADEON_RING_ALIGN - tail_aligned;
2191 2191
2192 ring = dev_priv->ring.start; 2192 ring = dev_priv->ring.start;
2193 /* pad with some CP_PACKET2 */ 2193 /* pad with some CP_PACKET2 */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 8071d965f142..0c6bfc1de153 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -1964,11 +1964,14 @@ do { \
1964 1964
1965#define RING_LOCALS int write, _nr, _align_nr; unsigned int mask; u32 *ring; 1965#define RING_LOCALS int write, _nr, _align_nr; unsigned int mask; u32 *ring;
1966 1966
1967#define RADEON_RING_ALIGN 16
1968
1967#define BEGIN_RING( n ) do { \ 1969#define BEGIN_RING( n ) do { \
1968 if ( RADEON_VERBOSE ) { \ 1970 if ( RADEON_VERBOSE ) { \
1969 DRM_INFO( "BEGIN_RING( %d )\n", (n)); \ 1971 DRM_INFO( "BEGIN_RING( %d )\n", (n)); \
1970 } \ 1972 } \
1971 _align_nr = (n + 0xf) & ~0xf; \ 1973 _align_nr = RADEON_RING_ALIGN - ((dev_priv->ring.tail + n) & (RADEON_RING_ALIGN-1)); \
1974 _align_nr += n; \
1972 if (dev_priv->ring.space <= (_align_nr * sizeof(u32))) { \ 1975 if (dev_priv->ring.space <= (_align_nr * sizeof(u32))) { \
1973 COMMIT_RING(); \ 1976 COMMIT_RING(); \
1974 radeon_wait_ring( dev_priv, _align_nr * sizeof(u32)); \ 1977 radeon_wait_ring( dev_priv, _align_nr * sizeof(u32)); \
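The old BEGIN_RING reservation rounded the request itself up to 16 dwords,
which only yields an aligned tail if the ring was already aligned when the
macro ran. The new form reserves n plus however many pad dwords bring
tail + n to the next RADEON_RING_ALIGN boundary, which matches what
radeon_commit_ring() actually emits. A worked example, assuming
RADEON_RING_ALIGN == 16:

    /*
     * tail = 5, n = 7
     * pad       = 16 - ((5 + 7) & 15) = 16 - 12 = 4
     * _align_nr = 7 + 4 = 11 dwords reserved
     *
     * After the 7 real dwords the tail sits at 12; the commit path pads
     * with 4 CP_PACKET2 dwords and the tail lands on the 16-dword boundary.
     */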
diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index b5e3b2851698..a1787fdf5b9f 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -182,7 +182,7 @@ static struct platform_driver lm78_isa_driver = {
182 .name = "lm78", 182 .name = "lm78",
183 }, 183 },
184 .probe = lm78_isa_probe, 184 .probe = lm78_isa_probe,
185 .remove = lm78_isa_remove, 185 .remove = __devexit_p(lm78_isa_remove),
186}; 186};
187 187
188 188
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 35dc38d3b2c5..6415a2e2ba87 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -696,7 +696,7 @@ void ide_timer_expiry (unsigned long data)
 	}
 	spin_lock_irq(&hwif->lock);
 	enable_irq(hwif->irq);
-	if (startstop == ide_stopped) {
+	if (startstop == ide_stopped && hwif->polling == 0) {
 		ide_unlock_port(hwif);
 		plug_device = 1;
 	}
@@ -868,7 +868,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	 * same irq as is currently being serviced here, and Linux
 	 * won't allow another of the same (on any CPU) until we return.
 	 */
-	if (startstop == ide_stopped) {
+	if (startstop == ide_stopped && hwif->polling == 0) {
 		BUG_ON(hwif->handler);
 		ide_unlock_port(hwif);
 		plug_device = 1;
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index c19a221b1e18..06fe002116ec 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -206,8 +206,6 @@ EXPORT_SYMBOL_GPL(ide_in_drive_list);
 
 /*
  * Early UDMA66 devices don't set bit14 to 1, only bit13 is valid.
- * We list them here and depend on the device side cable detection for them.
- *
  * Some optical devices with the buggy firmwares have the same problem.
  */
 static const struct drive_list_entry ivb_list[] = {
@@ -251,10 +249,25 @@ u8 eighty_ninty_three(ide_drive_t *drive)
 	 * - force bit13 (80c cable present) check also for !ivb devices
 	 *   (unless the slave device is pre-ATA3)
 	 */
-	if ((id[ATA_ID_HW_CONFIG] & 0x4000) ||
-	    (ivb && (id[ATA_ID_HW_CONFIG] & 0x2000)))
+	if (id[ATA_ID_HW_CONFIG] & 0x4000)
 		return 1;
 
+	if (ivb) {
+		const char *model = (char *)&id[ATA_ID_PROD];
+
+		if (strstr(model, "TSSTcorp CDDVDW SH-S202")) {
+			/*
+			 * These ATAPI devices always report 80c cable
+			 * so we have to depend on the host in this case.
+			 */
+			if (hwif->cbl == ATA_CBL_PATA80)
+				return 1;
+		} else {
+			/* Depend on the device side cable detection. */
+			if (id[ATA_ID_HW_CONFIG] & 0x2000)
+				return 1;
+		}
+	}
 no_80w:
 	if (drive->dev_flags & IDE_DFLAG_UDMA33_WARNED)
 		return 0;
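
The resulting 80-wire cable policy reads as a small decision function: trust bit14 unconditionally, and for IVB devices either fall back to host-side detection (for the TSSTcorp drives that always claim an 80c cable) or use the device-side bit13. A standalone sketch under that reading (constants and names are stand-ins, not the IDE core's):

	#include <string.h>

	enum { CBL_PATA40, CBL_PATA80 };	/* stand-ins for ATA_CBL_* */

	/* Nonzero if an 80-wire (UDMA66-capable) cable should be assumed. */
	static int assume_80w(int ivb, unsigned short hw_config,
			      const char *model, int host_cbl)
	{
		if (hw_config & 0x4000)		/* bit14: trusted when set */
			return 1;
		if (!ivb)
			return 0;
		if (strstr(model, "TSSTcorp CDDVDW SH-S202"))
			return host_cbl == CBL_PATA80;	/* device lies */
		return (hw_config & 0x2000) != 0;	/* bit13: device side */
	}

	int main(void)
	{
		return !assume_80w(1, 0x2000, "GENERIC DISK", CBL_PATA40);
	}
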
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 56ff8c46c7d1..2148df836ce7 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -31,24 +31,6 @@ void ide_toggle_bounce(ide_drive_t *drive, int on)
 	blk_queue_bounce_limit(drive->queue, addr);
 }
 
-static void ide_dump_opcode(ide_drive_t *drive)
-{
-	struct request *rq = drive->hwif->rq;
-	struct ide_cmd *cmd = NULL;
-
-	if (!rq)
-		return;
-
-	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
-		cmd = rq->special;
-
-	printk(KERN_ERR "ide: failed opcode was: ");
-	if (cmd == NULL)
-		printk(KERN_CONT "unknown\n");
-	else
-		printk(KERN_CONT "0x%02x\n", cmd->tf.command);
-}
-
 u64 ide_get_lba_addr(struct ide_cmd *cmd, int lba48)
 {
 	struct ide_taskfile *tf = &cmd->tf;
@@ -91,7 +73,7 @@ static void ide_dump_sector(ide_drive_t *drive)
 
 static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
 {
-	printk(KERN_ERR "{ ");
+	printk(KERN_CONT "{ ");
 	if (err & ATA_ABORTED)
 		printk(KERN_CONT "DriveStatusError ");
 	if (err & ATA_ICRC)
@@ -121,7 +103,7 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
 
 static void ide_dump_atapi_error(ide_drive_t *drive, u8 err)
 {
-	printk(KERN_ERR "{ ");
+	printk(KERN_CONT "{ ");
 	if (err & ATAPI_ILI)
 		printk(KERN_CONT "IllegalLengthIndication ");
 	if (err & ATAPI_EOM)
@@ -179,7 +161,10 @@ u8 ide_dump_status(ide_drive_t *drive, const char *msg, u8 stat)
 	else
 		ide_dump_atapi_error(drive, err);
 	}
-	ide_dump_opcode(drive);
+
+	printk(KERN_ERR "%s: possibly failed opcode: 0x%02x\n",
+		drive->name, drive->hwif->cmd.tf.command);
+
 	return err;
 }
 EXPORT_SYMBOL(ide_dump_status);
diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c
index 61111fd27130..39d4e01f5c9c 100644
--- a/drivers/ide/ide-pci-generic.c
+++ b/drivers/ide/ide-pci-generic.c
@@ -33,6 +33,16 @@ static int ide_generic_all; /* Set to claim all devices */
 module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
 MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers.");
 
+static void netcell_quirkproc(ide_drive_t *drive)
+{
+	/* mark words 85-87 as valid */
+	drive->id[ATA_ID_CSF_DEFAULT] |= 0x4000;
+}
+
+static const struct ide_port_ops netcell_port_ops = {
+	.quirkproc = netcell_quirkproc,
+};
+
 #define DECLARE_GENERIC_PCI_DEV(extra_flags) \
 	{ \
 		.name		= DRV_NAME, \
@@ -74,6 +84,7 @@ static const struct ide_port_info generic_chipsets[] __devinitdata = {
 
 	{	/* 6: Revolution */
 		.name		= DRV_NAME,
+		.port_ops	= &netcell_port_ops,
 		.host_flags	= IDE_HFLAG_CLEAR_SIMPLEX |
 				  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_OFF_BOARD,
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 7f264ed1141b..c895ed52b2e8 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -295,7 +295,7 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id)
 
 	timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
 
-	if (ide_busy_sleep(hwif, timeout, use_altstatus))
+	if (ide_busy_sleep(drive, timeout, use_altstatus))
 		return 1;
 
 	/* wait for IRQ and ATA_DRQ */
@@ -316,8 +316,9 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id)
 	return rc;
 }
 
-int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus)
+int ide_busy_sleep(ide_drive_t *drive, unsigned long timeout, int altstatus)
 {
+	ide_hwif_t *hwif = drive->hwif;
 	u8 stat;
 
 	timeout += jiffies;
@@ -330,6 +331,8 @@ int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus)
 			return 0;
 	} while (time_before(jiffies, timeout));
 
+	printk(KERN_ERR "%s: timeout in %s\n", drive->name, __func__);
+
 	return 1;	/* drive timed-out */
 }
 
@@ -420,7 +423,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
 		tp_ops->dev_select(drive);
 		msleep(50);
 		tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
-		(void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0);
+		(void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0);
 		rc = ide_dev_read_id(drive, cmd, id);
 	}
 
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 248a54bd2386..b3bc96f930a6 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 1998-2002		Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2006-2007		MontaVista Software, Inc.
+ * Copyright (C) 2006-2007, 2009	MontaVista Software, Inc.
  * Copyright (C) 2007			Bartlomiej Zolnierkiewicz
  *
  *  Portions Copyright (C) 1999 Promise Technology, Inc.
@@ -227,28 +227,19 @@ somebody_else:
 	return (dma_stat & 4) == 4;	/* return 1 if INTR asserted */
 }
 
-static void pdc202xx_reset_host (ide_hwif_t *hwif)
+static void pdc202xx_reset(ide_drive_t *drive)
 {
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long high_16	= hwif->extra_base - 16;
 	u8 udma_speed_flag	= inb(high_16 | 0x001f);
 
+	printk(KERN_WARNING "PDC202xx: software reset...\n");
+
 	outb(udma_speed_flag | 0x10, high_16 | 0x001f);
 	mdelay(100);
 	outb(udma_speed_flag & ~0x10, high_16 | 0x001f);
 	mdelay(2000);	/* 2 seconds ?! */
 
-	printk(KERN_WARNING "PDC202XX: %s channel reset.\n",
-		hwif->channel ? "Secondary" : "Primary");
-}
-
-static void pdc202xx_reset (ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	ide_hwif_t *mate	= hwif->mate;
-
-	pdc202xx_reset_host(hwif);
-	pdc202xx_reset_host(mate);
-
 	ide_set_max_pio(drive);
 }
 
@@ -328,9 +319,8 @@ static const struct ide_dma_ops pdc20246_dma_ops = {
 	.dma_start		= ide_dma_start,
 	.dma_end		= ide_dma_end,
 	.dma_test_irq		= pdc202xx_dma_test_irq,
-	.dma_lost_irq		= pdc202xx_dma_lost_irq,
+	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_clear		= pdc202xx_reset,
 	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
336 326
diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
index 3ff7231e4858..028de26a25fe 100644
--- a/drivers/ide/via82cxxx.c
+++ b/drivers/ide/via82cxxx.c
@@ -67,6 +67,7 @@ static struct via_isa_bridge {
 	u8 udma_mask;
 	u8 flags;
 } via_isa_bridges[] = {
+	{ "vx855",	PCI_DEVICE_ID_VIA_VX855,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "vx800",	PCI_DEVICE_ID_VIA_VX800,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "cx700",	PCI_DEVICE_ID_VIA_CX700,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "vt8237s",	PCI_DEVICE_ID_VIA_8237S,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
@@ -474,6 +475,7 @@ static const struct pci_device_id via_pci_tbl[] = {
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C576_1), 0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C586_1), 0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_CX700_IDE), 0 },
+	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_VX855_IDE), 0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_6410), 1 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_SATA_EIDE), 1 },
 	{ 0, },
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index bf740394d704..949c97ff57e3 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -41,6 +41,10 @@ static int debug;
 module_param_named(debug, debug, uint, 0644);
 MODULE_PARM_DESC(debug, "Enable debug printks in this driver");
 
+static int forceload;
+module_param_named(forceload, forceload, uint, 0644);
+MODULE_PARM_DESC(forceload, "Enable driver testing on unvalidated i5000");
+
 #define dprintk(fmt, arg...) \
 	do { if (debug) printk(KERN_INFO I7300_PRINT fmt, ##arg); } while (0)
 
@@ -552,7 +556,7 @@ static int __init i7300_idle_init(void)
 	cpus_clear(idle_cpumask);
 	total_us = 0;
 
-	if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev))
+	if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
 		return -ENODEV;
 
 	if (i7300_idle_thrt_save())
diff --git a/drivers/input/input.c b/drivers/input/input.c
index e54e002665b0..5d445f48789b 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -42,6 +42,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = {
 	ABS_MT_POSITION_Y,
 	ABS_MT_TOOL_TYPE,
 	ABS_MT_BLOB_ID,
+	ABS_MT_TRACKING_ID,
 	0
 };
 static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)];
diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index 67248c31e19a..be5bbbb8ae4e 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -210,7 +210,7 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
 		timeout = wait_event_timeout(ps2dev->wait,
 					     !(ps2dev->flags & PS2_FLAG_CMD1), timeout);
 
-	if (ps2dev->cmdcnt && timeout > 0) {
+	if (ps2dev->cmdcnt && !(ps2dev->flags & PS2_FLAG_CMD1)) {
 
 		timeout = ps2_adjust_timeout(ps2dev, command, timeout);
 		wait_event_timeout(ps2dev->wait,
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index f100c7f4c1db..6954f5500108 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -419,7 +419,7 @@ static int ucb1400_ts_remove(struct platform_device *dev)
 #ifdef CONFIG_PM
 static int ucb1400_ts_resume(struct platform_device *dev)
 {
-	struct ucb1400_ts *ucb = platform_get_drvdata(dev);
+	struct ucb1400_ts *ucb = dev->dev.platform_data;
 
 	if (ucb->ts_task) {
 		/*
diff --git a/drivers/isdn/gigaset/isocdata.c b/drivers/isdn/gigaset/isocdata.c
index b171e75cb52e..29808c4fb1cb 100644
--- a/drivers/isdn/gigaset/isocdata.c
+++ b/drivers/isdn/gigaset/isocdata.c
@@ -175,7 +175,7 @@ int gigaset_isowbuf_getbytes(struct isowbuf_t *iwb, int size)
 		return -EINVAL;
 	}
 	src = iwb->read;
-	if (unlikely(limit > BAS_OUTBUFSIZE + BAS_OUTBUFPAD ||
+	if (unlikely(limit >= BAS_OUTBUFSIZE + BAS_OUTBUFPAD ||
 		     (read < src && limit >= src))) {
 		pr_err("isoc write buffer frame reservation violated\n");
 		return -EFAULT;
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 1a83910f674f..eaf722fe309a 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -358,6 +358,16 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 		if (emulate_insn(cpu))
 			return;
 	}
+		/* If KVM is active, the vmcall instruction triggers a
+		 * General Protection Fault.  Normally it triggers an
+		 * invalid opcode fault (6): */
+	case 6:
+		/* We need to check if ring == GUEST_PL and
+		 * faulting instruction == vmcall. */
+		if (is_hypercall(cpu)) {
+			rewrite_hypercall(cpu);
+			return;
+		}
 		break;
 	case 14: /* We've intercepted a Page Fault. */
 		/* The Guest accessed a virtual address that wasn't mapped.
@@ -403,15 +413,6 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 		 * up the pointer now to indicate a hypercall is pending. */
 		cpu->hcall = (struct hcall_args *)cpu->regs;
 		return;
-	case 6:
-		/* kvm hypercalls trigger an invalid opcode fault (6).
-		 * We need to check if ring == GUEST_PL and
-		 * faulting instruction == vmcall. */
-		if (is_hypercall(cpu)) {
-			rewrite_hypercall(cpu);
-			return;
-		}
-		break;
 	}
 
 	/* We didn't handle the trap, so it needs to go to the Guest. */
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 47c68bc75a17..56df1cee8fb3 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1097,14 +1097,12 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 	}
 	bitmap->allclean = 1;
 
+	spin_lock_irqsave(&bitmap->lock, flags);
 	for (j = 0; j < bitmap->chunks; j++) {
 		bitmap_counter_t *bmc;
-		spin_lock_irqsave(&bitmap->lock, flags);
-		if (!bitmap->filemap) {
+		if (!bitmap->filemap)
 			/* error or shutdown */
-			spin_unlock_irqrestore(&bitmap->lock, flags);
 			break;
-		}
 
 		page = filemap_get_page(bitmap, j);
 
@@ -1121,6 +1119,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 				write_page(bitmap, page, 0);
 				bitmap->allclean = 0;
 			}
+			spin_lock_irqsave(&bitmap->lock, flags);
+			j |= (PAGE_BITS - 1);
 			continue;
 		}
 
@@ -1181,9 +1181,10 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 				ext2_clear_bit(file_page_offset(j), paddr);
 			kunmap_atomic(paddr, KM_USER0);
 		}
-	}
-	spin_unlock_irqrestore(&bitmap->lock, flags);
+		} else
+			j |= PAGE_COUNTER_MASK;
 	}
+	spin_unlock_irqrestore(&bitmap->lock, flags);
 
 	/* now sync the final page */
 	if (lastpage != NULL) {
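
The bitmap_daemon_work() locking change above is the hoisted-lock pattern: take the spinlock once around the whole scan, drop and retake it only across the slow page write, and then skip forward to the next page boundary since everything on the written page is done. A compilable userspace sketch of the shape (pthread mutex standing in for the spinlock; names are illustrative, not the md code itself):

	#include <pthread.h>

	#define COUNTERS_PER_PAGE 1024	/* stand-in for PAGE_COUNTER_MASK + 1 */

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	static int page_needs_write(int j) { return j % (4 * COUNTERS_PER_PAGE) == 0; }
	static void write_page(int j)      { (void)j; /* slow I/O, runs unlocked */ }
	static void tidy_counter(int j)    { (void)j; /* fast work, under the lock */ }

	static void daemon_work(int chunks)
	{
		int j;

		pthread_mutex_lock(&lock);
		for (j = 0; j < chunks; j++) {
			if (page_needs_write(j)) {
				pthread_mutex_unlock(&lock);
				write_page(j);
				pthread_mutex_lock(&lock);
				j |= COUNTERS_PER_PAGE - 1;	/* skip rest of page */
				continue;
			}
			tidy_counter(j);
		}
		pthread_mutex_unlock(&lock);
	}

	int main(void) { daemon_work(8192); return 0; }
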
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 424f7b048c30..3fd8b1e65483 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,7 +20,8 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
+
+#include <trace/events/block.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -53,8 +54,6 @@ struct dm_target_io {
 	union map_info info;
 };
 
-DEFINE_TRACE(block_bio_complete);
-
 /*
  * For request-based dm.
  * One of these is allocated per request.
@@ -656,8 +655,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
 		/* the bio has been remapped so dispatch it */
 
 		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
-				  tio->io->bio->bi_bdev->bd_dev,
-				  clone->bi_sector, sector);
+				  tio->io->bio->bi_bdev->bd_dev, sector);
 
 		generic_make_request(clone);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fccc8343a250..641b211fe3fe 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1375,6 +1375,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
 	sb->size = cpu_to_le64(mddev->dev_sectors);
+	sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9);
+	sb->level = cpu_to_le32(mddev->level);
+	sb->layout = cpu_to_le32(mddev->layout);
 
 	if (mddev->bitmap && mddev->bitmap_file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
@@ -3303,7 +3306,9 @@
 action_show(mddev_t *mddev, char *page)
 {
 	char *type = "idle";
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+	if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+		type = "frozen";
+	else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 	    (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
 		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 			type = "reshape";
@@ -3326,7 +3331,12 @@ action_store(mddev_t *mddev, const char *page, size_t len)
 	if (!mddev->pers || !mddev->pers->sync_request)
 		return -EINVAL;
 
-	if (cmd_match(page, "idle")) {
+	if (cmd_match(page, "frozen"))
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	else
+		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+	if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
 		if (mddev->sync_thread) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			md_unregister_thread(mddev->sync_thread);
@@ -3680,7 +3690,7 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
 		if (strict_blocks_to_sectors(buf, &sectors) < 0)
 			return -EINVAL;
 		if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
-			return -EINVAL;
+			return -E2BIG;
 
 		mddev->external_size = 1;
 	}
@@ -5557,7 +5567,7 @@ static struct block_device_operations md_fops =
 	.owner		= THIS_MODULE,
 	.open		= md_open,
 	.release	= md_release,
-	.locked_ioctl	= md_ioctl,
+	.ioctl		= md_ioctl,
 	.getgeo		= md_getgeo,
 	.media_changed	= md_media_changed,
 	.revalidate_disk= md_revalidate,
@@ -6352,12 +6362,13 @@ void md_do_sync(mddev_t *mddev)
 
 		skipped = 0;
 
-		if ((mddev->curr_resync > mddev->curr_resync_completed &&
-		     (mddev->curr_resync - mddev->curr_resync_completed)
-		    > (max_sectors >> 4)) ||
-		    (j - mddev->curr_resync_completed)*2
-		    >= mddev->resync_max - mddev->curr_resync_completed
-		    ) {
+		if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+		    ((mddev->curr_resync > mddev->curr_resync_completed &&
+		      (mddev->curr_resync - mddev->curr_resync_completed)
+		      > (max_sectors >> 4)) ||
+		     (j - mddev->curr_resync_completed)*2
+		     >= mddev->resync_max - mddev->curr_resync_completed
+		    )) {
 			/* time to update curr_resync_completed */
 			blk_unplug(mddev->queue);
 			wait_event(mddev->recovery_wait,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4616bc3a6e71..bb37fb1b2d82 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q);
 
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
-		  int previous, int noblock)
+		  int previous, int noblock, int noquiesce)
 {
 	struct stripe_head *sh;
 
@@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
 
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
-				    conf->quiesce == 0,
+				    conf->quiesce == 0 || noquiesce,
 				    conf->device_lock, /* nothing */);
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
@@ -2671,7 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 			sector_t bn = compute_blocknr(sh, i, 1);
 			sector_t s = raid5_compute_sector(conf, bn, 0,
 							  &dd_idx, NULL);
-			sh2 = get_active_stripe(conf, s, 0, 1);
+			sh2 = get_active_stripe(conf, s, 0, 1, 1);
 			if (sh2 == NULL)
 				/* so far only the early blocks of this stripe
 				 * have been requested.  When later blocks
@@ -2944,7 +2944,7 @@ static bool handle_stripe5(struct stripe_head *sh)
 	/* Finish reconstruct operations initiated by the expansion process */
 	if (sh->reconstruct_state == reconstruct_state_result) {
 		struct stripe_head *sh2
-			= get_active_stripe(conf, sh->sector, 1, 1);
+			= get_active_stripe(conf, sh->sector, 1, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
 			/* sh cannot be written until sh2 has been read.
 			 * so arrange for sh to be delayed a little
@@ -3189,7 +3189,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 
 	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
 		struct stripe_head *sh2
-			= get_active_stripe(conf, sh->sector, 1, 1);
+			= get_active_stripe(conf, sh->sector, 1, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
 			/* sh cannot be written until sh2 has been read.
 			 * so arrange for sh to be delayed a little
@@ -3288,7 +3288,7 @@ static void unplug_slaves(mddev_t *mddev)
 	int i;
 
 	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i = 0; i < conf->raid_disks; i++) {
 		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
 			struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
@@ -3675,7 +3675,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
 			(unsigned long long)logical_sector);
 
 		sh = get_active_stripe(conf, new_sector, previous,
-				       (bi->bi_rw&RWA_MASK));
+				       (bi->bi_rw&RWA_MASK), 0);
 		if (sh) {
 			if (unlikely(previous)) {
 				/* expansion might have moved on while waiting for a
@@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	safepos = conf->reshape_safe;
 	sector_div(safepos, data_disks);
 	if (mddev->delta_disks < 0) {
-		writepos -= reshape_sectors;
+		writepos -= min_t(sector_t, reshape_sectors, writepos);
 		readpos += reshape_sectors;
 		safepos += reshape_sectors;
 	} else {
 		writepos += reshape_sectors;
-		readpos -= reshape_sectors;
-		safepos -= reshape_sectors;
+		readpos -= min_t(sector_t, reshape_sectors, readpos);
+		safepos -= min_t(sector_t, reshape_sectors, safepos);
 	}
 
 	/* 'writepos' is the most advanced device address we might write.
@@ -3873,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
 		int j;
 		int skipped = 0;
-		sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
+		sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
 		set_bit(STRIPE_EXPANDING, &sh->state);
 		atomic_inc(&conf->reshape_stripes);
 		/* If any of this stripe is beyond the end of the old
@@ -3916,13 +3916,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 		raid5_compute_sector(conf, stripe_addr*(new_data_disks),
 				     1, &dd_idx, NULL);
 	last_sector =
-		raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
+		raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
 				     *(new_data_disks) - 1),
 				     1, &dd_idx, NULL);
 	if (last_sector >= mddev->dev_sectors)
 		last_sector = mddev->dev_sectors - 1;
 	while (first_sector <= last_sector) {
-		sh = get_active_stripe(conf, first_sector, 1, 0);
+		sh = get_active_stripe(conf, first_sector, 1, 0, 1);
 		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		release_stripe(sh);
@@ -4022,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 
 	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 
-	sh = get_active_stripe(conf, sector_nr, 0, 1);
+	sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
 	if (sh == NULL) {
-		sh = get_active_stripe(conf, sector_nr, 0, 0);
+		sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access
 		 */
@@ -4034,7 +4034,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 	 * We don't need to check the 'failed' flag as when that gets set,
 	 * recovery aborts.
 	 */
-	for (i=0; i<mddev->raid_disks; i++)
+	for (i = 0; i < conf->raid_disks; i++)
 		if (conf->disks[i].rdev == NULL)
 			still_degraded = 1;
 
@@ -4086,7 +4086,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 			/* already done this stripe */
 			continue;
 
-		sh = get_active_stripe(conf, sector, 0, 1);
+		sh = get_active_stripe(conf, sector, 0, 1, 0);
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
diff --git a/drivers/media/video/ivtv/ivtv-queue.c b/drivers/media/video/ivtv/ivtv-queue.c
index ff7b7deded4f..7fde36e6d227 100644
--- a/drivers/media/video/ivtv/ivtv-queue.c
+++ b/drivers/media/video/ivtv/ivtv-queue.c
@@ -230,7 +230,8 @@ int ivtv_stream_alloc(struct ivtv_stream *s)
 			return -ENOMEM;
 	}
 	if (ivtv_might_use_dma(s)) {
-		s->sg_handle = pci_map_single(itv->pdev, s->sg_dma, sizeof(struct ivtv_sg_element), s->dma);
+		s->sg_handle = pci_map_single(itv->pdev, s->sg_dma,
+				sizeof(struct ivtv_sg_element), PCI_DMA_TODEVICE);
 		ivtv_stream_sync_for_cpu(s);
 	}
 
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index 3cf61ece71d7..348443bdb23b 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -119,7 +119,7 @@ enclosure_register(struct device *dev, const char *name, int components,
 	edev->edev.class = &enclosure_class;
 	edev->edev.parent = get_device(dev);
 	edev->cb = cb;
-	dev_set_name(&edev->edev, name);
+	dev_set_name(&edev->edev, "%s", name);
 	err = device_register(&edev->edev);
 	if (err)
 		goto err;
@@ -255,8 +255,8 @@ enclosure_component_register(struct enclosure_device *edev,
 	ecomp->number = number;
 	cdev = &ecomp->cdev;
 	cdev->parent = get_device(&edev->edev);
-	if (name)
-		dev_set_name(cdev, name);
+	if (name && name[0])
+		dev_set_name(cdev, "%s", name);
 	else
 		dev_set_name(cdev, "%u", number);
 
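
Both dev_set_name() fixes above close the classic format-string hole: a caller-supplied string must never be passed as the format argument of a printf-style API. The safe idiom, in miniature:

	#include <stdio.h>

	static void set_label(const char *name)
	{
		char buf[64];

		/* BAD:  snprintf(buf, sizeof(buf), name);
		 * a name containing "%s" or "%n" would read or write
		 * through nonexistent varargs. */
		snprintf(buf, sizeof(buf), "%s", name);	/* literal format */
		puts(buf);
	}

	int main(void)
	{
		set_label("bay-%n-0");	/* printed verbatim, not interpreted */
		return 0;
	}
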
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index c643d0fe118f..b56d72ff06e9 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -64,6 +64,31 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
 	unsigned int tmout;
 	int tmout_index;
 
+	/*
+	 * Hardware weirdness.  The FIFO_EMPTY bit of the HW_STATE
+	 * register is sometimes not set before a while when some
+	 * "unusual" data block sizes are used (such as with the SWITCH
+	 * command), even despite the fact that the XFER_DONE interrupt
+	 * was raised.  And if another data transfer starts before
+	 * this bit comes to good sense (which eventually happens by
+	 * itself) then the new transfer simply fails with a timeout.
+	 */
+	if (!(mvsd_read(MVSD_HW_STATE) & (1 << 13))) {
+		unsigned long t = jiffies + HZ;
+		unsigned int hw_state, count = 0;
+		do {
+			if (time_after(jiffies, t)) {
+				dev_warn(host->dev, "FIFO_EMPTY bit missing\n");
+				break;
+			}
+			hw_state = mvsd_read(MVSD_HW_STATE);
+			count++;
+		} while (!(hw_state & (1 << 13)));
+		dev_dbg(host->dev, "*** wait for FIFO_EMPTY bit "
+				   "(hw=0x%04x, count=%d, jiffies=%ld)\n",
+				   hw_state, count, jiffies - (t - HZ));
+	}
+
 	/* If timeout=0 then maximum timeout index is used. */
 	tmout = DIV_ROUND_UP(data->timeout_ns, host->ns_per_clk);
 	tmout += data->timeout_clks;
@@ -620,9 +645,18 @@ static void mvsd_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (ios->bus_width == MMC_BUS_WIDTH_4)
 		ctrl_reg |= MVSD_HOST_CTRL_DATA_WIDTH_4_BITS;
 
+	/*
+	 * The HI_SPEED_EN bit is causing trouble with many (but not all)
+	 * high speed SD, SDHC and SDIO cards.  Not enabling that bit
+	 * makes all cards work.  So let's just ignore that bit for now
+	 * and revisit this issue if problems for not enabling this bit
+	 * are ever reported.
+	 */
+#if 0
 	if (ios->timing == MMC_TIMING_MMC_HS ||
 	    ios->timing == MMC_TIMING_SD_HS)
 		ctrl_reg |= MVSD_HOST_CTRL_HI_SPEED_EN;
+#endif
 
 	host->ctrl = ctrl_reg;
 	mvsd_write(MVSD_HOST_CTRL, ctrl_reg);
@@ -882,3 +916,4 @@ module_param(nodma, int, 0);
 MODULE_AUTHOR("Maen Suleiman, Nicolas Pitre");
 MODULE_DESCRIPTION("Marvell MMC,SD,SDIO Host Controller driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mvsdio");
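
The FIFO_EMPTY workaround above is the bounded-poll idiom: spin on a status bit, but against a deadline so a bit that never comes good cannot wedge the driver. The same shape as a standalone sketch (time() standing in for jiffies, read_hw_state() a stub for the register read):

	#include <stdio.h>
	#include <time.h>

	static unsigned int read_hw_state(void)
	{
		return 1u << 13;	/* stub: pretend FIFO_EMPTY is set */
	}

	static int wait_fifo_empty(void)
	{
		time_t deadline = time(NULL) + 1;	/* ~1s, like jiffies + HZ */
		unsigned int hw_state;

		do {
			if (time(NULL) > deadline) {
				fprintf(stderr, "FIFO_EMPTY bit missing\n");
				return -1;	/* give up instead of hanging */
			}
			hw_state = read_hw_state();
		} while (!(hw_state & (1u << 13)));
		return 0;
	}

	int main(void) { return wait_fifo_empty() ? 1 : 0; }
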
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index b4a615c55f28..f4cbe473670e 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -140,6 +140,8 @@ struct mxcmci_host {
 	struct work_struct datawork;
 };
 
+static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
+
 static inline int mxcmci_use_dma(struct mxcmci_host *host)
 {
 	return host->do_dma;
@@ -160,7 +162,7 @@ static void mxcmci_softreset(struct mxcmci_host *host)
 	writew(0xff, host->base + MMC_REG_RES_TO);
 }
 
-static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
+static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 {
 	unsigned int nob = data->blocks;
 	unsigned int blksz = data->blksz;
@@ -168,6 +170,7 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 #ifdef HAS_DMA
 	struct scatterlist *sg;
 	int i;
+	int ret;
 #endif
 	if (data->flags & MMC_DATA_STREAM)
 		nob = 0xffff;
@@ -183,7 +186,7 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 	for_each_sg(data->sg, sg, data->sg_len, i) {
 		if (sg->offset & 3 || sg->length & 3) {
 			host->do_dma = 0;
-			return;
+			return 0;
 		}
 	}
 
@@ -192,23 +195,30 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 		host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg,
 					     data->sg_len, host->dma_dir);
 
-		imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize,
-				 host->res->start + MMC_REG_BUFFER_ACCESS,
-				 DMA_MODE_READ);
+		ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents,
+				       datasize,
+				       host->res->start + MMC_REG_BUFFER_ACCESS,
+				       DMA_MODE_READ);
 	} else {
 		host->dma_dir = DMA_TO_DEVICE;
 		host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg,
 					     data->sg_len, host->dma_dir);
 
-		imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize,
-				 host->res->start + MMC_REG_BUFFER_ACCESS,
-				 DMA_MODE_WRITE);
+		ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents,
+				       datasize,
+				       host->res->start + MMC_REG_BUFFER_ACCESS,
+				       DMA_MODE_WRITE);
 	}
 
+	if (ret) {
+		dev_err(mmc_dev(host->mmc), "failed to setup DMA : %d\n", ret);
+		return ret;
+	}
 	wmb();
 
 	imx_dma_enable(host->dma);
 #endif /* HAS_DMA */
+	return 0;
 }
 
 static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd,
@@ -345,8 +355,11 @@ static int mxcmci_poll_status(struct mxcmci_host *host, u32 mask)
 		stat = readl(host->base + MMC_REG_STATUS);
 		if (stat & STATUS_ERR_MASK)
 			return stat;
-		if (time_after(jiffies, timeout))
+		if (time_after(jiffies, timeout)) {
+			mxcmci_softreset(host);
+			mxcmci_set_clk_rate(host, host->clock);
 			return STATUS_TIME_OUT_READ;
+		}
 		if (stat & mask)
 			return 0;
 		cpu_relax();
@@ -531,6 +544,7 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
 {
 	struct mxcmci_host *host = mmc_priv(mmc);
 	unsigned int cmdat = host->cmdat;
+	int error;
 
 	WARN_ON(host->req != NULL);
 
@@ -540,7 +554,12 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
 	host->do_dma = 1;
 #endif
 	if (req->data) {
-		mxcmci_setup_data(host, req->data);
+		error = mxcmci_setup_data(host, req->data);
+		if (error) {
+			req->cmd->error = error;
+			goto out;
+		}
+
 
 		cmdat |= CMD_DAT_CONT_DATA_ENABLE;
 
@@ -548,7 +567,9 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
 			cmdat |= CMD_DAT_CONT_WRITE;
 	}
 
-	if (mxcmci_start_cmd(host, req->cmd, cmdat))
+	error = mxcmci_start_cmd(host, req->cmd, cmdat);
+out:
+	if (error)
 		mxcmci_finish_request(host, req);
 }
 
@@ -724,7 +745,9 @@ static int mxcmci_probe(struct platform_device *pdev)
 		goto out_clk_put;
 	}
 
-	mmc->f_min = clk_get_rate(host->clk) >> 7;
+	mmc->f_min = clk_get_rate(host->clk) >> 16;
+	if (mmc->f_min < 400000)
+		mmc->f_min = 400000;
 	mmc->f_max = clk_get_rate(host->clk) >> 1;
 
 	/* recommended in data sheet */
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index bfa25c01c872..dceb5ee3bda0 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -822,7 +822,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 		del_timer(&host->cmd_abort_timer);
 		host->abort = 1;
 		OMAP_MMC_WRITE(host, IE, 0);
-		disable_irq(host->irq);
+		disable_irq_nosync(host->irq);
 		schedule_work(&host->cmd_abort_work);
 		return IRQ_HANDLED;
 	}
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index e62a22a7f00c..c40cb96255a2 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -680,7 +680,7 @@ static void mmc_omap_dma_cb(int lch, u16 ch_status, void *data)
 	host->dma_ch = -1;
 	/*
 	 * DMA Callback: run in interrupt context.
-	 * mutex_unlock will through a kernel warning if used.
+	 * mutex_unlock will throw a kernel warning if used.
 	 */
 	up(&host->sem);
 }
diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index 3ff4ac3abe8b..128c614d11aa 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -55,7 +55,13 @@ static u32 esdhc_readl(struct sdhci_host *host, int reg)
 
 static u16 esdhc_readw(struct sdhci_host *host, int reg)
 {
-	return in_be16(host->ioaddr + (reg ^ 0x2));
+	u16 ret;
+
+	if (unlikely(reg == SDHCI_HOST_VERSION))
+		ret = in_be16(host->ioaddr + reg);
+	else
+		ret = in_be16(host->ioaddr + (reg ^ 0x2));
+	return ret;
 }
 
 static u8 esdhc_readb(struct sdhci_host *host, int reg)
@@ -277,6 +283,7 @@ static int __devexit sdhci_of_remove(struct of_device *ofdev)
 static const struct of_device_id sdhci_of_match[] = {
 	{ .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, },
 	{ .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, },
+	{ .compatible = "fsl,esdhc", .data = &sdhci_esdhc, },
 	{ .compatible = "generic-sdhci", },
 	{},
 };
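
The `reg ^ 0x2` in esdhc_readw() compensates for a byte-swapped (big-endian) register bus: a 16-bit register at SDHCI offset N sits in the other halfword of its 32-bit word, i.e. at N^2, except for the host version register, which this eSDHC exposes at its natural offset. A small sketch of the offset munging (SDHCI_HOST_VERSION is 0xFE in sdhci.h; the rest of the layout is assumed for illustration):

	#include <assert.h>

	#define SDHCI_HOST_VERSION 0xFE

	static int be_word_offset(int reg)
	{
		if (reg == SDHCI_HOST_VERSION)
			return reg;		/* eSDHC quirk: natural offset */
		return reg ^ 0x2;		/* swap halfword within the word */
	}

	int main(void)
	{
		assert(be_word_offset(0x04) == 0x06);
		assert(be_word_offset(0x06) == 0x04);
		assert(be_word_offset(SDHCI_HOST_VERSION) == 0xFE);
		return 0;
	}
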
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 0119220de7d0..02700f769b8a 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -407,16 +407,17 @@ static int __init nand_davinci_probe(struct platform_device *pdev)
 	}
 	info->chip.ecc.mode	= ecc_mode;
 
-	info->clk = clk_get(&pdev->dev, "AEMIFCLK");
+	info->clk = clk_get(&pdev->dev, "aemif");
 	if (IS_ERR(info->clk)) {
 		ret = PTR_ERR(info->clk);
-		dev_dbg(&pdev->dev, "unable to get AEMIFCLK, err %d\n", ret);
+		dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
 		goto err_clk;
 	}
 
 	ret = clk_enable(info->clk);
 	if (ret < 0) {
-		dev_dbg(&pdev->dev, "unable to enable AEMIFCLK, err %d\n", ret);
+		dev_dbg(&pdev->dev, "unable to enable AEMIF clock, err %d\n",
+			ret);
 		goto err_clk_enable;
 	}
 
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index f3548d048014..40c26080ecda 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -831,6 +831,7 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
 		break;
 
 	case NAND_CMD_READID:
+		host->col_addr = 0;
 		send_read_id(host);
 		break;
 
@@ -867,6 +868,7 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 	mtd->priv = this;
 	mtd->owner = THIS_MODULE;
 	mtd->dev.parent = &pdev->dev;
+	mtd->name = "mxc_nand";
 
 	/* 50 us command delay time */
 	this->chip_delay = 5;
@@ -882,8 +884,10 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 	this->verify_buf = mxc_nand_verify_buf;
 
 	host->clk = clk_get(&pdev->dev, "nfc");
-	if (IS_ERR(host->clk))
+	if (IS_ERR(host->clk)) {
+		err = PTR_ERR(host->clk);
 		goto eclk;
+	}
 
 	clk_enable(host->clk);
 	host->clk_act = 1;
@@ -896,7 +900,7 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 
 	host->regs = ioremap(res->start, res->end - res->start + 1);
 	if (!host->regs) {
-		err = -EIO;
+		err = -ENOMEM;
 		goto eres;
 	}
 
@@ -1011,30 +1015,35 @@ static int __devexit mxcnd_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM
 static int mxcnd_suspend(struct platform_device *pdev, pm_message_t state)
 {
-	struct mtd_info *info = platform_get_drvdata(pdev);
+	struct mtd_info *mtd = platform_get_drvdata(pdev);
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
 	int ret = 0;
 
 	DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND suspend\n");
-	if (info)
-		ret = info->suspend(info);
-
-	/* Disable the NFC clock */
-	clk_disable(nfc_clk);	/* FIXME */
+	if (mtd) {
+		ret = mtd->suspend(mtd);
+		/* Disable the NFC clock */
+		clk_disable(host->clk);
+	}
 
 	return ret;
 }
 
 static int mxcnd_resume(struct platform_device *pdev)
 {
-	struct mtd_info *info = platform_get_drvdata(pdev);
+	struct mtd_info *mtd = platform_get_drvdata(pdev);
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
 	int ret = 0;
 
 	DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND resume\n");
-	/* Enable the NFC clock */
-	clk_enable(nfc_clk);	/* FIXME */
 
-	if (info)
-		info->resume(info);
+	if (mtd) {
+		/* Enable the NFC clock */
+		clk_enable(host->clk);
+		mtd->resume(mtd);
+	}
 
 	return ret;
 }
@@ -1055,13 +1064,7 @@ static struct platform_driver mxcnd_driver = {
 
 static int __init mxc_nd_init(void)
 {
-	/* Register the device driver structure. */
-	pr_info("MXC MTD nand Driver\n");
-	if (platform_driver_probe(&mxcnd_driver, mxcnd_probe) != 0) {
-		printk(KERN_ERR "Driver register failed for mxcnd_driver\n");
-		return -ENODEV;
-	}
-	return 0;
+	return platform_driver_probe(&mxcnd_driver, mxcnd_probe);
 }
 
 static void __exit mxc_nd_cleanup(void)
diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c
index fbb371921991..682aad897081 100644
--- a/drivers/net/3c509.c
+++ b/drivers/net/3c509.c
@@ -480,9 +480,13 @@ static int pnp_registered;
 
 #ifdef CONFIG_EISA
 static struct eisa_device_id el3_eisa_ids[] = {
+		{ "TCM5090" },
+		{ "TCM5091" },
 		{ "TCM5092" },
 		{ "TCM5093" },
+		{ "TCM5094" },
 		{ "TCM5095" },
+		{ "TCM5098" },
 		{ "" }
 };
 MODULE_DEVICE_TABLE(eisa, el3_eisa_ids);
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 1fc4602a6ff2..a1c25cb4669f 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -102,7 +102,7 @@ obj-$(CONFIG_HAMACHI) += hamachi.o
 obj-$(CONFIG_NET) += Space.o loopback.o
 obj-$(CONFIG_SEEQ8005) += seeq8005.o
 obj-$(CONFIG_NET_SB1000) += sb1000.o
-obj-$(CONFIG_MAC8390) += mac8390.o 8390.o
+obj-$(CONFIG_MAC8390) += mac8390.o
 obj-$(CONFIG_APNE) += apne.o 8390.o
 obj-$(CONFIG_PCMCIA_PCNET) += 8390.o
 obj-$(CONFIG_HP100) += hp100.o
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c
index fb57b750866b..1342418fb209 100644
--- a/drivers/net/atl1e/atl1e_main.c
+++ b/drivers/net/atl1e/atl1e_main.c
@@ -37,6 +37,7 @@ char atl1e_driver_version[] = DRV_VERSION;
  */
 static struct pci_device_id atl1e_pci_tbl[] = {
 	{PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATTANSIC_L1E)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, 0x1066)},
 	/* required last entry */
 	{ 0 }
 };
diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 0ab22540bf59..4e817126e280 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -82,6 +82,12 @@
 
 #include "atl1.h"
 
+#define ATLX_DRIVER_VERSION "2.1.3"
+MODULE_AUTHOR("Xiong Huang <xiong.huang@atheros.com>, \
+	Chris Snook <csnook@redhat.com>, Jay Cliburn <jcliburn@gmail.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(ATLX_DRIVER_VERSION);
+
 /* Temporary hack for merging atl1 and atl2 */
 #include "atlx.c"
 
diff --git a/drivers/net/atlx/atlx.h b/drivers/net/atlx/atlx.h
index 297a03da6b7f..14054b75aa62 100644
--- a/drivers/net/atlx/atlx.h
+++ b/drivers/net/atlx/atlx.h
@@ -29,12 +29,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 
-#define ATLX_DRIVER_VERSION "2.1.3"
-MODULE_AUTHOR("Xiong Huang <xiong.huang@atheros.com>, \
-	Chris Snook <csnook@redhat.com>, Jay Cliburn <jcliburn@gmail.com>");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(ATLX_DRIVER_VERSION);
-
 #define ATLX_ERR_PHY			2
 #define ATLX_ERR_PHY_SPEED		7
 #define ATLX_ERR_PHY_RES		8
diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index 9f971ed6b58d..b4da18213324 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -979,22 +979,7 @@ static int bfin_mac_open(struct net_device *dev)
 	return 0;
 }
 
-static const struct net_device_ops bfin_mac_netdev_ops = {
-	.ndo_open		= bfin_mac_open,
-	.ndo_stop		= bfin_mac_close,
-	.ndo_start_xmit		= bfin_mac_hard_start_xmit,
-	.ndo_set_mac_address	= bfin_mac_set_mac_address,
-	.ndo_tx_timeout		= bfin_mac_timeout,
-	.ndo_set_multicast_list	= bfin_mac_set_multicast_list,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	.ndo_poll_controller	= bfin_mac_poll,
-#endif
-};
-
 /*
- *
  * this makes the board clean up everything that it can
  * and not talk to the outside world. Caused by
  * an 'ifconfig ethX down'
@@ -1019,6 +1004,20 @@ static int bfin_mac_close(struct net_device *dev)
 	return 0;
 }
 
+static const struct net_device_ops bfin_mac_netdev_ops = {
+	.ndo_open		= bfin_mac_open,
+	.ndo_stop		= bfin_mac_close,
+	.ndo_start_xmit		= bfin_mac_hard_start_xmit,
+	.ndo_set_mac_address	= bfin_mac_set_mac_address,
+	.ndo_tx_timeout		= bfin_mac_timeout,
+	.ndo_set_multicast_list	= bfin_mac_set_multicast_list,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= eth_change_mtu,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= bfin_mac_poll,
+#endif
+};
+
 static int __devinit bfin_mac_probe(struct platform_device *pdev)
 {
 	struct net_device *ndev;
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 714df2b675e6..c888e97c9671 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -85,8 +85,8 @@ struct fl_pg_chunk {
 	struct page		*page;
 	void			*va;
 	unsigned int		offset;
-	u64			*p_cnt;
-	DECLARE_PCI_UNMAP_ADDR(mapping);
+	unsigned long		*p_cnt;
+	dma_addr_t		mapping;
 };
 
 struct rx_desc;
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 7ea48414c6cb..17858b9a5830 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -2496,14 +2496,16 @@ static void check_link_status(struct adapter *adapter)
 	for_each_port(adapter, i) {
 		struct net_device *dev = adapter->port[i];
 		struct port_info *p = netdev_priv(dev);
+		int link_fault;
 
 		spin_lock_irq(&adapter->work_lock);
-		if (p->link_fault) {
+		link_fault = p->link_fault;
+		spin_unlock_irq(&adapter->work_lock);
+
+		if (link_fault) {
 			t3_link_fault(adapter, i);
-			spin_unlock_irq(&adapter->work_lock);
 			continue;
 		}
-		spin_unlock_irq(&adapter->work_lock);
 
 		if (!(p->phy.caps & SUPPORTED_IRQ) && netif_running(dev)) {
 			t3_xgm_intr_disable(adapter, i);
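The restructuring above is the classic take-a-snapshot-then-drop-the-lock pattern: t3_link_fault() is too heavy to run under work_lock, so the flag is copied out while the lock is held and acted on afterwards. A minimal sketch of the pattern with illustrative names (do_slow_work() stands in for t3_link_fault()):

	#include <linux/spinlock.h>

	struct port_state {
		spinlock_t lock;
		int link_fault;
	};

	static void do_slow_work(void)
	{
		/* stand-in for t3_link_fault(): too heavy to run under the lock */
	}

	static void check_port(struct port_state *p)
	{
		int link_fault;

		spin_lock_irq(&p->lock);
		link_fault = p->link_fault;	/* snapshot shared state ... */
		spin_unlock_irq(&p->lock);	/* ... then drop the lock */

		if (link_fault)
			do_slow_work();		/* lock no longer held here */
	}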
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 26d3587f3399..b3ee2bc1a005 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -355,7 +355,7 @@ static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
 		(*d->pg_chunk.p_cnt)--;
 		if (!*d->pg_chunk.p_cnt)
 			pci_unmap_page(pdev,
-				       pci_unmap_addr(&d->pg_chunk, mapping),
+				       d->pg_chunk.mapping,
 				       q->alloc_size, PCI_DMA_FROMDEVICE);
 
 		put_page(d->pg_chunk.page);
@@ -454,7 +454,7 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
 		q->pg_chunk.offset = 0;
 		mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
 				       0, q->alloc_size, PCI_DMA_FROMDEVICE);
-		pci_unmap_addr_set(&q->pg_chunk, mapping, mapping);
+		q->pg_chunk.mapping = mapping;
 	}
 	sd->pg_chunk = q->pg_chunk;
 
@@ -511,8 +511,7 @@ static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 nomem:				q->alloc_failed++;
 			break;
 		}
-		mapping = pci_unmap_addr(&sd->pg_chunk, mapping) +
-			  sd->pg_chunk.offset;
+		mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
 		pci_unmap_addr_set(sd, dma_addr, mapping);
 
 		add_one_rx_chunk(mapping, d, q->gen);
@@ -881,7 +880,7 @@ recycle:
 	(*sd->pg_chunk.p_cnt)--;
 	if (!*sd->pg_chunk.p_cnt)
 		pci_unmap_page(adap->pdev,
-			       pci_unmap_addr(&sd->pg_chunk, mapping),
+			       sd->pg_chunk.mapping,
 			       fl->alloc_size,
 			       PCI_DMA_FROMDEVICE);
 	if (!skb) {
@@ -2096,7 +2095,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 	(*sd->pg_chunk.p_cnt)--;
 	if (!*sd->pg_chunk.p_cnt)
 		pci_unmap_page(adap->pdev,
-			       pci_unmap_addr(&sd->pg_chunk, mapping),
+			       sd->pg_chunk.mapping,
 			       fl->alloc_size,
 			       PCI_DMA_FROMDEVICE);
 
diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c
index 4f68aeb2679a..4950d5d789ae 100644
--- a/drivers/net/cxgb3/t3_hw.c
+++ b/drivers/net/cxgb3/t3_hw.c
@@ -1274,6 +1274,11 @@ void t3_link_fault(struct adapter *adapter, int port_id)
 			      A_XGM_INT_STATUS + mac->offset);
 	link_fault &= F_LINKFAULTCHANGE;
 
+	link_ok = lc->link_ok;
+	speed = lc->speed;
+	duplex = lc->duplex;
+	fc = lc->fc;
+
 	phy->ops->get_link_status(phy, &link_ok, &speed, &duplex, &fc);
 
 	if (link_fault) {
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index b1419e21b46b..fffb006b7d95 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -4027,8 +4027,9 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
 		                 PCI_DMA_FROMDEVICE);
 
 		length = le16_to_cpu(rx_desc->length);
-
-		if (unlikely(!(status & E1000_RXD_STAT_EOP))) {
+		/* !EOP means multiple descriptors were used to store a single
+		 * packet, also make sure the frame isn't just CRC only */
+		if (unlikely(!(status & E1000_RXD_STAT_EOP) || (length <= 4))) {
 			/* All receives must fit into a single buffer */
 			E1000_DBG("%s: Receive packet consumed multiple"
 				  " buffers\n", netdev->name);
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index f9a846b1b92f..9f6a68fb7b45 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -897,6 +897,12 @@ enum {
 };
 static int phy_cross = NV_CROSSOVER_DETECTION_DISABLED;
 
+/*
+ * Power down phy when interface is down (persists through reboot;
+ * older Linux and other OSes may not power it up again)
+ */
+static int phy_power_down = 0;
+
 static inline struct fe_priv *get_nvpriv(struct net_device *dev)
 {
 	return netdev_priv(dev);
@@ -1485,7 +1491,10 @@ static int phy_init(struct net_device *dev)
 
 	/* restart auto negotiation, power down phy */
 	mii_control = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ);
-	mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE | BMCR_PDOWN);
+	mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE);
+	if (phy_power_down) {
+		mii_control |= BMCR_PDOWN;
+	}
 	if (mii_rw(dev, np->phyaddr, MII_BMCR, mii_control)) {
 		return PHY_ERROR;
 	}
@@ -5513,7 +5522,7 @@ static int nv_close(struct net_device *dev)
 
 	nv_drain_rxtx(dev);
 
-	if (np->wolenabled) {
+	if (np->wolenabled || !phy_power_down) {
 		writel(NVREG_PFF_ALWAYS|NVREG_PFF_MYADDR, base + NvRegPacketFilterFlags);
 		nv_start_rx(dev);
 	} else {
@@ -6367,6 +6376,8 @@ module_param(dma_64bit, int, 0);
 MODULE_PARM_DESC(dma_64bit, "High DMA is enabled by setting to 1 and disabled by setting to 0.");
 module_param(phy_cross, int, 0);
 MODULE_PARM_DESC(phy_cross, "Phy crossover detection for Realtek 8201 phy is enabled by setting to 1 and disabled by setting to 0.");
+module_param(phy_power_down, int, 0);
+MODULE_PARM_DESC(phy_power_down, "Power down phy and disable link when interface is down (1), or leave phy powered up (0).");
 
 MODULE_AUTHOR("Manfred Spraul <manfred@colorfullife.com>");
 MODULE_DESCRIPTION("Reverse Engineered nForce ethernet driver");
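For reference, the plumbing these forcedeth hunks add is the standard module-parameter pattern: a file-scope variable made settable at load time, e.g. "modprobe forcedeth phy_power_down=1". A stripped-down sketch of just that mechanism:

	#include <linux/module.h>

	/* 0 (default): leave the PHY powered when the interface goes down;
	 * 1: power it down, accepting that some setups never power it back up. */
	static int phy_power_down;
	module_param(phy_power_down, int, 0);
	MODULE_PARM_DESC(phy_power_down, "Power down phy when interface is down");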
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index b2c49679bba7..a0519184e54e 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -1885,8 +1885,17 @@ int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit)
 
 		if (unlikely(!newskb))
 			newskb = skb;
-		else if (skb)
+		else if (skb) {
+			/*
+			 * We need to reset ->data to what it
+			 * was before gfar_new_skb() re-aligned
+			 * it to an RXBUF_ALIGNMENT boundary
+			 * before we put the skb back on the
+			 * recycle list.
+			 */
+			skb->data = skb->head + NET_SKB_PAD;
 			__skb_queue_head(&priv->rx_recycle, skb);
+		}
 	} else {
 		/* Increment the number of packets */
 		dev->stats.rx_packets++;
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index 0642d52aef5c..cf352961ae9b 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -259,7 +259,7 @@ extern const char gfar_driver_version[];
 	(IEVENT_RXC | IEVENT_BSY | IEVENT_EBERR | IEVENT_MSRO | \
 	 IEVENT_BABT | IEVENT_TXC | IEVENT_TXE | IEVENT_LC \
 	 | IEVENT_CRL | IEVENT_XFUN | IEVENT_DPE | IEVENT_PERR \
-	 | IEVENT_MAG)
+	 | IEVENT_MAG | IEVENT_BABR)
 
 #define IMASK_INIT_CLEAR	0x00000000
 #define IMASK_BABR		0x80000000
diff --git a/drivers/net/mac8390.c b/drivers/net/mac8390.c
index 8e884869a05b..22e74a0e0361 100644
--- a/drivers/net/mac8390.c
+++ b/drivers/net/mac8390.c
@@ -304,7 +304,7 @@ struct net_device * __init mac8390_probe(int unit)
 	if (!MACH_IS_MAC)
 		return ERR_PTR(-ENODEV);
 
-	dev = alloc_ei_netdev();
+	dev = ____alloc_ei_netdev(0);
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
@@ -481,15 +481,15 @@ void cleanup_module(void)
 static const struct net_device_ops mac8390_netdev_ops = {
 	.ndo_open 		= mac8390_open,
 	.ndo_stop		= mac8390_close,
-	.ndo_start_xmit		= ei_start_xmit,
-	.ndo_tx_timeout		= ei_tx_timeout,
-	.ndo_get_stats		= ei_get_stats,
-	.ndo_set_multicast_list = ei_set_multicast_list,
+	.ndo_start_xmit		= __ei_start_xmit,
+	.ndo_tx_timeout		= __ei_tx_timeout,
+	.ndo_get_stats		= __ei_get_stats,
+	.ndo_set_multicast_list = __ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
-	.ndo_poll_controller	= ei_poll,
+	.ndo_poll_controller	= __ei_poll,
 #endif
 };
 
diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c
index ac6fc499b280..e5c98a98ad37 100644
--- a/drivers/net/mlx4/en_tx.c
+++ b/drivers/net/mlx4/en_tx.c
@@ -426,7 +426,7 @@ void mlx4_en_poll_tx_cq(unsigned long data)
 
 	INC_PERF_COUNTER(priv->pstats.tx_poll);
 
-	if (!spin_trylock(&ring->comp_lock)) {
+	if (!spin_trylock_irq(&ring->comp_lock)) {
 		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
 		return;
 	}
@@ -439,7 +439,7 @@ void mlx4_en_poll_tx_cq(unsigned long data)
 	if (inflight && priv->port_up)
 		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
 
-	spin_unlock(&ring->comp_lock);
+	spin_unlock_irq(&ring->comp_lock);
 }
 
 static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
@@ -482,9 +482,9 @@ static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
 
 	/* Poll the CQ every mlx4_en_TX_MODER_POLL packets */
 	if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0)
-		if (spin_trylock(&ring->comp_lock)) {
+		if (spin_trylock_irq(&ring->comp_lock)) {
 			mlx4_en_process_tx_cq(priv->dev, cq);
-			spin_unlock(&ring->comp_lock);
+			spin_unlock_irq(&ring->comp_lock);
 		}
 }
 
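These _irq conversions follow the usual locking rule: once a spinlock can be taken from interrupt (or timer/softirq) context, every other acquirer must disable local interrupts, otherwise the interrupt can fire on the CPU that already holds the lock and spin on it forever. A minimal sketch, names illustrative:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(comp_lock);

	static void process_context_path(void)
	{
		/* masks local interrupts for the critical section, so the
		 * irq-context user below can never preempt us mid-hold */
		spin_lock_irq(&comp_lock);
		/* ... process completions ... */
		spin_unlock_irq(&comp_lock);
	}

	static void irq_context_path(void)
	{
		if (spin_trylock(&comp_lock)) {	/* irqs already off here */
			/* ... process completions ... */
			spin_unlock(&comp_lock);
		}
	}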
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 0b6e8c896835..3b19e0ce290f 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -66,7 +66,6 @@ static const int multicast_filter_limit = 32;
 #define RX_DMA_BURST	6	/* Maximum PCI burst, '6' is 1024 */
 #define TX_DMA_BURST	6	/* Maximum PCI burst, '6' is 1024 */
 #define EarlyTxThld	0x3F	/* 0x3F means NO early transmit */
-#define RxPacketMaxSize	0x3FE8	/* 16K - 1 - ETH_HLEN - VLAN - CRC... */
 #define SafeMtu		0x1c20	/* ... actually life sucks beyond ~7k */
 #define InterFrameGap	0x03	/* 3 means InterFrameGap = the shortest one */
 
@@ -2357,10 +2356,10 @@ static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
 	return cmd;
 }
 
-static void rtl_set_rx_max_size(void __iomem *ioaddr)
+static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
 {
 	/* Low hurts. Let's disable the filtering. */
-	RTL_W16(RxMaxSize, 16383);
+	RTL_W16(RxMaxSize, rx_buf_sz);
 }
 
 static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
@@ -2407,7 +2406,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	if ((tp->mac_version == RTL_GIGA_MAC_VER_01) ||
 	    (tp->mac_version == RTL_GIGA_MAC_VER_02) ||
@@ -2668,7 +2667,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
 
@@ -2846,7 +2845,7 @@ static void rtl_hw_start_8101(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
 
@@ -3554,54 +3553,64 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 	int handled = 0;
 	int status;
 
+	/* loop handling interrupts until we have no new ones or
+	 * we hit a invalid/hotplug case.
+	 */
 	status = RTL_R16(IntrStatus);
+	while (status && status != 0xffff) {
+		handled = 1;
 
-	/* hotplug/major error/no more work/shared irq */
-	if ((status == 0xffff) || !status)
-		goto out;
-
-	handled = 1;
-
-	if (unlikely(!netif_running(dev))) {
-		rtl8169_asic_down(ioaddr);
-		goto out;
-	}
-
-	status &= tp->intr_mask;
-	RTL_W16(IntrStatus,
-		(status & RxFIFOOver) ? (status | RxOverflow) : status);
-
-	if (!(status & tp->intr_event))
-		goto out;
-
-	/* Work around for rx fifo overflow */
-	if (unlikely(status & RxFIFOOver) &&
-	    (tp->mac_version == RTL_GIGA_MAC_VER_11)) {
-		netif_stop_queue(dev);
-		rtl8169_tx_timeout(dev);
-		goto out;
-	}
-
-	if (unlikely(status & SYSErr)) {
-		rtl8169_pcierr_interrupt(dev);
-		goto out;
-	}
-
-	if (status & LinkChg)
-		rtl8169_check_link_status(dev, tp, ioaddr);
-
-	if (status & tp->napi_event) {
-		RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
-		tp->intr_mask = ~tp->napi_event;
-
-		if (likely(napi_schedule_prep(&tp->napi)))
-			__napi_schedule(&tp->napi);
-		else if (netif_msg_intr(tp)) {
-			printk(KERN_INFO "%s: interrupt %04x in poll\n",
-			       dev->name, status);
+		/* Handle all of the error cases first.  These will reset
+		 * the chip, so just exit the loop.
+		 */
+		if (unlikely(!netif_running(dev))) {
+			rtl8169_asic_down(ioaddr);
+			break;
+		}
+
+		/* Work around for rx fifo overflow */
+		if (unlikely(status & RxFIFOOver) &&
+		    (tp->mac_version == RTL_GIGA_MAC_VER_11)) {
+			netif_stop_queue(dev);
+			rtl8169_tx_timeout(dev);
+			break;
+		}
+
+		if (unlikely(status & SYSErr)) {
+			rtl8169_pcierr_interrupt(dev);
+			break;
+		}
+
+		if (status & LinkChg)
+			rtl8169_check_link_status(dev, tp, ioaddr);
+
+		/* We need to see the lastest version of tp->intr_mask to
+		 * avoid ignoring an MSI interrupt and having to wait for
+		 * another event which may never come.
+		 */
+		smp_rmb();
+		if (status & tp->intr_mask & tp->napi_event) {
+			RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
+			tp->intr_mask = ~tp->napi_event;
+
+			if (likely(napi_schedule_prep(&tp->napi)))
+				__napi_schedule(&tp->napi);
+			else if (netif_msg_intr(tp)) {
+				printk(KERN_INFO "%s: interrupt %04x in poll\n",
+				       dev->name, status);
+			}
 		}
+
+		/* We only get a new MSI interrupt when all active irq
+		 * sources on the chip have been acknowledged.  So, ack
+		 * everything we've seen and check if new sources have become
+		 * active to avoid blocking all interrupts from the chip.
+		 */
+		RTL_W16(IntrStatus,
+			(status & RxFIFOOver) ? (status | RxOverflow) : status);
+		status = RTL_R16(IntrStatus);
 	}
-out:
+
 	return IRQ_RETVAL(handled);
 }
 
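This rewrite exists because MSI is edge-style: the device raises a new message only after all its pending sources are acknowledged, so a handler that acks once and returns can strand an event forever. The loop's skeleton, with an illustrative device type and readw/writew standing in for RTL_R16/RTL_W16:

	#include <linux/interrupt.h>
	#include <linux/io.h>

	struct example_dev { void __iomem *ioaddr; };	/* illustrative */

	static u16 read_status(struct example_dev *d)
	{
		return readw(d->ioaddr);	/* stand-in for RTL_R16 */
	}

	static void ack_status(struct example_dev *d, u16 status)
	{
		writew(status, d->ioaddr);	/* stand-in for RTL_W16 */
	}

	static void service_events(struct example_dev *d, u16 status)
	{
		/* handle link changes, errors, schedule NAPI, ... */
	}

	static irqreturn_t example_isr(int irq, void *dev_id)
	{
		struct example_dev *d = dev_id;
		int handled = 0;
		u16 status = read_status(d);

		while (status && status != 0xffff) {	/* 0xffff: device gone */
			handled = 1;
			service_events(d, status);
			ack_status(d, status);		/* ack what we saw ... */
			status = read_status(d);	/* ... then look again */
		}
		return IRQ_RETVAL(handled);
	}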
@@ -3617,13 +3626,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 
 	if (work_done < budget) {
 		napi_complete(napi);
-		tp->intr_mask = 0xffff;
-		/*
-		 * 20040426: the barrier is not strictly required but the
-		 * behavior of the irq handler could be less predictable
-		 * without it. Btw, the lack of flush for the posted pci
-		 * write is safe - FR
+
+		/* We need for force the visibility of tp->intr_mask
+		 * for other CPUs, as we can loose an MSI interrupt
+		 * and potentially wait for a retransmit timeout if we don't.
+		 * The posted write to IntrMask is safe, as it will
+		 * eventually make it to the chip and we won't loose anything
+		 * until it does.
 		 */
+		tp->intr_mask = 0xffff;
 		smp_wmb();
 		RTL_W16(IntrMask, tp->intr_event);
 	}
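The smp_wmb() here is one half of a pair: the poll routine publishes the unmasked tp->intr_mask before re-enabling interrupts in hardware, and the smp_rmb() added to the interrupt handler above guarantees it reads that fresh value before testing status against the mask. Sketched in isolation (kernel build assumed for the barrier macros; the field and callback are illustrative):

	#include <linux/types.h>	/* barriers come from the arch headers */

	static u16 intr_mask;	/* illustrative stand-in for tp->intr_mask */

	static void napi_poll_done(void (*unmask_hw)(void))
	{
		intr_mask = 0xffff;	/* publish the unmask in memory ... */
		smp_wmb();		/* ... before re-enabling the hardware */
		unmask_hw();
	}

	static bool isr_should_napi(u16 status, u16 napi_event)
	{
		smp_rmb();		/* pairs with smp_wmb() above */
		return (status & intr_mask & napi_event) != 0;
	}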
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index ca4151a9e222..17851321b7fd 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -505,27 +505,52 @@ int i2400mu_suspend(struct usb_interface *iface, pm_message_t pm_msg)
 #ifdef CONFIG_PM
 	struct usb_device *usb_dev = i2400mu->usb_dev;
 #endif
+	unsigned is_autosuspend = 0;
 	struct i2400m *i2400m = &i2400mu->i2400m;
 
+#ifdef CONFIG_PM
+	if (usb_dev->auto_pm > 0)
+		is_autosuspend = 1;
+#endif
+
 	d_fnstart(3, dev, "(iface %p pm_msg %u)\n", iface, pm_msg.event);
 	if (i2400m->updown == 0)
 		goto no_firmware;
-	d_printf(1, dev, "fw up, requesting standby\n");
+	if (i2400m->state == I2400M_SS_DATA_PATH_CONNECTED && is_autosuspend) {
+		/* ugh -- the device is connected and this suspend
+		 * request is an autosuspend one (not a system standby
+		 * / hibernate).
+		 *
+		 * The only way the device can go to standby is if the
+		 * link with the base station is in IDLE mode; that
+		 * were the case, we'd be in status
+		 * I2400M_SS_CONNECTED_IDLE. But we are not.
+		 *
+		 * If we *tell* him to go power save now, it'll reset
+		 * as a precautionary measure, so if this is an
+		 * autosuspend thing, say no and it'll come back
+		 * later, when the link is IDLE
+		 */
+		result = -EBADF;
+		d_printf(1, dev, "fw up, link up, not-idle, autosuspend: "
+			 "not entering powersave\n");
+		goto error_not_now;
+	}
+	d_printf(1, dev, "fw up: entering powersave\n");
 	atomic_dec(&i2400mu->do_autopm);
 	result = i2400m_cmd_enter_powersave(i2400m);
 	atomic_inc(&i2400mu->do_autopm);
-#ifdef CONFIG_PM
-	if (result < 0 && usb_dev->auto_pm == 0) {
+	if (result < 0 && !is_autosuspend) {
 		/* System suspend, can't fail */
 		dev_err(dev, "failed to suspend, will reset on resume\n");
 		result = 0;
 	}
-#endif
 	if (result < 0)
 		goto error_enter_powersave;
 	i2400mu_notification_release(i2400mu);
-	d_printf(1, dev, "fw up, got standby\n");
+	d_printf(1, dev, "powersave requested\n");
 error_enter_powersave:
+error_not_now:
 no_firmware:
 	d_fnend(3, dev, "(iface %p pm_msg %u) = %d\n",
 		iface, pm_msg.event, result);
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 8a0823588c51..3d94e7dfea69 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -430,6 +430,7 @@ config RTL8187
 	  ASUS P5B Deluxe
 	  Toshiba Satellite Pro series of laptops
 	  Asus Wireless Link
+	  Linksys WUSB54GC-EU
 
 	  Thanks to Realtek for their support!
 
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index d73475739127..9eabf4d1f2e7 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -6467,6 +6467,7 @@ static int airo_get_encode(struct net_device *dev,
 {
 	struct airo_info *local = dev->ml_priv;
 	int index = (dwrq->flags & IW_ENCODE_INDEX) - 1;
+	int wep_key_len;
 	u8 buf[16];
 
 	if (!local->wep_capable)
@@ -6500,11 +6501,13 @@ static int airo_get_encode(struct net_device *dev,
 	dwrq->flags |= index + 1;
 
 	/* Copy the key to the user buffer */
-	dwrq->length = get_wep_key(local, index, &buf[0], sizeof(buf));
-	if (dwrq->length != -1)
-		memcpy(extra, buf, dwrq->length);
-	else
+	wep_key_len = get_wep_key(local, index, &buf[0], sizeof(buf));
+	if (wep_key_len < 0) {
 		dwrq->length = 0;
+	} else {
+		dwrq->length = wep_key_len;
+		memcpy(extra, buf, dwrq->length);
+	}
 
 	return 0;
 }
@@ -6617,7 +6620,7 @@ static int airo_get_encodeext(struct net_device *dev,
 	struct airo_info *local = dev->ml_priv;
 	struct iw_point *encoding = &wrqu->encoding;
 	struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
-	int idx, max_key_len;
+	int idx, max_key_len, wep_key_len;
 	u8 buf[16];
 
 	if (!local->wep_capable)
@@ -6661,11 +6664,13 @@ static int airo_get_encodeext(struct net_device *dev,
 	memset(extra, 0, 16);
 
 	/* Copy the key to the user buffer */
-	ext->key_len = get_wep_key(local, idx, &buf[0], sizeof(buf));
-	if (ext->key_len != -1)
-		memcpy(extra, buf, ext->key_len);
-	else
+	wep_key_len = get_wep_key(local, idx, &buf[0], sizeof(buf));
+	if (wep_key_len < 0) {
 		ext->key_len = 0;
+	} else {
+		ext->key_len = wep_key_len;
+		memcpy(extra, buf, ext->key_len);
+	}
 
 	return 0;
 }
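Both airo hunks fix the same bug class: get_wep_key() returns -1 on failure, but dwrq->length and ext->key_len are unsigned 16-bit fields, so the old "!= -1" test compared 65535 against -1, always passed, and the memcpy() ran with a huge length. A stand-alone user-space demonstration of why the check must happen while the value is still signed:

	#include <stdio.h>
	#include <stdint.h>

	static int get_key(void)
	{
		return -1;			/* error path, like get_wep_key() */
	}

	int main(void)
	{
		uint16_t length = get_key();	/* old code: -1 stored as 65535 */
		int wep_key_len = get_key();	/* fixed code: keep it signed */

		if (length != -1)		/* 65535 != -1: always true */
			printf("old check passes; would memcpy %u bytes!\n", length);

		if (wep_key_len < 0)
			puts("new check catches the error");
		return 0;
	}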
diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c
index 744f4f4dd3d1..8d93ca4651b9 100644
--- a/drivers/net/wireless/at76c50x-usb.c
+++ b/drivers/net/wireless/at76c50x-usb.c
@@ -1873,18 +1873,18 @@ static void at76_dwork_hw_scan(struct work_struct *work)
 	if (ret != CMD_STATUS_COMPLETE) {
 		queue_delayed_work(priv->hw->workqueue, &priv->dwork_hw_scan,
 				   SCAN_POLL_INTERVAL);
-		goto exit;
+		mutex_unlock(&priv->mtx);
+		return;
 	}
 
-	ieee80211_scan_completed(priv->hw, false);
-
 	if (is_valid_ether_addr(priv->bssid))
 		at76_join(priv);
 
-	ieee80211_wake_queues(priv->hw);
-
-exit:
 	mutex_unlock(&priv->mtx);
+
+	ieee80211_scan_completed(priv->hw, false);
+
+	ieee80211_wake_queues(priv->hw);
 }
 
 static int at76_hw_scan(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/ath5k/phy.c b/drivers/net/wireless/ath5k/phy.c
index 9e2faae5ae94..b48b29dca3d2 100644
--- a/drivers/net/wireless/ath5k/phy.c
+++ b/drivers/net/wireless/ath5k/phy.c
@@ -1487,28 +1487,35 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 *stepR,
 {
 	s8 tmp;
 	s16 min_pwrL, min_pwrR;
-	s16 pwr_i = pwrL[0];
-
-	do {
-		pwr_i--;
-		tmp = (s8) ath5k_get_interpolated_value(pwr_i,
-						pwrL[0], pwrL[1],
-						stepL[0], stepL[1]);
-
-	} while (tmp > 1);
-
-	min_pwrL = pwr_i;
-
-	pwr_i = pwrR[0];
-	do {
-		pwr_i--;
-		tmp = (s8) ath5k_get_interpolated_value(pwr_i,
-						pwrR[0], pwrR[1],
-						stepR[0], stepR[1]);
-
-	} while (tmp > 1);
-
-	min_pwrR = pwr_i;
+	s16 pwr_i;
+
+	if (pwrL[0] == pwrL[1])
+		min_pwrL = pwrL[0];
+	else {
+		pwr_i = pwrL[0];
+		do {
+			pwr_i--;
+			tmp = (s8) ath5k_get_interpolated_value(pwr_i,
+							pwrL[0], pwrL[1],
+							stepL[0], stepL[1]);
+		} while (tmp > 1);
+
+		min_pwrL = pwr_i;
+	}
+
+	if (pwrR[0] == pwrR[1])
+		min_pwrR = pwrR[0];
+	else {
+		pwr_i = pwrR[0];
+		do {
+			pwr_i--;
+			tmp = (s8) ath5k_get_interpolated_value(pwr_i,
+							pwrR[0], pwrR[1],
+							stepR[0], stepR[1]);
+		} while (tmp > 1);
+
+		min_pwrR = pwr_i;
+	}
 
 	/* Keep the right boundary so that it works for both curves */
 	return max(min_pwrL, min_pwrR);
diff --git a/drivers/net/wireless/ath5k/reset.c b/drivers/net/wireless/ath5k/reset.c
index 7a17d31b2fd9..5f72c111c2e8 100644
--- a/drivers/net/wireless/ath5k/reset.c
+++ b/drivers/net/wireless/ath5k/reset.c
@@ -26,7 +26,7 @@
 \*****************************/
 
 #include <linux/pci.h>		/* To determine if a card is pci-e */
-#include <linux/bitops.h>	/* For get_bitmask_order */
+#include <linux/log2.h>
 #include "ath5k.h"
 #include "reg.h"
 #include "base.h"
@@ -69,10 +69,10 @@ static inline int ath5k_hw_write_ofdm_timings(struct ath5k_hw *ah,
 
 	/* Get exponent
 	 * ALGO: coef_exp = 14 - highest set bit position */
-	coef_exp = get_bitmask_order(coef_scaled);
+	coef_exp = ilog2(coef_scaled);
 
 	/* Doesn't make sense if it's zero*/
-	if (!coef_exp)
+	if (!coef_scaled || !coef_exp)
 		return -EINVAL;
 
 	/* Note: we've shifted coef_scaled by 24 */
@@ -359,7 +359,7 @@ int ath5k_hw_nic_wakeup(struct ath5k_hw *ah, int flags, bool initial)
 		mode |= AR5K_PHY_MODE_FREQ_5GHZ;
 
 		if (ah->ah_radio == AR5K_RF5413)
-			clock |= AR5K_PHY_PLL_40MHZ_5413;
+			clock = AR5K_PHY_PLL_40MHZ_5413;
 		else
 			clock |= AR5K_PHY_PLL_40MHZ;
 
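The PLL fix is a one-character change with a real story: "clock" still holds bits chosen by an earlier branch, and OR-ing the RF5413 value on top leaves those stale bits set, giving the register a mixed, invalid PLL setting. A tiny stand-alone demonstration with illustrative values:

	#include <stdio.h>

	#define PLL_DEFAULT	0x19	/* illustrative, set by an earlier branch */
	#define PLL_40MHZ_5413	0x04	/* illustrative RF5413 value */

	int main(void)
	{
		unsigned int clock = PLL_DEFAULT;

		clock |= PLL_40MHZ_5413;	/* buggy: 0x19 | 0x04 = 0x1d */
		printf("or:     0x%02x\n", clock);

		clock = PLL_40MHZ_5413;		/* fixed: exactly 0x04 */
		printf("assign: 0x%02x\n", clock);
		return 0;
	}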
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index e5ca2511a81a..9452461ce864 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -46,7 +46,7 @@
 #include "iwl-6000-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL5000_UCODE_API_MAX 1
+#define IWL5000_UCODE_API_MAX 2
 #define IWL5150_UCODE_API_MAX 2
 
 /* Lowest firmware API version supported */
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 3bb28db4a40f..f46ba2475776 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -669,13 +669,6 @@ static int iwl_set_mode(struct iwl_priv *priv, int mode)
 	if (!iwl_is_ready_rf(priv))
 		return -EAGAIN;
 
-	cancel_delayed_work(&priv->scan_check);
-	if (iwl_scan_cancel_timeout(priv, 100)) {
-		IWL_WARN(priv, "Aborted scan still in progress after 100ms\n");
-		IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n");
-		return -EAGAIN;
-	}
-
 	iwl_commit_rxon(priv);
 
 	return 0;
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index e7c65c4f741b..6330b91e37ce 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -227,9 +227,6 @@ static void iwl_rx_scan_complete_notif(struct iwl_priv *priv,
 	/* The HW is no longer scanning */
 	clear_bit(STATUS_SCAN_HW, &priv->status);
 
-	/* The scan completion notification came in, so kill that timer... */
-	cancel_delayed_work(&priv->scan_check);
-
 	IWL_DEBUG_INFO(priv, "Scan pass on %sGHz took %dms\n",
 		       (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) ?
 						"2.4" : "5.2",
@@ -712,6 +709,8 @@ static void iwl_bg_request_scan(struct work_struct *data)
 
 	mutex_lock(&priv->mutex);
 
+	cancel_delayed_work(&priv->scan_check);
+
 	if (!iwl_is_ready(priv)) {
 		IWL_WARN(priv, "request scan called when driver not ready.\n");
 		goto done;
@@ -925,6 +924,8 @@ void iwl_bg_scan_completed(struct work_struct *work)
 
 	IWL_DEBUG_SCAN(priv, "SCAN complete scan\n");
 
+	cancel_delayed_work(&priv->scan_check);
+
 	ieee80211_scan_completed(priv->hw, false);
 
 	if (test_bit(STATUS_EXIT_PENDING, &priv->status))
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 4cce66133500..ff4d0e41d7c4 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -782,13 +782,6 @@ static int iwl3945_set_mode(struct iwl_priv *priv, int mode)
782 if (!iwl_is_ready_rf(priv)) 782 if (!iwl_is_ready_rf(priv))
783 return -EAGAIN; 783 return -EAGAIN;
784 784
785 cancel_delayed_work(&priv->scan_check);
786 if (iwl_scan_cancel_timeout(priv, 100)) {
787 IWL_WARN(priv, "Aborted scan still in progress after 100ms\n");
788 IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n");
789 return -EAGAIN;
790 }
791
792 iwl3945_commit_rxon(priv); 785 iwl3945_commit_rxon(priv);
793 786
794 return 0; 787 return 0;
@@ -3298,6 +3291,8 @@ static void iwl3945_bg_request_scan(struct work_struct *data)
 
 	mutex_lock(&priv->mutex);
 
+	cancel_delayed_work(&priv->scan_check);
+
 	if (!iwl_is_ready(priv)) {
 		IWL_WARN(priv, "request scan called when driver not ready.\n");
 		goto done;
diff --git a/drivers/net/wireless/rt2x00/rt2x00debug.c b/drivers/net/wireless/rt2x00/rt2x00debug.c
index 07d378ef0b46..7b3ee8c2eaef 100644
--- a/drivers/net/wireless/rt2x00/rt2x00debug.c
+++ b/drivers/net/wireless/rt2x00/rt2x00debug.c
@@ -138,7 +138,7 @@ void rt2x00debug_update_crypto(struct rt2x00_dev *rt2x00dev,
 
 	if (cipher == CIPHER_TKIP_NO_MIC)
 		cipher = CIPHER_TKIP;
-	if (cipher == CIPHER_NONE || cipher > CIPHER_MAX)
+	if (cipher == CIPHER_NONE || cipher >= CIPHER_MAX)
 		return;
 
 	/* Remove CIPHER_NONE index */
diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c
index bac6cfba6abd..d51ba0a88c23 100644
--- a/drivers/net/wireless/rtl818x/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c
@@ -71,6 +71,8 @@ static struct usb_device_id rtl8187_table[] __devinitdata = {
 	{USB_DEVICE(0x18E8, 0x6232), .driver_info = DEVICE_RTL8187},
 	/* AirLive */
 	{USB_DEVICE(0x1b75, 0x8187), .driver_info = DEVICE_RTL8187},
+	/* Linksys */
+	{USB_DEVICE(0x1737, 0x0073), .driver_info = DEVICE_RTL8187B},
 	{}
 };
 
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index f0e99d4c066b..242257b19441 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -78,16 +78,20 @@ void free_cpu_buffers(void)
 	op_ring_buffer_write = NULL;
 }
 
+#define RB_EVENT_HDR_SIZE  4
+
 int alloc_cpu_buffers(void)
 {
 	int i;
 
 	unsigned long buffer_size = oprofile_cpu_buffer_size;
+	unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
+						 RB_EVENT_HDR_SIZE);
 
-	op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
+	op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
 	if (!op_ring_buffer_read)
 		goto fail;
-	op_ring_buffer_write = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
+	op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
 	if (!op_ring_buffer_write)
 		goto fail;
 
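The bug here is a unit mismatch: ring_buffer_alloc() takes its size in bytes, while oprofile_cpu_buffer_size counts samples, so the buffers came out far too small. The added conversion as stand-alone arithmetic -- the struct layout and entry count are illustrative; RB_EVENT_HDR_SIZE is the 4-byte ring-buffer event header from the hunk:

	#include <stdio.h>

	struct op_sample {			/* illustrative payload layout */
		unsigned long eip, event, data[2];
	};
	#define RB_EVENT_HDR_SIZE 4

	int main(void)
	{
		unsigned long entries = 65536;	/* example sample count */
		unsigned long bytes = entries *
			(sizeof(struct op_sample) + RB_EVENT_HDR_SIZE);

		printf("%lu samples need %lu bytes, not %lu\n",
		       entries, bytes, entries);
		return 0;
	}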
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 73348c4047e9..4a9cc92d4d18 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -702,7 +702,7 @@ static unsigned int iosapic_startup_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void iosapic_set_affinity_irq(unsigned int irq,
+static int iosapic_set_affinity_irq(unsigned int irq,
 				     const struct cpumask *dest)
 {
 	struct vector_info *vi = iosapic_get_vector(irq);
@@ -712,7 +712,7 @@ static void iosapic_set_affinity_irq(unsigned int irq,
 
 	dest_cpu = cpu_check_affinity(irq, dest);
 	if (dest_cpu < 0)
-		return;
+		return -1;
 
 	cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu));
 	vi->txn_addr = txn_affinity_addr(irq, dest_cpu);
@@ -724,6 +724,8 @@ static void iosapic_set_affinity_irq(unsigned int irq,
 	iosapic_set_irt_data(vi, &dummy_d0, &d1);
 	iosapic_wr_irt_entry(vi, d0, d1);
 	spin_unlock_irqrestore(&iosapic_lock, flags);
+
+	return 0;
 }
 #endif
 
729 731
diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c
index e6a7e847ee80..ea31a452b153 100644
--- a/drivers/parport/parport_gsc.c
+++ b/drivers/parport/parport_gsc.c
@@ -352,8 +352,8 @@ static int __devinit parport_init_chip(struct parisc_device *dev)
 	unsigned long port;
 
 	if (!dev->irq) {
-		printk(KERN_WARNING "IRQ not found for parallel device at 0x%lx\n",
-			dev->hpa.start);
+		printk(KERN_WARNING "IRQ not found for parallel device at 0x%llx\n",
+			(unsigned long long)dev->hpa.start);
 		return -ENODEV;
 	}
 
diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index 0ebca450ed29..dffa5d4fb298 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -614,7 +614,10 @@ parport_register_device(struct parport *port, const char *name,
 	 * pardevice fields.  -arca
 	 */
 	port->ops->init_state(tmp, tmp->state);
-	parport_device_proc_register(tmp);
+	if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) {
+		port->proc_device = tmp;
+		parport_device_proc_register(tmp);
+	}
 	return tmp;
 
  out_free_all:
@@ -646,10 +649,14 @@ void parport_unregister_device(struct pardevice *dev)
 	}
 #endif
 
-	parport_device_proc_unregister(dev);
-
 	port = dev->port->physport;
 
+	if (port->proc_device == dev) {
+		port->proc_device = NULL;
+		clear_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags);
+		parport_device_proc_unregister(dev);
+	}
+
 	if (port->cad == dev) {
 		printk(KERN_DEBUG "%s: %s forgot to release port\n",
 		       port->name, dev->name);
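The parport change uses the standard claim-once idiom: test_and_set_bit() atomically sets a flag and reports its previous value, so of two racing registrants exactly one sees 0 and becomes the owner of the single per-port proc entry; teardown is symmetric, and only the recorded owner unregisters. A reduced sketch with illustrative types (proc_register()/proc_unregister() stand in for the parport proc helpers):

	#include <linux/bitops.h>

	#define DEVPROC_REGISTERED	0	/* bit index in devflags */

	struct pardev;
	struct parport_s {
		unsigned long devflags;
		struct pardev *proc_device;
	};

	static void proc_register(struct pardev *dev) { /* create proc entry */ }
	static void proc_unregister(struct pardev *dev) { /* remove it */ }

	static void claim_proc(struct parport_s *port, struct pardev *dev)
	{
		/* returns the old bit value: only the first caller gets 0 */
		if (!test_and_set_bit(DEVPROC_REGISTERED, &port->devflags)) {
			port->proc_device = dev;
			proc_register(dev);
		}
	}

	static void release_proc(struct parport_s *port, struct pardev *dev)
	{
		if (port->proc_device == dev) {	/* only the owner tears down */
			port->proc_device = NULL;
			clear_bit(DEVPROC_REGISTERED, &port->devflags);
			proc_unregister(dev);
		}
	}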
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index 4fc168b70095..e68d5f20ffb3 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -129,7 +129,6 @@ struct acpiphp_func {
 	struct acpiphp_bridge *bridge;	/* Ejectable PCI-to-PCI bridge */
 
 	struct list_head sibling;
-	struct pci_dev *pci_dev;
 	struct notifier_block nb;
 	acpi_handle handle;
 
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index a33794d9e0dc..3a6064bce561 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -32,9 +32,6 @@
 
 /*
  * Lifetime rules for pci_dev:
- * - The one in acpiphp_func has its refcount elevated by pci_get_slot()
- *   when the driver is loaded or when an insertion event occurs. It loses
- *   a refcount when its ejected or the driver unloads.
  * - The one in acpiphp_bridge has its refcount elevated by pci_get_slot()
  *   when the bridge is scanned and it loses a refcount when the bridge
  *   is removed.
@@ -130,6 +127,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 	unsigned long long adr, sun;
 	int device, function, retval;
 	struct pci_bus *pbus = bridge->pci_bus;
+	struct pci_dev *pdev;
 
 	if (!acpi_pci_check_ejectable(pbus, handle) && !is_dock_device(handle))
 		return AE_OK;
@@ -213,10 +211,10 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 	newfunc->slot = slot;
 	list_add_tail(&newfunc->sibling, &slot->funcs);
 
-	/* associate corresponding pci_dev */
-	newfunc->pci_dev = pci_get_slot(pbus, PCI_DEVFN(device, function));
-	if (newfunc->pci_dev) {
+	pdev = pci_get_slot(pbus, PCI_DEVFN(device, function));
+	if (pdev) {
 		slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON);
+		pci_dev_put(pdev);
 	}
 
 	if (is_dock_device(handle)) {
@@ -617,7 +615,6 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge)
 		if (ACPI_FAILURE(status))
 			err("failed to remove notify handler\n");
 	}
-	pci_dev_put(func->pci_dev);
 	list_del(list);
 	kfree(func);
 }
@@ -1101,22 +1098,24 @@ static int __ref enable_device(struct acpiphp_slot *slot)
 	pci_enable_bridges(bus);
 	pci_bus_add_devices(bus);
 
-	/* associate pci_dev to our representation */
 	list_for_each (l, &slot->funcs) {
 		func = list_entry(l, struct acpiphp_func, sibling);
-		func->pci_dev = pci_get_slot(bus, PCI_DEVFN(slot->device,
-							func->function));
-		if (!func->pci_dev)
+		dev = pci_get_slot(bus, PCI_DEVFN(slot->device,
+						  func->function));
+		if (!dev)
 			continue;
 
-		if (func->pci_dev->hdr_type != PCI_HEADER_TYPE_BRIDGE &&
-		    func->pci_dev->hdr_type != PCI_HEADER_TYPE_CARDBUS)
+		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE &&
+		    dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) {
+			pci_dev_put(dev);
 			continue;
+		}
 
 		status = find_p2p_bridge(func->handle, (u32)1, bus, NULL);
 		if (ACPI_FAILURE(status))
 			warn("find_p2p_bridge failed (error code = 0x%x)\n",
 				status);
+		pci_dev_put(dev);
 	}
 
 	slot->flags |= SLOT_ENABLED;
@@ -1142,17 +1141,14 @@ static void disable_bridges(struct pci_bus *bus)
  */
 static int disable_device(struct acpiphp_slot *slot)
 {
-	int retval = 0;
 	struct acpiphp_func *func;
-	struct list_head *l;
+	struct pci_dev *pdev;
 
 	/* is this slot already disabled? */
 	if (!(slot->flags & SLOT_ENABLED))
 		goto err_exit;
 
-	list_for_each (l, &slot->funcs) {
-		func = list_entry(l, struct acpiphp_func, sibling);
-
+	list_for_each_entry(func, &slot->funcs, sibling) {
 		if (func->bridge) {
 			/* cleanup p2p bridges under this P2P bridge */
 			cleanup_p2p_bridge(func->bridge->handle,
@@ -1160,35 +1156,28 @@ static int disable_device(struct acpiphp_slot *slot)
 			func->bridge = NULL;
 		}
 
-		if (func->pci_dev) {
-			pci_stop_bus_device(func->pci_dev);
-			if (func->pci_dev->subordinate) {
-				disable_bridges(func->pci_dev->subordinate);
-				pci_disable_device(func->pci_dev);
+		pdev = pci_get_slot(slot->bridge->pci_bus,
+				    PCI_DEVFN(slot->device, func->function));
+		if (pdev) {
+			pci_stop_bus_device(pdev);
+			if (pdev->subordinate) {
+				disable_bridges(pdev->subordinate);
+				pci_disable_device(pdev);
 			}
+			pci_remove_bus_device(pdev);
+			pci_dev_put(pdev);
 		}
 	}
 
-	list_for_each (l, &slot->funcs) {
-		func = list_entry(l, struct acpiphp_func, sibling);
-
+	list_for_each_entry(func, &slot->funcs, sibling) {
 		acpiphp_unconfigure_ioapics(func->handle);
 		acpiphp_bus_trim(func->handle);
-		/* try to remove anyway.
-		 * acpiphp_bus_add might have been failed */
-
-		if (!func->pci_dev)
-			continue;
-
-		pci_remove_bus_device(func->pci_dev);
-		pci_dev_put(func->pci_dev);
-		func->pci_dev = NULL;
 	}
 
 	slot->flags &= (~SLOT_ENABLED);
 
- err_exit:
-	return retval;
+err_exit:
+	return 0;
 }
 
 
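The acpiphp rework drops the long-lived cached pci_dev pointer in favour of short get/use/put windows: pci_get_slot() returns a referenced device, and every path out of the window -- including the early "continue" above -- must drop that reference with pci_dev_put() before losing the pointer. The shape of the pattern, reduced to a sketch:

	#include <linux/pci.h>

	static void example_use_slot(struct pci_bus *bus, unsigned int devfn)
	{
		struct pci_dev *pdev;

		pdev = pci_get_slot(bus, devfn);	/* takes a reference */
		if (!pdev)
			return;

		/* ... use pdev only inside this window ... */

		pci_dev_put(pdev);			/* balance the get */
	}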
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index dd18f857dfb0..42e4260c3b12 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -153,45 +153,47 @@ int ibmphp_init_devno(struct slot **cur_slot)
153 return -1; 153 return -1;
154 } 154 }
155 for (loop = 0; loop < len; loop++) { 155 for (loop = 0; loop < len; loop++) {
156 if ((*cur_slot)->number == rtable->slots[loop].slot) { 156 if ((*cur_slot)->number == rtable->slots[loop].slot &&
157 if ((*cur_slot)->bus == rtable->slots[loop].bus) { 157 (*cur_slot)->bus == rtable->slots[loop].bus) {
158 struct io_apic_irq_attr irq_attr;
159
158 (*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn); 160 (*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
159 for (i = 0; i < 4; i++) 161 for (i = 0; i < 4; i++)
160 (*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus, 162 (*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
161 (int) (*cur_slot)->device, i); 163 (int) (*cur_slot)->device, i,
162 164 &irq_attr);
163 debug("(*cur_slot)->irq[0] = %x\n", 165
164 (*cur_slot)->irq[0]); 166 debug("(*cur_slot)->irq[0] = %x\n",
165 debug("(*cur_slot)->irq[1] = %x\n", 167 (*cur_slot)->irq[0]);
166 (*cur_slot)->irq[1]); 168 debug("(*cur_slot)->irq[1] = %x\n",
167 debug("(*cur_slot)->irq[2] = %x\n", 169 (*cur_slot)->irq[1]);
168 (*cur_slot)->irq[2]); 170 debug("(*cur_slot)->irq[2] = %x\n",
169 debug("(*cur_slot)->irq[3] = %x\n", 171 (*cur_slot)->irq[2]);
170 (*cur_slot)->irq[3]); 172 debug("(*cur_slot)->irq[3] = %x\n",
171 173 (*cur_slot)->irq[3]);
172 debug("rtable->exlusive_irqs = %x\n", 174
175 debug("rtable->exlusive_irqs = %x\n",
173 rtable->exclusive_irqs); 176 rtable->exclusive_irqs);
174 debug("rtable->slots[loop].irq[0].bitmap = %x\n", 177 debug("rtable->slots[loop].irq[0].bitmap = %x\n",
175 rtable->slots[loop].irq[0].bitmap); 178 rtable->slots[loop].irq[0].bitmap);
176 debug("rtable->slots[loop].irq[1].bitmap = %x\n", 179 debug("rtable->slots[loop].irq[1].bitmap = %x\n",
177 rtable->slots[loop].irq[1].bitmap); 180 rtable->slots[loop].irq[1].bitmap);
178 debug("rtable->slots[loop].irq[2].bitmap = %x\n", 181 debug("rtable->slots[loop].irq[2].bitmap = %x\n",
179 rtable->slots[loop].irq[2].bitmap); 182 rtable->slots[loop].irq[2].bitmap);
180 debug("rtable->slots[loop].irq[3].bitmap = %x\n", 183 debug("rtable->slots[loop].irq[3].bitmap = %x\n",
181 rtable->slots[loop].irq[3].bitmap); 184 rtable->slots[loop].irq[3].bitmap);
182 185
183 debug("rtable->slots[loop].irq[0].link = %x\n", 186 debug("rtable->slots[loop].irq[0].link = %x\n",
184 rtable->slots[loop].irq[0].link); 187 rtable->slots[loop].irq[0].link);
185 debug("rtable->slots[loop].irq[1].link = %x\n", 188 debug("rtable->slots[loop].irq[1].link = %x\n",
186 rtable->slots[loop].irq[1].link); 189 rtable->slots[loop].irq[1].link);
187 debug("rtable->slots[loop].irq[2].link = %x\n", 190 debug("rtable->slots[loop].irq[2].link = %x\n",
188 rtable->slots[loop].irq[2].link); 191 rtable->slots[loop].irq[2].link);
189 debug("rtable->slots[loop].irq[3].link = %x\n", 192 debug("rtable->slots[loop].irq[3].link = %x\n",
190 rtable->slots[loop].irq[3].link); 193 rtable->slots[loop].irq[3].link);
191 debug("end of init_devno\n"); 194 debug("end of init_devno\n");
192 kfree(rtable); 195 kfree(rtable);
193 return 0; 196 return 0;
194 }
195 } 197 }
196 } 198 }
197 199
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 6808d8333ecc..737a1c44b07a 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -98,6 +98,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	int max_irq;
 	int pos;
 	int irq;
+	int node;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
 	if (!pos)
@@ -125,7 +126,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	cfg->msg.address_lo = 0xffffffff;
 	cfg->msg.address_hi = 0xffffffff;
 
-	irq = create_irq();
+	node = dev_to_node(&dev->dev);
+	irq = create_irq_nr(0, node);
 
 	if (irq <= 0) {
 		kfree(cfg);
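The htirq change is part of a wider cpu-to-node conversion, visible again in intr_remapping.c below: allocation helpers now take a NUMA node, and dev_to_node() derives it from the device so the irq descriptor lands in memory local to that node. A sketch using the signatures as they appear in this tree:

	#include <linux/pci.h>
	#include <linux/irq.h>

	static int example_create_irq(struct pci_dev *dev)
	{
		int node = dev_to_node(&dev->dev);	/* device's NUMA node */

		/* irq_want == 0: no preferred number, allocator picks one */
		return create_irq_nr(0, node);
	}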
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index a563fbe559d0..cd389162735f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1972,15 +1972,6 @@ static int __init init_dmars(void)
1972 } 1972 }
1973 } 1973 }
1974 1974
1975#ifdef CONFIG_INTR_REMAP
1976 if (!intr_remapping_enabled) {
1977 ret = enable_intr_remapping(0);
1978 if (ret)
1979 printk(KERN_ERR
1980 "IOMMU: enable interrupt remapping failed\n");
1981 }
1982#endif
1983
1984 /* 1975 /*
1985 * For each rmrr 1976 * For each rmrr
1986 * for each dev attached to rmrr 1977 * for each dev attached to rmrr
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f5e0ea724a6f..3a0cb0bb0593 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -15,6 +15,14 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
 int intr_remapping_enabled;
 
+static int disable_intremap;
+static __init int setup_nointremap(char *str)
+{
+	disable_intremap = 1;
+	return 0;
+}
+early_param("nointremap", setup_nointremap);
+
 struct irq_2_iommu {
 	struct intel_iommu *iommu;
 	u16 irte_index;
@@ -23,15 +31,12 @@ struct irq_2_iommu {
 };
 
 #ifdef CONFIG_GENERIC_HARDIRQS
-static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int node)
 {
 	struct irq_2_iommu *iommu;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node);
+	printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node);
 
 	return iommu;
 }
@@ -48,7 +53,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 	return desc->irq_2_iommu;
 }
 
-static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
+static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
 	struct irq_2_iommu *irq_iommu;
@@ -56,7 +61,7 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
 	/*
 	 * alloc irq desc if not allocated already.
 	 */
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	desc = irq_to_desc_alloc_node(irq, node);
 	if (!desc) {
 		printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 		return NULL;
@@ -65,14 +70,14 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
 	irq_iommu = desc->irq_2_iommu;
 
 	if (!irq_iommu)
-		desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu);
+		desc->irq_2_iommu = get_one_free_irq_2_iommu(node);
 
 	return desc->irq_2_iommu;
 }
 
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
-	return irq_2_iommu_alloc_cpu(irq, boot_cpu_id);
+	return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id));
 }
 
 #else /* !CONFIG_SPARSE_IRQ */
@@ -423,20 +428,6 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
423 readl, (sts & DMA_GSTS_IRTPS), sts); 428 readl, (sts & DMA_GSTS_IRTPS), sts);
424 spin_unlock_irqrestore(&iommu->register_lock, flags); 429 spin_unlock_irqrestore(&iommu->register_lock, flags);
425 430
426 if (mode == 0) {
427 spin_lock_irqsave(&iommu->register_lock, flags);
428
429 /* enable comaptiblity format interrupt pass through */
430 cmd = iommu->gcmd | DMA_GCMD_CFI;
431 iommu->gcmd |= DMA_GCMD_CFI;
432 writel(cmd, iommu->reg + DMAR_GCMD_REG);
433
434 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
435 readl, (sts & DMA_GSTS_CFIS), sts);
436
437 spin_unlock_irqrestore(&iommu->register_lock, flags);
438 }
439
440 /* 431 /*
441 * global invalidation of interrupt entry cache before enabling 432 * global invalidation of interrupt entry cache before enabling
442 * interrupt-remapping. 433 * interrupt-remapping.
@@ -516,6 +507,23 @@ end:
516 spin_unlock_irqrestore(&iommu->register_lock, flags); 507 spin_unlock_irqrestore(&iommu->register_lock, flags);
517} 508}
518 509
510int __init intr_remapping_supported(void)
511{
512 struct dmar_drhd_unit *drhd;
513
514 if (disable_intremap)
515 return 0;
516
517 for_each_drhd_unit(drhd) {
518 struct intel_iommu *iommu = drhd->iommu;
519
520 if (!ecap_ir_support(iommu->ecap))
521 return 0;
522 }
523
524 return 1;
525}
526
519int __init enable_intr_remapping(int eim) 527int __init enable_intr_remapping(int eim)
520{ 528{
521 struct dmar_drhd_unit *drhd; 529 struct dmar_drhd_unit *drhd;
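
Taken together, the intr_remapping.c hunks do three things: they add a "nointremap" early boot parameter, they switch irq_2_iommu allocation from a CPU argument to a NUMA node argument (so the cpu_to_node() lookup happens once at the caller), and they add intr_remapping_supported() so callers can test both the boot parameter and the ecap IR bit of every DRHD unit before committing to enable remapping; the open-coded enable in init_dmars() above is dropped accordingly. A minimal sketch of how a caller might use the new helper; the function name setup_irq_remapping() is illustrative, not part of this patch, and the eim argument 0 mirrors the call removed from init_dmars():

    /* Hypothetical caller: gate enabling on the new helper. */
    static void __init setup_irq_remapping(void)
    {
            if (!intr_remapping_supported())
                    return; /* "nointremap" given, or some unit lacks IR */

            if (enable_intr_remapping(0))
                    printk(KERN_ERR
                           "IOMMU: enable interrupt remapping failed\n");
    }
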
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 34bf0fdf5047..1a91bf9687af 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -557,7 +557,8 @@ static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
 	} else {
 		error = -ENODEV;
 		/* Fall back to PCI_D0 if native PM is not supported */
-		pci_update_current_state(dev, PCI_D0);
+		if (!dev->pm_cap)
+			dev->current_state = PCI_D0;
 	}
 
 	return error;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e3c3e081b834..f1ae2475ffff 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -745,6 +745,8 @@ int pci_setup_device(struct pci_dev *dev)
 
 	/* Early fixups, before probing the BARs */
 	pci_fixup_device(pci_fixup_early, dev);
+	/* device class may be changed after fixup */
+	class = dev->class >> 8;
 
 	switch (dev->hdr_type) {		/* header type */
 	case PCI_HEADER_TYPE_NORMAL:		/* standard header */
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index adf17856bacc..7f207f335bec 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -123,7 +123,7 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev,
 	}
 
 	flags = irq_flags(triggering, polarity, shareable);
-	irq = acpi_register_gsi(gsi, triggering, polarity);
+	irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
 	if (irq >= 0)
 		pcibios_penalize_isa_irq(irq, 1);
 	else
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 8b7983aba8f7..36c21b19e5d7 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1978,7 +1978,8 @@ static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id)
 {
 	struct scsi_cmnd *cmd = tw_dev->srb[request_id];
 
-	scsi_dma_unmap(cmd);
+	if (cmd->SCp.phase == TW_PHASE_SGLIST)
+		scsi_dma_unmap(cmd);
 } /* End twa_unmap_scsi_data() */
 
 /* scsi_host_template initializer */
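
Both 3ware drivers receive the same fix here: scsi_dma_unmap() now runs only for commands whose data was actually mapped, which the drivers record by setting SCp.phase to TW_PHASE_SGLIST on the map side; without the guard, a command that never had a scatter-gather mapping could be unmapped anyway. A sketch of the assumed map-side counterpart (illustrative, not the exact driver code; the same pattern applies to 3w-xxxx.c below):

    /* Tag the command only when a mapping was actually created, so
     * the unmap path can skip commands that were never mapped. */
    static int twa_map_scsi_sg_data(TW_Device_Extension *tw_dev, int request_id)
    {
            struct scsi_cmnd *cmd = tw_dev->srb[request_id];
            int use_sg = scsi_dma_map(cmd);

            if (use_sg > 0)
                    cmd->SCp.phase = TW_PHASE_SGLIST;
            return use_sg;
    }
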
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index c03f1d2c9e2e..faa0fcfed71e 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -6,7 +6,7 @@
    Arnaldo Carvalho de Melo <acme@conectiva.com.br>
    Brad Strand <linux@3ware.com>
 
-   Copyright (C) 1999-2007 3ware Inc.
+   Copyright (C) 1999-2009 3ware Inc.
 
    Kernel compatiblity By:	Andre Hedrick <andre@suse.com>
    Non-Copyright (C) 2000	Andre Hedrick <andre@suse.com>
@@ -1294,7 +1294,8 @@ static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd)
 {
 	dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n");
 
-	scsi_dma_unmap(cmd);
+	if (cmd->SCp.phase == TW_PHASE_SGLIST)
+		scsi_dma_unmap(cmd);
 } /* End tw_unmap_scsi_data() */
 
 /* This function will reset a device extension */
diff --git a/drivers/scsi/3w-xxxx.h b/drivers/scsi/3w-xxxx.h
index 8e71e5e122b3..a5a2ba2561d9 100644
--- a/drivers/scsi/3w-xxxx.h
+++ b/drivers/scsi/3w-xxxx.h
@@ -6,7 +6,7 @@
    Arnaldo Carvalho de Melo <acme@conectiva.com.br>
    Brad Strand <linux@3ware.com>
 
-   Copyright (C) 1999-2007 3ware Inc.
+   Copyright (C) 1999-2009 3ware Inc.
 
    Kernel compatiblity By:	Andre Hedrick <andre@suse.com>
    Non-Copyright (C) 2000	Andre Hedrick <andre@suse.com>
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 8ed2990c826e..fb2740789b68 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -628,6 +628,17 @@ config FCOE
 	---help---
 	  Fibre Channel over Ethernet module
 
+config FCOE_FNIC
+	tristate "Cisco FNIC Driver"
+	depends on PCI && X86
+	select LIBFC
+	help
+	  This is support for the Cisco PCI-Express FCoE HBA.
+
+	  To compile this driver as a module, choose M here and read
+	  <file:Documentation/scsi/scsi.txt>.
+	  The module will be called fnic.
+
 config SCSI_DMX3191D
 	tristate "DMX3191D SCSI support"
 	depends on PCI && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index e7c861ac417d..a5049cfb40ed 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_SCSI_DH) += device_handler/
 obj-$(CONFIG_LIBFC)	+= libfc/
 obj-$(CONFIG_LIBFCOE)	+= fcoe/
 obj-$(CONFIG_FCOE)	+= fcoe/
+obj-$(CONFIG_FCOE_FNIC)	+= fnic/
 obj-$(CONFIG_ISCSI_TCP)	+= libiscsi.o libiscsi_tcp.o iscsi_tcp.o
 obj-$(CONFIG_INFINIBAND_ISER)	+= libiscsi.o
 obj-$(CONFIG_SCSI_A4000T)	+= 53c700.o a4000t.o
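
With the Kconfig entry and this Makefile hook in place, enabling the new driver is a one-line configuration change; an illustrative .config fragment:

    CONFIG_FCOE_FNIC=m

The fnic/ subdirectory's own Makefile (added below) lists the objects linked into the resulting fnic module.
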
diff --git a/drivers/scsi/fnic/Makefile b/drivers/scsi/fnic/Makefile
new file mode 100644
index 000000000000..37c3440bc17c
--- /dev/null
+++ b/drivers/scsi/fnic/Makefile
@@ -0,0 +1,15 @@
+obj-$(CONFIG_FCOE_FNIC) += fnic.o
+
+fnic-y	:= \
+	fnic_attrs.o \
+	fnic_isr.o \
+	fnic_main.o \
+	fnic_res.o \
+	fnic_fcs.o \
+	fnic_scsi.o \
+	vnic_cq.o \
+	vnic_dev.o \
+	vnic_intr.o \
+	vnic_rq.o \
+	vnic_wq_copy.o \
+	vnic_wq.o
diff --git a/drivers/scsi/fnic/cq_desc.h b/drivers/scsi/fnic/cq_desc.h
new file mode 100644
index 000000000000..d1225cf6320e
--- /dev/null
+++ b/drivers/scsi/fnic/cq_desc.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc. All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _CQ_DESC_H_
+#define _CQ_DESC_H_
+
+/*
+ * Completion queue descriptor types
+ */
+enum cq_desc_types {
+	CQ_DESC_TYPE_WQ_ENET = 0,
+	CQ_DESC_TYPE_DESC_COPY = 1,
+	CQ_DESC_TYPE_WQ_EXCH = 2,
+	CQ_DESC_TYPE_RQ_ENET = 3,
+	CQ_DESC_TYPE_RQ_FCP = 4,
+};
+
+/* Completion queue descriptor: 16B
+ *
+ * All completion queues have this basic layout. The
+ * type_specfic area is unique for each completion
+ * queue type.
+ */
+struct cq_desc {
+	__le16 completed_index;
+	__le16 q_number;
+	u8 type_specfic[11];
+	u8 type_color;
+};
+
+#define CQ_DESC_TYPE_BITS	4
+#define CQ_DESC_TYPE_MASK	((1 << CQ_DESC_TYPE_BITS) - 1)
+#define CQ_DESC_COLOR_MASK	1
+#define CQ_DESC_COLOR_SHIFT	7
+#define CQ_DESC_Q_NUM_BITS	10
+#define CQ_DESC_Q_NUM_MASK	((1 << CQ_DESC_Q_NUM_BITS) - 1)
+#define CQ_DESC_COMP_NDX_BITS	12
+#define CQ_DESC_COMP_NDX_MASK	((1 << CQ_DESC_COMP_NDX_BITS) - 1)
+
+static inline void cq_desc_dec(const struct cq_desc *desc_arg,
+	u8 *type, u8 *color, u16 *q_number, u16 *completed_index)
+{
+	const struct cq_desc *desc = desc_arg;
+	const u8 type_color = desc->type_color;
+
+	*color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK;
+
+	/*
+	 * Make sure color bit is read from desc *before* other fields
+	 * are read from desc. Hardware guarantees color bit is last
+	 * bit (byte) written. Adding the rmb() prevents the compiler
+	 * and/or CPU from reordering the reads which would potentially
+	 * result in reading stale values.
+	 */
+
+	rmb();
+
+	*type = type_color & CQ_DESC_TYPE_MASK;
+	*q_number = le16_to_cpu(desc->q_number) & CQ_DESC_Q_NUM_MASK;
+	*completed_index = le16_to_cpu(desc->completed_index) &
+		CQ_DESC_COMP_NDX_MASK;
+}
+
+#endif /* _CQ_DESC_H_ */
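
Note on the color-bit protocol above: the top bit of type_color flips each time the hardware wraps the ring, so the driver can detect newly written descriptors without reading a produced-index register, and the rmb() in cq_desc_dec() guarantees the color test is not reordered after the payload reads. A hedged consumer sketch built only on cq_desc_dec(); the ring structure and names are assumptions for illustration, not the driver's actual vnic_cq API:

    struct my_cq {
            struct cq_desc *ring;
            unsigned int to_clean;  /* next entry to examine */
            unsigned int ring_size;
            u8 last_color;          /* color seen on the previous pass */
    };

    static void my_cq_service(struct my_cq *cq)
    {
            u8 type, color;
            u16 q_number, completed_index;

            for (;;) {
                    cq_desc_dec(&cq->ring[cq->to_clean], &type, &color,
                                &q_number, &completed_index);
                    if (color == cq->last_color)
                            break;  /* hardware has not written this entry */

                    /* ... dispatch on type ... */

                    if (++cq->to_clean == cq->ring_size) {
                            cq->to_clean = 0;
                            cq->last_color = !cq->last_color; /* wrapped */
                    }
            }
    }
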
diff --git a/drivers/scsi/fnic/cq_enet_desc.h b/drivers/scsi/fnic/cq_enet_desc.h
new file mode 100644
index 000000000000..a9fa26f82ddd
--- /dev/null
+++ b/drivers/scsi/fnic/cq_enet_desc.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc. All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _CQ_ENET_DESC_H_
+#define _CQ_ENET_DESC_H_
+
+#include "cq_desc.h"
+
+/* Ethernet completion queue descriptor: 16B */
+struct cq_enet_wq_desc {
+	__le16 completed_index;
+	__le16 q_number;
+	u8 reserved[11];
+	u8 type_color;
+};
+
+static inline void cq_enet_wq_desc_dec(struct cq_enet_wq_desc *desc,
+	u8 *type, u8 *color, u16 *q_number, u16 *completed_index)
+{
+	cq_desc_dec((struct cq_desc *)desc, type,
+		color, q_number, completed_index);
+}
+
+/* Completion queue descriptor: Ethernet receive queue, 16B */
+struct cq_enet_rq_desc {
+	__le16 completed_index_flags;
+	__le16 q_number_rss_type_flags;
+	__le32 rss_hash;
+	__le16 bytes_written_flags;
+	__le16 vlan;
+	__le16 checksum_fcoe;
+	u8 flags;
+	u8 type_color;
+};
+
+#define CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT	(0x1 << 12)
+#define CQ_ENET_RQ_DESC_FLAGS_FCOE		(0x1 << 13)
+#define CQ_ENET_RQ_DESC_FLAGS_EOP		(0x1 << 14)
+#define CQ_ENET_RQ_DESC_FLAGS_SOP		(0x1 << 15)
+
+#define CQ_ENET_RQ_DESC_RSS_TYPE_BITS		4
+#define CQ_ENET_RQ_DESC_RSS_TYPE_MASK \
+	((1 << CQ_ENET_RQ_DESC_RSS_TYPE_BITS) - 1)
+#define CQ_ENET_RQ_DESC_RSS_TYPE_NONE		0
+#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv4		1
+#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4	2
+#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6		3
+#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6	4
+#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX	5
+#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX	6
+
+#define CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC	(0x1 << 14)
+
+#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS	14
+#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK \
+	((1 << CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS) - 1)
+#define CQ_ENET_RQ_DESC_FLAGS_TRUNCATED		(0x1 << 14)
+#define CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED	(0x1 << 15)
+
+#define CQ_ENET_RQ_DESC_FCOE_SOF_BITS		4
+#define CQ_ENET_RQ_DESC_FCOE_SOF_MASK \
+	((1 << CQ_ENET_RQ_DESC_FCOE_SOF_BITS) - 1)
+#define CQ_ENET_RQ_DESC_FCOE_EOF_BITS		8
+#define CQ_ENET_RQ_DESC_FCOE_EOF_MASK \
+	((1 << CQ_ENET_RQ_DESC_FCOE_EOF_BITS) - 1)
+#define CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT		8
+
+#define CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK	(0x1 << 0)
+#define CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK		(0x1 << 0)
+#define CQ_ENET_RQ_DESC_FLAGS_UDP		(0x1 << 1)
+#define CQ_ENET_RQ_DESC_FCOE_ENC_ERROR		(0x1 << 1)
+#define CQ_ENET_RQ_DESC_FLAGS_TCP		(0x1 << 2)
+#define CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK	(0x1 << 3)
+#define CQ_ENET_RQ_DESC_FLAGS_IPV6		(0x1 << 4)
+#define CQ_ENET_RQ_DESC_FLAGS_IPV4		(0x1 << 5)
+#define CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT	(0x1 << 6)
+#define CQ_ENET_RQ_DESC_FLAGS_FCS_OK		(0x1 << 7)
+
+static inline void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc,
+	u8 *type, u8 *color, u16 *q_number, u16 *completed_index,
+	u8 *ingress_port, u8 *fcoe, u8 *eop, u8 *sop, u8 *rss_type,
+	u8 *csum_not_calc, u32 *rss_hash, u16 *bytes_written, u8 *packet_error,
+	u8 *vlan_stripped, u16 *vlan, u16 *checksum, u8 *fcoe_sof,
+	u8 *fcoe_fc_crc_ok, u8 *fcoe_enc_error, u8 *fcoe_eof,
+	u8 *tcp_udp_csum_ok, u8 *udp, u8 *tcp, u8 *ipv4_csum_ok,
+	u8 *ipv6, u8 *ipv4, u8 *ipv4_fragment, u8 *fcs_ok)
+{
+	u16 completed_index_flags = le16_to_cpu(desc->completed_index_flags);
+	u16 q_number_rss_type_flags =
+		le16_to_cpu(desc->q_number_rss_type_flags);
+	u16 bytes_written_flags = le16_to_cpu(desc->bytes_written_flags);
+
+	cq_desc_dec((struct cq_desc *)desc, type,
+		color, q_number, completed_index);
+
+	*ingress_port = (completed_index_flags &
+		CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT) ? 1 : 0;
+	*fcoe = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_FCOE) ?
+		1 : 0;
+	*eop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) ?
+		1 : 0;
+	*sop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_SOP) ?
+		1 : 0;
+
+	*rss_type = (u8)((q_number_rss_type_flags >> CQ_DESC_Q_NUM_BITS) &
+		CQ_ENET_RQ_DESC_RSS_TYPE_MASK);
+	*csum_not_calc = (q_number_rss_type_flags &
+		CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ? 1 : 0;
+
+	*rss_hash = le32_to_cpu(desc->rss_hash);
+
+	*bytes_written = bytes_written_flags &
+		CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+	*packet_error = (bytes_written_flags &
+		CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ? 1 : 0;
+	*vlan_stripped = (bytes_written_flags &
+		CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) ? 1 : 0;
+
+	*vlan = le16_to_cpu(desc->vlan);
+
+	if (*fcoe) {
+		*fcoe_sof = (u8)(le16_to_cpu(desc->checksum_fcoe) &
+			CQ_ENET_RQ_DESC_FCOE_SOF_MASK);
+		*fcoe_fc_crc_ok = (desc->flags &
+			CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0;
+		*fcoe_enc_error = (desc->flags &
+			CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0;
+		*fcoe_eof = (u8)((desc->checksum_fcoe >>
+			CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) &
+			CQ_ENET_RQ_DESC_FCOE_EOF_MASK);
+		*checksum = 0;
+	} else {
+		*fcoe_sof = 0;
+		*fcoe_fc_crc_ok = 0;
+		*fcoe_enc_error = 0;
+		*fcoe_eof = 0;
+		*checksum = le16_to_cpu(desc->checksum_fcoe);
+	}
+
+	*tcp_udp_csum_ok =
+		(desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ? 1 : 0;
+	*udp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_UDP) ? 1 : 0;
+	*tcp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP) ? 1 : 0;
+	*ipv4_csum_ok =
+		(desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ? 1 : 0;
+	*ipv6 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV6) ? 1 : 0;
+	*ipv4 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4) ? 1 : 0;
+	*ipv4_fragment =
+		(desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT) ? 1 : 0;
+	*fcs_ok = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) ? 1 : 0;
+}
+
+#endif /* _CQ_ENET_DESC_H_ */
diff --git a/drivers/scsi/fnic/cq_exch_desc.h b/drivers/scsi/fnic/cq_exch_desc.h
new file mode 100644
index 000000000000..501660cfe228
--- /dev/null
+++ b/drivers/scsi/fnic/cq_exch_desc.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc. All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _CQ_EXCH_DESC_H_
+#define _CQ_EXCH_DESC_H_
+
+#include "cq_desc.h"
+
+/* Exchange completion queue descriptor: 16B */
+struct cq_exch_wq_desc {
+	u16 completed_index;
+	u16 q_number;
+	u16 exchange_id;
+	u8 tmpl;
+	u8 reserved0;
+	u32 reserved1;
+	u8 exch_status;
+	u8 reserved2[2];
+	u8 type_color;
+};
+
+#define CQ_EXCH_WQ_STATUS_BITS	2
+#define CQ_EXCH_WQ_STATUS_MASK	((1 << CQ_EXCH_WQ_STATUS_BITS) - 1)
+
+enum cq_exch_status_types {
+	CQ_EXCH_WQ_STATUS_TYPE_COMPLETE = 0,
+	CQ_EXCH_WQ_STATUS_TYPE_ABORT = 1,
+	CQ_EXCH_WQ_STATUS_TYPE_SGL_EOF = 2,
+	CQ_EXCH_WQ_STATUS_TYPE_TMPL_ERR = 3,
+};
+
+static inline void cq_exch_wq_desc_dec(struct cq_exch_wq_desc *desc_ptr,
+	u8 *type,
+	u8 *color,
+	u16 *q_number,
+	u16 *completed_index,
+	u8 *exch_status)
+{
+	cq_desc_dec((struct cq_desc *)desc_ptr, type,
+		color, q_number, completed_index);
+	*exch_status = desc_ptr->exch_status & CQ_EXCH_WQ_STATUS_MASK;
+}
+
+struct cq_fcp_rq_desc {
+	u16 completed_index_eop_sop_prt;
+	u16 q_number;
+	u16 exchange_id;
+	u16 tmpl;
+	u16 bytes_written;
+	u16 vlan;
+	u8 sof;
+	u8 eof;
+	u8 fcs_fer_fck;
+	u8 type_color;
+};
+
+#define CQ_FCP_RQ_DESC_FLAGS_SOP	(1 << 15)
+#define CQ_FCP_RQ_DESC_FLAGS_EOP	(1 << 14)
+#define CQ_FCP_RQ_DESC_FLAGS_PRT	(1 << 12)
+#define CQ_FCP_RQ_DESC_TMPL_MASK	0x1f
+#define CQ_FCP_RQ_DESC_BYTES_WRITTEN_MASK	0x3fff
+#define CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT	14
+#define CQ_FCP_RQ_DESC_PACKET_ERR_MASK	(1 << CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT)
+#define CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT	15
+#define CQ_FCP_RQ_DESC_VS_STRIPPED_MASK	(1 << CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT)
+#define CQ_FCP_RQ_DESC_FC_CRC_OK_MASK	0x1
+#define CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT	1
+#define CQ_FCP_RQ_DESC_FCOE_ERR_MASK	(1 << CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT)
+#define CQ_FCP_RQ_DESC_FCS_OK_SHIFT	7
+#define CQ_FCP_RQ_DESC_FCS_OK_MASK	(1 << CQ_FCP_RQ_DESC_FCS_OK_SHIFT)
+
+static inline void cq_fcp_rq_desc_dec(struct cq_fcp_rq_desc *desc_ptr,
+	u8 *type,
+	u8 *color,
+	u16 *q_number,
+	u16 *completed_index,
+	u8 *eop,
+	u8 *sop,
+	u8 *fck,
+	u16 *exchange_id,
+	u16 *tmpl,
+	u32 *bytes_written,
+	u8 *sof,
+	u8 *eof,
+	u8 *ingress_port,
+	u8 *packet_err,
+	u8 *fcoe_err,
+	u8 *fcs_ok,
+	u8 *vlan_stripped,
+	u16 *vlan)
+{
+	cq_desc_dec((struct cq_desc *)desc_ptr, type,
+		color, q_number, completed_index);
+	*eop = (desc_ptr->completed_index_eop_sop_prt &
+		CQ_FCP_RQ_DESC_FLAGS_EOP) ? 1 : 0;
+	*sop = (desc_ptr->completed_index_eop_sop_prt &
+		CQ_FCP_RQ_DESC_FLAGS_SOP) ? 1 : 0;
+	*ingress_port =
+		(desc_ptr->completed_index_eop_sop_prt &
+		 CQ_FCP_RQ_DESC_FLAGS_PRT) ? 1 : 0;
+	*exchange_id = desc_ptr->exchange_id;
+	*tmpl = desc_ptr->tmpl & CQ_FCP_RQ_DESC_TMPL_MASK;
+	*bytes_written =
+		desc_ptr->bytes_written & CQ_FCP_RQ_DESC_BYTES_WRITTEN_MASK;
+	*packet_err =
+		(desc_ptr->bytes_written & CQ_FCP_RQ_DESC_PACKET_ERR_MASK) >>
+		CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT;
+	*vlan_stripped =
+		(desc_ptr->bytes_written & CQ_FCP_RQ_DESC_VS_STRIPPED_MASK) >>
+		CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT;
+	*vlan = desc_ptr->vlan;
+	*sof = desc_ptr->sof;
+	*fck = desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FC_CRC_OK_MASK;
+	*fcoe_err = (desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FCOE_ERR_MASK) >>
+		CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT;
+	*eof = desc_ptr->eof;
+	*fcs_ok =
+		(desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FCS_OK_MASK) >>
+		CQ_FCP_RQ_DESC_FCS_OK_SHIFT;
+}
+
+struct cq_sgl_desc {
+	u16 exchange_id;
+	u16 q_number;
+	u32 active_burst_offset;
+	u32 tot_data_bytes;
+	u16 tmpl;
+	u8 sgl_err;
+	u8 type_color;
+};
+
+enum cq_sgl_err_types {
+	CQ_SGL_ERR_NO_ERROR = 0,
+	CQ_SGL_ERR_OVERFLOW,		/* data ran beyond end of SGL */
+	CQ_SGL_ERR_SGL_LCL_ADDR_ERR,	/* sgl access to local vnic addr illegal*/
+	CQ_SGL_ERR_ADDR_RSP_ERR,	/* sgl address error */
+	CQ_SGL_ERR_DATA_RSP_ERR,	/* sgl data rsp error */
+	CQ_SGL_ERR_CNT_ZERO_ERR,	/* SGL count is 0 */
+	CQ_SGL_ERR_CNT_MAX_ERR,		/* SGL count is larger than supported */
+	CQ_SGL_ERR_ORDER_ERR,		/* frames recv on both ports, order err */
+	CQ_SGL_ERR_DATA_LCL_ADDR_ERR,	/* sgl data buf to local vnic addr ill */
+	CQ_SGL_ERR_HOST_CQ_ERR,		/* host cq entry to local vnic addr ill */
+};
+
+#define CQ_SGL_SGL_ERR_MASK	0x1f
+#define CQ_SGL_TMPL_MASK	0x1f
+
+static inline void cq_sgl_desc_dec(struct cq_sgl_desc *desc_ptr,
+	u8 *type,
+	u8 *color,
+	u16 *q_number,
+	u16 *exchange_id,
+	u32 *active_burst_offset,
+	u32 *tot_data_bytes,
+	u16 *tmpl,
+	u8 *sgl_err)
+{
+	/* Cheat a little by assuming exchange_id is the same as completed
+	   index */
+	cq_desc_dec((struct cq_desc *)desc_ptr, type, color, q_number,
+		    exchange_id);
+	*active_burst_offset = desc_ptr->active_burst_offset;
+	*tot_data_bytes = desc_ptr->tot_data_bytes;
+	*tmpl = desc_ptr->tmpl & CQ_SGL_TMPL_MASK;
+	*sgl_err = desc_ptr->sgl_err & CQ_SGL_SGL_ERR_MASK;
+}
+
+#endif /* _CQ_EXCH_DESC_H_ */
diff --git a/drivers/scsi/fnic/fcpio.h b/drivers/scsi/fnic/fcpio.h
new file mode 100644
index 000000000000..12d770d885c5
--- /dev/null
+++ b/drivers/scsi/fnic/fcpio.h
@@ -0,0 +1,780 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc. All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _FCPIO_H_
+#define _FCPIO_H_
+
+#include <linux/if_ether.h>
+
+/*
+ * This header file includes all of the data structures used for
+ * communication by the host driver to the fcp firmware.
+ */
+
+/*
+ * Exchange and sequence id space allocated to the host driver
+ */
+#define FCPIO_HOST_EXCH_RANGE_START	0x1000
+#define FCPIO_HOST_EXCH_RANGE_END	0x1fff
+#define FCPIO_HOST_SEQ_ID_RANGE_START	0x80
+#define FCPIO_HOST_SEQ_ID_RANGE_END	0xff
+
+/*
+ * Command entry type
+ */
+enum fcpio_type {
+	/*
+	 * Initiator request types
+	 */
+	FCPIO_ICMND_16 = 0x1,
+	FCPIO_ICMND_32,
+	FCPIO_ICMND_CMPL,
+	FCPIO_ITMF,
+	FCPIO_ITMF_CMPL,
+
+	/*
+	 * Target request types
+	 */
+	FCPIO_TCMND_16 = 0x11,
+	FCPIO_TCMND_32,
+	FCPIO_TDATA,
+	FCPIO_TXRDY,
+	FCPIO_TRSP,
+	FCPIO_TDRSP_CMPL,
+	FCPIO_TTMF,
+	FCPIO_TTMF_ACK,
+	FCPIO_TABORT,
+	FCPIO_TABORT_CMPL,
+
+	/*
+	 * Misc request types
+	 */
+	FCPIO_ACK = 0x20,
+	FCPIO_RESET,
+	FCPIO_RESET_CMPL,
+	FCPIO_FLOGI_REG,
+	FCPIO_FLOGI_REG_CMPL,
+	FCPIO_ECHO,
+	FCPIO_ECHO_CMPL,
+	FCPIO_LUNMAP_CHNG,
+	FCPIO_LUNMAP_REQ,
+	FCPIO_LUNMAP_REQ_CMPL,
+	FCPIO_FLOGI_FIP_REG,
+	FCPIO_FLOGI_FIP_REG_CMPL,
+};
+
+/*
+ * Header status codes from the firmware
+ */
+enum fcpio_status {
+	FCPIO_SUCCESS = 0,		/* request was successful */
+
+	/*
+	 * If a request to the firmware is rejected, the original request
+	 * header will be returned with the status set to one of the following:
+	 */
+	FCPIO_INVALID_HEADER,		/* header contains invalid data */
+	FCPIO_OUT_OF_RESOURCE,		/* out of resources to complete request */
+	FCPIO_INVALID_PARAM,		/* some parameter in request is invalid */
+	FCPIO_REQ_NOT_SUPPORTED,	/* request type is not supported */
+	FCPIO_IO_NOT_FOUND,		/* requested I/O was not found */
+
+	/*
+	 * Once a request is processed, the firmware will usually return
+	 * a cmpl message type. In cases where errors occurred,
+	 * the header status field would be filled in with one of the following:
+	 */
+	FCPIO_ABORTED = 0x41,		/* request was aborted */
+	FCPIO_TIMEOUT,			/* request was timed out */
+	FCPIO_SGL_INVALID,		/* request was aborted due to sgl error */
+	FCPIO_MSS_INVALID,		/* request was aborted due to mss error */
+	FCPIO_DATA_CNT_MISMATCH,	/* recv/sent more/less data than exp. */
+	FCPIO_FW_ERR,			/* request was terminated due to fw error */
+	FCPIO_ITMF_REJECTED,		/* itmf req was rejected by remote node */
+	FCPIO_ITMF_FAILED,		/* itmf req was failed by remote node */
+	FCPIO_ITMF_INCORRECT_LUN,	/* itmf req targeted incorrect LUN */
+	FCPIO_CMND_REJECTED,		/* request was invalid and rejected */
+	FCPIO_NO_PATH_AVAIL,		/* no paths to the lun was available */
+	FCPIO_PATH_FAILED,		/* i/o sent to current path failed */
+	FCPIO_LUNMAP_CHNG_PEND,		/* i/o rejected due to lunmap change */
+};
+
+/*
+ * The header command tag. All host requests will use the "tag" field
+ * to mark commands with a unique tag. When the firmware responds to
+ * a host request, it will copy the tag field into the response.
+ *
+ * The only firmware requests that will use the rx_id/ox_id fields instead
+ * of the tag field will be the target command and target task management
+ * requests. These two requests do not have corresponding host requests
+ * since they come directly from the FC initiator on the network.
+ */
+struct fcpio_tag {
+	union {
+		u32 req_id;
+		struct {
+			u16 rx_id;
+			u16 ox_id;
+		} ex_id;
+	} u;
+};
+
+static inline void
+fcpio_tag_id_enc(struct fcpio_tag *tag, u32 id)
+{
+	tag->u.req_id = id;
+}
+
+static inline void
+fcpio_tag_id_dec(struct fcpio_tag *tag, u32 *id)
+{
+	*id = tag->u.req_id;
+}
+
+static inline void
+fcpio_tag_exid_enc(struct fcpio_tag *tag, u16 ox_id, u16 rx_id)
+{
+	tag->u.ex_id.rx_id = rx_id;
+	tag->u.ex_id.ox_id = ox_id;
+}
+
+static inline void
+fcpio_tag_exid_dec(struct fcpio_tag *tag, u16 *ox_id, u16 *rx_id)
+{
+	*rx_id = tag->u.ex_id.rx_id;
+	*ox_id = tag->u.ex_id.ox_id;
+}
+
+/*
+ * The header for an fcpio request, whether from the firmware or from the
+ * host driver
+ */
+struct fcpio_header {
+	u8 type;		/* enum fcpio_type */
+	u8 status;		/* header status entry */
+	u16 _resvd;		/* reserved */
+	struct fcpio_tag tag;	/* header tag */
+};
+
+static inline void
+fcpio_header_enc(struct fcpio_header *hdr,
+		 u8 type, u8 status,
+		 struct fcpio_tag tag)
+{
+	hdr->type = type;
+	hdr->status = status;
+	hdr->_resvd = 0;
+	hdr->tag = tag;
+}
+
+static inline void
+fcpio_header_dec(struct fcpio_header *hdr,
+		 u8 *type, u8 *status,
+		 struct fcpio_tag *tag)
+{
+	*type = hdr->type;
+	*status = hdr->status;
+	*tag = hdr->tag;
+}
+
+#define CDB_16		16
+#define CDB_32		32
+#define LUN_ADDRESS	8
+
+/*
+ * fcpio_icmnd_16: host -> firmware request
+ *
+ * used for sending out an initiator SCSI 16-byte command
+ */
+struct fcpio_icmnd_16 {
+	u32 lunmap_id;			/* index into lunmap table */
+	u8 special_req_flags;		/* special exchange request flags */
+	u8 _resvd0[3];			/* reserved */
+	u32 sgl_cnt;			/* scatter-gather list count */
+	u32 sense_len;			/* sense buffer length */
+	u64 sgl_addr;			/* scatter-gather list addr */
+	u64 sense_addr;			/* sense buffer address */
+	u8 crn;				/* SCSI Command Reference No. */
+	u8 pri_ta;			/* SCSI Priority and Task attribute */
+	u8 _resvd1;			/* reserved: should be 0 */
+	u8 flags;			/* command flags */
+	u8 scsi_cdb[CDB_16];		/* SCSI Cmnd Descriptor Block */
+	u32 data_len;			/* length of data expected */
+	u8 lun[LUN_ADDRESS];		/* FC vNIC only: LUN address */
+	u8 _resvd2;			/* reserved */
+	u8 d_id[3];			/* FC vNIC only: Target D_ID */
+	u16 mss;			/* FC vNIC only: max burst */
+	u16 _resvd3;			/* reserved */
+	u32 r_a_tov;			/* FC vNIC only: Res. Alloc Timeout */
+	u32 e_d_tov;			/* FC vNIC only: Err Detect Timeout */
+};
+
+/*
+ * Special request flags
+ */
+#define FCPIO_ICMND_SRFLAG_RETRY 0x01	/* Enable Retry handling on exchange */
+
+/*
+ * Priority/Task Attribute settings
+ */
+#define FCPIO_ICMND_PTA_SIMPLE	0	/* simple task attribute */
+#define FCPIO_ICMND_PTA_HEADQ	1	/* head of queue task attribute */
+#define FCPIO_ICMND_PTA_ORDERED	2	/* ordered task attribute */
+#define FCPIO_ICMND_PTA_ACA	4	/* auto contingent allegiance */
+#define FCPIO_ICMND_PRI_SHIFT	3	/* priority field starts in bit 3 */
+
+/*
+ * Command flags
+ */
+#define FCPIO_ICMND_RDDATA	0x02	/* read data */
+#define FCPIO_ICMND_WRDATA	0x01	/* write data */
+
+/*
+ * fcpio_icmnd_32: host -> firmware request
+ *
+ * used for sending out an initiator SCSI 32-byte command
+ */
+struct fcpio_icmnd_32 {
+	u32 lunmap_id;			/* index into lunmap table */
+	u8 special_req_flags;		/* special exchange request flags */
+	u8 _resvd0[3];			/* reserved */
+	u32 sgl_cnt;			/* scatter-gather list count */
+	u32 sense_len;			/* sense buffer length */
+	u64 sgl_addr;			/* scatter-gather list addr */
+	u64 sense_addr;			/* sense buffer address */
+	u8 crn;				/* SCSI Command Reference No. */
+	u8 pri_ta;			/* SCSI Priority and Task attribute */
+	u8 _resvd1;			/* reserved: should be 0 */
+	u8 flags;			/* command flags */
+	u8 scsi_cdb[CDB_32];		/* SCSI Cmnd Descriptor Block */
+	u32 data_len;			/* length of data expected */
+	u8 lun[LUN_ADDRESS];		/* FC vNIC only: LUN address */
+	u8 _resvd2;			/* reserved */
+	u8 d_id[3];			/* FC vNIC only: Target D_ID */
+	u16 mss;			/* FC vNIC only: max burst */
+	u16 _resvd3;			/* reserved */
+	u32 r_a_tov;			/* FC vNIC only: Res. Alloc Timeout */
+	u32 e_d_tov;			/* FC vNIC only: Error Detect Timeout */
+};
+
+/*
+ * fcpio_itmf: host -> firmware request
+ *
+ * used for requesting the firmware to abort a request and/or send out
+ * a task management function
+ *
+ * The t_tag field is only needed when the request type is ABT_TASK.
+ */
+struct fcpio_itmf {
+	u32 lunmap_id;			/* index into lunmap table */
+	u32 tm_req;			/* SCSI Task Management request */
+	u32 t_tag;			/* header tag of fcpio to be aborted */
+	u32 _resvd;			/* _reserved */
+	u8 lun[LUN_ADDRESS];		/* FC vNIC only: LUN address */
+	u8 _resvd1;			/* reserved */
+	u8 d_id[3];			/* FC vNIC only: Target D_ID */
+	u32 r_a_tov;			/* FC vNIC only: R_A_TOV in msec */
+	u32 e_d_tov;			/* FC vNIC only: E_D_TOV in msec */
+};
+
+/*
+ * Task Management request
+ */
+enum fcpio_itmf_tm_req_type {
+	FCPIO_ITMF_ABT_TASK_TERM = 0x01,	/* abort task and terminate */
+	FCPIO_ITMF_ABT_TASK,		/* abort task and issue abts */
+	FCPIO_ITMF_ABT_TASK_SET,	/* abort task set */
+	FCPIO_ITMF_CLR_TASK_SET,	/* clear task set */
+	FCPIO_ITMF_LUN_RESET,		/* logical unit reset task mgmt */
+	FCPIO_ITMF_CLR_ACA,		/* Clear ACA condition */
+};
+
+/*
+ * fcpio_tdata: host -> firmware request
+ *
+ * used for requesting the firmware to send out a read data transfer for a
+ * target command
+ */
+struct fcpio_tdata {
+	u16 rx_id;			/* FC rx_id of target command */
+	u16 flags;			/* command flags */
+	u32 rel_offset;			/* data sequence relative offset */
+	u32 sgl_cnt;			/* scatter-gather list count */
+	u32 data_len;			/* length of data expected to send */
+	u64 sgl_addr;			/* scatter-gather list address */
+};
+
+/*
+ * Command flags
+ */
+#define FCPIO_TDATA_SCSI_RSP	0x01	/* send a scsi resp. after last frame */
+
+/*
+ * fcpio_txrdy: host -> firmware request
+ *
+ * used for requesting the firmware to send out a write data transfer for a
+ * target command
+ */
+struct fcpio_txrdy {
+	u16 rx_id;			/* FC rx_id of target command */
+	u16 _resvd0;			/* reserved */
+	u32 rel_offset;			/* data sequence relative offset */
+	u32 sgl_cnt;			/* scatter-gather list count */
+	u32 data_len;			/* length of data expected to send */
+	u64 sgl_addr;			/* scatter-gather list address */
+};
+
+/*
+ * fcpio_trsp: host -> firmware request
+ *
+ * used for requesting the firmware to send out a response for a target
+ * command
+ */
+struct fcpio_trsp {
+	u16 rx_id;			/* FC rx_id of target command */
+	u16 _resvd0;			/* reserved */
+	u32 sense_len;			/* sense data buffer length */
+	u64 sense_addr;			/* sense data buffer address */
+	u16 _resvd1;			/* reserved */
+	u8 flags;			/* response request flags */
+	u8 scsi_status;			/* SCSI status */
+	u32 residual;			/* SCSI data residual value of I/O */
+};
+
+/*
+ * resposnse request flags
+ */
+#define FCPIO_TRSP_RESID_UNDER	0x08	/* residual is valid and is underflow */
+#define FCPIO_TRSP_RESID_OVER	0x04	/* residual is valid and is overflow */
+
+/*
+ * fcpio_ttmf_ack: host -> firmware response
+ *
+ * used by the host to indicate to the firmware it has received and processed
+ * the target tmf request
+ */
+struct fcpio_ttmf_ack {
+	u16 rx_id;			/* FC rx_id of target command */
+	u16 _resvd0;			/* reserved */
+	u32 tmf_status;			/* SCSI task management status */
+};
+
+/*
+ * fcpio_tabort: host -> firmware request
+ *
+ * used by the host to request the firmware to abort a target request that was
+ * received by the firmware
+ */
+struct fcpio_tabort {
+	u16 rx_id;			/* rx_id of the target request */
+};
+
+/*
+ * fcpio_reset: host -> firmware request
+ *
+ * used by the host to signal a reset of the driver to the firmware
+ * and to request firmware to clean up all outstanding I/O
+ */
+struct fcpio_reset {
+	u32 _resvd;
+};
+
+enum fcpio_flogi_reg_format_type {
+	FCPIO_FLOGI_REG_DEF_DEST = 0,	/* Use the oui | s_id mac format */
+	FCPIO_FLOGI_REG_GW_DEST,	/* Use the fixed gateway mac */
+};
+
+/*
+ * fcpio_flogi_reg: host -> firmware request
+ *
+ * fc vnic only
+ * used by the host to notify the firmware of the lif's s_id
+ * and destination mac address format
+ */
+struct fcpio_flogi_reg {
+	u8 format;
+	u8 s_id[3];			/* FC vNIC only: Source S_ID */
+	u8 gateway_mac[ETH_ALEN];	/* Destination gateway mac */
+	u16 _resvd;
+	u32 r_a_tov;			/* R_A_TOV in msec */
+	u32 e_d_tov;			/* E_D_TOV in msec */
+};
+
+/*
+ * fcpio_echo: host -> firmware request
+ *
+ * sends a heartbeat echo request to the firmware
+ */
+struct fcpio_echo {
+	u32 _resvd;
+};
+
+/*
+ * fcpio_lunmap_req: host -> firmware request
+ *
+ * scsi vnic only
+ * sends a request to retrieve the lunmap table for scsi vnics
+ */
+struct fcpio_lunmap_req {
+	u64 addr;			/* address of the buffer */
+	u32 len;			/* len of the buffer */
+};
+
+/*
+ * fcpio_flogi_fip_reg: host -> firmware request
+ *
+ * fc vnic only
+ * used by the host to notify the firmware of the lif's s_id
+ * and destination mac address format
+ */
+struct fcpio_flogi_fip_reg {
+	u8 _resvd0;
+	u8 s_id[3];			/* FC vNIC only: Source S_ID */
+	u8 fcf_mac[ETH_ALEN];		/* FCF Target destination mac */
+	u16 _resvd1;
+	u32 r_a_tov;			/* R_A_TOV in msec */
+	u32 e_d_tov;			/* E_D_TOV in msec */
+	u8 ha_mac[ETH_ALEN];		/* Host adapter source mac */
+	u16 _resvd2;
+};
+
+/*
+ * Basic structure for all fcpio structures that are sent from the host to the
+ * firmware. They are 128 bytes per structure.
+ */
+#define FCPIO_HOST_REQ_LEN	128	/* expected length of host requests */
+
+struct fcpio_host_req {
+	struct fcpio_header hdr;
+
+	union {
+		/*
+		 * Defines space needed for request
+		 */
+		u8 buf[FCPIO_HOST_REQ_LEN - sizeof(struct fcpio_header)];
+
+		/*
+		 * Initiator host requests
+		 */
+		struct fcpio_icmnd_16 icmnd_16;
+		struct fcpio_icmnd_32 icmnd_32;
+		struct fcpio_itmf itmf;
+
+		/*
+		 * Target host requests
+		 */
+		struct fcpio_tdata tdata;
+		struct fcpio_txrdy txrdy;
+		struct fcpio_trsp trsp;
+		struct fcpio_ttmf_ack ttmf_ack;
+		struct fcpio_tabort tabort;
+
+		/*
+		 * Misc requests
+		 */
+		struct fcpio_reset reset;
+		struct fcpio_flogi_reg flogi_reg;
+		struct fcpio_echo echo;
+		struct fcpio_lunmap_req lunmap_req;
+		struct fcpio_flogi_fip_reg flogi_fip_reg;
+	} u;
+};
+
+/*
+ * fcpio_icmnd_cmpl: firmware -> host response
+ *
+ * used for sending the host a response to an initiator command
+ */
+struct fcpio_icmnd_cmpl {
+	u8 _resvd0[6];			/* reserved */
+	u8 flags;			/* response flags */
+	u8 scsi_status;			/* SCSI status */
+	u32 residual;			/* SCSI data residual length */
+	u32 sense_len;			/* SCSI sense length */
+};
+
+/*
+ * response flags
+ */
+#define FCPIO_ICMND_CMPL_RESID_UNDER	0x08	/* resid under and valid */
+#define FCPIO_ICMND_CMPL_RESID_OVER	0x04	/* resid over and valid */
+
+/*
+ * fcpio_itmf_cmpl: firmware -> host response
+ *
+ * used for sending the host a response for a itmf request
+ */
+struct fcpio_itmf_cmpl {
+	u32 _resvd;			/* reserved */
+};
+
+/*
+ * fcpio_tcmnd_16: firmware -> host request
+ *
+ * used by the firmware to notify the host of an incoming target SCSI 16-Byte
+ * request
+ */
+struct fcpio_tcmnd_16 {
+	u8 lun[LUN_ADDRESS];		/* FC vNIC only: LUN address */
+	u8 crn;				/* SCSI Command Reference No. */
+	u8 pri_ta;			/* SCSI Priority and Task attribute */
+	u8 _resvd2;			/* reserved: should be 0 */
+	u8 flags;			/* command flags */
+	u8 scsi_cdb[CDB_16];		/* SCSI Cmnd Descriptor Block */
+	u32 data_len;			/* length of data expected */
+	u8 _resvd1;			/* reserved */
+	u8 s_id[3];			/* FC vNIC only: Source S_ID */
+};
+
+/*
+ * Priority/Task Attribute settings
+ */
+#define FCPIO_TCMND_PTA_SIMPLE	0	/* simple task attribute */
+#define FCPIO_TCMND_PTA_HEADQ	1	/* head of queue task attribute */
+#define FCPIO_TCMND_PTA_ORDERED	2	/* ordered task attribute */
+#define FCPIO_TCMND_PTA_ACA	4	/* auto contingent allegiance */
+#define FCPIO_TCMND_PRI_SHIFT	3	/* priority field starts in bit 3 */
+
+/*
+ * Command flags
+ */
+#define FCPIO_TCMND_RDDATA	0x02	/* read data */
+#define FCPIO_TCMND_WRDATA	0x01	/* write data */
+
+/*
+ * fcpio_tcmnd_32: firmware -> host request
+ *
+ * used by the firmware to notify the host of an incoming target SCSI 32-Byte
+ * request
+ */
+struct fcpio_tcmnd_32 {
+	u8 lun[LUN_ADDRESS];		/* FC vNIC only: LUN address */
+	u8 crn;				/* SCSI Command Reference No. */
+	u8 pri_ta;			/* SCSI Priority and Task attribute */
+	u8 _resvd2;			/* reserved: should be 0 */
+	u8 flags;			/* command flags */
+	u8 scsi_cdb[CDB_32];		/* SCSI Cmnd Descriptor Block */
+	u32 data_len;			/* length of data expected */
+	u8 _resvd0;			/* reserved */
+	u8 s_id[3];			/* FC vNIC only: Source S_ID */
+};
+
+/*
+ * fcpio_tdrsp_cmpl: firmware -> host response
+ *
+ * used by the firmware to notify the host of a response to a host target
+ * command
+ */
+struct fcpio_tdrsp_cmpl {
+	u16 rx_id;			/* rx_id of the target request */
+	u16 _resvd0;			/* reserved */
+};
+
+/*
+ * fcpio_ttmf: firmware -> host request
+ *
+ * used by the firmware to notify the host of an incoming task management
+ * function request
+ */
+struct fcpio_ttmf {
+	u8 _resvd0;			/* reserved */
+	u8 s_id[3];			/* FC vNIC only: Source S_ID */
+	u8 lun[LUN_ADDRESS];		/* FC vNIC only: LUN address */
+	u8 crn;				/* SCSI Command Reference No. */
+	u8 _resvd2[3];			/* reserved */
+	u32 tmf_type;			/* task management request type */
+};
+
+/*
+ * Task Management request
+ */
+#define FCPIO_TTMF_CLR_ACA	0x40	/* Clear ACA condition */
+#define FCPIO_TTMF_LUN_RESET	0x10	/* logical unit reset task mgmt */
+#define FCPIO_TTMF_CLR_TASK_SET	0x04	/* clear task set */
+#define FCPIO_TTMF_ABT_TASK_SET	0x02	/* abort task set */
+#define FCPIO_TTMF_ABT_TASK	0x01	/* abort task */
+
+/*
+ * fcpio_tabort_cmpl: firmware -> host response
+ *
+ * used by the firmware to respond to a host's tabort request
+ */
+struct fcpio_tabort_cmpl {
+	u16 rx_id;			/* rx_id of the target request */
+	u16 _resvd0;			/* reserved */
+};
+
+/*
+ * fcpio_ack: firmware -> host response
+ *
+ * used by firmware to notify the host of the last work request received
+ */
+struct fcpio_ack {
+	u16 request_out;		/* last host entry received */
+	u16 _resvd;
+};
+
+/*
+ * fcpio_reset_cmpl: firmware -> host response
+ *
+ * use by firmware to respond to the host's reset request
+ */
+struct fcpio_reset_cmpl {
+	u16 vnic_id;
+};
+
+/*
+ * fcpio_flogi_reg_cmpl: firmware -> host response
+ *
+ * fc vnic only
+ * response to the fcpio_flogi_reg request
+ */
+struct fcpio_flogi_reg_cmpl {
+	u32 _resvd;
+};
+
+/*
+ * fcpio_echo_cmpl: firmware -> host response
+ *
+ * response to the fcpio_echo request
+ */
+struct fcpio_echo_cmpl {
+	u32 _resvd;
+};
+
+/*
+ * fcpio_lunmap_chng: firmware -> host notification
+ *
+ * scsi vnic only
+ * notifies the host that the lunmap tables have changed
+ */
+struct fcpio_lunmap_chng {
+	u32 _resvd;
+};
+
+/*
+ * fcpio_lunmap_req_cmpl: firmware -> host response
+ *
+ * scsi vnic only
+ * response for lunmap table request from the host
+ */
+struct fcpio_lunmap_req_cmpl {
+	u32 _resvd;
+};
+
+/*
+ * Basic structure for all fcpio structures that are sent from the firmware to
+ * the host. They are 64 bytes per structure.
+ */
+#define FCPIO_FW_REQ_LEN	64	/* expected length of fw requests */
+struct fcpio_fw_req {
+	struct fcpio_header hdr;
+
+	union {
+		/*
+		 * Defines space needed for request
+		 */
+		u8 buf[FCPIO_FW_REQ_LEN - sizeof(struct fcpio_header)];
+
+		/*
+		 * Initiator firmware responses
+		 */
+		struct fcpio_icmnd_cmpl icmnd_cmpl;
+		struct fcpio_itmf_cmpl itmf_cmpl;
+
+		/*
+		 * Target firmware new requests
+		 */
+		struct fcpio_tcmnd_16 tcmnd_16;
+		struct fcpio_tcmnd_32 tcmnd_32;
+
+		/*
+		 * Target firmware responses
+		 */
+		struct fcpio_tdrsp_cmpl tdrsp_cmpl;
+		struct fcpio_ttmf ttmf;
+		struct fcpio_tabort_cmpl tabort_cmpl;
+
+		/*
+		 * Firmware response to work received
+		 */
+		struct fcpio_ack ack;
+
+		/*
+		 * Misc requests
+		 */
+		struct fcpio_reset_cmpl reset_cmpl;
+		struct fcpio_flogi_reg_cmpl flogi_reg_cmpl;
+		struct fcpio_echo_cmpl echo_cmpl;
+		struct fcpio_lunmap_chng lunmap_chng;
+		struct fcpio_lunmap_req_cmpl lunmap_req_cmpl;
+	} u;
+};
+
+/*
+ * Access routines to encode and decode the color bit, which is the most
+ * significant bit of the MSB of the structure
+ */
+static inline void fcpio_color_enc(struct fcpio_fw_req *fw_req, u8 color)
+{
+	u8 *c = ((u8 *) fw_req) + sizeof(struct fcpio_fw_req) - 1;
+
+	if (color)
+		*c |= 0x80;
+	else
+		*c &= ~0x80;
+}
+
+static inline void fcpio_color_dec(struct fcpio_fw_req *fw_req, u8 *color)
+{
+	u8 *c = ((u8 *) fw_req) + sizeof(struct fcpio_fw_req) - 1;
+
+	*color = *c >> 7;
+
+	/*
+	 * Make sure color bit is read from desc *before* other fields
+	 * are read from desc. Hardware guarantees color bit is last
+	 * bit (byte) written. Adding the rmb() prevents the compiler
+	 * and/or CPU from reordering the reads which would potentially
+	 * result in reading stale values.
+	 */
+
+	rmb();
+
+}
+
+/*
+ * Lunmap table entry for scsi vnics
+ */
+#define FCPIO_LUNMAP_TABLE_SIZE	256
+#define FCPIO_FLAGS_LUNMAP_VALID	0x80
+#define FCPIO_FLAGS_BOOT		0x01
+struct fcpio_lunmap_entry {
+	u8 bus;
+	u8 target;
+	u8 lun;
+	u8 path_cnt;
+	u16 flags;
+	u16 update_cnt;
+};
+
+struct fcpio_lunmap_tbl {
+	u32 update_cnt;
+	struct fcpio_lunmap_entry lunmaps[FCPIO_LUNMAP_TABLE_SIZE];
+};
+
+#endif /* _FCPIO_H_ */
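
Every host-to-firmware message above is padded by the union's buf member to a fixed 128 bytes (FCPIO_HOST_REQ_LEN), and firmware-to-host messages to 64 bytes (FCPIO_FW_REQ_LEN); requests and completions are correlated through the tag the firmware copies back into the response header. A minimal sketch of stamping a request header with the encode helpers from this file; the function name and the caller-supplied req/id are illustrative only:

    static void my_fill_icmnd_hdr(struct fcpio_host_req *req, u32 id)
    {
            struct fcpio_tag tag;

            fcpio_tag_id_enc(&tag, id);     /* driver-private request id */
            fcpio_header_enc(&req->hdr, FCPIO_ICMND_16, 0, tag);
            /* ... then fill req->u.icmnd_16 ... */
    }
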
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
new file mode 100644
index 000000000000..e4c0a3d7d87b
--- /dev/null
+++ b/drivers/scsi/fnic/fnic.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc. All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _FNIC_H_
+#define _FNIC_H_
+
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <scsi/libfc.h>
+#include "fnic_io.h"
+#include "fnic_res.h"
+#include "vnic_dev.h"
+#include "vnic_wq.h"
+#include "vnic_rq.h"
+#include "vnic_cq.h"
+#include "vnic_wq_copy.h"
+#include "vnic_intr.h"
+#include "vnic_stats.h"
+#include "vnic_scsi.h"
+
+#define DRV_NAME		"fnic"
+#define DRV_DESCRIPTION		"Cisco FCoE HBA Driver"
+#define DRV_VERSION		"1.0.0.1121"
+#define PFX			DRV_NAME ": "
+#define DFX			DRV_NAME "%d: "
+
+#define DESC_CLEAN_LOW_WATERMARK 8
+#define FNIC_MAX_IO_REQ		2048 /* scsi_cmnd tag map entries */
+#define FNIC_IO_LOCKS		64 /* IO locks: power of 2 */
+#define FNIC_DFLT_QUEUE_DEPTH	32
+#define FNIC_STATS_RATE_LIMIT	4 /* limit rate at which stats are pulled up */
+
+/*
+ * Tag bits used for special requests.
+ */
+#define BIT(nr)			(1UL << (nr))
+#define FNIC_TAG_ABORT		BIT(30)		/* tag bit indicating abort */
+#define FNIC_TAG_DEV_RST	BIT(29)		/* indicates device reset */
+#define FNIC_TAG_MASK		(BIT(24) - 1)	/* mask for lookup */
+#define FNIC_NO_TAG		-1
+
+/*
+ * Usage of the scsi_cmnd scratchpad.
+ * These fields are locked by the hashed io_req_lock.
+ */
+#define CMD_SP(Cmnd)		((Cmnd)->SCp.ptr)
+#define CMD_STATE(Cmnd)		((Cmnd)->SCp.phase)
+#define CMD_ABTS_STATUS(Cmnd)	((Cmnd)->SCp.Message)
+#define CMD_LR_STATUS(Cmnd)	((Cmnd)->SCp.have_data_in)
+#define CMD_TAG(Cmnd)		((Cmnd)->SCp.sent_command)
+
+#define FCPIO_INVALID_CODE 0x100 /* hdr_status value unused by firmware */
+
+#define FNIC_LUN_RESET_TIMEOUT	     10000	/* mSec */
+#define FNIC_HOST_RESET_TIMEOUT	     10000	/* mSec */
+#define FNIC_RMDEVICE_TIMEOUT	     1000	/* mSec */
+#define FNIC_HOST_RESET_SETTLE_TIME  30		/* Sec */
+
+#define FNIC_MAX_FCP_TARGET	256
+
+extern unsigned int fnic_log_level;
+
+#define FNIC_MAIN_LOGGING	0x01
+#define FNIC_FCS_LOGGING	0x02
+#define FNIC_SCSI_LOGGING	0x04
+#define FNIC_ISR_LOGGING	0x08
+
+#define FNIC_CHECK_LOGGING(LEVEL, CMD)		\
+do {						\
+	if (unlikely(fnic_log_level & LEVEL))	\
+		do {				\
+			CMD;			\
+		} while (0);			\
+} while (0)
+
+#define FNIC_MAIN_DBG(kern_level, host, fmt, args...)		\
+	FNIC_CHECK_LOGGING(FNIC_MAIN_LOGGING,			\
+			 shost_printk(kern_level, host, fmt, ##args);)
+
+#define FNIC_FCS_DBG(kern_level, host, fmt, args...)		\
+	FNIC_CHECK_LOGGING(FNIC_FCS_LOGGING,			\
+			 shost_printk(kern_level, host, fmt, ##args);)
+
+#define FNIC_SCSI_DBG(kern_level, host, fmt, args...)		\
+	FNIC_CHECK_LOGGING(FNIC_SCSI_LOGGING,			\
+			 shost_printk(kern_level, host, fmt, ##args);)
+
+#define FNIC_ISR_DBG(kern_level, host, fmt, args...)		\
+	FNIC_CHECK_LOGGING(FNIC_ISR_LOGGING,			\
+			 shost_printk(kern_level, host, fmt, ##args);)
+
+extern const char *fnic_state_str[];
+
+enum fnic_intx_intr_index {
+	FNIC_INTX_WQ_RQ_COPYWQ,
+	FNIC_INTX_ERR,
+	FNIC_INTX_NOTIFY,
+	FNIC_INTX_INTR_MAX,
+};
+
+enum fnic_msix_intr_index {
+	FNIC_MSIX_RQ,
+	FNIC_MSIX_WQ,
+	FNIC_MSIX_WQ_COPY,
+	FNIC_MSIX_ERR_NOTIFY,
+	FNIC_MSIX_INTR_MAX,
+};
+
+struct fnic_msix_entry {
+	int requested;
+	char devname[IFNAMSIZ];
+	irqreturn_t (*isr)(int, void *);
+	void *devid;
+};
+
+enum fnic_state {
+	FNIC_IN_FC_MODE = 0,
+	FNIC_IN_FC_TRANS_ETH_MODE,
+	FNIC_IN_ETH_MODE,
+	FNIC_IN_ETH_TRANS_FC_MODE,
+};
+
+#define FNIC_WQ_COPY_MAX 1
+#define FNIC_WQ_MAX 1
+#define FNIC_RQ_MAX 1
+#define FNIC_CQ_MAX (FNIC_WQ_COPY_MAX + FNIC_WQ_MAX + FNIC_RQ_MAX)
+
+struct mempool;
+
+/* Per-instance private data structure */
+struct fnic {
+	struct fc_lport *lport;
+	struct vnic_dev_bar bar0;
+
+	struct msix_entry msix_entry[FNIC_MSIX_INTR_MAX];
+	struct fnic_msix_entry msix[FNIC_MSIX_INTR_MAX];
+
+	struct vnic_stats *stats;
+	unsigned long stats_time;	/* time of stats update */
+	struct vnic_nic_cfg *nic_cfg;
+	char name[IFNAMSIZ];
+	struct timer_list notify_timer; /* used for MSI interrupts */
+
+	unsigned int err_intr_offset;
+	unsigned int link_intr_offset;
+
+	unsigned int wq_count;
+	unsigned int cq_count;
+
+	u32 fcoui_mode:1;		/* use fcoui address*/
+	u32 vlan_hw_insert:1;		/* let hw insert the tag */
+	u32 in_remove:1;		/* fnic device in removal */
+	u32 stop_rx_link_events:1;	/* stop proc. rx frames, link events */
+
+	struct completion *remove_wait; /* device remove thread blocks */
+
+	struct fc_frame *flogi;
+	struct fc_frame *flogi_resp;
+	u16 flogi_oxid;
+	unsigned long s_id;
+	enum fnic_state state;
+	spinlock_t fnic_lock;
+
+	u16 vlan_id;			/* VLAN tag including priority */
+	u8 mac_addr[ETH_ALEN];
+	u8 dest_addr[ETH_ALEN];
+	u8 data_src_addr[ETH_ALEN];
+	u64 fcp_input_bytes;		/* internal statistic */
+	u64 fcp_output_bytes;		/* internal statistic */
+	u32 link_down_cnt;
+	int link_status;
+
+	struct list_head list;
+	struct pci_dev *pdev;
+	struct vnic_fc_config config;
+	struct vnic_dev *vdev;
+	unsigned int raw_wq_count;
+	unsigned int wq_copy_count;
+	unsigned int rq_count;
+	int fw_ack_index[FNIC_WQ_COPY_MAX];
+	unsigned short fw_ack_recd[FNIC_WQ_COPY_MAX];
+	unsigned short wq_copy_desc_low[FNIC_WQ_COPY_MAX];
+	unsigned int intr_count;
+	u32 __iomem *legacy_pba;
+	struct fnic_host_tag *tags;
+	mempool_t *io_req_pool;
+	mempool_t *io_sgl_pool[FNIC_SGL_NUM_CACHES];
+	spinlock_t io_req_lock[FNIC_IO_LOCKS];	/* locks for scsi cmnds */
+
+	struct work_struct link_work;
+	struct work_struct frame_work;
+	struct sk_buff_head frame_queue;
+
+	/* copy work queue cache line section */
+	____cacheline_aligned struct vnic_wq_copy wq_copy[FNIC_WQ_COPY_MAX];
+	/* completion queue cache line section */
+	____cacheline_aligned struct vnic_cq cq[FNIC_CQ_MAX];
+
+	spinlock_t wq_copy_lock[FNIC_WQ_COPY_MAX];
+
+	/* work queue cache line section */
+	____cacheline_aligned struct vnic_wq wq[FNIC_WQ_MAX];
+	spinlock_t wq_lock[FNIC_WQ_MAX];
+
+	/* receive queue cache line section */
+	____cacheline_aligned struct vnic_rq rq[FNIC_RQ_MAX];
+
+	/* interrupt resource cache line section */
+	____cacheline_aligned struct vnic_intr intr[FNIC_MSIX_INTR_MAX];
+};
+
+extern struct workqueue_struct *fnic_event_queue;
+extern struct device_attribute *fnic_attrs[];
+
+void fnic_clear_intr_mode(struct fnic *fnic);
+int fnic_set_intr_mode(struct fnic *fnic);
+void fnic_free_intr(struct fnic *fnic);
+int fnic_request_intr(struct fnic *fnic);
+
+int fnic_send(struct fc_lport *, struct fc_frame *);
+void fnic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf);
+void fnic_handle_frame(struct work_struct *work);
+void fnic_handle_link(struct work_struct *work);
+int fnic_rq_cmpl_handler(struct fnic *fnic, int);
+int fnic_alloc_rq_frame(struct vnic_rq *rq);
+void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf);
+int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp);
+
+int fnic_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *));
+int fnic_abort_cmd(struct scsi_cmnd *);
+int fnic_device_reset(struct scsi_cmnd *);
+int fnic_host_reset(struct scsi_cmnd *);
+int fnic_reset(struct Scsi_Host *);
+void fnic_scsi_cleanup(struct fc_lport *);
+void fnic_scsi_abort_io(struct fc_lport *);
+void fnic_empty_scsi_cleanup(struct fc_lport *);
+void fnic_exch_mgr_reset(struct fc_lport *, u32, u32);
+int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int);
+int fnic_wq_cmpl_handler(struct fnic *fnic, int);
+int fnic_flogi_reg_handler(struct fnic *fnic);
+void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
+				  struct fcpio_host_req *desc);
+int fnic_fw_reset_handler(struct fnic *fnic);
+void fnic_terminate_rport_io(struct fc_rport *);
+const char *fnic_state_to_str(unsigned int state);
+
+void fnic_log_q_error(struct fnic *fnic);
+void fnic_handle_link_event(struct fnic *fnic);
+
+#endif /* _FNIC_H_ */
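
The FNIC_*_DBG macros above expand to a shost_printk() guarded by a runtime bitmask, so per-subsystem tracing can be toggled without rebuilding (fnic_log_level is presumably exposed as a module parameter by fnic_main.c, which is not part of this excerpt). An illustrative call site; the host pointer and tag value stand in for whatever the real caller has on hand:

    FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
                  "aborting tag 0x%x\n", tag);
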
diff --git a/drivers/scsi/fnic/fnic_attrs.c b/drivers/scsi/fnic/fnic_attrs.c
new file mode 100644
index 000000000000..aea0c3becfd4
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_attrs.c
@@ -0,0 +1,56 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#include <linux/string.h>
19#include <linux/device.h>
20#include <scsi/scsi_host.h>
21#include "fnic.h"
22
23static ssize_t fnic_show_state(struct device *dev,
24 struct device_attribute *attr, char *buf)
25{
26 struct fc_lport *lp = shost_priv(class_to_shost(dev));
27 struct fnic *fnic = lport_priv(lp);
28
29 return snprintf(buf, PAGE_SIZE, "%s\n", fnic_state_str[fnic->state]);
30}
31
32static ssize_t fnic_show_drv_version(struct device *dev,
33 struct device_attribute *attr, char *buf)
34{
35 return snprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION);
36}
37
38static ssize_t fnic_show_link_state(struct device *dev,
39 struct device_attribute *attr, char *buf)
40{
41 struct fc_lport *lp = shost_priv(class_to_shost(dev));
42
43 return snprintf(buf, PAGE_SIZE, "%s\n", (lp->link_up)
44 ? "Link Up" : "Link Down");
45}
46
47static DEVICE_ATTR(fnic_state, S_IRUGO, fnic_show_state, NULL);
48static DEVICE_ATTR(drv_version, S_IRUGO, fnic_show_drv_version, NULL);
49static DEVICE_ATTR(link_state, S_IRUGO, fnic_show_link_state, NULL);
50
51struct device_attribute *fnic_attrs[] = {
52 &dev_attr_fnic_state,
53 &dev_attr_drv_version,
54 &dev_attr_link_state,
55 NULL,
56};
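/*
 * Illustrative note (not part of the original patch): fnic_attrs is
 * wired into the host template's .shost_attrs later in this series, so
 * these three read-only files show up under the SCSI host's sysfs
 * directory. For a hypothetical host0:
 *
 *   cat /sys/class/scsi_host/host0/fnic_state   # current fnic state
 *   cat /sys/class/scsi_host/host0/drv_version  # DRV_VERSION string
 *   cat /sys/class/scsi_host/host0/link_state   # "Link Up"/"Link Down"
 */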
diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
new file mode 100644
index 000000000000..07e6eedb83ce
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_fcs.c
@@ -0,0 +1,742 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#include <linux/errno.h>
19#include <linux/pci.h>
20#include <linux/skbuff.h>
21#include <linux/interrupt.h>
22#include <linux/spinlock.h>
23#include <linux/if_ether.h>
24#include <linux/if_vlan.h>
25#include <linux/workqueue.h>
26#include <scsi/fc/fc_els.h>
27#include <scsi/fc/fc_fcoe.h>
28#include <scsi/fc_frame.h>
29#include <scsi/libfc.h>
30#include "fnic_io.h"
31#include "fnic.h"
32#include "cq_enet_desc.h"
33#include "cq_exch_desc.h"
34
35struct workqueue_struct *fnic_event_queue;
36
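/*
 * Sketch of the link transitions handled by fnic_handle_link() below,
 * summarized from its case comments (old state vs. freshly polled
 * state, with link_down_cnt catching a bounce between two polls):
 *
 *   DOWN -> DOWN              : no action
 *   UP   -> UP (cnt same)     : no action
 *   UP   -> UP (cnt changed)  : bounce; fc_linkdown() then fc_linkup()
 *   DOWN -> UP                : fc_linkup()
 *   UP   -> DOWN              : fc_linkdown()
 */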
37void fnic_handle_link(struct work_struct *work)
38{
39 struct fnic *fnic = container_of(work, struct fnic, link_work);
40 unsigned long flags;
41 int old_link_status;
42 u32 old_link_down_cnt;
43
44 spin_lock_irqsave(&fnic->fnic_lock, flags);
45
46 if (fnic->stop_rx_link_events) {
47 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
48 return;
49 }
50
51 old_link_down_cnt = fnic->link_down_cnt;
52 old_link_status = fnic->link_status;
53 fnic->link_status = vnic_dev_link_status(fnic->vdev);
54 fnic->link_down_cnt = vnic_dev_link_down_cnt(fnic->vdev);
55
56 if (old_link_status == fnic->link_status) {
57 if (!fnic->link_status)
58 /* DOWN -> DOWN */
59 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
60 else {
61 if (old_link_down_cnt != fnic->link_down_cnt) {
62 /* UP -> DOWN -> UP */
63 fnic->lport->host_stats.link_failure_count++;
64 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
65 FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
66 "link down\n");
67 fc_linkdown(fnic->lport);
68 FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
69 "link up\n");
70 fc_linkup(fnic->lport);
71 } else
72 /* UP -> UP */
73 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
74 }
75 } else if (fnic->link_status) {
76 /* DOWN -> UP */
77 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
78 FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link up\n");
79 fc_linkup(fnic->lport);
80 } else {
81 /* UP -> DOWN */
82 fnic->lport->host_stats.link_failure_count++;
83 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
84 FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link down\n");
85 fc_linkdown(fnic->lport);
86 }
87
88}
89
90/*
91 * This function passes incoming fabric frames to libFC
92 */
93void fnic_handle_frame(struct work_struct *work)
94{
95 struct fnic *fnic = container_of(work, struct fnic, frame_work);
96 struct fc_lport *lp = fnic->lport;
97 unsigned long flags;
98 struct sk_buff *skb;
99 struct fc_frame *fp;
100
101 while ((skb = skb_dequeue(&fnic->frame_queue))) {
102
103 spin_lock_irqsave(&fnic->fnic_lock, flags);
104 if (fnic->stop_rx_link_events) {
105 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
106 dev_kfree_skb(skb);
107 return;
108 }
109 fp = (struct fc_frame *)skb;
110 /* if Flogi resp frame, register the address */
111 if (fr_flags(fp)) {
112 vnic_dev_add_addr(fnic->vdev,
113 fnic->data_src_addr);
114 fr_flags(fp) = 0;
115 }
116 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
117
118 fc_exch_recv(lp, lp->emp, fp);
119 }
120
121}
122
123static inline void fnic_import_rq_fc_frame(struct sk_buff *skb,
124 u32 len, u8 sof, u8 eof)
125{
126 struct fc_frame *fp = (struct fc_frame *)skb;
127
128 skb_trim(skb, len);
129 fr_eof(fp) = eof;
130 fr_sof(fp) = sof;
131}
132
133
134static inline int fnic_import_rq_eth_pkt(struct sk_buff *skb, u32 len)
135{
136 struct fc_frame *fp;
137 struct ethhdr *eh;
138 struct vlan_ethhdr *vh;
139 struct fcoe_hdr *fcoe_hdr;
140 struct fcoe_crc_eof *ft;
141 u32 transport_len = 0;
142
143 eh = (struct ethhdr *)skb->data;
144 vh = (struct vlan_ethhdr *)skb->data;
145 if (vh->h_vlan_proto == htons(ETH_P_8021Q) &&
146 vh->h_vlan_encapsulated_proto == htons(ETH_P_FCOE)) {
147 skb_pull(skb, sizeof(struct vlan_ethhdr));
148 transport_len += sizeof(struct vlan_ethhdr);
149 } else if (eh->h_proto == htons(ETH_P_FCOE)) {
150 transport_len += sizeof(struct ethhdr);
151 skb_pull(skb, sizeof(struct ethhdr));
152 } else
153 return -1;
154
155 fcoe_hdr = (struct fcoe_hdr *)skb->data;
156 if (FC_FCOE_DECAPS_VER(fcoe_hdr) != FC_FCOE_VER)
157 return -1;
158
159 fp = (struct fc_frame *)skb;
160 fc_frame_init(fp);
161 fr_sof(fp) = fcoe_hdr->fcoe_sof;
162 skb_pull(skb, sizeof(struct fcoe_hdr));
163 transport_len += sizeof(struct fcoe_hdr);
164
165 ft = (struct fcoe_crc_eof *)(skb->data + len -
166 transport_len - sizeof(*ft));
167 fr_eof(fp) = ft->fcoe_eof;
168 skb_trim(skb, len - transport_len - sizeof(*ft));
169 return 0;
170}
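/*
 * For reference, the inbound packet layout that the parsing above
 * walks through (reconstructed from the skb_pull()/skb_trim() calls):
 *
 *   [ethhdr or vlan_ethhdr][fcoe_hdr][FC frame ...][fcoe_crc_eof]
 *
 * The Ethernet and FCoE headers are pulled off the front, the CRC/EOF
 * trailer is trimmed from the tail, and sof/eof are stashed in the
 * fc_frame metadata so libFC sees only the bare FC frame.
 */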
171
172static inline int fnic_handle_flogi_resp(struct fnic *fnic,
173 struct fc_frame *fp)
174{
175 u8 mac[ETH_ALEN] = FC_FCOE_FLOGI_MAC;
176 struct ethhdr *eth_hdr;
177 struct fc_frame_header *fh;
178 int ret = 0;
179 unsigned long flags;
180 struct fc_frame *old_flogi_resp = NULL;
181
182 fh = (struct fc_frame_header *)fr_hdr(fp);
183
184 spin_lock_irqsave(&fnic->fnic_lock, flags);
185
186 if (fnic->state == FNIC_IN_ETH_MODE) {
187
188 /*
189 * Check if oxid matches on taking the lock. A new Flogi
190 * issued by libFC might have changed the fnic cached oxid
191 */
192 if (fnic->flogi_oxid != ntohs(fh->fh_ox_id)) {
193 FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
194 "Flogi response oxid not"
195 " matching cached oxid, dropping frame"
196 "\n");
197 ret = -1;
198 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
199 dev_kfree_skb_irq(fp_skb(fp));
200 goto handle_flogi_resp_end;
201 }
202
203 /* Drop older cached flogi response frame, cache this frame */
204 old_flogi_resp = fnic->flogi_resp;
205 fnic->flogi_resp = fp;
206 fnic->flogi_oxid = FC_XID_UNKNOWN;
207
208		/*
209		 * This frame is part of FLOGI; get the src MAC address from
210		 * it. If the src MAC is FC-OUI based, set the address mode
211		 * flag to use the FC-OUI base for the dst MAC address;
212		 * otherwise store the FCoE gateway address.
213		 */
214 eth_hdr = (struct ethhdr *)skb_mac_header(fp_skb(fp));
215 memcpy(mac, eth_hdr->h_source, ETH_ALEN);
216
217 if (ntoh24(mac) == FC_FCOE_OUI)
218 fnic->fcoui_mode = 1;
219 else {
220 fnic->fcoui_mode = 0;
221 memcpy(fnic->dest_addr, mac, ETH_ALEN);
222 }
223
224		/*
225		 * Except for the Flogi frame, all outbound frames from us have
226		 * the Eth Src address FC_FCOE_OUI | our_sid. The Flogi frame
227		 * uses the vnic MAC address as the Eth Src address.
228		 */
229 fc_fcoe_set_mac(fnic->data_src_addr, fh->fh_d_id);
230
231 /* We get our s_id from the d_id of the flogi resp frame */
232 fnic->s_id = ntoh24(fh->fh_d_id);
233
234 /* Change state to reflect transition from Eth to FC mode */
235 fnic->state = FNIC_IN_ETH_TRANS_FC_MODE;
236
237 } else {
238 FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
239 "Unexpected fnic state %s while"
240 " processing flogi resp\n",
241 fnic_state_to_str(fnic->state));
242 ret = -1;
243 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
244 dev_kfree_skb_irq(fp_skb(fp));
245 goto handle_flogi_resp_end;
246 }
247
248 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
249
250 /* Drop older cached frame */
251 if (old_flogi_resp)
252 dev_kfree_skb_irq(fp_skb(old_flogi_resp));
253
254		/*
255		 * Send the flogi reg request to firmware; this will put the
256		 * fnic in FC mode.
257		 */
258 ret = fnic_flogi_reg_handler(fnic);
259
260 if (ret < 0) {
261 int free_fp = 1;
262 spin_lock_irqsave(&fnic->fnic_lock, flags);
263		/*
264		 * Free the frame if no other thread is
265		 * pointing to it.
266		 */
267 if (fnic->flogi_resp != fp)
268 free_fp = 0;
269 else
270 fnic->flogi_resp = NULL;
271
272 if (fnic->state == FNIC_IN_ETH_TRANS_FC_MODE)
273 fnic->state = FNIC_IN_ETH_MODE;
274 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
275 if (free_fp)
276 dev_kfree_skb_irq(fp_skb(fp));
277 }
278
279 handle_flogi_resp_end:
280 return ret;
281}
282
283/* Returns 1 for a response that matches cached flogi oxid */
284static inline int is_matching_flogi_resp_frame(struct fnic *fnic,
285 struct fc_frame *fp)
286{
287 struct fc_frame_header *fh;
288 int ret = 0;
289 u32 f_ctl;
290
291 fh = fc_frame_header_get(fp);
292 f_ctl = ntoh24(fh->fh_f_ctl);
293
294 if (fnic->flogi_oxid == ntohs(fh->fh_ox_id) &&
295 fh->fh_r_ctl == FC_RCTL_ELS_REP &&
296 (f_ctl & (FC_FC_EX_CTX | FC_FC_SEQ_CTX)) == FC_FC_EX_CTX &&
297 fh->fh_type == FC_TYPE_ELS)
298 ret = 1;
299
300 return ret;
301}
302
303static void fnic_rq_cmpl_frame_recv(struct vnic_rq *rq, struct cq_desc
304 *cq_desc, struct vnic_rq_buf *buf,
305 int skipped __attribute__((unused)),
306 void *opaque)
307{
308 struct fnic *fnic = vnic_dev_priv(rq->vdev);
309 struct sk_buff *skb;
310 struct fc_frame *fp;
311 unsigned int eth_hdrs_stripped;
312 u8 type, color, eop, sop, ingress_port, vlan_stripped;
313 u8 fcoe = 0, fcoe_sof, fcoe_eof;
314 u8 fcoe_fc_crc_ok = 1, fcoe_enc_error = 0;
315 u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
316 u8 ipv6, ipv4, ipv4_fragment, rss_type, csum_not_calc;
317 u8 fcs_ok = 1, packet_error = 0;
318 u16 q_number, completed_index, bytes_written = 0, vlan, checksum;
319 u32 rss_hash;
320 u16 exchange_id, tmpl;
321 u8 sof = 0;
322 u8 eof = 0;
323 u32 fcp_bytes_written = 0;
324 unsigned long flags;
325
326 pci_unmap_single(fnic->pdev, buf->dma_addr, buf->len,
327 PCI_DMA_FROMDEVICE);
328 skb = buf->os_buf;
329 buf->os_buf = NULL;
330
331 cq_desc_dec(cq_desc, &type, &color, &q_number, &completed_index);
332 if (type == CQ_DESC_TYPE_RQ_FCP) {
333 cq_fcp_rq_desc_dec((struct cq_fcp_rq_desc *)cq_desc,
334 &type, &color, &q_number, &completed_index,
335 &eop, &sop, &fcoe_fc_crc_ok, &exchange_id,
336 &tmpl, &fcp_bytes_written, &sof, &eof,
337 &ingress_port, &packet_error,
338 &fcoe_enc_error, &fcs_ok, &vlan_stripped,
339 &vlan);
340 eth_hdrs_stripped = 1;
341
342 } else if (type == CQ_DESC_TYPE_RQ_ENET) {
343 cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
344 &type, &color, &q_number, &completed_index,
345 &ingress_port, &fcoe, &eop, &sop,
346 &rss_type, &csum_not_calc, &rss_hash,
347 &bytes_written, &packet_error,
348 &vlan_stripped, &vlan, &checksum,
349 &fcoe_sof, &fcoe_fc_crc_ok,
350 &fcoe_enc_error, &fcoe_eof,
351 &tcp_udp_csum_ok, &udp, &tcp,
352 &ipv4_csum_ok, &ipv6, &ipv4,
353 &ipv4_fragment, &fcs_ok);
354 eth_hdrs_stripped = 0;
355
356 } else {
357		/* wrong CQ type */
358 shost_printk(KERN_ERR, fnic->lport->host,
359 "fnic rq_cmpl wrong cq type x%x\n", type);
360 goto drop;
361 }
362
363 if (!fcs_ok || packet_error || !fcoe_fc_crc_ok || fcoe_enc_error) {
364 FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
365 "fnic rq_cmpl fcoe x%x fcsok x%x"
366 " pkterr x%x fcoe_fc_crc_ok x%x, fcoe_enc_err"
367 " x%x\n",
368 fcoe, fcs_ok, packet_error,
369 fcoe_fc_crc_ok, fcoe_enc_error);
370 goto drop;
371 }
372
373 if (eth_hdrs_stripped)
374 fnic_import_rq_fc_frame(skb, fcp_bytes_written, sof, eof);
375 else if (fnic_import_rq_eth_pkt(skb, bytes_written))
376 goto drop;
377
378 fp = (struct fc_frame *)skb;
379
380	/*
381	 * If the frame is an ELS response that matches the cached FLOGI
382	 * OX_ID and is an accept (LS_ACC), issue a flogi_reg_request copy WQ
383	 * request to firmware to register the S_ID and pick FC_OUI vs GW mode.
384	 */
385 if (is_matching_flogi_resp_frame(fnic, fp)) {
386 if (!eth_hdrs_stripped) {
387 if (fc_frame_payload_op(fp) == ELS_LS_ACC) {
388 fnic_handle_flogi_resp(fnic, fp);
389 return;
390 }
391			/*
392			 * Received a Flogi reject. No point registering
393			 * with fw; just forward the frame to libFC.
394			 */
395 goto forward;
396 }
397 goto drop;
398 }
399 if (!eth_hdrs_stripped)
400 goto drop;
401
402forward:
403 spin_lock_irqsave(&fnic->fnic_lock, flags);
404 if (fnic->stop_rx_link_events) {
405 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
406 goto drop;
407 }
408	/* Use fr_flags to indicate whether this is a successful flogi resp */
409 fr_flags(fp) = 0;
410 fr_dev(fp) = fnic->lport;
411 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
412
413 skb_queue_tail(&fnic->frame_queue, skb);
414 queue_work(fnic_event_queue, &fnic->frame_work);
415
416 return;
417drop:
418 dev_kfree_skb_irq(skb);
419}
420
421static int fnic_rq_cmpl_handler_cont(struct vnic_dev *vdev,
422 struct cq_desc *cq_desc, u8 type,
423 u16 q_number, u16 completed_index,
424 void *opaque)
425{
426 struct fnic *fnic = vnic_dev_priv(vdev);
427
428 vnic_rq_service(&fnic->rq[q_number], cq_desc, completed_index,
429 VNIC_RQ_RETURN_DESC, fnic_rq_cmpl_frame_recv,
430 NULL);
431 return 0;
432}
433
434int fnic_rq_cmpl_handler(struct fnic *fnic, int rq_work_to_do)
435{
436 unsigned int tot_rq_work_done = 0, cur_work_done;
437 unsigned int i;
438 int err;
439
440 for (i = 0; i < fnic->rq_count; i++) {
441 cur_work_done = vnic_cq_service(&fnic->cq[i], rq_work_to_do,
442 fnic_rq_cmpl_handler_cont,
443 NULL);
444 if (cur_work_done) {
445 err = vnic_rq_fill(&fnic->rq[i], fnic_alloc_rq_frame);
446 if (err)
447 shost_printk(KERN_ERR, fnic->lport->host,
448					     "fnic_alloc_rq_frame can't alloc"
449 " frame\n");
450 }
451 tot_rq_work_done += cur_work_done;
452 }
453
454 return tot_rq_work_done;
455}
456
457/*
458 * This function is called once at init time to allocate and fill RQ
459 * buffers. Subsequently, it is called in the interrupt context after RQ
460 * buffer processing to replenish the buffers in the RQ
461 */
462int fnic_alloc_rq_frame(struct vnic_rq *rq)
463{
464 struct fnic *fnic = vnic_dev_priv(rq->vdev);
465 struct sk_buff *skb;
466 u16 len;
467 dma_addr_t pa;
468
469 len = FC_FRAME_HEADROOM + FC_MAX_FRAME + FC_FRAME_TAILROOM;
470 skb = dev_alloc_skb(len);
471 if (!skb) {
472 FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
473 "Unable to allocate RQ sk_buff\n");
474 return -ENOMEM;
475 }
476 skb_reset_mac_header(skb);
477 skb_reset_transport_header(skb);
478 skb_reset_network_header(skb);
479 skb_put(skb, len);
480 pa = pci_map_single(fnic->pdev, skb->data, len, PCI_DMA_FROMDEVICE);
481 fnic_queue_rq_desc(rq, skb, pa, len);
482 return 0;
483}
484
485void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
486{
487 struct fc_frame *fp = buf->os_buf;
488 struct fnic *fnic = vnic_dev_priv(rq->vdev);
489
490 pci_unmap_single(fnic->pdev, buf->dma_addr, buf->len,
491 PCI_DMA_FROMDEVICE);
492
493 dev_kfree_skb(fp_skb(fp));
494 buf->os_buf = NULL;
495}
496
497static inline int is_flogi_frame(struct fc_frame_header *fh)
498{
499 return fh->fh_r_ctl == FC_RCTL_ELS_REQ && *(u8 *)(fh + 1) == ELS_FLOGI;
500}
501
502int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp)
503{
504 struct vnic_wq *wq = &fnic->wq[0];
505 struct sk_buff *skb;
506 dma_addr_t pa;
507 struct ethhdr *eth_hdr;
508 struct vlan_ethhdr *vlan_hdr;
509 struct fcoe_hdr *fcoe_hdr;
510 struct fc_frame_header *fh;
511 u32 tot_len, eth_hdr_len;
512 int ret = 0;
513 unsigned long flags;
514
515 fh = fc_frame_header_get(fp);
516 skb = fp_skb(fp);
517
518 if (!fnic->vlan_hw_insert) {
519 eth_hdr_len = sizeof(*vlan_hdr) + sizeof(*fcoe_hdr);
520 vlan_hdr = (struct vlan_ethhdr *)skb_push(skb, eth_hdr_len);
521 eth_hdr = (struct ethhdr *)vlan_hdr;
522 vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
523 vlan_hdr->h_vlan_encapsulated_proto = htons(ETH_P_FCOE);
524 vlan_hdr->h_vlan_TCI = htons(fnic->vlan_id);
525 fcoe_hdr = (struct fcoe_hdr *)(vlan_hdr + 1);
526 } else {
527 eth_hdr_len = sizeof(*eth_hdr) + sizeof(*fcoe_hdr);
528 eth_hdr = (struct ethhdr *)skb_push(skb, eth_hdr_len);
529 eth_hdr->h_proto = htons(ETH_P_FCOE);
530 fcoe_hdr = (struct fcoe_hdr *)(eth_hdr + 1);
531 }
532
533 if (is_flogi_frame(fh)) {
534 fc_fcoe_set_mac(eth_hdr->h_dest, fh->fh_d_id);
535 memcpy(eth_hdr->h_source, fnic->mac_addr, ETH_ALEN);
536 } else {
537 if (fnic->fcoui_mode)
538 fc_fcoe_set_mac(eth_hdr->h_dest, fh->fh_d_id);
539 else
540 memcpy(eth_hdr->h_dest, fnic->dest_addr, ETH_ALEN);
541 memcpy(eth_hdr->h_source, fnic->data_src_addr, ETH_ALEN);
542 }
543
544 tot_len = skb->len;
545 BUG_ON(tot_len % 4);
546
547 memset(fcoe_hdr, 0, sizeof(*fcoe_hdr));
548 fcoe_hdr->fcoe_sof = fr_sof(fp);
549 if (FC_FCOE_VER)
550 FC_FCOE_ENCAPS_VER(fcoe_hdr, FC_FCOE_VER);
551
552 pa = pci_map_single(fnic->pdev, eth_hdr, tot_len, PCI_DMA_TODEVICE);
553
554 spin_lock_irqsave(&fnic->wq_lock[0], flags);
555
556 if (!vnic_wq_desc_avail(wq)) {
557 pci_unmap_single(fnic->pdev, pa,
558 tot_len, PCI_DMA_TODEVICE);
559 ret = -1;
560 goto fnic_send_frame_end;
561 }
562
563 fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp),
564 fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1);
565fnic_send_frame_end:
566 spin_unlock_irqrestore(&fnic->wq_lock[0], flags);
567
568 if (ret)
569 dev_kfree_skb_any(fp_skb(fp));
570
571 return ret;
572}
573
574/*
575 * fnic_send
576 * Routine to send a raw frame
577 */
578int fnic_send(struct fc_lport *lp, struct fc_frame *fp)
579{
580 struct fnic *fnic = lport_priv(lp);
581 struct fc_frame_header *fh;
582 int ret = 0;
583 enum fnic_state old_state;
584 unsigned long flags;
585 struct fc_frame *old_flogi = NULL;
586 struct fc_frame *old_flogi_resp = NULL;
587
588 if (fnic->in_remove) {
589 dev_kfree_skb(fp_skb(fp));
590 ret = -1;
591 goto fnic_send_end;
592 }
593
594 fh = fc_frame_header_get(fp);
595	/* if not a Flogi frame, send it out; this is the common case */
596 if (!is_flogi_frame(fh))
597 return fnic_send_frame(fnic, fp);
598
599 /* Flogi frame, now enter the state machine */
600
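	/*
	 * Sketch of the FLOGI-driven state machine implemented below,
	 * summarized from the case comments (the flogi_reg completion
	 * path lives outside this file):
	 *
	 *   FC_MODE/ETH_TRANS_FC_MODE --fw reset issued--> FC_TRANS_ETH_MODE
	 *   FC_TRANS_ETH_MODE --fw reset ok--> ETH_MODE  (on fail: FC_MODE)
	 *   ETH_MODE --matching FLOGI resp, flogi_reg--> ETH_TRANS_FC_MODE
	 */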
601 spin_lock_irqsave(&fnic->fnic_lock, flags);
602again:
603 /* Get any old cached frames, free them after dropping lock */
604 old_flogi = fnic->flogi;
605 fnic->flogi = NULL;
606 old_flogi_resp = fnic->flogi_resp;
607 fnic->flogi_resp = NULL;
608
609 fnic->flogi_oxid = FC_XID_UNKNOWN;
610
611 old_state = fnic->state;
612 switch (old_state) {
613 case FNIC_IN_FC_MODE:
614 case FNIC_IN_ETH_TRANS_FC_MODE:
615 default:
616 fnic->state = FNIC_IN_FC_TRANS_ETH_MODE;
617 vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr);
618 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
619
620 if (old_flogi) {
621 dev_kfree_skb(fp_skb(old_flogi));
622 old_flogi = NULL;
623 }
624 if (old_flogi_resp) {
625 dev_kfree_skb(fp_skb(old_flogi_resp));
626 old_flogi_resp = NULL;
627 }
628
629 ret = fnic_fw_reset_handler(fnic);
630
631 spin_lock_irqsave(&fnic->fnic_lock, flags);
632 if (fnic->state != FNIC_IN_FC_TRANS_ETH_MODE)
633 goto again;
634 if (ret) {
635 fnic->state = old_state;
636 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
637 dev_kfree_skb(fp_skb(fp));
638 goto fnic_send_end;
639 }
640 old_flogi = fnic->flogi;
641 fnic->flogi = fp;
642 fnic->flogi_oxid = ntohs(fh->fh_ox_id);
643 old_flogi_resp = fnic->flogi_resp;
644 fnic->flogi_resp = NULL;
645 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
646 break;
647
648 case FNIC_IN_FC_TRANS_ETH_MODE:
649		/*
650		 * A reset is pending with the firmware. Store the flogi
651		 * and its oxid. The transition out of this state happens
652		 * only when the firmware completes the reset, either
653		 * successfully or with failure. On success, transition to
654		 * FNIC_IN_ETH_MODE; on failure, transition to
655		 * FNIC_IN_FC_MODE.
656		 */
657 fnic->flogi = fp;
658 fnic->flogi_oxid = ntohs(fh->fh_ox_id);
659 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
660 break;
661
662 case FNIC_IN_ETH_MODE:
663		/*
664		 * The fw/hw is already in eth mode. Store the oxid,
665		 * and send the flogi frame out. The transition out of this
666		 * state happens only when we receive a flogi response from
667		 * the network and its oxid matches the oxid cached when the
668		 * flogi frame was sent out. If they match, we issue a
669		 * flogi_reg request and transition to state
670		 * FNIC_IN_ETH_TRANS_FC_MODE.
671		 */
672 fnic->flogi_oxid = ntohs(fh->fh_ox_id);
673 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
674 ret = fnic_send_frame(fnic, fp);
675 break;
676 }
677
678fnic_send_end:
679 if (old_flogi)
680 dev_kfree_skb(fp_skb(old_flogi));
681 if (old_flogi_resp)
682 dev_kfree_skb(fp_skb(old_flogi_resp));
683 return ret;
684}
685
686static void fnic_wq_complete_frame_send(struct vnic_wq *wq,
687 struct cq_desc *cq_desc,
688 struct vnic_wq_buf *buf, void *opaque)
689{
690 struct sk_buff *skb = buf->os_buf;
691 struct fc_frame *fp = (struct fc_frame *)skb;
692 struct fnic *fnic = vnic_dev_priv(wq->vdev);
693
694 pci_unmap_single(fnic->pdev, buf->dma_addr,
695 buf->len, PCI_DMA_TODEVICE);
696 dev_kfree_skb_irq(fp_skb(fp));
697 buf->os_buf = NULL;
698}
699
700static int fnic_wq_cmpl_handler_cont(struct vnic_dev *vdev,
701 struct cq_desc *cq_desc, u8 type,
702 u16 q_number, u16 completed_index,
703 void *opaque)
704{
705 struct fnic *fnic = vnic_dev_priv(vdev);
706 unsigned long flags;
707
708 spin_lock_irqsave(&fnic->wq_lock[q_number], flags);
709 vnic_wq_service(&fnic->wq[q_number], cq_desc, completed_index,
710 fnic_wq_complete_frame_send, NULL);
711 spin_unlock_irqrestore(&fnic->wq_lock[q_number], flags);
712
713 return 0;
714}
715
716int fnic_wq_cmpl_handler(struct fnic *fnic, int work_to_do)
717{
718 unsigned int wq_work_done = 0;
719 unsigned int i;
720
721 for (i = 0; i < fnic->raw_wq_count; i++) {
722 wq_work_done += vnic_cq_service(&fnic->cq[fnic->rq_count+i],
723 work_to_do,
724 fnic_wq_cmpl_handler_cont,
725 NULL);
726 }
727
728 return wq_work_done;
729}
730
731
732void fnic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
733{
734 struct fc_frame *fp = buf->os_buf;
735 struct fnic *fnic = vnic_dev_priv(wq->vdev);
736
737 pci_unmap_single(fnic->pdev, buf->dma_addr,
738 buf->len, PCI_DMA_TODEVICE);
739
740 dev_kfree_skb(fp_skb(fp));
741 buf->os_buf = NULL;
742}
diff --git a/drivers/scsi/fnic/fnic_io.h b/drivers/scsi/fnic/fnic_io.h
new file mode 100644
index 000000000000..f0b896988cd5
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_io.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _FNIC_IO_H_
19#define _FNIC_IO_H_
20
21#include <scsi/fc/fc_fcp.h>
22
23#define FNIC_DFLT_SG_DESC_CNT 32
24#define FNIC_MAX_SG_DESC_CNT 1024 /* Maximum descriptors per sgl */
25#define FNIC_SG_DESC_ALIGN 16 /* Descriptor address alignment */
26
27struct host_sg_desc {
28 __le64 addr;
29 __le32 len;
30 u32 _resvd;
31};
32
33struct fnic_dflt_sgl_list {
34 struct host_sg_desc sg_desc[FNIC_DFLT_SG_DESC_CNT];
35};
36
37struct fnic_sgl_list {
38 struct host_sg_desc sg_desc[FNIC_MAX_SG_DESC_CNT];
39};
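/*
 * Size check, illustrative only: host_sg_desc packs to 8 + 4 + 4 = 16
 * bytes with no compiler padding (all fields naturally aligned), so
 *
 *   sizeof(struct fnic_dflt_sgl_list) =   32 * 16 =   512 bytes
 *   sizeof(struct fnic_sgl_list)      = 1024 * 16 = 16384 bytes
 *
 * which is why the driver keeps two slab caches instead of always
 * allocating the maximum-size list.
 */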
40
41enum fnic_sgl_list_type {
42 FNIC_SGL_CACHE_DFLT = 0, /* cache with default size sgl */
43 FNIC_SGL_CACHE_MAX, /* cache with max size sgl */
44 FNIC_SGL_NUM_CACHES /* number of sgl caches */
45};
46
47enum fnic_ioreq_state {
48 FNIC_IOREQ_CMD_PENDING = 0,
49 FNIC_IOREQ_ABTS_PENDING,
50 FNIC_IOREQ_ABTS_COMPLETE,
51 FNIC_IOREQ_CMD_COMPLETE,
52};
53
54struct fnic_io_req {
55 struct host_sg_desc *sgl_list; /* sgl list */
56 void *sgl_list_alloc; /* sgl list address used for free */
57 dma_addr_t sense_buf_pa; /* dma address for sense buffer*/
58 dma_addr_t sgl_list_pa; /* dma address for sgl list */
59 u16 sgl_cnt;
60 u8 sgl_type; /* device DMA descriptor list type */
61 u8 io_completed:1; /* set to 1 when fw completes IO */
62 u32 port_id; /* remote port DID */
63 struct completion *abts_done; /* completion for abts */
64 struct completion *dr_done; /* completion for device reset */
65};
66
67#endif /* _FNIC_IO_H_ */
diff --git a/drivers/scsi/fnic/fnic_isr.c b/drivers/scsi/fnic/fnic_isr.c
new file mode 100644
index 000000000000..2b3064828aea
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_isr.c
@@ -0,0 +1,332 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#include <linux/string.h>
19#include <linux/errno.h>
20#include <linux/pci.h>
21#include <linux/interrupt.h>
22#include <scsi/libfc.h>
23#include <scsi/fc_frame.h>
24#include "vnic_dev.h"
25#include "vnic_intr.h"
26#include "vnic_stats.h"
27#include "fnic_io.h"
28#include "fnic.h"
29
30static irqreturn_t fnic_isr_legacy(int irq, void *data)
31{
32 struct fnic *fnic = data;
33 u32 pba;
34 unsigned long work_done = 0;
35
36 pba = vnic_intr_legacy_pba(fnic->legacy_pba);
37 if (!pba)
38 return IRQ_NONE;
39
40 if (pba & (1 << FNIC_INTX_NOTIFY)) {
41 vnic_intr_return_all_credits(&fnic->intr[FNIC_INTX_NOTIFY]);
42 fnic_handle_link_event(fnic);
43 }
44
45 if (pba & (1 << FNIC_INTX_ERR)) {
46 vnic_intr_return_all_credits(&fnic->intr[FNIC_INTX_ERR]);
47 fnic_log_q_error(fnic);
48 }
49
50 if (pba & (1 << FNIC_INTX_WQ_RQ_COPYWQ)) {
51 work_done += fnic_wq_copy_cmpl_handler(fnic, 8);
52 work_done += fnic_wq_cmpl_handler(fnic, 4);
53 work_done += fnic_rq_cmpl_handler(fnic, 4);
54
55 vnic_intr_return_credits(&fnic->intr[FNIC_INTX_WQ_RQ_COPYWQ],
56 work_done,
57 1 /* unmask intr */,
58 1 /* reset intr timer */);
59 }
60
61 return IRQ_HANDLED;
62}
63
64static irqreturn_t fnic_isr_msi(int irq, void *data)
65{
66 struct fnic *fnic = data;
67 unsigned long work_done = 0;
68
69 work_done += fnic_wq_copy_cmpl_handler(fnic, 8);
70 work_done += fnic_wq_cmpl_handler(fnic, 4);
71 work_done += fnic_rq_cmpl_handler(fnic, 4);
72
73 vnic_intr_return_credits(&fnic->intr[0],
74 work_done,
75 1 /* unmask intr */,
76 1 /* reset intr timer */);
77
78 return IRQ_HANDLED;
79}
80
81static irqreturn_t fnic_isr_msix_rq(int irq, void *data)
82{
83 struct fnic *fnic = data;
84 unsigned long rq_work_done = 0;
85
86 rq_work_done = fnic_rq_cmpl_handler(fnic, 4);
87 vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_RQ],
88 rq_work_done,
89 1 /* unmask intr */,
90 1 /* reset intr timer */);
91
92 return IRQ_HANDLED;
93}
94
95static irqreturn_t fnic_isr_msix_wq(int irq, void *data)
96{
97 struct fnic *fnic = data;
98 unsigned long wq_work_done = 0;
99
100 wq_work_done = fnic_wq_cmpl_handler(fnic, 4);
101 vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_WQ],
102 wq_work_done,
103 1 /* unmask intr */,
104 1 /* reset intr timer */);
105 return IRQ_HANDLED;
106}
107
108static irqreturn_t fnic_isr_msix_wq_copy(int irq, void *data)
109{
110 struct fnic *fnic = data;
111 unsigned long wq_copy_work_done = 0;
112
113 wq_copy_work_done = fnic_wq_copy_cmpl_handler(fnic, 8);
114 vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_WQ_COPY],
115 wq_copy_work_done,
116 1 /* unmask intr */,
117 1 /* reset intr timer */);
118 return IRQ_HANDLED;
119}
120
121static irqreturn_t fnic_isr_msix_err_notify(int irq, void *data)
122{
123 struct fnic *fnic = data;
124
125 vnic_intr_return_all_credits(&fnic->intr[FNIC_MSIX_ERR_NOTIFY]);
126 fnic_log_q_error(fnic);
127 fnic_handle_link_event(fnic);
128
129 return IRQ_HANDLED;
130}
131
132void fnic_free_intr(struct fnic *fnic)
133{
134 int i;
135
136 switch (vnic_dev_get_intr_mode(fnic->vdev)) {
137 case VNIC_DEV_INTR_MODE_INTX:
138 case VNIC_DEV_INTR_MODE_MSI:
139 free_irq(fnic->pdev->irq, fnic);
140 break;
141
142 case VNIC_DEV_INTR_MODE_MSIX:
143 for (i = 0; i < ARRAY_SIZE(fnic->msix); i++)
144 if (fnic->msix[i].requested)
145 free_irq(fnic->msix_entry[i].vector,
146 fnic->msix[i].devid);
147 break;
148
149 default:
150 break;
151 }
152}
153
154int fnic_request_intr(struct fnic *fnic)
155{
156 int err = 0;
157 int i;
158
159 switch (vnic_dev_get_intr_mode(fnic->vdev)) {
160
161 case VNIC_DEV_INTR_MODE_INTX:
162 err = request_irq(fnic->pdev->irq, &fnic_isr_legacy,
163 IRQF_SHARED, DRV_NAME, fnic);
164 break;
165
166 case VNIC_DEV_INTR_MODE_MSI:
167 err = request_irq(fnic->pdev->irq, &fnic_isr_msi,
168 0, fnic->name, fnic);
169 break;
170
171 case VNIC_DEV_INTR_MODE_MSIX:
172
173 sprintf(fnic->msix[FNIC_MSIX_RQ].devname,
174 "%.11s-fcs-rq", fnic->name);
175 fnic->msix[FNIC_MSIX_RQ].isr = fnic_isr_msix_rq;
176 fnic->msix[FNIC_MSIX_RQ].devid = fnic;
177
178 sprintf(fnic->msix[FNIC_MSIX_WQ].devname,
179 "%.11s-fcs-wq", fnic->name);
180 fnic->msix[FNIC_MSIX_WQ].isr = fnic_isr_msix_wq;
181 fnic->msix[FNIC_MSIX_WQ].devid = fnic;
182
183 sprintf(fnic->msix[FNIC_MSIX_WQ_COPY].devname,
184 "%.11s-scsi-wq", fnic->name);
185 fnic->msix[FNIC_MSIX_WQ_COPY].isr = fnic_isr_msix_wq_copy;
186 fnic->msix[FNIC_MSIX_WQ_COPY].devid = fnic;
187
188 sprintf(fnic->msix[FNIC_MSIX_ERR_NOTIFY].devname,
189 "%.11s-err-notify", fnic->name);
190 fnic->msix[FNIC_MSIX_ERR_NOTIFY].isr =
191 fnic_isr_msix_err_notify;
192 fnic->msix[FNIC_MSIX_ERR_NOTIFY].devid = fnic;
193
194 for (i = 0; i < ARRAY_SIZE(fnic->msix); i++) {
195 err = request_irq(fnic->msix_entry[i].vector,
196 fnic->msix[i].isr, 0,
197 fnic->msix[i].devname,
198 fnic->msix[i].devid);
199 if (err) {
200 shost_printk(KERN_ERR, fnic->lport->host,
201 "MSIX: request_irq"
202 " failed %d\n", err);
203 fnic_free_intr(fnic);
204 break;
205 }
206 fnic->msix[i].requested = 1;
207 }
208 break;
209
210 default:
211 break;
212 }
213
214 return err;
215}
216
217int fnic_set_intr_mode(struct fnic *fnic)
218{
219 unsigned int n = ARRAY_SIZE(fnic->rq);
220 unsigned int m = ARRAY_SIZE(fnic->wq);
221 unsigned int o = ARRAY_SIZE(fnic->wq_copy);
222 unsigned int i;
223
224 /*
225	 * Set interrupt mode (INTx, MSI, MSI-X) depending on
226	 * system capabilities.
227 *
228 * Try MSI-X first
229 *
230 * We need n RQs, m WQs, o Copy WQs, n+m+o CQs, and n+m+o+1 INTRs
231 * (last INTR is used for WQ/RQ errors and notification area)
232 */
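	/*
	 * Worked example with hypothetical counts (the real n/m/o come
	 * from the FNIC_*_MAX array sizes, not shown here): n = m = o = 1
	 * means 1 RQ + 1 WQ + 1 copy WQ, 3 CQs, and 3 + 1 = 4 MSI-X
	 * vectors, the extra vector serving errors and the notify area.
	 */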
233
234 BUG_ON(ARRAY_SIZE(fnic->msix_entry) < n + m + o + 1);
235 for (i = 0; i < n + m + o + 1; i++)
236 fnic->msix_entry[i].entry = i;
237
238 if (fnic->rq_count >= n &&
239 fnic->raw_wq_count >= m &&
240 fnic->wq_copy_count >= o &&
241 fnic->cq_count >= n + m + o) {
242 if (!pci_enable_msix(fnic->pdev, fnic->msix_entry,
243 n + m + o + 1)) {
244 fnic->rq_count = n;
245 fnic->raw_wq_count = m;
246 fnic->wq_copy_count = o;
247 fnic->wq_count = m + o;
248 fnic->cq_count = n + m + o;
249 fnic->intr_count = n + m + o + 1;
250 fnic->err_intr_offset = FNIC_MSIX_ERR_NOTIFY;
251
252 FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host,
253 "Using MSI-X Interrupts\n");
254 vnic_dev_set_intr_mode(fnic->vdev,
255 VNIC_DEV_INTR_MODE_MSIX);
256 return 0;
257 }
258 }
259
260 /*
261 * Next try MSI
262 * We need 1 RQ, 1 WQ, 1 WQ_COPY, 3 CQs, and 1 INTR
263 */
264 if (fnic->rq_count >= 1 &&
265 fnic->raw_wq_count >= 1 &&
266 fnic->wq_copy_count >= 1 &&
267 fnic->cq_count >= 3 &&
268 fnic->intr_count >= 1 &&
269 !pci_enable_msi(fnic->pdev)) {
270
271 fnic->rq_count = 1;
272 fnic->raw_wq_count = 1;
273 fnic->wq_copy_count = 1;
274 fnic->wq_count = 2;
275 fnic->cq_count = 3;
276 fnic->intr_count = 1;
277 fnic->err_intr_offset = 0;
278
279 FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host,
280 "Using MSI Interrupts\n");
281 vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_MSI);
282
283 return 0;
284 }
285
286 /*
287 * Next try INTx
288 * We need 1 RQ, 1 WQ, 1 WQ_COPY, 3 CQs, and 3 INTRs
289 * 1 INTR is used for all 3 queues, 1 INTR for queue errors
290 * 1 INTR for notification area
291 */
292
293 if (fnic->rq_count >= 1 &&
294 fnic->raw_wq_count >= 1 &&
295 fnic->wq_copy_count >= 1 &&
296 fnic->cq_count >= 3 &&
297 fnic->intr_count >= 3) {
298
299 fnic->rq_count = 1;
300 fnic->raw_wq_count = 1;
301 fnic->wq_copy_count = 1;
302 fnic->cq_count = 3;
303 fnic->intr_count = 3;
304
305 FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host,
306 "Using Legacy Interrupts\n");
307 vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_INTX);
308
309 return 0;
310 }
311
312 vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);
313
314 return -EINVAL;
315}
316
317void fnic_clear_intr_mode(struct fnic *fnic)
318{
319 switch (vnic_dev_get_intr_mode(fnic->vdev)) {
320 case VNIC_DEV_INTR_MODE_MSIX:
321 pci_disable_msix(fnic->pdev);
322 break;
323 case VNIC_DEV_INTR_MODE_MSI:
324 pci_disable_msi(fnic->pdev);
325 break;
326 default:
327 break;
328 }
329
330 vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_INTX);
331}
332
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
new file mode 100644
index 000000000000..32ef6b87d895
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -0,0 +1,942 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#include <linux/module.h>
19#include <linux/mempool.h>
20#include <linux/string.h>
21#include <linux/errno.h>
22#include <linux/init.h>
23#include <linux/pci.h>
24#include <linux/skbuff.h>
25#include <linux/interrupt.h>
26#include <linux/spinlock.h>
27#include <linux/workqueue.h>
28#include <scsi/scsi_host.h>
29#include <scsi/scsi_transport.h>
30#include <scsi/scsi_transport_fc.h>
31#include <scsi/scsi_tcq.h>
32#include <scsi/libfc.h>
33#include <scsi/fc_frame.h>
34
35#include "vnic_dev.h"
36#include "vnic_intr.h"
37#include "vnic_stats.h"
38#include "fnic_io.h"
39#include "fnic.h"
40
41#define PCI_DEVICE_ID_CISCO_FNIC 0x0045
42
43/* Timer to poll notification area for events. Used for MSI interrupts */
44#define FNIC_NOTIFY_TIMER_PERIOD (2 * HZ)
45
46static struct kmem_cache *fnic_sgl_cache[FNIC_SGL_NUM_CACHES];
47static struct kmem_cache *fnic_io_req_cache;
48LIST_HEAD(fnic_list);
49DEFINE_SPINLOCK(fnic_list_lock);
50
51/* Supported devices by fnic module */
52static struct pci_device_id fnic_id_table[] = {
53 { PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_FNIC) },
54 { 0, }
55};
56
57MODULE_DESCRIPTION(DRV_DESCRIPTION);
58MODULE_AUTHOR("Abhijeet Joglekar <abjoglek@cisco.com>, "
59 "Joseph R. Eykholt <jeykholt@cisco.com>");
60MODULE_LICENSE("GPL v2");
61MODULE_VERSION(DRV_VERSION);
62MODULE_DEVICE_TABLE(pci, fnic_id_table);
63
64unsigned int fnic_log_level;
65module_param(fnic_log_level, int, S_IRUGO|S_IWUSR);
66MODULE_PARM_DESC(fnic_log_level, "bit mask of fnic logging levels");
67
68
69static struct libfc_function_template fnic_transport_template = {
70 .frame_send = fnic_send,
71 .fcp_abort_io = fnic_empty_scsi_cleanup,
72 .fcp_cleanup = fnic_empty_scsi_cleanup,
73 .exch_mgr_reset = fnic_exch_mgr_reset
74};
75
76static int fnic_slave_alloc(struct scsi_device *sdev)
77{
78 struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
79 struct fc_lport *lp = shost_priv(sdev->host);
80 struct fnic *fnic = lport_priv(lp);
81
82 sdev->tagged_supported = 1;
83
84 if (!rport || fc_remote_port_chkready(rport))
85 return -ENXIO;
86
87 scsi_activate_tcq(sdev, FNIC_DFLT_QUEUE_DEPTH);
88 rport->dev_loss_tmo = fnic->config.port_down_timeout / 1000;
89
90 return 0;
91}
92
93static struct scsi_host_template fnic_host_template = {
94 .module = THIS_MODULE,
95 .name = DRV_NAME,
96 .queuecommand = fnic_queuecommand,
97 .eh_abort_handler = fnic_abort_cmd,
98 .eh_device_reset_handler = fnic_device_reset,
99 .eh_host_reset_handler = fnic_host_reset,
100 .slave_alloc = fnic_slave_alloc,
101 .change_queue_depth = fc_change_queue_depth,
102 .change_queue_type = fc_change_queue_type,
103 .this_id = -1,
104 .cmd_per_lun = 3,
105 .can_queue = FNIC_MAX_IO_REQ,
106 .use_clustering = ENABLE_CLUSTERING,
107 .sg_tablesize = FNIC_MAX_SG_DESC_CNT,
108 .max_sectors = 0xffff,
109 .shost_attrs = fnic_attrs,
110};
111
112static void fnic_get_host_speed(struct Scsi_Host *shost);
113static struct scsi_transport_template *fnic_fc_transport;
114static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *);
115
116static struct fc_function_template fnic_fc_functions = {
117
118 .show_host_node_name = 1,
119 .show_host_port_name = 1,
120 .show_host_supported_classes = 1,
121 .show_host_supported_fc4s = 1,
122 .show_host_active_fc4s = 1,
123 .show_host_maxframe_size = 1,
124 .show_host_port_id = 1,
125 .show_host_supported_speeds = 1,
126 .get_host_speed = fnic_get_host_speed,
127 .show_host_speed = 1,
128 .show_host_port_type = 1,
129 .get_host_port_state = fc_get_host_port_state,
130 .show_host_port_state = 1,
131 .show_host_symbolic_name = 1,
132 .show_rport_maxframe_size = 1,
133 .show_rport_supported_classes = 1,
134 .show_host_fabric_name = 1,
135 .show_starget_node_name = 1,
136 .show_starget_port_name = 1,
137 .show_starget_port_id = 1,
138 .show_rport_dev_loss_tmo = 1,
139 .issue_fc_host_lip = fnic_reset,
140 .get_fc_host_stats = fnic_get_stats,
141 .dd_fcrport_size = sizeof(struct fc_rport_libfc_priv),
142 .terminate_rport_io = fnic_terminate_rport_io,
143};
144
145static void fnic_get_host_speed(struct Scsi_Host *shost)
146{
147 struct fc_lport *lp = shost_priv(shost);
148 struct fnic *fnic = lport_priv(lp);
149 u32 port_speed = vnic_dev_port_speed(fnic->vdev);
150
151 /* Add in other values as they get defined in fw */
152 switch (port_speed) {
153 case 10000:
154 fc_host_speed(shost) = FC_PORTSPEED_10GBIT;
155 break;
156 default:
157 fc_host_speed(shost) = FC_PORTSPEED_10GBIT;
158 break;
159 }
160}
161
162static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *host)
163{
164 int ret;
165 struct fc_lport *lp = shost_priv(host);
166 struct fnic *fnic = lport_priv(lp);
167 struct fc_host_statistics *stats = &lp->host_stats;
168 struct vnic_stats *vs;
169 unsigned long flags;
170
171 if (time_before(jiffies, fnic->stats_time + HZ / FNIC_STATS_RATE_LIMIT))
172 return stats;
173 fnic->stats_time = jiffies;
174
175 spin_lock_irqsave(&fnic->fnic_lock, flags);
176 ret = vnic_dev_stats_dump(fnic->vdev, &fnic->stats);
177 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
178
179 if (ret) {
180 FNIC_MAIN_DBG(KERN_DEBUG, fnic->lport->host,
181 "fnic: Get vnic stats failed"
182 " 0x%x", ret);
183 return stats;
184 }
185 vs = fnic->stats;
186 stats->tx_frames = vs->tx.tx_unicast_frames_ok;
187 stats->tx_words = vs->tx.tx_unicast_bytes_ok / 4;
188 stats->rx_frames = vs->rx.rx_unicast_frames_ok;
189 stats->rx_words = vs->rx.rx_unicast_bytes_ok / 4;
190 stats->error_frames = vs->tx.tx_errors + vs->rx.rx_errors;
191 stats->dumped_frames = vs->tx.tx_drops + vs->rx.rx_drop;
192 stats->invalid_crc_count = vs->rx.rx_crc_errors;
193 stats->seconds_since_last_reset = (jiffies - lp->boot_time) / HZ;
194 stats->fcp_input_megabytes = div_u64(fnic->fcp_input_bytes, 1000000);
195 stats->fcp_output_megabytes = div_u64(fnic->fcp_output_bytes, 1000000);
196
197 return stats;
198}
199
200void fnic_log_q_error(struct fnic *fnic)
201{
202 unsigned int i;
203 u32 error_status;
204
205 for (i = 0; i < fnic->raw_wq_count; i++) {
206 error_status = ioread32(&fnic->wq[i].ctrl->error_status);
207 if (error_status)
208 shost_printk(KERN_ERR, fnic->lport->host,
209 "WQ[%d] error_status"
210 " %d\n", i, error_status);
211 }
212
213 for (i = 0; i < fnic->rq_count; i++) {
214 error_status = ioread32(&fnic->rq[i].ctrl->error_status);
215 if (error_status)
216 shost_printk(KERN_ERR, fnic->lport->host,
217 "RQ[%d] error_status"
218 " %d\n", i, error_status);
219 }
220
221 for (i = 0; i < fnic->wq_copy_count; i++) {
222 error_status = ioread32(&fnic->wq_copy[i].ctrl->error_status);
223 if (error_status)
224 shost_printk(KERN_ERR, fnic->lport->host,
225 "CWQ[%d] error_status"
226 " %d\n", i, error_status);
227 }
228}
229
230void fnic_handle_link_event(struct fnic *fnic)
231{
232 unsigned long flags;
233
234 spin_lock_irqsave(&fnic->fnic_lock, flags);
235 if (fnic->stop_rx_link_events) {
236 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
237 return;
238 }
239 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
240
241 queue_work(fnic_event_queue, &fnic->link_work);
242
243}
244
245static int fnic_notify_set(struct fnic *fnic)
246{
247 int err;
248
249 switch (vnic_dev_get_intr_mode(fnic->vdev)) {
250 case VNIC_DEV_INTR_MODE_INTX:
251 err = vnic_dev_notify_set(fnic->vdev, FNIC_INTX_NOTIFY);
252 break;
253 case VNIC_DEV_INTR_MODE_MSI:
254 err = vnic_dev_notify_set(fnic->vdev, -1);
255 break;
256 case VNIC_DEV_INTR_MODE_MSIX:
257 err = vnic_dev_notify_set(fnic->vdev, FNIC_MSIX_ERR_NOTIFY);
258 break;
259 default:
260 shost_printk(KERN_ERR, fnic->lport->host,
261 "Interrupt mode should be set up"
262 " before devcmd notify set %d\n",
263 vnic_dev_get_intr_mode(fnic->vdev));
264 err = -1;
265 break;
266 }
267
268 return err;
269}
270
271static void fnic_notify_timer(unsigned long data)
272{
273 struct fnic *fnic = (struct fnic *)data;
274
275 fnic_handle_link_event(fnic);
276 mod_timer(&fnic->notify_timer,
277 round_jiffies(jiffies + FNIC_NOTIFY_TIMER_PERIOD));
278}
279
280static void fnic_notify_timer_start(struct fnic *fnic)
281{
282 switch (vnic_dev_get_intr_mode(fnic->vdev)) {
283 case VNIC_DEV_INTR_MODE_MSI:
284 /*
285 * Schedule first timeout immediately. The driver is
286		 * initialized and ready to look for link up notification
287 */
288 mod_timer(&fnic->notify_timer, jiffies);
289 break;
290 default:
291 /* Using intr for notification for INTx/MSI-X */
292 break;
293	}
294}
295
296static int fnic_dev_wait(struct vnic_dev *vdev,
297 int (*start)(struct vnic_dev *, int),
298 int (*finished)(struct vnic_dev *, int *),
299 int arg)
300{
301 unsigned long time;
302 int done;
303 int err;
304
305 err = start(vdev, arg);
306 if (err)
307 return err;
308
309 /* Wait for func to complete...2 seconds max */
310 time = jiffies + (HZ * 2);
311 do {
312 err = finished(vdev, &done);
313 if (err)
314 return err;
315 if (done)
316 return 0;
317 schedule_timeout_uninterruptible(HZ / 10);
318 } while (time_after(time, jiffies));
319
320 return -ETIMEDOUT;
321}
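/*
 * Typical usage, as in fnic_probe() below -- poll vnic_dev_open until
 * vnic_dev_open_done() reports completion or the 2-second budget runs
 * out:
 *
 *   err = fnic_dev_wait(fnic->vdev, vnic_dev_open,
 *                       vnic_dev_open_done, 0);
 */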
322
323static int fnic_cleanup(struct fnic *fnic)
324{
325 unsigned int i;
326 int err;
327 unsigned long flags;
328 struct fc_frame *flogi = NULL;
329 struct fc_frame *flogi_resp = NULL;
330
331 vnic_dev_disable(fnic->vdev);
332 for (i = 0; i < fnic->intr_count; i++)
333 vnic_intr_mask(&fnic->intr[i]);
334
335 for (i = 0; i < fnic->rq_count; i++) {
336 err = vnic_rq_disable(&fnic->rq[i]);
337 if (err)
338 return err;
339 }
340 for (i = 0; i < fnic->raw_wq_count; i++) {
341 err = vnic_wq_disable(&fnic->wq[i]);
342 if (err)
343 return err;
344 }
345 for (i = 0; i < fnic->wq_copy_count; i++) {
346 err = vnic_wq_copy_disable(&fnic->wq_copy[i]);
347 if (err)
348 return err;
349 }
350
351 /* Clean up completed IOs and FCS frames */
352 fnic_wq_copy_cmpl_handler(fnic, -1);
353 fnic_wq_cmpl_handler(fnic, -1);
354 fnic_rq_cmpl_handler(fnic, -1);
355
356 /* Clean up the IOs and FCS frames that have not completed */
357 for (i = 0; i < fnic->raw_wq_count; i++)
358 vnic_wq_clean(&fnic->wq[i], fnic_free_wq_buf);
359 for (i = 0; i < fnic->rq_count; i++)
360 vnic_rq_clean(&fnic->rq[i], fnic_free_rq_buf);
361 for (i = 0; i < fnic->wq_copy_count; i++)
362 vnic_wq_copy_clean(&fnic->wq_copy[i],
363 fnic_wq_copy_cleanup_handler);
364
365 for (i = 0; i < fnic->cq_count; i++)
366 vnic_cq_clean(&fnic->cq[i]);
367 for (i = 0; i < fnic->intr_count; i++)
368 vnic_intr_clean(&fnic->intr[i]);
369
370 /*
371 * Remove cached flogi and flogi resp frames if any
372 * These frames are not in any queue, and therefore queue
373 * cleanup does not clean them. So clean them explicitly
374 */
375 spin_lock_irqsave(&fnic->fnic_lock, flags);
376 flogi = fnic->flogi;
377 fnic->flogi = NULL;
378 flogi_resp = fnic->flogi_resp;
379 fnic->flogi_resp = NULL;
380 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
381
382 if (flogi)
383 dev_kfree_skb(fp_skb(flogi));
384
385 if (flogi_resp)
386 dev_kfree_skb(fp_skb(flogi_resp));
387
388 mempool_destroy(fnic->io_req_pool);
389 for (i = 0; i < FNIC_SGL_NUM_CACHES; i++)
390 mempool_destroy(fnic->io_sgl_pool[i]);
391
392 return 0;
393}
394
395static void fnic_iounmap(struct fnic *fnic)
396{
397 if (fnic->bar0.vaddr)
398 iounmap(fnic->bar0.vaddr);
399}
400
401/*
402 * Allocate element for mempools requiring GFP_DMA flag.
403 * Otherwise, checks in kmem_flagcheck() hit BUG_ON().
404 */
405static void *fnic_alloc_slab_dma(gfp_t gfp_mask, void *pool_data)
406{
407 struct kmem_cache *mem = pool_data;
408
409 return kmem_cache_alloc(mem, gfp_mask | GFP_ATOMIC | GFP_DMA);
410}
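/*
 * Wired up in fnic_probe() below as the allocator half of the DMA-able
 * sgl mempools, paired with the stock mempool_free_slab, e.g.:
 *
 *   pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab,
 *                         fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
 */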
411
412static int __devinit fnic_probe(struct pci_dev *pdev,
413 const struct pci_device_id *ent)
414{
415 struct Scsi_Host *host;
416 struct fc_lport *lp;
417 struct fnic *fnic;
418 mempool_t *pool;
419 int err;
420 int i;
421 unsigned long flags;
422
423 /*
424 * Allocate SCSI Host and set up association between host,
425 * local port, and fnic
426 */
427 host = scsi_host_alloc(&fnic_host_template,
428 sizeof(struct fc_lport) + sizeof(struct fnic));
429 if (!host) {
430 printk(KERN_ERR PFX "Unable to alloc SCSI host\n");
431 err = -ENOMEM;
432 goto err_out;
433 }
434 lp = shost_priv(host);
435 lp->host = host;
436 fnic = lport_priv(lp);
437 fnic->lport = lp;
438
439 snprintf(fnic->name, sizeof(fnic->name) - 1, "%s%d", DRV_NAME,
440 host->host_no);
441
442 host->transportt = fnic_fc_transport;
443
444 err = scsi_init_shared_tag_map(host, FNIC_MAX_IO_REQ);
445 if (err) {
446 shost_printk(KERN_ERR, fnic->lport->host,
447 "Unable to alloc shared tag map\n");
448 goto err_out_free_hba;
449 }
450
451 /* Setup PCI resources */
452 pci_set_drvdata(pdev, fnic);
453
454 fnic->pdev = pdev;
455
456 err = pci_enable_device(pdev);
457 if (err) {
458 shost_printk(KERN_ERR, fnic->lport->host,
459 "Cannot enable PCI device, aborting.\n");
460 goto err_out_free_hba;
461 }
462
463 err = pci_request_regions(pdev, DRV_NAME);
464 if (err) {
465 shost_printk(KERN_ERR, fnic->lport->host,
466 "Cannot enable PCI resources, aborting\n");
467 goto err_out_disable_device;
468 }
469
470 pci_set_master(pdev);
471
472 /* Query PCI controller on system for DMA addressing
473 * limitation for the device. Try 40-bit first, and
474	 * fall back to 32-bit.
475 */
476 err = pci_set_dma_mask(pdev, DMA_40BIT_MASK);
477 if (err) {
478 err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
479 if (err) {
480 shost_printk(KERN_ERR, fnic->lport->host,
481 "No usable DMA configuration "
482 "aborting\n");
483 goto err_out_release_regions;
484 }
485 err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
486 if (err) {
487 shost_printk(KERN_ERR, fnic->lport->host,
488 "Unable to obtain 32-bit DMA "
489 "for consistent allocations, aborting.\n");
490 goto err_out_release_regions;
491 }
492 } else {
493 err = pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK);
494 if (err) {
495 shost_printk(KERN_ERR, fnic->lport->host,
496 "Unable to obtain 40-bit DMA "
497 "for consistent allocations, aborting.\n");
498 goto err_out_release_regions;
499 }
500 }
501
502 /* Map vNIC resources from BAR0 */
503 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
504 shost_printk(KERN_ERR, fnic->lport->host,
505 "BAR0 not memory-map'able, aborting.\n");
506 err = -ENODEV;
507 goto err_out_release_regions;
508 }
509
510 fnic->bar0.vaddr = pci_iomap(pdev, 0, 0);
511 fnic->bar0.bus_addr = pci_resource_start(pdev, 0);
512 fnic->bar0.len = pci_resource_len(pdev, 0);
513
514 if (!fnic->bar0.vaddr) {
515 shost_printk(KERN_ERR, fnic->lport->host,
516 "Cannot memory-map BAR0 res hdr, "
517 "aborting.\n");
518 err = -ENODEV;
519 goto err_out_release_regions;
520 }
521
522 fnic->vdev = vnic_dev_register(NULL, fnic, pdev, &fnic->bar0);
523 if (!fnic->vdev) {
524 shost_printk(KERN_ERR, fnic->lport->host,
525 "vNIC registration failed, "
526 "aborting.\n");
527 err = -ENODEV;
528 goto err_out_iounmap;
529 }
530
531 err = fnic_dev_wait(fnic->vdev, vnic_dev_open,
532 vnic_dev_open_done, 0);
533 if (err) {
534 shost_printk(KERN_ERR, fnic->lport->host,
535 "vNIC dev open failed, aborting.\n");
536 goto err_out_vnic_unregister;
537 }
538
539 err = vnic_dev_init(fnic->vdev, 0);
540 if (err) {
541 shost_printk(KERN_ERR, fnic->lport->host,
542 "vNIC dev init failed, aborting.\n");
543 goto err_out_dev_close;
544 }
545
546 err = vnic_dev_mac_addr(fnic->vdev, fnic->mac_addr);
547 if (err) {
548 shost_printk(KERN_ERR, fnic->lport->host,
549				"vNIC get MAC addr failed\n");
550 goto err_out_dev_close;
551 }
552
553 /* Get vNIC configuration */
554 err = fnic_get_vnic_config(fnic);
555 if (err) {
556 shost_printk(KERN_ERR, fnic->lport->host,
557 "Get vNIC configuration failed, "
558 "aborting.\n");
559 goto err_out_dev_close;
560 }
561 host->max_lun = fnic->config.luns_per_tgt;
562 host->max_id = FNIC_MAX_FCP_TARGET;
563
564 fnic_get_res_counts(fnic);
565
566 err = fnic_set_intr_mode(fnic);
567 if (err) {
568 shost_printk(KERN_ERR, fnic->lport->host,
569 "Failed to set intr mode, "
570 "aborting.\n");
571 goto err_out_dev_close;
572 }
573
574 err = fnic_request_intr(fnic);
575 if (err) {
576 shost_printk(KERN_ERR, fnic->lport->host,
577 "Unable to request irq.\n");
578 goto err_out_clear_intr;
579 }
580
581 err = fnic_alloc_vnic_resources(fnic);
582 if (err) {
583 shost_printk(KERN_ERR, fnic->lport->host,
584 "Failed to alloc vNIC resources, "
585 "aborting.\n");
586 goto err_out_free_intr;
587 }
588
589
590 /* initialize all fnic locks */
591 spin_lock_init(&fnic->fnic_lock);
592
593 for (i = 0; i < FNIC_WQ_MAX; i++)
594 spin_lock_init(&fnic->wq_lock[i]);
595
596 for (i = 0; i < FNIC_WQ_COPY_MAX; i++) {
597 spin_lock_init(&fnic->wq_copy_lock[i]);
598 fnic->wq_copy_desc_low[i] = DESC_CLEAN_LOW_WATERMARK;
599 fnic->fw_ack_recd[i] = 0;
600 fnic->fw_ack_index[i] = -1;
601 }
602
603 for (i = 0; i < FNIC_IO_LOCKS; i++)
604 spin_lock_init(&fnic->io_req_lock[i]);
605
606 fnic->io_req_pool = mempool_create_slab_pool(2, fnic_io_req_cache);
607 if (!fnic->io_req_pool)
608 goto err_out_free_resources;
609
610 pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab,
611 fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
612 if (!pool)
613 goto err_out_free_ioreq_pool;
614 fnic->io_sgl_pool[FNIC_SGL_CACHE_DFLT] = pool;
615
616 pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab,
617 fnic_sgl_cache[FNIC_SGL_CACHE_MAX]);
618 if (!pool)
619 goto err_out_free_dflt_pool;
620 fnic->io_sgl_pool[FNIC_SGL_CACHE_MAX] = pool;
621
622 /* setup vlan config, hw inserts vlan header */
623 fnic->vlan_hw_insert = 1;
624 fnic->vlan_id = 0;
625
626 fnic->flogi_oxid = FC_XID_UNKNOWN;
627 fnic->flogi = NULL;
628 fnic->flogi_resp = NULL;
629 fnic->state = FNIC_IN_FC_MODE;
630
631 /* Enable hardware stripping of vlan header on ingress */
632 fnic_set_nic_config(fnic, 0, 0, 0, 0, 0, 0, 1);
633
634 /* Setup notification buffer area */
635 err = fnic_notify_set(fnic);
636 if (err) {
637 shost_printk(KERN_ERR, fnic->lport->host,
638 "Failed to alloc notify buffer, aborting.\n");
639 goto err_out_free_max_pool;
640 }
641
642 /* Setup notify timer when using MSI interrupts */
643 if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI)
644 setup_timer(&fnic->notify_timer,
645 fnic_notify_timer, (unsigned long)fnic);
646
647	/* allocate RQ buffers and post them to RQ */
648 for (i = 0; i < fnic->rq_count; i++) {
649 err = vnic_rq_fill(&fnic->rq[i], fnic_alloc_rq_frame);
650 if (err) {
651 shost_printk(KERN_ERR, fnic->lport->host,
652 "fnic_alloc_rq_frame can't alloc "
653 "frame\n");
654 goto err_out_free_rq_buf;
655 }
656 }
657
658 /*
659 * Initialization done with PCI system, hardware, firmware.
660 * Add host to SCSI
661 */
662 err = scsi_add_host(lp->host, &pdev->dev);
663 if (err) {
664 shost_printk(KERN_ERR, fnic->lport->host,
665 "fnic: scsi_add_host failed...exiting\n");
666 goto err_out_free_rq_buf;
667 }
668
669	/* Start local port initialization */
670
671 lp->link_up = 0;
672 lp->tt = fnic_transport_template;
673
674 lp->emp = fc_exch_mgr_alloc(lp, FC_CLASS_3,
675 FCPIO_HOST_EXCH_RANGE_START,
676 FCPIO_HOST_EXCH_RANGE_END);
677 if (!lp->emp) {
678 err = -ENOMEM;
679 goto err_out_remove_scsi_host;
680 }
681
682 lp->max_retry_count = fnic->config.flogi_retries;
683 lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
684 FCP_SPPF_CONF_COMPL);
685 if (fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR)
686 lp->service_params |= FCP_SPPF_RETRY;
687
688 lp->boot_time = jiffies;
689 lp->e_d_tov = fnic->config.ed_tov;
690 lp->r_a_tov = fnic->config.ra_tov;
691 lp->link_supported_speeds = FC_PORTSPEED_10GBIT;
692 fc_set_wwnn(lp, fnic->config.node_wwn);
693 fc_set_wwpn(lp, fnic->config.port_wwn);
694
695 fc_exch_init(lp);
696 fc_lport_init(lp);
697 fc_elsct_init(lp);
698 fc_rport_init(lp);
699 fc_disc_init(lp);
700
701 fc_lport_config(lp);
702
703 if (fc_set_mfs(lp, fnic->config.maxdatafieldsize +
704 sizeof(struct fc_frame_header))) {
705 err = -EINVAL;
706 goto err_out_free_exch_mgr;
707 }
708 fc_host_maxframe_size(lp->host) = lp->mfs;
709
710 sprintf(fc_host_symbolic_name(lp->host),
711 DRV_NAME " v" DRV_VERSION " over %s", fnic->name);
712
713 spin_lock_irqsave(&fnic_list_lock, flags);
714 list_add_tail(&fnic->list, &fnic_list);
715 spin_unlock_irqrestore(&fnic_list_lock, flags);
716
717 INIT_WORK(&fnic->link_work, fnic_handle_link);
718 INIT_WORK(&fnic->frame_work, fnic_handle_frame);
719 skb_queue_head_init(&fnic->frame_queue);
720
721 /* Enable all queues */
722 for (i = 0; i < fnic->raw_wq_count; i++)
723 vnic_wq_enable(&fnic->wq[i]);
724 for (i = 0; i < fnic->rq_count; i++)
725 vnic_rq_enable(&fnic->rq[i]);
726 for (i = 0; i < fnic->wq_copy_count; i++)
727 vnic_wq_copy_enable(&fnic->wq_copy[i]);
728
729 fc_fabric_login(lp);
730
731 vnic_dev_enable(fnic->vdev);
732 for (i = 0; i < fnic->intr_count; i++)
733 vnic_intr_unmask(&fnic->intr[i]);
734
735 fnic_notify_timer_start(fnic);
736
737 return 0;
738
739err_out_free_exch_mgr:
740 fc_exch_mgr_free(lp->emp);
741err_out_remove_scsi_host:
742 fc_remove_host(fnic->lport->host);
743 scsi_remove_host(fnic->lport->host);
744err_out_free_rq_buf:
745 for (i = 0; i < fnic->rq_count; i++)
746 vnic_rq_clean(&fnic->rq[i], fnic_free_rq_buf);
747 vnic_dev_notify_unset(fnic->vdev);
748err_out_free_max_pool:
749 mempool_destroy(fnic->io_sgl_pool[FNIC_SGL_CACHE_MAX]);
750err_out_free_dflt_pool:
751 mempool_destroy(fnic->io_sgl_pool[FNIC_SGL_CACHE_DFLT]);
752err_out_free_ioreq_pool:
753 mempool_destroy(fnic->io_req_pool);
754err_out_free_resources:
755 fnic_free_vnic_resources(fnic);
756err_out_free_intr:
757 fnic_free_intr(fnic);
758err_out_clear_intr:
759 fnic_clear_intr_mode(fnic);
760err_out_dev_close:
761 vnic_dev_close(fnic->vdev);
762err_out_vnic_unregister:
763 vnic_dev_unregister(fnic->vdev);
764err_out_iounmap:
765 fnic_iounmap(fnic);
766err_out_release_regions:
767 pci_release_regions(pdev);
768err_out_disable_device:
769 pci_disable_device(pdev);
770err_out_free_hba:
771 scsi_host_put(lp->host);
772err_out:
773 return err;
774}
775
776static void __devexit fnic_remove(struct pci_dev *pdev)
777{
778 struct fnic *fnic = pci_get_drvdata(pdev);
779 unsigned long flags;
780
781 /*
782 * Mark state so that the workqueue thread stops forwarding
783 * received frames and link events to the local port. ISR and
784 * other threads that can queue work items will also stop
785 * creating work items on the fnic workqueue
786 */
787 spin_lock_irqsave(&fnic->fnic_lock, flags);
788 fnic->stop_rx_link_events = 1;
789 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
790
791 if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI)
792 del_timer_sync(&fnic->notify_timer);
793
794 /*
795 * Flush the fnic event queue. After this call, there should
796 * be no event queued for this fnic device in the workqueue
797 */
798 flush_workqueue(fnic_event_queue);
799 skb_queue_purge(&fnic->frame_queue);
800
801 /*
802 * Log off the fabric. This stops all remote ports and the dns
803 * port, and flushes all rport, disc, and lport work before
804 * returning.
805 */
806 fc_fabric_logoff(fnic->lport);
807
808 spin_lock_irqsave(&fnic->fnic_lock, flags);
809 fnic->in_remove = 1;
810 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
811
812 fc_lport_destroy(fnic->lport);
813
814 /*
815 * This stops the fnic device, masks all interrupts. Completed
816 * CQ entries are drained. Posted WQ/RQ/Copy-WQ entries are
817 * cleaned up
818 */
819 fnic_cleanup(fnic);
820
821 BUG_ON(!skb_queue_empty(&fnic->frame_queue));
822
823 spin_lock_irqsave(&fnic_list_lock, flags);
824 list_del(&fnic->list);
825 spin_unlock_irqrestore(&fnic_list_lock, flags);
826
827 fc_remove_host(fnic->lport->host);
828 scsi_remove_host(fnic->lport->host);
829 fc_exch_mgr_free(fnic->lport->emp);
830 vnic_dev_notify_unset(fnic->vdev);
831 fnic_free_vnic_resources(fnic);
832 fnic_free_intr(fnic);
833 fnic_clear_intr_mode(fnic);
834 vnic_dev_close(fnic->vdev);
835 vnic_dev_unregister(fnic->vdev);
836 fnic_iounmap(fnic);
837 pci_release_regions(pdev);
838 pci_disable_device(pdev);
839 pci_set_drvdata(pdev, NULL);
840 scsi_host_put(fnic->lport->host);
841}
842
843static struct pci_driver fnic_driver = {
844 .name = DRV_NAME,
845 .id_table = fnic_id_table,
846 .probe = fnic_probe,
847 .remove = __devexit_p(fnic_remove),
848};
849
850static int __init fnic_init_module(void)
851{
852 size_t len;
853 int err = 0;
854
855 printk(KERN_INFO PFX "%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION);
856
857 /* Create a cache for allocation of default size sgls */
858 len = sizeof(struct fnic_dflt_sgl_list);
859 fnic_sgl_cache[FNIC_SGL_CACHE_DFLT] = kmem_cache_create
860 ("fnic_sgl_dflt", len + FNIC_SG_DESC_ALIGN, FNIC_SG_DESC_ALIGN,
861 SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA,
862 NULL);
863 if (!fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]) {
864 printk(KERN_ERR PFX "failed to create fnic dflt sgl slab\n");
865 err = -ENOMEM;
866 goto err_create_fnic_sgl_slab_dflt;
867 }
868
869 /* Create a cache for allocation of max size sgls*/
870 len = sizeof(struct fnic_sgl_list);
871 fnic_sgl_cache[FNIC_SGL_CACHE_MAX] = kmem_cache_create
872 ("fnic_sgl_max", len + FNIC_SG_DESC_ALIGN, FNIC_SG_DESC_ALIGN,
873 SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA,
874 NULL);
875 if (!fnic_sgl_cache[FNIC_SGL_CACHE_MAX]) {
876 printk(KERN_ERR PFX "failed to create fnic max sgl slab\n");
877 err = -ENOMEM;
878 goto err_create_fnic_sgl_slab_max;
879 }
880
881 /* Create a cache of io_req structs for use via mempool */
882 fnic_io_req_cache = kmem_cache_create("fnic_io_req",
883 sizeof(struct fnic_io_req),
884 0, SLAB_HWCACHE_ALIGN, NULL);
885 if (!fnic_io_req_cache) {
886 printk(KERN_ERR PFX "failed to create fnic io_req slab\n");
887 err = -ENOMEM;
888 goto err_create_fnic_ioreq_slab;
889 }
890
891 fnic_event_queue = create_singlethread_workqueue("fnic_event_wq");
892 if (!fnic_event_queue) {
893 printk(KERN_ERR PFX "fnic work queue create failed\n");
894 err = -ENOMEM;
895 goto err_create_fnic_workq;
896 }
897
898 spin_lock_init(&fnic_list_lock);
899 INIT_LIST_HEAD(&fnic_list);
900
901 fnic_fc_transport = fc_attach_transport(&fnic_fc_functions);
902 if (!fnic_fc_transport) {
903 printk(KERN_ERR PFX "fc_attach_transport error\n");
904 err = -ENOMEM;
905 goto err_fc_transport;
906 }
907
908 /* register the driver with PCI system */
909 err = pci_register_driver(&fnic_driver);
910 if (err < 0) {
911 printk(KERN_ERR PFX "pci register error\n");
912 goto err_pci_register;
913 }
914 return err;
915
916err_pci_register:
917 fc_release_transport(fnic_fc_transport);
918err_fc_transport:
919 destroy_workqueue(fnic_event_queue);
920err_create_fnic_workq:
921 kmem_cache_destroy(fnic_io_req_cache);
922err_create_fnic_ioreq_slab:
923 kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_MAX]);
924err_create_fnic_sgl_slab_max:
925 kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
926err_create_fnic_sgl_slab_dflt:
927 return err;
928}
929
930static void __exit fnic_cleanup_module(void)
931{
932 pci_unregister_driver(&fnic_driver);
933 destroy_workqueue(fnic_event_queue);
934 kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_MAX]);
935 kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
936 kmem_cache_destroy(fnic_io_req_cache);
937 fc_release_transport(fnic_fc_transport);
938}
939
940module_init(fnic_init_module);
941module_exit(fnic_cleanup_module);
942
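Both fnic_init_module() and fnic_probe() above unwind with the kernel's label-per-resource goto chain: each allocation gets a label, and failures fall through the labels in reverse order of acquisition. A minimal standalone sketch of the idiom, with all names illustrative rather than taken from the driver:

#include <stdlib.h>

/* Sketch of the label-per-resource unwind used by fnic_init_module().
 * Resources are released in strict reverse order of acquisition;
 * each label undoes exactly one successful step.
 */
static int demo_init(void)
{
	char *a, *b;
	int err = -1;	/* stands in for -ENOMEM */

	a = malloc(16);
	if (!a)
		goto err_out;

	b = malloc(32);
	if (!b)
		goto err_free_a;

	free(b);	/* success path; a real driver would keep these */
	free(a);
	return 0;

err_free_a:
	free(a);
err_out:
	return err;
}

int main(void)
{
	return demo_init();
}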
diff --git a/drivers/scsi/fnic/fnic_res.c b/drivers/scsi/fnic/fnic_res.c
new file mode 100644
index 000000000000..7ba61ec715d2
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_res.c
@@ -0,0 +1,444 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#include <linux/errno.h>
19#include <linux/types.h>
20#include <linux/pci.h>
21#include "wq_enet_desc.h"
22#include "rq_enet_desc.h"
23#include "cq_enet_desc.h"
24#include "vnic_resource.h"
25#include "vnic_dev.h"
26#include "vnic_wq.h"
27#include "vnic_rq.h"
28#include "vnic_cq.h"
29#include "vnic_intr.h"
30#include "vnic_stats.h"
31#include "vnic_nic.h"
32#include "fnic.h"
33
34int fnic_get_vnic_config(struct fnic *fnic)
35{
36 struct vnic_fc_config *c = &fnic->config;
37 int err;
38
39#define GET_CONFIG(m) \
40 do { \
41 err = vnic_dev_spec(fnic->vdev, \
42 offsetof(struct vnic_fc_config, m), \
43 sizeof(c->m), &c->m); \
44 if (err) { \
45 shost_printk(KERN_ERR, fnic->lport->host, \
46 "Error getting %s, %d\n", #m, \
47 err); \
48 return err; \
49 } \
50 } while (0)
51
52 GET_CONFIG(node_wwn);
53 GET_CONFIG(port_wwn);
54 GET_CONFIG(wq_enet_desc_count);
55 GET_CONFIG(wq_copy_desc_count);
56 GET_CONFIG(rq_desc_count);
57 GET_CONFIG(maxdatafieldsize);
58 GET_CONFIG(ed_tov);
59 GET_CONFIG(ra_tov);
60 GET_CONFIG(intr_timer);
61 GET_CONFIG(intr_timer_type);
62 GET_CONFIG(flags);
63 GET_CONFIG(flogi_retries);
64 GET_CONFIG(flogi_timeout);
65 GET_CONFIG(plogi_retries);
66 GET_CONFIG(plogi_timeout);
67 GET_CONFIG(io_throttle_count);
68 GET_CONFIG(link_down_timeout);
69 GET_CONFIG(port_down_timeout);
70 GET_CONFIG(port_down_io_retries);
71 GET_CONFIG(luns_per_tgt);
72
73 c->wq_enet_desc_count =
74 min_t(u32, VNIC_FNIC_WQ_DESCS_MAX,
75 max_t(u32, VNIC_FNIC_WQ_DESCS_MIN,
76 c->wq_enet_desc_count));
77 c->wq_enet_desc_count = ALIGN(c->wq_enet_desc_count, 16);
78
79 c->wq_copy_desc_count =
80 min_t(u32, VNIC_FNIC_WQ_COPY_DESCS_MAX,
81 max_t(u32, VNIC_FNIC_WQ_COPY_DESCS_MIN,
82 c->wq_copy_desc_count));
83 c->wq_copy_desc_count = ALIGN(c->wq_copy_desc_count, 16);
84
85 c->rq_desc_count =
86 min_t(u32, VNIC_FNIC_RQ_DESCS_MAX,
87 max_t(u32, VNIC_FNIC_RQ_DESCS_MIN,
88 c->rq_desc_count));
89 c->rq_desc_count = ALIGN(c->rq_desc_count, 16);
90
91 c->maxdatafieldsize =
92 min_t(u16, VNIC_FNIC_MAXDATAFIELDSIZE_MAX,
93 max_t(u16, VNIC_FNIC_MAXDATAFIELDSIZE_MIN,
94 c->maxdatafieldsize));
95 c->ed_tov =
96 min_t(u32, VNIC_FNIC_EDTOV_MAX,
97 max_t(u32, VNIC_FNIC_EDTOV_MIN,
98 c->ed_tov));
99
100 c->ra_tov =
101 min_t(u32, VNIC_FNIC_RATOV_MAX,
102 max_t(u32, VNIC_FNIC_RATOV_MIN,
103 c->ra_tov));
104
105 c->flogi_retries =
106 min_t(u32, VNIC_FNIC_FLOGI_RETRIES_MAX, c->flogi_retries);
107
108 c->flogi_timeout =
109 min_t(u32, VNIC_FNIC_FLOGI_TIMEOUT_MAX,
110 max_t(u32, VNIC_FNIC_FLOGI_TIMEOUT_MIN,
111 c->flogi_timeout));
112
113 c->plogi_retries =
114 min_t(u32, VNIC_FNIC_PLOGI_RETRIES_MAX, c->plogi_retries);
115
116 c->plogi_timeout =
117 min_t(u32, VNIC_FNIC_PLOGI_TIMEOUT_MAX,
118 max_t(u32, VNIC_FNIC_PLOGI_TIMEOUT_MIN,
119 c->plogi_timeout));
120
121 c->io_throttle_count =
122 min_t(u32, VNIC_FNIC_IO_THROTTLE_COUNT_MAX,
123 max_t(u32, VNIC_FNIC_IO_THROTTLE_COUNT_MIN,
124 c->io_throttle_count));
125
126 c->link_down_timeout =
127 min_t(u32, VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX,
128 c->link_down_timeout);
129
130 c->port_down_timeout =
131 min_t(u32, VNIC_FNIC_PORT_DOWN_TIMEOUT_MAX,
132 c->port_down_timeout);
133
134 c->port_down_io_retries =
135 min_t(u32, VNIC_FNIC_PORT_DOWN_IO_RETRIES_MAX,
136 c->port_down_io_retries);
137
138 c->luns_per_tgt =
139 min_t(u32, VNIC_FNIC_LUNS_PER_TARGET_MAX,
140 max_t(u32, VNIC_FNIC_LUNS_PER_TARGET_MIN,
141 c->luns_per_tgt));
142
143 c->intr_timer = min_t(u16, VNIC_INTR_TIMER_MAX, c->intr_timer);
144 c->intr_timer_type = c->intr_timer_type; /* used as reported */
145
146 shost_printk(KERN_INFO, fnic->lport->host,
147 "vNIC MAC addr %02x:%02x:%02x:%02x:%02x:%02x "
148 "wq/wq_copy/rq %d/%d/%d\n",
149 fnic->mac_addr[0], fnic->mac_addr[1], fnic->mac_addr[2],
150 fnic->mac_addr[3], fnic->mac_addr[4], fnic->mac_addr[5],
151 c->wq_enet_desc_count, c->wq_copy_desc_count,
152 c->rq_desc_count);
153 shost_printk(KERN_INFO, fnic->lport->host,
154 "vNIC node wwn %llx port wwn %llx\n",
155 c->node_wwn, c->port_wwn);
156 shost_printk(KERN_INFO, fnic->lport->host,
157 "vNIC ed_tov %d ra_tov %d\n",
158 c->ed_tov, c->ra_tov);
159 shost_printk(KERN_INFO, fnic->lport->host,
160 "vNIC mtu %d intr timer %d\n",
161 c->maxdatafieldsize, c->intr_timer);
162 shost_printk(KERN_INFO, fnic->lport->host,
163 "vNIC flags 0x%x luns per tgt %d\n",
164 c->flags, c->luns_per_tgt);
165 shost_printk(KERN_INFO, fnic->lport->host,
166 "vNIC flogi_retries %d flogi timeout %d\n",
167 c->flogi_retries, c->flogi_timeout);
168 shost_printk(KERN_INFO, fnic->lport->host,
169 "vNIC plogi retries %d plogi timeout %d\n",
170 c->plogi_retries, c->plogi_timeout);
171 shost_printk(KERN_INFO, fnic->lport->host,
172 "vNIC io throttle count %d link dn timeout %d\n",
173 c->io_throttle_count, c->link_down_timeout);
174 shost_printk(KERN_INFO, fnic->lport->host,
175 "vNIC port dn io retries %d port dn timeout %d\n",
176 c->port_down_io_retries, c->port_down_timeout);
177
178 return 0;
179}
180
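The repeated min_t/max_t pairs above implement a clamp-to-range followed, for the ring sizes, by a round-up to a multiple of 16. A small userspace sketch of the same arithmetic; the macro name and values are illustrative:

#include <stdint.h>
#include <stdio.h>

/* rounds x up to the next multiple of a (a must be a power of two) */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static uint32_t clamp_u32(uint32_t v, uint32_t lo, uint32_t hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	/* e.g. firmware reports 1000 descriptors, driver allows 64..4096 */
	uint32_t desc = clamp_u32(1000, 64, 4096);

	desc = ALIGN_UP(desc, 16);	/* 1000 -> 1008, a multiple of 16 */
	printf("%u\n", desc);
	return 0;
}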
181int fnic_set_nic_config(struct fnic *fnic, u8 rss_default_cpu,
182 u8 rss_hash_type,
183 u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable,
184 u8 tso_ipid_split_en, u8 ig_vlan_strip_en)
185{
186 u64 a0, a1;
187 u32 nic_cfg;
188 int wait = 1000;
189
190 vnic_set_nic_cfg(&nic_cfg, rss_default_cpu,
191 rss_hash_type, rss_hash_bits, rss_base_cpu,
192 rss_enable, tso_ipid_split_en, ig_vlan_strip_en);
193
194 a0 = nic_cfg;
195 a1 = 0;
196
197 return vnic_dev_cmd(fnic->vdev, CMD_NIC_CFG, &a0, &a1, wait);
198}
199
200void fnic_get_res_counts(struct fnic *fnic)
201{
202 fnic->wq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_WQ);
203 fnic->raw_wq_count = fnic->wq_count - 1;
204 fnic->wq_copy_count = fnic->wq_count - fnic->raw_wq_count;
205 fnic->rq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_RQ);
206 fnic->cq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_CQ);
207 fnic->intr_count = vnic_dev_get_res_count(fnic->vdev,
208 RES_TYPE_INTR_CTRL);
209}
210
211void fnic_free_vnic_resources(struct fnic *fnic)
212{
213 unsigned int i;
214
215 for (i = 0; i < fnic->raw_wq_count; i++)
216 vnic_wq_free(&fnic->wq[i]);
217
218 for (i = 0; i < fnic->wq_copy_count; i++)
219 vnic_wq_copy_free(&fnic->wq_copy[i]);
220
221 for (i = 0; i < fnic->rq_count; i++)
222 vnic_rq_free(&fnic->rq[i]);
223
224 for (i = 0; i < fnic->cq_count; i++)
225 vnic_cq_free(&fnic->cq[i]);
226
227 for (i = 0; i < fnic->intr_count; i++)
228 vnic_intr_free(&fnic->intr[i]);
229}
230
231int fnic_alloc_vnic_resources(struct fnic *fnic)
232{
233 enum vnic_dev_intr_mode intr_mode;
234 unsigned int mask_on_assertion;
235 unsigned int interrupt_offset;
236 unsigned int error_interrupt_enable;
237 unsigned int error_interrupt_offset;
238 unsigned int i, cq_index;
239 unsigned int wq_copy_cq_desc_count;
240 int err;
241
242 intr_mode = vnic_dev_get_intr_mode(fnic->vdev);
243
244 shost_printk(KERN_INFO, fnic->lport->host, "vNIC interrupt mode: %s\n",
245 intr_mode == VNIC_DEV_INTR_MODE_INTX ? "legacy PCI INTx" :
246 intr_mode == VNIC_DEV_INTR_MODE_MSI ? "MSI" :
247 intr_mode == VNIC_DEV_INTR_MODE_MSIX ?
248 "MSI-X" : "unknown");
249
250 shost_printk(KERN_INFO, fnic->lport->host, "vNIC resources avail: "
251 "wq %d cp_wq %d raw_wq %d rq %d cq %d intr %d\n",
252 fnic->wq_count, fnic->wq_copy_count, fnic->raw_wq_count,
253 fnic->rq_count, fnic->cq_count, fnic->intr_count);
254
255 /* Allocate Raw WQ used for FCS frames */
256 for (i = 0; i < fnic->raw_wq_count; i++) {
257 err = vnic_wq_alloc(fnic->vdev, &fnic->wq[i], i,
258 fnic->config.wq_enet_desc_count,
259 sizeof(struct wq_enet_desc));
260 if (err)
261 goto err_out_cleanup;
262 }
263
264 /* Allocate Copy WQs used for SCSI IOs */
265 for (i = 0; i < fnic->wq_copy_count; i++) {
266 err = vnic_wq_copy_alloc(fnic->vdev, &fnic->wq_copy[i],
267 (fnic->raw_wq_count + i),
268 fnic->config.wq_copy_desc_count,
269 sizeof(struct fcpio_host_req));
270 if (err)
271 goto err_out_cleanup;
272 }
273
274 /* RQ for receiving FCS frames */
275 for (i = 0; i < fnic->rq_count; i++) {
276 err = vnic_rq_alloc(fnic->vdev, &fnic->rq[i], i,
277 fnic->config.rq_desc_count,
278 sizeof(struct rq_enet_desc));
279 if (err)
280 goto err_out_cleanup;
281 }
282
283 /* CQ for each RQ */
284 for (i = 0; i < fnic->rq_count; i++) {
285 cq_index = i;
286 err = vnic_cq_alloc(fnic->vdev,
287 &fnic->cq[cq_index], cq_index,
288 fnic->config.rq_desc_count,
289 sizeof(struct cq_enet_rq_desc));
290 if (err)
291 goto err_out_cleanup;
292 }
293
294 /* CQ for each WQ */
295 for (i = 0; i < fnic->raw_wq_count; i++) {
296 cq_index = fnic->rq_count + i;
297 err = vnic_cq_alloc(fnic->vdev, &fnic->cq[cq_index], cq_index,
298 fnic->config.wq_enet_desc_count,
299 sizeof(struct cq_enet_wq_desc));
300 if (err)
301 goto err_out_cleanup;
302 }
303
304 /* CQ for each COPY WQ */
305 wq_copy_cq_desc_count = (fnic->config.wq_copy_desc_count * 3);
306 for (i = 0; i < fnic->wq_copy_count; i++) {
307 cq_index = fnic->raw_wq_count + fnic->rq_count + i;
308 err = vnic_cq_alloc(fnic->vdev, &fnic->cq[cq_index],
309 cq_index,
310 wq_copy_cq_desc_count,
311 sizeof(struct fcpio_fw_req));
312 if (err)
313 goto err_out_cleanup;
314 }
315
316 for (i = 0; i < fnic->intr_count; i++) {
317 err = vnic_intr_alloc(fnic->vdev, &fnic->intr[i], i);
318 if (err)
319 goto err_out_cleanup;
320 }
321
322 fnic->legacy_pba = vnic_dev_get_res(fnic->vdev,
323 RES_TYPE_INTR_PBA_LEGACY, 0);
324
325 if (!fnic->legacy_pba && intr_mode == VNIC_DEV_INTR_MODE_INTX) {
326 shost_printk(KERN_ERR, fnic->lport->host,
327 "Failed to hook legacy pba resource\n");
328 err = -ENODEV;
329 goto err_out_cleanup;
330 }
331
332 /*
333 * Init RQ/WQ resources.
334 *
335 * RQ[0 to n-1] point to CQ[0 to n-1]
336 * WQ[0 to m-1] point to CQ[n to n+m-1]
337 * WQ_COPY[0 to k-1] point to CQ[n+m to n+m+k-1]
338 *
339 * Note: for the copy WQ we always initialize with cq_index = 0.
340 *
341 * Error interrupt is not enabled for MSI.
342 */
343
344 switch (intr_mode) {
345 case VNIC_DEV_INTR_MODE_INTX:
346 case VNIC_DEV_INTR_MODE_MSIX:
347 error_interrupt_enable = 1;
348 error_interrupt_offset = fnic->err_intr_offset;
349 break;
350 default:
351 error_interrupt_enable = 0;
352 error_interrupt_offset = 0;
353 break;
354 }
355
356 for (i = 0; i < fnic->rq_count; i++) {
357 cq_index = i;
358 vnic_rq_init(&fnic->rq[i],
359 cq_index,
360 error_interrupt_enable,
361 error_interrupt_offset);
362 }
363
364 for (i = 0; i < fnic->raw_wq_count; i++) {
365 cq_index = i + fnic->rq_count;
366 vnic_wq_init(&fnic->wq[i],
367 cq_index,
368 error_interrupt_enable,
369 error_interrupt_offset);
370 }
371
372 for (i = 0; i < fnic->wq_copy_count; i++) {
373 vnic_wq_copy_init(&fnic->wq_copy[i],
374 0 /* cq_index 0 - always */,
375 error_interrupt_enable,
376 error_interrupt_offset);
377 }
378
379 for (i = 0; i < fnic->cq_count; i++) {
380
381 switch (intr_mode) {
382 case VNIC_DEV_INTR_MODE_MSIX:
383 interrupt_offset = i;
384 break;
385 default:
386 interrupt_offset = 0;
387 break;
388 }
389
390 vnic_cq_init(&fnic->cq[i],
391 0 /* flow_control_enable */,
392 1 /* color_enable */,
393 0 /* cq_head */,
394 0 /* cq_tail */,
395 1 /* cq_tail_color */,
396 1 /* interrupt_enable */,
397 1 /* cq_entry_enable */,
398 0 /* cq_message_enable */,
399 interrupt_offset,
400 0 /* cq_message_addr */);
401 }
402
403 /*
404 * Init INTR resources
405 *
406 * mask_on_assertion is not used for INTx due to the level-
407 * triggered nature of INTx
408 */
409
410 switch (intr_mode) {
411 case VNIC_DEV_INTR_MODE_MSI:
412 case VNIC_DEV_INTR_MODE_MSIX:
413 mask_on_assertion = 1;
414 break;
415 default:
416 mask_on_assertion = 0;
417 break;
418 }
419
420 for (i = 0; i < fnic->intr_count; i++) {
421 vnic_intr_init(&fnic->intr[i],
422 fnic->config.intr_timer,
423 fnic->config.intr_timer_type,
424 mask_on_assertion);
425 }
426
427 /* init the stats memory by making the first call here */
428 err = vnic_dev_stats_dump(fnic->vdev, &fnic->stats);
429 if (err) {
430 shost_printk(KERN_ERR, fnic->lport->host,
431 "vnic_dev_stats_dump failed - x%x\n", err);
432 goto err_out_cleanup;
433 }
434
435 /* Clear LIF stats */
436 vnic_dev_stats_clear(fnic->vdev);
437
438 return 0;
439
440err_out_cleanup:
441 fnic_free_vnic_resources(fnic);
442
443 return err;
444}
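The CQ indexing that fnic_alloc_vnic_resources() sets up, and that fnic_wq_copy_cmpl_handler() later relies on, can be restated as a small index calculator. A sketch, with the queue counts illustrative:

#include <stdio.h>

/* Sketch: CQ index layout used by fnic_alloc_vnic_resources().
 * RQ i      completes on CQ i
 * raw WQ i  completes on CQ rq_count + i
 * copy WQ i completes on CQ rq_count + raw_wq_count + i
 */
static unsigned int rq_cq(unsigned int i)
{
	return i;
}

static unsigned int raw_wq_cq(unsigned int rq_count, unsigned int i)
{
	return rq_count + i;
}

static unsigned int copy_wq_cq(unsigned int rq_count,
			       unsigned int raw_wq_count, unsigned int i)
{
	return rq_count + raw_wq_count + i;
}

int main(void)
{
	/* e.g. 1 RQ, 1 raw WQ, 1 copy WQ: CQs 0, 1, 2 */
	printf("%u %u %u\n", rq_cq(0), raw_wq_cq(1, 0), copy_wq_cq(1, 1, 0));
	return 0;
}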
diff --git a/drivers/scsi/fnic/fnic_res.h b/drivers/scsi/fnic/fnic_res.h
new file mode 100644
index 000000000000..b6f310262534
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_res.h
@@ -0,0 +1,197 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _FNIC_RES_H_
19#define _FNIC_RES_H_
20
21#include "wq_enet_desc.h"
22#include "rq_enet_desc.h"
23#include "vnic_wq.h"
24#include "vnic_rq.h"
25#include "fnic_io.h"
26#include "fcpio.h"
27#include "vnic_wq_copy.h"
28#include "vnic_cq_copy.h"
29
30static inline void fnic_queue_wq_desc(struct vnic_wq *wq,
31 void *os_buf, dma_addr_t dma_addr,
32 unsigned int len, unsigned int fc_eof,
33 int vlan_tag_insert,
34 unsigned int vlan_tag,
35 int cq_entry, int sop, int eop)
36{
37 struct wq_enet_desc *desc = vnic_wq_next_desc(wq);
38
39 wq_enet_desc_enc(desc,
40 (u64)dma_addr | VNIC_PADDR_TARGET,
41 (u16)len,
42 0, /* mss_or_csum_offset */
43 (u16)fc_eof,
44 0, /* offload_mode */
45 (u8)eop, (u8)cq_entry,
46 1, /* fcoe_encap */
47 (u8)vlan_tag_insert,
48 (u16)vlan_tag,
49 0 /* loopback */);
50
51 vnic_wq_post(wq, os_buf, dma_addr, len, sop, eop);
52}
53
54static inline void fnic_queue_wq_copy_desc_icmnd_16(struct vnic_wq_copy *wq,
55 u32 req_id,
56 u32 lunmap_id, u8 spl_flags,
57 u32 sgl_cnt, u32 sense_len,
58 u64 sgl_addr, u64 sns_addr,
59 u8 crn, u8 pri_ta,
60 u8 flags, u8 *scsi_cdb,
61 u32 data_len, u8 *lun,
62 u32 d_id, u16 mss,
63 u32 ratov, u32 edtov)
64{
65 struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
66
67 desc->hdr.type = FCPIO_ICMND_16; /* enum fcpio_type */
68 desc->hdr.status = 0; /* header status entry */
69 desc->hdr._resvd = 0; /* reserved */
70 desc->hdr.tag.u.req_id = req_id; /* id for this request */
71
72 desc->u.icmnd_16.lunmap_id = lunmap_id; /* index into lunmap table */
73 desc->u.icmnd_16.special_req_flags = spl_flags; /* exch req flags */
74 desc->u.icmnd_16._resvd0[0] = 0; /* reserved */
75 desc->u.icmnd_16._resvd0[1] = 0; /* reserved */
76 desc->u.icmnd_16._resvd0[2] = 0; /* reserved */
77 desc->u.icmnd_16.sgl_cnt = sgl_cnt; /* scatter-gather list count */
78 desc->u.icmnd_16.sense_len = sense_len; /* sense buffer length */
79 desc->u.icmnd_16.sgl_addr = sgl_addr; /* scatter-gather list addr */
80 desc->u.icmnd_16.sense_addr = sns_addr; /* sense buffer address */
81 desc->u.icmnd_16.crn = crn; /* SCSI Command Reference No.*/
82 desc->u.icmnd_16.pri_ta = pri_ta; /* SCSI Pri & Task attribute */
83 desc->u.icmnd_16._resvd1 = 0; /* reserved: should be 0 */
84 desc->u.icmnd_16.flags = flags; /* command flags */
85 memcpy(desc->u.icmnd_16.scsi_cdb, scsi_cdb, CDB_16); /* SCSI CDB */
86 desc->u.icmnd_16.data_len = data_len; /* length of data expected */
87 memcpy(desc->u.icmnd_16.lun, lun, LUN_ADDRESS); /* LUN address */
88 desc->u.icmnd_16._resvd2 = 0; /* reserved */
89 hton24(desc->u.icmnd_16.d_id, d_id); /* FC vNIC only: Target D_ID */
90 desc->u.icmnd_16.mss = mss; /* FC vNIC only: max burst */
91 desc->u.icmnd_16.r_a_tov = ratov; /*FC vNIC only: Res. Alloc Timeout */
92 desc->u.icmnd_16.e_d_tov = edtov; /*FC vNIC only: Err Detect Timeout */
93
94 vnic_wq_copy_post(wq);
95}
96
97static inline void fnic_queue_wq_copy_desc_itmf(struct vnic_wq_copy *wq,
98 u32 req_id, u32 lunmap_id,
99 u32 tm_req, u32 tm_id, u8 *lun,
100 u32 d_id, u32 r_a_tov,
101 u32 e_d_tov)
102{
103 struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
104
105 desc->hdr.type = FCPIO_ITMF; /* enum fcpio_type */
106 desc->hdr.status = 0; /* header status entry */
107 desc->hdr._resvd = 0; /* reserved */
108 desc->hdr.tag.u.req_id = req_id; /* id for this request */
109
110 desc->u.itmf.lunmap_id = lunmap_id; /* index into lunmap table */
111 desc->u.itmf.tm_req = tm_req; /* SCSI Task Management request */
112 desc->u.itmf.t_tag = tm_id; /* tag of fcpio to be aborted */
113 desc->u.itmf._resvd = 0;
114 memcpy(desc->u.itmf.lun, lun, LUN_ADDRESS); /* LUN address */
115 desc->u.itmf._resvd1 = 0;
116 hton24(desc->u.itmf.d_id, d_id); /* FC vNIC only: Target D_ID */
117 desc->u.itmf.r_a_tov = r_a_tov; /* FC vNIC only: R_A_TOV in msec */
118 desc->u.itmf.e_d_tov = e_d_tov; /* FC vNIC only: E_D_TOV in msec */
119
120 vnic_wq_copy_post(wq);
121}
122
123static inline void fnic_queue_wq_copy_desc_flogi_reg(struct vnic_wq_copy *wq,
124 u32 req_id, u8 format,
125 u32 s_id, u8 *gw_mac)
126{
127 struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
128
129 desc->hdr.type = FCPIO_FLOGI_REG; /* enum fcpio_type */
130 desc->hdr.status = 0; /* header status entry */
131 desc->hdr._resvd = 0; /* reserved */
132 desc->hdr.tag.u.req_id = req_id; /* id for this request */
133
134 desc->u.flogi_reg.format = format;
135 hton24(desc->u.flogi_reg.s_id, s_id);
136 memcpy(desc->u.flogi_reg.gateway_mac, gw_mac, ETH_ALEN);
137
138 vnic_wq_copy_post(wq);
139}
140
141static inline void fnic_queue_wq_copy_desc_fw_reset(struct vnic_wq_copy *wq,
142 u32 req_id)
143{
144 struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
145
146 desc->hdr.type = FCPIO_RESET; /* enum fcpio_type */
147 desc->hdr.status = 0; /* header status entry */
148 desc->hdr._resvd = 0; /* reserved */
149 desc->hdr.tag.u.req_id = req_id; /* id for this request */
150
151 vnic_wq_copy_post(wq);
152}
153
154static inline void fnic_queue_wq_copy_desc_lunmap(struct vnic_wq_copy *wq,
155 u32 req_id, u64 lunmap_addr,
156 u32 lunmap_len)
157{
158 struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
159
160 desc->hdr.type = FCPIO_LUNMAP_REQ; /* enum fcpio_type */
161 desc->hdr.status = 0; /* header status entry */
162 desc->hdr._resvd = 0; /* reserved */
163 desc->hdr.tag.u.req_id = req_id; /* id for this request */
164
165 desc->u.lunmap_req.addr = lunmap_addr; /* address of the buffer */
166 desc->u.lunmap_req.len = lunmap_len; /* len of the buffer */
167
168 vnic_wq_copy_post(wq);
169}
170
171static inline void fnic_queue_rq_desc(struct vnic_rq *rq,
172 void *os_buf, dma_addr_t dma_addr,
173 u16 len)
174{
175 struct rq_enet_desc *desc = vnic_rq_next_desc(rq);
176
177 rq_enet_desc_enc(desc,
178 (u64)dma_addr | VNIC_PADDR_TARGET,
179 RQ_ENET_TYPE_ONLY_SOP,
180 (u16)len);
181
182 vnic_rq_post(rq, os_buf, 0, dma_addr, len);
183}
184
185
186struct fnic;
187
188int fnic_get_vnic_config(struct fnic *);
189int fnic_alloc_vnic_resources(struct fnic *);
190void fnic_free_vnic_resources(struct fnic *);
191void fnic_get_res_counts(struct fnic *);
192int fnic_set_nic_config(struct fnic *fnic, u8 rss_default_cpu,
193 u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu,
194 u8 rss_enable, u8 tso_ipid_split_en,
195 u8 ig_vlan_strip_en);
196
197#endif /* _FNIC_RES_H_ */
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
new file mode 100644
index 000000000000..eabf36502856
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -0,0 +1,1850 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#include <linux/mempool.h>
19#include <linux/errno.h>
20#include <linux/init.h>
21#include <linux/workqueue.h>
22#include <linux/pci.h>
23#include <linux/scatterlist.h>
24#include <linux/skbuff.h>
25#include <linux/spinlock.h>
26#include <linux/if_ether.h>
27#include <linux/if_vlan.h>
28#include <linux/delay.h>
29#include <scsi/scsi.h>
30#include <scsi/scsi_host.h>
31#include <scsi/scsi_device.h>
32#include <scsi/scsi_cmnd.h>
33#include <scsi/scsi_tcq.h>
34#include <scsi/fc/fc_els.h>
35#include <scsi/fc/fc_fcoe.h>
36#include <scsi/libfc.h>
37#include <scsi/fc_frame.h>
38#include "fnic_io.h"
39#include "fnic.h"
40
41const char *fnic_state_str[] = {
42 [FNIC_IN_FC_MODE] = "FNIC_IN_FC_MODE",
43 [FNIC_IN_FC_TRANS_ETH_MODE] = "FNIC_IN_FC_TRANS_ETH_MODE",
44 [FNIC_IN_ETH_MODE] = "FNIC_IN_ETH_MODE",
45 [FNIC_IN_ETH_TRANS_FC_MODE] = "FNIC_IN_ETH_TRANS_FC_MODE",
46};
47
48static const char *fnic_ioreq_state_str[] = {
49 [FNIC_IOREQ_CMD_PENDING] = "FNIC_IOREQ_CMD_PENDING",
50 [FNIC_IOREQ_ABTS_PENDING] = "FNIC_IOREQ_ABTS_PENDING",
51 [FNIC_IOREQ_ABTS_COMPLETE] = "FNIC_IOREQ_ABTS_COMPLETE",
52 [FNIC_IOREQ_CMD_COMPLETE] = "FNIC_IOREQ_CMD_COMPLETE",
53};
54
55static const char *fcpio_status_str[] = {
56 [FCPIO_SUCCESS] = "FCPIO_SUCCESS", /*0x0*/
57 [FCPIO_INVALID_HEADER] = "FCPIO_INVALID_HEADER",
58 [FCPIO_OUT_OF_RESOURCE] = "FCPIO_OUT_OF_RESOURCE",
59 [FCPIO_INVALID_PARAM] = "FCPIO_INVALID_PARAM",
60 [FCPIO_REQ_NOT_SUPPORTED] = "FCPIO_REQ_NOT_SUPPORTED",
61 [FCPIO_IO_NOT_FOUND] = "FCPIO_IO_NOT_FOUND",
62 [FCPIO_ABORTED] = "FCPIO_ABORTED", /*0x41*/
63 [FCPIO_TIMEOUT] = "FCPIO_TIMEOUT",
64 [FCPIO_SGL_INVALID] = "FCPIO_SGL_INVALID",
65 [FCPIO_MSS_INVALID] = "FCPIO_MSS_INVALID",
66 [FCPIO_DATA_CNT_MISMATCH] = "FCPIO_DATA_CNT_MISMATCH",
67 [FCPIO_FW_ERR] = "FCPIO_FW_ERR",
68 [FCPIO_ITMF_REJECTED] = "FCPIO_ITMF_REJECTED",
69 [FCPIO_ITMF_FAILED] = "FCPIO_ITMF_FAILED",
70 [FCPIO_ITMF_INCORRECT_LUN] = "FCPIO_ITMF_INCORRECT_LUN",
71 [FCPIO_CMND_REJECTED] = "FCPIO_CMND_REJECTED",
72 [FCPIO_NO_PATH_AVAIL] = "FCPIO_NO_PATH_AVAIL",
73 [FCPIO_PATH_FAILED] = "FCPIO_PATH_FAILED",
74 [FCPIO_LUNMAP_CHNG_PEND] = "FCPIO_LUNMAP_CHNG_PEND",
75};
76
77const char *fnic_state_to_str(unsigned int state)
78{
79 if (state >= ARRAY_SIZE(fnic_state_str) || !fnic_state_str[state])
80 return "unknown";
81
82 return fnic_state_str[state];
83}
84
85static const char *fnic_ioreq_state_to_str(unsigned int state)
86{
87 if (state >= ARRAY_SIZE(fnic_ioreq_state_str) ||
88 !fnic_ioreq_state_str[state])
89 return "unknown";
90
91 return fnic_ioreq_state_str[state];
92}
93
94static const char *fnic_fcpio_status_to_str(unsigned int status)
95{
96 if (status >= ARRAY_SIZE(fcpio_status_str) || !fcpio_status_str[status])
97 return "unknown";
98
99 return fcpio_status_str[status];
100}
101
102static void fnic_cleanup_io(struct fnic *fnic, int exclude_id);
103
104static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic,
105 struct scsi_cmnd *sc)
106{
107 u32 hash = sc->request->tag & (FNIC_IO_LOCKS - 1);
108
109 return &fnic->io_req_lock[hash];
110}
111
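fnic_io_lock_hash() picks one of FNIC_IO_LOCKS spinlocks by masking the request tag. A sketch of that hash, assuming (as the mask trick requires) that the lock count is a power of two; DEMO_IO_LOCKS is an illustrative stand-in:

#include <assert.h>
#include <stdio.h>

#define DEMO_IO_LOCKS 64	/* illustrative; must be a power of two */

static unsigned int lock_index(unsigned int tag)
{
	/* power-of-two check: exactly one bit set */
	assert((DEMO_IO_LOCKS & (DEMO_IO_LOCKS - 1)) == 0);
	return tag & (DEMO_IO_LOCKS - 1);
}

int main(void)
{
	printf("%u %u\n", lock_index(5), lock_index(69));	/* 5 and 5 */
	return 0;
}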
112/*
113 * Unmap the data buffer and sense buffer for an io_req,
114 * also unmap and free the device-private scatter/gather list.
115 */
116static void fnic_release_ioreq_buf(struct fnic *fnic,
117 struct fnic_io_req *io_req,
118 struct scsi_cmnd *sc)
119{
120 if (io_req->sgl_list_pa)
121 pci_unmap_single(fnic->pdev, io_req->sgl_list_pa,
122 sizeof(io_req->sgl_list[0]) * io_req->sgl_cnt,
123 PCI_DMA_TODEVICE);
124 scsi_dma_unmap(sc);
125
126 if (io_req->sgl_cnt)
127 mempool_free(io_req->sgl_list_alloc,
128 fnic->io_sgl_pool[io_req->sgl_type]);
129 if (io_req->sense_buf_pa)
130 pci_unmap_single(fnic->pdev, io_req->sense_buf_pa,
131 SCSI_SENSE_BUFFERSIZE, PCI_DMA_FROMDEVICE);
132}
133
134/* Free up Copy Wq descriptors. Called with copy_wq lock held */
135static int free_wq_copy_descs(struct fnic *fnic, struct vnic_wq_copy *wq)
136{
137 /* if no Ack received from firmware, then nothing to clean */
138 if (!fnic->fw_ack_recd[0])
139 return 1;
140
141 /*
142 * Update the desc_avail count based on the number of freed
143 * descriptors, accounting for wraparound.
144 */
145 if (wq->to_clean_index <= fnic->fw_ack_index[0])
146 wq->ring.desc_avail += (fnic->fw_ack_index[0]
147 - wq->to_clean_index + 1);
148 else
149 wq->ring.desc_avail += (wq->ring.desc_count
150 - wq->to_clean_index
151 + fnic->fw_ack_index[0] + 1);
152
153 /*
154 * Just bump the clean index to ack_index + 1, accounting for
155 * wraparound. This frees up all descriptors between
156 * to_clean_index and fw_ack_index, both inclusive.
157 */
158 wq->to_clean_index =
159 (fnic->fw_ack_index[0] + 1) % wq->ring.desc_count;
160
161 /* we have processed the acks received so far */
162 fnic->fw_ack_recd[0] = 0;
163 return 0;
164}
165
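The desc_avail arithmetic in free_wq_copy_descs() has to account for the ack index wrapping past the end of the ring. A worked sketch with illustrative sizes:

#include <stdio.h>

/* Sketch of the wraparound accounting in free_wq_copy_descs().
 * Ring size and indices are illustrative.
 */
int main(void)
{
	unsigned int desc_count = 64;
	unsigned int to_clean = 60;	/* next descriptor to reclaim */
	unsigned int fw_ack = 3;	/* last descriptor acked by fw */
	unsigned int freed;

	if (to_clean <= fw_ack)
		freed = fw_ack - to_clean + 1;
	else
		freed = desc_count - to_clean + fw_ack + 1;

	/* 64 - 60 + 3 + 1 = 8 descriptors (60..63 and 0..3), both ends
	 * inclusive; the new clean index is fw_ack + 1 modulo ring size */
	printf("freed %u, new to_clean %u\n",
	       freed, (fw_ack + 1) % desc_count);
	return 0;
}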
166
167/*
168 * fnic_fw_reset_handler
169 * Routine to send reset msg to fw
170 */
171int fnic_fw_reset_handler(struct fnic *fnic)
172{
173 struct vnic_wq_copy *wq = &fnic->wq_copy[0];
174 int ret = 0;
175 unsigned long flags;
176
177 spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
178
179 if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
180 free_wq_copy_descs(fnic, wq);
181
182 if (!vnic_wq_copy_desc_avail(wq))
183 ret = -EAGAIN;
184 else
185 fnic_queue_wq_copy_desc_fw_reset(wq, SCSI_NO_TAG);
186
187 spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
188
189 if (!ret)
190 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
191 "Issued fw reset\n");
192 else
193 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
194 "Failed to issue fw reset\n");
195 return ret;
196}
197
198
199/*
200 * fnic_flogi_reg_handler
201 * Routine to send flogi register msg to fw
202 */
203int fnic_flogi_reg_handler(struct fnic *fnic)
204{
205 struct vnic_wq_copy *wq = &fnic->wq_copy[0];
206 u8 gw_mac[ETH_ALEN];
207 int ret = 0;
208 unsigned long flags;
209
210 spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
211
212 if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
213 free_wq_copy_descs(fnic, wq);
214
215 if (!vnic_wq_copy_desc_avail(wq)) {
216 ret = -EAGAIN;
217 goto flogi_reg_ioreq_end;
218 }
219
220 if (fnic->fcoui_mode)
221 memset(gw_mac, 0xff, ETH_ALEN);
222 else
223 memcpy(gw_mac, fnic->dest_addr, ETH_ALEN);
224
225 fnic_queue_wq_copy_desc_flogi_reg(wq, SCSI_NO_TAG,
226 FCPIO_FLOGI_REG_GW_DEST,
227 fnic->s_id,
228 gw_mac);
229
230flogi_reg_ioreq_end:
231 spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
232
233 if (!ret)
234 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
235 "flog reg issued\n");
236
237 return ret;
238}
239
240/*
241 * fnic_queue_wq_copy_desc
242 * Routine to enqueue a wq copy desc
243 */
244static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
245 struct vnic_wq_copy *wq,
246 struct fnic_io_req *io_req,
247 struct scsi_cmnd *sc,
248 u32 sg_count)
249{
250 struct scatterlist *sg;
251 struct fc_rport *rport = starget_to_rport(scsi_target(sc->device));
252 struct fc_rport_libfc_priv *rp = rport->dd_data;
253 struct host_sg_desc *desc;
254 u8 pri_tag = 0;
255 unsigned int i;
256 unsigned long intr_flags;
257 int flags;
258 u8 exch_flags;
259 struct scsi_lun fc_lun;
260 char msg[2];
261
262 if (sg_count) {
263 BUG_ON(sg_count < 0);
264 BUG_ON(sg_count > FNIC_MAX_SG_DESC_CNT);
265
266 /* For each SGE, create a device desc entry */
267 desc = io_req->sgl_list;
268 for_each_sg(scsi_sglist(sc), sg, sg_count, i) {
269 desc->addr = cpu_to_le64(sg_dma_address(sg));
270 desc->len = cpu_to_le32(sg_dma_len(sg));
271 desc->_resvd = 0;
272 desc++;
273 }
274
275 io_req->sgl_list_pa = pci_map_single
276 (fnic->pdev,
277 io_req->sgl_list,
278 sizeof(io_req->sgl_list[0]) * sg_count,
279 PCI_DMA_TODEVICE);
280 }
281
282 io_req->sense_buf_pa = pci_map_single(fnic->pdev,
283 sc->sense_buffer,
284 SCSI_SENSE_BUFFERSIZE,
285 PCI_DMA_FROMDEVICE);
286
287 int_to_scsilun(sc->device->lun, &fc_lun);
288
289 pri_tag = FCPIO_ICMND_PTA_SIMPLE;
290 msg[0] = MSG_SIMPLE_TAG;
291 scsi_populate_tag_msg(sc, msg);
292 if (msg[0] == MSG_ORDERED_TAG)
293 pri_tag = FCPIO_ICMND_PTA_ORDERED;
294
295 /* Enqueue the descriptor in the Copy WQ */
296 spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags);
297
298 if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
299 free_wq_copy_descs(fnic, wq);
300
301 if (unlikely(!vnic_wq_copy_desc_avail(wq))) {
302 spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags);
303 return SCSI_MLQUEUE_HOST_BUSY;
304 }
305
306 flags = 0;
307 if (sc->sc_data_direction == DMA_FROM_DEVICE)
308 flags = FCPIO_ICMND_RDDATA;
309 else if (sc->sc_data_direction == DMA_TO_DEVICE)
310 flags = FCPIO_ICMND_WRDATA;
311
312 exch_flags = 0;
313 if ((fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR) &&
314 (rp->flags & FC_RP_FLAGS_RETRY))
315 exch_flags |= FCPIO_ICMND_SRFLAG_RETRY;
316
317 fnic_queue_wq_copy_desc_icmnd_16(wq, sc->request->tag,
318 0, exch_flags, io_req->sgl_cnt,
319 SCSI_SENSE_BUFFERSIZE,
320 io_req->sgl_list_pa,
321 io_req->sense_buf_pa,
322 0, /* scsi cmd ref, always 0 */
323 pri_tag, /* scsi pri and tag */
324 flags, /* command flags */
325 sc->cmnd, scsi_bufflen(sc),
326 fc_lun.scsi_lun, io_req->port_id,
327 rport->maxframe_size, rp->r_a_tov,
328 rp->e_d_tov);
329
330 spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags);
331 return 0;
332}
333
334/*
335 * fnic_queuecommand
336 * Routine to send a scsi cdb
337 * Called with host_lock held and interrupts disabled.
338 */
339int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
340{
341 struct fc_lport *lp;
342 struct fc_rport *rport;
343 struct fnic_io_req *io_req;
344 struct fnic *fnic;
345 struct vnic_wq_copy *wq;
346 int ret;
347 int sg_count; /* scsi_dma_map() returns a negative errno on failure */
348 unsigned long flags;
349 unsigned long ptr;
350
351 rport = starget_to_rport(scsi_target(sc->device));
352 ret = fc_remote_port_chkready(rport);
353 if (ret) {
354 sc->result = ret;
355 done(sc);
356 return 0;
357 }
358
359 lp = shost_priv(sc->device->host);
360 if (lp->state != LPORT_ST_READY || !(lp->link_up))
361 return SCSI_MLQUEUE_HOST_BUSY;
362
363 /*
364 * Release the host lock; use driver-private locks from here on.
365 * Don't re-enable interrupts: they may have been disabled before
366 * the caller took the host lock.
367 */
368 spin_unlock(lp->host->host_lock);
369
370 /* Get a new io_req for this SCSI IO */
371 fnic = lport_priv(lp);
372
373 io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
374 if (!io_req) {
375 ret = SCSI_MLQUEUE_HOST_BUSY;
376 goto out;
377 }
378 memset(io_req, 0, sizeof(*io_req));
379
380 /* Map the data buffer */
381 sg_count = scsi_dma_map(sc);
382 if (sg_count < 0) {
383 ret = SCSI_MLQUEUE_HOST_BUSY;
384 mempool_free(io_req, fnic->io_req_pool);
385 goto out;
386 }
386
387 /* Determine the type of scatter/gather list we need */
388 io_req->sgl_cnt = sg_count;
389 io_req->sgl_type = FNIC_SGL_CACHE_DFLT;
390 if (sg_count > FNIC_DFLT_SG_DESC_CNT)
391 io_req->sgl_type = FNIC_SGL_CACHE_MAX;
392
393 if (sg_count) {
394 io_req->sgl_list =
395 mempool_alloc(fnic->io_sgl_pool[io_req->sgl_type],
396 GFP_ATOMIC | GFP_DMA);
397 if (!io_req->sgl_list) {
398 ret = SCSI_MLQUEUE_HOST_BUSY;
399 scsi_dma_unmap(sc);
400 mempool_free(io_req, fnic->io_req_pool);
401 goto out;
402 }
403
404 /* Cache sgl list allocated address before alignment */
405 io_req->sgl_list_alloc = io_req->sgl_list;
406 ptr = (unsigned long) io_req->sgl_list;
407 if (ptr % FNIC_SG_DESC_ALIGN) {
408 io_req->sgl_list = (struct host_sg_desc *)
409 (((unsigned long) ptr
410 + FNIC_SG_DESC_ALIGN - 1)
411 & ~(FNIC_SG_DESC_ALIGN - 1));
412 }
413 }
414
415 /* initialize rest of io_req */
416 io_req->port_id = rport->port_id;
417 CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING;
418 CMD_SP(sc) = (char *)io_req;
419 sc->scsi_done = done;
420
421 /* create copy wq desc and enqueue it */
422 wq = &fnic->wq_copy[0];
423 ret = fnic_queue_wq_copy_desc(fnic, wq, io_req, sc, sg_count);
424 if (ret) {
425 /*
426 * In case another thread cancelled the request,
427 * refetch the pointer under the lock.
428 */
429 spinlock_t *io_lock = fnic_io_lock_hash(fnic, sc);
430
431 spin_lock_irqsave(io_lock, flags);
432 io_req = (struct fnic_io_req *)CMD_SP(sc);
433 CMD_SP(sc) = NULL;
434 CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE;
435 spin_unlock_irqrestore(io_lock, flags);
436 if (io_req) {
437 fnic_release_ioreq_buf(fnic, io_req, sc);
438 mempool_free(io_req, fnic->io_req_pool);
439 }
440 }
441out:
442 /* acquire host lock before returning to SCSI */
443 spin_lock(lp->host->host_lock);
444 return ret;
445}
446
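The SGL handling in fnic_queuecommand() over-allocates by FNIC_SG_DESC_ALIGN (the slab caches in fnic_init_module() were created with that padding) and rounds the pointer up, keeping the original in sgl_list_alloc so it can be freed later. A standalone sketch of the idiom; the macro and sizes are illustrative:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_ALIGN 16	/* stands in for FNIC_SG_DESC_ALIGN */

int main(void)
{
	/* allocate ALIGN - 1 extra bytes so an aligned pointer always fits */
	void *raw = malloc(128 + DEMO_ALIGN - 1);
	uintptr_t p = (uintptr_t)raw;
	void *aligned;

	if (!raw)
		return 1;

	/* round up to the next multiple of DEMO_ALIGN */
	aligned = (void *)((p + DEMO_ALIGN - 1) & ~(uintptr_t)(DEMO_ALIGN - 1));
	printf("raw %p aligned %p\n", raw, aligned);

	/* free the original pointer; the driver keeps sgl_list_alloc
	 * around for exactly this purpose */
	free(raw);
	return 0;
}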
447/*
448 * fnic_fcpio_fw_reset_cmpl_handler
449 * Routine to handle fw reset completion
450 */
451static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic,
452 struct fcpio_fw_req *desc)
453{
454 u8 type;
455 u8 hdr_status;
456 struct fcpio_tag tag;
457 int ret = 0;
458 struct fc_frame *flogi;
459 unsigned long flags;
460
461 fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
462
463 /* Clean up all outstanding io requests */
464 fnic_cleanup_io(fnic, SCSI_NO_TAG);
465
466 spin_lock_irqsave(&fnic->fnic_lock, flags);
467
468 flogi = fnic->flogi;
469 fnic->flogi = NULL;
470
471 /* fnic should be in FC_TRANS_ETH_MODE */
472 if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE) {
473 /* Check status of reset completion */
474 if (!hdr_status) {
475 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
476 "reset cmpl success\n");
477 /* Ready to send flogi out */
478 fnic->state = FNIC_IN_ETH_MODE;
479 } else {
480 FNIC_SCSI_DBG(KERN_DEBUG,
481 fnic->lport->host,
482 "fnic fw_reset : failed %s\n",
483 fnic_fcpio_status_to_str(hdr_status));
484
485 /*
486 * Unable to change to eth mode, cannot send out flogi
487 * Change state to fc mode, so that subsequent Flogi
488 * requests from libFC will cause more attempts to
489 * reset the firmware. Free the cached flogi
490 */
491 fnic->state = FNIC_IN_FC_MODE;
492 ret = -1;
493 }
494 } else {
495 FNIC_SCSI_DBG(KERN_DEBUG,
496 fnic->lport->host,
497 "Unexpected state %s while processing"
498 " reset cmpl\n", fnic_state_to_str(fnic->state));
499 ret = -1;
500 }
501
502 /* Thread removing device blocks till firmware reset is complete */
503 if (fnic->remove_wait)
504 complete(fnic->remove_wait);
505
506 /*
507 * If fnic is being removed, or fw reset failed
508 * free the flogi frame. Else, send it out
509 */
510 if (fnic->remove_wait || ret) {
511 fnic->flogi_oxid = FC_XID_UNKNOWN;
512 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
513 if (flogi)
514 dev_kfree_skb_irq(fp_skb(flogi));
515 goto reset_cmpl_handler_end;
516 }
517
518 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
519
520 if (flogi)
521 ret = fnic_send_frame(fnic, flogi);
522
523 reset_cmpl_handler_end:
524 return ret;
525}
526
527/*
528 * fnic_fcpio_flogi_reg_cmpl_handler
529 * Routine to handle flogi register completion
530 */
531static int fnic_fcpio_flogi_reg_cmpl_handler(struct fnic *fnic,
532 struct fcpio_fw_req *desc)
533{
534 u8 type;
535 u8 hdr_status;
536 struct fcpio_tag tag;
537 int ret = 0;
538 struct fc_frame *flogi_resp = NULL;
539 unsigned long flags;
540 struct sk_buff *skb;
541
542 fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
543
544 /* Update fnic state based on status of flogi reg completion */
545 spin_lock_irqsave(&fnic->fnic_lock, flags);
546
547 flogi_resp = fnic->flogi_resp;
548 fnic->flogi_resp = NULL;
549
550 if (fnic->state == FNIC_IN_ETH_TRANS_FC_MODE) {
551
552 /* Check flogi registration completion status */
553 if (!hdr_status) {
554 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
555 "flog reg succeeded\n");
556 fnic->state = FNIC_IN_FC_MODE;
557 } else {
558 FNIC_SCSI_DBG(KERN_DEBUG,
559 fnic->lport->host,
560 "fnic flogi reg :failed %s\n",
561 fnic_fcpio_status_to_str(hdr_status));
562 fnic->state = FNIC_IN_ETH_MODE;
563 ret = -1;
564 }
565 } else {
566 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
567 "Unexpected fnic state %s while"
568 " processing flogi reg completion\n",
569 fnic_state_to_str(fnic->state));
570 ret = -1;
571 }
572
573 /* Successful flogi reg cmpl, pass frame to LibFC */
574 if (!ret && flogi_resp) {
575 if (fnic->stop_rx_link_events) {
576 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
577 goto reg_cmpl_handler_end;
578 }
579 skb = (struct sk_buff *)flogi_resp;
580 /* Use fr_flags to indicate whether flogi resp or not */
581 fr_flags(flogi_resp) = 1;
582 fr_dev(flogi_resp) = fnic->lport;
583 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
584
585 skb_queue_tail(&fnic->frame_queue, skb);
586 queue_work(fnic_event_queue, &fnic->frame_work);
587
588 } else {
589 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
590 if (flogi_resp)
591 dev_kfree_skb_irq(fp_skb(flogi_resp));
592 }
593
594reg_cmpl_handler_end:
595 return ret;
596}
597
598static inline int is_ack_index_in_range(struct vnic_wq_copy *wq,
599 u16 request_out)
600{
601 if (wq->to_clean_index <= wq->to_use_index) {
602 /* out of range, stale request_out index */
603 if (request_out < wq->to_clean_index ||
604 request_out >= wq->to_use_index)
605 return 0;
606 } else {
607 /* out of range, stale request_out index */
608 if (request_out < wq->to_clean_index &&
609 request_out >= wq->to_use_index)
610 return 0;
611 }
612 /* request_out index is in range */
613 return 1;
614}
615
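is_ack_index_in_range() accepts an ack only if it falls inside the posted-but-not-cleaned window [to_clean_index, to_use_index), which may wrap around the ring. A sketch with illustrative indices:

#include <stdio.h>

/* Sketch of the in-flight window test in is_ack_index_in_range().
 * Valid acks lie in [to_clean, to_use), which may wrap around the
 * end of the ring.
 */
static int in_range(unsigned int to_clean, unsigned int to_use,
		    unsigned int req)
{
	if (to_clean <= to_use)			/* window does not wrap */
		return req >= to_clean && req < to_use;
	return req >= to_clean || req < to_use;	/* window wraps */
}

int main(void)
{
	printf("%d\n", in_range(60, 4, 62));	/* 1: inside wrapped window */
	printf("%d\n", in_range(60, 4, 10));	/* 0: stale index */
	return 0;
}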
616
617/*
618 * Mark that an ack was received and store the ack index. If multiple
619 * acks arrive before the Tx thread cleans up, the latest value is
620 * used, which is the correct behavior. This state should live in the
621 * copy WQ instead of in the fnic.
622 */
623static inline void fnic_fcpio_ack_handler(struct fnic *fnic,
624 unsigned int cq_index,
625 struct fcpio_fw_req *desc)
626{
627 struct vnic_wq_copy *wq;
628 u16 request_out = desc->u.ack.request_out;
629 unsigned long flags;
630
631 /* mark the ack state */
632 wq = &fnic->wq_copy[cq_index - fnic->raw_wq_count - fnic->rq_count];
633 spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
634
635 if (is_ack_index_in_range(wq, request_out)) {
636 fnic->fw_ack_index[0] = request_out;
637 fnic->fw_ack_recd[0] = 1;
638 }
639 spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
640}
641
642/*
643 * fnic_fcpio_icmnd_cmpl_handler
644 * Routine to handle icmnd completions
645 */
646static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
647 struct fcpio_fw_req *desc)
648{
649 u8 type;
650 u8 hdr_status;
651 struct fcpio_tag tag;
652 u32 id;
653 u64 xfer_len = 0;
654 struct fcpio_icmnd_cmpl *icmnd_cmpl;
655 struct fnic_io_req *io_req;
656 struct scsi_cmnd *sc;
657 unsigned long flags;
658 spinlock_t *io_lock;
659
660 /* Decode the cmpl description to get the io_req id */
661 fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
662 fcpio_tag_id_dec(&tag, &id);
663
664 if (id >= FNIC_MAX_IO_REQ)
665 return;
666
667 sc = scsi_host_find_tag(fnic->lport->host, id);
668 WARN_ON_ONCE(!sc);
669 if (!sc)
670 return;
671
672 io_lock = fnic_io_lock_hash(fnic, sc);
673 spin_lock_irqsave(io_lock, flags);
674 io_req = (struct fnic_io_req *)CMD_SP(sc);
675 WARN_ON_ONCE(!io_req);
676 if (!io_req) {
677 spin_unlock_irqrestore(io_lock, flags);
678 return;
679 }
680
681 /* firmware completed the io */
682 io_req->io_completed = 1;
683
684 /*
685 * if SCSI-ML has already issued abort on this command,
686 * ignore completion of the IO. The abts path will clean it up
687 */
688 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
689 spin_unlock_irqrestore(io_lock, flags);
690 return;
691 }
692
693 /* Mark the IO as complete */
694 CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE;
695
696 icmnd_cmpl = &desc->u.icmnd_cmpl;
697
698 switch (hdr_status) {
699 case FCPIO_SUCCESS:
700 sc->result = (DID_OK << 16) | icmnd_cmpl->scsi_status;
701 xfer_len = scsi_bufflen(sc);
702 scsi_set_resid(sc, icmnd_cmpl->residual);
703
704 if (icmnd_cmpl->flags & FCPIO_ICMND_CMPL_RESID_UNDER)
705 xfer_len -= icmnd_cmpl->residual;
706
707 /*
708 * If queue_full, then try to reduce queue depth for all
709 * LUNs on the target. TODO: this should be accompanied
710 * by a periodic queue_depth rampup based on successful
711 * IO completion.
712 */
713 if (icmnd_cmpl->scsi_status == QUEUE_FULL) {
714 struct scsi_device *t_sdev;
715 int qd = 0;
716
717 shost_for_each_device(t_sdev, sc->device->host) {
718 if (t_sdev->id != sc->device->id)
719 continue;
720
721 if (t_sdev->queue_depth > 1) {
722 qd = scsi_track_queue_full
723 (t_sdev,
724 t_sdev->queue_depth - 1);
725 if (qd == -1)
726 qd = t_sdev->host->cmd_per_lun;
727 shost_printk(KERN_INFO,
728 fnic->lport->host,
729 "scsi[%d:%d:%d:%d"
730 "] queue full detected,"
731 "new depth = %d\n",
732 t_sdev->host->host_no,
733 t_sdev->channel,
734 t_sdev->id, t_sdev->lun,
735 t_sdev->queue_depth);
736 }
737 }
738 }
739 break;
740
741 case FCPIO_TIMEOUT: /* request was timed out */
742 sc->result = (DID_TIME_OUT << 16) | icmnd_cmpl->scsi_status;
743 break;
744
745 case FCPIO_ABORTED: /* request was aborted */
746 sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status;
747 break;
748
749 case FCPIO_DATA_CNT_MISMATCH: /* recv/sent more/less data than exp. */
750 scsi_set_resid(sc, icmnd_cmpl->residual);
751 sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status;
752 break;
753
754 case FCPIO_OUT_OF_RESOURCE: /* out of resources to complete request */
755 sc->result = (DID_REQUEUE << 16) | icmnd_cmpl->scsi_status;
756 break;
757 case FCPIO_INVALID_HEADER: /* header contains invalid data */
758 case FCPIO_INVALID_PARAM: /* some parameter in request invalid */
759 case FCPIO_REQ_NOT_SUPPORTED:/* request type is not supported */
760 case FCPIO_IO_NOT_FOUND: /* requested I/O was not found */
761 case FCPIO_SGL_INVALID: /* request was aborted due to sgl error */
762 case FCPIO_MSS_INVALID: /* request was aborted due to mss error */
763 case FCPIO_FW_ERR: /* request was terminated due fw error */
764 default:
765 shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
766 fnic_fcpio_status_to_str(hdr_status));
767 sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status;
768 break;
769 }
770
771 /* Break link with the SCSI command */
772 CMD_SP(sc) = NULL;
773
774 spin_unlock_irqrestore(io_lock, flags);
775
776 fnic_release_ioreq_buf(fnic, io_req, sc);
777
778 mempool_free(io_req, fnic->io_req_pool);
779
780 if (sc->sc_data_direction == DMA_FROM_DEVICE) {
781 fnic->lport->host_stats.fcp_input_requests++;
782 fnic->fcp_input_bytes += xfer_len;
783 } else if (sc->sc_data_direction == DMA_TO_DEVICE) {
784 fnic->lport->host_stats.fcp_output_requests++;
785 fnic->fcp_output_bytes += xfer_len;
786 } else
787 fnic->lport->host_stats.fcp_control_requests++;
788
789 /* Call SCSI completion function to complete the IO */
790 if (sc->scsi_done)
791 sc->scsi_done(sc);
792
793}
794
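In the FCPIO_SUCCESS branch above, an underrun (RESID_UNDER) shrinks the transfer length by the residual before the byte counters are updated. A worked sketch with illustrative values:

#include <stdio.h>

/* Sketch of the underrun accounting in the FCPIO_SUCCESS branch:
 * when the target moves less data than requested, the residual is
 * subtracted from the transferred-byte count.
 */
int main(void)
{
	unsigned long requested = 4096;	/* scsi_bufflen(sc) */
	unsigned long residual = 512;	/* icmnd_cmpl->residual */
	unsigned long xfer_len = requested;

	/* RESID_UNDER set: only requested - residual bytes moved */
	xfer_len -= residual;
	printf("transferred %lu of %lu bytes\n", xfer_len, requested);
	return 0;
}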
795/* fnic_fcpio_itmf_cmpl_handler
796 * Routine to handle itmf completions
797 */
798static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
799 struct fcpio_fw_req *desc)
800{
801 u8 type;
802 u8 hdr_status;
803 struct fcpio_tag tag;
804 u32 id;
805 struct scsi_cmnd *sc;
806 struct fnic_io_req *io_req;
807 unsigned long flags;
808 spinlock_t *io_lock;
809
810 fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
811 fcpio_tag_id_dec(&tag, &id);
812
813 if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ)
814 return;
815
816 sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK);
817 WARN_ON_ONCE(!sc);
818 if (!sc)
819 return;
820
821 io_lock = fnic_io_lock_hash(fnic, sc);
822 spin_lock_irqsave(io_lock, flags);
823 io_req = (struct fnic_io_req *)CMD_SP(sc);
824 WARN_ON_ONCE(!io_req);
825 if (!io_req) {
826 spin_unlock_irqrestore(io_lock, flags);
827 return;
828 }
829
830 if (id & FNIC_TAG_ABORT) {
831 /* Completion of abort cmd */
832 if (CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING) {
833 /* This is a late completion. Ignore it */
834 spin_unlock_irqrestore(io_lock, flags);
835 return;
836 }
837 CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
838 CMD_ABTS_STATUS(sc) = hdr_status;
839
840 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
841 "abts cmpl recd. id %d status %s\n",
842 (int)(id & FNIC_TAG_MASK),
843 fnic_fcpio_status_to_str(hdr_status));
844
845 /*
846 * If the scsi_eh thread is blocked waiting for the abts to
847 * complete, signal completion to it; the IO is then cleaned up in
848 * that thread. Otherwise clean it up in this context.
849 */
850 if (io_req->abts_done) {
851 complete(io_req->abts_done);
852 spin_unlock_irqrestore(io_lock, flags);
853 } else {
854 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
855 "abts cmpl, completing IO\n");
856 CMD_SP(sc) = NULL;
857 sc->result = (DID_ERROR << 16);
858
859 spin_unlock_irqrestore(io_lock, flags);
860
861 fnic_release_ioreq_buf(fnic, io_req, sc);
862 mempool_free(io_req, fnic->io_req_pool);
863 if (sc->scsi_done)
864 sc->scsi_done(sc);
865 }
866
867 } else if (id & FNIC_TAG_DEV_RST) {
868 /* Completion of device reset */
869 CMD_LR_STATUS(sc) = hdr_status;
870 CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE;
871 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
872 "dev reset cmpl recd. id %d status %s\n",
873 (int)(id & FNIC_TAG_MASK),
874 fnic_fcpio_status_to_str(hdr_status));
875 if (io_req->dr_done)
876 complete(io_req->dr_done);
877 spin_unlock_irqrestore(io_lock, flags);
878
879 } else {
880 shost_printk(KERN_ERR, fnic->lport->host,
881 "Unexpected itmf io state %s tag %x\n",
882 fnic_ioreq_state_to_str(CMD_STATE(sc)), id);
883 spin_unlock_irqrestore(io_lock, flags);
884 }
885
886}
887
888/*
889 * fnic_fcpio_cmpl_handler
890 * Routine to service the cq for wq_copy
891 */
892static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev,
893 unsigned int cq_index,
894 struct fcpio_fw_req *desc)
895{
896 struct fnic *fnic = vnic_dev_priv(vdev);
897 int ret = 0;
898
899 switch (desc->hdr.type) {
900 case FCPIO_ACK: /* fw copied copy wq desc to its queue */
901 fnic_fcpio_ack_handler(fnic, cq_index, desc);
902 break;
903
904 case FCPIO_ICMND_CMPL: /* fw completed a command */
905 fnic_fcpio_icmnd_cmpl_handler(fnic, desc);
906 break;
907
908 case FCPIO_ITMF_CMPL: /* fw completed itmf (abort cmd, lun reset)*/
909 fnic_fcpio_itmf_cmpl_handler(fnic, desc);
910 break;
911
912 case FCPIO_FLOGI_REG_CMPL: /* fw completed flogi_reg */
913 ret = fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc);
914 break;
915
916 case FCPIO_RESET_CMPL: /* fw completed reset */
917 ret = fnic_fcpio_fw_reset_cmpl_handler(fnic, desc);
918 break;
919
920 default:
921 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
922 "firmware completion type %d\n",
923 desc->hdr.type);
924 break;
925 }
926
927 return ret;
928}
929
930/*
931 * fnic_wq_copy_cmpl_handler
932 * Routine to process wq copy
933 */
934int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
935{
936 unsigned int wq_work_done = 0;
937 unsigned int i, cq_index;
938 unsigned int cur_work_done;
939
940 for (i = 0; i < fnic->wq_copy_count; i++) {
941 cq_index = i + fnic->raw_wq_count + fnic->rq_count;
942 cur_work_done = vnic_cq_copy_service(&fnic->cq[cq_index],
943 fnic_fcpio_cmpl_handler,
944 copy_work_to_do);
945 wq_work_done += cur_work_done;
946 }
947 return wq_work_done;
948}
949
950static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
951{
952 unsigned int i;
953 struct fnic_io_req *io_req;
954 unsigned long flags = 0;
955 struct scsi_cmnd *sc;
956 spinlock_t *io_lock;
957
958 for (i = 0; i < FNIC_MAX_IO_REQ; i++) {
959 if (i == exclude_id)
960 continue;
961
962 sc = scsi_host_find_tag(fnic->lport->host, i);
963 if (!sc)
964 continue;
965
966 io_lock = fnic_io_lock_hash(fnic, sc);
967 spin_lock_irqsave(io_lock, flags);
968 io_req = (struct fnic_io_req *)CMD_SP(sc);
969 if (!io_req) {
970 spin_unlock_irqrestore(io_lock, flags);
971 goto cleanup_scsi_cmd;
972 }
973
974 CMD_SP(sc) = NULL;
975
976 spin_unlock_irqrestore(io_lock, flags);
977
978 /*
979 * If there is a scsi_cmnd associated with this io_req, then
980 * free the corresponding state
981 */
982 fnic_release_ioreq_buf(fnic, io_req, sc);
983 mempool_free(io_req, fnic->io_req_pool);
984
985cleanup_scsi_cmd:
986 sc->result = DID_TRANSPORT_DISRUPTED << 16;
987 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_cleanup_io:"
988 " DID_TRANSPORT_DISRUPTED\n");
989
990 /* Complete the command to SCSI */
991 if (sc->scsi_done)
992 sc->scsi_done(sc);
993 }
994}
995
996void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
997 struct fcpio_host_req *desc)
998{
999 u32 id;
1000 struct fnic *fnic = vnic_dev_priv(wq->vdev);
1001 struct fnic_io_req *io_req;
1002 struct scsi_cmnd *sc;
1003 unsigned long flags;
1004 spinlock_t *io_lock;
1005
1006 /* get the tag reference */
1007 fcpio_tag_id_dec(&desc->hdr.tag, &id);
1008 id &= FNIC_TAG_MASK;
1009
1010 if (id >= FNIC_MAX_IO_REQ)
1011 return;
1012
1013 sc = scsi_host_find_tag(fnic->lport->host, id);
1014 if (!sc)
1015 return;
1016
1017 io_lock = fnic_io_lock_hash(fnic, sc);
1018 spin_lock_irqsave(io_lock, flags);
1019
1020 /* Get the IO context which this desc refers to */
1021 io_req = (struct fnic_io_req *)CMD_SP(sc);
1022
1023 /* fnic interrupts are turned off by now */
1024
1025 if (!io_req) {
1026 spin_unlock_irqrestore(io_lock, flags);
1027 goto wq_copy_cleanup_scsi_cmd;
1028 }
1029
1030 CMD_SP(sc) = NULL;
1031
1032 spin_unlock_irqrestore(io_lock, flags);
1033
1034 fnic_release_ioreq_buf(fnic, io_req, sc);
1035 mempool_free(io_req, fnic->io_req_pool);
1036
1037wq_copy_cleanup_scsi_cmd:
1038 sc->result = DID_NO_CONNECT << 16;
1039 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "wq_copy_cleanup_handler:"
1040 " DID_NO_CONNECT\n");
1041
1042 if (sc->scsi_done)
1043 sc->scsi_done(sc);
1044}
1045
1046static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag,
1047 u32 task_req, u8 *fc_lun,
1048 struct fnic_io_req *io_req)
1049{
1050 struct vnic_wq_copy *wq = &fnic->wq_copy[0];
1051 unsigned long flags;
1052
1053 spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
1054
1055 if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
1056 free_wq_copy_descs(fnic, wq);
1057
1058 if (!vnic_wq_copy_desc_avail(wq)) {
1059 spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
1060 return 1;
1061 }
1062 fnic_queue_wq_copy_desc_itmf(wq, tag | FNIC_TAG_ABORT,
1063 0, task_req, tag, fc_lun, io_req->port_id,
1064 fnic->config.ra_tov, fnic->config.ed_tov);
1065
1066 spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
1067 return 0;
1068}
1069
1070void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
1071{
1072 int tag;
1073 struct fnic_io_req *io_req;
1074 spinlock_t *io_lock;
1075 unsigned long flags;
1076 struct scsi_cmnd *sc;
1077 struct scsi_lun fc_lun;
1078 enum fnic_ioreq_state old_ioreq_state;
1079
1080 FNIC_SCSI_DBG(KERN_DEBUG,
1081 fnic->lport->host,
1082 "fnic_rport_reset_exch called portid 0x%06x\n",
1083 port_id);
1084
1085 if (fnic->in_remove)
1086 return;
1087
1088 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
1089 sc = scsi_host_find_tag(fnic->lport->host, tag);
1090 if (!sc)
1091 continue;
1092
1093 io_lock = fnic_io_lock_hash(fnic, sc);
1094 spin_lock_irqsave(io_lock, flags);
1095
1096 io_req = (struct fnic_io_req *)CMD_SP(sc);
1097
1098 if (!io_req || io_req->port_id != port_id) {
1099 spin_unlock_irqrestore(io_lock, flags);
1100 continue;
1101 }
1102
1103 /*
1104 * Found IO that is still pending with firmware and
1105		 * belongs to the rport that went away
1106 */
1107 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1108 spin_unlock_irqrestore(io_lock, flags);
1109 continue;
1110 }
1111 old_ioreq_state = CMD_STATE(sc);
1112 CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
1113 CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
1114
1115 BUG_ON(io_req->abts_done);
1116
1117 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1118 "fnic_rport_reset_exch: Issuing abts\n");
1119
1120 spin_unlock_irqrestore(io_lock, flags);
1121
1122 /* Now queue the abort command to firmware */
1123 int_to_scsilun(sc->device->lun, &fc_lun);
1124
1125 if (fnic_queue_abort_io_req(fnic, tag,
1126 FCPIO_ITMF_ABT_TASK_TERM,
1127 fc_lun.scsi_lun, io_req)) {
1128 /*
1129			 * Revert the cmd state to its old state, if it
1130			 * hasn't changed in the meantime. This cmd will be
1131			 * aborted later by scsi_eh, or cleaned up during
1132			 * LUN reset
1133 */
1134 io_lock = fnic_io_lock_hash(fnic, sc);
1135
1136 spin_lock_irqsave(io_lock, flags);
1137 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
1138 CMD_STATE(sc) = old_ioreq_state;
1139 spin_unlock_irqrestore(io_lock, flags);
1140 }
1141 }
1142
1143}
1144
1145void fnic_terminate_rport_io(struct fc_rport *rport)
1146{
1147 int tag;
1148 struct fnic_io_req *io_req;
1149 spinlock_t *io_lock;
1150 unsigned long flags;
1151 struct scsi_cmnd *sc;
1152 struct scsi_lun fc_lun;
1153 struct fc_rport_libfc_priv *rdata = rport->dd_data;
1154 struct fc_lport *lport = rdata->local_port;
1155 struct fnic *fnic = lport_priv(lport);
1156 struct fc_rport *cmd_rport;
1157 enum fnic_ioreq_state old_ioreq_state;
1158
1159 FNIC_SCSI_DBG(KERN_DEBUG,
1160 fnic->lport->host, "fnic_terminate_rport_io called"
1161 " wwpn 0x%llx, wwnn0x%llx, portid 0x%06x\n",
1162 rport->port_name, rport->node_name,
1163 rport->port_id);
1164
1165 if (fnic->in_remove)
1166 return;
1167
1168 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
1169 sc = scsi_host_find_tag(fnic->lport->host, tag);
1170 if (!sc)
1171 continue;
1172
1173 cmd_rport = starget_to_rport(scsi_target(sc->device));
1174 if (rport != cmd_rport)
1175 continue;
1176
1177 io_lock = fnic_io_lock_hash(fnic, sc);
1178 spin_lock_irqsave(io_lock, flags);
1179
1180 io_req = (struct fnic_io_req *)CMD_SP(sc);
1181
1182 if (!io_req || rport != cmd_rport) {
1183 spin_unlock_irqrestore(io_lock, flags);
1184 continue;
1185 }
1186
1187 /*
1188 * Found IO that is still pending with firmware and
1189		 * belongs to the rport that went away
1190 */
1191 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1192 spin_unlock_irqrestore(io_lock, flags);
1193 continue;
1194 }
1195 old_ioreq_state = CMD_STATE(sc);
1196 CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
1197 CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
1198
1199 BUG_ON(io_req->abts_done);
1200
1201 FNIC_SCSI_DBG(KERN_DEBUG,
1202 fnic->lport->host,
1203 "fnic_terminate_rport_io: Issuing abts\n");
1204
1205 spin_unlock_irqrestore(io_lock, flags);
1206
1207 /* Now queue the abort command to firmware */
1208 int_to_scsilun(sc->device->lun, &fc_lun);
1209
1210 if (fnic_queue_abort_io_req(fnic, tag,
1211 FCPIO_ITMF_ABT_TASK_TERM,
1212 fc_lun.scsi_lun, io_req)) {
1213 /*
1214			 * Revert the cmd state to its old state, if it
1215			 * hasn't changed in the meantime. This cmd will be
1216			 * aborted later by scsi_eh, or cleaned up during
1217			 * LUN reset
1218 */
1219 io_lock = fnic_io_lock_hash(fnic, sc);
1220
1221 spin_lock_irqsave(io_lock, flags);
1222 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
1223 CMD_STATE(sc) = old_ioreq_state;
1224 spin_unlock_irqrestore(io_lock, flags);
1225 }
1226 }
1227
1228}
1229
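/*
 * Hold off error handling while the fc_transport has the rport
 * blocked: poll rport->port_state under the host lock, sleeping 1s
 * between checks, until it leaves FC_PORTSTATE_BLOCKED.
 */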
1230static void fnic_block_error_handler(struct scsi_cmnd *sc)
1231{
1232 struct Scsi_Host *shost = sc->device->host;
1233 struct fc_rport *rport = starget_to_rport(scsi_target(sc->device));
1234 unsigned long flags;
1235
1236 spin_lock_irqsave(shost->host_lock, flags);
1237 while (rport->port_state == FC_PORTSTATE_BLOCKED) {
1238 spin_unlock_irqrestore(shost->host_lock, flags);
1239 msleep(1000);
1240 spin_lock_irqsave(shost->host_lock, flags);
1241 }
1242 spin_unlock_irqrestore(shost->host_lock, flags);
1243
1244}
1245
1246/*
1247 * This function is exported to SCSI for sending abort cmnds.
1248 * A SCSI IO is represented by an io_req in the driver.
1249 * The io_req is linked to the SCSI cmd, and thus to the ULP's IO.
1250 */
1251int fnic_abort_cmd(struct scsi_cmnd *sc)
1252{
1253 struct fc_lport *lp;
1254 struct fnic *fnic;
1255 struct fnic_io_req *io_req;
1256 struct fc_rport *rport;
1257 spinlock_t *io_lock;
1258 unsigned long flags;
1259 int ret = SUCCESS;
1260 u32 task_req;
1261 struct scsi_lun fc_lun;
1262 DECLARE_COMPLETION_ONSTACK(tm_done);
1263
1264 /* Wait for rport to unblock */
1265 fnic_block_error_handler(sc);
1266
1267 /* Get local-port, check ready and link up */
1268 lp = shost_priv(sc->device->host);
1269
1270 fnic = lport_priv(lp);
1271 FNIC_SCSI_DBG(KERN_DEBUG,
1272 fnic->lport->host,
1273 "Abort Cmd called FCID 0x%x, LUN 0x%x TAG %d\n",
1274 (starget_to_rport(scsi_target(sc->device)))->port_id,
1275 sc->device->lun, sc->request->tag);
1276
1277 if (lp->state != LPORT_ST_READY || !(lp->link_up)) {
1278 ret = FAILED;
1279 goto fnic_abort_cmd_end;
1280 }
1281
1282 /*
1283 * Avoid a race between SCSI issuing the abort and the device
1284 * completing the command.
1285 *
1286 * If the command is already completed by the fw cmpl code,
1287 * we just return SUCCESS from here. This means that the abort
1288	 * succeeded. In the SCSI ML, since the timeout for the command
1289	 * has expired, the completion won't actually complete the command,
1290	 * and it will be treated as an aborted command
1291 *
1292 * The CMD_SP will not be cleared except while holding io_req_lock.
1293 */
1294 io_lock = fnic_io_lock_hash(fnic, sc);
1295 spin_lock_irqsave(io_lock, flags);
1296 io_req = (struct fnic_io_req *)CMD_SP(sc);
1297 if (!io_req) {
1298 spin_unlock_irqrestore(io_lock, flags);
1299 goto fnic_abort_cmd_end;
1300 }
1301
1302 io_req->abts_done = &tm_done;
1303
1304 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1305 spin_unlock_irqrestore(io_lock, flags);
1306 goto wait_pending;
1307 }
1308 /*
1309	 * Command is still pending; we need to abort it.
1310	 * If the firmware completes the command after this point,
1311	 * the completion won't reach the mid-layer, since the abort
1312 * has already started.
1313 */
1314 CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
1315 CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
1316
1317 spin_unlock_irqrestore(io_lock, flags);
1318
1319 /*
1320 * Check readiness of the remote port. If the path to remote
1321 * port is up, then send abts to the remote port to terminate
1322 * the IO. Else, just locally terminate the IO in the firmware
1323 */
1324 rport = starget_to_rport(scsi_target(sc->device));
1325 if (fc_remote_port_chkready(rport) == 0)
1326 task_req = FCPIO_ITMF_ABT_TASK;
1327 else
1328 task_req = FCPIO_ITMF_ABT_TASK_TERM;
1329
1330 /* Now queue the abort command to firmware */
1331 int_to_scsilun(sc->device->lun, &fc_lun);
1332
1333 if (fnic_queue_abort_io_req(fnic, sc->request->tag, task_req,
1334 fc_lun.scsi_lun, io_req)) {
1335 spin_lock_irqsave(io_lock, flags);
1336 io_req = (struct fnic_io_req *)CMD_SP(sc);
1337 if (io_req)
1338 io_req->abts_done = NULL;
1339 spin_unlock_irqrestore(io_lock, flags);
1340 ret = FAILED;
1341 goto fnic_abort_cmd_end;
1342 }
1343
1344 /*
1345 * We queued an abort IO, wait for its completion.
1346 * Once the firmware completes the abort command, it will
1347 * wake up this thread.
1348 */
1349 wait_pending:
1350 wait_for_completion_timeout(&tm_done,
1351 msecs_to_jiffies
1352 (2 * fnic->config.ra_tov +
1353 fnic->config.ed_tov));
1354
1355 /* Check the abort status */
1356 spin_lock_irqsave(io_lock, flags);
1357
1358 io_req = (struct fnic_io_req *)CMD_SP(sc);
1359 if (!io_req) {
1360 spin_unlock_irqrestore(io_lock, flags);
1361 ret = FAILED;
1362 goto fnic_abort_cmd_end;
1363 }
1364 io_req->abts_done = NULL;
1365
1366 /* fw did not complete abort, timed out */
1367 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1368 spin_unlock_irqrestore(io_lock, flags);
1369 ret = FAILED;
1370 goto fnic_abort_cmd_end;
1371 }
1372
1373 /*
1374 * firmware completed the abort, check the status,
1375 * free the io_req irrespective of failure or success
1376 */
1377 if (CMD_ABTS_STATUS(sc) != FCPIO_SUCCESS)
1378 ret = FAILED;
1379
1380 CMD_SP(sc) = NULL;
1381
1382 spin_unlock_irqrestore(io_lock, flags);
1383
1384 fnic_release_ioreq_buf(fnic, io_req, sc);
1385 mempool_free(io_req, fnic->io_req_pool);
1386
1387fnic_abort_cmd_end:
1388 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1389 "Returning from abort cmd %s\n",
1390 (ret == SUCCESS) ?
1391 "SUCCESS" : "FAILED");
1392 return ret;
1393}
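
/*
 * Illustrative summary of the abort protocol above (not normative):
 *   1. CMD_STATE -> FNIC_IOREQ_ABTS_PENDING, CMD_ABTS_STATUS ->
 *      FCPIO_INVALID_CODE
 *   2. post FCPIO_ITMF_ABT_TASK if the rport is ready, else
 *      FCPIO_ITMF_ABT_TASK_TERM
 *   3. wait up to (2 * ra_tov + ed_tov) ms for the fw completion to
 *      signal io_req->abts_done
 *   4. still ABTS_PENDING means timeout, so FAILED; otherwise return
 *      per CMD_ABTS_STATUS and free the io_req either way
 */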
1394
1395static inline int fnic_queue_dr_io_req(struct fnic *fnic,
1396 struct scsi_cmnd *sc,
1397 struct fnic_io_req *io_req)
1398{
1399 struct vnic_wq_copy *wq = &fnic->wq_copy[0];
1400 struct scsi_lun fc_lun;
1401 int ret = 0;
1402 unsigned long intr_flags;
1403
1404 spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags);
1405
1406 if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
1407 free_wq_copy_descs(fnic, wq);
1408
1409 if (!vnic_wq_copy_desc_avail(wq)) {
1410 ret = -EAGAIN;
1411 goto lr_io_req_end;
1412 }
1413
1414 /* fill in the lun info */
1415 int_to_scsilun(sc->device->lun, &fc_lun);
1416
1417 fnic_queue_wq_copy_desc_itmf(wq, sc->request->tag | FNIC_TAG_DEV_RST,
1418 0, FCPIO_ITMF_LUN_RESET, SCSI_NO_TAG,
1419 fc_lun.scsi_lun, io_req->port_id,
1420 fnic->config.ra_tov, fnic->config.ed_tov);
1421
1422lr_io_req_end:
1423 spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags);
1424
1425 return ret;
1426}
1427
1428/*
1429 * Clean up any pending aborts on the lun
1430 * For each outstanding IO on this lun whose abort has not been completed
1431 * by fw, issue a local abort and wait for it to complete. Return 0 if all
1432 * commands are successfully aborted, 1 otherwise
1433 */
1434static int fnic_clean_pending_aborts(struct fnic *fnic,
1435 struct scsi_cmnd *lr_sc)
1436{
1437 int tag;
1438 struct fnic_io_req *io_req;
1439 spinlock_t *io_lock;
1440 unsigned long flags;
1441 int ret = 0;
1442 struct scsi_cmnd *sc;
1443 struct fc_rport *rport;
1444 struct scsi_lun fc_lun;
1445 struct scsi_device *lun_dev = lr_sc->device;
1446 DECLARE_COMPLETION_ONSTACK(tm_done);
1447
1448 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
1449 sc = scsi_host_find_tag(fnic->lport->host, tag);
1450 /*
1451 * ignore this lun reset cmd or cmds that do not belong to
1452 * this lun
1453 */
1454 if (!sc || sc == lr_sc || sc->device != lun_dev)
1455 continue;
1456
1457 io_lock = fnic_io_lock_hash(fnic, sc);
1458 spin_lock_irqsave(io_lock, flags);
1459
1460 io_req = (struct fnic_io_req *)CMD_SP(sc);
1461
1462 if (!io_req || sc->device != lun_dev) {
1463 spin_unlock_irqrestore(io_lock, flags);
1464 continue;
1465 }
1466
1467 /*
1468 * Found IO that is still pending with firmware and
1469 * belongs to the LUN that we are resetting
1470 */
1471 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1472 "Found IO in %s on lun\n",
1473 fnic_ioreq_state_to_str(CMD_STATE(sc)));
1474
1475 BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING);
1476
1477 CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
1478 io_req->abts_done = &tm_done;
1479 spin_unlock_irqrestore(io_lock, flags);
1480
1481 /* Now queue the abort command to firmware */
1482 int_to_scsilun(sc->device->lun, &fc_lun);
1483 rport = starget_to_rport(scsi_target(sc->device));
1484
1485 if (fnic_queue_abort_io_req(fnic, tag,
1486 FCPIO_ITMF_ABT_TASK_TERM,
1487 fc_lun.scsi_lun, io_req)) {
1488 spin_lock_irqsave(io_lock, flags);
1489 io_req = (struct fnic_io_req *)CMD_SP(sc);
1490 if (io_req)
1491 io_req->abts_done = NULL;
1492 spin_unlock_irqrestore(io_lock, flags);
1493 ret = 1;
1494 goto clean_pending_aborts_end;
1495 }
1496
1497 wait_for_completion_timeout(&tm_done,
1498 msecs_to_jiffies
1499 (fnic->config.ed_tov));
1500
1501 /* Recheck cmd state to check if it is now aborted */
1502 spin_lock_irqsave(io_lock, flags);
1503 io_req = (struct fnic_io_req *)CMD_SP(sc);
1504 if (!io_req) {
1505 spin_unlock_irqrestore(io_lock, flags);
1506 ret = 1;
1507 goto clean_pending_aborts_end;
1508 }
1509
1510 io_req->abts_done = NULL;
1511
1512 /* if abort is still pending with fw, fail */
1513 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1514 spin_unlock_irqrestore(io_lock, flags);
1515 ret = 1;
1516 goto clean_pending_aborts_end;
1517 }
1518 CMD_SP(sc) = NULL;
1519 spin_unlock_irqrestore(io_lock, flags);
1520
1521 fnic_release_ioreq_buf(fnic, io_req, sc);
1522 mempool_free(io_req, fnic->io_req_pool);
1523 }
1524
1525clean_pending_aborts_end:
1526 return ret;
1527}
1528
1529/*
1530 * The SCSI EH thread issues a LUN reset when one or more commands on a LUN
1531 * fail to get aborted. It calls the driver's eh_device_reset with a SCSI command
1532 * on the LUN.
1533 */
1534int fnic_device_reset(struct scsi_cmnd *sc)
1535{
1536 struct fc_lport *lp;
1537 struct fnic *fnic;
1538 struct fnic_io_req *io_req;
1539 struct fc_rport *rport;
1540 int status;
1541 int ret = FAILED;
1542 spinlock_t *io_lock;
1543 unsigned long flags;
1544 DECLARE_COMPLETION_ONSTACK(tm_done);
1545
1546 /* Wait for rport to unblock */
1547 fnic_block_error_handler(sc);
1548
1549 /* Get local-port, check ready and link up */
1550 lp = shost_priv(sc->device->host);
1551
1552 fnic = lport_priv(lp);
1553 FNIC_SCSI_DBG(KERN_DEBUG,
1554 fnic->lport->host,
1555 "Device reset called FCID 0x%x, LUN 0x%x\n",
1556 (starget_to_rport(scsi_target(sc->device)))->port_id,
1557 sc->device->lun);
1558
1559
1560 if (lp->state != LPORT_ST_READY || !(lp->link_up))
1561 goto fnic_device_reset_end;
1562
1563 /* Check if remote port up */
1564 rport = starget_to_rport(scsi_target(sc->device));
1565 if (fc_remote_port_chkready(rport))
1566 goto fnic_device_reset_end;
1567
1568 io_lock = fnic_io_lock_hash(fnic, sc);
1569 spin_lock_irqsave(io_lock, flags);
1570 io_req = (struct fnic_io_req *)CMD_SP(sc);
1571
1572 /*
1573 * If there is a io_req attached to this command, then use it,
1574 * else allocate a new one.
1575 */
1576 if (!io_req) {
1577 io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
1578 if (!io_req) {
1579 spin_unlock_irqrestore(io_lock, flags);
1580 goto fnic_device_reset_end;
1581 }
1582 memset(io_req, 0, sizeof(*io_req));
1583 io_req->port_id = rport->port_id;
1584 CMD_SP(sc) = (char *)io_req;
1585 }
1586 io_req->dr_done = &tm_done;
1587 CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING;
1588 CMD_LR_STATUS(sc) = FCPIO_INVALID_CODE;
1589 spin_unlock_irqrestore(io_lock, flags);
1590
1591 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %d\n",
1592 sc->request->tag);
1593
1594 /*
1595	 * Issue the device reset; if the enqueue fails, clean up the io_req
1596	 * and break the association with the scsi cmd
1597 */
1598 if (fnic_queue_dr_io_req(fnic, sc, io_req)) {
1599 spin_lock_irqsave(io_lock, flags);
1600 io_req = (struct fnic_io_req *)CMD_SP(sc);
1601 if (io_req)
1602 io_req->dr_done = NULL;
1603 goto fnic_device_reset_clean;
1604 }
1605
1606 /*
1607 * Wait on the local completion for LUN reset. The io_req may be
1608 * freed while we wait since we hold no lock.
1609 */
1610 wait_for_completion_timeout(&tm_done,
1611 msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT));
1612
1613 spin_lock_irqsave(io_lock, flags);
1614 io_req = (struct fnic_io_req *)CMD_SP(sc);
1615 if (!io_req) {
1616 spin_unlock_irqrestore(io_lock, flags);
1617 goto fnic_device_reset_end;
1618 }
1619 io_req->dr_done = NULL;
1620
1621 status = CMD_LR_STATUS(sc);
1622 spin_unlock_irqrestore(io_lock, flags);
1623
1624 /*
1625	 * If the LUN reset did not complete, bail out with FAILED. The
1626	 * io_req gets cleaned up at higher levels of EH
1627 */
1628 if (status == FCPIO_INVALID_CODE) {
1629 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1630 "Device reset timed out\n");
1631 goto fnic_device_reset_end;
1632 }
1633
1634 /* Completed, but not successful, clean up the io_req, return fail */
1635 if (status != FCPIO_SUCCESS) {
1636 spin_lock_irqsave(io_lock, flags);
1637 FNIC_SCSI_DBG(KERN_DEBUG,
1638 fnic->lport->host,
1639 "Device reset completed - failed\n");
1640 io_req = (struct fnic_io_req *)CMD_SP(sc);
1641 goto fnic_device_reset_clean;
1642 }
1643
1644 /*
1645 * Clean up any aborts on this lun that have still not
1646 * completed. If any of these fail, then LUN reset fails.
1647 * clean_pending_aborts cleans all cmds on this lun except
1648 * the lun reset cmd. If all cmds get cleaned, the lun reset
1649 * succeeds
1650 */
1651 if (fnic_clean_pending_aborts(fnic, sc)) {
1652 spin_lock_irqsave(io_lock, flags);
1653 io_req = (struct fnic_io_req *)CMD_SP(sc);
1654 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1655 "Device reset failed"
1656 " since could not abort all IOs\n");
1657 goto fnic_device_reset_clean;
1658 }
1659
1660 /* Clean lun reset command */
1661 spin_lock_irqsave(io_lock, flags);
1662 io_req = (struct fnic_io_req *)CMD_SP(sc);
1663 if (io_req)
1664 /* Completed, and successful */
1665 ret = SUCCESS;
1666
1667fnic_device_reset_clean:
1668 if (io_req)
1669 CMD_SP(sc) = NULL;
1670
1671 spin_unlock_irqrestore(io_lock, flags);
1672
1673 if (io_req) {
1674 fnic_release_ioreq_buf(fnic, io_req, sc);
1675 mempool_free(io_req, fnic->io_req_pool);
1676 }
1677
1678fnic_device_reset_end:
1679 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1680 "Returning from device reset %s\n",
1681 (ret == SUCCESS) ?
1682 "SUCCESS" : "FAILED");
1683 return ret;
1684}
1685
1686/* Clean up all IOs, clean up libFC local port */
1687int fnic_reset(struct Scsi_Host *shost)
1688{
1689 struct fc_lport *lp;
1690 struct fnic *fnic;
1691 int ret = SUCCESS;
1692
1693 lp = shost_priv(shost);
1694 fnic = lport_priv(lp);
1695
1696 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1697 "fnic_reset called\n");
1698
1699 /*
1700	 * Reset the local port; this cleans up libFC exchanges,
1701	 * resets remote port sessions and, if the link is up, begins FLOGI
1702 */
1703 if (lp->tt.lport_reset(lp))
1704 ret = FAILED;
1705
1706 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1707 "Returning from fnic reset %s\n",
1708 (ret == SUCCESS) ?
1709 "SUCCESS" : "FAILED");
1710
1711 return ret;
1712}
1713
1714/*
1715 * SCSI Error handling calls driver's eh_host_reset if all prior
1716 * error handling levels return FAILED. If host reset completes
1717 * successfully, and if link is up, then Fabric login begins.
1718 *
1719 * Host Reset is the highest level of error recovery. If this fails, then
1720 * host is offlined by SCSI.
1721 *
1722 */
1723int fnic_host_reset(struct scsi_cmnd *sc)
1724{
1725 int ret;
1726 unsigned long wait_host_tmo;
1727 struct Scsi_Host *shost = sc->device->host;
1728 struct fc_lport *lp = shost_priv(shost);
1729
1730 /*
1731	 * If fnic_reset is successful, wait for fabric login to complete.
1732	 * scsi-ml sends a TUR to every device if host reset is
1733	 * successful, so the fabric should be up before returning to SCSI
1734 */
1735 ret = fnic_reset(shost);
1736 if (ret == SUCCESS) {
1737 wait_host_tmo = jiffies + FNIC_HOST_RESET_SETTLE_TIME * HZ;
1738 ret = FAILED;
1739 while (time_before(jiffies, wait_host_tmo)) {
1740 if ((lp->state == LPORT_ST_READY) &&
1741 (lp->link_up)) {
1742 ret = SUCCESS;
1743 break;
1744 }
1745 ssleep(1);
1746 }
1747 }
1748
1749 return ret;
1750}
1751
1752/*
1753 * This function is called from libFC when the host is removed
1754 */
1755void fnic_scsi_abort_io(struct fc_lport *lp)
1756{
1757 int err = 0;
1758 unsigned long flags;
1759 enum fnic_state old_state;
1760 struct fnic *fnic = lport_priv(lp);
1761 DECLARE_COMPLETION_ONSTACK(remove_wait);
1762
1763 /* Issue firmware reset for fnic, wait for reset to complete */
1764 spin_lock_irqsave(&fnic->fnic_lock, flags);
1765 fnic->remove_wait = &remove_wait;
1766 old_state = fnic->state;
1767 fnic->state = FNIC_IN_FC_TRANS_ETH_MODE;
1768 vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr);
1769 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
1770
1771 err = fnic_fw_reset_handler(fnic);
1772 if (err) {
1773 spin_lock_irqsave(&fnic->fnic_lock, flags);
1774 if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)
1775 fnic->state = old_state;
1776 fnic->remove_wait = NULL;
1777 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
1778 return;
1779 }
1780
1781 /* Wait for firmware reset to complete */
1782 wait_for_completion_timeout(&remove_wait,
1783 msecs_to_jiffies(FNIC_RMDEVICE_TIMEOUT));
1784
1785 spin_lock_irqsave(&fnic->fnic_lock, flags);
1786 fnic->remove_wait = NULL;
1787 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
1788 "fnic_scsi_abort_io %s\n",
1789 (fnic->state == FNIC_IN_ETH_MODE) ?
1790 "SUCCESS" : "FAILED");
1791 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
1792
1793}
1794
1795/*
1796 * This function is called from libFC to clean up driver IO state on link down
1797 */
1798void fnic_scsi_cleanup(struct fc_lport *lp)
1799{
1800 unsigned long flags;
1801 enum fnic_state old_state;
1802 struct fnic *fnic = lport_priv(lp);
1803
1804 /* issue fw reset */
1805 spin_lock_irqsave(&fnic->fnic_lock, flags);
1806 old_state = fnic->state;
1807 fnic->state = FNIC_IN_FC_TRANS_ETH_MODE;
1808 vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr);
1809 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
1810
1811 if (fnic_fw_reset_handler(fnic)) {
1812 spin_lock_irqsave(&fnic->fnic_lock, flags);
1813 if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)
1814 fnic->state = old_state;
1815 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
1816 }
1817
1818}
1819
1820void fnic_empty_scsi_cleanup(struct fc_lport *lp)
1821{
1822}
1823
1824void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did)
1825{
1826 struct fnic *fnic = lport_priv(lp);
1827
1828 /* Non-zero sid, nothing to do */
1829 if (sid)
1830 goto call_fc_exch_mgr_reset;
1831
1832 if (did) {
1833 fnic_rport_exch_reset(fnic, did);
1834 goto call_fc_exch_mgr_reset;
1835 }
1836
1837 /*
1838 * sid = 0, did = 0
1839 * link down or device being removed
1840 */
1841 if (!fnic->in_remove)
1842 fnic_scsi_cleanup(lp);
1843 else
1844 fnic_scsi_abort_io(lp);
1845
1846 /* call libFC exch mgr reset to reset its exchanges */
1847call_fc_exch_mgr_reset:
1848 fc_exch_mgr_reset(lp, sid, did);
1849
1850}
diff --git a/drivers/scsi/fnic/rq_enet_desc.h b/drivers/scsi/fnic/rq_enet_desc.h
new file mode 100644
index 000000000000..92e80ae6b725
--- /dev/null
+++ b/drivers/scsi/fnic/rq_enet_desc.h
@@ -0,0 +1,58 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _RQ_ENET_DESC_H_
19#define _RQ_ENET_DESC_H_
20
21/* Ethernet receive queue descriptor: 16B */
22struct rq_enet_desc {
23 __le64 address;
24 __le16 length_type;
25 u8 reserved[6];
26};
27
28enum rq_enet_type_types {
29 RQ_ENET_TYPE_ONLY_SOP = 0,
30 RQ_ENET_TYPE_NOT_SOP = 1,
31 RQ_ENET_TYPE_RESV2 = 2,
32 RQ_ENET_TYPE_RESV3 = 3,
33};
34
35#define RQ_ENET_ADDR_BITS 64
36#define RQ_ENET_LEN_BITS 14
37#define RQ_ENET_LEN_MASK ((1 << RQ_ENET_LEN_BITS) - 1)
38#define RQ_ENET_TYPE_BITS 2
39#define RQ_ENET_TYPE_MASK ((1 << RQ_ENET_TYPE_BITS) - 1)
40
41static inline void rq_enet_desc_enc(struct rq_enet_desc *desc,
42 u64 address, u8 type, u16 length)
43{
44 desc->address = cpu_to_le64(address);
45 desc->length_type = cpu_to_le16((length & RQ_ENET_LEN_MASK) |
46 ((type & RQ_ENET_TYPE_MASK) << RQ_ENET_LEN_BITS));
47}
48
49static inline void rq_enet_desc_dec(struct rq_enet_desc *desc,
50 u64 *address, u8 *type, u16 *length)
51{
52 *address = le64_to_cpu(desc->address);
53 *length = le16_to_cpu(desc->length_type) & RQ_ENET_LEN_MASK;
54 *type = (u8)((le16_to_cpu(desc->length_type) >> RQ_ENET_LEN_BITS) &
55 RQ_ENET_TYPE_MASK);
56}
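
/*
 * Worked example (illustrative): encoding a 1500-byte, SOP-only
 * buffer at bus address 0x1000 stores
 *
 *	length_type = cpu_to_le16((1500 & RQ_ENET_LEN_MASK) |
 *		(RQ_ENET_TYPE_ONLY_SOP << RQ_ENET_LEN_BITS))
 *		= cpu_to_le16(0x05dc)
 *
 * since the low 14 bits carry the length and the top 2 bits the type.
 */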
57
58#endif /* _RQ_ENET_DESC_H_ */
diff --git a/drivers/scsi/fnic/vnic_cq.c b/drivers/scsi/fnic/vnic_cq.c
new file mode 100644
index 000000000000..c5db32eda5ef
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_cq.c
@@ -0,0 +1,85 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#include <linux/errno.h>
19#include <linux/types.h>
20#include <linux/pci.h>
21#include "vnic_dev.h"
22#include "vnic_cq.h"
23
24void vnic_cq_free(struct vnic_cq *cq)
25{
26 vnic_dev_free_desc_ring(cq->vdev, &cq->ring);
27
28 cq->ctrl = NULL;
29}
30
31int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index,
32 unsigned int desc_count, unsigned int desc_size)
33{
34 int err;
35
36 cq->index = index;
37 cq->vdev = vdev;
38
39 cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index);
40 if (!cq->ctrl) {
41 printk(KERN_ERR "Failed to hook CQ[%d] resource\n", index);
42 return -EINVAL;
43 }
44
45 err = vnic_dev_alloc_desc_ring(vdev, &cq->ring, desc_count, desc_size);
46 if (err)
47 return err;
48
49 return 0;
50}
51
52void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable,
53 unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail,
54 unsigned int cq_tail_color, unsigned int interrupt_enable,
55 unsigned int cq_entry_enable, unsigned int cq_message_enable,
56 unsigned int interrupt_offset, u64 cq_message_addr)
57{
58 u64 paddr;
59
60 paddr = (u64)cq->ring.base_addr | VNIC_PADDR_TARGET;
61 writeq(paddr, &cq->ctrl->ring_base);
62 iowrite32(cq->ring.desc_count, &cq->ctrl->ring_size);
63 iowrite32(flow_control_enable, &cq->ctrl->flow_control_enable);
64 iowrite32(color_enable, &cq->ctrl->color_enable);
65 iowrite32(cq_head, &cq->ctrl->cq_head);
66 iowrite32(cq_tail, &cq->ctrl->cq_tail);
67 iowrite32(cq_tail_color, &cq->ctrl->cq_tail_color);
68 iowrite32(interrupt_enable, &cq->ctrl->interrupt_enable);
69 iowrite32(cq_entry_enable, &cq->ctrl->cq_entry_enable);
70 iowrite32(cq_message_enable, &cq->ctrl->cq_message_enable);
71 iowrite32(interrupt_offset, &cq->ctrl->interrupt_offset);
72 writeq(cq_message_addr, &cq->ctrl->cq_message_addr);
73}
74
75void vnic_cq_clean(struct vnic_cq *cq)
76{
77 cq->to_clean = 0;
78 cq->last_color = 0;
79
80 iowrite32(0, &cq->ctrl->cq_head);
81 iowrite32(0, &cq->ctrl->cq_tail);
82 iowrite32(1, &cq->ctrl->cq_tail_color);
83
84 vnic_dev_clear_desc_ring(&cq->ring);
85}
diff --git a/drivers/scsi/fnic/vnic_cq.h b/drivers/scsi/fnic/vnic_cq.h
new file mode 100644
index 000000000000..4ede6809fb1e
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_cq.h
@@ -0,0 +1,121 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_CQ_H_
19#define _VNIC_CQ_H_
20
21#include "cq_desc.h"
22#include "vnic_dev.h"
23
24/*
25 * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth
26 * Driver) when both are built with CONFIG options =y
27 */
28#define vnic_cq_service fnic_cq_service
29#define vnic_cq_free fnic_cq_free
30#define vnic_cq_alloc fnic_cq_alloc
31#define vnic_cq_init fnic_cq_init
32#define vnic_cq_clean fnic_cq_clean
33
34/* Completion queue control */
35struct vnic_cq_ctrl {
36 u64 ring_base; /* 0x00 */
37 u32 ring_size; /* 0x08 */
38 u32 pad0;
39 u32 flow_control_enable; /* 0x10 */
40 u32 pad1;
41 u32 color_enable; /* 0x18 */
42 u32 pad2;
43 u32 cq_head; /* 0x20 */
44 u32 pad3;
45 u32 cq_tail; /* 0x28 */
46 u32 pad4;
47 u32 cq_tail_color; /* 0x30 */
48 u32 pad5;
49 u32 interrupt_enable; /* 0x38 */
50 u32 pad6;
51 u32 cq_entry_enable; /* 0x40 */
52 u32 pad7;
53 u32 cq_message_enable; /* 0x48 */
54 u32 pad8;
55 u32 interrupt_offset; /* 0x50 */
56 u32 pad9;
57 u64 cq_message_addr; /* 0x58 */
58 u32 pad10;
59};
60
61struct vnic_cq {
62 unsigned int index;
63 struct vnic_dev *vdev;
64 struct vnic_cq_ctrl __iomem *ctrl; /* memory-mapped */
65 struct vnic_dev_ring ring;
66 unsigned int to_clean;
67 unsigned int last_color;
68};
69
70static inline unsigned int vnic_cq_service(struct vnic_cq *cq,
71 unsigned int work_to_do,
72 int (*q_service)(struct vnic_dev *vdev, struct cq_desc *cq_desc,
73 u8 type, u16 q_number, u16 completed_index, void *opaque),
74 void *opaque)
75{
76 struct cq_desc *cq_desc;
77 unsigned int work_done = 0;
78 u16 q_number, completed_index;
79 u8 type, color;
80
81 cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs +
82 cq->ring.desc_size * cq->to_clean);
83 cq_desc_dec(cq_desc, &type, &color,
84 &q_number, &completed_index);
85
86 while (color != cq->last_color) {
87
88 if ((*q_service)(cq->vdev, cq_desc, type,
89 q_number, completed_index, opaque))
90 break;
91
92 cq->to_clean++;
93 if (cq->to_clean == cq->ring.desc_count) {
94 cq->to_clean = 0;
95 cq->last_color = cq->last_color ? 0 : 1;
96 }
97
98 cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs +
99 cq->ring.desc_size * cq->to_clean);
100 cq_desc_dec(cq_desc, &type, &color,
101 &q_number, &completed_index);
102
103 work_done++;
104 if (work_done >= work_to_do)
105 break;
106 }
107
108 return work_done;
109}
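
/*
 * Note on the color scheme above (illustrative): the hardware flips
 * the color bit it writes into completions each time it wraps the
 * ring, and the driver flips cq->last_color each time to_clean wraps.
 * A descriptor whose color differs from last_color is therefore new
 * work; once the colors match, the queue is drained up to the
 * hardware's write point without reading any head/tail register.
 */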
110
111void vnic_cq_free(struct vnic_cq *cq);
112int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index,
113 unsigned int desc_count, unsigned int desc_size);
114void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable,
115 unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail,
116 unsigned int cq_tail_color, unsigned int interrupt_enable,
117 unsigned int cq_entry_enable, unsigned int message_enable,
118 unsigned int interrupt_offset, u64 message_addr);
119void vnic_cq_clean(struct vnic_cq *cq);
120
121#endif /* _VNIC_CQ_H_ */
diff --git a/drivers/scsi/fnic/vnic_cq_copy.h b/drivers/scsi/fnic/vnic_cq_copy.h
new file mode 100644
index 000000000000..7901ce255a81
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_cq_copy.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_CQ_COPY_H_
19#define _VNIC_CQ_COPY_H_
20
21#include "fcpio.h"
22
23static inline unsigned int vnic_cq_copy_service(
24 struct vnic_cq *cq,
25 int (*q_service)(struct vnic_dev *vdev,
26 unsigned int index,
27 struct fcpio_fw_req *desc),
28 unsigned int work_to_do)
29
30{
31 struct fcpio_fw_req *desc;
32 unsigned int work_done = 0;
33 u8 color;
34
35 desc = (struct fcpio_fw_req *)((u8 *)cq->ring.descs +
36 cq->ring.desc_size * cq->to_clean);
37 fcpio_color_dec(desc, &color);
38
39 while (color != cq->last_color) {
40
41 if ((*q_service)(cq->vdev, cq->index, desc))
42 break;
43
44 cq->to_clean++;
45 if (cq->to_clean == cq->ring.desc_count) {
46 cq->to_clean = 0;
47 cq->last_color = cq->last_color ? 0 : 1;
48 }
49
50 desc = (struct fcpio_fw_req *)((u8 *)cq->ring.descs +
51 cq->ring.desc_size * cq->to_clean);
52 fcpio_color_dec(desc, &color);
53
54 work_done++;
55 if (work_done >= work_to_do)
56 break;
57 }
58
59 return work_done;
60}
61
62#endif /* _VNIC_CQ_COPY_H_ */
diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c
new file mode 100644
index 000000000000..566770645086
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_dev.c
@@ -0,0 +1,690 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18
19#include <linux/kernel.h>
20#include <linux/errno.h>
21#include <linux/types.h>
22#include <linux/pci.h>
23#include <linux/delay.h>
24#include <linux/if_ether.h>
25#include "vnic_resource.h"
26#include "vnic_devcmd.h"
27#include "vnic_dev.h"
28#include "vnic_stats.h"
29
30struct vnic_res {
31 void __iomem *vaddr;
32 unsigned int count;
33};
34
35struct vnic_dev {
36 void *priv;
37 struct pci_dev *pdev;
38 struct vnic_res res[RES_TYPE_MAX];
39 enum vnic_dev_intr_mode intr_mode;
40 struct vnic_devcmd __iomem *devcmd;
41 struct vnic_devcmd_notify *notify;
42 struct vnic_devcmd_notify notify_copy;
43 dma_addr_t notify_pa;
44 u32 *linkstatus;
45 dma_addr_t linkstatus_pa;
46 struct vnic_stats *stats;
47 dma_addr_t stats_pa;
48 struct vnic_devcmd_fw_info *fw_info;
49 dma_addr_t fw_info_pa;
50};
51
52#define VNIC_MAX_RES_HDR_SIZE \
53 (sizeof(struct vnic_resource_header) + \
54 sizeof(struct vnic_resource) * RES_TYPE_MAX)
55#define VNIC_RES_STRIDE 128
56
57void *vnic_dev_priv(struct vnic_dev *vdev)
58{
59 return vdev->priv;
60}
61
62static int vnic_dev_discover_res(struct vnic_dev *vdev,
63 struct vnic_dev_bar *bar)
64{
65 struct vnic_resource_header __iomem *rh;
66 struct vnic_resource __iomem *r;
67 u8 type;
68
69 if (bar->len < VNIC_MAX_RES_HDR_SIZE) {
70 printk(KERN_ERR "vNIC BAR0 res hdr length error\n");
71 return -EINVAL;
72 }
73
74 rh = bar->vaddr;
75 if (!rh) {
76 printk(KERN_ERR "vNIC BAR0 res hdr not mem-mapped\n");
77 return -EINVAL;
78 }
79
80 if (ioread32(&rh->magic) != VNIC_RES_MAGIC ||
81 ioread32(&rh->version) != VNIC_RES_VERSION) {
82 printk(KERN_ERR "vNIC BAR0 res magic/version error "
83 "exp (%lx/%lx) curr (%x/%x)\n",
84 VNIC_RES_MAGIC, VNIC_RES_VERSION,
85 ioread32(&rh->magic), ioread32(&rh->version));
86 return -EINVAL;
87 }
88
89 r = (struct vnic_resource __iomem *)(rh + 1);
90
91 while ((type = ioread8(&r->type)) != RES_TYPE_EOL) {
92
93 u8 bar_num = ioread8(&r->bar);
94 u32 bar_offset = ioread32(&r->bar_offset);
95 u32 count = ioread32(&r->count);
96 u32 len;
97
98 r++;
99
100 if (bar_num != 0) /* only mapping in BAR0 resources */
101 continue;
102
103 switch (type) {
104 case RES_TYPE_WQ:
105 case RES_TYPE_RQ:
106 case RES_TYPE_CQ:
107 case RES_TYPE_INTR_CTRL:
108 /* each count is stride bytes long */
109 len = count * VNIC_RES_STRIDE;
110 if (len + bar_offset > bar->len) {
111 printk(KERN_ERR "vNIC BAR0 resource %d "
112 "out-of-bounds, offset 0x%x + "
113 "size 0x%x > bar len 0x%lx\n",
114 type, bar_offset,
115 len,
116 bar->len);
117 return -EINVAL;
118 }
119 break;
120 case RES_TYPE_INTR_PBA_LEGACY:
121 case RES_TYPE_DEVCMD:
122 len = count;
123 break;
124 default:
125 continue;
126 }
127
128 vdev->res[type].count = count;
129 vdev->res[type].vaddr = (char __iomem *)bar->vaddr + bar_offset;
130 }
131
132 return 0;
133}
134
135unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev,
136 enum vnic_res_type type)
137{
138 return vdev->res[type].count;
139}
140
141void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type,
142 unsigned int index)
143{
144 if (!vdev->res[type].vaddr)
145 return NULL;
146
147 switch (type) {
148 case RES_TYPE_WQ:
149 case RES_TYPE_RQ:
150 case RES_TYPE_CQ:
151 case RES_TYPE_INTR_CTRL:
152 return (char __iomem *)vdev->res[type].vaddr +
153 index * VNIC_RES_STRIDE;
154 default:
155 return (char __iomem *)vdev->res[type].vaddr;
156 }
157}
158
159unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring,
160 unsigned int desc_count,
161 unsigned int desc_size)
162{
163 /* The base address of the desc rings must be 512 byte aligned.
164 * Descriptor count is aligned to groups of 32 descriptors. A
165 * count of 0 means the maximum 4096 descriptors. Descriptor
166 * size is aligned to 16 bytes.
167 */
168
169 unsigned int count_align = 32;
170 unsigned int desc_align = 16;
171
172 ring->base_align = 512;
173
174 if (desc_count == 0)
175 desc_count = 4096;
176
177 ring->desc_count = ALIGN(desc_count, count_align);
178
179 ring->desc_size = ALIGN(desc_size, desc_align);
180
181 ring->size = ring->desc_count * ring->desc_size;
182 ring->size_unaligned = ring->size + ring->base_align;
183
184 return ring->size_unaligned;
185}
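
/*
 * Worked example (illustrative): desc_count = 100, desc_size = 16
 * gives desc_count = ALIGN(100, 32) = 128, size = 128 * 16 = 2048
 * bytes, and size_unaligned = 2048 + 512 = 2560 bytes, leaving room
 * to round base_addr up to the next 512-byte boundary.
 */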
186
187void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring)
188{
189 memset(ring->descs, 0, ring->size);
190}
191
192int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring,
193 unsigned int desc_count, unsigned int desc_size)
194{
195 vnic_dev_desc_ring_size(ring, desc_count, desc_size);
196
197 ring->descs_unaligned = pci_alloc_consistent(vdev->pdev,
198 ring->size_unaligned,
199 &ring->base_addr_unaligned);
200
201 if (!ring->descs_unaligned) {
202 printk(KERN_ERR
203 "Failed to allocate ring (size=%d), aborting\n",
204 (int)ring->size);
205 return -ENOMEM;
206 }
207
208 ring->base_addr = ALIGN(ring->base_addr_unaligned,
209 ring->base_align);
210 ring->descs = (u8 *)ring->descs_unaligned +
211 (ring->base_addr - ring->base_addr_unaligned);
212
213 vnic_dev_clear_desc_ring(ring);
214
215 ring->desc_avail = ring->desc_count - 1;
216
217 return 0;
218}
219
220void vnic_dev_free_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring)
221{
222 if (ring->descs) {
223 pci_free_consistent(vdev->pdev,
224 ring->size_unaligned,
225 ring->descs_unaligned,
226 ring->base_addr_unaligned);
227 ring->descs = NULL;
228 }
229}
230
231int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
232 u64 *a0, u64 *a1, int wait)
233{
234 struct vnic_devcmd __iomem *devcmd = vdev->devcmd;
235 int delay;
236 u32 status;
237 int dev_cmd_err[] = {
238 /* convert from fw's version of error.h to host's version */
239 0, /* ERR_SUCCESS */
240 EINVAL, /* ERR_EINVAL */
241 EFAULT, /* ERR_EFAULT */
242 EPERM, /* ERR_EPERM */
243 EBUSY, /* ERR_EBUSY */
244 };
245 int err;
246
247 status = ioread32(&devcmd->status);
248 if (status & STAT_BUSY) {
249 printk(KERN_ERR "Busy devcmd %d\n", _CMD_N(cmd));
250 return -EBUSY;
251 }
252
253 if (_CMD_DIR(cmd) & _CMD_DIR_WRITE) {
254 writeq(*a0, &devcmd->args[0]);
255 writeq(*a1, &devcmd->args[1]);
256 wmb();
257 }
258
259 iowrite32(cmd, &devcmd->cmd);
260
261 if ((_CMD_FLAGS(cmd) & _CMD_FLAGS_NOWAIT))
262 return 0;
263
264 for (delay = 0; delay < wait; delay++) {
265
266 udelay(100);
267
268 status = ioread32(&devcmd->status);
269 if (!(status & STAT_BUSY)) {
270
271 if (status & STAT_ERROR) {
272 err = dev_cmd_err[(int)readq(&devcmd->args[0])];
273 printk(KERN_ERR "Error %d devcmd %d\n",
274 err, _CMD_N(cmd));
275 return -err;
276 }
277
278 if (_CMD_DIR(cmd) & _CMD_DIR_READ) {
279 rmb();
280 *a0 = readq(&devcmd->args[0]);
281 *a1 = readq(&devcmd->args[1]);
282 }
283
284 return 0;
285 }
286 }
287
288	printk(KERN_ERR "Timed out devcmd %d\n", _CMD_N(cmd));
289 return -ETIMEDOUT;
290}
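
/*
 * Usage sketch (illustrative): the wrappers below all follow the same
 * pattern -- load the 64-bit argument registers a0/a1, pick a poll
 * budget in 100us ticks, and let vnic_dev_cmd() post the command and
 * poll for !STAT_BUSY:
 *
 *	u64 a0 = 0, a1 = 0;
 *	int wait = 1000;	(up to ~100 ms of polling)
 *	err = vnic_dev_cmd(vdev, CMD_OPEN, &a0, &a1, wait);
 *
 * Read-direction commands return their results in a0/a1.
 */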
291
292int vnic_dev_fw_info(struct vnic_dev *vdev,
293 struct vnic_devcmd_fw_info **fw_info)
294{
295 u64 a0, a1 = 0;
296 int wait = 1000;
297 int err = 0;
298
299 if (!vdev->fw_info) {
300 vdev->fw_info = pci_alloc_consistent(vdev->pdev,
301 sizeof(struct vnic_devcmd_fw_info),
302 &vdev->fw_info_pa);
303 if (!vdev->fw_info)
304 return -ENOMEM;
305
306 a0 = vdev->fw_info_pa;
307
308 /* only get fw_info once and cache it */
309 err = vnic_dev_cmd(vdev, CMD_MCPU_FW_INFO, &a0, &a1, wait);
310 }
311
312 *fw_info = vdev->fw_info;
313
314 return err;
315}
316
317int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, unsigned int size,
318 void *value)
319{
320 u64 a0, a1;
321 int wait = 1000;
322 int err;
323
324 a0 = offset;
325 a1 = size;
326
327 err = vnic_dev_cmd(vdev, CMD_DEV_SPEC, &a0, &a1, wait);
328
329 switch (size) {
330 case 1:
331 *(u8 *)value = (u8)a0;
332 break;
333 case 2:
334 *(u16 *)value = (u16)a0;
335 break;
336 case 4:
337 *(u32 *)value = (u32)a0;
338 break;
339 case 8:
340 *(u64 *)value = a0;
341 break;
342 default:
343 BUG();
344 break;
345 }
346
347 return err;
348}
349
350int vnic_dev_stats_clear(struct vnic_dev *vdev)
351{
352 u64 a0 = 0, a1 = 0;
353 int wait = 1000;
354 return vnic_dev_cmd(vdev, CMD_STATS_CLEAR, &a0, &a1, wait);
355}
356
357int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats)
358{
359 u64 a0, a1;
360 int wait = 1000;
361
362 if (!vdev->stats) {
363 vdev->stats = pci_alloc_consistent(vdev->pdev,
364 sizeof(struct vnic_stats), &vdev->stats_pa);
365 if (!vdev->stats)
366 return -ENOMEM;
367 }
368
369 *stats = vdev->stats;
370 a0 = vdev->stats_pa;
371 a1 = sizeof(struct vnic_stats);
372
373 return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait);
374}
375
376int vnic_dev_close(struct vnic_dev *vdev)
377{
378 u64 a0 = 0, a1 = 0;
379 int wait = 1000;
380 return vnic_dev_cmd(vdev, CMD_CLOSE, &a0, &a1, wait);
381}
382
383int vnic_dev_enable(struct vnic_dev *vdev)
384{
385 u64 a0 = 0, a1 = 0;
386 int wait = 1000;
387 return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait);
388}
389
390int vnic_dev_disable(struct vnic_dev *vdev)
391{
392 u64 a0 = 0, a1 = 0;
393 int wait = 1000;
394 return vnic_dev_cmd(vdev, CMD_DISABLE, &a0, &a1, wait);
395}
396
397int vnic_dev_open(struct vnic_dev *vdev, int arg)
398{
399 u64 a0 = (u32)arg, a1 = 0;
400 int wait = 1000;
401 return vnic_dev_cmd(vdev, CMD_OPEN, &a0, &a1, wait);
402}
403
404int vnic_dev_open_done(struct vnic_dev *vdev, int *done)
405{
406 u64 a0 = 0, a1 = 0;
407 int wait = 1000;
408 int err;
409
410 *done = 0;
411
412 err = vnic_dev_cmd(vdev, CMD_OPEN_STATUS, &a0, &a1, wait);
413 if (err)
414 return err;
415
416 *done = (a0 == 0);
417
418 return 0;
419}
420
421int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg)
422{
423 u64 a0 = (u32)arg, a1 = 0;
424 int wait = 1000;
425 return vnic_dev_cmd(vdev, CMD_SOFT_RESET, &a0, &a1, wait);
426}
427
428int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done)
429{
430 u64 a0 = 0, a1 = 0;
431 int wait = 1000;
432 int err;
433
434 *done = 0;
435
436 err = vnic_dev_cmd(vdev, CMD_SOFT_RESET_STATUS, &a0, &a1, wait);
437 if (err)
438 return err;
439
440 *done = (a0 == 0);
441
442 return 0;
443}
444
445int vnic_dev_hang_notify(struct vnic_dev *vdev)
446{
447	u64 a0 = 0, a1 = 0;
448 int wait = 1000;
449 return vnic_dev_cmd(vdev, CMD_HANG_NOTIFY, &a0, &a1, wait);
450}
451
452int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr)
453{
454 u64 a0, a1;
455 int wait = 1000;
456 int err, i;
457
458 for (i = 0; i < ETH_ALEN; i++)
459 mac_addr[i] = 0;
460
461 err = vnic_dev_cmd(vdev, CMD_MAC_ADDR, &a0, &a1, wait);
462 if (err)
463 return err;
464
465 for (i = 0; i < ETH_ALEN; i++)
466 mac_addr[i] = ((u8 *)&a0)[i];
467
468 return 0;
469}
470
471void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
472 int broadcast, int promisc, int allmulti)
473{
474 u64 a0, a1 = 0;
475 int wait = 1000;
476 int err;
477
478 a0 = (directed ? CMD_PFILTER_DIRECTED : 0) |
479 (multicast ? CMD_PFILTER_MULTICAST : 0) |
480 (broadcast ? CMD_PFILTER_BROADCAST : 0) |
481 (promisc ? CMD_PFILTER_PROMISCUOUS : 0) |
482 (allmulti ? CMD_PFILTER_ALL_MULTICAST : 0);
483
484 err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait);
485 if (err)
486 printk(KERN_ERR "Can't set packet filter\n");
487}
488
489void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr)
490{
491 u64 a0 = 0, a1 = 0;
492 int wait = 1000;
493 int err;
494 int i;
495
496 for (i = 0; i < ETH_ALEN; i++)
497 ((u8 *)&a0)[i] = addr[i];
498
499 err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait);
500 if (err)
501 printk(KERN_ERR
502 "Can't add addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n",
503 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5],
504 err);
505}
506
507void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr)
508{
509 u64 a0 = 0, a1 = 0;
510 int wait = 1000;
511 int err;
512 int i;
513
514 for (i = 0; i < ETH_ALEN; i++)
515 ((u8 *)&a0)[i] = addr[i];
516
517 err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait);
518 if (err)
519 printk(KERN_ERR
520 "Can't del addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n",
521 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5],
522 err);
523}
524
525int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr)
526{
527 u64 a0, a1;
528 int wait = 1000;
529
530 if (!vdev->notify) {
531 vdev->notify = pci_alloc_consistent(vdev->pdev,
532 sizeof(struct vnic_devcmd_notify),
533 &vdev->notify_pa);
534 if (!vdev->notify)
535 return -ENOMEM;
536 }
537
538 a0 = vdev->notify_pa;
539 a1 = ((u64)intr << 32) & 0x0000ffff00000000ULL;
540 a1 += sizeof(struct vnic_devcmd_notify);
541
542 return vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait);
543}
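
/*
 * Layout note for the CMD_NOTIFY arguments above (illustrative):
 * a0 is the bus address of the notify buffer (0 to unset), a1[47:32]
 * is the interrupt to associate with notify updates (0xffff for
 * none), and a1[31:0] is the size of the notify structure.
 */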
544
545void vnic_dev_notify_unset(struct vnic_dev *vdev)
546{
547 u64 a0, a1;
548 int wait = 1000;
549
550 a0 = 0; /* paddr = 0 to unset notify buffer */
551 a1 = 0x0000ffff00000000ULL; /* intr num = -1 to unreg for intr */
552 a1 += sizeof(struct vnic_devcmd_notify);
553
554 vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait);
555}
556
557static int vnic_dev_notify_ready(struct vnic_dev *vdev)
558{
559 u32 *words;
560 unsigned int nwords = sizeof(struct vnic_devcmd_notify) / 4;
561 unsigned int i;
562 u32 csum;
563
564 if (!vdev->notify)
565 return 0;
566
567 do {
568 csum = 0;
569 memcpy(&vdev->notify_copy, vdev->notify,
570 sizeof(struct vnic_devcmd_notify));
571 words = (u32 *)&vdev->notify_copy;
572 for (i = 1; i < nwords; i++)
573 csum += words[i];
574 } while (csum != words[0]);
575
576 return 1;
577}
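
/*
 * The copy loop above guards against torn reads: the notify block is
 * updated by the device asynchronously, with words[0] holding the sum
 * of words[1..n-1], so the driver re-copies its snapshot until the
 * checksum matches and a consistent view is obtained without locking.
 */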
578
579int vnic_dev_init(struct vnic_dev *vdev, int arg)
580{
581 u64 a0 = (u32)arg, a1 = 0;
582 int wait = 1000;
583 return vnic_dev_cmd(vdev, CMD_INIT, &a0, &a1, wait);
584}
585
586int vnic_dev_link_status(struct vnic_dev *vdev)
587{
588 if (vdev->linkstatus)
589 return *vdev->linkstatus;
590
591 if (!vnic_dev_notify_ready(vdev))
592 return 0;
593
594 return vdev->notify_copy.link_state;
595}
596
597u32 vnic_dev_port_speed(struct vnic_dev *vdev)
598{
599 if (!vnic_dev_notify_ready(vdev))
600 return 0;
601
602 return vdev->notify_copy.port_speed;
603}
604
605u32 vnic_dev_msg_lvl(struct vnic_dev *vdev)
606{
607 if (!vnic_dev_notify_ready(vdev))
608 return 0;
609
610 return vdev->notify_copy.msglvl;
611}
612
613u32 vnic_dev_mtu(struct vnic_dev *vdev)
614{
615 if (!vnic_dev_notify_ready(vdev))
616 return 0;
617
618 return vdev->notify_copy.mtu;
619}
620
621u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev)
622{
623 if (!vnic_dev_notify_ready(vdev))
624 return 0;
625
626 return vdev->notify_copy.link_down_cnt;
627}
628
629void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
630 enum vnic_dev_intr_mode intr_mode)
631{
632 vdev->intr_mode = intr_mode;
633}
634
635enum vnic_dev_intr_mode vnic_dev_get_intr_mode(
636 struct vnic_dev *vdev)
637{
638 return vdev->intr_mode;
639}
640
641void vnic_dev_unregister(struct vnic_dev *vdev)
642{
643 if (vdev) {
644 if (vdev->notify)
645 pci_free_consistent(vdev->pdev,
646 sizeof(struct vnic_devcmd_notify),
647 vdev->notify,
648 vdev->notify_pa);
649 if (vdev->linkstatus)
650 pci_free_consistent(vdev->pdev,
651 sizeof(u32),
652 vdev->linkstatus,
653 vdev->linkstatus_pa);
654 if (vdev->stats)
655 pci_free_consistent(vdev->pdev,
656				sizeof(struct vnic_stats),
657 vdev->stats, vdev->stats_pa);
658 if (vdev->fw_info)
659 pci_free_consistent(vdev->pdev,
660 sizeof(struct vnic_devcmd_fw_info),
661 vdev->fw_info, vdev->fw_info_pa);
662 kfree(vdev);
663 }
664}
665
666struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
667 void *priv, struct pci_dev *pdev, struct vnic_dev_bar *bar)
668{
669 if (!vdev) {
670 vdev = kzalloc(sizeof(struct vnic_dev), GFP_KERNEL);
671 if (!vdev)
672 return NULL;
673 }
674
675 vdev->priv = priv;
676 vdev->pdev = pdev;
677
678 if (vnic_dev_discover_res(vdev, bar))
679 goto err_out;
680
681 vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0);
682 if (!vdev->devcmd)
683 goto err_out;
684
685 return vdev;
686
687err_out:
688 vnic_dev_unregister(vdev);
689 return NULL;
690}
diff --git a/drivers/scsi/fnic/vnic_dev.h b/drivers/scsi/fnic/vnic_dev.h
new file mode 100644
index 000000000000..f9935a8a5a09
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_dev.h
@@ -0,0 +1,161 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_DEV_H_
19#define _VNIC_DEV_H_
20
21#include "vnic_resource.h"
22#include "vnic_devcmd.h"
23
24/*
25 * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth
26 * Driver) when both are built with CONFIG options =y
27 */
28#define vnic_dev_priv fnic_dev_priv
29#define vnic_dev_get_res_count fnic_dev_get_res_count
30#define vnic_dev_get_res fnic_dev_get_res
31#define vnic_dev_desc_ring_size fnic_dev_desc_ring_size
32#define vnic_dev_clear_desc_ring fnic_dev_clear_desc_ring
33#define vnic_dev_alloc_desc_ring fnic_dev_alloc_desc_ring
34#define vnic_dev_free_desc_ring fnic_dev_free_desc_ring
35#define vnic_dev_cmd fnic_dev_cmd
36#define vnic_dev_fw_info fnic_dev_fw_info
37#define vnic_dev_spec fnic_dev_spec
38#define vnic_dev_stats_clear fnic_dev_stats_clear
39#define vnic_dev_stats_dump fnic_dev_stats_dump
40#define vnic_dev_hang_notify fnic_dev_hang_notify
41#define vnic_dev_packet_filter fnic_dev_packet_filter
42#define vnic_dev_add_addr fnic_dev_add_addr
43#define vnic_dev_del_addr fnic_dev_del_addr
44#define vnic_dev_mac_addr fnic_dev_mac_addr
45#define vnic_dev_notify_set fnic_dev_notify_set
46#define vnic_dev_notify_unset fnic_dev_notify_unset
47#define vnic_dev_link_status fnic_dev_link_status
48#define vnic_dev_port_speed fnic_dev_port_speed
49#define vnic_dev_msg_lvl fnic_dev_msg_lvl
50#define vnic_dev_mtu fnic_dev_mtu
51#define vnic_dev_link_down_cnt fnic_dev_link_down_cnt
52#define vnic_dev_close fnic_dev_close
53#define vnic_dev_enable fnic_dev_enable
54#define vnic_dev_disable fnic_dev_disable
55#define vnic_dev_open fnic_dev_open
56#define vnic_dev_open_done fnic_dev_open_done
57#define vnic_dev_init fnic_dev_init
58#define vnic_dev_soft_reset fnic_dev_soft_reset
59#define vnic_dev_soft_reset_done fnic_dev_soft_reset_done
60#define vnic_dev_set_intr_mode fnic_dev_set_intr_mode
61#define vnic_dev_get_intr_mode fnic_dev_get_intr_mode
62#define vnic_dev_unregister fnic_dev_unregister
63#define vnic_dev_register fnic_dev_register
64
65#ifndef VNIC_PADDR_TARGET
66#define VNIC_PADDR_TARGET 0x0000000000000000ULL
67#endif
68
69#ifndef readq
70static inline u64 readq(void __iomem *reg)
71{
72 return ((u64)readl(reg + 0x4UL) << 32) | (u64)readl(reg);
73}
74
75static inline void writeq(u64 val, void __iomem *reg)
76{
77 writel(val & 0xffffffff, reg);
78 writel(val >> 32, reg + 0x4UL);
79}
80#endif
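
/*
 * Note: this fallback splits each 64-bit MMIO access into two 32-bit
 * accesses on platforms without native readq/writeq, so it is not
 * atomic. Presumably the device tolerates the split (writeq issues
 * the low word then the high word) for the registers used here, but
 * that is a device-side assumption, not a generic guarantee.
 */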
81
82enum vnic_dev_intr_mode {
83 VNIC_DEV_INTR_MODE_UNKNOWN,
84 VNIC_DEV_INTR_MODE_INTX,
85 VNIC_DEV_INTR_MODE_MSI,
86 VNIC_DEV_INTR_MODE_MSIX,
87};
88
89struct vnic_dev_bar {
90 void __iomem *vaddr;
91 dma_addr_t bus_addr;
92 unsigned long len;
93};
94
95struct vnic_dev_ring {
96 void *descs;
97 size_t size;
98 dma_addr_t base_addr;
99 size_t base_align;
100 void *descs_unaligned;
101 size_t size_unaligned;
102 dma_addr_t base_addr_unaligned;
103 unsigned int desc_size;
104 unsigned int desc_count;
105 unsigned int desc_avail;
106};
107
108struct vnic_dev;
109struct vnic_stats;
110
111void *vnic_dev_priv(struct vnic_dev *vdev);
112unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev,
113 enum vnic_res_type type);
114void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type,
115 unsigned int index);
116unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring,
117 unsigned int desc_count,
118 unsigned int desc_size);
119void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring);
120int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring,
121 unsigned int desc_count, unsigned int desc_size);
122void vnic_dev_free_desc_ring(struct vnic_dev *vdev,
123 struct vnic_dev_ring *ring);
124int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
125 u64 *a0, u64 *a1, int wait);
126int vnic_dev_fw_info(struct vnic_dev *vdev,
127 struct vnic_devcmd_fw_info **fw_info);
128int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset,
129 unsigned int size, void *value);
130int vnic_dev_stats_clear(struct vnic_dev *vdev);
131int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats);
132int vnic_dev_hang_notify(struct vnic_dev *vdev);
133void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
134 int broadcast, int promisc, int allmulti);
135void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr);
136void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr);
137int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr);
138int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr);
139void vnic_dev_notify_unset(struct vnic_dev *vdev);
140int vnic_dev_link_status(struct vnic_dev *vdev);
141u32 vnic_dev_port_speed(struct vnic_dev *vdev);
142u32 vnic_dev_msg_lvl(struct vnic_dev *vdev);
143u32 vnic_dev_mtu(struct vnic_dev *vdev);
144u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev);
145int vnic_dev_close(struct vnic_dev *vdev);
146int vnic_dev_enable(struct vnic_dev *vdev);
147int vnic_dev_disable(struct vnic_dev *vdev);
148int vnic_dev_open(struct vnic_dev *vdev, int arg);
149int vnic_dev_open_done(struct vnic_dev *vdev, int *done);
150int vnic_dev_init(struct vnic_dev *vdev, int arg);
151int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg);
152int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done);
153void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
154 enum vnic_dev_intr_mode intr_mode);
155enum vnic_dev_intr_mode vnic_dev_get_intr_mode(struct vnic_dev *vdev);
156void vnic_dev_unregister(struct vnic_dev *vdev);
157struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
158 void *priv, struct pci_dev *pdev,
159 struct vnic_dev_bar *bar);
160
161#endif /* _VNIC_DEV_H_ */
diff --git a/drivers/scsi/fnic/vnic_devcmd.h b/drivers/scsi/fnic/vnic_devcmd.h
new file mode 100644
index 000000000000..d62b9061bf12
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_devcmd.h
@@ -0,0 +1,281 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_DEVCMD_H_
19#define _VNIC_DEVCMD_H_
20
21#define _CMD_NBITS 14
22#define _CMD_VTYPEBITS 10
23#define _CMD_FLAGSBITS 6
24#define _CMD_DIRBITS 2
25
26#define _CMD_NMASK ((1 << _CMD_NBITS)-1)
27#define _CMD_VTYPEMASK ((1 << _CMD_VTYPEBITS)-1)
28#define _CMD_FLAGSMASK ((1 << _CMD_FLAGSBITS)-1)
29#define _CMD_DIRMASK ((1 << _CMD_DIRBITS)-1)
30
31#define _CMD_NSHIFT 0
32#define _CMD_VTYPESHIFT (_CMD_NSHIFT+_CMD_NBITS)
33#define _CMD_FLAGSSHIFT (_CMD_VTYPESHIFT+_CMD_VTYPEBITS)
34#define _CMD_DIRSHIFT (_CMD_FLAGSSHIFT+_CMD_FLAGSBITS)
35
36/*
37 * Direction bits (from host perspective).
38 */
39#define _CMD_DIR_NONE 0U
40#define _CMD_DIR_WRITE 1U
41#define _CMD_DIR_READ 2U
42#define _CMD_DIR_RW (_CMD_DIR_WRITE | _CMD_DIR_READ)
43
44/*
45 * Flag bits.
46 */
47#define _CMD_FLAGS_NONE 0U
48#define _CMD_FLAGS_NOWAIT 1U
49
50/*
51 * vNIC type bits.
52 */
53#define _CMD_VTYPE_NONE 0U
54#define _CMD_VTYPE_ENET 1U
55#define _CMD_VTYPE_FC 2U
56#define _CMD_VTYPE_SCSI 4U
57#define _CMD_VTYPE_ALL (_CMD_VTYPE_ENET | _CMD_VTYPE_FC | _CMD_VTYPE_SCSI)
58
59/*
60 * Used to create cmds.
61 */
62#define _CMDCF(dir, flags, vtype, nr) \
63 (((dir) << _CMD_DIRSHIFT) | \
64 ((flags) << _CMD_FLAGSSHIFT) | \
65 ((vtype) << _CMD_VTYPESHIFT) | \
66 ((nr) << _CMD_NSHIFT))
67#define _CMDC(dir, vtype, nr) _CMDCF(dir, 0, vtype, nr)
68#define _CMDCNW(dir, vtype, nr) _CMDCF(dir, _CMD_FLAGS_NOWAIT, vtype, nr)
69
70/*
71 * Used to decode cmds.
72 */
73#define _CMD_DIR(cmd) (((cmd) >> _CMD_DIRSHIFT) & _CMD_DIRMASK)
74#define _CMD_FLAGS(cmd) (((cmd) >> _CMD_FLAGSSHIFT) & _CMD_FLAGSMASK)
75#define _CMD_VTYPE(cmd) (((cmd) >> _CMD_VTYPESHIFT) & _CMD_VTYPEMASK)
76#define _CMD_N(cmd) (((cmd) >> _CMD_NSHIFT) & _CMD_NMASK)
77
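/*
 * Worked example (illustrative, not part of this patch): with
 * _CMD_NSHIFT=0, _CMD_VTYPESHIFT=14, _CMD_FLAGSSHIFT=24 and
 * _CMD_DIRSHIFT=30 from above,
 *
 *   _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 1)
 *       = (1 << 30) | (0 << 24) | (7 << 14) | (1 << 0)
 *       = 0x4001c001
 *
 * and the decode macros recover each field:
 *
 *   _CMD_DIR(0x4001c001)   == _CMD_DIR_WRITE
 *   _CMD_VTYPE(0x4001c001) == _CMD_VTYPE_ALL
 *   _CMD_N(0x4001c001)     == 1        (CMD_MCPU_FW_INFO below)
 */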
78enum vnic_devcmd_cmd {
79 CMD_NONE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_NONE, 0),
80
81 /* mcpu fw info in mem: (u64)a0=paddr to struct vnic_devcmd_fw_info */
82 CMD_MCPU_FW_INFO = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 1),
83
84 /* dev-specific block member:
85 * in: (u16)a0=offset,(u8)a1=size
86 * out: a0=value */
87 CMD_DEV_SPEC = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 2),
88
89 /* stats clear */
90 CMD_STATS_CLEAR = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 3),
91
92 /* stats dump in mem: (u64)a0=paddr to stats area,
93 * (u16)a1=sizeof stats area */
94 CMD_STATS_DUMP = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 4),
95
96 /* set Rx packet filter: (u32)a0=filters (see CMD_PFILTER_*) */
97 CMD_PACKET_FILTER = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 7),
98
99 /* hang detection notification */
100 CMD_HANG_NOTIFY = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 8),
101
102 /* MAC address in (u48)a0 */
103 CMD_MAC_ADDR = _CMDC(_CMD_DIR_READ,
104 _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 9),
105
106 /* disable/enable promisc mode: (u8)a0=0/1 */
107/***** XXX DEPRECATED *****/
108 CMD_PROMISC_MODE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 10),
109
110 /* disable/enable all-multi mode: (u8)a0=0/1 */
111/***** XXX DEPRECATED *****/
112 CMD_ALLMULTI_MODE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 11),
113
114 /* add addr from (u48)a0 */
115 CMD_ADDR_ADD = _CMDCNW(_CMD_DIR_WRITE,
116 _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 12),
117
118 /* del addr from (u48)a0 */
119 CMD_ADDR_DEL = _CMDCNW(_CMD_DIR_WRITE,
120 _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 13),
121
122 /* add VLAN id in (u16)a0 */
123 CMD_VLAN_ADD = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 14),
124
125 /* del VLAN id in (u16)a0 */
126 CMD_VLAN_DEL = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 15),
127
128 /* nic_cfg in (u32)a0 */
129 CMD_NIC_CFG = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 16),
130
131 /* union vnic_rss_key in mem: (u64)a0=paddr, (u16)a1=len */
132 CMD_RSS_KEY = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 17),
133
134 /* union vnic_rss_cpu in mem: (u64)a0=paddr, (u16)a1=len */
135 CMD_RSS_CPU = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 18),
136
137 /* initiate softreset */
138 CMD_SOFT_RESET = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 19),
139
140 /* softreset status:
141 * out: a0=0 reset complete, a0=1 reset in progress */
142 CMD_SOFT_RESET_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 20),
143
144 /* set struct vnic_devcmd_notify buffer in mem:
145 * in:
146 * (u64)a0=paddr to notify (set paddr=0 to unset)
147 * (u32)a1 & 0x00000000ffffffff=sizeof(struct vnic_devcmd_notify)
148 * (u16)a1 & 0x0000ffff00000000=intr num (-1 for no intr)
149 * out:
150 * (u32)a1 = effective size
151 */
152 CMD_NOTIFY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 21),
153
154 /* UNDI API: (u64)a0=paddr to s_PXENV_UNDI_ struct,
155 * (u8)a1=PXENV_UNDI_xxx */
156 CMD_UNDI = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 22),
157
158 /* initiate open sequence (u32)a0=flags (see CMD_OPENF_*) */
159 CMD_OPEN = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 23),
160
161 /* open status:
162 * out: a0=0 open complete, a0=1 open in progress */
163 CMD_OPEN_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 24),
164
165 /* close vnic */
166 CMD_CLOSE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 25),
167
168 /* initialize virtual link: (u32)a0=flags (see CMD_INITF_*) */
169 CMD_INIT = _CMDCNW(_CMD_DIR_READ, _CMD_VTYPE_ALL, 26),
170
171 /* variant of CMD_INIT, with provisioning info
172 * (u64)a0=paddr of vnic_devcmd_provinfo
173 * (u32)a1=sizeof provision info */
174 CMD_INIT_PROV_INFO = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 27),
175
176 /* enable virtual link */
177 CMD_ENABLE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 28),
178
179 /* disable virtual link */
180 CMD_DISABLE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 29),
181
182 /* stats dump all vnics on uplink in mem: (u64)a0=paddr (u32)a1=uif */
183 CMD_STATS_DUMP_ALL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 30),
184
185 /* init status:
186 * out: a0=0 init complete, a0=1 init in progress
187 * if a0=0, a1=errno */
188 CMD_INIT_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 31),
189
190 /* INT13 API: (u64)a0=paddr to vnic_int13_params struct
191 * (u8)a1=INT13_CMD_xxx */
192 CMD_INT13 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_FC, 32),
193
194 /* logical uplink enable/disable: (u64)a0: 0/1=disable/enable */
195 CMD_LOGICAL_UPLINK = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 33),
196
197 /* undo initialize of virtual link */
198 CMD_DEINIT = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 34),
199};
200
201/* flags for CMD_OPEN */
202#define CMD_OPENF_OPROM 0x1 /* open coming from option rom */
203
204/* flags for CMD_INIT */
205#define CMD_INITF_DEFAULT_MAC 0x1 /* init with default mac addr */
206
207/* flags for CMD_PACKET_FILTER */
208#define CMD_PFILTER_DIRECTED 0x01
209#define CMD_PFILTER_MULTICAST 0x02
210#define CMD_PFILTER_BROADCAST 0x04
211#define CMD_PFILTER_PROMISCUOUS 0x08
212#define CMD_PFILTER_ALL_MULTICAST 0x10
213
214enum vnic_devcmd_status {
215 STAT_NONE = 0,
216 STAT_BUSY = 1 << 0, /* cmd in progress */
217 STAT_ERROR = 1 << 1, /* last cmd caused error (code in a0) */
218};
219
220enum vnic_devcmd_error {
221 ERR_SUCCESS = 0,
222 ERR_EINVAL = 1,
223 ERR_EFAULT = 2,
224 ERR_EPERM = 3,
225 ERR_EBUSY = 4,
226 ERR_ECMDUNKNOWN = 5,
227 ERR_EBADSTATE = 6,
228 ERR_ENOMEM = 7,
229 ERR_ETIMEDOUT = 8,
230 ERR_ELINKDOWN = 9,
231};
232
233struct vnic_devcmd_fw_info {
234 char fw_version[32];
235 char fw_build[32];
236 char hw_version[32];
237 char hw_serial_number[32];
238};
239
240struct vnic_devcmd_notify {
241 u32 csum; /* checksum over following words */
242
243 u32 link_state; /* link up == 1 */
244 u32 port_speed; /* effective port speed (rate limit) */
245 u32 mtu; /* MTU */
246 u32 msglvl; /* requested driver msg lvl */
247 u32 uif; /* uplink interface */
248 u32 status; /* status bits (see VNIC_STF_*) */
249 u32 error; /* error code (see ERR_*) for first ERR */
250 u32 link_down_cnt; /* running count of link down transitions */
251};
252#define VNIC_STF_FATAL_ERR 0x0001 /* fatal fw error */
253
254struct vnic_devcmd_provinfo {
255 u8 oui[3];
256 u8 type;
257 u8 data[0];
258};
259
260/*
261 * Writing cmd register causes STAT_BUSY to get set in status register.
262 * When cmd completes, STAT_BUSY will be cleared.
263 *
264 * If cmd completed successfully STAT_ERROR will be clear
265 * and args registers contain cmd-specific results.
266 *
267 * If cmd error, STAT_ERROR will be set and args[0] contains error code.
268 *
269 * status register is read-only. While STAT_BUSY is set,
270 * all other register contents are read-only.
271 */
272
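/*
 * Minimal sketch of the handshake described above (illustrative only;
 * the actual implementation is vnic_dev_cmd(), declared in vnic_dev.h,
 * which also handles argument marshalling and timeouts):
 *
 *   writeq(*a0, &devcmd->args[0]);
 *   writeq(*a1, &devcmd->args[1]);
 *   wmb();
 *   iowrite32(cmd, &devcmd->cmd);       /* sets STAT_BUSY in status */
 *
 *   do {
 *       udelay(100);
 *       status = ioread32(&devcmd->status);
 *   } while (status & STAT_BUSY);       /* cleared on completion */
 *
 *   if (status & STAT_ERROR)
 *       err = readq(&devcmd->args[0]);  /* error code in args[0] */
 */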
273/* Make sizeof(vnic_devcmd) a power-of-2 for I/O BAR. */
274#define VNIC_DEVCMD_NARGS 15
275struct vnic_devcmd {
276 u32 status; /* RO */
277 u32 cmd; /* RW */
278 u64 args[VNIC_DEVCMD_NARGS]; /* RW cmd args (little-endian) */
279};
280
281#endif /* _VNIC_DEVCMD_H_ */
diff --git a/drivers/scsi/fnic/vnic_intr.c b/drivers/scsi/fnic/vnic_intr.c
new file mode 100644
index 000000000000..4f4dc8793d23
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_intr.c
@@ -0,0 +1,60 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18
19#include <linux/kernel.h>
20#include <linux/errno.h>
21#include <linux/types.h>
22#include <linux/pci.h>
23#include <linux/delay.h>
24#include "vnic_dev.h"
25#include "vnic_intr.h"
26
27void vnic_intr_free(struct vnic_intr *intr)
28{
29 intr->ctrl = NULL;
30}
31
32int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr,
33 unsigned int index)
34{
35 intr->index = index;
36 intr->vdev = vdev;
37
38 intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index);
39 if (!intr->ctrl) {
40 printk(KERN_ERR "Failed to hook INTR[%d].ctrl resource\n",
41 index);
42 return -EINVAL;
43 }
44
45 return 0;
46}
47
48void vnic_intr_init(struct vnic_intr *intr, unsigned int coalescing_timer,
49 unsigned int coalescing_type, unsigned int mask_on_assertion)
50{
51 iowrite32(coalescing_timer, &intr->ctrl->coalescing_timer);
52 iowrite32(coalescing_type, &intr->ctrl->coalescing_type);
53 iowrite32(mask_on_assertion, &intr->ctrl->mask_on_assertion);
54 iowrite32(0, &intr->ctrl->int_credits);
55}
56
57void vnic_intr_clean(struct vnic_intr *intr)
58{
59 iowrite32(0, &intr->ctrl->int_credits);
60}
diff --git a/drivers/scsi/fnic/vnic_intr.h b/drivers/scsi/fnic/vnic_intr.h
new file mode 100644
index 000000000000..d5fb40e7c98e
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_intr.h
@@ -0,0 +1,118 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_INTR_H_
19#define _VNIC_INTR_H_
20
21#include <linux/pci.h>
22#include "vnic_dev.h"
23
24/*
25 * These defines avoid a symbol clash between fnic and enic (the Cisco
26 * 10G Ethernet driver) when both drivers are built into the kernel (=y).
27 */
28#define vnic_intr_unmask fnic_intr_unmask
29#define vnic_intr_mask fnic_intr_mask
30#define vnic_intr_return_credits fnic_intr_return_credits
31#define vnic_intr_credits fnic_intr_credits
32#define vnic_intr_return_all_credits fnic_intr_return_all_credits
33#define vnic_intr_legacy_pba fnic_intr_legacy_pba
34#define vnic_intr_free fnic_intr_free
35#define vnic_intr_alloc fnic_intr_alloc
36#define vnic_intr_init fnic_intr_init
37#define vnic_intr_clean fnic_intr_clean
38
39#define VNIC_INTR_TIMER_MAX 0xffff
40
41#define VNIC_INTR_TIMER_TYPE_ABS 0
42#define VNIC_INTR_TIMER_TYPE_QUIET 1
43
44/* Interrupt control */
45struct vnic_intr_ctrl {
46 u32 coalescing_timer; /* 0x00 */
47 u32 pad0;
48 u32 coalescing_value; /* 0x08 */
49 u32 pad1;
50 u32 coalescing_type; /* 0x10 */
51 u32 pad2;
52 u32 mask_on_assertion; /* 0x18 */
53 u32 pad3;
54 u32 mask; /* 0x20 */
55 u32 pad4;
56 u32 int_credits; /* 0x28 */
57 u32 pad5;
58 u32 int_credit_return; /* 0x30 */
59 u32 pad6;
60};
61
62struct vnic_intr {
63 unsigned int index;
64 struct vnic_dev *vdev;
65 struct vnic_intr_ctrl __iomem *ctrl; /* memory-mapped */
66};
67
68static inline void vnic_intr_unmask(struct vnic_intr *intr)
69{
70 iowrite32(0, &intr->ctrl->mask);
71}
72
73static inline void vnic_intr_mask(struct vnic_intr *intr)
74{
75 iowrite32(1, &intr->ctrl->mask);
76}
77
78static inline void vnic_intr_return_credits(struct vnic_intr *intr,
79 unsigned int credits, int unmask, int reset_timer)
80{
81#define VNIC_INTR_UNMASK_SHIFT 16
82#define VNIC_INTR_RESET_TIMER_SHIFT 17
83
84 u32 int_credit_return = (credits & 0xffff) |
85 (unmask ? (1 << VNIC_INTR_UNMASK_SHIFT) : 0) |
86 (reset_timer ? (1 << VNIC_INTR_RESET_TIMER_SHIFT) : 0);
87
88 iowrite32(int_credit_return, &intr->ctrl->int_credit_return);
89}
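/*
 * Worked example (illustrative): returning 5 credits with unmask and
 * reset_timer both set writes 5 | (1 << 16) | (1 << 17) = 0x00030005 to
 * int_credit_return; the low 16 bits carry the credit count and bits
 * 16/17 carry the unmask/reset-timer flags.
 */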
90
91static inline unsigned int vnic_intr_credits(struct vnic_intr *intr)
92{
93 return ioread32(&intr->ctrl->int_credits);
94}
95
96static inline void vnic_intr_return_all_credits(struct vnic_intr *intr)
97{
98 unsigned int credits = vnic_intr_credits(intr);
99 int unmask = 1;
100 int reset_timer = 1;
101
102 vnic_intr_return_credits(intr, credits, unmask, reset_timer);
103}
104
105static inline u32 vnic_intr_legacy_pba(u32 __iomem *legacy_pba)
106{
107 /* read PBA without clearing */
108 return ioread32(legacy_pba);
109}
110
111void vnic_intr_free(struct vnic_intr *intr);
112int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr,
113 unsigned int index);
114void vnic_intr_init(struct vnic_intr *intr, unsigned int coalescing_timer,
115 unsigned int coalescing_type, unsigned int mask_on_assertion);
116void vnic_intr_clean(struct vnic_intr *intr);
117
118#endif /* _VNIC_INTR_H_ */
diff --git a/drivers/scsi/fnic/vnic_nic.h b/drivers/scsi/fnic/vnic_nic.h
new file mode 100644
index 000000000000..f15b83eeaced
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_nic.h
@@ -0,0 +1,69 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_NIC_H_
19#define _VNIC_NIC_H_
20
21/*
22 * These defines avoid a symbol clash between fnic and enic (the Cisco
23 * 10G Ethernet driver) when both drivers are built into the kernel (=y).
24 */
25#define vnic_set_nic_cfg fnic_set_nic_cfg
26
27#define NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD 0xffUL
28#define NIC_CFG_RSS_DEFAULT_CPU_SHIFT 0
29#define NIC_CFG_RSS_HASH_TYPE (0xffUL << 8)
30#define NIC_CFG_RSS_HASH_TYPE_MASK_FIELD 0xffUL
31#define NIC_CFG_RSS_HASH_TYPE_SHIFT 8
32#define NIC_CFG_RSS_HASH_BITS (7UL << 16)
33#define NIC_CFG_RSS_HASH_BITS_MASK_FIELD 7UL
34#define NIC_CFG_RSS_HASH_BITS_SHIFT 16
35#define NIC_CFG_RSS_BASE_CPU (7UL << 19)
36#define NIC_CFG_RSS_BASE_CPU_MASK_FIELD 7UL
37#define NIC_CFG_RSS_BASE_CPU_SHIFT 19
38#define NIC_CFG_RSS_ENABLE (1UL << 22)
39#define NIC_CFG_RSS_ENABLE_MASK_FIELD 1UL
40#define NIC_CFG_RSS_ENABLE_SHIFT 22
41#define NIC_CFG_TSO_IPID_SPLIT_EN (1UL << 23)
42#define NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD 1UL
43#define NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT 23
44#define NIC_CFG_IG_VLAN_STRIP_EN (1UL << 24)
45#define NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD 1UL
46#define NIC_CFG_IG_VLAN_STRIP_EN_SHIFT 24
47
48static inline void vnic_set_nic_cfg(u32 *nic_cfg,
49 u8 rss_default_cpu, u8 rss_hash_type,
50 u8 rss_hash_bits, u8 rss_base_cpu,
51 u8 rss_enable, u8 tso_ipid_split_en,
52 u8 ig_vlan_strip_en)
53{
54 *nic_cfg = (rss_default_cpu & NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD) |
55 ((rss_hash_type & NIC_CFG_RSS_HASH_TYPE_MASK_FIELD)
56 << NIC_CFG_RSS_HASH_TYPE_SHIFT) |
57 ((rss_hash_bits & NIC_CFG_RSS_HASH_BITS_MASK_FIELD)
58 << NIC_CFG_RSS_HASH_BITS_SHIFT) |
59 ((rss_base_cpu & NIC_CFG_RSS_BASE_CPU_MASK_FIELD)
60 << NIC_CFG_RSS_BASE_CPU_SHIFT) |
61 ((rss_enable & NIC_CFG_RSS_ENABLE_MASK_FIELD)
62 << NIC_CFG_RSS_ENABLE_SHIFT) |
63 ((tso_ipid_split_en & NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD)
64 << NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT) |
65 ((ig_vlan_strip_en & NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD)
66 << NIC_CFG_IG_VLAN_STRIP_EN_SHIFT);
67}
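/*
 * Worked example (illustrative): enabling RSS with hash type 1 over
 * 2^7 buckets, default/base CPU 0, and ingress VLAN stripping on:
 *
 *   u32 nic_cfg;
 *   vnic_set_nic_cfg(&nic_cfg, 0, 1, 7, 0, 1, 0, 1);
 *
 *   nic_cfg == (1 << 8) | (7 << 16) | (1 << 22) | (1 << 24) == 0x01470100
 *
 * The packed word is then handed to firmware as "(u32)a0" of CMD_NIC_CFG
 * (see vnic_devcmd.h).
 */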
68
69#endif /* _VNIC_NIC_H_ */
diff --git a/drivers/scsi/fnic/vnic_resource.h b/drivers/scsi/fnic/vnic_resource.h
new file mode 100644
index 000000000000..2d842f79d41a
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_resource.h
@@ -0,0 +1,61 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_RESOURCE_H_
19#define _VNIC_RESOURCE_H_
20
21#define VNIC_RES_MAGIC 0x766E6963L /* 'vnic' */
22#define VNIC_RES_VERSION 0x00000000L
23
24/* vNIC resource types */
25enum vnic_res_type {
26 RES_TYPE_EOL, /* End-of-list */
27 RES_TYPE_WQ, /* Work queues */
28 RES_TYPE_RQ, /* Receive queues */
29 RES_TYPE_CQ, /* Completion queues */
30 RES_TYPE_RSVD1,
31 RES_TYPE_NIC_CFG, /* Enet NIC config registers */
32 RES_TYPE_RSVD2,
33 RES_TYPE_RSVD3,
34 RES_TYPE_RSVD4,
35 RES_TYPE_RSVD5,
36 RES_TYPE_INTR_CTRL, /* Interrupt ctrl table */
37 RES_TYPE_INTR_TABLE, /* MSI/MSI-X Interrupt table */
38 RES_TYPE_INTR_PBA, /* MSI/MSI-X PBA table */
39 RES_TYPE_INTR_PBA_LEGACY, /* Legacy intr status */
40 RES_TYPE_RSVD6,
41 RES_TYPE_RSVD7,
42 RES_TYPE_DEVCMD, /* Device command region */
43 RES_TYPE_PASS_THRU_PAGE, /* Pass-thru page */
44
45 RES_TYPE_MAX, /* Count of resource types */
46};
47
48struct vnic_resource_header {
49 u32 magic;
50 u32 version;
51};
52
53struct vnic_resource {
54 u8 type;
55 u8 bar;
56 u8 pad[2];
57 u32 bar_offset;
58 u32 count;
59};
60
61#endif /* _VNIC_RESOURCE_H_ */
diff --git a/drivers/scsi/fnic/vnic_rq.c b/drivers/scsi/fnic/vnic_rq.c
new file mode 100644
index 000000000000..bedd0d285630
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_rq.c
@@ -0,0 +1,196 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18
19#include <linux/errno.h>
20#include <linux/types.h>
21#include <linux/pci.h>
22#include <linux/delay.h>
23#include "vnic_dev.h"
24#include "vnic_rq.h"
25
26static int vnic_rq_alloc_bufs(struct vnic_rq *rq)
27{
28 struct vnic_rq_buf *buf;
29 struct vnic_dev *vdev;
30 unsigned int i, j, count = rq->ring.desc_count;
31 unsigned int blks = VNIC_RQ_BUF_BLKS_NEEDED(count);
32
33 vdev = rq->vdev;
34
35 for (i = 0; i < blks; i++) {
36 rq->bufs[i] = kzalloc(VNIC_RQ_BUF_BLK_SZ, GFP_ATOMIC);
37 if (!rq->bufs[i]) {
38 printk(KERN_ERR "Failed to alloc rq_bufs\n");
39 return -ENOMEM;
40 }
41 }
42
43 for (i = 0; i < blks; i++) {
44 buf = rq->bufs[i];
45 for (j = 0; j < VNIC_RQ_BUF_BLK_ENTRIES; j++) {
46 buf->index = i * VNIC_RQ_BUF_BLK_ENTRIES + j;
47 buf->desc = (u8 *)rq->ring.descs +
48 rq->ring.desc_size * buf->index;
49 if (buf->index + 1 == count) {
50 buf->next = rq->bufs[0];
51 break;
52 } else if (j + 1 == VNIC_RQ_BUF_BLK_ENTRIES) {
53 buf->next = rq->bufs[i + 1];
54 } else {
55 buf->next = buf + 1;
56 buf++;
57 }
58 }
59 }
60
61 rq->to_use = rq->to_clean = rq->bufs[0];
62 rq->buf_index = 0;
63
64 return 0;
65}
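/*
 * Illustrative note: the loop above threads the bookkeeping structs into
 * one circular list across the 64-entry blocks, so buffer index n lives at
 * rq->bufs[n / VNIC_RQ_BUF_BLK_ENTRIES][n % VNIC_RQ_BUF_BLK_ENTRIES] and
 * the final buffer (index count - 1) links back to rq->bufs[0].
 * vnic_rq_init() below relies on exactly this mapping when it re-seats
 * to_use/to_clean from the hardware fetch_index.
 */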
66
67void vnic_rq_free(struct vnic_rq *rq)
68{
69 struct vnic_dev *vdev;
70 unsigned int i;
71
72 vdev = rq->vdev;
73
74 vnic_dev_free_desc_ring(vdev, &rq->ring);
75
76 for (i = 0; i < VNIC_RQ_BUF_BLKS_MAX; i++) {
77 kfree(rq->bufs[i]);
78 rq->bufs[i] = NULL;
79 }
80
81 rq->ctrl = NULL;
82}
83
84int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index,
85 unsigned int desc_count, unsigned int desc_size)
86{
87 int err;
88
89 rq->index = index;
90 rq->vdev = vdev;
91
92 rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index);
93 if (!rq->ctrl) {
94 printk(KERN_ERR "Failed to hook RQ[%d] resource\n", index);
95 return -EINVAL;
96 }
97
98 vnic_rq_disable(rq);
99
100 err = vnic_dev_alloc_desc_ring(vdev, &rq->ring, desc_count, desc_size);
101 if (err)
102 return err;
103
104 err = vnic_rq_alloc_bufs(rq);
105 if (err) {
106 vnic_rq_free(rq);
107 return err;
108 }
109
110 return 0;
111}
112
113void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
114 unsigned int error_interrupt_enable,
115 unsigned int error_interrupt_offset)
116{
117 u64 paddr;
118 u32 fetch_index;
119
120 paddr = (u64)rq->ring.base_addr | VNIC_PADDR_TARGET;
121 writeq(paddr, &rq->ctrl->ring_base);
122 iowrite32(rq->ring.desc_count, &rq->ctrl->ring_size);
123 iowrite32(cq_index, &rq->ctrl->cq_index);
124 iowrite32(error_interrupt_enable, &rq->ctrl->error_interrupt_enable);
125 iowrite32(error_interrupt_offset, &rq->ctrl->error_interrupt_offset);
126 iowrite32(0, &rq->ctrl->dropped_packet_count);
127 iowrite32(0, &rq->ctrl->error_status);
128
129 /* Use current fetch_index as the ring starting point */
130 fetch_index = ioread32(&rq->ctrl->fetch_index);
131 rq->to_use = rq->to_clean =
132 &rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES]
133 [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES];
134 iowrite32(fetch_index, &rq->ctrl->posted_index);
135
136 rq->buf_index = 0;
137}
138
139unsigned int vnic_rq_error_status(struct vnic_rq *rq)
140{
141 return ioread32(&rq->ctrl->error_status);
142}
143
144void vnic_rq_enable(struct vnic_rq *rq)
145{
146 iowrite32(1, &rq->ctrl->enable);
147}
148
149int vnic_rq_disable(struct vnic_rq *rq)
150{
151 unsigned int wait;
152
153 iowrite32(0, &rq->ctrl->enable);
154
155 /* Wait for HW to ACK disable request */
156 for (wait = 0; wait < 100; wait++) {
157 if (!(ioread32(&rq->ctrl->running)))
158 return 0;
159 udelay(1);
160 }
161
162 printk(KERN_ERR "Failed to disable RQ[%d]\n", rq->index);
163
164 return -ETIMEDOUT;
165}
166
167void vnic_rq_clean(struct vnic_rq *rq,
168 void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf))
169{
170 struct vnic_rq_buf *buf;
171 u32 fetch_index;
172
173 BUG_ON(ioread32(&rq->ctrl->enable));
174
175 buf = rq->to_clean;
176
177 while (vnic_rq_desc_used(rq) > 0) {
178
179 (*buf_clean)(rq, buf);
180
181 buf = rq->to_clean = buf->next;
182 rq->ring.desc_avail++;
183 }
184
185 /* Use current fetch_index as the ring starting point */
186 fetch_index = ioread32(&rq->ctrl->fetch_index);
187 rq->to_use = rq->to_clean =
188 &rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES]
189 [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES];
190 iowrite32(fetch_index, &rq->ctrl->posted_index);
191
192 rq->buf_index = 0;
193
194 vnic_dev_clear_desc_ring(&rq->ring);
195}
196
diff --git a/drivers/scsi/fnic/vnic_rq.h b/drivers/scsi/fnic/vnic_rq.h
new file mode 100644
index 000000000000..aebdfbd6ad3c
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_rq.h
@@ -0,0 +1,235 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_RQ_H_
19#define _VNIC_RQ_H_
20
21#include <linux/pci.h>
22#include "vnic_dev.h"
23#include "vnic_cq.h"
24
25/*
26 * These defines avoid a symbol clash between fnic and enic (the Cisco
27 * 10G Ethernet driver) when both drivers are built into the kernel (=y).
28 */
29#define vnic_rq_desc_avail fnic_rq_desc_avail
30#define vnic_rq_desc_used fnic_rq_desc_used
31#define vnic_rq_next_desc fnic_rq_next_desc
32#define vnic_rq_next_index fnic_rq_next_index
33#define vnic_rq_next_buf_index fnic_rq_next_buf_index
34#define vnic_rq_post fnic_rq_post
35#define vnic_rq_posting_soon fnic_rq_posting_soon
36#define vnic_rq_return_descs fnic_rq_return_descs
37#define vnic_rq_service fnic_rq_service
38#define vnic_rq_fill fnic_rq_fill
39#define vnic_rq_free fnic_rq_free
40#define vnic_rq_alloc fnic_rq_alloc
41#define vnic_rq_init fnic_rq_init
42#define vnic_rq_error_status fnic_rq_error_status
43#define vnic_rq_enable fnic_rq_enable
44#define vnic_rq_disable fnic_rq_disable
45#define vnic_rq_clean fnic_rq_clean
46
47/* Receive queue control */
48struct vnic_rq_ctrl {
49 u64 ring_base; /* 0x00 */
50 u32 ring_size; /* 0x08 */
51 u32 pad0;
52 u32 posted_index; /* 0x10 */
53 u32 pad1;
54 u32 cq_index; /* 0x18 */
55 u32 pad2;
56 u32 enable; /* 0x20 */
57 u32 pad3;
58 u32 running; /* 0x28 */
59 u32 pad4;
60 u32 fetch_index; /* 0x30 */
61 u32 pad5;
62 u32 error_interrupt_enable; /* 0x38 */
63 u32 pad6;
64 u32 error_interrupt_offset; /* 0x40 */
65 u32 pad7;
66 u32 error_status; /* 0x48 */
67 u32 pad8;
68 u32 dropped_packet_count; /* 0x50 */
69 u32 pad9;
70 u32 dropped_packet_count_rc; /* 0x58 */
71 u32 pad10;
72};
73
74/* Break the vnic_rq_buf allocations into blocks of 64 entries */
75#define VNIC_RQ_BUF_BLK_ENTRIES 64
76#define VNIC_RQ_BUF_BLK_SZ \
77 (VNIC_RQ_BUF_BLK_ENTRIES * sizeof(struct vnic_rq_buf))
78#define VNIC_RQ_BUF_BLKS_NEEDED(entries) \
79 DIV_ROUND_UP(entries, VNIC_RQ_BUF_BLK_ENTRIES)
80#define VNIC_RQ_BUF_BLKS_MAX VNIC_RQ_BUF_BLKS_NEEDED(4096)
81
82struct vnic_rq_buf {
83 struct vnic_rq_buf *next;
84 dma_addr_t dma_addr;
85 void *os_buf;
86 unsigned int os_buf_index;
87 unsigned int len;
88 unsigned int index;
89 void *desc;
90};
91
92struct vnic_rq {
93 unsigned int index;
94 struct vnic_dev *vdev;
95 struct vnic_rq_ctrl __iomem *ctrl; /* memory-mapped */
96 struct vnic_dev_ring ring;
97 struct vnic_rq_buf *bufs[VNIC_RQ_BUF_BLKS_MAX];
98 struct vnic_rq_buf *to_use;
99 struct vnic_rq_buf *to_clean;
100 void *os_buf_head;
101 unsigned int buf_index;
102 unsigned int pkts_outstanding;
103};
104
105static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq)
106{
107 /* how many does SW own? */
108 return rq->ring.desc_avail;
109}
110
111static inline unsigned int vnic_rq_desc_used(struct vnic_rq *rq)
112{
113 /* how many does HW own? */
114 return rq->ring.desc_count - rq->ring.desc_avail - 1;
115}
116
117static inline void *vnic_rq_next_desc(struct vnic_rq *rq)
118{
119 return rq->to_use->desc;
120}
121
122static inline unsigned int vnic_rq_next_index(struct vnic_rq *rq)
123{
124 return rq->to_use->index;
125}
126
127static inline unsigned int vnic_rq_next_buf_index(struct vnic_rq *rq)
128{
129 return rq->buf_index++;
130}
131
132static inline void vnic_rq_post(struct vnic_rq *rq,
133 void *os_buf, unsigned int os_buf_index,
134 dma_addr_t dma_addr, unsigned int len)
135{
136 struct vnic_rq_buf *buf = rq->to_use;
137
138 buf->os_buf = os_buf;
139 buf->os_buf_index = os_buf_index;
140 buf->dma_addr = dma_addr;
141 buf->len = len;
142
143 buf = buf->next;
144 rq->to_use = buf;
145 rq->ring.desc_avail--;
146
147 /* Move the posted_index every nth descriptor */
148
149
150#ifndef VNIC_RQ_RETURN_RATE
151#define VNIC_RQ_RETURN_RATE 0xf /* keep 2^n - 1 */
152#endif
153
154 if ((buf->index & VNIC_RQ_RETURN_RATE) == 0) {
155 /* Adding write memory barrier prevents compiler and/or CPU
156 * reordering, thus avoiding descriptor posting before
157 * descriptor is initialized. Otherwise, hardware can read
158 * stale descriptor fields.
159 */
160 wmb();
161 iowrite32(buf->index, &rq->ctrl->posted_index);
162 }
163}
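/*
 * Illustrative note: with VNIC_RQ_RETURN_RATE 0xf, the posted_index
 * doorbell is written only when a post advances to_use to a buffer whose
 * index is a multiple of 16; that single MMIO write publishes every
 * descriptor filled since the previous doorbell, batching roughly 16
 * posts per register write instead of one each.
 */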
164
165static inline int vnic_rq_posting_soon(struct vnic_rq *rq)
166{
167 return (rq->to_use->index & VNIC_RQ_RETURN_RATE) == 0;
168}
169
170static inline void vnic_rq_return_descs(struct vnic_rq *rq, unsigned int count)
171{
172 rq->ring.desc_avail += count;
173}
174
175enum desc_return_options {
176 VNIC_RQ_RETURN_DESC,
177 VNIC_RQ_DEFER_RETURN_DESC,
178};
179
180static inline void vnic_rq_service(struct vnic_rq *rq,
181 struct cq_desc *cq_desc, u16 completed_index,
182 int desc_return, void (*buf_service)(struct vnic_rq *rq,
183 struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
184 int skipped, void *opaque), void *opaque)
185{
186 struct vnic_rq_buf *buf;
187 int skipped;
188
189 buf = rq->to_clean;
190 while (1) {
191
192 skipped = (buf->index != completed_index);
193
194 (*buf_service)(rq, cq_desc, buf, skipped, opaque);
195
196 if (desc_return == VNIC_RQ_RETURN_DESC)
197 rq->ring.desc_avail++;
198
199 rq->to_clean = buf->next;
200
201 if (!skipped)
202 break;
203
204 buf = rq->to_clean;
205 }
206}
207
208static inline int vnic_rq_fill(struct vnic_rq *rq,
209 int (*buf_fill)(struct vnic_rq *rq))
210{
211 int err;
212
213 while (vnic_rq_desc_avail(rq) > 1) {
214
215 err = (*buf_fill)(rq);
216 if (err)
217 return err;
218 }
219
220 return 0;
221}
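/*
 * Hypothetical buf_fill callback (illustrative; pdev, BUF_LEN and the
 * function name are invented for the example). Each call must map and
 * post exactly one buffer, returning nonzero to stop the fill early:
 *
 *   static int example_rq_fill(struct vnic_rq *rq)
 *   {
 *       struct sk_buff *skb = dev_alloc_skb(BUF_LEN);
 *       dma_addr_t pa;
 *
 *       if (!skb)
 *           return -ENOMEM;
 *
 *       pa = pci_map_single(pdev, skb->data, BUF_LEN, PCI_DMA_FROMDEVICE);
 *       vnic_rq_post(rq, skb, 0, pa, BUF_LEN);
 *       return 0;
 *   }
 */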
222
223void vnic_rq_free(struct vnic_rq *rq);
224int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index,
225 unsigned int desc_count, unsigned int desc_size);
226void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
227 unsigned int error_interrupt_enable,
228 unsigned int error_interrupt_offset);
229unsigned int vnic_rq_error_status(struct vnic_rq *rq);
230void vnic_rq_enable(struct vnic_rq *rq);
231int vnic_rq_disable(struct vnic_rq *rq);
232void vnic_rq_clean(struct vnic_rq *rq,
233 void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf));
234
235#endif /* _VNIC_RQ_H_ */
diff --git a/drivers/scsi/fnic/vnic_scsi.h b/drivers/scsi/fnic/vnic_scsi.h
new file mode 100644
index 000000000000..46baa5254001
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_scsi.h
@@ -0,0 +1,99 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_SCSI_H_
19#define _VNIC_SCSI_H_
20
21#define VNIC_FNIC_WQ_COPY_COUNT_MIN 1
22#define VNIC_FNIC_WQ_COPY_COUNT_MAX 1
23
24#define VNIC_FNIC_WQ_DESCS_MIN 64
25#define VNIC_FNIC_WQ_DESCS_MAX 128
26
27#define VNIC_FNIC_WQ_COPY_DESCS_MIN 64
28#define VNIC_FNIC_WQ_COPY_DESCS_MAX 512
29
30#define VNIC_FNIC_RQ_DESCS_MIN 64
31#define VNIC_FNIC_RQ_DESCS_MAX 128
32
33#define VNIC_FNIC_EDTOV_MIN 1000
34#define VNIC_FNIC_EDTOV_MAX 255000
35#define VNIC_FNIC_EDTOV_DEF 2000
36
37#define VNIC_FNIC_RATOV_MIN 1000
38#define VNIC_FNIC_RATOV_MAX 255000
39
40#define VNIC_FNIC_MAXDATAFIELDSIZE_MIN 256
41#define VNIC_FNIC_MAXDATAFIELDSIZE_MAX 2112
42
43#define VNIC_FNIC_FLOGI_RETRIES_MIN 0
44#define VNIC_FNIC_FLOGI_RETRIES_MAX 0xffffffff
45#define VNIC_FNIC_FLOGI_RETRIES_DEF 0xffffffff
46
47#define VNIC_FNIC_FLOGI_TIMEOUT_MIN 1000
48#define VNIC_FNIC_FLOGI_TIMEOUT_MAX 255000
49
50#define VNIC_FNIC_PLOGI_RETRIES_MIN 0
51#define VNIC_FNIC_PLOGI_RETRIES_MAX 255
52#define VNIC_FNIC_PLOGI_RETRIES_DEF 8
53
54#define VNIC_FNIC_PLOGI_TIMEOUT_MIN 1000
55#define VNIC_FNIC_PLOGI_TIMEOUT_MAX 255000
56
57#define VNIC_FNIC_IO_THROTTLE_COUNT_MIN 256
58#define VNIC_FNIC_IO_THROTTLE_COUNT_MAX 4096
59
60#define VNIC_FNIC_LINK_DOWN_TIMEOUT_MIN 0
61#define VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX 240000
62
63#define VNIC_FNIC_PORT_DOWN_TIMEOUT_MIN 0
64#define VNIC_FNIC_PORT_DOWN_TIMEOUT_MAX 240000
65
66#define VNIC_FNIC_PORT_DOWN_IO_RETRIES_MIN 0
67#define VNIC_FNIC_PORT_DOWN_IO_RETRIES_MAX 255
68
69#define VNIC_FNIC_LUNS_PER_TARGET_MIN 1
70#define VNIC_FNIC_LUNS_PER_TARGET_MAX 1024
71
72/* Device-specific region: scsi configuration */
73struct vnic_fc_config {
74 u64 node_wwn;
75 u64 port_wwn;
76 u32 flags;
77 u32 wq_enet_desc_count;
78 u32 wq_copy_desc_count;
79 u32 rq_desc_count;
80 u32 flogi_retries;
81 u32 flogi_timeout;
82 u32 plogi_retries;
83 u32 plogi_timeout;
84 u32 io_throttle_count;
85 u32 link_down_timeout;
86 u32 port_down_timeout;
87 u32 port_down_io_retries;
88 u32 luns_per_tgt;
89 u16 maxdatafieldsize;
90 u16 ed_tov;
91 u16 ra_tov;
92 u16 intr_timer;
93 u8 intr_timer_type;
94};
95
96#define VFCF_FCP_SEQ_LVL_ERR 0x1 /* Enable FCP-2 Error Recovery */
97#define VFCF_PERBI 0x2 /* persistent binding info available */
98
99#endif /* _VNIC_SCSI_H_ */
diff --git a/drivers/scsi/fnic/vnic_stats.h b/drivers/scsi/fnic/vnic_stats.h
new file mode 100644
index 000000000000..5372e23c1cb3
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_stats.h
@@ -0,0 +1,68 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_STATS_H_
19#define _VNIC_STATS_H_
20
21/* Tx statistics */
22struct vnic_tx_stats {
23 u64 tx_frames_ok;
24 u64 tx_unicast_frames_ok;
25 u64 tx_multicast_frames_ok;
26 u64 tx_broadcast_frames_ok;
27 u64 tx_bytes_ok;
28 u64 tx_unicast_bytes_ok;
29 u64 tx_multicast_bytes_ok;
30 u64 tx_broadcast_bytes_ok;
31 u64 tx_drops;
32 u64 tx_errors;
33 u64 tx_tso;
34 u64 rsvd[16];
35};
36
37/* Rx statistics */
38struct vnic_rx_stats {
39 u64 rx_frames_ok;
40 u64 rx_frames_total;
41 u64 rx_unicast_frames_ok;
42 u64 rx_multicast_frames_ok;
43 u64 rx_broadcast_frames_ok;
44 u64 rx_bytes_ok;
45 u64 rx_unicast_bytes_ok;
46 u64 rx_multicast_bytes_ok;
47 u64 rx_broadcast_bytes_ok;
48 u64 rx_drop;
49 u64 rx_no_bufs;
50 u64 rx_errors;
51 u64 rx_rss;
52 u64 rx_crc_errors;
53 u64 rx_frames_64;
54 u64 rx_frames_127;
55 u64 rx_frames_255;
56 u64 rx_frames_511;
57 u64 rx_frames_1023;
58 u64 rx_frames_1518;
59 u64 rx_frames_to_max;
60 u64 rsvd[16];
61};
62
63struct vnic_stats {
64 struct vnic_tx_stats tx;
65 struct vnic_rx_stats rx;
66};
67
68#endif /* _VNIC_STATS_H_ */
diff --git a/drivers/scsi/fnic/vnic_wq.c b/drivers/scsi/fnic/vnic_wq.c
new file mode 100644
index 000000000000..1f9ea790d130
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_wq.c
@@ -0,0 +1,182 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18
19#include <linux/errno.h>
20#include <linux/types.h>
21#include <linux/pci.h>
22#include <linux/delay.h>
23#include "vnic_dev.h"
24#include "vnic_wq.h"
25
26static int vnic_wq_alloc_bufs(struct vnic_wq *wq)
27{
28 struct vnic_wq_buf *buf;
29 struct vnic_dev *vdev;
30 unsigned int i, j, count = wq->ring.desc_count;
31 unsigned int blks = VNIC_WQ_BUF_BLKS_NEEDED(count);
32
33 vdev = wq->vdev;
34
35 for (i = 0; i < blks; i++) {
36 wq->bufs[i] = kzalloc(VNIC_WQ_BUF_BLK_SZ, GFP_ATOMIC);
37 if (!wq->bufs[i]) {
38 printk(KERN_ERR "Failed to alloc wq_bufs\n");
39 return -ENOMEM;
40 }
41 }
42
43 for (i = 0; i < blks; i++) {
44 buf = wq->bufs[i];
45 for (j = 0; j < VNIC_WQ_BUF_BLK_ENTRIES; j++) {
46 buf->index = i * VNIC_WQ_BUF_BLK_ENTRIES + j;
47 buf->desc = (u8 *)wq->ring.descs +
48 wq->ring.desc_size * buf->index;
49 if (buf->index + 1 == count) {
50 buf->next = wq->bufs[0];
51 break;
52 } else if (j + 1 == VNIC_WQ_BUF_BLK_ENTRIES) {
53 buf->next = wq->bufs[i + 1];
54 } else {
55 buf->next = buf + 1;
56 buf++;
57 }
58 }
59 }
60
61 wq->to_use = wq->to_clean = wq->bufs[0];
62
63 return 0;
64}
65
66void vnic_wq_free(struct vnic_wq *wq)
67{
68 struct vnic_dev *vdev;
69 unsigned int i;
70
71 vdev = wq->vdev;
72
73 vnic_dev_free_desc_ring(vdev, &wq->ring);
74
75 for (i = 0; i < VNIC_WQ_BUF_BLKS_MAX; i++) {
76 kfree(wq->bufs[i]);
77 wq->bufs[i] = NULL;
78 }
79
80 wq->ctrl = NULL;
81
82}
83
84int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index,
85 unsigned int desc_count, unsigned int desc_size)
86{
87 int err;
88
89 wq->index = index;
90 wq->vdev = vdev;
91
92 wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index);
93 if (!wq->ctrl) {
94 printk(KERN_ERR "Failed to hook WQ[%d] resource\n", index);
95 return -EINVAL;
96 }
97
98 vnic_wq_disable(wq);
99
100 err = vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size);
101 if (err)
102 return err;
103
104 err = vnic_wq_alloc_bufs(wq);
105 if (err) {
106 vnic_wq_free(wq);
107 return err;
108 }
109
110 return 0;
111}
112
113void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index,
114 unsigned int error_interrupt_enable,
115 unsigned int error_interrupt_offset)
116{
117 u64 paddr;
118
119 paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET;
120 writeq(paddr, &wq->ctrl->ring_base);
121 iowrite32(wq->ring.desc_count, &wq->ctrl->ring_size);
122 iowrite32(0, &wq->ctrl->fetch_index);
123 iowrite32(0, &wq->ctrl->posted_index);
124 iowrite32(cq_index, &wq->ctrl->cq_index);
125 iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable);
126 iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset);
127 iowrite32(0, &wq->ctrl->error_status);
128}
129
130unsigned int vnic_wq_error_status(struct vnic_wq *wq)
131{
132 return ioread32(&wq->ctrl->error_status);
133}
134
135void vnic_wq_enable(struct vnic_wq *wq)
136{
137 iowrite32(1, &wq->ctrl->enable);
138}
139
140int vnic_wq_disable(struct vnic_wq *wq)
141{
142 unsigned int wait;
143
144 iowrite32(0, &wq->ctrl->enable);
145
146 /* Wait for HW to ACK disable request */
147 for (wait = 0; wait < 100; wait++) {
148 if (!(ioread32(&wq->ctrl->running)))
149 return 0;
150 udelay(1);
151 }
152
153 printk(KERN_ERR "Failed to disable WQ[%d]\n", wq->index);
154
155 return -ETIMEDOUT;
156}
157
158void vnic_wq_clean(struct vnic_wq *wq,
159 void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf))
160{
161 struct vnic_wq_buf *buf;
162
163 BUG_ON(ioread32(&wq->ctrl->enable));
164
165 buf = wq->to_clean;
166
167 while (vnic_wq_desc_used(wq) > 0) {
168
169 (*buf_clean)(wq, buf);
170
171 buf = wq->to_clean = buf->next;
172 wq->ring.desc_avail++;
173 }
174
175 wq->to_use = wq->to_clean = wq->bufs[0];
176
177 iowrite32(0, &wq->ctrl->fetch_index);
178 iowrite32(0, &wq->ctrl->posted_index);
179 iowrite32(0, &wq->ctrl->error_status);
180
181 vnic_dev_clear_desc_ring(&wq->ring);
182}
diff --git a/drivers/scsi/fnic/vnic_wq.h b/drivers/scsi/fnic/vnic_wq.h
new file mode 100644
index 000000000000..5cd094f79281
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_wq.h
@@ -0,0 +1,175 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_WQ_H_
19#define _VNIC_WQ_H_
20
21#include <linux/pci.h>
22#include "vnic_dev.h"
23#include "vnic_cq.h"
24
25/*
26 * These defines avoid a symbol clash between fnic and enic (the Cisco
27 * 10G Ethernet driver) when both drivers are built into the kernel (=y).
28 */
29#define vnic_wq_desc_avail fnic_wq_desc_avail
30#define vnic_wq_desc_used fnic_wq_desc_used
31#define vnic_wq_next_desc fnic_wq_next_desc
32#define vnic_wq_post fnic_wq_post
33#define vnic_wq_service fnic_wq_service
34#define vnic_wq_free fnic_wq_free
35#define vnic_wq_alloc fnic_wq_alloc
36#define vnic_wq_init fnic_wq_init
37#define vnic_wq_error_status fnic_wq_error_status
38#define vnic_wq_enable fnic_wq_enable
39#define vnic_wq_disable fnic_wq_disable
40#define vnic_wq_clean fnic_wq_clean
41
42/* Work queue control */
43struct vnic_wq_ctrl {
44 u64 ring_base; /* 0x00 */
45 u32 ring_size; /* 0x08 */
46 u32 pad0;
47 u32 posted_index; /* 0x10 */
48 u32 pad1;
49 u32 cq_index; /* 0x18 */
50 u32 pad2;
51 u32 enable; /* 0x20 */
52 u32 pad3;
53 u32 running; /* 0x28 */
54 u32 pad4;
55 u32 fetch_index; /* 0x30 */
56 u32 pad5;
57 u32 dca_value; /* 0x38 */
58 u32 pad6;
59 u32 error_interrupt_enable; /* 0x40 */
60 u32 pad7;
61 u32 error_interrupt_offset; /* 0x48 */
62 u32 pad8;
63 u32 error_status; /* 0x50 */
64 u32 pad9;
65};
66
67struct vnic_wq_buf {
68 struct vnic_wq_buf *next;
69 dma_addr_t dma_addr;
70 void *os_buf;
71 unsigned int len;
72 unsigned int index;
73 int sop;
74 void *desc;
75};
76
77/* Break the vnic_wq_buf allocations into blocks of 64 entries */
78#define VNIC_WQ_BUF_BLK_ENTRIES 64
79#define VNIC_WQ_BUF_BLK_SZ \
80 (VNIC_WQ_BUF_BLK_ENTRIES * sizeof(struct vnic_wq_buf))
81#define VNIC_WQ_BUF_BLKS_NEEDED(entries) \
82 DIV_ROUND_UP(entries, VNIC_WQ_BUF_BLK_ENTRIES)
83#define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(4096)
84
85struct vnic_wq {
86 unsigned int index;
87 struct vnic_dev *vdev;
88 struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */
89 struct vnic_dev_ring ring;
90 struct vnic_wq_buf *bufs[VNIC_WQ_BUF_BLKS_MAX];
91 struct vnic_wq_buf *to_use;
92 struct vnic_wq_buf *to_clean;
93 unsigned int pkts_outstanding;
94};
95
96static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq)
97{
98 /* how many does SW own? */
99 return wq->ring.desc_avail;
100}
101
102static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq)
103{
104 /* how many does HW own? */
105 return wq->ring.desc_count - wq->ring.desc_avail - 1;
106}
107
108static inline void *vnic_wq_next_desc(struct vnic_wq *wq)
109{
110 return wq->to_use->desc;
111}
112
113static inline void vnic_wq_post(struct vnic_wq *wq,
114 void *os_buf, dma_addr_t dma_addr,
115 unsigned int len, int sop, int eop)
116{
117 struct vnic_wq_buf *buf = wq->to_use;
118
119 buf->sop = sop;
120 buf->os_buf = eop ? os_buf : NULL;
121 buf->dma_addr = dma_addr;
122 buf->len = len;
123
124 buf = buf->next;
125 if (eop) {
126 /* Adding write memory barrier prevents compiler and/or CPU
127 * reordering, thus avoiding descriptor posting before
128 * descriptor is initialized. Otherwise, hardware can read
129 * stale descriptor fields.
130 */
131 wmb();
132 iowrite32(buf->index, &wq->ctrl->posted_index);
133 }
134 wq->to_use = buf;
135
136 wq->ring.desc_avail--;
137}
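/*
 * Illustrative note (buffer names invented): a two-fragment send posts the
 * header with sop=1/eop=0 and the payload with sop=0/eop=1. Only the eop
 * post rings the posted_index doorbell, and only the eop buffer keeps the
 * os_buf pointer, so completion frees the packet exactly once:
 *
 *   vnic_wq_post(wq, skb, pa_hdr,  hdr_len,  1, 0);
 *   vnic_wq_post(wq, skb, pa_data, data_len, 0, 1);
 */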
138
139static inline void vnic_wq_service(struct vnic_wq *wq,
140 struct cq_desc *cq_desc, u16 completed_index,
141 void (*buf_service)(struct vnic_wq *wq,
142 struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque),
143 void *opaque)
144{
145 struct vnic_wq_buf *buf;
146
147 buf = wq->to_clean;
148 while (1) {
149
150 (*buf_service)(wq, cq_desc, buf, opaque);
151
152 wq->ring.desc_avail++;
153
154 wq->to_clean = buf->next;
155
156 if (buf->index == completed_index)
157 break;
158
159 buf = wq->to_clean;
160 }
161}
162
163void vnic_wq_free(struct vnic_wq *wq);
164int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index,
165 unsigned int desc_count, unsigned int desc_size);
166void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index,
167 unsigned int error_interrupt_enable,
168 unsigned int error_interrupt_offset);
169unsigned int vnic_wq_error_status(struct vnic_wq *wq);
170void vnic_wq_enable(struct vnic_wq *wq);
171int vnic_wq_disable(struct vnic_wq *wq);
172void vnic_wq_clean(struct vnic_wq *wq,
173 void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf));
174
175#endif /* _VNIC_WQ_H_ */
diff --git a/drivers/scsi/fnic/vnic_wq_copy.c b/drivers/scsi/fnic/vnic_wq_copy.c
new file mode 100644
index 000000000000..9eab7e7caf38
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_wq_copy.c
@@ -0,0 +1,117 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18
19#include <linux/errno.h>
20#include <linux/types.h>
21#include <linux/pci.h>
22#include <linux/delay.h>
23#include "vnic_wq_copy.h"
24
25void vnic_wq_copy_enable(struct vnic_wq_copy *wq)
26{
27 iowrite32(1, &wq->ctrl->enable);
28}
29
30int vnic_wq_copy_disable(struct vnic_wq_copy *wq)
31{
32 unsigned int wait;
33
34 iowrite32(0, &wq->ctrl->enable);
35
36 /* Wait for HW to ACK disable request */
37 for (wait = 0; wait < 100; wait++) {
38 if (!(ioread32(&wq->ctrl->running)))
39 return 0;
40 udelay(1);
41 }
42
43 printk(KERN_ERR "Failed to disable Copy WQ[%d],"
44 " fetch index=%d, posted_index=%d\n",
45 wq->index, ioread32(&wq->ctrl->fetch_index),
46 ioread32(&wq->ctrl->posted_index));
47
48 return -ENODEV;
49}
50
51void vnic_wq_copy_clean(struct vnic_wq_copy *wq,
52 void (*q_clean)(struct vnic_wq_copy *wq,
53 struct fcpio_host_req *wq_desc))
54{
55 BUG_ON(ioread32(&wq->ctrl->enable));
56
57 if (vnic_wq_copy_desc_in_use(wq))
58 vnic_wq_copy_service(wq, -1, q_clean);
59
60 wq->to_use_index = wq->to_clean_index = 0;
61
62 iowrite32(0, &wq->ctrl->fetch_index);
63 iowrite32(0, &wq->ctrl->posted_index);
64 iowrite32(0, &wq->ctrl->error_status);
65
66 vnic_dev_clear_desc_ring(&wq->ring);
67}
68
69void vnic_wq_copy_free(struct vnic_wq_copy *wq)
70{
71 struct vnic_dev *vdev;
72
73 vdev = wq->vdev;
74 vnic_dev_free_desc_ring(vdev, &wq->ring);
75 wq->ctrl = NULL;
76}
77
78int vnic_wq_copy_alloc(struct vnic_dev *vdev, struct vnic_wq_copy *wq,
79 unsigned int index, unsigned int desc_count,
80 unsigned int desc_size)
81{
82 int err;
83
84 wq->index = index;
85 wq->vdev = vdev;
86 wq->to_use_index = wq->to_clean_index = 0;
87 wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index);
88 if (!wq->ctrl) {
89 printk(KERN_ERR "Failed to hook COPY WQ[%d] resource\n", index);
90 return -EINVAL;
91 }
92
93 vnic_wq_copy_disable(wq);
94
95 err = vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size);
96 if (err)
97 return err;
98
99 return 0;
100}
101
102void vnic_wq_copy_init(struct vnic_wq_copy *wq, unsigned int cq_index,
103 unsigned int error_interrupt_enable,
104 unsigned int error_interrupt_offset)
105{
106 u64 paddr;
107
108 paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET;
109 writeq(paddr, &wq->ctrl->ring_base);
110 iowrite32(wq->ring.desc_count, &wq->ctrl->ring_size);
111 iowrite32(0, &wq->ctrl->fetch_index);
112 iowrite32(0, &wq->ctrl->posted_index);
113 iowrite32(cq_index, &wq->ctrl->cq_index);
114 iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable);
115 iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset);
116}
117
diff --git a/drivers/scsi/fnic/vnic_wq_copy.h b/drivers/scsi/fnic/vnic_wq_copy.h
new file mode 100644
index 000000000000..6aff9740c3df
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_wq_copy.h
@@ -0,0 +1,128 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _VNIC_WQ_COPY_H_
19#define _VNIC_WQ_COPY_H_
20
21#include <linux/pci.h>
22#include "vnic_wq.h"
23#include "fcpio.h"
24
25#define VNIC_WQ_COPY_MAX 1
26
27struct vnic_wq_copy {
28 unsigned int index;
29 struct vnic_dev *vdev;
30 struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */
31 struct vnic_dev_ring ring;
32 unsigned to_use_index;
33 unsigned to_clean_index;
34};
35
36static inline unsigned int vnic_wq_copy_desc_avail(struct vnic_wq_copy *wq)
37{
38 return wq->ring.desc_avail;
39}
40
41static inline unsigned int vnic_wq_copy_desc_in_use(struct vnic_wq_copy *wq)
42{
43 return wq->ring.desc_count - 1 - wq->ring.desc_avail;
44}
45
46static inline void *vnic_wq_copy_next_desc(struct vnic_wq_copy *wq)
47{
48 struct fcpio_host_req *desc = wq->ring.descs;
49 return &desc[wq->to_use_index];
50}
51
52static inline void vnic_wq_copy_post(struct vnic_wq_copy *wq)
53{
54
55 if (++wq->to_use_index == wq->ring.desc_count)
56  wq->to_use_index = 0;
57 wq->ring.desc_avail--;
58
59 /* Adding write memory barrier prevents compiler and/or CPU
60 * reordering, thus avoiding descriptor posting before
61 * descriptor is initialized. Otherwise, hardware can read
62 * stale descriptor fields.
63 */
64 wmb();
65
66 iowrite32(wq->to_use_index, &wq->ctrl->posted_index);
67}
68
69static inline void vnic_wq_copy_desc_process(struct vnic_wq_copy *wq, u16 index)
70{
71 unsigned int cnt;
72
73 if (wq->to_clean_index <= index)
74 cnt = (index - wq->to_clean_index) + 1;
75 else
76 cnt = wq->ring.desc_count - wq->to_clean_index + index + 1;
77
78 wq->to_clean_index = ((index + 1) % wq->ring.desc_count);
79 wq->ring.desc_avail += cnt;
80
81}
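/*
 * Worked example (illustrative): with desc_count = 64, to_clean_index = 62
 * and completed index 1, the wrap branch gives cnt = 64 - 62 + 1 + 1 = 4,
 * covering descriptors 62, 63, 0 and 1; to_clean_index advances to 2 and
 * four descriptors return to desc_avail.
 */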
82
83static inline void vnic_wq_copy_service(struct vnic_wq_copy *wq,
84 u16 completed_index,
85 void (*q_service)(struct vnic_wq_copy *wq,
86 struct fcpio_host_req *wq_desc))
87{
88 struct fcpio_host_req *wq_desc = wq->ring.descs;
89 unsigned int curr_index;
90
91 while (1) {
92
93 if (q_service)
94 (*q_service)(wq, &wq_desc[wq->to_clean_index]);
95
96 wq->ring.desc_avail++;
97
98 curr_index = wq->to_clean_index;
99
100 /* increment the to-clean index so that we start
101 * with an unprocessed index next time we enter the loop
102 */
103 if (++wq->to_clean_index == wq->ring.desc_count)
104  wq->to_clean_index = 0;
105
106 if (curr_index == completed_index)
107 break;
108
109 /* we have cleaned all the entries */
110 if ((completed_index == (u16)-1) &&
111 (wq->to_clean_index == wq->to_use_index))
112 break;
113 }
114}
115
116void vnic_wq_copy_enable(struct vnic_wq_copy *wq);
117int vnic_wq_copy_disable(struct vnic_wq_copy *wq);
118void vnic_wq_copy_free(struct vnic_wq_copy *wq);
119int vnic_wq_copy_alloc(struct vnic_dev *vdev, struct vnic_wq_copy *wq,
120 unsigned int index, unsigned int desc_count, unsigned int desc_size);
121void vnic_wq_copy_init(struct vnic_wq_copy *wq, unsigned int cq_index,
122 unsigned int error_interrupt_enable,
123 unsigned int error_interrupt_offset);
124void vnic_wq_copy_clean(struct vnic_wq_copy *wq,
125 void (*q_clean)(struct vnic_wq_copy *wq,
126 struct fcpio_host_req *wq_desc));
127
128#endif /* _VNIC_WQ_COPY_H_ */
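
A note on the ring arithmetic above. vnic_wq_copy keeps two indices, to_use_index and to_clean_index, and vnic_wq_copy_desc_in_use() reports desc_count - 1 - desc_avail, the usual reason being that one slot is left unused so a full ring can be told apart from an empty one. The wrap-around completion count in vnic_wq_copy_desc_process() is the part that is easy to misread; the following standalone sketch (toy values, not driver code) reproduces just that computation:

    #include <assert.h>

    static unsigned int completed_count(unsigned int to_clean_index,
                                        unsigned int index,
                                        unsigned int desc_count)
    {
        if (to_clean_index <= index)
            return (index - to_clean_index) + 1;
        /* the completions wrapped past the end of the ring */
        return desc_count - to_clean_index + index + 1;
    }

    int main(void)
    {
        /* 8-slot ring, cleaning from slot 6, hardware completed up to
         * slot 1: slots 6, 7, 0 and 1 are done, so 4 are freed */
        assert(completed_count(6, 1, 8) == 4);
        /* no wrap: slots 2 and 3 */
        assert(completed_count(2, 3, 8) == 2);
        return 0;
    }

The posting path also shows the usual barrier-before-doorbell rule: wmb() orders the descriptor writes ahead of the iowrite32() that hands the slot to hardware.
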
diff --git a/drivers/scsi/fnic/wq_enet_desc.h b/drivers/scsi/fnic/wq_enet_desc.h
new file mode 100644
index 000000000000..b121cbad18b8
--- /dev/null
+++ b/drivers/scsi/fnic/wq_enet_desc.h
@@ -0,0 +1,96 @@
1/*
2 * Copyright 2008 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
4 *
5 * This program is free software; you may redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 * SOFTWARE.
17 */
18#ifndef _WQ_ENET_DESC_H_
19#define _WQ_ENET_DESC_H_
20
21/* Ethernet work queue descriptor: 16B */
22struct wq_enet_desc {
23 __le64 address;
24 __le16 length;
25 __le16 mss_loopback;
26 __le16 header_length_flags;
27 __le16 vlan_tag;
28};
29
30#define WQ_ENET_ADDR_BITS 64
31#define WQ_ENET_LEN_BITS 14
32#define WQ_ENET_LEN_MASK ((1 << WQ_ENET_LEN_BITS) - 1)
33#define WQ_ENET_MSS_BITS 14
34#define WQ_ENET_MSS_MASK ((1 << WQ_ENET_MSS_BITS) - 1)
35#define WQ_ENET_MSS_SHIFT 2
36#define WQ_ENET_LOOPBACK_SHIFT 1
37#define WQ_ENET_HDRLEN_BITS 10
38#define WQ_ENET_HDRLEN_MASK ((1 << WQ_ENET_HDRLEN_BITS) - 1)
39#define WQ_ENET_FLAGS_OM_BITS 2
40#define WQ_ENET_FLAGS_OM_MASK ((1 << WQ_ENET_FLAGS_OM_BITS) - 1)
41#define WQ_ENET_FLAGS_EOP_SHIFT 12
42#define WQ_ENET_FLAGS_CQ_ENTRY_SHIFT 13
43#define WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT 14
44#define WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT 15
45
46#define WQ_ENET_OFFLOAD_MODE_CSUM 0
47#define WQ_ENET_OFFLOAD_MODE_RESERVED 1
48#define WQ_ENET_OFFLOAD_MODE_CSUM_L4 2
49#define WQ_ENET_OFFLOAD_MODE_TSO 3
50
51static inline void wq_enet_desc_enc(struct wq_enet_desc *desc,
52 u64 address, u16 length, u16 mss, u16 header_length,
53 u8 offload_mode, u8 eop, u8 cq_entry, u8 fcoe_encap,
54 u8 vlan_tag_insert, u16 vlan_tag, u8 loopback)
55{
56 desc->address = cpu_to_le64(address);
57 desc->length = cpu_to_le16(length & WQ_ENET_LEN_MASK);
58 desc->mss_loopback = cpu_to_le16((mss & WQ_ENET_MSS_MASK) <<
59 WQ_ENET_MSS_SHIFT | (loopback & 1) << WQ_ENET_LOOPBACK_SHIFT);
60 desc->header_length_flags = cpu_to_le16(
61 (header_length & WQ_ENET_HDRLEN_MASK) |
62 (offload_mode & WQ_ENET_FLAGS_OM_MASK) << WQ_ENET_HDRLEN_BITS |
63 (eop & 1) << WQ_ENET_FLAGS_EOP_SHIFT |
64 (cq_entry & 1) << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT |
65 (fcoe_encap & 1) << WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT |
66 (vlan_tag_insert & 1) << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT);
67 desc->vlan_tag = cpu_to_le16(vlan_tag);
68}
69
70static inline void wq_enet_desc_dec(struct wq_enet_desc *desc,
71 u64 *address, u16 *length, u16 *mss, u16 *header_length,
72 u8 *offload_mode, u8 *eop, u8 *cq_entry, u8 *fcoe_encap,
73 u8 *vlan_tag_insert, u16 *vlan_tag, u8 *loopback)
74{
75 *address = le64_to_cpu(desc->address);
76 *length = le16_to_cpu(desc->length) & WQ_ENET_LEN_MASK;
77 *mss = (le16_to_cpu(desc->mss_loopback) >> WQ_ENET_MSS_SHIFT) &
78 WQ_ENET_MSS_MASK;
79 *loopback = (u8)((le16_to_cpu(desc->mss_loopback) >>
80 WQ_ENET_LOOPBACK_SHIFT) & 1);
81 *header_length = le16_to_cpu(desc->header_length_flags) &
82 WQ_ENET_HDRLEN_MASK;
83 *offload_mode = (u8)((le16_to_cpu(desc->header_length_flags) >>
84 WQ_ENET_HDRLEN_BITS) & WQ_ENET_FLAGS_OM_MASK);
85 *eop = (u8)((le16_to_cpu(desc->header_length_flags) >>
86 WQ_ENET_FLAGS_EOP_SHIFT) & 1);
87 *cq_entry = (u8)((le16_to_cpu(desc->header_length_flags) >>
88 WQ_ENET_FLAGS_CQ_ENTRY_SHIFT) & 1);
89 *fcoe_encap = (u8)((le16_to_cpu(desc->header_length_flags) >>
90 WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT) & 1);
91 *vlan_tag_insert = (u8)((le16_to_cpu(desc->header_length_flags) >>
92 WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT) & 1);
93 *vlan_tag = le16_to_cpu(desc->vlan_tag);
94}
95
96#endif /* _WQ_ENET_DESC_H_ */
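
The encode/decode pair above is pure shift-and-mask packing. Below is a standalone round-trip sketch of the header_length_flags word; plain uint16_t stands in for the kernel's __le16/cpu_to_le16 handling, so byte order is deliberately elided, and the masks are copied from the defines above:

    #include <assert.h>
    #include <stdint.h>

    #define HDRLEN_BITS 10
    #define HDRLEN_MASK ((1u << HDRLEN_BITS) - 1)
    #define OM_BITS     2
    #define OM_MASK     ((1u << OM_BITS) - 1)
    #define EOP_SHIFT   12

    int main(void)
    {
        /* pack header length (10 bits), offload mode (2 bits) and the
         * EOP flag into one 16-bit word, as wq_enet_desc_enc() does */
        uint16_t hdr_len = 54, om = 2 /* CSUM_L4 */, eop = 1;
        uint16_t word = (hdr_len & HDRLEN_MASK) |
                        (om & OM_MASK) << HDRLEN_BITS |
                        (eop & 1u) << EOP_SHIFT;

        /* decode and verify the round trip, as wq_enet_desc_dec() does */
        assert((word & HDRLEN_MASK) == 54);
        assert(((word >> HDRLEN_BITS) & OM_MASK) == 2);
        assert(((word >> EOP_SHIFT) & 1u) == 1);
        return 0;
    }
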
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index babd4cc0cb25..36b1d1052ba1 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -69,7 +69,7 @@
69#define MPT2SAS_AUTHOR "LSI Corporation <DL-MPTFusionLinux@lsi.com>" 69#define MPT2SAS_AUTHOR "LSI Corporation <DL-MPTFusionLinux@lsi.com>"
70#define MPT2SAS_DESCRIPTION "LSI MPT Fusion SAS 2.0 Device Driver" 70#define MPT2SAS_DESCRIPTION "LSI MPT Fusion SAS 2.0 Device Driver"
71#define MPT2SAS_DRIVER_VERSION "01.100.02.00" 71#define MPT2SAS_DRIVER_VERSION "01.100.02.00"
72#define MPT2SAS_MAJOR_VERSION 00 72#define MPT2SAS_MAJOR_VERSION 01
73#define MPT2SAS_MINOR_VERSION 100 73#define MPT2SAS_MINOR_VERSION 100
74#define MPT2SAS_BUILD_VERSION 02 74#define MPT2SAS_BUILD_VERSION 02
75#define MPT2SAS_RELEASE_VERSION 00 75#define MPT2SAS_RELEASE_VERSION 00
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 6f51ca485f35..e2b50d8f57a8 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -425,6 +425,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
425 INIT_LIST_HEAD(&starget->devices); 425 INIT_LIST_HEAD(&starget->devices);
426 starget->state = STARGET_CREATED; 426 starget->state = STARGET_CREATED;
427 starget->scsi_level = SCSI_2; 427 starget->scsi_level = SCSI_2;
428 starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
428 retry: 429 retry:
429 spin_lock_irqsave(shost->host_lock, flags); 430 spin_lock_irqsave(shost->host_lock, flags);
430 431
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 094795455293..0a2ce7b6325c 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -357,7 +357,7 @@ int iscsi_session_chkready(struct iscsi_cls_session *session)
357 err = 0; 357 err = 0;
358 break; 358 break;
359 case ISCSI_SESSION_FAILED: 359 case ISCSI_SESSION_FAILED:
360 err = DID_TRANSPORT_DISRUPTED << 16; 360 err = DID_IMM_RETRY << 16;
361 break; 361 break;
362 case ISCSI_SESSION_FREE: 362 case ISCSI_SESSION_FREE:
363 err = DID_TRANSPORT_FAILFAST << 16; 363 err = DID_TRANSPORT_FAILFAST << 16;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index e1716f14cd47..91e316fe6522 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
1065 return blk_trace_setup(sdp->device->request_queue, 1065 return blk_trace_setup(sdp->device->request_queue,
1066 sdp->disk->disk_name, 1066 sdp->disk->disk_name,
1067 MKDEV(SCSI_GENERIC_MAJOR, sdp->index), 1067 MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
1068 NULL,
1068 (char *)arg); 1069 (char *)arg);
1069 case BLKTRACESTART: 1070 case BLKTRACESTART:
1070 return blk_trace_startstop(sdp->device->request_queue, 1); 1071 return blk_trace_startstop(sdp->device->request_queue, 1);
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index b4b39811b445..a0127e93ade0 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -137,6 +137,7 @@ struct uart_8250_port {
137 unsigned char mcr; 137 unsigned char mcr;
138 unsigned char mcr_mask; /* mask of user bits */ 138 unsigned char mcr_mask; /* mask of user bits */
139 unsigned char mcr_force; /* mask of forced bits */ 139 unsigned char mcr_force; /* mask of forced bits */
140 unsigned char cur_iotype; /* Running I/O type */
140 141
141 /* 142 /*
142 * Some bits in registers are cleared on a read, so they must 143 * Some bits in registers are cleared on a read, so they must
@@ -471,6 +472,7 @@ static void io_serial_out(struct uart_port *p, int offset, int value)
471 472
472static void set_io_from_upio(struct uart_port *p) 473static void set_io_from_upio(struct uart_port *p)
473{ 474{
475 struct uart_8250_port *up = (struct uart_8250_port *)p;
474 switch (p->iotype) { 476 switch (p->iotype) {
475 case UPIO_HUB6: 477 case UPIO_HUB6:
476 p->serial_in = hub6_serial_in; 478 p->serial_in = hub6_serial_in;
@@ -509,6 +511,8 @@ static void set_io_from_upio(struct uart_port *p)
509 p->serial_out = io_serial_out; 511 p->serial_out = io_serial_out;
510 break; 512 break;
511 } 513 }
514 /* Remember loaded iotype */
515 up->cur_iotype = p->iotype;
512} 516}
513 517
514static void 518static void
@@ -1937,6 +1941,9 @@ static int serial8250_startup(struct uart_port *port)
1937 up->capabilities = uart_config[up->port.type].flags; 1941 up->capabilities = uart_config[up->port.type].flags;
1938 up->mcr = 0; 1942 up->mcr = 0;
1939 1943
1944 if (up->port.iotype != up->cur_iotype)
1945 set_io_from_upio(port);
1946
1940 if (up->port.type == PORT_16C950) { 1947 if (up->port.type == PORT_16C950) {
1941 /* Wake up and initialize UART */ 1948 /* Wake up and initialize UART */
1942 up->acr = 0; 1949 up->acr = 0;
@@ -2563,6 +2570,9 @@ static void serial8250_config_port(struct uart_port *port, int flags)
2563 if (ret < 0) 2570 if (ret < 0)
2564 probeflags &= ~PROBE_RSA; 2571 probeflags &= ~PROBE_RSA;
2565 2572
2573 if (up->port.iotype != up->cur_iotype)
2574 set_io_from_upio(port);
2575
2566 if (flags & UART_CONFIG_TYPE) 2576 if (flags & UART_CONFIG_TYPE)
2567 autoconfig(up, probeflags); 2577 autoconfig(up, probeflags);
2568 if (up->port.type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ) 2578 if (up->port.type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ)
@@ -2671,6 +2681,11 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
2671{ 2681{
2672 int i; 2682 int i;
2673 2683
2684 for (i = 0; i < nr_uarts; i++) {
2685 struct uart_8250_port *up = &serial8250_ports[i];
2686 up->cur_iotype = 0xFF;
2687 }
2688
2674 serial8250_isa_init_ports(); 2689 serial8250_isa_init_ports();
2675 2690
2676 for (i = 0; i < nr_uarts; i++) { 2691 for (i = 0; i < nr_uarts; i++) {
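
Taken together, the 8250 hunks cache which I/O accessor set was last installed (cur_iotype), seed it with the impossible value 0xFF so a freshly registered port always gets accessors installed, and re-run set_io_from_upio() whenever the requested iotype no longer matches. A condensed sketch of that pattern, with invented names:

    #include <stdint.h>

    enum iotype { IOTYPE_PORT = 1, IOTYPE_MEM = 2 };

    struct port {
        uint8_t iotype;       /* what the caller asked for */
        uint8_t cur_iotype;   /* what is currently installed */
        void (*serial_out)(struct port *p, int offset, int value);
    };

    static void mem_out(struct port *p, int offset, int value)
    { (void)p; (void)offset; (void)value; /* ... */ }

    static void port_out(struct port *p, int offset, int value)
    { (void)p; (void)offset; (void)value; /* ... */ }

    static void set_io(struct port *p)
    {
        p->serial_out = (p->iotype == IOTYPE_MEM) ? mem_out : port_out;
        p->cur_iotype = p->iotype;   /* remember what we loaded */
    }

    static void startup(struct port *p)
    {
        /* 0xFF never matches a real iotype, so the accessors are
         * always installed on first use */
        if (p->iotype != p->cur_iotype)
            set_io(p);
    }

    int main(void)
    {
        struct port p = { .iotype = IOTYPE_MEM, .cur_iotype = 0xFF };
        startup(&p);   /* installs mem_out */
        return 0;
    }
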
diff --git a/drivers/serial/8250_gsc.c b/drivers/serial/8250_gsc.c
index 418b4fe9a0a1..33149d982e82 100644
--- a/drivers/serial/8250_gsc.c
+++ b/drivers/serial/8250_gsc.c
@@ -39,9 +39,9 @@ static int __init serial_init_chip(struct parisc_device *dev)
39 */ 39 */
40 if (parisc_parent(dev)->id.hw_type != HPHW_IOA) 40 if (parisc_parent(dev)->id.hw_type != HPHW_IOA)
41 printk(KERN_INFO 41 printk(KERN_INFO
42 "Serial: device 0x%lx not configured.\n" 42 "Serial: device 0x%llx not configured.\n"
43 "Enable support for Wax, Lasi, Asp or Dino.\n", 43 "Enable support for Wax, Lasi, Asp or Dino.\n",
44 dev->hpa.start); 44 (unsigned long long)dev->hpa.start);
45 return -ENODEV; 45 return -ENODEV;
46 } 46 }
47 47
diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index 6579e2be1dd1..a461b3b2c72d 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -1472,8 +1472,8 @@ static void icom_remove_adapter(struct icom_adapter *icom_adapter)
1472 1472
1473 free_irq(icom_adapter->pci_dev->irq, (void *) icom_adapter); 1473 free_irq(icom_adapter->pci_dev->irq, (void *) icom_adapter);
1474 iounmap(icom_adapter->base_addr); 1474 iounmap(icom_adapter->base_addr);
1475 icom_free_adapter(icom_adapter);
1476 pci_release_regions(icom_adapter->pci_dev); 1475 pci_release_regions(icom_adapter->pci_dev);
1476 icom_free_adapter(icom_adapter);
1477} 1477}
1478 1478
1479static void icom_kref_release(struct kref *kref) 1479static void icom_kref_release(struct kref *kref)
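
The icom hunk is an ordering fix: icom_free_adapter() frees the adapter structure, and pci_release_regions(icom_adapter->pci_dev) dereferences that same structure, so the free has to come last. The shape of the bug, in a standalone sketch with invented names:

    #include <stdlib.h>

    struct pci_dev { int bar; };

    struct adapter {
        struct pci_dev *pci_dev;
    };

    static void release_regions(struct pci_dev *d) { (void)d; /* ... */ }

    static void remove_adapter(struct adapter *a)
    {
        /* every teardown step that reads 'a' must run before free(a);
         * freeing first would make release_regions() read freed
         * memory, which is what the reordering above prevents */
        release_regions(a->pci_dev);
        free(a);
    }

    int main(void)
    {
        static struct pci_dev d;
        struct adapter *a = malloc(sizeof(*a));

        if (!a)
            return 1;
        a->pci_dev = &d;
        remove_adapter(a);
        return 0;
    }
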
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 9f460b175c50..5f0be40dfdab 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -1031,6 +1031,8 @@ imx_console_setup(struct console *co, char *options)
1031 if (co->index == -1 || co->index >= ARRAY_SIZE(imx_ports)) 1031 if (co->index == -1 || co->index >= ARRAY_SIZE(imx_ports))
1032 co->index = 0; 1032 co->index = 0;
1033 sport = imx_ports[co->index]; 1033 sport = imx_ports[co->index];
1034	if (sport == NULL)
1035 return -ENODEV;
1034 1036
1035 if (options) 1037 if (options)
1036 uart_parse_options(options, &baud, &parity, &bits, &flow); 1038 uart_parse_options(options, &baud, &parity, &bits, &flow);
diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
index 7f72f8ceaa6f..b3feb6198d57 100644
--- a/drivers/serial/mpc52xx_uart.c
+++ b/drivers/serial/mpc52xx_uart.c
@@ -988,7 +988,7 @@ mpc52xx_console_setup(struct console *co, char *options)
988 pr_debug("mpc52xx_console_setup co=%p, co->index=%i, options=%s\n", 988 pr_debug("mpc52xx_console_setup co=%p, co->index=%i, options=%s\n",
989 co, co->index, options); 989 co, co->index, options);
990 990
991 if ((co->index < 0) || (co->index > MPC52xx_PSC_MAXNUM)) { 991 if ((co->index < 0) || (co->index >= MPC52xx_PSC_MAXNUM)) {
992 pr_debug("PSC%x out of range\n", co->index); 992 pr_debug("PSC%x out of range\n", co->index);
993 return -EINVAL; 993 return -EINVAL;
994 } 994 }
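
The mpc52xx change is a one-character off-by-one fix: with MPC52xx_PSC_MAXNUM entries, valid console indices run from 0 through MAXNUM-1, so the old `> MPC52xx_PSC_MAXNUM` test wrongly accepted an index one past the end of the array. In miniature (toy MAXNUM; the real value comes from the platform header):

    #include <assert.h>

    #define MAXNUM 6
    static int ports[MAXNUM];   /* valid indices: 0 .. MAXNUM-1 */

    static int index_ok(int idx)
    {
        /* 'idx > MAXNUM' would accept idx == MAXNUM, one slot past
         * the end of ports[]; '>=' rejects it */
        return idx >= 0 && idx < MAXNUM;
    }

    int main(void)
    {
        assert(index_ok(MAXNUM - 1));
        assert(!index_ok(MAXNUM));
        (void)ports;
        return 0;
    }
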
diff --git a/drivers/ssb/embedded.c b/drivers/ssb/embedded.c
index 7dc3a6b41397..a0e0d246b592 100644
--- a/drivers/ssb/embedded.c
+++ b/drivers/ssb/embedded.c
@@ -29,6 +29,7 @@ int ssb_watchdog_timer_set(struct ssb_bus *bus, u32 ticks)
29 } 29 }
30 return -ENODEV; 30 return -ENODEV;
31} 31}
32EXPORT_SYMBOL(ssb_watchdog_timer_set);
32 33
33u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask) 34u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask)
34{ 35{
diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index 0716cdb44cd8..0a3dc5ece634 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_USB_MON) += mon/
11obj-$(CONFIG_PCI) += host/ 11obj-$(CONFIG_PCI) += host/
12obj-$(CONFIG_USB_EHCI_HCD) += host/ 12obj-$(CONFIG_USB_EHCI_HCD) += host/
13obj-$(CONFIG_USB_ISP116X_HCD) += host/ 13obj-$(CONFIG_USB_ISP116X_HCD) += host/
14obj-$(CONFIG_USB_ISP1760_HCD) += host/
15obj-$(CONFIG_USB_OHCI_HCD) += host/ 14obj-$(CONFIG_USB_OHCI_HCD) += host/
16obj-$(CONFIG_USB_UHCI_HCD) += host/ 15obj-$(CONFIG_USB_UHCI_HCD) += host/
17obj-$(CONFIG_USB_FHCI_HCD) += host/ 16obj-$(CONFIG_USB_FHCI_HCD) += host/
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 0a69c0977e3f..7a1164dd1d37 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1375,6 +1375,9 @@ static struct usb_device_id acm_ids[] = {
1375 { USB_DEVICE(0x0572, 0x1324), /* Conexant USB MODEM RD02-D400 */ 1375 { USB_DEVICE(0x0572, 0x1324), /* Conexant USB MODEM RD02-D400 */
1376 .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ 1376 .driver_info = NO_UNION_NORMAL, /* has no union descriptor */
1377 }, 1377 },
1378 { USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */
1379 .driver_info = NO_UNION_NORMAL, /* has no union descriptor */
1380 },
1378 { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ 1381 { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */
1379 }, 1382 },
1380 { USB_DEVICE(0x0572, 0x1329), /* Hummingbird huc56s (Conexant) */ 1383 { USB_DEVICE(0x0572, 0x1329), /* Hummingbird huc56s (Conexant) */
diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c
index 563d57275448..05c913cc3658 100644
--- a/drivers/usb/gadget/atmel_usba_udc.c
+++ b/drivers/usb/gadget/atmel_usba_udc.c
@@ -794,7 +794,8 @@ usba_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
794 if (ep->desc) { 794 if (ep->desc) {
795 list_add_tail(&req->queue, &ep->queue); 795 list_add_tail(&req->queue, &ep->queue);
796 796
797 if (ep->is_in || (ep_is_control(ep) 797 if ((!ep_is_control(ep) && ep->is_in) ||
798 (ep_is_control(ep)
798 && (ep->state == DATA_STAGE_IN 799 && (ep->state == DATA_STAGE_IN
799 || ep->state == STATUS_STAGE_IN))) 800 || ep->state == STATUS_STAGE_IN)))
800 usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY); 801 usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY);
@@ -1940,7 +1941,7 @@ static int __init usba_udc_probe(struct platform_device *pdev)
1940 usba_writel(udc, CTRL, USBA_DISABLE_MASK); 1941 usba_writel(udc, CTRL, USBA_DISABLE_MASK);
1941 clk_disable(pclk); 1942 clk_disable(pclk);
1942 1943
1943 usba_ep = kmalloc(sizeof(struct usba_ep) * pdata->num_ep, 1944 usba_ep = kzalloc(sizeof(struct usba_ep) * pdata->num_ep,
1944 GFP_KERNEL); 1945 GFP_KERNEL);
1945 if (!usba_ep) 1946 if (!usba_ep)
1946 goto err_alloc_ep; 1947 goto err_alloc_ep;
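
The kmalloc-to-kzalloc switch in usba_udc_probe() means the endpoint array starts out zero-filled, so pointer and state fields are a well-defined NULL/0 instead of whatever the allocator left behind. The userspace analogue, with calloc standing in for kzalloc:

    #include <stdlib.h>

    struct ep { void *desc; int state; };

    int main(void)
    {
        size_t num_ep = 7;   /* invented count */

        /* calloc returns zeroed memory, so eps[i].desc is reliably
         * NULL; with plain malloc the contents are indeterminate */
        struct ep *eps = calloc(num_ep, sizeof(*eps));
        if (!eps)
            return 1;
        /* callers may now safely test eps[i].desc == NULL */
        free(eps);
        return 0;
    }
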
diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c
index cd07ea3f0c63..15438469f21a 100644
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c
@@ -1658,6 +1658,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
1658 u32 reg_base, or_reg, skip_reg; 1658 u32 reg_base, or_reg, skip_reg;
1659 unsigned long flags; 1659 unsigned long flags;
1660 struct ptd ptd; 1660 struct ptd ptd;
1661 packet_enqueue *pe;
1661 1662
1662 switch (usb_pipetype(urb->pipe)) { 1663 switch (usb_pipetype(urb->pipe)) {
1663 case PIPE_ISOCHRONOUS: 1664 case PIPE_ISOCHRONOUS:
@@ -1669,6 +1670,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
1669 reg_base = INT_REGS_OFFSET; 1670 reg_base = INT_REGS_OFFSET;
1670 or_reg = HC_INT_IRQ_MASK_OR_REG; 1671 or_reg = HC_INT_IRQ_MASK_OR_REG;
1671 skip_reg = HC_INT_PTD_SKIPMAP_REG; 1672 skip_reg = HC_INT_PTD_SKIPMAP_REG;
1673 pe = enqueue_an_INT_packet;
1672 break; 1674 break;
1673 1675
1674 default: 1676 default:
@@ -1676,6 +1678,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
1676 reg_base = ATL_REGS_OFFSET; 1678 reg_base = ATL_REGS_OFFSET;
1677 or_reg = HC_ATL_IRQ_MASK_OR_REG; 1679 or_reg = HC_ATL_IRQ_MASK_OR_REG;
1678 skip_reg = HC_ATL_PTD_SKIPMAP_REG; 1680 skip_reg = HC_ATL_PTD_SKIPMAP_REG;
1681 pe = enqueue_an_ATL_packet;
1679 break; 1682 break;
1680 } 1683 }
1681 1684
@@ -1687,6 +1690,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
1687 u32 skip_map; 1690 u32 skip_map;
1688 u32 or_map; 1691 u32 or_map;
1689 struct isp1760_qtd *qtd; 1692 struct isp1760_qtd *qtd;
1693 struct isp1760_qh *qh = ints->qh;
1690 1694
1691 skip_map = isp1760_readl(hcd->regs + skip_reg); 1695 skip_map = isp1760_readl(hcd->regs + skip_reg);
1692 skip_map |= 1 << i; 1696 skip_map |= 1 << i;
@@ -1699,8 +1703,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
1699 priv_write_copy(priv, (u32 *)&ptd, hcd->regs + reg_base 1703 priv_write_copy(priv, (u32 *)&ptd, hcd->regs + reg_base
1700 + i * sizeof(ptd), sizeof(ptd)); 1704 + i * sizeof(ptd), sizeof(ptd));
1701 qtd = ints->qtd; 1705 qtd = ints->qtd;
1702 1706 qtd = clean_up_qtdlist(qtd);
1703 clean_up_qtdlist(qtd);
1704 1707
1705 free_mem(priv, ints->payload); 1708 free_mem(priv, ints->payload);
1706 1709
@@ -1711,7 +1714,24 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
1711 ints->payload = 0; 1714 ints->payload = 0;
1712 1715
1713 isp1760_urb_done(priv, urb, status); 1716 isp1760_urb_done(priv, urb, status);
1717 if (qtd)
1718 pe(hcd, qh, qtd);
1714 break; 1719 break;
1720
1721 } else if (ints->qtd) {
1722 struct isp1760_qtd *qtd, *prev_qtd = ints->qtd;
1723
1724 for (qtd = ints->qtd->hw_next; qtd; qtd = qtd->hw_next) {
1725 if (qtd->urb == urb) {
1726 prev_qtd->hw_next = clean_up_qtdlist(qtd);
1727 isp1760_urb_done(priv, urb, status);
1728 break;
1729 }
1730 prev_qtd = qtd;
1731 }
1732 /* we found the urb before the end of the list */
1733 if (qtd)
1734 break;
1715 } 1735 }
1716 ints++; 1736 ints++;
1717 } 1737 }
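
The second half of the isp1760 hunk walks a singly linked qtd list keeping a trailing prev_qtd pointer so the entry belonging to the dequeued URB can be spliced out of the middle. The core list surgery, as a standalone sketch (invented types; clean_one() stands in for clean_up_qtdlist()):

    #include <stdlib.h>

    struct qtd {
        int urb;                /* stand-in for the urb pointer */
        struct qtd *hw_next;
    };

    /* free 'q' and return its successor */
    static struct qtd *clean_one(struct qtd *q)
    {
        struct qtd *next = q->hw_next;
        free(q);
        return next;
    }

    static void dequeue(struct qtd *head, int urb)
    {
        struct qtd *q, *prev = head;

        /* keep the predecessor so the match can be spliced out,
         * mirroring the prev_qtd bookkeeping in the hunk above */
        for (q = head->hw_next; q; q = q->hw_next) {
            if (q->urb == urb) {
                prev->hw_next = clean_one(q);
                break;
            }
            prev = q;
        }
    }

    int main(void)
    {
        struct qtd *c = malloc(sizeof(*c));
        struct qtd *b = malloc(sizeof(*b));
        struct qtd *a = malloc(sizeof(*a));

        if (!a || !b || !c)
            return 1;
        *c = (struct qtd){ .urb = 3, .hw_next = NULL };
        *b = (struct qtd){ .urb = 2, .hw_next = c };
        *a = (struct qtd){ .urb = 1, .hw_next = b };
        dequeue(a, 2);       /* unlinks and frees the middle node */
        free(a->hw_next);    /* c */
        free(a);
        return 0;
    }
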
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 0a566eea49c0..f331e2bde88a 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -974,6 +974,7 @@ int usb_serial_probe(struct usb_interface *interface,
974 if (retval > 0) { 974 if (retval > 0) {
975 /* quietly accept this device, but don't bind to a 975 /* quietly accept this device, but don't bind to a
976 serial port as it's about to disappear */ 976 serial port as it's about to disappear */
977 serial->num_ports = 0;
977 goto exit; 978 goto exit;
978 } 979 }
979 } 980 }
diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c
index 9a577a800db5..2fb63f6ea2f1 100644
--- a/drivers/video/atmel_lcdfb.c
+++ b/drivers/video/atmel_lcdfb.c
@@ -29,14 +29,8 @@
29 29
30/* configurable parameters */ 30/* configurable parameters */
31#define ATMEL_LCDC_CVAL_DEFAULT 0xc8 31#define ATMEL_LCDC_CVAL_DEFAULT 0xc8
32#define ATMEL_LCDC_DMA_BURST_LEN 8 32#define ATMEL_LCDC_DMA_BURST_LEN 8 /* words */
33 33#define ATMEL_LCDC_FIFO_SIZE 512 /* words */
34#if defined(CONFIG_ARCH_AT91SAM9263) || defined(CONFIG_ARCH_AT91CAP9) || \
35 defined(CONFIG_ARCH_AT91SAM9RL)
36#define ATMEL_LCDC_FIFO_SIZE 2048
37#else
38#define ATMEL_LCDC_FIFO_SIZE 512
39#endif
40 34
41#if defined(CONFIG_ARCH_AT91) 35#if defined(CONFIG_ARCH_AT91)
42#define ATMEL_LCDFB_FBINFO_DEFAULT (FBINFO_DEFAULT \ 36#define ATMEL_LCDFB_FBINFO_DEFAULT (FBINFO_DEFAULT \
diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c
index 5e9c6302433b..d3a568e6b169 100644
--- a/drivers/video/s3c-fb.c
+++ b/drivers/video/s3c-fb.c
@@ -947,7 +947,8 @@ static int __devexit s3c_fb_remove(struct platform_device *pdev)
947 int win; 947 int win;
948 948
949 for (win = 0; win <= S3C_FB_MAX_WIN; win++) 949 for (win = 0; win <= S3C_FB_MAX_WIN; win++)
950 s3c_fb_release_win(sfb, sfb->windows[win]); 950 if (sfb->windows[win])
951 s3c_fb_release_win(sfb, sfb->windows[win]);
951 952
952 iounmap(sfb->regs); 953 iounmap(sfb->regs);
953 954
@@ -985,11 +986,20 @@ static int s3c_fb_suspend(struct platform_device *pdev, pm_message_t state)
985static int s3c_fb_resume(struct platform_device *pdev) 986static int s3c_fb_resume(struct platform_device *pdev)
986{ 987{
987 struct s3c_fb *sfb = platform_get_drvdata(pdev); 988 struct s3c_fb *sfb = platform_get_drvdata(pdev);
989 struct s3c_fb_platdata *pd = sfb->pdata;
988 struct s3c_fb_win *win; 990 struct s3c_fb_win *win;
989 int win_no; 991 int win_no;
990 992
991 clk_enable(sfb->bus_clk); 993 clk_enable(sfb->bus_clk);
992 994
995 /* setup registers */
996 writel(pd->vidcon1, sfb->regs + VIDCON1);
997
998 /* zero all windows before we do anything */
999 for (win_no = 0; win_no < S3C_FB_MAX_WIN; win_no++)
1000 s3c_fb_clear_win(sfb, win_no);
1001
1002 /* restore framebuffers */
993 for (win_no = 0; win_no < S3C_FB_MAX_WIN; win_no++) { 1003 for (win_no = 0; win_no < S3C_FB_MAX_WIN; win_no++) {
994 win = sfb->windows[win_no]; 1004 win = sfb->windows[win_no];
995 if (!win) 1005 if (!win)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8ac9cddac575..cab100acf983 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES
18 secure, but slightly less efficient. 18 secure, but slightly less efficient.
19 If in doubt, say yes. 19 If in doubt, say yes.
20 20
21config XEN_DEV_EVTCHN
22 tristate "Xen /dev/xen/evtchn device"
23 depends on XEN
24 default y
25 help
26	   The evtchn driver allows a userspace process to trigger event
27 channels and to receive notification of an event channel
28 firing.
29 If in doubt, say yes.
30
21config XENFS 31config XENFS
22 tristate "Xen filesystem" 32 tristate "Xen filesystem"
23 depends on XEN 33 depends on XEN
@@ -41,3 +51,13 @@ config XEN_COMPAT_XENFS
41 a xen platform. 51 a xen platform.
42 If in doubt, say yes. 52 If in doubt, say yes.
43 53
54config XEN_SYS_HYPERVISOR
55 bool "Create xen entries under /sys/hypervisor"
56 depends on XEN && SYSFS
57 select SYS_HYPERVISOR
58 default y
59 help
60 Create entries under /sys/hypervisor describing the Xen
61 hypervisor environment. When running native or in another
62 virtual environment, /sys/hypervisor will still be present,
63 but will have no xen contents. \ No newline at end of file
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc9e103..ec2a39b1e26f 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,6 @@ obj-y += xenbus/
4obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o 4obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
5obj-$(CONFIG_XEN_XENCOMM) += xencomm.o 5obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
6obj-$(CONFIG_XEN_BALLOON) += balloon.o 6obj-$(CONFIG_XEN_BALLOON) += balloon.o
7obj-$(CONFIG_XENFS) += xenfs/ \ No newline at end of file 7obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
8obj-$(CONFIG_XENFS) += xenfs/
9obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af5dba0..891d2e90753a 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq)
151 return info_for_irq(irq)->evtchn; 151 return info_for_irq(irq)->evtchn;
152} 152}
153 153
154unsigned irq_from_evtchn(unsigned int evtchn)
155{
156 return evtchn_to_irq[evtchn];
157}
158EXPORT_SYMBOL_GPL(irq_from_evtchn);
159
154static enum ipi_vector ipi_from_irq(unsigned irq) 160static enum ipi_vector ipi_from_irq(unsigned irq)
155{ 161{
156 struct irq_info *info = info_for_irq(irq); 162 struct irq_info *info = info_for_irq(irq);
@@ -335,7 +341,7 @@ static int find_unbound_irq(void)
335 if (irq == nr_irqs) 341 if (irq == nr_irqs)
336 panic("No available IRQ to bind to: increase nr_irqs!\n"); 342 panic("No available IRQ to bind to: increase nr_irqs!\n");
337 343
338 desc = irq_to_desc_alloc_cpu(irq, 0); 344 desc = irq_to_desc_alloc_node(irq, 0);
339 if (WARN_ON(desc == NULL)) 345 if (WARN_ON(desc == NULL))
340 return -1; 346 return -1;
341 347
@@ -688,13 +694,13 @@ void rebind_evtchn_irq(int evtchn, int irq)
688} 694}
689 695
690/* Rebind an evtchn so that it gets delivered to a specific cpu */ 696/* Rebind an evtchn so that it gets delivered to a specific cpu */
691static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) 697static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
692{ 698{
693 struct evtchn_bind_vcpu bind_vcpu; 699 struct evtchn_bind_vcpu bind_vcpu;
694 int evtchn = evtchn_from_irq(irq); 700 int evtchn = evtchn_from_irq(irq);
695 701
696 if (!VALID_EVTCHN(evtchn)) 702 if (!VALID_EVTCHN(evtchn))
697 return; 703 return -1;
698 704
699 /* Send future instances of this interrupt to other vcpu. */ 705 /* Send future instances of this interrupt to other vcpu. */
700 bind_vcpu.port = evtchn; 706 bind_vcpu.port = evtchn;
@@ -707,13 +713,15 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
707 */ 713 */
708 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) 714 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
709 bind_evtchn_to_cpu(evtchn, tcpu); 715 bind_evtchn_to_cpu(evtchn, tcpu);
710}
711 716
717 return 0;
718}
712 719
713static void set_affinity_irq(unsigned irq, const struct cpumask *dest) 720static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
714{ 721{
715 unsigned tcpu = cpumask_first(dest); 722 unsigned tcpu = cpumask_first(dest);
716 rebind_irq_to_cpu(irq, tcpu); 723
724 return rebind_irq_to_cpu(irq, tcpu);
717} 725}
718 726
719int resend_irq_on_evtchn(unsigned int irq) 727int resend_irq_on_evtchn(unsigned int irq)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
new file mode 100644
index 000000000000..af031950f9b1
--- /dev/null
+++ b/drivers/xen/evtchn.c
@@ -0,0 +1,507 @@
1/******************************************************************************
2 * evtchn.c
3 *
4 * Driver for receiving and demuxing event-channel signals.
5 *
6 * Copyright (c) 2004-2005, K A Fraser
7 * Multi-process extensions Copyright (c) 2004, Steven Smith
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#include <linux/module.h>
35#include <linux/kernel.h>
36#include <linux/sched.h>
37#include <linux/slab.h>
38#include <linux/string.h>
39#include <linux/errno.h>
40#include <linux/fs.h>
41#include <linux/errno.h>
42#include <linux/miscdevice.h>
43#include <linux/major.h>
44#include <linux/proc_fs.h>
45#include <linux/stat.h>
46#include <linux/poll.h>
47#include <linux/irq.h>
48#include <linux/init.h>
49#include <linux/gfp.h>
50#include <linux/mutex.h>
51#include <linux/cpu.h>
52#include <xen/events.h>
53#include <xen/evtchn.h>
54#include <asm/xen/hypervisor.h>
55
56struct per_user_data {
57 struct mutex bind_mutex; /* serialize bind/unbind operations */
58
59 /* Notification ring, accessed via /dev/xen/evtchn. */
60#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
61#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
62 evtchn_port_t *ring;
63 unsigned int ring_cons, ring_prod, ring_overflow;
64 struct mutex ring_cons_mutex; /* protect against concurrent readers */
65
66 /* Processes wait on this queue when ring is empty. */
67 wait_queue_head_t evtchn_wait;
68 struct fasync_struct *evtchn_async_queue;
69 const char *name;
70};
71
72/* Who's bound to each port? */
73static struct per_user_data *port_user[NR_EVENT_CHANNELS];
74static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
75
76irqreturn_t evtchn_interrupt(int irq, void *data)
77{
78 unsigned int port = (unsigned long)data;
79 struct per_user_data *u;
80
81 spin_lock(&port_user_lock);
82
83 u = port_user[port];
84
85 disable_irq_nosync(irq);
86
87 if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
88 u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
89 wmb(); /* Ensure ring contents visible */
90 if (u->ring_cons == u->ring_prod++) {
91 wake_up_interruptible(&u->evtchn_wait);
92 kill_fasync(&u->evtchn_async_queue,
93 SIGIO, POLL_IN);
94 }
95 } else {
96 u->ring_overflow = 1;
97 }
98
99 spin_unlock(&port_user_lock);
100
101 return IRQ_HANDLED;
102}
103
104static ssize_t evtchn_read(struct file *file, char __user *buf,
105 size_t count, loff_t *ppos)
106{
107 int rc;
108 unsigned int c, p, bytes1 = 0, bytes2 = 0;
109 struct per_user_data *u = file->private_data;
110
111 /* Whole number of ports. */
112 count &= ~(sizeof(evtchn_port_t)-1);
113
114 if (count == 0)
115 return 0;
116
117 if (count > PAGE_SIZE)
118 count = PAGE_SIZE;
119
120 for (;;) {
121 mutex_lock(&u->ring_cons_mutex);
122
123 rc = -EFBIG;
124 if (u->ring_overflow)
125 goto unlock_out;
126
127 c = u->ring_cons;
128 p = u->ring_prod;
129 if (c != p)
130 break;
131
132 mutex_unlock(&u->ring_cons_mutex);
133
134 if (file->f_flags & O_NONBLOCK)
135 return -EAGAIN;
136
137 rc = wait_event_interruptible(u->evtchn_wait,
138 u->ring_cons != u->ring_prod);
139 if (rc)
140 return rc;
141 }
142
143 /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
144 if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
145 bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
146 sizeof(evtchn_port_t);
147 bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
148 } else {
149 bytes1 = (p - c) * sizeof(evtchn_port_t);
150 bytes2 = 0;
151 }
152
153 /* Truncate chunks according to caller's maximum byte count. */
154 if (bytes1 > count) {
155 bytes1 = count;
156 bytes2 = 0;
157 } else if ((bytes1 + bytes2) > count) {
158 bytes2 = count - bytes1;
159 }
160
161 rc = -EFAULT;
162 rmb(); /* Ensure that we see the port before we copy it. */
163 if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
164 ((bytes2 != 0) &&
165 copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
166 goto unlock_out;
167
168 u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
169 rc = bytes1 + bytes2;
170
171 unlock_out:
172 mutex_unlock(&u->ring_cons_mutex);
173 return rc;
174}
175
176static ssize_t evtchn_write(struct file *file, const char __user *buf,
177 size_t count, loff_t *ppos)
178{
179 int rc, i;
180 evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
181 struct per_user_data *u = file->private_data;
182
183 if (kbuf == NULL)
184 return -ENOMEM;
185
186 /* Whole number of ports. */
187 count &= ~(sizeof(evtchn_port_t)-1);
188
189 rc = 0;
190 if (count == 0)
191 goto out;
192
193 if (count > PAGE_SIZE)
194 count = PAGE_SIZE;
195
196 rc = -EFAULT;
197 if (copy_from_user(kbuf, buf, count) != 0)
198 goto out;
199
200 spin_lock_irq(&port_user_lock);
201 for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
202 if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
203 enable_irq(irq_from_evtchn(kbuf[i]));
204 spin_unlock_irq(&port_user_lock);
205
206 rc = count;
207
208 out:
209 free_page((unsigned long)kbuf);
210 return rc;
211}
212
213static int evtchn_bind_to_user(struct per_user_data *u, int port)
214{
215 int rc = 0;
216
217 /*
218 * Ports are never reused, so every caller should pass in a
219 * unique port.
220 *
221 * (Locking not necessary because we haven't registered the
222 * interrupt handler yet, and our caller has already
223 * serialized bind operations.)
224 */
225 BUG_ON(port_user[port] != NULL);
226 port_user[port] = u;
227
228 rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
229 u->name, (void *)(unsigned long)port);
230 if (rc >= 0)
231 rc = 0;
232
233 return rc;
234}
235
236static void evtchn_unbind_from_user(struct per_user_data *u, int port)
237{
238 int irq = irq_from_evtchn(port);
239
240 unbind_from_irqhandler(irq, (void *)(unsigned long)port);
241
242 /* make sure we unbind the irq handler before clearing the port */
243 barrier();
244
245 port_user[port] = NULL;
246}
247
248static long evtchn_ioctl(struct file *file,
249 unsigned int cmd, unsigned long arg)
250{
251 int rc;
252 struct per_user_data *u = file->private_data;
253 void __user *uarg = (void __user *) arg;
254
255 /* Prevent bind from racing with unbind */
256 mutex_lock(&u->bind_mutex);
257
258 switch (cmd) {
259 case IOCTL_EVTCHN_BIND_VIRQ: {
260 struct ioctl_evtchn_bind_virq bind;
261 struct evtchn_bind_virq bind_virq;
262
263 rc = -EFAULT;
264 if (copy_from_user(&bind, uarg, sizeof(bind)))
265 break;
266
267 bind_virq.virq = bind.virq;
268 bind_virq.vcpu = 0;
269 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
270 &bind_virq);
271 if (rc != 0)
272 break;
273
274 rc = evtchn_bind_to_user(u, bind_virq.port);
275 if (rc == 0)
276 rc = bind_virq.port;
277 break;
278 }
279
280 case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
281 struct ioctl_evtchn_bind_interdomain bind;
282 struct evtchn_bind_interdomain bind_interdomain;
283
284 rc = -EFAULT;
285 if (copy_from_user(&bind, uarg, sizeof(bind)))
286 break;
287
288 bind_interdomain.remote_dom = bind.remote_domain;
289 bind_interdomain.remote_port = bind.remote_port;
290 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
291 &bind_interdomain);
292 if (rc != 0)
293 break;
294
295 rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
296 if (rc == 0)
297 rc = bind_interdomain.local_port;
298 break;
299 }
300
301 case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
302 struct ioctl_evtchn_bind_unbound_port bind;
303 struct evtchn_alloc_unbound alloc_unbound;
304
305 rc = -EFAULT;
306 if (copy_from_user(&bind, uarg, sizeof(bind)))
307 break;
308
309 alloc_unbound.dom = DOMID_SELF;
310 alloc_unbound.remote_dom = bind.remote_domain;
311 rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
312 &alloc_unbound);
313 if (rc != 0)
314 break;
315
316 rc = evtchn_bind_to_user(u, alloc_unbound.port);
317 if (rc == 0)
318 rc = alloc_unbound.port;
319 break;
320 }
321
322 case IOCTL_EVTCHN_UNBIND: {
323 struct ioctl_evtchn_unbind unbind;
324
325 rc = -EFAULT;
326 if (copy_from_user(&unbind, uarg, sizeof(unbind)))
327 break;
328
329 rc = -EINVAL;
330 if (unbind.port >= NR_EVENT_CHANNELS)
331 break;
332
333 spin_lock_irq(&port_user_lock);
334
335 rc = -ENOTCONN;
336 if (port_user[unbind.port] != u) {
337 spin_unlock_irq(&port_user_lock);
338 break;
339 }
340
341 evtchn_unbind_from_user(u, unbind.port);
342
343 spin_unlock_irq(&port_user_lock);
344
345 rc = 0;
346 break;
347 }
348
349 case IOCTL_EVTCHN_NOTIFY: {
350 struct ioctl_evtchn_notify notify;
351
352 rc = -EFAULT;
353 if (copy_from_user(&notify, uarg, sizeof(notify)))
354 break;
355
356 if (notify.port >= NR_EVENT_CHANNELS) {
357 rc = -EINVAL;
358 } else if (port_user[notify.port] != u) {
359 rc = -ENOTCONN;
360 } else {
361 notify_remote_via_evtchn(notify.port);
362 rc = 0;
363 }
364 break;
365 }
366
367 case IOCTL_EVTCHN_RESET: {
368 /* Initialise the ring to empty. Clear errors. */
369 mutex_lock(&u->ring_cons_mutex);
370 spin_lock_irq(&port_user_lock);
371 u->ring_cons = u->ring_prod = u->ring_overflow = 0;
372 spin_unlock_irq(&port_user_lock);
373 mutex_unlock(&u->ring_cons_mutex);
374 rc = 0;
375 break;
376 }
377
378 default:
379 rc = -ENOSYS;
380 break;
381 }
382 mutex_unlock(&u->bind_mutex);
383
384 return rc;
385}
386
387static unsigned int evtchn_poll(struct file *file, poll_table *wait)
388{
389 unsigned int mask = POLLOUT | POLLWRNORM;
390 struct per_user_data *u = file->private_data;
391
392 poll_wait(file, &u->evtchn_wait, wait);
393 if (u->ring_cons != u->ring_prod)
394 mask |= POLLIN | POLLRDNORM;
395 if (u->ring_overflow)
396 mask = POLLERR;
397 return mask;
398}
399
400static int evtchn_fasync(int fd, struct file *filp, int on)
401{
402 struct per_user_data *u = filp->private_data;
403 return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
404}
405
406static int evtchn_open(struct inode *inode, struct file *filp)
407{
408 struct per_user_data *u;
409
410 u = kzalloc(sizeof(*u), GFP_KERNEL);
411 if (u == NULL)
412 return -ENOMEM;
413
414 u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
415 if (u->name == NULL) {
416 kfree(u);
417 return -ENOMEM;
418 }
419
420 init_waitqueue_head(&u->evtchn_wait);
421
422 u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
423 if (u->ring == NULL) {
424 kfree(u->name);
425 kfree(u);
426 return -ENOMEM;
427 }
428
429 mutex_init(&u->bind_mutex);
430 mutex_init(&u->ring_cons_mutex);
431
432 filp->private_data = u;
433
434 return 0;
435}
436
437static int evtchn_release(struct inode *inode, struct file *filp)
438{
439 int i;
440 struct per_user_data *u = filp->private_data;
441
442 spin_lock_irq(&port_user_lock);
443
444 free_page((unsigned long)u->ring);
445
446 for (i = 0; i < NR_EVENT_CHANNELS; i++) {
447 if (port_user[i] != u)
448 continue;
449
450 evtchn_unbind_from_user(port_user[i], i);
451 }
452
453 spin_unlock_irq(&port_user_lock);
454
455 kfree(u->name);
456 kfree(u);
457
458 return 0;
459}
460
461static const struct file_operations evtchn_fops = {
462 .owner = THIS_MODULE,
463 .read = evtchn_read,
464 .write = evtchn_write,
465 .unlocked_ioctl = evtchn_ioctl,
466 .poll = evtchn_poll,
467 .fasync = evtchn_fasync,
468 .open = evtchn_open,
469 .release = evtchn_release,
470};
471
472static struct miscdevice evtchn_miscdev = {
473 .minor = MISC_DYNAMIC_MINOR,
474 .name = "evtchn",
475 .fops = &evtchn_fops,
476};
477static int __init evtchn_init(void)
478{
479 int err;
480
481 if (!xen_domain())
482 return -ENODEV;
483
484 spin_lock_init(&port_user_lock);
485 memset(port_user, 0, sizeof(port_user));
486
487 /* Create '/dev/misc/evtchn'. */
488 err = misc_register(&evtchn_miscdev);
489 if (err != 0) {
490 printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
491 return err;
492 }
493
494 printk(KERN_INFO "Event-channel device installed.\n");
495
496 return 0;
497}
498
499static void __exit evtchn_cleanup(void)
500{
501 misc_deregister(&evtchn_miscdev);
502}
503
504module_init(evtchn_init);
505module_exit(evtchn_cleanup);
506
507MODULE_LICENSE("GPL");
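
A note on evtchn_read() above: ring_cons and ring_prod are free-running counters, masked with EVTCHN_RING_MASK() only when the ring is actually indexed, and the test ((c ^ p) & EVTCHN_RING_SIZE) asks whether the unread span crosses the ring boundary and therefore has to be copied to userspace as up to two chunks (tail of the ring, then the head). A toy-sized standalone sketch of just that split (8-slot ring; the real one holds PAGE_SIZE / sizeof(evtchn_port_t) entries):

    #include <assert.h>

    #define RING_SIZE 8u                      /* power of two, as in the driver */
    #define RING_MASK(i) ((i) & (RING_SIZE - 1))

    static void split(unsigned int c, unsigned int p,
                      unsigned int *n1, unsigned int *n2)
    {
        /* differing RING_SIZE bits in the free-running counters mean
         * the unread span reaches the wrap point */
        if ((c ^ p) & RING_SIZE) {
            *n1 = RING_SIZE - RING_MASK(c);   /* up to the wrap point */
            *n2 = RING_MASK(p);               /* from slot 0 onward */
        } else {
            *n1 = p - c;
            *n2 = 0;
        }
    }

    int main(void)
    {
        unsigned int n1, n2;

        split(6, 10, &n1, &n2);   /* slots 6,7 then 0,1 */
        assert(n1 == 2 && n2 == 2);
        split(2, 5, &n1, &n2);    /* contiguous slots 2,3,4 */
        assert(n1 == 3 && n2 == 0);
        return 0;
    }
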
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 4b5b84837ee1..fddc2025dece 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -98,9 +98,8 @@ static void do_suspend(void)
98 goto out; 98 goto out;
99 } 99 }
100 100
101 printk("suspending xenbus...\n"); 101 printk(KERN_DEBUG "suspending xenstore...\n");
102 /* XXX use normal device tree? */ 102 xs_suspend();
103 xenbus_suspend();
104 103
105 err = device_power_down(PMSG_SUSPEND); 104 err = device_power_down(PMSG_SUSPEND);
106 if (err) { 105 if (err) {
@@ -116,9 +115,9 @@ static void do_suspend(void)
116 115
117 if (!cancelled) { 116 if (!cancelled) {
118 xen_arch_resume(); 117 xen_arch_resume();
119 xenbus_resume(); 118 xs_resume();
120 } else 119 } else
121 xenbus_suspend_cancel(); 120 xs_suspend_cancel();
122 121
123 device_power_up(PMSG_RESUME); 122 device_power_up(PMSG_RESUME);
124 123
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
new file mode 100644
index 000000000000..88a60e03ccf0
--- /dev/null
+++ b/drivers/xen/sys-hypervisor.c
@@ -0,0 +1,445 @@
1/*
2 * copyright (c) 2006 IBM Corporation
3 * Authored by: Mike D. Day <ncmike@us.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <linux/kobject.h>
13
14#include <asm/xen/hypervisor.h>
15#include <asm/xen/hypercall.h>
16
17#include <xen/xenbus.h>
18#include <xen/interface/xen.h>
19#include <xen/interface/version.h>
20
21#define HYPERVISOR_ATTR_RO(_name) \
22static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
23
24#define HYPERVISOR_ATTR_RW(_name) \
25static struct hyp_sysfs_attr _name##_attr = \
26 __ATTR(_name, 0644, _name##_show, _name##_store)
27
28struct hyp_sysfs_attr {
29 struct attribute attr;
30 ssize_t (*show)(struct hyp_sysfs_attr *, char *);
31 ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
32 void *hyp_attr_data;
33};
34
35static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
36{
37 return sprintf(buffer, "xen\n");
38}
39
40HYPERVISOR_ATTR_RO(type);
41
42static int __init xen_sysfs_type_init(void)
43{
44 return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
45}
46
47static void xen_sysfs_type_destroy(void)
48{
49 sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
50}
51
52/* xen version attributes */
53static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
54{
55 int version = HYPERVISOR_xen_version(XENVER_version, NULL);
56 if (version)
57 return sprintf(buffer, "%d\n", version >> 16);
58 return -ENODEV;
59}
60
61HYPERVISOR_ATTR_RO(major);
62
63static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
64{
65 int version = HYPERVISOR_xen_version(XENVER_version, NULL);
66 if (version)
67 return sprintf(buffer, "%d\n", version & 0xff);
68 return -ENODEV;
69}
70
71HYPERVISOR_ATTR_RO(minor);
72
73static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
74{
75 int ret = -ENOMEM;
76 char *extra;
77
78 extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
79 if (extra) {
80 ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
81 if (!ret)
82 ret = sprintf(buffer, "%s\n", extra);
83 kfree(extra);
84 }
85
86 return ret;
87}
88
89HYPERVISOR_ATTR_RO(extra);
90
91static struct attribute *version_attrs[] = {
92 &major_attr.attr,
93 &minor_attr.attr,
94 &extra_attr.attr,
95 NULL
96};
97
98static struct attribute_group version_group = {
99 .name = "version",
100 .attrs = version_attrs,
101};
102
103static int __init xen_sysfs_version_init(void)
104{
105 return sysfs_create_group(hypervisor_kobj, &version_group);
106}
107
108static void xen_sysfs_version_destroy(void)
109{
110 sysfs_remove_group(hypervisor_kobj, &version_group);
111}
112
113/* UUID */
114
115static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
116{
117 char *vm, *val;
118 int ret;
119 extern int xenstored_ready;
120
121 if (!xenstored_ready)
122 return -EBUSY;
123
124 vm = xenbus_read(XBT_NIL, "vm", "", NULL);
125 if (IS_ERR(vm))
126 return PTR_ERR(vm);
127 val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
128 kfree(vm);
129 if (IS_ERR(val))
130 return PTR_ERR(val);
131 ret = sprintf(buffer, "%s\n", val);
132 kfree(val);
133 return ret;
134}
135
136HYPERVISOR_ATTR_RO(uuid);
137
138static int __init xen_sysfs_uuid_init(void)
139{
140 return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
141}
142
143static void xen_sysfs_uuid_destroy(void)
144{
145 sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
146}
147
148/* xen compilation attributes */
149
150static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
151{
152 int ret = -ENOMEM;
153 struct xen_compile_info *info;
154
155 info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
156 if (info) {
157 ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
158 if (!ret)
159 ret = sprintf(buffer, "%s\n", info->compiler);
160 kfree(info);
161 }
162
163 return ret;
164}
165
166HYPERVISOR_ATTR_RO(compiler);
167
168static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
169{
170 int ret = -ENOMEM;
171 struct xen_compile_info *info;
172
173 info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
174 if (info) {
175 ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
176 if (!ret)
177 ret = sprintf(buffer, "%s\n", info->compile_by);
178 kfree(info);
179 }
180
181 return ret;
182}
183
184HYPERVISOR_ATTR_RO(compiled_by);
185
186static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
187{
188 int ret = -ENOMEM;
189 struct xen_compile_info *info;
190
191 info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
192 if (info) {
193 ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
194 if (!ret)
195 ret = sprintf(buffer, "%s\n", info->compile_date);
196 kfree(info);
197 }
198
199 return ret;
200}
201
202HYPERVISOR_ATTR_RO(compile_date);
203
204static struct attribute *xen_compile_attrs[] = {
205 &compiler_attr.attr,
206 &compiled_by_attr.attr,
207 &compile_date_attr.attr,
208 NULL
209};
210
211static struct attribute_group xen_compilation_group = {
212 .name = "compilation",
213 .attrs = xen_compile_attrs,
214};
215
216static int __init xen_compilation_init(void)
217{
218 return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
219}
220
221static void xen_compilation_destroy(void)
222{
223 sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
224}
225
226/* xen properties info */
227
228static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
229{
230 int ret = -ENOMEM;
231 char *caps;
232
233 caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
234 if (caps) {
235 ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
236 if (!ret)
237 ret = sprintf(buffer, "%s\n", caps);
238 kfree(caps);
239 }
240
241 return ret;
242}
243
244HYPERVISOR_ATTR_RO(capabilities);
245
246static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
247{
248 int ret = -ENOMEM;
249 char *cset;
250
251 cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
252 if (cset) {
253 ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
254 if (!ret)
255 ret = sprintf(buffer, "%s\n", cset);
256 kfree(cset);
257 }
258
259 return ret;
260}
261
262HYPERVISOR_ATTR_RO(changeset);
263
264static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
265{
266 int ret = -ENOMEM;
267 struct xen_platform_parameters *parms;
268
269 parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
270 if (parms) {
271 ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
272 parms);
273 if (!ret)
274 ret = sprintf(buffer, "%lx\n", parms->virt_start);
275 kfree(parms);
276 }
277
278 return ret;
279}
280
281HYPERVISOR_ATTR_RO(virtual_start);
282
283static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
284{
285 int ret;
286
287 ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
288 if (ret > 0)
289 ret = sprintf(buffer, "%x\n", ret);
290
291 return ret;
292}
293
294HYPERVISOR_ATTR_RO(pagesize);
295
296static ssize_t xen_feature_show(int index, char *buffer)
297{
298 ssize_t ret;
299 struct xen_feature_info info;
300
301 info.submap_idx = index;
302 ret = HYPERVISOR_xen_version(XENVER_get_features, &info);
303 if (!ret)
304 ret = sprintf(buffer, "%08x", info.submap);
305
306 return ret;
307}
308
309static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer)
310{
311 ssize_t len;
312 int i;
313
314 len = 0;
315 for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) {
316 int ret = xen_feature_show(i, buffer + len);
317 if (ret < 0) {
318 if (len == 0)
319 len = ret;
320 break;
321 }
322 len += ret;
323 }
324 if (len > 0)
325 buffer[len++] = '\n';
326
327 return len;
328}
329
330HYPERVISOR_ATTR_RO(features);
331
332static struct attribute *xen_properties_attrs[] = {
333 &capabilities_attr.attr,
334 &changeset_attr.attr,
335 &virtual_start_attr.attr,
336 &pagesize_attr.attr,
337 &features_attr.attr,
338 NULL
339};
340
341static struct attribute_group xen_properties_group = {
342 .name = "properties",
343 .attrs = xen_properties_attrs,
344};
345
346static int __init xen_properties_init(void)
347{
348 return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
349}
350
351static void xen_properties_destroy(void)
352{
353 sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
354}
355
356static int __init hyper_sysfs_init(void)
357{
358 int ret;
359
360 if (!xen_domain())
361 return -ENODEV;
362
363 ret = xen_sysfs_type_init();
364 if (ret)
365 goto out;
366 ret = xen_sysfs_version_init();
367 if (ret)
368 goto version_out;
369 ret = xen_compilation_init();
370 if (ret)
371 goto comp_out;
372 ret = xen_sysfs_uuid_init();
373 if (ret)
374 goto uuid_out;
375 ret = xen_properties_init();
376 if (ret)
377 goto prop_out;
378
379 goto out;
380
381prop_out:
382 xen_sysfs_uuid_destroy();
383uuid_out:
384 xen_compilation_destroy();
385comp_out:
386 xen_sysfs_version_destroy();
387version_out:
388 xen_sysfs_type_destroy();
389out:
390 return ret;
391}
392
393static void __exit hyper_sysfs_exit(void)
394{
395 xen_properties_destroy();
396 xen_compilation_destroy();
397 xen_sysfs_uuid_destroy();
398 xen_sysfs_version_destroy();
399 xen_sysfs_type_destroy();
400
401}
402module_init(hyper_sysfs_init);
403module_exit(hyper_sysfs_exit);
404
405static ssize_t hyp_sysfs_show(struct kobject *kobj,
406 struct attribute *attr,
407 char *buffer)
408{
409 struct hyp_sysfs_attr *hyp_attr;
410 hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
411 if (hyp_attr->show)
412 return hyp_attr->show(hyp_attr, buffer);
413 return 0;
414}
415
416static ssize_t hyp_sysfs_store(struct kobject *kobj,
417 struct attribute *attr,
418 const char *buffer,
419 size_t len)
420{
421 struct hyp_sysfs_attr *hyp_attr;
422 hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
423 if (hyp_attr->store)
424 return hyp_attr->store(hyp_attr, buffer, len);
425 return 0;
426}
427
428static struct sysfs_ops hyp_sysfs_ops = {
429 .show = hyp_sysfs_show,
430 .store = hyp_sysfs_store,
431};
432
433static struct kobj_type hyp_sysfs_kobj_type = {
434 .sysfs_ops = &hyp_sysfs_ops,
435};
436
437static int __init hypervisor_subsys_init(void)
438{
439 if (!xen_domain())
440 return -ENODEV;
441
442 hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
443 return 0;
444}
445device_initcall(hypervisor_subsys_init);
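
sys-hypervisor.c relies on one idiom throughout: each attribute is wrapped in struct hyp_sysfs_attr, and the shared hyp_sysfs_show()/hyp_sysfs_store() entry points recover the wrapper from the embedded struct attribute with container_of(), then call the per-attribute handler. The idiom in standalone form (toy types; the macro mirrors the kernel's container_of):

    #include <assert.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct attribute { const char *name; };

    struct hyp_attr {
        struct attribute attr;
        int (*show)(struct hyp_attr *a);
    };

    static int type_show(struct hyp_attr *a) { (void)a; return 42; }

    static int dispatch(struct attribute *attr)
    {
        /* recover the wrapper from its embedded member, exactly as
         * hyp_sysfs_show() does above */
        struct hyp_attr *h = container_of(attr, struct hyp_attr, attr);

        return h->show ? h->show(h) : 0;
    }

    int main(void)
    {
        struct hyp_attr t = { .attr = { .name = "type" }, .show = type_show };

        assert(dispatch(&t.attr) == 42);
        return 0;
    }
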
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 773d1cf23283..d42e25d5968d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name);
71 71
72static void xenbus_dev_shutdown(struct device *_dev); 72static void xenbus_dev_shutdown(struct device *_dev);
73 73
74static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
75static int xenbus_dev_resume(struct device *dev);
76
74/* If something in array of ids matches this device, return it. */ 77/* If something in array of ids matches this device, return it. */
75static const struct xenbus_device_id * 78static const struct xenbus_device_id *
76match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) 79match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = {
188 .remove = xenbus_dev_remove, 191 .remove = xenbus_dev_remove,
189 .shutdown = xenbus_dev_shutdown, 192 .shutdown = xenbus_dev_shutdown,
190 .dev_attrs = xenbus_dev_attrs, 193 .dev_attrs = xenbus_dev_attrs,
194
195 .suspend = xenbus_dev_suspend,
196 .resume = xenbus_dev_resume,
191 }, 197 },
192}; 198};
193 199
@@ -654,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
654 660
655 kfree(root); 661 kfree(root);
656} 662}
663EXPORT_SYMBOL_GPL(xenbus_dev_changed);
657 664
658static void frontend_changed(struct xenbus_watch *watch, 665static void frontend_changed(struct xenbus_watch *watch,
659 const char **vec, unsigned int len) 666 const char **vec, unsigned int len)
@@ -669,7 +676,7 @@ static struct xenbus_watch fe_watch = {
669 .callback = frontend_changed, 676 .callback = frontend_changed,
670}; 677};
671 678
672static int suspend_dev(struct device *dev, void *data) 679static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
673{ 680{
674 int err = 0; 681 int err = 0;
675 struct xenbus_driver *drv; 682 struct xenbus_driver *drv;
@@ -682,35 +689,14 @@ static int suspend_dev(struct device *dev, void *data)
682 drv = to_xenbus_driver(dev->driver); 689 drv = to_xenbus_driver(dev->driver);
683 xdev = container_of(dev, struct xenbus_device, dev); 690 xdev = container_of(dev, struct xenbus_device, dev);
684 if (drv->suspend) 691 if (drv->suspend)
685 err = drv->suspend(xdev); 692 err = drv->suspend(xdev, state);
686 if (err) 693 if (err)
687 printk(KERN_WARNING 694 printk(KERN_WARNING
688 "xenbus: suspend %s failed: %i\n", dev_name(dev), err); 695 "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
689 return 0; 696 return 0;
690} 697}
691 698
692static int suspend_cancel_dev(struct device *dev, void *data) 699static int xenbus_dev_resume(struct device *dev)
693{
694 int err = 0;
695 struct xenbus_driver *drv;
696 struct xenbus_device *xdev;
697
698 DPRINTK("");
699
700 if (dev->driver == NULL)
701 return 0;
702 drv = to_xenbus_driver(dev->driver);
703 xdev = container_of(dev, struct xenbus_device, dev);
704 if (drv->suspend_cancel)
705 err = drv->suspend_cancel(xdev);
706 if (err)
707 printk(KERN_WARNING
708 "xenbus: suspend_cancel %s failed: %i\n",
709 dev_name(dev), err);
710 return 0;
711}
712
713static int resume_dev(struct device *dev, void *data)
714{ 700{
715 int err; 701 int err;
716 struct xenbus_driver *drv; 702 struct xenbus_driver *drv;
@@ -755,33 +741,6 @@ static int resume_dev(struct device *dev, void *data)
755 return 0; 741 return 0;
756} 742}
757 743
758void xenbus_suspend(void)
759{
760 DPRINTK("");
761
762 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
763 xenbus_backend_suspend(suspend_dev);
764 xs_suspend();
765}
766EXPORT_SYMBOL_GPL(xenbus_suspend);
767
768void xenbus_resume(void)
769{
770 xb_init_comms();
771 xs_resume();
772 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
773 xenbus_backend_resume(resume_dev);
774}
775EXPORT_SYMBOL_GPL(xenbus_resume);
776
777void xenbus_suspend_cancel(void)
778{
779 xs_suspend_cancel();
780 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
781 xenbus_backend_resume(suspend_cancel_dev);
782}
783EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
784
785/* A flag to determine if xenstored is 'ready' (i.e. has started) */ 744/* A flag to determine if xenstored is 'ready' (i.e. has started) */
786int xenstored_ready = 0; 745int xenstored_ready = 0;
787 746
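
The hunk above folds the exported xenbus_suspend/resume walkers into the bus's own PM callbacks; each callback forwards to the bound driver and warns rather than aborts on failure, so one misbehaving device does not stall the sweep. A minimal user-space sketch of that dispatch shape, with illustrative names standing in for the xenbus types:

#include <stdio.h>

/* Toy analog of the bus-level callback: look up the bound driver,
 * forward the suspend, log a failure but keep going -- the same
 * behaviour as xenbus_dev_suspend() above.  Names are invented. */
struct driver { const char *name; int (*suspend)(void); };
struct device { struct driver *drv; };

static int bus_suspend_one(struct device *dev)
{
    int err = 0;

    if (dev->drv && dev->drv->suspend)
        err = dev->drv->suspend();
    if (err)
        fprintf(stderr, "suspend %s failed: %d\n", dev->drv->name, err);
    return 0;   /* never abort the sweep over remaining devices */
}

static int failing_suspend(void) { return -5; }

int main(void)
{
    struct driver d = { "demo", failing_suspend };
    struct device dev = { &d };

    return bus_suspend_one(&dev);   /* warns, still returns 0 */
}
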
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index e325eab4724d..eab33f1dbdf7 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -673,6 +673,8 @@ void xs_resume(void)
673 struct xenbus_watch *watch; 673 struct xenbus_watch *watch;
674 char token[sizeof(watch) * 2 + 1]; 674 char token[sizeof(watch) * 2 + 1];
675 675
676 xb_init_comms();
677
676 mutex_unlock(&xs_state.response_mutex); 678 mutex_unlock(&xs_state.response_mutex);
677 mutex_unlock(&xs_state.request_mutex); 679 mutex_unlock(&xs_state.request_mutex);
678 up_write(&xs_state.transaction_mutex); 680 up_write(&xs_state.transaction_mutex);
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 515741a8e6b8..6559e0c752ce 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -20,10 +20,27 @@
20MODULE_DESCRIPTION("Xen filesystem"); 20MODULE_DESCRIPTION("Xen filesystem");
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22 22
23static ssize_t capabilities_read(struct file *file, char __user *buf,
24 size_t size, loff_t *off)
25{
26 char *tmp = "";
27
28 if (xen_initial_domain())
29 tmp = "control_d\n";
30
31 return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp));
32}
33
34static const struct file_operations capabilities_file_ops = {
35 .read = capabilities_read,
36};
37
23static int xenfs_fill_super(struct super_block *sb, void *data, int silent) 38static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
24{ 39{
25 static struct tree_descr xenfs_files[] = { 40 static struct tree_descr xenfs_files[] = {
26 [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR}, 41 [1] = {},
42 { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
43 { "capabilities", &capabilities_file_ops, S_IRUGO },
27 {""}, 44 {""},
28 }; 45 };
29 46
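
capabilities_read above is the canonical read-only virtual file: a static string handed out through simple_read_from_buffer(). A user-space analog of that helper's offset handling, a sketch assuming nothing beyond libc:

#include <stdio.h>
#include <string.h>

/* Analog of simple_read_from_buffer(): copy at most 'count' bytes
 * from 'from' (length 'available') starting at *ppos, advancing
 * *ppos by the number of bytes copied.  0 means EOF. */
static long simple_read(char *to, size_t count, long *ppos,
                        const char *from, size_t available)
{
    long pos = *ppos;
    size_t n;

    if (pos < 0)
        return -1;                      /* -EINVAL in the kernel */
    if ((size_t)pos >= available || count == 0)
        return 0;
    n = available - (size_t)pos;
    if (n > count)
        n = count;
    memcpy(to, from + pos, n);
    *ppos = pos + (long)n;
    return (long)n;
}

int main(void)
{
    const char *caps = "control_d\n";
    char buf[16];
    long pos = 0;
    long n = simple_read(buf, sizeof(buf), &pos, caps, strlen(caps));

    fwrite(buf, 1, (size_t)n, stdout);  /* prints "control_d" */
    return 0;
}

Repeated reads from the same *ppos walk through the buffer and finally return 0, which is why the xenfs file needs no private state.
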
diff --git a/firmware/cis/.gitignore b/firmware/cis/.gitignore
new file mode 100644
index 000000000000..1de39847f261
--- /dev/null
+++ b/firmware/cis/.gitignore
@@ -0,0 +1 @@
*.cis
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index eeb246845909..2341375386f8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -297,20 +297,14 @@ static int validate_request(struct autofs_wait_queue **wait,
297 */ 297 */
298 if (notify == NFY_MOUNT) { 298 if (notify == NFY_MOUNT) {
299 /* 299 /*
300 * If the dentry isn't hashed just go ahead and try the 300 * If the dentry was successfully mounted while we slept
301 * mount again with a new wait (not much else we can do). 301 * on the wait queue mutex we can return success. If it
302 */ 302 * isn't mounted (doesn't have submounts for the case of
303 if (!d_unhashed(dentry)) { 303 * a multi-mount with no mount at its base) we can
304 /* 304 * continue on and create a new request.
305 * But if the dentry is hashed, that means that we 305 */
306 * got here through the revalidate path. Thus, we 306 if (have_submounts(dentry))
307 * need to check if the dentry has been mounted 307 return 0;
308 * while we waited on the wq_mutex. If it has,
309 * simply return success.
310 */
311 if (d_mountpoint(dentry))
312 return 0;
313 }
314 } 308 }
315 309
316 return 1; 310 return 1;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5cebf0b37798..697f6b5f1313 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -41,6 +41,7 @@
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42#include <asm/unaligned.h> 42#include <asm/unaligned.h>
43#include <asm/cacheflush.h> 43#include <asm/cacheflush.h>
44#include <asm/page.h>
44 45
45/****************************************************************************/ 46/****************************************************************************/
46 47
@@ -54,6 +55,18 @@
54#define DBG_FLT(a...) 55#define DBG_FLT(a...)
55#endif 56#endif
56 57
58/*
59 * User data (stack, data section and bss) needs to be aligned
60 * for the same reasons as SLAB memory is, and to the same amount.
61 * Avoid duplicating architecture specific code by using the same
62 * macro as with SLAB allocation:
63 */
64#ifdef ARCH_SLAB_MINALIGN
65#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
66#else
67#define FLAT_DATA_ALIGN (sizeof(void *))
68#endif
69
57#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ 70#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
58#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ 71#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
59 72
@@ -114,20 +127,18 @@ static unsigned long create_flat_tables(
114 int envc = bprm->envc; 127 int envc = bprm->envc;
115 char uninitialized_var(dummy); 128 char uninitialized_var(dummy);
116 129
117 sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p); 130 sp = (unsigned long *)p;
131 sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
132 sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
133 argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
134 envp = argv + (argc + 1);
118 135
119 sp -= envc+1;
120 envp = sp;
121 sp -= argc+1;
122 argv = sp;
123
124 flat_stack_align(sp);
125 if (flat_argvp_envp_on_stack()) { 136 if (flat_argvp_envp_on_stack()) {
126 --sp; put_user((unsigned long) envp, sp); 137 put_user((unsigned long) envp, sp + 2);
127 --sp; put_user((unsigned long) argv, sp); 138 put_user((unsigned long) argv, sp + 1);
128 } 139 }
129 140
130 put_user(argc,--sp); 141 put_user(argc, sp);
131 current->mm->arg_start = (unsigned long) p; 142 current->mm->arg_start = (unsigned long) p;
132 while (argc-->0) { 143 while (argc-->0) {
133 put_user((unsigned long) p, argv++); 144 put_user((unsigned long) p, argv++);
@@ -558,7 +569,9 @@ static int load_flat_file(struct linux_binprm * bprm,
558 ret = realdatastart; 569 ret = realdatastart;
559 goto err; 570 goto err;
560 } 571 }
561 datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); 572 datapos = ALIGN(realdatastart +
573 MAX_SHARED_LIBS * sizeof(unsigned long),
574 FLAT_DATA_ALIGN);
562 575
563 DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n", 576 DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n",
564 (int)(data_len + bss_len + stack_len), (int)datapos); 577 (int)(data_len + bss_len + stack_len), (int)datapos);
@@ -604,9 +617,12 @@ static int load_flat_file(struct linux_binprm * bprm,
604 } 617 }
605 618
606 realdatastart = textpos + ntohl(hdr->data_start); 619 realdatastart = textpos + ntohl(hdr->data_start);
607 datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); 620 datapos = ALIGN(realdatastart +
608 reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + 621 MAX_SHARED_LIBS * sizeof(unsigned long),
609 MAX_SHARED_LIBS * sizeof(unsigned long)); 622 FLAT_DATA_ALIGN);
623
624 reloc = (unsigned long *)
625 (datapos + (ntohl(hdr->reloc_start) - text_len));
610 memp = textpos; 626 memp = textpos;
611 memp_size = len; 627 memp_size = len;
612#ifdef CONFIG_BINFMT_ZFLAT 628#ifdef CONFIG_BINFMT_ZFLAT
@@ -854,7 +870,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
854 stack_len = TOP_OF_ARGS - bprm->p; /* the strings */ 870 stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
855 stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ 871 stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
856 stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ 872 stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
857 873 stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
858 874
859 res = load_flat_file(bprm, &libinfo, 0, &stack_len); 875 res = load_flat_file(bprm, &libinfo, 0, &stack_len);
860 if (res > (unsigned long)-4096) 876 if (res > (unsigned long)-4096)
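
The loader change above leans on the power-of-two align-down idiom, sp & -FLAT_DATA_ALIGN, while datapos uses ALIGN() to round up. A standalone sketch of both directions, assuming the no-ARCH_SLAB_MINALIGN fallback of sizeof(void *):

#include <stdint.h>
#include <stdio.h>

#define FLAT_DATA_ALIGN sizeof(void *)   /* fallback when no ARCH_SLAB_MINALIGN */

int main(void)
{
    uintptr_t sp = 0x7fff1237;
    uintptr_t a  = FLAT_DATA_ALIGN;

    /* Stack pointers are aligned DOWN: p & -a == p & ~(a - 1),
     * valid only when a is a power of two. */
    printf("down: %#lx\n", (unsigned long)(sp & -a));

    /* ALIGN() rounds UP instead: (p + a - 1) & ~(a - 1). */
    printf("up:   %#lx\n", (unsigned long)((sp + a - 1) & ~(a - 1)));
    return 0;
}
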
diff --git a/fs/bio.c b/fs/bio.c
index 98711647ece4..740699c4f90c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,10 +26,9 @@
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <trace/block.h>
30#include <scsi/sg.h> /* for struct sg_iovec */ 29#include <scsi/sg.h> /* for struct sg_iovec */
31 30
32DEFINE_TRACE(block_split); 31#include <trace/events/block.h>
33 32
34/* 33/*
35 * Test patch to inline a certain number of bi_io_vec's inside the bio 34 * Test patch to inline a certain number of bi_io_vec's inside the bio
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3e2c7c738f23..35af93355063 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2622,7 +2622,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
2622 search_start); 2622 search_start);
2623 if (block_group && block_group_bits(block_group, data)) { 2623 if (block_group && block_group_bits(block_group, data)) {
2624 down_read(&space_info->groups_sem); 2624 down_read(&space_info->groups_sem);
2625 goto have_block_group; 2625 if (list_empty(&block_group->list) ||
2626 block_group->ro) {
2627 /*
2628 * someone is removing this block group;
2629 * we can't jump to the have_block_group
2630 * label because our list pointers are not
2631 * valid
2632 */
2633 btrfs_put_block_group(block_group);
2634 up_read(&space_info->groups_sem);
2635 } else
2636 goto have_block_group;
2626 } else if (block_group) { 2637 } else if (block_group) {
2627 btrfs_put_block_group(block_group); 2638 btrfs_put_block_group(block_group);
2628 } 2639 }
@@ -2656,6 +2667,13 @@ have_block_group:
2656 * people trying to start a new cluster 2667 * people trying to start a new cluster
2657 */ 2668 */
2658 spin_lock(&last_ptr->refill_lock); 2669 spin_lock(&last_ptr->refill_lock);
2670 if (last_ptr->block_group &&
2671 (last_ptr->block_group->ro ||
2672 !block_group_bits(last_ptr->block_group, data))) {
2673 offset = 0;
2674 goto refill_cluster;
2675 }
2676
2659 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 2677 offset = btrfs_alloc_from_cluster(block_group, last_ptr,
2660 num_bytes, search_start); 2678 num_bytes, search_start);
2661 if (offset) { 2679 if (offset) {
@@ -2681,10 +2699,17 @@ have_block_group:
2681 2699
2682 last_ptr_loop = 1; 2700 last_ptr_loop = 1;
2683 search_start = block_group->key.objectid; 2701 search_start = block_group->key.objectid;
2702 /*
2703 * we know this block group is properly
2704 * in the list because
2705 * btrfs_remove_block_group drops the
2706 * cluster before it removes the block
2707 * group from the list
2708 */
2684 goto have_block_group; 2709 goto have_block_group;
2685 } 2710 }
2686 spin_unlock(&last_ptr->lock); 2711 spin_unlock(&last_ptr->lock);
2687 2712refill_cluster:
2688 /* 2713 /*
2689 * this cluster didn't work out, free it and 2714 * this cluster didn't work out, free it and
2690 * start over 2715 * start over
@@ -5968,6 +5993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5968{ 5993{
5969 struct btrfs_path *path; 5994 struct btrfs_path *path;
5970 struct btrfs_block_group_cache *block_group; 5995 struct btrfs_block_group_cache *block_group;
5996 struct btrfs_free_cluster *cluster;
5971 struct btrfs_key key; 5997 struct btrfs_key key;
5972 int ret; 5998 int ret;
5973 5999
@@ -5979,6 +6005,21 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5979 6005
5980 memcpy(&key, &block_group->key, sizeof(key)); 6006 memcpy(&key, &block_group->key, sizeof(key));
5981 6007
6008 /* make sure this block group isn't part of an allocation cluster */
6009 cluster = &root->fs_info->data_alloc_cluster;
6010 spin_lock(&cluster->refill_lock);
6011 btrfs_return_cluster_to_free_space(block_group, cluster);
6012 spin_unlock(&cluster->refill_lock);
6013
6014 /*
6015 * make sure this block group isn't part of a metadata
6016 * allocation cluster
6017 */
6018 cluster = &root->fs_info->meta_alloc_cluster;
6019 spin_lock(&cluster->refill_lock);
6020 btrfs_return_cluster_to_free_space(block_group, cluster);
6021 spin_unlock(&cluster->refill_lock);
6022
5982 path = btrfs_alloc_path(); 6023 path = btrfs_alloc_path();
5983 BUG_ON(!path); 6024 BUG_ON(!path);
5984 6025
@@ -5988,7 +6029,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5988 spin_unlock(&root->fs_info->block_group_cache_lock); 6029 spin_unlock(&root->fs_info->block_group_cache_lock);
5989 btrfs_remove_free_space_cache(block_group); 6030 btrfs_remove_free_space_cache(block_group);
5990 down_write(&block_group->space_info->groups_sem); 6031 down_write(&block_group->space_info->groups_sem);
5991 list_del(&block_group->list); 6032 /*
6033 * we must use list_del_init so people can check to see if they
6034 * are still on the list after taking the semaphore
6035 */
6036 list_del_init(&block_group->list);
5992 up_write(&block_group->space_info->groups_sem); 6037 up_write(&block_group->space_info->groups_sem);
5993 6038
5994 spin_lock(&block_group->space_info->lock); 6039 spin_lock(&block_group->space_info->lock);
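
The switch to list_del_init() above is what makes the earlier list_empty(&block_group->list) probe safe: a plain list_del() leaves the entry's pointers dangling. A self-contained miniature of the list primitives, just enough to show why the re-initialised entry answers that probe correctly:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *e, struct list_head *head)
{
    e->next = head->next; e->prev = head;
    head->next->prev = e; head->next = e;
}

/* Unlink AND re-initialise, so later code can use list_empty(&e)
 * as an "am I still queued?" test -- the check the block-group
 * allocator relies on after taking groups_sem. */
static void list_del_init(struct list_head *e)
{
    e->prev->next = e->next;
    e->next->prev = e->prev;
    INIT_LIST_HEAD(e);
}

int main(void)
{
    struct list_head groups, bg;

    INIT_LIST_HEAD(&groups);
    list_add(&bg, &groups);
    list_del_init(&bg);
    printf("removed: %d\n", list_empty(&bg));   /* prints 1 */
    return 0;
}
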
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5f01dad4b696..a6d35b0054ca 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1440,6 +1440,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1440 device->io_align = root->sectorsize; 1440 device->io_align = root->sectorsize;
1441 device->sector_size = root->sectorsize; 1441 device->sector_size = root->sectorsize;
1442 device->total_bytes = i_size_read(bdev->bd_inode); 1442 device->total_bytes = i_size_read(bdev->bd_inode);
1443 device->disk_total_bytes = device->total_bytes;
1443 device->dev_root = root->fs_info->dev_root; 1444 device->dev_root = root->fs_info->dev_root;
1444 device->bdev = bdev; 1445 device->bdev = bdev;
1445 device->in_fs_metadata = 1; 1446 device->in_fs_metadata = 1;
diff --git a/fs/buffer.c b/fs/buffer.c
index aed297739eb0..49106127a4aa 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2736,6 +2736,8 @@ has_buffers:
2736 pos += blocksize; 2736 pos += blocksize;
2737 } 2737 }
2738 2738
2739 map_bh.b_size = blocksize;
2740 map_bh.b_state = 0;
2739 err = get_block(inode, iblock, &map_bh, 0); 2741 err = get_block(inode, iblock, &map_bh, 0);
2740 if (err) 2742 if (err)
2741 goto unlock; 2743 goto unlock;
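
The two added assignments reset the reused map_bh before each get_block() call so no mapping state leaks between loop iterations. A toy illustration of the reused-out-parameter pitfall, with invented names:

#include <stdio.h>
#include <string.h>

struct map { unsigned long b_size, b_state; };

/* Stand-in for get_block(): only fills the map if it looks unused. */
static void get_block_stub(struct map *m)
{
    if (m->b_state == 0) {
        m->b_size = 4096;
        m->b_state = 1;
    }
}

int main(void)
{
    struct map map_bh;
    int i;

    for (i = 0; i < 2; i++) {
        /* Reset the reused out-parameter every pass, as the fix does
         * for map_bh, so the callee never sees the previous state. */
        memset(&map_bh, 0, sizeof(map_bh));
        get_block_stub(&map_bh);
        printf("pass %d: size=%lu\n", i, map_bh.b_size);
    }
    return 0;
}
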
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 19218e1463d6..f7c255f9c624 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -122,13 +122,13 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache)
122} 122}
123 123
124/* 124/*
125 * cf-bind.c 125 * bind.c
126 */ 126 */
127extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); 127extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args);
128extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); 128extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache);
129 129
130/* 130/*
131 * cf-daemon.c 131 * daemon.c
132 */ 132 */
133extern const struct file_operations cachefiles_daemon_fops; 133extern const struct file_operations cachefiles_daemon_fops;
134 134
@@ -136,17 +136,17 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache,
136 unsigned fnr, unsigned bnr); 136 unsigned fnr, unsigned bnr);
137 137
138/* 138/*
139 * cf-interface.c 139 * interface.c
140 */ 140 */
141extern const struct fscache_cache_ops cachefiles_cache_ops; 141extern const struct fscache_cache_ops cachefiles_cache_ops;
142 142
143/* 143/*
144 * cf-key.c 144 * key.c
145 */ 145 */
146extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); 146extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
147 147
148/* 148/*
149 * cf-namei.c 149 * namei.c
150 */ 150 */
151extern int cachefiles_delete_object(struct cachefiles_cache *cache, 151extern int cachefiles_delete_object(struct cachefiles_cache *cache,
152 struct cachefiles_object *object); 152 struct cachefiles_object *object);
@@ -165,7 +165,7 @@ extern int cachefiles_check_in_use(struct cachefiles_cache *cache,
165 struct dentry *dir, char *filename); 165 struct dentry *dir, char *filename);
166 166
167/* 167/*
168 * cf-proc.c 168 * proc.c
169 */ 169 */
170#ifdef CONFIG_CACHEFILES_HISTOGRAM 170#ifdef CONFIG_CACHEFILES_HISTOGRAM
171extern atomic_t cachefiles_lookup_histogram[HZ]; 171extern atomic_t cachefiles_lookup_histogram[HZ];
@@ -190,7 +190,7 @@ void cachefiles_hist(atomic_t histogram[], unsigned long start_jif)
190#endif 190#endif
191 191
192/* 192/*
193 * cf-rdwr.c 193 * rdwr.c
194 */ 194 */
195extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, 195extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *,
196 struct page *, gfp_t); 196 struct page *, gfp_t);
@@ -205,7 +205,7 @@ extern int cachefiles_write_page(struct fscache_storage *, struct page *);
205extern void cachefiles_uncache_page(struct fscache_object *, struct page *); 205extern void cachefiles_uncache_page(struct fscache_object *, struct page *);
206 206
207/* 207/*
208 * cf-security.c 208 * security.c
209 */ 209 */
210extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); 210extern int cachefiles_get_security_ID(struct cachefiles_cache *cache);
211extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, 211extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
@@ -225,7 +225,7 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
225} 225}
226 226
227/* 227/*
228 * cf-xattr.c 228 * xattr.c
229 */ 229 */
230extern int cachefiles_check_object_type(struct cachefiles_object *object); 230extern int cachefiles_check_object_type(struct cachefiles_object *object);
231extern int cachefiles_set_object_xattr(struct cachefiles_object *object, 231extern int cachefiles_set_object_xattr(struct cachefiles_object *object,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 11431ed72a7f..3758965d73d5 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -225,6 +225,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
225 if (!(oflags & FMODE_READ)) 225 if (!(oflags & FMODE_READ))
226 write_only = true; 226 write_only = true;
227 227
228 mode &= ~current_umask();
228 rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, 229 rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode,
229 pnetfid, presp_data, &oplock, full_path, 230 pnetfid, presp_data, &oplock, full_path,
230 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 231 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -310,7 +311,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
310 return -ENOMEM; 311 return -ENOMEM;
311 } 312 }
312 313
313 mode &= ~current_umask();
314 if (oplockEnabled) 314 if (oplockEnabled)
315 oplock = REQ_OPLOCK; 315 oplock = REQ_OPLOCK;
316 316
@@ -336,7 +336,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
336 else /* success, no need to query */ 336 else /* success, no need to query */
337 goto cifs_create_set_dentry; 337 goto cifs_create_set_dentry;
338 } else if ((rc != -EIO) && (rc != -EREMOTE) && 338 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
339 (rc != -EOPNOTSUPP)) /* path not found or net err */ 339 (rc != -EOPNOTSUPP) && (rc != -EINVAL))
340 goto cifs_create_out; 340 goto cifs_create_out;
341 /* else fallthrough to retry, using older open call, this is 341 /* else fallthrough to retry, using older open call, this is
342 case where server does not support this SMB level, and 342 case where server does not support this SMB level, and
@@ -609,7 +609,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
609 int xid; 609 int xid;
610 int rc = 0; /* to get around spurious gcc warning, set to zero here */ 610 int rc = 0; /* to get around spurious gcc warning, set to zero here */
611 int oplock = 0; 611 int oplock = 0;
612 int mode;
613 __u16 fileHandle = 0; 612 __u16 fileHandle = 0;
614 bool posix_open = false; 613 bool posix_open = false;
615 struct cifs_sb_info *cifs_sb; 614 struct cifs_sb_info *cifs_sb;
@@ -658,30 +657,36 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
658 } 657 }
659 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); 658 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode));
660 659
660 /* Posix open is only called (at lookup time) for file create now.
661 * For opens (rather than creates), because we do not know if it
662 * is a file or directory yet, and current Samba no longer allows
663 * us to do posix open on dirs, we could end up wasting an open call
664 * on what turns out to be a dir. For file opens, we wait to call posix
665 * open till cifs_open. It could be added here (lookup) in the future
666 * but the performance tradeoff of the extra network request when EISDIR
667 * or EACCES is returned would have to be weighed against the 50%
668 * reduction in network traffic in the other paths.
669 */
661 if (pTcon->unix_ext) { 670 if (pTcon->unix_ext) {
662 if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && 671 if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
663 (nd->flags & LOOKUP_OPEN)) { 672 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
664 if (!((nd->intent.open.flags & O_CREAT) && 673 (nd->intent.open.flags & O_CREAT)) {
665 (nd->intent.open.flags & O_EXCL))) { 674 rc = cifs_posix_open(full_path, &newInode,
666 mode = nd->intent.open.create_mode & 675 parent_dir_inode->i_sb,
667 ~current_umask(); 676 nd->intent.open.create_mode,
668 rc = cifs_posix_open(full_path, &newInode,
669 parent_dir_inode->i_sb, mode,
670 nd->intent.open.flags, &oplock, 677 nd->intent.open.flags, &oplock,
671 &fileHandle, xid); 678 &fileHandle, xid);
672 /* 679 /*
673 * This code works around a bug in 680 * The check below works around a bug in POSIX
674 * samba posix open in samba versions 3.3.1 681 * open in samba versions 3.3.1 and earlier where
675 * and earlier where create works 682 * open could incorrectly fail with invalid parameter.
676 * but open fails with invalid parameter. 683 * If either that or op not supported returned, follow
677 * If either of these error codes are 684 * the normal lookup.
678 * returned, follow the normal lookup. 685 */
679 * Otherwise, the error during posix open 686 if ((rc == 0) || (rc == -ENOENT))
680 * is handled. 687 posix_open = true;
681 */ 688 else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP))
682 if ((rc != -EINVAL) && (rc != -EOPNOTSUPP)) 689 pTcon->broken_posix_open = true;
683 posix_open = true;
684 }
685 } 690 }
686 if (!posix_open) 691 if (!posix_open)
687 rc = cifs_get_inode_info_unix(&newInode, full_path, 692 rc = cifs_get_inode_info_unix(&newInode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 38c06f826575..302ea15f02e6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -130,10 +130,6 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode,
130 struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) 130 struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
131{ 131{
132 132
133 file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
134 if (file->private_data == NULL)
135 return -ENOMEM;
136 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
137 write_lock(&GlobalSMBSeslock); 133 write_lock(&GlobalSMBSeslock);
138 134
139 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 135 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
@@ -184,6 +180,38 @@ psx_client_can_cache:
184 return 0; 180 return 0;
185} 181}
186 182
183static struct cifsFileInfo *
184cifs_fill_filedata(struct file *file)
185{
186 struct list_head *tmp;
187 struct cifsFileInfo *pCifsFile = NULL;
188 struct cifsInodeInfo *pCifsInode = NULL;
189
190 /* search inode for this file and fill in file->private_data */
191 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
192 read_lock(&GlobalSMBSeslock);
193 list_for_each(tmp, &pCifsInode->openFileList) {
194 pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
195 if ((pCifsFile->pfile == NULL) &&
196 (pCifsFile->pid == current->tgid)) {
197 /* mode set in cifs_create */
198
199 /* needed for writepage */
200 pCifsFile->pfile = file;
201 file->private_data = pCifsFile;
202 break;
203 }
204 }
205 read_unlock(&GlobalSMBSeslock);
206
207 if (file->private_data != NULL) {
208 return pCifsFile;
209 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
210 cERROR(1, ("could not find file instance for "
211 "new file %p", file));
212 return NULL;
213}
214
187/* all arguments to this function must be checked for validity in caller */ 215/* all arguments to this function must be checked for validity in caller */
188static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, 216static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
189 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, 217 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
@@ -258,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file)
258 struct cifsTconInfo *tcon; 286 struct cifsTconInfo *tcon;
259 struct cifsFileInfo *pCifsFile; 287 struct cifsFileInfo *pCifsFile;
260 struct cifsInodeInfo *pCifsInode; 288 struct cifsInodeInfo *pCifsInode;
261 struct list_head *tmp;
262 char *full_path = NULL; 289 char *full_path = NULL;
263 int desiredAccess; 290 int desiredAccess;
264 int disposition; 291 int disposition;
@@ -270,32 +297,12 @@ int cifs_open(struct inode *inode, struct file *file)
270 cifs_sb = CIFS_SB(inode->i_sb); 297 cifs_sb = CIFS_SB(inode->i_sb);
271 tcon = cifs_sb->tcon; 298 tcon = cifs_sb->tcon;
272 299
273 /* search inode for this file and fill in file->private_data */
274 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 300 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
275 read_lock(&GlobalSMBSeslock); 301 pCifsFile = cifs_fill_filedata(file);
276 list_for_each(tmp, &pCifsInode->openFileList) { 302 if (pCifsFile) {
277 pCifsFile = list_entry(tmp, struct cifsFileInfo,
278 flist);
279 if ((pCifsFile->pfile == NULL) &&
280 (pCifsFile->pid == current->tgid)) {
281 /* mode set in cifs_create */
282
283 /* needed for writepage */
284 pCifsFile->pfile = file;
285
286 file->private_data = pCifsFile;
287 break;
288 }
289 }
290 read_unlock(&GlobalSMBSeslock);
291
292 if (file->private_data != NULL) {
293 rc = 0;
294 FreeXid(xid); 303 FreeXid(xid);
295 return rc; 304 return 0;
296 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) 305 }
297 cERROR(1, ("could not find file instance for "
298 "new file %p", file));
299 306
300 full_path = build_path_from_dentry(file->f_path.dentry); 307 full_path = build_path_from_dentry(file->f_path.dentry);
301 if (full_path == NULL) { 308 if (full_path == NULL) {
@@ -325,6 +332,7 @@ int cifs_open(struct inode *inode, struct file *file)
325 /* no need for special case handling of setting mode 332 /* no need for special case handling of setting mode
326 on read only files needed here */ 333 on read only files needed here */
327 334
335 pCifsFile = cifs_fill_filedata(file);
328 cifs_posix_open_inode_helper(inode, file, pCifsInode, 336 cifs_posix_open_inode_helper(inode, file, pCifsInode,
329 pCifsFile, oplock, netfid); 337 pCifsFile, oplock, netfid);
330 goto out; 338 goto out;
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index e0cbd16f6dc9..1c341304621f 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -28,7 +28,7 @@
28#define FSCACHE_MAX_THREADS 32 28#define FSCACHE_MAX_THREADS 32
29 29
30/* 30/*
31 * fsc-cache.c 31 * cache.c
32 */ 32 */
33extern struct list_head fscache_cache_list; 33extern struct list_head fscache_cache_list;
34extern struct rw_semaphore fscache_addremove_sem; 34extern struct rw_semaphore fscache_addremove_sem;
@@ -37,7 +37,7 @@ extern struct fscache_cache *fscache_select_cache_for_object(
37 struct fscache_cookie *); 37 struct fscache_cookie *);
38 38
39/* 39/*
40 * fsc-cookie.c 40 * cookie.c
41 */ 41 */
42extern struct kmem_cache *fscache_cookie_jar; 42extern struct kmem_cache *fscache_cookie_jar;
43 43
@@ -45,13 +45,13 @@ extern void fscache_cookie_init_once(void *);
45extern void __fscache_cookie_put(struct fscache_cookie *); 45extern void __fscache_cookie_put(struct fscache_cookie *);
46 46
47/* 47/*
48 * fsc-fsdef.c 48 * fsdef.c
49 */ 49 */
50extern struct fscache_cookie fscache_fsdef_index; 50extern struct fscache_cookie fscache_fsdef_index;
51extern struct fscache_cookie_def fscache_fsdef_netfs_def; 51extern struct fscache_cookie_def fscache_fsdef_netfs_def;
52 52
53/* 53/*
54 * fsc-histogram.c 54 * histogram.c
55 */ 55 */
56#ifdef CONFIG_FSCACHE_HISTOGRAM 56#ifdef CONFIG_FSCACHE_HISTOGRAM
57extern atomic_t fscache_obj_instantiate_histogram[HZ]; 57extern atomic_t fscache_obj_instantiate_histogram[HZ];
@@ -75,7 +75,7 @@ extern const struct file_operations fscache_histogram_fops;
75#endif 75#endif
76 76
77/* 77/*
78 * fsc-main.c 78 * main.c
79 */ 79 */
80extern unsigned fscache_defer_lookup; 80extern unsigned fscache_defer_lookup;
81extern unsigned fscache_defer_create; 81extern unsigned fscache_defer_create;
@@ -86,14 +86,14 @@ extern int fscache_wait_bit(void *);
86extern int fscache_wait_bit_interruptible(void *); 86extern int fscache_wait_bit_interruptible(void *);
87 87
88/* 88/*
89 * fsc-object.c 89 * object.c
90 */ 90 */
91extern void fscache_withdrawing_object(struct fscache_cache *, 91extern void fscache_withdrawing_object(struct fscache_cache *,
92 struct fscache_object *); 92 struct fscache_object *);
93extern void fscache_enqueue_object(struct fscache_object *); 93extern void fscache_enqueue_object(struct fscache_object *);
94 94
95/* 95/*
96 * fsc-operation.c 96 * operation.c
97 */ 97 */
98extern int fscache_submit_exclusive_op(struct fscache_object *, 98extern int fscache_submit_exclusive_op(struct fscache_object *,
99 struct fscache_operation *); 99 struct fscache_operation *);
@@ -104,7 +104,7 @@ extern void fscache_start_operations(struct fscache_object *);
104extern void fscache_operation_gc(struct work_struct *); 104extern void fscache_operation_gc(struct work_struct *);
105 105
106/* 106/*
107 * fsc-proc.c 107 * proc.c
108 */ 108 */
109#ifdef CONFIG_PROC_FS 109#ifdef CONFIG_PROC_FS
110extern int __init fscache_proc_init(void); 110extern int __init fscache_proc_init(void);
@@ -115,7 +115,7 @@ extern void fscache_proc_cleanup(void);
115#endif 115#endif
116 116
117/* 117/*
118 * fsc-stats.c 118 * stats.c
119 */ 119 */
120#ifdef CONFIG_FSCACHE_STATS 120#ifdef CONFIG_FSCACHE_STATS
121extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; 121extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS];
diff --git a/fs/inode.c b/fs/inode.c
index 0571983755dc..bca0c618fdb3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -219,6 +219,7 @@ static struct inode *alloc_inode(struct super_block *sb)
219void destroy_inode(struct inode *inode) 219void destroy_inode(struct inode *inode)
220{ 220{
221 BUG_ON(inode_has_buffers(inode)); 221 BUG_ON(inode_has_buffers(inode));
222 ima_inode_free(inode);
222 security_inode_free(inode); 223 security_inode_free(inode);
223 if (inode->i_sb->s_op->destroy_inode) 224 if (inode->i_sb->s_op->destroy_inode)
224 inode->i_sb->s_op->destroy_inode(inode); 225 inode->i_sb->s_op->destroy_inode(inode);
@@ -1053,13 +1054,22 @@ int insert_inode_locked(struct inode *inode)
1053 struct super_block *sb = inode->i_sb; 1054 struct super_block *sb = inode->i_sb;
1054 ino_t ino = inode->i_ino; 1055 ino_t ino = inode->i_ino;
1055 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1056 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1056 struct inode *old;
1057 1057
1058 inode->i_state |= I_LOCK|I_NEW; 1058 inode->i_state |= I_LOCK|I_NEW;
1059 while (1) { 1059 while (1) {
1060 struct hlist_node *node;
1061 struct inode *old = NULL;
1060 spin_lock(&inode_lock); 1062 spin_lock(&inode_lock);
1061 old = find_inode_fast(sb, head, ino); 1063 hlist_for_each_entry(old, node, head, i_hash) {
1062 if (likely(!old)) { 1064 if (old->i_ino != ino)
1065 continue;
1066 if (old->i_sb != sb)
1067 continue;
1068 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
1069 continue;
1070 break;
1071 }
1072 if (likely(!node)) {
1063 hlist_add_head(&inode->i_hash, head); 1073 hlist_add_head(&inode->i_hash, head);
1064 spin_unlock(&inode_lock); 1074 spin_unlock(&inode_lock);
1065 return 0; 1075 return 0;
@@ -1081,14 +1091,24 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1081{ 1091{
1082 struct super_block *sb = inode->i_sb; 1092 struct super_block *sb = inode->i_sb;
1083 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1093 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1084 struct inode *old;
1085 1094
1086 inode->i_state |= I_LOCK|I_NEW; 1095 inode->i_state |= I_LOCK|I_NEW;
1087 1096
1088 while (1) { 1097 while (1) {
1098 struct hlist_node *node;
1099 struct inode *old = NULL;
1100
1089 spin_lock(&inode_lock); 1101 spin_lock(&inode_lock);
1090 old = find_inode(sb, head, test, data); 1102 hlist_for_each_entry(old, node, head, i_hash) {
1091 if (likely(!old)) { 1103 if (old->i_sb != sb)
1104 continue;
1105 if (!test(old, data))
1106 continue;
1107 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
1108 continue;
1109 break;
1110 }
1111 if (likely(!node)) {
1092 hlist_add_head(&inode->i_hash, head); 1112 hlist_add_head(&inode->i_hash, head);
1093 spin_unlock(&inode_lock); 1113 spin_unlock(&inode_lock);
1094 return 0; 1114 return 0;
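
Both insert_inode_locked variants now use the "inspect the loop cursor after the walk" idiom: node is NULL exactly when no live match was found, so insertion is safe. A lock-free user-space miniature of that insert-if-absent shape (in the kernel this all runs under inode_lock):

#include <stdio.h>

struct node { unsigned long ino; struct node *next; };

/* Walk the bucket; if the cursor falls off the end no match exists
 * and we may insert, otherwise report the collision to the caller. */
static int insert_if_absent(struct node **head, struct node *n)
{
    struct node *old;

    for (old = *head; old; old = old->next)
        if (old->ino == n->ino)
            break;
    if (!old) {                 /* cursor is NULL: no live match */
        n->next = *head;
        *head = n;
        return 0;
    }
    return -1;                  /* caller must wait for / reuse 'old' */
}

int main(void)
{
    struct node *head = NULL;
    struct node a = { 42, NULL }, b = { 42, NULL };

    printf("%d %d\n", insert_if_absent(&head, &a),
                      insert_if_absent(&head, &b));   /* prints: 0 -1 */
    return 0;
}
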
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 06560c520f49..618e21c0b7a3 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -241,7 +241,7 @@ write_out_data:
241 spin_lock(&journal->j_list_lock); 241 spin_lock(&journal->j_list_lock);
242 } 242 }
243 /* Someone already cleaned up the buffer? */ 243 /* Someone already cleaned up the buffer? */
244 if (!buffer_jbd(bh) 244 if (!buffer_jbd(bh) || bh2jh(bh) != jh
245 || jh->b_transaction != commit_transaction 245 || jh->b_transaction != commit_transaction
246 || jh->b_jlist != BJ_SyncData) { 246 || jh->b_jlist != BJ_SyncData) {
247 jbd_unlock_bh_state(bh); 247 jbd_unlock_bh_state(bh);
@@ -478,7 +478,9 @@ void journal_commit_transaction(journal_t *journal)
478 spin_lock(&journal->j_list_lock); 478 spin_lock(&journal->j_list_lock);
479 continue; 479 continue;
480 } 480 }
481 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { 481 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
482 jh->b_transaction == commit_transaction &&
483 jh->b_jlist == BJ_Locked) {
482 __journal_unfile_buffer(jh); 484 __journal_unfile_buffer(jh);
483 jbd_unlock_bh_state(bh); 485 jbd_unlock_bh_state(bh);
484 journal_remove_journal_head(bh); 486 journal_remove_journal_head(bh);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index c32b4a1ad6cf..a0244740b75a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -480,13 +480,6 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
480 return; 480 return;
481 481
482filebad: 482filebad:
483 mutex_lock(&c->erase_free_sem);
484 spin_lock(&c->erase_completion_lock);
485 /* Stick it on a list (any list) so erase_failed can take it
486 right off again. Silly, but shouldn't happen often. */
487 list_move(&jeb->list, &c->erasing_list);
488 spin_unlock(&c->erase_completion_lock);
489 mutex_unlock(&c->erase_free_sem);
490 jffs2_erase_failed(c, jeb, bad_offset); 483 jffs2_erase_failed(c, jeb, bad_offset);
491 return; 484 return;
492 485
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a4d242680299..4674f8092da8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2594,12 +2594,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data)
2594 unsigned long timestamp = (unsigned long)data; 2594 unsigned long timestamp = (unsigned long)data;
2595 2595
2596 if (task->tk_status < 0) { 2596 if (task->tk_status < 0) {
2597 switch (task->tk_status) { 2597 /* Unless we're shutting down, schedule state recovery! */
2598 case -NFS4ERR_STALE_CLIENTID: 2598 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
2599 case -NFS4ERR_EXPIRED: 2599 nfs4_schedule_state_recovery(clp);
2600 case -NFS4ERR_CB_PATH_DOWN:
2601 nfs4_schedule_state_recovery(clp);
2602 }
2603 return; 2600 return;
2604 } 2601 }
2605 spin_lock(&clp->cl_lock); 2602 spin_lock(&clp->cl_lock);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index d9ef602fbc5a..e3ed5908820b 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -129,7 +129,7 @@ enum {
129 Opt_err 129 Opt_err
130}; 130};
131 131
132static match_table_t __initconst tokens = { 132static const match_table_t tokens __initconst = {
133 {Opt_port, "port=%u"}, 133 {Opt_port, "port=%u"},
134 {Opt_rsize, "rsize=%u"}, 134 {Opt_rsize, "rsize=%u"},
135 {Opt_wsize, "wsize=%u"}, 135 {Opt_wsize, "wsize=%u"},
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6c68ffd6b4bb..b660435978d2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1015,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1015 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1015 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
1016 set_fs(oldfs); 1016 set_fs(oldfs);
1017 if (host_err >= 0) { 1017 if (host_err >= 0) {
1018 *cnt = host_err;
1018 nfsdstats.io_write += host_err; 1019 nfsdstats.io_write += host_err;
1019 fsnotify_modify(file->f_path.dentry); 1020 fsnotify_modify(file->f_path.dentry);
1020 } 1021 }
@@ -1060,10 +1061,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1060 } 1061 }
1061 1062
1062 dprintk("nfsd: write complete host_err=%d\n", host_err); 1063 dprintk("nfsd: write complete host_err=%d\n", host_err);
1063 if (host_err >= 0) { 1064 if (host_err >= 0)
1064 err = 0; 1065 err = 0;
1065 *cnt = host_err; 1066 else
1066 } else
1067 err = nfserrno(host_err); 1067 err = nfserrno(host_err);
1068out: 1068out:
1069 return err; 1069 return err;
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index e90b60dfced9..300f1cdfa862 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -311,7 +311,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
311 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 311 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
312 if (ret < 0) { 312 if (ret < 0) {
313 if (ret != -ENOENT) 313 if (ret != -ENOENT)
314 goto out_sem; 314 goto out_header;
315 /* skip hole */ 315 /* skip hole */
316 ret = 0; 316 ret = 0;
317 continue; 317 continue;
@@ -344,7 +344,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
344 continue; 344 continue;
345 printk(KERN_ERR "%s: cannot delete block\n", 345 printk(KERN_ERR "%s: cannot delete block\n",
346 __func__); 346 __func__);
347 goto out_sem; 347 goto out_header;
348 } 348 }
349 } 349 }
350 350
@@ -361,6 +361,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
361 nilfs_mdt_mark_dirty(cpfile); 361 nilfs_mdt_mark_dirty(cpfile);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr, KM_USER0);
363 } 363 }
364
365 out_header:
364 brelse(header_bh); 366 brelse(header_bh);
365 367
366 out_sem: 368 out_sem:
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 50ff3f2cdf24..d6759b92006f 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -576,7 +576,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
576 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); 576 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
577 577
578 out_free: 578 out_free:
579 while (--n > 0) 579 while (--n >= 0)
580 vfree(kbufs[n]); 580 vfree(kbufs[n]);
581 kfree(kbufs[4]); 581 kfree(kbufs[4]);
582 return ret; 582 return ret;
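
The one-character nilfs fix above is the classic reverse-free off-by-one: with --n > 0 the loop stops at index 1 and leaks element 0. A minimal demonstration:

#include <stdlib.h>

int main(void)
{
    void *bufs[4];
    int n;

    for (n = 0; n < 4; n++)
        bufs[n] = malloc(16);

    /* n == 4 here.  "while (--n > 0)" would free bufs[3..1] and leak
     * bufs[0]; ">= 0" visits every element, as the fix does. */
    while (--n >= 0)
        free(bufs[n]);
    return 0;
}
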
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fb45615943c2..3326bbf9ab95 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1956,7 +1956,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
1956 const struct pid_entry *p = ptr; 1956 const struct pid_entry *p = ptr;
1957 struct inode *inode; 1957 struct inode *inode;
1958 struct proc_inode *ei; 1958 struct proc_inode *ei;
1959 struct dentry *error = ERR_PTR(-EINVAL); 1959 struct dentry *error = ERR_PTR(-ENOENT);
1960 1960
1961 inode = proc_pid_make_inode(dir->i_sb, task); 1961 inode = proc_pid_make_inode(dir->i_sb, task);
1962 if (!inode) 1962 if (!inode)
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 9bca39cf99ee..1afa4dd4cae2 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -12,20 +12,14 @@
12 12
13static int loadavg_proc_show(struct seq_file *m, void *v) 13static int loadavg_proc_show(struct seq_file *m, void *v)
14{ 14{
15 int a, b, c; 15 unsigned long avnrun[3];
16 unsigned long seq;
17 16
18 do { 17 get_avenrun(avnrun, FIXED_1/200, 0);
19 seq = read_seqbegin(&xtime_lock);
20 a = avenrun[0] + (FIXED_1/200);
21 b = avenrun[1] + (FIXED_1/200);
22 c = avenrun[2] + (FIXED_1/200);
23 } while (read_seqretry(&xtime_lock, seq));
24 18
25 seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", 19 seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
26 LOAD_INT(a), LOAD_FRAC(a), 20 LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
27 LOAD_INT(b), LOAD_FRAC(b), 21 LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
28 LOAD_INT(c), LOAD_FRAC(c), 22 LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
29 nr_running(), nr_threads, 23 nr_running(), nr_threads,
30 task_active_pid_ns(current)->last_pid); 24 task_active_pid_ns(current)->last_pid);
31 return 0; 25 return 0;
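
The rewritten loadavg_proc_show leans on the kernel's fixed-point macros, and the FIXED_1/200 bias it passes to get_avenrun() is half of the 0.01 display step, i.e. rounding. A sketch that reproduces the formatting in user space, assuming the usual FSHIFT of 11 from this era's sched.h:

#include <stdio.h>

#define FSHIFT   11                      /* fixed-point shift */
#define FIXED_1  (1 << FSHIFT)           /* 1.0 in fixed point */
#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

int main(void)
{
    /* 1.73 in fixed point, plus the FIXED_1/200 rounding bias. */
    unsigned long avn = (unsigned long)(1.73 * FIXED_1) + FIXED_1 / 200;

    printf("%lu.%02lu\n", LOAD_INT(avn), LOAD_FRAC(avn));  /* 1.73 */
    return 0;
}
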
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b1606e07b7a3..561a9c050cef 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -723,7 +723,7 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
723 mutex_unlock(&sysfs_workq_mutex); 723 mutex_unlock(&sysfs_workq_mutex);
724 724
725 if (sysfs_workqueue == NULL) { 725 if (sysfs_workqueue == NULL) {
726 sysfs_workqueue = create_workqueue("sysfsd"); 726 sysfs_workqueue = create_singlethread_workqueue("sysfsd");
727 if (sysfs_workqueue == NULL) { 727 if (sysfs_workqueue == NULL) {
728 module_put(owner); 728 module_put(owner);
729 return -ENOMEM; 729 return -ENOMEM;
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index af6843c7ee4b..179cbd630f69 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
103static inline int 103static inline int
104kmem_shake_allow(gfp_t gfp_mask) 104kmem_shake_allow(gfp_t gfp_mask)
105{ 105{
106 return (gfp_mask & __GFP_WAIT) != 0; 106 return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
107} 107}
108 108
109#endif /* __XFS_SUPPORT_KMEM_H__ */ 109#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e6d839bddbf0..7465f9ee125f 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -347,13 +347,15 @@ xfs_swap_extents(
347 347
348 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); 348 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
349 349
350out_unlock:
351 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
352 xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
353out: 350out:
354 kmem_free(tempifp); 351 kmem_free(tempifp);
355 return error; 352 return error;
356 353
354out_unlock:
355 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
356 xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
357 goto out;
358
357out_trans_cancel: 359out_trans_cancel:
358 xfs_trans_cancel(tp, 0); 360 xfs_trans_cancel(tp, 0);
359 goto out_unlock; 361 goto out_unlock;
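
The label reshuffle above makes the success path skip the unlock labels entirely (the commit consumes the locks), while the error path unlocks and then jumps back to the common exit. A compilable sketch of that label ordering, with stand-in resources rather than the xfs functions:

#include <stdio.h>
#include <stdlib.h>

static int swap_extents(int fail)
{
    int error = 0;
    char *tempifp = malloc(64);          /* freed on every path */
    FILE *ilock = NULL;                  /* stands in for xfs_ilock() */

    if (!tempifp)
        return -1;

    ilock = fopen("/dev/null", "r");
    if (!ilock || fail) {
        error = -1;
        goto out_unlock;
    }

    fclose(ilock);                       /* "commit" consumes the lock */
    /* success falls straight to the common exit: no double unlock */
out:
    free(tempifp);
    return error;

out_unlock:                              /* error-only unwind */
    if (ilock)
        fclose(ilock);
    goto out;
}

int main(void)
{
    printf("%d %d\n", swap_extents(0), swap_extents(1));   /* 0 -1 */
    return 0;
}
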
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8379e3bca26c..cbd451bb4848 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -160,7 +160,7 @@ xfs_growfs_data_private(
160 nagcount = new + (nb_mod != 0); 160 nagcount = new + (nb_mod != 0);
161 if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { 161 if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
162 nagcount--; 162 nagcount--;
163 nb = nagcount * mp->m_sb.sb_agblocks; 163 nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
164 if (nb < mp->m_sb.sb_dblocks) 164 if (nb < mp->m_sb.sb_dblocks)
165 return XFS_ERROR(EINVAL); 165 return XFS_ERROR(EINVAL);
166 } 166 }
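
The added cast in the xfs_growfs hunk widens the multiply before it can wrap: without it the product is computed in 32 bits and truncated, so the EINVAL check can pass bogus sizes. A short demonstration with made-up counts, on a target where int is 32 bits:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t nagcount = 70000, agblocks = 70000;

    /* The 32-bit multiply wraps mod 2^32 before the assignment
     * widens it ... */
    uint64_t wrong = nagcount * agblocks;
    /* ... so cast one operand first, as the fix does. */
    uint64_t right = (uint64_t)nagcount * agblocks;

    printf("wrong=%llu right=%llu\n",
           (unsigned long long)wrong, (unsigned long long)right);
    return 0;
}
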
diff --git a/include/Kbuild b/include/Kbuild
index d8c3e3cbf416..fe36accd4328 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -8,3 +8,4 @@ header-y += mtd/
8header-y += rdma/ 8header-y += rdma/
9header-y += video/ 9header-y += video/
10header-y += drm/ 10header-y += drm/
11header-y += xen/
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8e6d0ca70aba..e410f602cab1 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
280#endif 280#endif
281 281
282/* 282/*
283 * A facility to provide batching of the reload of page tables with the 283 * A facility to provide batching of the reload of page tables and
284 * actual context switch code for paravirtualized guests. By convention, 284 * other process state with the actual context switch code for
285 * only one of the lazy modes (CPU, MMU) should be active at any given 285 * paravirtualized guests. By convention, only one of the batched
286 * time, entry should never be nested, and entry and exits should always 286 * update (lazy) modes (CPU, MMU) should be active at any given time,
287 * be paired. This is for sanity of maintaining and reasoning about the 287 * entry should never be nested, and entry and exits should always be
288 * kernel code. 288 * paired. This is for sanity of maintaining and reasoning about the
289 * kernel code. In this case, the exit (end of the context switch) is
290 * in architecture-specific code, and so doesn't need a generic
291 * definition.
289 */ 292 */
290#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE 293#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
291#define arch_enter_lazy_cpu_mode() do {} while (0) 294#define arch_start_context_switch(prev) do {} while (0)
292#define arch_leave_lazy_cpu_mode() do {} while (0)
293#define arch_flush_lazy_cpu_mode() do {} while (0)
294#endif 295#endif
295 296
296#ifndef __HAVE_PFNMAP_TRACKING 297#ifndef __HAVE_PFNMAP_TRACKING
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 89853bcd27a6..f1736ca7922c 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -63,7 +63,7 @@
63#define BRANCH_PROFILE() 63#define BRANCH_PROFILE()
64#endif 64#endif
65 65
66#ifdef CONFIG_EVENT_TRACER 66#ifdef CONFIG_EVENT_TRACING
67#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ 67#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \
68 *(_ftrace_events) \ 68 *(_ftrace_events) \
69 VMLINUX_SYMBOL(__stop_ftrace_events) = .; 69 VMLINUX_SYMBOL(__stop_ftrace_events) = .;
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index c8c422151431..b84d8ae35e6f 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1519,6 +1519,30 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area)
1519{ 1519{
1520 return kcalloc(nmemb, size, GFP_KERNEL); 1520 return kcalloc(nmemb, size, GFP_KERNEL);
1521} 1521}
1522
1523static __inline__ void *drm_calloc_large(size_t nmemb, size_t size)
1524{
1525 u8 *addr;
1526
1527 if (size <= PAGE_SIZE)
1528 return kcalloc(nmemb, size, GFP_KERNEL);
1529
1530 addr = vmalloc(nmemb * size);
1531 if (!addr)
1532 return NULL;
1533
1534 memset(addr, 0, nmemb * size);
1535
1536 return addr;
1537}
1538
1539static __inline void drm_free_large(void *ptr)
1540{
1541 if (!is_vmalloc_addr(ptr))
1542 return kfree(ptr);
1543
1544 vfree(ptr);
1545}
1522#else 1546#else
1523extern void *drm_alloc(size_t size, int area); 1547extern void *drm_alloc(size_t size, int area);
1524extern void drm_free(void *pt, size_t size, int area); 1548extern void drm_free(void *pt, size_t size, int area);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 3c1924c010e8..7300fb866767 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -471,6 +471,9 @@ struct drm_connector {
471 u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY]; 471 u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY];
472 uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY]; 472 uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY];
473 473
474 /* requested DPMS state */
475 int dpms;
476
474 void *helper_private; 477 void *helper_private;
475 478
476 uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER]; 479 uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER];
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index ec073d8288d9..6769ff6c1bc0 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -99,6 +99,8 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
99 struct drm_framebuffer *old_fb); 99 struct drm_framebuffer *old_fb);
100extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc); 100extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc);
101 101
102extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode);
103
102extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb, 104extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
103 struct drm_mode_fb_cmd *mode_cmd); 105 struct drm_mode_fb_cmd *mode_cmd);
104 106
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 88be890ee3c7..51b4b0a5ce8c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num;
119extern int sbf_port; 119extern int sbf_port;
120extern unsigned long acpi_realmode_flags; 120extern unsigned long acpi_realmode_flags;
121 121
122int acpi_register_gsi (u32 gsi, int triggering, int polarity); 122int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity);
123int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); 123int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
124 124
125#ifdef CONFIG_X86_IO_APIC 125#ifdef CONFIG_X86_IO_APIC
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 48ee32a18ac5..64a982ea5d5f 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -159,6 +159,7 @@
159#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) 159#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS)
160 160
161#ifndef __ASSEMBLY__ 161#ifndef __ASSEMBLY__
162struct amba_device; /* in uncompress this is included but amba/bus.h is not */
162struct amba_pl010_data { 163struct amba_pl010_data {
163 void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl); 164 void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl);
164}; 165};
diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h
index 63265852b7d1..7b09c8348fd3 100644
--- a/include/linux/auto_fs.h
+++ b/include/linux/auto_fs.h
@@ -14,13 +14,12 @@
14#ifndef _LINUX_AUTO_FS_H 14#ifndef _LINUX_AUTO_FS_H
15#define _LINUX_AUTO_FS_H 15#define _LINUX_AUTO_FS_H
16 16
17#include <linux/types.h>
17#ifdef __KERNEL__ 18#ifdef __KERNEL__
18#include <linux/fs.h> 19#include <linux/fs.h>
19#include <linux/limits.h> 20#include <linux/limits.h>
20#include <linux/types.h>
21#include <linux/ioctl.h> 21#include <linux/ioctl.h>
22#else 22#else
23#include <asm/types.h>
24#include <sys/ioctl.h> 23#include <sys/ioctl.h>
25#endif /* __KERNEL__ */ 24#endif /* __KERNEL__ */
26 25
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d960889e92ef..7e4350ece0f8 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -116,9 +116,9 @@ struct blk_io_trace {
116 * The remap event 116 * The remap event
117 */ 117 */
118struct blk_io_trace_remap { 118struct blk_io_trace_remap {
119 __be32 device;
120 __be32 device_from; 119 __be32 device_from;
121 __be64 sector; 120 __be32 device_to;
121 __be64 sector_from;
122}; 122};
123 123
124enum { 124enum {
@@ -165,8 +165,9 @@ struct blk_trace {
165 165
166extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); 166extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
167extern void blk_trace_shutdown(struct request_queue *); 167extern void blk_trace_shutdown(struct request_queue *);
168extern int do_blk_trace_setup(struct request_queue *q, 168extern int do_blk_trace_setup(struct request_queue *q, char *name,
169 char *name, dev_t dev, struct blk_user_trace_setup *buts); 169 dev_t dev, struct block_device *bdev,
170 struct blk_user_trace_setup *buts);
170extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); 171extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
171 172
172/** 173/**
@@ -193,22 +194,42 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
193extern void blk_add_driver_data(struct request_queue *q, struct request *rq, 194extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
194 void *data, size_t len); 195 void *data, size_t len);
195extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 196extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
197 struct block_device *bdev,
196 char __user *arg); 198 char __user *arg);
197extern int blk_trace_startstop(struct request_queue *q, int start); 199extern int blk_trace_startstop(struct request_queue *q, int start);
198extern int blk_trace_remove(struct request_queue *q); 200extern int blk_trace_remove(struct request_queue *q);
201extern int blk_trace_init_sysfs(struct device *dev);
199 202
200extern struct attribute_group blk_trace_attr_group; 203extern struct attribute_group blk_trace_attr_group;
201 204
202#else /* !CONFIG_BLK_DEV_IO_TRACE */ 205#else /* !CONFIG_BLK_DEV_IO_TRACE */
203#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 206# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
204#define blk_trace_shutdown(q) do { } while (0) 207# define blk_trace_shutdown(q) do { } while (0)
205#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) 208# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY)
206#define blk_add_driver_data(q, rq, data, len) do {} while (0) 209# define blk_add_driver_data(q, rq, data, len) do {} while (0)
207#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) 210# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY)
208#define blk_trace_startstop(q, start) (-ENOTTY) 211# define blk_trace_startstop(q, start) (-ENOTTY)
209#define blk_trace_remove(q) (-ENOTTY) 212# define blk_trace_remove(q) (-ENOTTY)
210#define blk_add_trace_msg(q, fmt, ...) do { } while (0) 213# define blk_add_trace_msg(q, fmt, ...) do { } while (0)
214static inline int blk_trace_init_sysfs(struct device *dev)
215{
216 return 0;
217}
211 218
212#endif /* CONFIG_BLK_DEV_IO_TRACE */ 219#endif /* CONFIG_BLK_DEV_IO_TRACE */
220
221#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
222
223static inline int blk_cmd_buf_len(struct request *rq)
224{
225 return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
226}
227
228extern void blk_dump_cmd(char *buf, struct request *rq);
229extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
230extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
231
232#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
233
213#endif /* __KERNEL__ */ 234#endif /* __KERNEL__ */
214#endif 235#endif
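
blk_trace_init_sysfs gains a static-inline stub in the !CONFIG_BLK_DEV_IO_TRACE branch so callers stay free of #ifdef clutter and the dead call compiles away. The idiom in miniature, with an invented config symbol:

#include <stdio.h>

/* #define CONFIG_DEMO_TRACE 1 */       /* toggle the feature here */

#ifdef CONFIG_DEMO_TRACE
static int demo_trace_init(void) { puts("tracing up"); return 0; }
#else
static inline int demo_trace_init(void) { return 0; }   /* no-op stub */
#endif

int main(void)
{
    /* The call site is identical either way -- no #ifdef here. */
    return demo_trace_init();
}
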
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f2ded21f9a3c..af931ee43dd8 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
222int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); 222int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
223int get_compat_sigevent(struct sigevent *event, 223int get_compat_sigevent(struct sigevent *event,
224 const struct compat_sigevent __user *u_event); 224 const struct compat_sigevent __user *u_event);
225long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
226 struct compat_siginfo __user *uinfo);
225 227
226static inline int compat_timeval_compare(struct compat_timeval *lhs, 228static inline int compat_timeval_compare(struct compat_timeval *lhs,
227 struct compat_timeval *rhs) 229 struct compat_timeval *rhs)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9f315382610b..c5ac87ca7bc6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1022,6 +1022,8 @@ typedef struct cpumask *cpumask_var_t;
1022 1022
1023bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); 1023bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
1024bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); 1024bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
1025bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
1026bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
1025void alloc_bootmem_cpumask_var(cpumask_var_t *mask); 1027void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
1026void free_cpumask_var(cpumask_var_t mask); 1028void free_cpumask_var(cpumask_var_t mask);
1027void free_bootmem_cpumask_var(cpumask_var_t mask); 1029void free_bootmem_cpumask_var(cpumask_var_t mask);
@@ -1040,6 +1042,19 @@ static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
1040 return true; 1042 return true;
1041} 1043}
1042 1044
1045static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
1046{
1047 cpumask_clear(*mask);
1048 return true;
1049}
1050
1051static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
1052 int node)
1053{
1054 cpumask_clear(*mask);
1055 return true;
1056}
1057
1043static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) 1058static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
1044{ 1059{
1045} 1060}
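
The new zalloc_cpumask_var()/zalloc_cpumask_var_node() behave like their alloc_* counterparts but hand back an already-cleared mask; with CONFIG_CPUMASK_OFFSTACK=n they reduce to a cpumask_clear() on the fixed-size mask, as the inline fallbacks above show. A hedged usage sketch:

    #include <linux/cpumask.h>
    #include <linux/errno.h>
    #include <linux/gfp.h>

    static int first_set_after_zalloc(void)
    {
            cpumask_var_t mask;
            int cpu;

            /* like alloc_cpumask_var(), but the mask comes back cleared */
            if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
                    return -ENOMEM;

            cpumask_set_cpu(0, mask);
            cpu = cpumask_first(mask);      /* 0: no stale bits to skip */

            free_cpumask_var(mask);
            return cpu;
    }
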
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 3282ee4318e7..4fa999696310 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -13,6 +13,7 @@
13#define _LINUX_CRED_H 13#define _LINUX_CRED_H
14 14
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/init.h>
16#include <linux/key.h> 17#include <linux/key.h>
17#include <asm/atomic.h> 18#include <asm/atomic.h>
18 19
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 28d53cb7b5a2..171ad8aedc83 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);
32 32
33extern void dma_debug_init(u32 num_entries); 33extern void dma_debug_init(u32 num_entries);
34 34
35extern int dma_debug_resize_entries(u32 num_entries);
36
35extern void debug_dma_map_page(struct device *dev, struct page *page, 37extern void debug_dma_map_page(struct device *dev, struct page *page,
36 size_t offset, size_t size, 38 size_t offset, size_t size,
37 int direction, dma_addr_t dma_addr, 39 int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
91{ 93{
92} 94}
93 95
96static inline int dma_debug_resize_entries(u32 num_entries)
97{
98 return 0;
99}
100
94static inline void debug_dma_map_page(struct device *dev, struct page *page, 101static inline void debug_dma_map_page(struct device *dev, struct page *page,
95 size_t offset, size_t size, 102 size_t offset, size_t size,
96 int direction, dma_addr_t dma_addr, 103 int direction, dma_addr_t dma_addr,
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e397dc342cda..10ff5c498824 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -108,6 +108,7 @@ struct irte {
108}; 108};
109#ifdef CONFIG_INTR_REMAP 109#ifdef CONFIG_INTR_REMAP
110extern int intr_remapping_enabled; 110extern int intr_remapping_enabled;
111extern int intr_remapping_supported(void);
111extern int enable_intr_remapping(int); 112extern int enable_intr_remapping(int);
112extern void disable_intr_remapping(void); 113extern void disable_intr_remapping(void);
113extern int reenable_intr_remapping(int); 114extern int reenable_intr_remapping(int);
@@ -157,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic)
157} 158}
158#define irq_remapped(irq) (0) 159#define irq_remapped(irq) (0)
159#define enable_intr_remapping(mode) (-1) 160#define enable_intr_remapping(mode) (-1)
161#define disable_intr_remapping() (0)
162#define reenable_intr_remapping(mode) (0)
160#define intr_remapping_enabled (0) 163#define intr_remapping_enabled (0)
161#endif 164#endif
162 165
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8a0c2f221e6b..39b95c56587e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
233 233
234extern int skip_trace(unsigned long ip); 234extern int skip_trace(unsigned long ip);
235 235
236extern void ftrace_release(void *start, unsigned long size);
237
238extern void ftrace_disable_daemon(void); 236extern void ftrace_disable_daemon(void);
239extern void ftrace_enable_daemon(void); 237extern void ftrace_enable_daemon(void);
240#else 238#else
@@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled)
325 323
326#ifdef CONFIG_FTRACE_MCOUNT_RECORD 324#ifdef CONFIG_FTRACE_MCOUNT_RECORD
327extern void ftrace_init(void); 325extern void ftrace_init(void);
328extern void ftrace_init_module(struct module *mod,
329 unsigned long *start, unsigned long *end);
330#else 326#else
331static inline void ftrace_init(void) { } 327static inline void ftrace_init(void) { }
332static inline void
333ftrace_init_module(struct module *mod,
334 unsigned long *start, unsigned long *end) { }
335#endif 328#endif
336 329
337/* 330/*
@@ -368,6 +361,7 @@ struct ftrace_ret_stack {
368 unsigned long ret; 361 unsigned long ret;
369 unsigned long func; 362 unsigned long func;
370 unsigned long long calltime; 363 unsigned long long calltime;
364 unsigned long long subtime;
371}; 365};
372 366
373/* 367/*
@@ -379,8 +373,6 @@ extern void return_to_handler(void);
379 373
380extern int 374extern int
381ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); 375ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
382extern void
383ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
384 376
385/* 377/*
386 * Sometimes we don't want to trace a function with the function 378 * Sometimes we don't want to trace a function with the function
@@ -496,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
496 488
497extern int ftrace_dump_on_oops; 489extern int ftrace_dump_on_oops;
498 490
491#ifdef CONFIG_PREEMPT
492#define INIT_TRACE_RECURSION .trace_recursion = 0,
493#endif
494
499#endif /* CONFIG_TRACING */ 495#endif /* CONFIG_TRACING */
500 496
497#ifndef INIT_TRACE_RECURSION
498#define INIT_TRACE_RECURSION
499#endif
501 500
502#ifdef CONFIG_HW_BRANCH_TRACER 501#ifdef CONFIG_HW_BRANCH_TRACER
503 502
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
new file mode 100644
index 000000000000..5c093ffc655b
--- /dev/null
+++ b/include/linux/ftrace_event.h
@@ -0,0 +1,172 @@
1#ifndef _LINUX_FTRACE_EVENT_H
2#define _LINUX_FTRACE_EVENT_H
3
4#include <linux/trace_seq.h>
5#include <linux/ring_buffer.h>
6#include <linux/percpu.h>
7
8struct trace_array;
9struct tracer;
10struct dentry;
11
12DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
13
14struct trace_print_flags {
15 unsigned long mask;
16 const char *name;
17};
18
19const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
20 unsigned long flags,
21 const struct trace_print_flags *flag_array);
22
23const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
24 const struct trace_print_flags *symbol_array);
25
26/*
27 * The trace entry - the most basic unit of tracing. This is what
28 * is printed in the end as a single line in the trace output, such as:
29 *
30 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
31 */
32struct trace_entry {
33 unsigned short type;
34 unsigned char flags;
35 unsigned char preempt_count;
36 int pid;
37 int tgid;
38};
39
40#define FTRACE_MAX_EVENT \
41 ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
42
43/*
 44 * Trace iterator - used by printout routines that present trace
 45 * results to users and which might sleep, etc:
46 */
47struct trace_iterator {
48 struct trace_array *tr;
49 struct tracer *trace;
50 void *private;
51 int cpu_file;
52 struct mutex mutex;
53 struct ring_buffer_iter *buffer_iter[NR_CPUS];
54 unsigned long iter_flags;
55
56 /* The below is zeroed out in pipe_read */
57 struct trace_seq seq;
58 struct trace_entry *ent;
59 int cpu;
60 u64 ts;
61
62 loff_t pos;
63 long idx;
64
65 cpumask_var_t started;
66};
67
68
69typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
70 int flags);
71struct trace_event {
72 struct hlist_node node;
73 struct list_head list;
74 int type;
75 trace_print_func trace;
76 trace_print_func raw;
77 trace_print_func hex;
78 trace_print_func binary;
79};
80
81extern int register_ftrace_event(struct trace_event *event);
82extern int unregister_ftrace_event(struct trace_event *event);
83
84/* Return values for print_line callback */
85enum print_line_t {
86 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
87 TRACE_TYPE_HANDLED = 1,
88 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
89 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
90};
91
92
93struct ring_buffer_event *
94trace_current_buffer_lock_reserve(int type, unsigned long len,
95 unsigned long flags, int pc);
96void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
97 unsigned long flags, int pc);
98void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
99 unsigned long flags, int pc);
100void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
101
102void tracing_record_cmdline(struct task_struct *tsk);
103
104struct ftrace_event_call {
105 struct list_head list;
106 char *name;
107 char *system;
108 struct dentry *dir;
109 struct trace_event *event;
110 int enabled;
111 int (*regfunc)(void);
112 void (*unregfunc)(void);
113 int id;
114 int (*raw_init)(void);
115 int (*show_format)(struct trace_seq *s);
116 int (*define_fields)(void);
117 struct list_head fields;
118 int filter_active;
119 void *filter;
120 void *mod;
121
122#ifdef CONFIG_EVENT_PROFILE
123 atomic_t profile_count;
124 int (*profile_enable)(struct ftrace_event_call *);
125 void (*profile_disable)(struct ftrace_event_call *);
126#endif
127};
128
129#define MAX_FILTER_PRED 32
130#define MAX_FILTER_STR_VAL 128
131
132extern int init_preds(struct ftrace_event_call *call);
133extern void destroy_preds(struct ftrace_event_call *call);
134extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
135extern int filter_current_check_discard(struct ftrace_event_call *call,
136 void *rec,
137 struct ring_buffer_event *event);
138
139extern int trace_define_field(struct ftrace_event_call *call, char *type,
140 char *name, int offset, int size, int is_signed);
141
142#define is_signed_type(type) (((type)(-1)) < 0)
143
144int trace_set_clr_event(const char *system, const char *event, int set);
145
146/*
147 * The double __builtin_constant_p is because gcc will give us an error
148 * if we try to initialize the static variable with fmt when fmt is not a
149 * constant, even though the outer if statement is optimized out.
150 */
151#define event_trace_printk(ip, fmt, args...) \
152do { \
153 __trace_printk_check_format(fmt, ##args); \
154 tracing_record_cmdline(current); \
155 if (__builtin_constant_p(fmt)) { \
156 static const char *trace_printk_fmt \
157 __attribute__((section("__trace_printk_fmt"))) = \
158 __builtin_constant_p(fmt) ? fmt : NULL; \
159 \
160 __trace_bprintk(ip, trace_printk_fmt, ##args); \
161 } else \
162 __trace_printk(ip, fmt, ##args); \
163} while (0)
164
165#define __common_field(type, item, is_signed) \
166 ret = trace_define_field(event_call, #type, "common_" #item, \
167 offsetof(typeof(field.ent), item), \
168 sizeof(field.ent.item), is_signed); \
169 if (ret) \
170 return ret;
171
172#endif /* _LINUX_FTRACE_EVENT_H */
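
The comment above event_trace_printk() deserves unpacking: the outer __builtin_constant_p() selects the binary-printk fast path, and the repeated inner check keeps the static initializer a compile-time constant even on the branch gcc will discard. The same trick in isolation, with hypothetical fast_path()/slow_path() helpers:

    /* fast_path()/slow_path() stand in for __trace_bprintk()/__trace_printk() */
    #define my_log(fmt, args...)                                    \
    do {                                                            \
            if (__builtin_constant_p(fmt)) {                        \
                    /* inner check keeps this initializer constant */ \
                    static const char *__fmt =                      \
                            __builtin_constant_p(fmt) ? fmt : NULL; \
                    fast_path(__fmt, ##args);                       \
            } else                                                  \
                    slow_path(fmt, ##args);                         \
    } while (0)
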
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 3bf5bb5a34f9..34956c8fdebf 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -23,6 +23,8 @@ union ktime;
23#define FUTEX_TRYLOCK_PI 8 23#define FUTEX_TRYLOCK_PI 8
24#define FUTEX_WAIT_BITSET 9 24#define FUTEX_WAIT_BITSET 9
25#define FUTEX_WAKE_BITSET 10 25#define FUTEX_WAKE_BITSET 10
26#define FUTEX_WAIT_REQUEUE_PI 11
27#define FUTEX_CMP_REQUEUE_PI 12
26 28
27#define FUTEX_PRIVATE_FLAG 128 29#define FUTEX_PRIVATE_FLAG 128
28#define FUTEX_CLOCK_REALTIME 256 30#define FUTEX_CLOCK_REALTIME 256
@@ -38,6 +40,10 @@ union ktime;
38#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) 40#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
39#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) 41#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG)
40#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) 42#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
43#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \
44 FUTEX_PRIVATE_FLAG)
45#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
46 FUTEX_PRIVATE_FLAG)
41 47
42/* 48/*
43 * Support for robust futexes: the kernel cleans up held futexes at 49 * Support for robust futexes: the kernel cleans up held futexes at
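
The two new opcodes compose with FUTEX_PRIVATE_FLAG exactly like the existing ones: a *_PRIVATE variant is just the base opcode OR'd with the flag, which the kernel strips before dispatch. A hedged userspace sketch of issuing the new op through the raw syscall (glibc has no wrapper; assumes headers from a kernel carrying this patch):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    /* Wait on uaddr, to be requeued onto the PI futex at uaddr2. */
    static long futex_wait_requeue_pi(unsigned int *uaddr, unsigned int val,
                                      const struct timespec *timeout,
                                      unsigned int *uaddr2)
    {
            int op = FUTEX_WAIT_REQUEUE_PI | FUTEX_PRIVATE_FLAG;

            /* the final (val3) argument is unused for this op */
            return syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, 0);
    }
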
diff --git a/include/linux/i7300_idle.h b/include/linux/i7300_idle.h
index 05a80c44513c..1587b7dec505 100644
--- a/include/linux/i7300_idle.h
+++ b/include/linux/i7300_idle.h
@@ -16,35 +16,33 @@
16struct fbd_ioat { 16struct fbd_ioat {
17 unsigned int vendor; 17 unsigned int vendor;
18 unsigned int ioat_dev; 18 unsigned int ioat_dev;
19 unsigned int enabled;
19}; 20};
20 21
21/* 22/*
22 * The i5000 chip-set has the same hooks as the i7300 23 * The i5000 chip-set has the same hooks as the i7300
23 * but support is disabled by default because this driver 24 * but it is not enabled by default and must be manually
24 * has not been validated on that platform. 25 * manually enabled with "forceload=1" because it is
26 * only lightly validated.
25 */ 27 */
26#define SUPPORT_I5000 0
27 28
28static const struct fbd_ioat fbd_ioat_list[] = { 29static const struct fbd_ioat fbd_ioat_list[] = {
29 {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB}, 30 {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB, 1},
30#if SUPPORT_I5000 31 {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT, 0},
31 {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT},
32#endif
33 {0, 0} 32 {0, 0}
34}; 33};
35 34
36/* table of devices that work with this driver */ 35/* table of devices that work with this driver */
37static const struct pci_device_id pci_tbl[] = { 36static const struct pci_device_id pci_tbl[] = {
38 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FBD_CNB) }, 37 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FBD_CNB) },
39#if SUPPORT_I5000
40 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) }, 38 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) },
41#endif
42 { } /* Terminating entry */ 39 { } /* Terminating entry */
43}; 40};
44 41
45/* Check for known platforms with I/O-AT */ 42/* Check for known platforms with I/O-AT */
46static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, 43static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev,
47 struct pci_dev **ioat_dev) 44 struct pci_dev **ioat_dev,
45 int enable_all)
48{ 46{
49 int i; 47 int i;
50 struct pci_dev *memdev, *dmadev; 48 struct pci_dev *memdev, *dmadev;
@@ -69,6 +67,8 @@ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev,
69 for (i = 0; fbd_ioat_list[i].vendor != 0; i++) { 67 for (i = 0; fbd_ioat_list[i].vendor != 0; i++) {
70 if (dmadev->vendor == fbd_ioat_list[i].vendor && 68 if (dmadev->vendor == fbd_ioat_list[i].vendor &&
71 dmadev->device == fbd_ioat_list[i].ioat_dev) { 69 dmadev->device == fbd_ioat_list[i].ioat_dev) {
70 if (!(fbd_ioat_list[i].enabled || enable_all))
71 continue;
72 if (fbd_dev) 72 if (fbd_dev)
73 *fbd_dev = memdev; 73 *fbd_dev = memdev;
74 if (ioat_dev) 74 if (ioat_dev)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ff65fffb078f..9fed365a598b 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1109,7 +1109,7 @@ void ide_fix_driveid(u16 *);
1109 1109
1110extern void ide_fixstring(u8 *, const int, const int); 1110extern void ide_fixstring(u8 *, const int, const int);
1111 1111
1112int ide_busy_sleep(ide_hwif_t *, unsigned long, int); 1112int ide_busy_sleep(ide_drive_t *, unsigned long, int);
1113 1113
1114int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); 1114int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long);
1115 1115
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d87247d2641f..889bf99eca6d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -174,6 +174,7 @@ extern struct cred init_cred;
174 INIT_TRACE_IRQFLAGS \ 174 INIT_TRACE_IRQFLAGS \
175 INIT_LOCKDEP \ 175 INIT_LOCKDEP \
176 INIT_FTRACE_GRAPH \ 176 INIT_FTRACE_GRAPH \
177 INIT_TRACE_RECURSION \
177} 178}
178 179
179 180
diff --git a/include/linux/input.h b/include/linux/input.h
index 0e6ff5de3588..6fed4f6a9c9e 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -656,6 +656,7 @@ struct input_absinfo {
656#define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */ 656#define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */
657#define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ 657#define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */
658#define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ 658#define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */
659#define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */
659 660
660#define ABS_MAX 0x3f 661#define ABS_MAX 0x3f
661#define ABS_CNT (ABS_MAX+1) 662#define ABS_CNT (ABS_MAX+1)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 91bb76f44f14..ff374ceface0 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -566,6 +566,6 @@ struct irq_desc;
566extern int early_irq_init(void); 566extern int early_irq_init(void);
567extern int arch_probe_nr_irqs(void); 567extern int arch_probe_nr_irqs(void);
568extern int arch_early_irq_init(void); 568extern int arch_early_irq_init(void);
569extern int arch_init_chip_data(struct irq_desc *desc, int cpu); 569extern int arch_init_chip_data(struct irq_desc *desc, int node);
570 570
571#endif 571#endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index b7cbeed972e4..eedbb8e5e0cc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -117,7 +117,7 @@ struct irq_chip {
117 void (*eoi)(unsigned int irq); 117 void (*eoi)(unsigned int irq);
118 118
119 void (*end)(unsigned int irq); 119 void (*end)(unsigned int irq);
120 void (*set_affinity)(unsigned int irq, 120 int (*set_affinity)(unsigned int irq,
121 const struct cpumask *dest); 121 const struct cpumask *dest);
122 int (*retrigger)(unsigned int irq); 122 int (*retrigger)(unsigned int irq);
123 int (*set_type)(unsigned int irq, unsigned int flow_type); 123 int (*set_type)(unsigned int irq, unsigned int flow_type);
@@ -187,7 +187,7 @@ struct irq_desc {
187 spinlock_t lock; 187 spinlock_t lock;
188#ifdef CONFIG_SMP 188#ifdef CONFIG_SMP
189 cpumask_var_t affinity; 189 cpumask_var_t affinity;
190 unsigned int cpu; 190 unsigned int node;
191#ifdef CONFIG_GENERIC_PENDING_IRQ 191#ifdef CONFIG_GENERIC_PENDING_IRQ
192 cpumask_var_t pending_mask; 192 cpumask_var_t pending_mask;
193#endif 193#endif
@@ -201,26 +201,23 @@ struct irq_desc {
201} ____cacheline_internodealigned_in_smp; 201} ____cacheline_internodealigned_in_smp;
202 202
203extern void arch_init_copy_chip_data(struct irq_desc *old_desc, 203extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
204 struct irq_desc *desc, int cpu); 204 struct irq_desc *desc, int node);
205extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); 205extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
206 206
207#ifndef CONFIG_SPARSE_IRQ 207#ifndef CONFIG_SPARSE_IRQ
208extern struct irq_desc irq_desc[NR_IRQS]; 208extern struct irq_desc irq_desc[NR_IRQS];
209#else /* CONFIG_SPARSE_IRQ */ 209#endif
210extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
211#endif /* CONFIG_SPARSE_IRQ */
212
213extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
214 210
215static inline struct irq_desc * 211#ifdef CONFIG_NUMA_IRQ_DESC
216irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) 212extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node);
217{
218#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
219 return irq_to_desc(irq);
220#else 213#else
214static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
215{
221 return desc; 216 return desc;
222#endif
223} 217}
218#endif
219
220extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
224 221
225/* 222/*
226 * Migration helpers for obsolete names, they will go away: 223 * Migration helpers for obsolete names, they will go away:
@@ -386,7 +383,7 @@ extern void set_irq_noprobe(unsigned int irq);
386extern void set_irq_probe(unsigned int irq); 383extern void set_irq_probe(unsigned int irq);
387 384
388/* Handle dynamic irq creation and destruction */ 385/* Handle dynamic irq creation and destruction */
389extern unsigned int create_irq_nr(unsigned int irq_want); 386extern unsigned int create_irq_nr(unsigned int irq_want, int node);
390extern int create_irq(void); 387extern int create_irq(void);
391extern void destroy_irq(unsigned int irq); 388extern void destroy_irq(unsigned int irq);
392 389
@@ -424,47 +421,48 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
424 421
425#ifdef CONFIG_SMP 422#ifdef CONFIG_SMP
426/** 423/**
427 * init_alloc_desc_masks - allocate cpumasks for irq_desc 424 * alloc_desc_masks - allocate cpumasks for irq_desc
428 * @desc: pointer to irq_desc struct 425 * @desc: pointer to irq_desc struct
429 * @cpu: cpu which will be handling the cpumasks 426 * @cpu: cpu which will be handling the cpumasks
430 * @boot: true if need bootmem 427 * @boot: true if need bootmem
431 * 428 *
432 * Allocates affinity and pending_mask cpumask if required. 429 * Allocates affinity and pending_mask cpumask if required.
433 * Returns true if successful (or not required). 430 * Returns true if successful (or not required).
434 * Side effect: affinity has all bits set, pending_mask has all bits clear.
435 */ 431 */
436static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, 432static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
437 bool boot) 433 bool boot)
438{ 434{
439 int node; 435#ifdef CONFIG_CPUMASK_OFFSTACK
440
441 if (boot) { 436 if (boot) {
442 alloc_bootmem_cpumask_var(&desc->affinity); 437 alloc_bootmem_cpumask_var(&desc->affinity);
443 cpumask_setall(desc->affinity);
444 438
445#ifdef CONFIG_GENERIC_PENDING_IRQ 439#ifdef CONFIG_GENERIC_PENDING_IRQ
446 alloc_bootmem_cpumask_var(&desc->pending_mask); 440 alloc_bootmem_cpumask_var(&desc->pending_mask);
447 cpumask_clear(desc->pending_mask);
448#endif 441#endif
449 return true; 442 return true;
450 } 443 }
451 444
452 node = cpu_to_node(cpu);
453
454 if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) 445 if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
455 return false; 446 return false;
456 cpumask_setall(desc->affinity);
457 447
458#ifdef CONFIG_GENERIC_PENDING_IRQ 448#ifdef CONFIG_GENERIC_PENDING_IRQ
459 if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { 449 if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
460 free_cpumask_var(desc->affinity); 450 free_cpumask_var(desc->affinity);
461 return false; 451 return false;
462 } 452 }
463 cpumask_clear(desc->pending_mask); 453#endif
464#endif 454#endif
465 return true; 455 return true;
466} 456}
467 457
458static inline void init_desc_masks(struct irq_desc *desc)
459{
460 cpumask_setall(desc->affinity);
461#ifdef CONFIG_GENERIC_PENDING_IRQ
462 cpumask_clear(desc->pending_mask);
463#endif
464}
465
468/** 466/**
469 * init_copy_desc_masks - copy cpumasks for irq_desc 467 * init_copy_desc_masks - copy cpumasks for irq_desc
470 * @old_desc: pointer to old irq_desc struct 468 * @old_desc: pointer to old irq_desc struct
@@ -478,7 +476,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
478static inline void init_copy_desc_masks(struct irq_desc *old_desc, 476static inline void init_copy_desc_masks(struct irq_desc *old_desc,
479 struct irq_desc *new_desc) 477 struct irq_desc *new_desc)
480{ 478{
481#ifdef CONFIG_CPUMASKS_OFFSTACK 479#ifdef CONFIG_CPUMASK_OFFSTACK
482 cpumask_copy(new_desc->affinity, old_desc->affinity); 480 cpumask_copy(new_desc->affinity, old_desc->affinity);
483 481
484#ifdef CONFIG_GENERIC_PENDING_IRQ 482#ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -499,12 +497,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc,
499 497
500#else /* !CONFIG_SMP */ 498#else /* !CONFIG_SMP */
501 499
502static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, 500static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
503 bool boot) 501 bool boot)
504{ 502{
505 return true; 503 return true;
506} 504}
507 505
506static inline void init_desc_masks(struct irq_desc *desc)
507{
508}
509
508static inline void init_copy_desc_masks(struct irq_desc *old_desc, 510static inline void init_copy_desc_masks(struct irq_desc *old_desc,
509 struct irq_desc *new_desc) 511 struct irq_desc *new_desc)
510{ 512{
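
The reworked helpers split allocation from initialization: alloc_desc_masks() only really allocates under CONFIG_CPUMASK_OFFSTACK (and can only fail there), while the new init_desc_masks() does the setall/clear work that used to be folded into init_alloc_desc_masks(). A sketch of how a caller pairs them, based only on the declarations above:

    /* illustrative caller, error handling trimmed */
    static bool setup_masks(struct irq_desc *desc, int node)
    {
            /* may allocate with CONFIG_CPUMASK_OFFSTACK=y, no-op otherwise */
            if (!alloc_desc_masks(desc, node, false))
                    return false;

            /* affinity := all CPUs, pending_mask := empty */
            init_desc_masks(desc);
            return true;
    }
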
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
new file mode 100644
index 000000000000..b616d3930c3b
--- /dev/null
+++ b/include/linux/kmemtrace.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <trace/events/kmem.h>
13
14#ifdef CONFIG_KMEMTRACE
15extern void kmemtrace_init(void);
16#else
17static inline void kmemtrace_init(void)
18{
19}
20#endif
21
22#endif /* __KERNEL__ */
23
24#endif /* _LINUX_KMEMTRACE_H */
25
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d475c7..9772d6cbfc82 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,6 +19,7 @@ struct anon_vma;
19struct file_ra_state; 19struct file_ra_state;
20struct user_struct; 20struct user_struct;
21struct writeback_control; 21struct writeback_control;
22struct rlimit;
22 23
23#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ 24#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */
24extern unsigned long max_mapnr; 25extern unsigned long max_mapnr;
@@ -1031,8 +1032,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
1031 unsigned long end_pfn); 1032 unsigned long end_pfn);
1032extern void remove_active_range(unsigned int nid, unsigned long start_pfn, 1033extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
1033 unsigned long end_pfn); 1034 unsigned long end_pfn);
1034extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
1035 unsigned long end_pfn);
1036extern void remove_all_active_ranges(void); 1035extern void remove_all_active_ranges(void);
1037extern unsigned long absent_pages_in_range(unsigned long start_pfn, 1036extern unsigned long absent_pages_in_range(unsigned long start_pfn,
1038 unsigned long end_pfn); 1037 unsigned long end_pfn);
@@ -1319,8 +1318,8 @@ int vmemmap_populate_basepages(struct page *start_page,
1319int vmemmap_populate(struct page *start_page, unsigned long pages, int node); 1318int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
1320void vmemmap_populate_print_last(void); 1319void vmemmap_populate_print_last(void);
1321 1320
1322extern void *alloc_locked_buffer(size_t size); 1321extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
1323extern void free_locked_buffer(void *buffer, size_t size); 1322 size_t size);
1324extern void release_locked_buffer(void *buffer, size_t size); 1323extern void refund_locked_memory(struct mm_struct *mm, size_t size);
1325#endif /* __KERNEL__ */ 1324#endif /* __KERNEL__ */
1326#endif /* _LINUX_MM_H */ 1325#endif /* _LINUX_MM_H */
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 3d1b7bde1283..97491f78b08c 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -30,6 +30,8 @@ extern unsigned int kmmio_count;
30 30
31extern int register_kmmio_probe(struct kmmio_probe *p); 31extern int register_kmmio_probe(struct kmmio_probe *p);
32extern void unregister_kmmio_probe(struct kmmio_probe *p); 32extern void unregister_kmmio_probe(struct kmmio_probe *p);
33extern int kmmio_init(void);
34extern void kmmio_cleanup(void);
33 35
34#ifdef CONFIG_MMIOTRACE 36#ifdef CONFIG_MMIOTRACE
35/* kmmio is active by some kmmio_probes? */ 37/* kmmio is active by some kmmio_probes? */
diff --git a/include/linux/module.h b/include/linux/module.h
index 627ac082e2a6..a8f2c0aa4c32 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -337,6 +337,14 @@ struct module
337 const char **trace_bprintk_fmt_start; 337 const char **trace_bprintk_fmt_start;
338 unsigned int num_trace_bprintk_fmt; 338 unsigned int num_trace_bprintk_fmt;
339#endif 339#endif
340#ifdef CONFIG_EVENT_TRACING
341 struct ftrace_event_call *trace_events;
342 unsigned int num_trace_events;
343#endif
344#ifdef CONFIG_FTRACE_MCOUNT_RECORD
345 unsigned long *ftrace_callsites;
346 unsigned int num_ftrace_callsites;
347#endif
340 348
341#ifdef CONFIG_MODULE_UNLOAD 349#ifdef CONFIG_MODULE_UNLOAD
342 /* What modules depend on me? */ 350 /* What modules depend on me? */
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 3069ec7e0ab8..878cab4f5fcc 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -150,5 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
150 */ 150 */
151extern int mutex_trylock(struct mutex *lock); 151extern int mutex_trylock(struct mutex *lock);
152extern void mutex_unlock(struct mutex *lock); 152extern void mutex_unlock(struct mutex *lock);
153extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
153 154
154#endif 155#endif
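
atomic_dec_and_mutex_lock() is the mutex twin of atomic_dec_and_lock(): it decrements the counter and returns nonzero holding the mutex only when the count reached zero, the classic refcounted-teardown shape. A hedged sketch with illustrative types:

    #include <linux/list.h>
    #include <linux/mutex.h>
    #include <linux/slab.h>

    static DEFINE_MUTEX(obj_lock);          /* illustrative registry lock */

    struct my_obj {                         /* illustrative object */
            atomic_t refcount;
            struct list_head node;
    };

    static void put_obj(struct my_obj *obj)
    {
            /* returns 1 with obj_lock held only when refcount hit zero */
            if (!atomic_dec_and_mutex_lock(&obj->refcount, &obj_lock))
                    return;

            list_del(&obj->node);           /* teardown under the mutex */
            mutex_unlock(&obj_lock);
            kfree(obj);
    }
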
diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h
index 0217fb81a630..0e2e100c44a2 100644
--- a/include/linux/net_dropmon.h
+++ b/include/linux/net_dropmon.h
@@ -1,6 +1,7 @@
1#ifndef __NET_DROPMON_H 1#ifndef __NET_DROPMON_H
2#define __NET_DROPMON_H 2#define __NET_DROPMON_H
3 3
4#include <linux/types.h>
4#include <linux/netlink.h> 5#include <linux/netlink.h>
5 6
6struct net_dm_drop_point { 7struct net_dm_drop_point {
diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 3066789b972a..b2f384d42611 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -35,6 +35,9 @@ enum tcp_conntrack {
35/* Has unacknowledged data */ 35/* Has unacknowledged data */
36#define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 36#define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10
37 37
38/* The field td_maxack has been set */
39#define IP_CT_TCP_FLAG_MAXACK_SET 0x20
40
38struct nf_ct_tcp_flags { 41struct nf_ct_tcp_flags {
39 __u8 flags; 42 __u8 flags;
40 __u8 mask; 43 __u8 mask;
@@ -46,6 +49,7 @@ struct ip_ct_tcp_state {
46 u_int32_t td_end; /* max of seq + len */ 49 u_int32_t td_end; /* max of seq + len */
47 u_int32_t td_maxend; /* max of ack + max(win, 1) */ 50 u_int32_t td_maxend; /* max of ack + max(win, 1) */
48 u_int32_t td_maxwin; /* max(win) */ 51 u_int32_t td_maxwin; /* max(win) */
52 u_int32_t td_maxack; /* max of ack */
49 u_int8_t td_scale; /* window scale factor */ 53 u_int8_t td_scale; /* window scale factor */
50 u_int8_t flags; /* per direction options */ 54 u_int8_t flags; /* per direction options */
51}; 55};
diff --git a/include/linux/parport.h b/include/linux/parport.h
index e1f83c5065c5..38a423ed3c01 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -324,6 +324,10 @@ struct parport {
324 int spintime; 324 int spintime;
325 atomic_t ref_count; 325 atomic_t ref_count;
326 326
327 unsigned long devflags;
328#define PARPORT_DEVPROC_REGISTERED 0
 329 struct pardevice *proc_device; /* Currently registered proc device */
330
327 struct list_head full_list; 331 struct list_head full_list;
328 struct parport *slaves[3]; 332 struct parport *slaves[3];
329}; 333};
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 06ba90c211a5..0f71812d67d3 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1406,7 +1406,7 @@
1406#define PCI_DEVICE_ID_VIA_82C598_1 0x8598 1406#define PCI_DEVICE_ID_VIA_82C598_1 0x8598
1407#define PCI_DEVICE_ID_VIA_838X_1 0xB188 1407#define PCI_DEVICE_ID_VIA_838X_1 0xB188
1408#define PCI_DEVICE_ID_VIA_83_87XX_1 0xB198 1408#define PCI_DEVICE_ID_VIA_83_87XX_1 0xB198
1409#define PCI_DEVICE_ID_VIA_C409_IDE 0XC409 1409#define PCI_DEVICE_ID_VIA_VX855_IDE 0xC409
1410#define PCI_DEVICE_ID_VIA_ANON 0xFFFF 1410#define PCI_DEVICE_ID_VIA_ANON 0xFFFF
1411 1411
1412#define PCI_VENDOR_ID_SIEMENS 0x110A 1412#define PCI_VENDOR_ID_SIEMENS 0x110A
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 67c15653fc23..59e133d39d50 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child,
95 struct task_struct *new_parent); 95 struct task_struct *new_parent);
96extern void __ptrace_unlink(struct task_struct *child); 96extern void __ptrace_unlink(struct task_struct *child);
97extern void exit_ptrace(struct task_struct *tracer); 97extern void exit_ptrace(struct task_struct *tracer);
98extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
99#define PTRACE_MODE_READ 1 98#define PTRACE_MODE_READ 1
100#define PTRACE_MODE_ATTACH 2 99#define PTRACE_MODE_ATTACH 2
101/* Returns 0 on success, -errno on denial. */ 100/* Returns 0 on success, -errno on denial. */
@@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task)
327#define arch_ptrace_untrace(task) do { } while (0) 326#define arch_ptrace_untrace(task) do { } while (0)
328#endif 327#endif
329 328
330#ifndef arch_ptrace_fork
331/*
332 * Do machine-specific work to initialize a new task.
333 *
334 * This is called from copy_process().
335 */
336#define arch_ptrace_fork(child, clone_flags) do { } while (0)
337#endif
338
339extern int task_current_syscall(struct task_struct *target, long *callno, 329extern int task_current_syscall(struct task_struct *target, long *callno,
340 unsigned long args[6], unsigned int maxargs, 330 unsigned long args[6], unsigned int maxargs,
341 unsigned long *sp, unsigned long *pc); 331 unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e649bd3f2c97..5710f43bbc9e 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list,
198 at->prev = last; 198 at->prev = last;
199} 199}
200 200
201/**
202 * list_entry_rcu - get the struct for this entry
203 * @ptr: the &struct list_head pointer.
204 * @type: the type of the struct this is embedded in.
205 * @member: the name of the list_struct within the struct.
206 *
207 * This primitive may safely run concurrently with the _rcu list-mutation
208 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
209 */
210#define list_entry_rcu(ptr, type, member) \
211 container_of(rcu_dereference(ptr), type, member)
212
213/**
214 * list_first_entry_rcu - get the first element from a list
215 * @ptr: the list head to take the element from.
216 * @type: the type of the struct this is embedded in.
217 * @member: the name of the list_struct within the struct.
218 *
219 * Note that the list is expected to be non-empty.
220 *
221 * This primitive may safely run concurrently with the _rcu list-mutation
222 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
223 */
224#define list_first_entry_rcu(ptr, type, member) \
225 list_entry_rcu((ptr)->next, type, member)
226
201#define __list_for_each_rcu(pos, head) \ 227#define __list_for_each_rcu(pos, head) \
202 for (pos = rcu_dereference((head)->next); \ 228 for (pos = rcu_dereference((head)->next); \
203 pos != (head); \ 229 pos != (head); \
@@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
214 * as long as the traversal is guarded by rcu_read_lock(). 240 * as long as the traversal is guarded by rcu_read_lock().
215 */ 241 */
216#define list_for_each_entry_rcu(pos, head, member) \ 242#define list_for_each_entry_rcu(pos, head, member) \
217 for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ 243 for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
218 prefetch(pos->member.next), &pos->member != (head); \ 244 prefetch(pos->member.next), &pos->member != (head); \
219 pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) 245 pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
220 246
221 247
222/** 248/**
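
list_entry_rcu() folds the rcu_dereference() into the container_of() step, which is exactly how the list_for_each_entry_rcu() hunk above now uses it. A hedged reader-side sketch:

    #include <linux/rculist.h>
    #include <linux/rcupdate.h>

    struct item {                           /* illustrative element type */
            int val;
            struct list_head link;
    };

    static int peek_first(struct list_head *head)
    {
            struct item *it;
            int val = -1;

            rcu_read_lock();
            if (!list_empty(head)) {
                    /* the rcu_dereference() now lives inside the macro */
                    it = list_first_entry_rcu(head, struct item, link);
                    val = it->val;
            }
            rcu_read_unlock();
            return val;
    }
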
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 58b2aa5312b9..5a5153806c42 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -161,8 +161,15 @@ struct rcu_data {
161 unsigned long offline_fqs; /* Kicked due to being offline. */ 161 unsigned long offline_fqs; /* Kicked due to being offline. */
162 unsigned long resched_ipi; /* Sent a resched IPI. */ 162 unsigned long resched_ipi; /* Sent a resched IPI. */
163 163
164 /* 5) For future __rcu_pending statistics. */ 164 /* 5) __rcu_pending() statistics. */
165 long n_rcu_pending; /* rcu_pending() calls since boot. */ 165 long n_rcu_pending; /* rcu_pending() calls since boot. */
166 long n_rp_qs_pending;
167 long n_rp_cb_ready;
168 long n_rp_cpu_needs_gp;
169 long n_rp_gp_completed;
170 long n_rp_gp_started;
171 long n_rp_need_fqs;
172 long n_rp_need_nothing;
166 173
167 int cpu; 174 int cpu;
168}; 175};
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e1b7b2173885..8670f1575fe1 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -11,7 +11,7 @@ struct ring_buffer_iter;
11 * Don't refer to this struct directly, use functions below. 11 * Don't refer to this struct directly, use functions below.
12 */ 12 */
13struct ring_buffer_event { 13struct ring_buffer_event {
14 u32 type:2, len:3, time_delta:27; 14 u32 type_len:5, time_delta:27;
15 u32 array[]; 15 u32 array[];
16}; 16};
17 17
@@ -24,7 +24,8 @@ struct ring_buffer_event {
24 * size is variable depending on how much 24 * size is variable depending on how much
25 * padding is needed 25 * padding is needed
26 * If time_delta is non zero: 26 * If time_delta is non zero:
27 * everything else same as RINGBUF_TYPE_DATA 27 * array[0] holds the actual length
28 * size = 4 + length (bytes)
28 * 29 *
29 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta 30 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta
30 * array[0] = time delta (28 .. 59) 31 * array[0] = time delta (28 .. 59)
@@ -35,22 +36,23 @@ struct ring_buffer_event {
35 * array[1..2] = tv_sec 36 * array[1..2] = tv_sec
36 * size = 16 bytes 37 * size = 16 bytes
37 * 38 *
38 * @RINGBUF_TYPE_DATA: Data record 39 * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
39 * If len is zero: 40 * Data record
41 * If type_len is zero:
40 * array[0] holds the actual length 42 * array[0] holds the actual length
41 * array[1..(length+3)/4] holds data 43 * array[1..(length+3)/4] holds data
42 * size = 4 + 4 + length (bytes) 44 * size = 4 + length (bytes)
43 * else 45 * else
44 * length = len << 2 46 * length = type_len << 2
45 * array[0..(length+3)/4-1] holds data 47 * array[0..(length+3)/4-1] holds data
46 * size = 4 + length (bytes) 48 * size = 4 + length (bytes)
47 */ 49 */
48enum ring_buffer_type { 50enum ring_buffer_type {
51 RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
49 RINGBUF_TYPE_PADDING, 52 RINGBUF_TYPE_PADDING,
50 RINGBUF_TYPE_TIME_EXTEND, 53 RINGBUF_TYPE_TIME_EXTEND,
51 /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ 54 /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
52 RINGBUF_TYPE_TIME_STAMP, 55 RINGBUF_TYPE_TIME_STAMP,
53 RINGBUF_TYPE_DATA,
54}; 56};
55 57
56unsigned ring_buffer_event_length(struct ring_buffer_event *event); 58unsigned ring_buffer_event_length(struct ring_buffer_event *event);
@@ -68,13 +70,54 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event)
68 return event->time_delta; 70 return event->time_delta;
69} 71}
70 72
73/*
74 * ring_buffer_event_discard can discard any event in the ring buffer.
 75 * It is up to the caller to protect against a reader
 76 * consuming it or a writer wrapping and replacing it.
77 *
78 * No external protection is needed if this is called before
 79 * the event is committed. But in that case it would be better to
80 * use ring_buffer_discard_commit.
81 *
82 * Note, if an event that has not been committed is discarded
83 * with ring_buffer_event_discard, it must still be committed.
84 */
71void ring_buffer_event_discard(struct ring_buffer_event *event); 85void ring_buffer_event_discard(struct ring_buffer_event *event);
72 86
73/* 87/*
88 * ring_buffer_discard_commit will remove an event that has not
 89 * been committed yet. If this is used, then ring_buffer_unlock_commit
90 * must not be called on the discarded event. This function
91 * will try to remove the event from the ring buffer completely
92 * if another event has not been written after it.
93 *
94 * Example use:
95 *
96 * if (some_condition)
97 * ring_buffer_discard_commit(buffer, event);
98 * else
99 * ring_buffer_unlock_commit(buffer, event);
100 */
101void ring_buffer_discard_commit(struct ring_buffer *buffer,
102 struct ring_buffer_event *event);
103
104/*
74 * size is in bytes for each per CPU buffer. 105 * size is in bytes for each per CPU buffer.
75 */ 106 */
76struct ring_buffer * 107struct ring_buffer *
77ring_buffer_alloc(unsigned long size, unsigned flags); 108__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key);
109
110/*
111 * Because the ring buffer is generic, if other users of the ring buffer get
112 * traced by ftrace, it can produce lockdep warnings. We need to keep each
113 * ring buffer's lock class separate.
114 */
115#define ring_buffer_alloc(size, flags) \
116({ \
117 static struct lock_class_key __key; \
118 __ring_buffer_alloc((size), (flags), &__key); \
119})
120
78void ring_buffer_free(struct ring_buffer *buffer); 121void ring_buffer_free(struct ring_buffer *buffer);
79 122
80int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); 123int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
@@ -122,6 +165,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer);
122unsigned long ring_buffer_overruns(struct ring_buffer *buffer); 165unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
123unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); 166unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
124unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); 167unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
168unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
169unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu);
125 170
126u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); 171u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
127void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, 172void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
@@ -137,6 +182,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
137int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, 182int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
138 size_t len, int cpu, int full); 183 size_t len, int cpu, int full);
139 184
185struct trace_seq;
186
187int ring_buffer_print_entry_header(struct trace_seq *s);
188int ring_buffer_print_page_header(struct trace_seq *s);
189
140enum ring_buffer_flags { 190enum ring_buffer_flags {
141 RB_FL_OVERWRITE = 1 << 0, 191 RB_FL_OVERWRITE = 1 << 0,
142}; 192};
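
The ring_buffer_alloc() wrapper is the standard lockdep idiom: the static struct lock_class_key inside the statement expression gives every allocation site its own lock class, so independent buffers don't share (and falsely chain) lockdep state. A hedged caller sketch:

    #include <linux/errno.h>
    #include <linux/ring_buffer.h>

    static struct ring_buffer *rb;

    static int my_init(void)                /* illustrative init hook */
    {
            /* the macro plants a static lock_class_key at this call site */
            rb = ring_buffer_alloc(64 * 1024, RB_FL_OVERWRITE);
            return rb ? 0 : -ENOMEM;
    }
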
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049c..d1399660b776 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -77,6 +77,7 @@ struct sched_param {
77#include <linux/proportions.h> 77#include <linux/proportions.h>
78#include <linux/seccomp.h> 78#include <linux/seccomp.h>
79#include <linux/rcupdate.h> 79#include <linux/rcupdate.h>
80#include <linux/rculist.h>
80#include <linux/rtmutex.h> 81#include <linux/rtmutex.h>
81 82
82#include <linux/time.h> 83#include <linux/time.h>
@@ -96,8 +97,8 @@ struct exec_domain;
96struct futex_pi_state; 97struct futex_pi_state;
97struct robust_list_head; 98struct robust_list_head;
98struct bio; 99struct bio;
99struct bts_tracer;
100struct fs_struct; 100struct fs_struct;
101struct bts_context;
101 102
102/* 103/*
103 * List of flags we want to share for kernel threads, 104 * List of flags we want to share for kernel threads,
@@ -116,6 +117,7 @@ struct fs_struct;
116 * 11 bit fractions. 117 * 11 bit fractions.
117 */ 118 */
118extern unsigned long avenrun[]; /* Load averages */ 119extern unsigned long avenrun[]; /* Load averages */
120extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
119 121
120#define FSHIFT 11 /* nr of bits of precision */ 122#define FSHIFT 11 /* nr of bits of precision */
121#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ 123#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
@@ -135,8 +137,8 @@ DECLARE_PER_CPU(unsigned long, process_counts);
135extern int nr_processes(void); 137extern int nr_processes(void);
136extern unsigned long nr_running(void); 138extern unsigned long nr_running(void);
137extern unsigned long nr_uninterruptible(void); 139extern unsigned long nr_uninterruptible(void);
138extern unsigned long nr_active(void);
139extern unsigned long nr_iowait(void); 140extern unsigned long nr_iowait(void);
141extern void calc_global_load(void);
140 142
141extern unsigned long get_parent_ip(unsigned long addr); 143extern unsigned long get_parent_ip(unsigned long addr);
142 144
@@ -838,7 +840,17 @@ struct sched_group {
838 */ 840 */
839 u32 reciprocal_cpu_power; 841 u32 reciprocal_cpu_power;
840 842
841 unsigned long cpumask[]; 843 /*
844 * The CPUs this group covers.
845 *
846 * NOTE: this field is variable length. (Allocated dynamically
847 * by attaching extra space to the end of the structure,
848 * depending on how many CPUs the kernel has booted up with)
849 *
 850 * It can also be embedded into static data structures at build
851 * time. (See 'struct static_sched_group' in kernel/sched.c)
852 */
853 unsigned long cpumask[0];
842}; 854};
843 855
844static inline struct cpumask *sched_group_cpus(struct sched_group *sg) 856static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
@@ -924,8 +936,17 @@ struct sched_domain {
924 char *name; 936 char *name;
925#endif 937#endif
926 938
927 /* span of all CPUs in this domain */ 939 /*
928 unsigned long span[]; 940 * Span of all CPUs in this domain.
941 *
942 * NOTE: this field is variable length. (Allocated dynamically
943 * by attaching extra space to the end of the structure,
944 * depending on how many CPUs the kernel has booted up with)
945 *
 946 * It can also be embedded into static data structures at build
947 * time. (See 'struct static_sched_domain' in kernel/sched.c)
948 */
949 unsigned long span[0];
929}; 950};
930 951
931static inline struct cpumask *sched_domain_span(struct sched_domain *sd) 952static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
@@ -1209,18 +1230,11 @@ struct task_struct {
1209 struct list_head ptraced; 1230 struct list_head ptraced;
1210 struct list_head ptrace_entry; 1231 struct list_head ptrace_entry;
1211 1232
1212#ifdef CONFIG_X86_PTRACE_BTS
1213 /* 1233 /*
1214 * This is the tracer handle for the ptrace BTS extension. 1234 * This is the tracer handle for the ptrace BTS extension.
1215 * This field actually belongs to the ptracer task. 1235 * This field actually belongs to the ptracer task.
1216 */ 1236 */
1217 struct bts_tracer *bts; 1237 struct bts_context *bts;
1218 /*
1219 * The buffer to hold the BTS data.
1220 */
1221 void *bts_buffer;
1222 size_t bts_size;
1223#endif /* CONFIG_X86_PTRACE_BTS */
1224 1238
1225 /* PID/PID hash table linkage. */ 1239 /* PID/PID hash table linkage. */
1226 struct pid_link pids[PIDTYPE_MAX]; 1240 struct pid_link pids[PIDTYPE_MAX];
@@ -1428,7 +1442,9 @@ struct task_struct {
1428#ifdef CONFIG_TRACING 1442#ifdef CONFIG_TRACING
1429 /* state flags for use by tracers */ 1443 /* state flags for use by tracers */
1430 unsigned long trace; 1444 unsigned long trace;
1431#endif 1445 /* bitmask of trace recursion */
1446 unsigned long trace_recursion;
1447#endif /* CONFIG_TRACING */
1432}; 1448};
1433 1449
1434/* Future-safe accessor for struct task_struct's cpus_allowed. */ 1450/* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2001,8 +2017,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
2001extern char *get_task_comm(char *to, struct task_struct *tsk); 2017extern char *get_task_comm(char *to, struct task_struct *tsk);
2002 2018
2003#ifdef CONFIG_SMP 2019#ifdef CONFIG_SMP
2020extern void wait_task_context_switch(struct task_struct *p);
2004extern unsigned long wait_task_inactive(struct task_struct *, long match_state); 2021extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
2005#else 2022#else
2023static inline void wait_task_context_switch(struct task_struct *p) {}
2006static inline unsigned long wait_task_inactive(struct task_struct *p, 2024static inline unsigned long wait_task_inactive(struct task_struct *p,
2007 long match_state) 2025 long match_state)
2008{ 2026{
@@ -2010,7 +2028,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
2010} 2028}
2011#endif 2029#endif
2012 2030
2013#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) 2031#define next_task(p) \
2032 list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
2014 2033
2015#define for_each_process(p) \ 2034#define for_each_process(p) \
2016 for (p = &init_task ; (p = next_task(p)) != &init_task ; ) 2035 for (p = &init_task ; (p = next_task(p)) != &init_task ; )
@@ -2049,8 +2068,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
2049 2068
2050static inline struct task_struct *next_thread(const struct task_struct *p) 2069static inline struct task_struct *next_thread(const struct task_struct *p)
2051{ 2070{
2052 return list_entry(rcu_dereference(p->thread_group.next), 2071 return list_entry_rcu(p->thread_group.next,
2053 struct task_struct, thread_group); 2072 struct task_struct, thread_group);
2054} 2073}
2055 2074
2056static inline int thread_group_empty(struct task_struct *p) 2075static inline int thread_group_empty(struct task_struct *p)
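
The cpumask[0]/span[0] tails are conventional variable-length arrays: the allocator attaches cpumask_size() extra bytes past the fixed header, and the same layout can be mirrored in static data. A hedged sketch of the dynamic side (not the actual kernel/sched.c code):

    #include <linux/slab.h>

    static struct sched_group *alloc_sched_group(int node)
    {
            /* header plus cpumask_size() bytes for the trailing cpumask[] */
            return kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
                                GFP_KERNEL, node);
    }
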
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 84f997f8aa53..c7552836bd95 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig)
235extern int next_signal(struct sigpending *pending, sigset_t *mask); 235extern int next_signal(struct sigpending *pending, sigset_t *mask);
236extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); 236extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
237extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); 237extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
238extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
239 siginfo_t *info);
238extern long do_sigpending(void __user *, unsigned long); 240extern long do_sigpending(void __user *, unsigned long);
239extern int sigprocmask(int, sigset_t *, sigset_t *); 241extern int sigprocmask(int, sigset_t *, sigset_t *);
240extern int show_unhandled_signals; 242extern int show_unhandled_signals;
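
do_rt_tgsigqueueinfo() backs the new rt_tgsigqueueinfo syscall, which queues a payload-carrying signal to one specific thread (tid) within a thread group (tgid). A hedged userspace sketch via the raw syscall (assumes SYS_rt_tgsigqueueinfo is defined by the installed headers):

    #include <signal.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int tgsigqueue(pid_t tgid, pid_t tid, int sig, int payload)
    {
            siginfo_t info = {0};

            info.si_signo = sig;
            info.si_code = SI_QUEUE;        /* user-queued, with a value */
            info.si_value.sival_int = payload;

            return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, &info);
    }
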
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 5ac9b0bcaf9a..713f841ecaa9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,7 @@
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ 14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ 15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <trace/kmemtrace.h> 17#include <linux/kmemtrace.h>
18 18
19/* Size description struct for general caches. */ 19/* Size description struct for general caches. */
20struct cache_sizes { 20struct cache_sizes {
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5046f90c1171..be5d40c43bd2 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,7 +10,7 @@
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13#include <trace/kmemtrace.h> 13#include <linux/kmemtrace.h>
14 14
15enum stat_item { 15enum stat_item {
16 ALLOC_FASTPATH, /* Allocation from cpu slab */ 16 ALLOC_FASTPATH, /* Allocation from cpu slab */
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 938234c4a996..d4841ed8215b 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -60,6 +60,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
60#define __raw_spin_is_locked(lock) ((void)(lock), 0) 60#define __raw_spin_is_locked(lock) ((void)(lock), 0)
61/* for sched.c and kernel_lock.c: */ 61/* for sched.c and kernel_lock.c: */
62# define __raw_spin_lock(lock) do { (void)(lock); } while (0) 62# define __raw_spin_lock(lock) do { (void)(lock); } while (0)
63# define __raw_spin_lock_flags(lock, flags) do { (void)(lock); } while (0)
63# define __raw_spin_unlock(lock) do { (void)(lock); } while (0) 64# define __raw_spin_unlock(lock) do { (void)(lock); } while (0)
64# define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) 65# define __raw_spin_trylock(lock) ({ (void)(lock); 1; })
65#endif /* DEBUG_SPINLOCK */ 66#endif /* DEBUG_SPINLOCK */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 62d81435347a..d476aad3ff57 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page,
437 return 0; 437 return 0;
438} 438}
439 439
440static inline void
441mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
442{
443}
444
440#endif /* CONFIG_SWAP */ 445#endif /* CONFIG_SWAP */
441#endif /* __KERNEL__*/ 446#endif /* __KERNEL__*/
442#endif /* _LINUX_SWAP_H */ 447#endif /* _LINUX_SWAP_H */
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ac9ff54f7cb3..cb1a6631b8f4 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
29 29
30extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, 30extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
31 phys_addr_t address); 31 phys_addr_t address);
32extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); 32extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev,
33 dma_addr_t address);
33 34
34extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); 35extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size);
35 36
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e6b820f8b56b..a8cc4e13434c 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -21,13 +21,14 @@ struct restart_block {
21 struct { 21 struct {
22 unsigned long arg0, arg1, arg2, arg3; 22 unsigned long arg0, arg1, arg2, arg3;
23 }; 23 };
24 /* For futex_wait */ 24 /* For futex_wait and futex_wait_requeue_pi */
25 struct { 25 struct {
26 u32 *uaddr; 26 u32 *uaddr;
27 u32 val; 27 u32 val;
28 u32 flags; 28 u32 flags;
29 u32 bitset; 29 u32 bitset;
30 u64 time; 30 u64 time;
31 u32 *uaddr2;
31 } futex; 32 } futex;
32 /* For nanosleep */ 33 /* For nanosleep */
33 struct { 34 struct {
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
new file mode 100644
index 000000000000..c68bccba2074
--- /dev/null
+++ b/include/linux/trace_seq.h
@@ -0,0 +1,92 @@
1#ifndef _LINUX_TRACE_SEQ_H
2#define _LINUX_TRACE_SEQ_H
3
4#include <linux/fs.h>
5
6/*
7 * Trace sequences are used to allow a function to call several other functions
8 * to create a string of data to use (up to a max of PAGE_SIZE).
9 */
10
11struct trace_seq {
12 unsigned char buffer[PAGE_SIZE];
13 unsigned int len;
14 unsigned int readpos;
15};
16
17static inline void
18trace_seq_init(struct trace_seq *s)
19{
20 s->len = 0;
21 s->readpos = 0;
22}
23
24/*
25 * Currently only defined when tracing is enabled.
26 */
27#ifdef CONFIG_TRACING
28extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3)));
30extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
31 __attribute__ ((format (printf, 2, 0)));
32extern int
33trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
34extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
35extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
36 size_t cnt);
37extern int trace_seq_puts(struct trace_seq *s, const char *str);
38extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
39extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
40extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
41 size_t len);
42extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
43extern int trace_seq_path(struct trace_seq *s, struct path *path);
44
45#else /* CONFIG_TRACING */
46static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
47{
48 return 0;
49}
50static inline int
51trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
52{
53 return 0;
54}
55
56static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
57{
58}
59static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
60 size_t cnt)
61{
62 return 0;
63}
64static inline int trace_seq_puts(struct trace_seq *s, const char *str)
65{
66 return 0;
67}
68static inline int trace_seq_putc(struct trace_seq *s, unsigned char c)
69{
70 return 0;
71}
72static inline int
73trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
74{
75 return 0;
76}
77static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
78 size_t len)
79{
80 return 0;
81}
82static inline void *trace_seq_reserve(struct trace_seq *s, size_t len)
83{
84 return NULL;
85}
86static inline int trace_seq_path(struct trace_seq *s, struct path *path)
87{
88 return 0;
89}
90#endif /* CONFIG_TRACING */
91
92#endif /* _LINUX_TRACE_SEQ_H */
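
For readers new to this API, a minimal usage sketch follows; the my_show() function and its values are illustrative, not part of the patch.

	#include <linux/trace_seq.h>
	#include <linux/seq_file.h>

	/* Hypothetical consumer: compose several pieces into one buffer,
	 * then hand the result to a seq_file. Made static because a
	 * PAGE_SIZE trace_seq is too large for the stack. */
	static struct trace_seq my_seq;

	static int my_show(struct seq_file *m, void *unused)
	{
		trace_seq_init(&my_seq);			/* len = readpos = 0 */
		trace_seq_puts(&my_seq, "status: ");
		trace_seq_printf(&my_seq, "%d events\n", 42);	/* appends, capped at PAGE_SIZE */
		trace_print_seq(m, &my_seq);			/* copy the buffer into the seq_file */
		return 0;
	}
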
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index c7aa154f4bfc..eb96603d92db 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -259,14 +259,12 @@ static inline void tracehook_finish_clone(struct task_struct *child,
 
 /**
  * tracehook_report_clone - in parent, new child is about to start running
- * @trace:		return value from tracehook_prepare_clone()
  * @regs:		parent's user register state
  * @clone_flags:	flags from parent's system call
  * @pid:		new child's PID in the parent's namespace
  * @child:		new child task
  *
- * Called after a child is set up, but before it has been started
- * running.  @trace is the value returned by tracehook_prepare_clone().
+ * Called after a child is set up, but before it has been started running.
  * This is not a good place to block, because the child has not started
  * yet.  Suspend the child here if desired, and then block in
  * tracehook_report_clone_complete().  This must prevent the child from
@@ -276,13 +274,14 @@ static inline void tracehook_finish_clone(struct task_struct *child,
  *
  * Called with no locks held, but the child cannot run until this returns.
  */
-static inline void tracehook_report_clone(int trace, struct pt_regs *regs,
+static inline void tracehook_report_clone(struct pt_regs *regs,
 					  unsigned long clone_flags,
 					  pid_t pid, struct task_struct *child)
 {
-	if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) {
+	if (unlikely(task_ptrace(child))) {
 		/*
-		 * The child starts up with an immediate SIGSTOP.
+		 * It doesn't matter who attached/attaching to this
+		 * task, the pending SIGSTOP is right in any case.
 		 */
 		sigaddset(&child->pending.signal, SIGSTOP);
 		set_tsk_thread_flag(child, TIF_SIGPENDING);
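
The rewritten test relies on the task_ptrace() accessor instead of the dropped @trace argument and the CLONE_PTRACE check. For reference, that helper (per include/linux/ptrace.h of this series) is just a field read:

	static inline int task_ptrace(struct task_struct *task)
	{
		return task->ptrace;
	}
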
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d35a7ee7611f..14df7e635d43 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -31,6 +31,8 @@ struct tracepoint {
  * Keep in sync with vmlinux.lds.h.
  */
 
+#ifndef DECLARE_TRACE
+
 #define TP_PROTO(args...)	args
 #define TP_ARGS(args...)	args
 
@@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 					 struct tracepoint *end)
 { }
 #endif /* CONFIG_TRACEPOINTS */
+#endif /* DECLARE_TRACE */
 
 /*
  * Connect a probe to a tracepoint.
@@ -154,10 +157,8 @@ static inline void tracepoint_synchronize_unregister(void)
 }
 
 #define PARAMS(args...) args
-#define TRACE_FORMAT(name, proto, args, fmt)		\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
-
 
+#ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
  *
@@ -262,5 +263,6 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
 
 #endif
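
The new DECLARE_TRACE/TRACE_EVENT guards let a second preprocessing pass supply its own expansion before a trace header is re-read. A hedged sketch of the override pattern (define_trace.h, added later in this merge, does exactly this):

	/* Pre-defining the macro suppresses the default definitions
	 * guarded above, so the same header expands differently. */
	#undef TRACE_EVENT
	#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
		DEFINE_TRACE(name)
	#include <trace/events/sched.h>	/* re-read with the new expansion */
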
diff --git a/include/linux/wait.h b/include/linux/wait.h
index bc024632f365..6788e1a4d4ca 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
 	list_del(&old->task_list);
 }
 
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-			int nr_exclusive, int sync, void *key);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index c9184f756cad..68a8d873bbd9 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -680,7 +680,7 @@ fc_remote_port_chkready(struct fc_rport *rport)
 		if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
 			result = 0;
 		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
-			result = DID_TRANSPORT_DISRUPTED << 16;
+			result = DID_IMM_RETRY << 16;
 		else
 			result = DID_NO_CONNECT << 16;
 		break;
@@ -688,7 +688,7 @@ fc_remote_port_chkready(struct fc_rport *rport)
 		if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)
 			result = DID_TRANSPORT_FAILFAST << 16;
 		else
-			result = DID_TRANSPORT_DISRUPTED << 16;
+			result = DID_IMM_RETRY << 16;
 		break;
 	default:
 		result = DID_NO_CONNECT << 16;
diff --git a/include/trace/block.h b/include/trace/block.h
deleted file mode 100644
index 25b7068b819e..000000000000
--- a/include/trace/block.h
+++ /dev/null
@@ -1,76 +0,0 @@
1#ifndef _TRACE_BLOCK_H
2#define _TRACE_BLOCK_H
3
4#include <linux/blkdev.h>
5#include <linux/tracepoint.h>
6
7DECLARE_TRACE(block_rq_abort,
8 TP_PROTO(struct request_queue *q, struct request *rq),
9 TP_ARGS(q, rq));
10
11DECLARE_TRACE(block_rq_insert,
12 TP_PROTO(struct request_queue *q, struct request *rq),
13 TP_ARGS(q, rq));
14
15DECLARE_TRACE(block_rq_issue,
16 TP_PROTO(struct request_queue *q, struct request *rq),
17 TP_ARGS(q, rq));
18
19DECLARE_TRACE(block_rq_requeue,
20 TP_PROTO(struct request_queue *q, struct request *rq),
21 TP_ARGS(q, rq));
22
23DECLARE_TRACE(block_rq_complete,
24 TP_PROTO(struct request_queue *q, struct request *rq),
25 TP_ARGS(q, rq));
26
27DECLARE_TRACE(block_bio_bounce,
28 TP_PROTO(struct request_queue *q, struct bio *bio),
29 TP_ARGS(q, bio));
30
31DECLARE_TRACE(block_bio_complete,
32 TP_PROTO(struct request_queue *q, struct bio *bio),
33 TP_ARGS(q, bio));
34
35DECLARE_TRACE(block_bio_backmerge,
36 TP_PROTO(struct request_queue *q, struct bio *bio),
37 TP_ARGS(q, bio));
38
39DECLARE_TRACE(block_bio_frontmerge,
40 TP_PROTO(struct request_queue *q, struct bio *bio),
41 TP_ARGS(q, bio));
42
43DECLARE_TRACE(block_bio_queue,
44 TP_PROTO(struct request_queue *q, struct bio *bio),
45 TP_ARGS(q, bio));
46
47DECLARE_TRACE(block_getrq,
48 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
49 TP_ARGS(q, bio, rw));
50
51DECLARE_TRACE(block_sleeprq,
52 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
53 TP_ARGS(q, bio, rw));
54
55DECLARE_TRACE(block_plug,
56 TP_PROTO(struct request_queue *q),
57 TP_ARGS(q));
58
59DECLARE_TRACE(block_unplug_timer,
60 TP_PROTO(struct request_queue *q),
61 TP_ARGS(q));
62
63DECLARE_TRACE(block_unplug_io,
64 TP_PROTO(struct request_queue *q),
65 TP_ARGS(q));
66
67DECLARE_TRACE(block_split,
68 TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
69 TP_ARGS(q, bio, pdu));
70
71DECLARE_TRACE(block_remap,
72 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
73 sector_t from, sector_t to),
74 TP_ARGS(q, bio, dev, from, to));
75
76#endif
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
new file mode 100644
index 000000000000..f7a7ae1e8f90
--- /dev/null
+++ b/include/trace/define_trace.h
@@ -0,0 +1,75 @@
1/*
2 * Trace files that want to automate creation of all tracepoints defined
3 * in their file should include this file. The following are macros that the
4 * trace file may define:
5 *
6 * TRACE_SYSTEM defines the system the tracepoint is for
7 *
8 * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h
9 * This macro may be defined to tell define_trace.h what file to include.
10 * Note, leave off the ".h".
11 *
12 * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace,
13 * then this macro can define the path to use. Note, the path is relative to
14 * define_trace.h, not the file including it. Full path names for out of tree
15 * modules must be used.
16 */
17
18#ifdef CREATE_TRACE_POINTS
19
20/* Prevent recursion */
21#undef CREATE_TRACE_POINTS
22
23#include <linux/stringify.h>
24
25#undef TRACE_EVENT
26#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
27 DEFINE_TRACE(name)
28
29#undef DECLARE_TRACE
30#define DECLARE_TRACE(name, proto, args) \
31 DEFINE_TRACE(name)
32
33#undef TRACE_INCLUDE
34#undef __TRACE_INCLUDE
35
36#ifndef TRACE_INCLUDE_FILE
37# define TRACE_INCLUDE_FILE TRACE_SYSTEM
38# define UNDEF_TRACE_INCLUDE_FILE
39#endif
40
41#ifndef TRACE_INCLUDE_PATH
42# define __TRACE_INCLUDE(system) <trace/events/system.h>
43# define UNDEF_TRACE_INCLUDE_PATH
44#else
45# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
46#endif
47
48# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
49
50/* Let the trace headers be reread */
51#define TRACE_HEADER_MULTI_READ
52
53#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
54
55#ifdef CONFIG_EVENT_TRACING
56#include <trace/ftrace.h>
57#endif
58
59#undef TRACE_HEADER_MULTI_READ
60
61/* Only undef what we defined in this file */
62#ifdef UNDEF_TRACE_INCLUDE_FILE
63# undef TRACE_INCLUDE_FILE
64# undef UNDEF_TRACE_INCLUDE_FILE
65#endif
66
67#ifdef UNDEF_TRACE_INCLUDE_PATH
68# undef TRACE_INCLUDE_PATH
69# undef UNDEF_TRACE_INCLUDE_PATH
70#endif
71
72/* We may be processing more files */
73#define CREATE_TRACE_POINTS
74
75#endif /* CREATE_TRACE_POINTS */
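
To make the contract described in the header comment concrete, here is a hedged sketch of the intended usage; the "mysys" system and its event are hypothetical:

	/* include/trace/events/mysys.h -- hypothetical trace header */
	#if !defined(_TRACE_MYSYS_H) || defined(TRACE_HEADER_MULTI_READ)
	#define _TRACE_MYSYS_H

	#include <linux/tracepoint.h>

	#undef TRACE_SYSTEM
	#define TRACE_SYSTEM mysys

	TRACE_EVENT(mysys_tick, ...);	/* TP_PROTO/TP_ARGS/... elided */

	#endif /* _TRACE_MYSYS_H */

	/* This part must be outside protection */
	#include <trace/define_trace.h>

	/* Exactly one .c file per subsystem then instantiates the points: */
	#define CREATE_TRACE_POINTS
	#include <trace/events/mysys.h>
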
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
new file mode 100644
index 000000000000..53effd496a50
--- /dev/null
+++ b/include/trace/events/block.h
@@ -0,0 +1,498 @@
1#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_BLOCK_H
3
4#include <linux/blktrace_api.h>
5#include <linux/blkdev.h>
6#include <linux/tracepoint.h>
7
8#undef TRACE_SYSTEM
9#define TRACE_SYSTEM block
10
11TRACE_EVENT(block_rq_abort,
12
13 TP_PROTO(struct request_queue *q, struct request *rq),
14
15 TP_ARGS(q, rq),
16
17 TP_STRUCT__entry(
18 __field( dev_t, dev )
19 __field( sector_t, sector )
20 __field( unsigned int, nr_sector )
21 __field( int, errors )
22 __array( char, rwbs, 6 )
23 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
24 ),
25
26 TP_fast_assign(
27 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
28 __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
29 __entry->nr_sector = blk_pc_request(rq) ?
30 0 : rq->hard_nr_sectors;
31 __entry->errors = rq->errors;
32
33 blk_fill_rwbs_rq(__entry->rwbs, rq);
34 blk_dump_cmd(__get_str(cmd), rq);
35 ),
36
37 TP_printk("%d,%d %s (%s) %llu + %u [%d]",
38 MAJOR(__entry->dev), MINOR(__entry->dev),
39 __entry->rwbs, __get_str(cmd),
40 (unsigned long long)__entry->sector,
41 __entry->nr_sector, __entry->errors)
42);
43
44TRACE_EVENT(block_rq_insert,
45
46 TP_PROTO(struct request_queue *q, struct request *rq),
47
48 TP_ARGS(q, rq),
49
50 TP_STRUCT__entry(
51 __field( dev_t, dev )
52 __field( sector_t, sector )
53 __field( unsigned int, nr_sector )
54 __field( unsigned int, bytes )
55 __array( char, rwbs, 6 )
56 __array( char, comm, TASK_COMM_LEN )
57 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
58 ),
59
60 TP_fast_assign(
61 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
62 __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
63 __entry->nr_sector = blk_pc_request(rq) ?
64 0 : rq->hard_nr_sectors;
65 __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0;
66
67 blk_fill_rwbs_rq(__entry->rwbs, rq);
68 blk_dump_cmd(__get_str(cmd), rq);
69 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
70 ),
71
72 TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
73 MAJOR(__entry->dev), MINOR(__entry->dev),
74 __entry->rwbs, __entry->bytes, __get_str(cmd),
75 (unsigned long long)__entry->sector,
76 __entry->nr_sector, __entry->comm)
77);
78
79TRACE_EVENT(block_rq_issue,
80
81 TP_PROTO(struct request_queue *q, struct request *rq),
82
83 TP_ARGS(q, rq),
84
85 TP_STRUCT__entry(
86 __field( dev_t, dev )
87 __field( sector_t, sector )
88 __field( unsigned int, nr_sector )
89 __field( unsigned int, bytes )
90 __array( char, rwbs, 6 )
91 __array( char, comm, TASK_COMM_LEN )
92 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
93 ),
94
95 TP_fast_assign(
96 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
97 __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
98 __entry->nr_sector = blk_pc_request(rq) ?
99 0 : rq->hard_nr_sectors;
100 __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0;
101
102 blk_fill_rwbs_rq(__entry->rwbs, rq);
103 blk_dump_cmd(__get_str(cmd), rq);
104 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
105 ),
106
107 TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
108 MAJOR(__entry->dev), MINOR(__entry->dev),
109 __entry->rwbs, __entry->bytes, __get_str(cmd),
110 (unsigned long long)__entry->sector,
111 __entry->nr_sector, __entry->comm)
112);
113
114TRACE_EVENT(block_rq_requeue,
115
116 TP_PROTO(struct request_queue *q, struct request *rq),
117
118 TP_ARGS(q, rq),
119
120 TP_STRUCT__entry(
121 __field( dev_t, dev )
122 __field( sector_t, sector )
123 __field( unsigned int, nr_sector )
124 __field( int, errors )
125 __array( char, rwbs, 6 )
126 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
127 ),
128
129 TP_fast_assign(
130 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
131 __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
132 __entry->nr_sector = blk_pc_request(rq) ?
133 0 : rq->hard_nr_sectors;
134 __entry->errors = rq->errors;
135
136 blk_fill_rwbs_rq(__entry->rwbs, rq);
137 blk_dump_cmd(__get_str(cmd), rq);
138 ),
139
140 TP_printk("%d,%d %s (%s) %llu + %u [%d]",
141 MAJOR(__entry->dev), MINOR(__entry->dev),
142 __entry->rwbs, __get_str(cmd),
143 (unsigned long long)__entry->sector,
144 __entry->nr_sector, __entry->errors)
145);
146
147TRACE_EVENT(block_rq_complete,
148
149 TP_PROTO(struct request_queue *q, struct request *rq),
150
151 TP_ARGS(q, rq),
152
153 TP_STRUCT__entry(
154 __field( dev_t, dev )
155 __field( sector_t, sector )
156 __field( unsigned int, nr_sector )
157 __field( int, errors )
158 __array( char, rwbs, 6 )
159 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
160 ),
161
162 TP_fast_assign(
163 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
164 __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
165 __entry->nr_sector = blk_pc_request(rq) ?
166 0 : rq->hard_nr_sectors;
167 __entry->errors = rq->errors;
168
169 blk_fill_rwbs_rq(__entry->rwbs, rq);
170 blk_dump_cmd(__get_str(cmd), rq);
171 ),
172
173 TP_printk("%d,%d %s (%s) %llu + %u [%d]",
174 MAJOR(__entry->dev), MINOR(__entry->dev),
175 __entry->rwbs, __get_str(cmd),
176 (unsigned long long)__entry->sector,
177 __entry->nr_sector, __entry->errors)
178);
179TRACE_EVENT(block_bio_bounce,
180
181 TP_PROTO(struct request_queue *q, struct bio *bio),
182
183 TP_ARGS(q, bio),
184
185 TP_STRUCT__entry(
186 __field( dev_t, dev )
187 __field( sector_t, sector )
188 __field( unsigned int, nr_sector )
189 __array( char, rwbs, 6 )
190 __array( char, comm, TASK_COMM_LEN )
191 ),
192
193 TP_fast_assign(
194 __entry->dev = bio->bi_bdev->bd_dev;
195 __entry->sector = bio->bi_sector;
196 __entry->nr_sector = bio->bi_size >> 9;
197 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
198 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
199 ),
200
201 TP_printk("%d,%d %s %llu + %u [%s]",
202 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
203 (unsigned long long)__entry->sector,
204 __entry->nr_sector, __entry->comm)
205);
206
207TRACE_EVENT(block_bio_complete,
208
209 TP_PROTO(struct request_queue *q, struct bio *bio),
210
211 TP_ARGS(q, bio),
212
213 TP_STRUCT__entry(
214 __field( dev_t, dev )
215 __field( sector_t, sector )
216 __field( unsigned, nr_sector )
217 __field( int, error )
218 __array( char, rwbs, 6 )
219 ),
220
221 TP_fast_assign(
222 __entry->dev = bio->bi_bdev->bd_dev;
223 __entry->sector = bio->bi_sector;
224 __entry->nr_sector = bio->bi_size >> 9;
225 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
226 ),
227
228 TP_printk("%d,%d %s %llu + %u [%d]",
229 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
230 (unsigned long long)__entry->sector,
231 __entry->nr_sector, __entry->error)
232);
233
234TRACE_EVENT(block_bio_backmerge,
235
236 TP_PROTO(struct request_queue *q, struct bio *bio),
237
238 TP_ARGS(q, bio),
239
240 TP_STRUCT__entry(
241 __field( dev_t, dev )
242 __field( sector_t, sector )
243 __field( unsigned int, nr_sector )
244 __array( char, rwbs, 6 )
245 __array( char, comm, TASK_COMM_LEN )
246 ),
247
248 TP_fast_assign(
249 __entry->dev = bio->bi_bdev->bd_dev;
250 __entry->sector = bio->bi_sector;
251 __entry->nr_sector = bio->bi_size >> 9;
252 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
253 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
254 ),
255
256 TP_printk("%d,%d %s %llu + %u [%s]",
257 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
258 (unsigned long long)__entry->sector,
259 __entry->nr_sector, __entry->comm)
260);
261
262TRACE_EVENT(block_bio_frontmerge,
263
264 TP_PROTO(struct request_queue *q, struct bio *bio),
265
266 TP_ARGS(q, bio),
267
268 TP_STRUCT__entry(
269 __field( dev_t, dev )
270 __field( sector_t, sector )
271 __field( unsigned, nr_sector )
272 __array( char, rwbs, 6 )
273 __array( char, comm, TASK_COMM_LEN )
274 ),
275
276 TP_fast_assign(
277 __entry->dev = bio->bi_bdev->bd_dev;
278 __entry->sector = bio->bi_sector;
279 __entry->nr_sector = bio->bi_size >> 9;
280 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
281 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
282 ),
283
284 TP_printk("%d,%d %s %llu + %u [%s]",
285 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
286 (unsigned long long)__entry->sector,
287 __entry->nr_sector, __entry->comm)
288);
289
290TRACE_EVENT(block_bio_queue,
291
292 TP_PROTO(struct request_queue *q, struct bio *bio),
293
294 TP_ARGS(q, bio),
295
296 TP_STRUCT__entry(
297 __field( dev_t, dev )
298 __field( sector_t, sector )
299 __field( unsigned int, nr_sector )
300 __array( char, rwbs, 6 )
301 __array( char, comm, TASK_COMM_LEN )
302 ),
303
304 TP_fast_assign(
305 __entry->dev = bio->bi_bdev->bd_dev;
306 __entry->sector = bio->bi_sector;
307 __entry->nr_sector = bio->bi_size >> 9;
308 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
309 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
310 ),
311
312 TP_printk("%d,%d %s %llu + %u [%s]",
313 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
314 (unsigned long long)__entry->sector,
315 __entry->nr_sector, __entry->comm)
316);
317
318TRACE_EVENT(block_getrq,
319
320 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
321
322 TP_ARGS(q, bio, rw),
323
324 TP_STRUCT__entry(
325 __field( dev_t, dev )
326 __field( sector_t, sector )
327 __field( unsigned int, nr_sector )
328 __array( char, rwbs, 6 )
329 __array( char, comm, TASK_COMM_LEN )
330 ),
331
332 TP_fast_assign(
333 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
334 __entry->sector = bio ? bio->bi_sector : 0;
335 __entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
336 blk_fill_rwbs(__entry->rwbs,
337 bio ? bio->bi_rw : 0, __entry->nr_sector);
338 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
339 ),
340
341 TP_printk("%d,%d %s %llu + %u [%s]",
342 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
343 (unsigned long long)__entry->sector,
344 __entry->nr_sector, __entry->comm)
345);
346
347TRACE_EVENT(block_sleeprq,
348
349 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
350
351 TP_ARGS(q, bio, rw),
352
353 TP_STRUCT__entry(
354 __field( dev_t, dev )
355 __field( sector_t, sector )
356 __field( unsigned int, nr_sector )
357 __array( char, rwbs, 6 )
358 __array( char, comm, TASK_COMM_LEN )
359 ),
360
361 TP_fast_assign(
362 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
363 __entry->sector = bio ? bio->bi_sector : 0;
364 __entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
365 blk_fill_rwbs(__entry->rwbs,
366 bio ? bio->bi_rw : 0, __entry->nr_sector);
367 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
368 ),
369
370 TP_printk("%d,%d %s %llu + %u [%s]",
371 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
372 (unsigned long long)__entry->sector,
373 __entry->nr_sector, __entry->comm)
374);
375
376TRACE_EVENT(block_plug,
377
378 TP_PROTO(struct request_queue *q),
379
380 TP_ARGS(q),
381
382 TP_STRUCT__entry(
383 __array( char, comm, TASK_COMM_LEN )
384 ),
385
386 TP_fast_assign(
387 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
388 ),
389
390 TP_printk("[%s]", __entry->comm)
391);
392
393TRACE_EVENT(block_unplug_timer,
394
395 TP_PROTO(struct request_queue *q),
396
397 TP_ARGS(q),
398
399 TP_STRUCT__entry(
400 __field( int, nr_rq )
401 __array( char, comm, TASK_COMM_LEN )
402 ),
403
404 TP_fast_assign(
405 __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE];
406 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
407 ),
408
409 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
410);
411
412TRACE_EVENT(block_unplug_io,
413
414 TP_PROTO(struct request_queue *q),
415
416 TP_ARGS(q),
417
418 TP_STRUCT__entry(
419 __field( int, nr_rq )
420 __array( char, comm, TASK_COMM_LEN )
421 ),
422
423 TP_fast_assign(
424 __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE];
425 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
426 ),
427
428 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
429);
430
431TRACE_EVENT(block_split,
432
433 TP_PROTO(struct request_queue *q, struct bio *bio,
434 unsigned int new_sector),
435
436 TP_ARGS(q, bio, new_sector),
437
438 TP_STRUCT__entry(
439 __field( dev_t, dev )
440 __field( sector_t, sector )
441 __field( sector_t, new_sector )
442 __array( char, rwbs, 6 )
443 __array( char, comm, TASK_COMM_LEN )
444 ),
445
446 TP_fast_assign(
447 __entry->dev = bio->bi_bdev->bd_dev;
448 __entry->sector = bio->bi_sector;
449 __entry->new_sector = new_sector;
450 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
451 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
452 ),
453
454 TP_printk("%d,%d %s %llu / %llu [%s]",
455 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
456 (unsigned long long)__entry->sector,
457 (unsigned long long)__entry->new_sector,
458 __entry->comm)
459);
460
461TRACE_EVENT(block_remap,
462
463 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
464 sector_t from),
465
466 TP_ARGS(q, bio, dev, from),
467
468 TP_STRUCT__entry(
469 __field( dev_t, dev )
470 __field( sector_t, sector )
471 __field( unsigned int, nr_sector )
472 __field( dev_t, old_dev )
473 __field( sector_t, old_sector )
474 __array( char, rwbs, 6 )
475 ),
476
477 TP_fast_assign(
478 __entry->dev = bio->bi_bdev->bd_dev;
479 __entry->sector = bio->bi_sector;
480 __entry->nr_sector = bio->bi_size >> 9;
481 __entry->old_dev = dev;
482 __entry->old_sector = from;
483 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
484 ),
485
486 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
487 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
488 (unsigned long long)__entry->sector,
489 __entry->nr_sector,
490 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
491 (unsigned long long)__entry->old_sector)
492);
493
494#endif /* _TRACE_BLOCK_H */
495
496/* This part must be outside protection */
497#include <trace/define_trace.h>
498
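
Each TRACE_EVENT() above also declares a trace_<name>() inline through DECLARE_TRACE(). A sketch of a call site (the wrapper function is illustrative; the block layer itself is converted elsewhere in this merge):

	#include <trace/events/block.h>

	static void example_plug(struct request_queue *q)
	{
		/* compiles to a near no-op unless a probe is attached */
		trace_block_plug(q);
	}
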
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
new file mode 100644
index 000000000000..b0c7ede55eb1
--- /dev/null
+++ b/include/trace/events/irq.h
@@ -0,0 +1,145 @@
1#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_IRQ_H
3
4#include <linux/tracepoint.h>
5#include <linux/interrupt.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM irq
9
10#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
11#define show_softirq_name(val) \
12 __print_symbolic(val, \
13 softirq_name(HI), \
14 softirq_name(TIMER), \
15 softirq_name(NET_TX), \
16 softirq_name(NET_RX), \
17 softirq_name(BLOCK), \
18 softirq_name(TASKLET), \
19 softirq_name(SCHED), \
20 softirq_name(HRTIMER), \
21 softirq_name(RCU))
22
23/**
24 * irq_handler_entry - called immediately before the irq action handler
25 * @irq: irq number
26 * @action: pointer to struct irqaction
27 *
28 * The struct irqaction pointed to by @action contains various
29 * information about the handler, including the device name,
30 * @action->name, and the device id, @action->dev_id. When used in
31 * conjunction with the irq_handler_exit tracepoint, we can figure
32 * out irq handler latencies.
33 */
34TRACE_EVENT(irq_handler_entry,
35
36 TP_PROTO(int irq, struct irqaction *action),
37
38 TP_ARGS(irq, action),
39
40 TP_STRUCT__entry(
41 __field( int, irq )
42 __string( name, action->name )
43 ),
44
45 TP_fast_assign(
46 __entry->irq = irq;
47 __assign_str(name, action->name);
48 ),
49
50 TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
51);
52
53/**
54 * irq_handler_exit - called immediately after the irq action handler returns
55 * @irq: irq number
56 * @action: pointer to struct irqaction
57 * @ret: return value
58 *
59 * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
60 * @action->handler successfully handled this irq. Otherwise, the irq might be
61 * a shared irq line, or the irq was not handled successfully. Can be used in
62 * conjunction with the irq_handler_entry to understand irq handler latencies.
63 */
64TRACE_EVENT(irq_handler_exit,
65
66 TP_PROTO(int irq, struct irqaction *action, int ret),
67
68 TP_ARGS(irq, action, ret),
69
70 TP_STRUCT__entry(
71 __field( int, irq )
72 __field( int, ret )
73 ),
74
75 TP_fast_assign(
76 __entry->irq = irq;
77 __entry->ret = ret;
78 ),
79
80 TP_printk("irq=%d return=%s",
81 __entry->irq, __entry->ret ? "handled" : "unhandled")
82);
83
84/**
85 * softirq_entry - called immediately before the softirq handler
86 * @h: pointer to struct softirq_action
87 * @vec: pointer to first struct softirq_action in softirq_vec array
88 *
89 * The @h parameter contains a pointer to the struct softirq_action
90 * which has a pointer to the action handler that is called. By subtracting
91 * the @vec pointer from the @h pointer, we can determine the softirq
92 * number. Also, when used in combination with the softirq_exit tracepoint
93 * we can determine the softirq latency.
94 */
95TRACE_EVENT(softirq_entry,
96
97 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
98
99 TP_ARGS(h, vec),
100
101 TP_STRUCT__entry(
102 __field( int, vec )
103 ),
104
105 TP_fast_assign(
106 __entry->vec = (int)(h - vec);
107 ),
108
109 TP_printk("softirq=%d action=%s", __entry->vec,
110 show_softirq_name(__entry->vec))
111);
112
113/**
114 * softirq_exit - called immediately after the softirq handler returns
115 * @h: pointer to struct softirq_action
116 * @vec: pointer to first struct softirq_action in softirq_vec array
117 *
118 * The @h parameter contains a pointer to the struct softirq_action
119 * that has handled the softirq. By subtracting the @vec pointer from
120 * the @h pointer, we can determine the softirq number. Also, when used in
121 * combination with the softirq_entry tracepoint we can determine the softirq
122 * latency.
123 */
124TRACE_EVENT(softirq_exit,
125
126 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
127
128 TP_ARGS(h, vec),
129
130 TP_STRUCT__entry(
131 __field( int, vec )
132 ),
133
134 TP_fast_assign(
135 __entry->vec = (int)(h - vec);
136 ),
137
138 TP_printk("softirq=%d action=%s", __entry->vec,
139 show_softirq_name(__entry->vec))
140);
141
142#endif /* _TRACE_IRQ_H */
143
144/* This part must be outside protection */
145#include <trace/define_trace.h>
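
The (int)(h - vec) computation documented above is plain C pointer subtraction, which yields an element count rather than a byte offset. A minimal sketch (BLOCK_SOFTIRQ == 4 in this era's <linux/interrupt.h>; softirq_vec[] lives in kernel/softirq.c):

	struct softirq_action *h = &softirq_vec[BLOCK_SOFTIRQ];
	int vec = (int)(h - softirq_vec);	/* == BLOCK_SOFTIRQ */
	/* show_softirq_name(vec) then renders "BLOCK" via __print_symbolic() */
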
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
new file mode 100644
index 000000000000..9baba50d6512
--- /dev/null
+++ b/include/trace/events/kmem.h
@@ -0,0 +1,231 @@
1#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_KMEM_H
3
4#include <linux/types.h>
5#include <linux/tracepoint.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM kmem
9
10/*
11 * The order of these masks is important. Matching masks will be seen
12 * first and the leftover flags will end up showing by themselves.
13 *
14 * For example, if we have GFP_KERNEL before GFP_USER we will get:
15 *
16 * GFP_KERNEL|GFP_HARDWALL
17 *
18 * Thus most bits set go first.
19 */
20#define show_gfp_flags(flags) \
21 (flags) ? __print_flags(flags, "|", \
22 {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \
23 {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \
24 {(unsigned long)GFP_USER, "GFP_USER"}, \
25 {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \
26 {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \
27 {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \
28 {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \
29 {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \
30 {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \
31 {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \
32 {(unsigned long)__GFP_IO, "GFP_IO"}, \
33 {(unsigned long)__GFP_COLD, "GFP_COLD"}, \
34 {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \
35 {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \
36 {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \
37 {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \
38 {(unsigned long)__GFP_COMP, "GFP_COMP"}, \
39 {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \
40 {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \
41 {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
42 {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
43 {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
44 {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \
45 ) : "GFP_NOWAIT"
46
47TRACE_EVENT(kmalloc,
48
49 TP_PROTO(unsigned long call_site,
50 const void *ptr,
51 size_t bytes_req,
52 size_t bytes_alloc,
53 gfp_t gfp_flags),
54
55 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
56
57 TP_STRUCT__entry(
58 __field( unsigned long, call_site )
59 __field( const void *, ptr )
60 __field( size_t, bytes_req )
61 __field( size_t, bytes_alloc )
62 __field( gfp_t, gfp_flags )
63 ),
64
65 TP_fast_assign(
66 __entry->call_site = call_site;
67 __entry->ptr = ptr;
68 __entry->bytes_req = bytes_req;
69 __entry->bytes_alloc = bytes_alloc;
70 __entry->gfp_flags = gfp_flags;
71 ),
72
73 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
74 __entry->call_site,
75 __entry->ptr,
76 __entry->bytes_req,
77 __entry->bytes_alloc,
78 show_gfp_flags(__entry->gfp_flags))
79);
80
81TRACE_EVENT(kmem_cache_alloc,
82
83 TP_PROTO(unsigned long call_site,
84 const void *ptr,
85 size_t bytes_req,
86 size_t bytes_alloc,
87 gfp_t gfp_flags),
88
89 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
90
91 TP_STRUCT__entry(
92 __field( unsigned long, call_site )
93 __field( const void *, ptr )
94 __field( size_t, bytes_req )
95 __field( size_t, bytes_alloc )
96 __field( gfp_t, gfp_flags )
97 ),
98
99 TP_fast_assign(
100 __entry->call_site = call_site;
101 __entry->ptr = ptr;
102 __entry->bytes_req = bytes_req;
103 __entry->bytes_alloc = bytes_alloc;
104 __entry->gfp_flags = gfp_flags;
105 ),
106
107 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
108 __entry->call_site,
109 __entry->ptr,
110 __entry->bytes_req,
111 __entry->bytes_alloc,
112 show_gfp_flags(__entry->gfp_flags))
113);
114
115TRACE_EVENT(kmalloc_node,
116
117 TP_PROTO(unsigned long call_site,
118 const void *ptr,
119 size_t bytes_req,
120 size_t bytes_alloc,
121 gfp_t gfp_flags,
122 int node),
123
124 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
125
126 TP_STRUCT__entry(
127 __field( unsigned long, call_site )
128 __field( const void *, ptr )
129 __field( size_t, bytes_req )
130 __field( size_t, bytes_alloc )
131 __field( gfp_t, gfp_flags )
132 __field( int, node )
133 ),
134
135 TP_fast_assign(
136 __entry->call_site = call_site;
137 __entry->ptr = ptr;
138 __entry->bytes_req = bytes_req;
139 __entry->bytes_alloc = bytes_alloc;
140 __entry->gfp_flags = gfp_flags;
141 __entry->node = node;
142 ),
143
144 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
145 __entry->call_site,
146 __entry->ptr,
147 __entry->bytes_req,
148 __entry->bytes_alloc,
149 show_gfp_flags(__entry->gfp_flags),
150 __entry->node)
151);
152
153TRACE_EVENT(kmem_cache_alloc_node,
154
155 TP_PROTO(unsigned long call_site,
156 const void *ptr,
157 size_t bytes_req,
158 size_t bytes_alloc,
159 gfp_t gfp_flags,
160 int node),
161
162 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
163
164 TP_STRUCT__entry(
165 __field( unsigned long, call_site )
166 __field( const void *, ptr )
167 __field( size_t, bytes_req )
168 __field( size_t, bytes_alloc )
169 __field( gfp_t, gfp_flags )
170 __field( int, node )
171 ),
172
173 TP_fast_assign(
174 __entry->call_site = call_site;
175 __entry->ptr = ptr;
176 __entry->bytes_req = bytes_req;
177 __entry->bytes_alloc = bytes_alloc;
178 __entry->gfp_flags = gfp_flags;
179 __entry->node = node;
180 ),
181
182 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
183 __entry->call_site,
184 __entry->ptr,
185 __entry->bytes_req,
186 __entry->bytes_alloc,
187 show_gfp_flags(__entry->gfp_flags),
188 __entry->node)
189);
190
191TRACE_EVENT(kfree,
192
193 TP_PROTO(unsigned long call_site, const void *ptr),
194
195 TP_ARGS(call_site, ptr),
196
197 TP_STRUCT__entry(
198 __field( unsigned long, call_site )
199 __field( const void *, ptr )
200 ),
201
202 TP_fast_assign(
203 __entry->call_site = call_site;
204 __entry->ptr = ptr;
205 ),
206
207 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
208);
209
210TRACE_EVENT(kmem_cache_free,
211
212 TP_PROTO(unsigned long call_site, const void *ptr),
213
214 TP_ARGS(call_site, ptr),
215
216 TP_STRUCT__entry(
217 __field( unsigned long, call_site )
218 __field( const void *, ptr )
219 ),
220
221 TP_fast_assign(
222 __entry->call_site = call_site;
223 __entry->ptr = ptr;
224 ),
225
226 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
227);
228#endif /* _TRACE_KMEM_H */
229
230/* This part must be outside protection */
231#include <trace/define_trace.h>
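
A worked example of the ordering rule above, assuming this era's gfp.h where GFP_KERNEL is __GFP_WAIT | __GFP_IO | __GFP_FS:

	gfp_t flags = GFP_KERNEL | __GFP_ZERO;
	/* __print_flags() tests the table top-down and strips matched
	 * bits, so the composite GFP_KERNEL is consumed first and the
	 * event prints "GFP_KERNEL|GFP_ZERO" rather than the noisier
	 * "GFP_WAIT|GFP_IO|...|GFP_ZERO". */
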
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
new file mode 100644
index 000000000000..0e956c9dfd7e
--- /dev/null
+++ b/include/trace/events/lockdep.h
@@ -0,0 +1,96 @@
1#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_LOCKDEP_H
3
4#include <linux/lockdep.h>
5#include <linux/tracepoint.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM lockdep
9
10#ifdef CONFIG_LOCKDEP
11
12TRACE_EVENT(lock_acquire,
13
14 TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
15 int trylock, int read, int check,
16 struct lockdep_map *next_lock, unsigned long ip),
17
18 TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
19
20 TP_STRUCT__entry(
21 __field(unsigned int, flags)
22 __string(name, lock->name)
23 ),
24
25 TP_fast_assign(
26 __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
27 __assign_str(name, lock->name);
28 ),
29
30 TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
31 (__entry->flags & 2) ? "read " : "",
32 __get_str(name))
33);
34
35TRACE_EVENT(lock_release,
36
37 TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
38
39 TP_ARGS(lock, nested, ip),
40
41 TP_STRUCT__entry(
42 __string(name, lock->name)
43 ),
44
45 TP_fast_assign(
46 __assign_str(name, lock->name);
47 ),
48
49 TP_printk("%s", __get_str(name))
50);
51
52#ifdef CONFIG_LOCK_STAT
53
54TRACE_EVENT(lock_contended,
55
56 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
57
58 TP_ARGS(lock, ip),
59
60 TP_STRUCT__entry(
61 __string(name, lock->name)
62 ),
63
64 TP_fast_assign(
65 __assign_str(name, lock->name);
66 ),
67
68 TP_printk("%s", __get_str(name))
69);
70
71TRACE_EVENT(lock_acquired,
72 TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
73
74 TP_ARGS(lock, ip, waittime),
75
76 TP_STRUCT__entry(
77 __string(name, lock->name)
78 __field(unsigned long, wait_usec)
79 __field(unsigned long, wait_nsec_rem)
80 ),
81 TP_fast_assign(
82 __assign_str(name, lock->name);
83 __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
84 __entry->wait_usec = (unsigned long) waittime;
85 ),
86 TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
87 __entry->wait_nsec_rem)
88);
89
90#endif
91#endif
92
93#endif /* _TRACE_LOCKDEP_H */
94
95/* This part must be outside protection */
96#include <trace/define_trace.h>
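
The lock_acquired event splits the wait time with do_div(), which divides a u64 in place and returns the remainder; that is what keeps the "%lu.%03lu us" format correct on 32-bit targets, where a plain 64-bit '/' is unavailable. A quick worked example:

	u64 waittime = 1234567;	/* nanoseconds */
	unsigned long rem = do_div(waittime, NSEC_PER_USEC);
	/* waittime is now 1234, rem is 567: printed as "1234.567 us" */
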
diff --git a/include/trace/sched_event_types.h b/include/trace/events/sched.h
index 63547dc1125f..24ab5bcff7b2 100644
--- a/include/trace/sched_event_types.h
+++ b/include/trace/events/sched.h
@@ -1,9 +1,8 @@
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
 
-/* use <trace/sched.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sched
@@ -157,6 +156,7 @@ TRACE_EVENT(sched_switch,
 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	prev_pid			)
 		__field(	int,	prev_prio			)
+		__field(	long,	prev_state			)
 		__array(	char,	next_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	next_pid			)
 		__field(	int,	next_prio			)
@@ -166,13 +166,19 @@ TRACE_EVENT(sched_switch,
 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 		__entry->prev_pid	= prev->pid;
 		__entry->prev_prio	= prev->prio;
+		__entry->prev_state	= prev->state;
 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 		__entry->next_pid	= next->pid;
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+	TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->prev_state ?
+		  __print_flags(__entry->prev_state, "|",
+				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
+				{ 128, "W" }) : "R",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
178 184
@@ -181,9 +187,9 @@ TRACE_EVENT(sched_switch,
  */
 TRACE_EVENT(sched_migrate_task,
 
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+	TP_PROTO(struct task_struct *p, int dest_cpu),
 
-	TP_ARGS(p, orig_cpu, dest_cpu),
+	TP_ARGS(p, dest_cpu),
 
 	TP_STRUCT__entry(
 		__array(	char,	comm,	TASK_COMM_LEN	)
@@ -197,7 +203,7 @@ TRACE_EVENT(sched_migrate_task,
 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 		__entry->pid		= p->pid;
 		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
+		__entry->orig_cpu	= task_cpu(p);
 		__entry->dest_cpu	= dest_cpu;
 	),
 
@@ -334,4 +340,7 @@ TRACE_EVENT(sched_signal_send,
 		  __entry->sig, __entry->comm, __entry->pid)
 );
 
-#undef TRACE_SYSTEM
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
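
The flag table added to sched_switch mirrors the task-state bits of this era's <linux/sched.h> (TASK_INTERRUPTIBLE = 1 through TASK_WAKEKILL = 128); a state of 0 means runnable and prints as "R". A small decode example:

	long prev_state = TASK_UNINTERRUPTIBLE;	/* == 2 */
	/* __print_flags(2, "|", ...) matches { 2, "D" }, so the line
	 * reads e.g. "task foo:123 [120] (D) ==> bar:456 [120]" */
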
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
new file mode 100644
index 000000000000..1e8fabb57c06
--- /dev/null
+++ b/include/trace/events/skb.h
@@ -0,0 +1,40 @@
1#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_SKB_H
3
4#include <linux/skbuff.h>
5#include <linux/tracepoint.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM skb
9
10/*
11 * Tracepoint for freeing an sk_buff:
12 */
13TRACE_EVENT(kfree_skb,
14
15 TP_PROTO(struct sk_buff *skb, void *location),
16
17 TP_ARGS(skb, location),
18
19 TP_STRUCT__entry(
20 __field( void *, skbaddr )
21 __field( unsigned short, protocol )
22 __field( void *, location )
23 ),
24
25 TP_fast_assign(
26 __entry->skbaddr = skb;
27 if (skb) {
28 __entry->protocol = ntohs(skb->protocol);
29 }
30 __entry->location = location;
31 ),
32
33 TP_printk("skbaddr=%p protocol=%u location=%p",
34 __entry->skbaddr, __entry->protocol, __entry->location)
35);
36
37#endif /* _TRACE_SKB_H */
38
39/* This part must be outside protection */
40#include <trace/define_trace.h>
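
A sketch of the kind of call site this event expects; the in-tree consumer (net/core/skbuff.c) passes the caller's return address as @location so drops can be attributed to a code path:

	void kfree_skb(struct sk_buff *skb)
	{
		/* ... refcount handling elided ... */
		trace_kfree_skb(skb, __builtin_return_address(0));
		__kfree_skb(skb);
	}
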
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644
index 000000000000..035f1bff288e
--- /dev/null
+++ b/include/trace/events/workqueue.h
@@ -0,0 +1,100 @@
1#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_WORKQUEUE_H
3
4#include <linux/workqueue.h>
5#include <linux/sched.h>
6#include <linux/tracepoint.h>
7
8#undef TRACE_SYSTEM
9#define TRACE_SYSTEM workqueue
10
11TRACE_EVENT(workqueue_insertion,
12
13 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
14
15 TP_ARGS(wq_thread, work),
16
17 TP_STRUCT__entry(
18 __array(char, thread_comm, TASK_COMM_LEN)
19 __field(pid_t, thread_pid)
20 __field(work_func_t, func)
21 ),
22
23 TP_fast_assign(
24 memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
25 __entry->thread_pid = wq_thread->pid;
26 __entry->func = work->func;
27 ),
28
29 TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
30 __entry->thread_pid, __entry->func)
31);
32
33TRACE_EVENT(workqueue_execution,
34
35 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
36
37 TP_ARGS(wq_thread, work),
38
39 TP_STRUCT__entry(
40 __array(char, thread_comm, TASK_COMM_LEN)
41 __field(pid_t, thread_pid)
42 __field(work_func_t, func)
43 ),
44
45 TP_fast_assign(
46 memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
47 __entry->thread_pid = wq_thread->pid;
48 __entry->func = work->func;
49 ),
50
51 TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
52 __entry->thread_pid, __entry->func)
53);
54
55/* Trace the creation of one workqueue thread on a cpu */
56TRACE_EVENT(workqueue_creation,
57
58 TP_PROTO(struct task_struct *wq_thread, int cpu),
59
60 TP_ARGS(wq_thread, cpu),
61
62 TP_STRUCT__entry(
63 __array(char, thread_comm, TASK_COMM_LEN)
64 __field(pid_t, thread_pid)
65 __field(int, cpu)
66 ),
67
68 TP_fast_assign(
69 memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
70 __entry->thread_pid = wq_thread->pid;
71 __entry->cpu = cpu;
72 ),
73
74 TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
75 __entry->thread_pid, __entry->cpu)
76);
77
78TRACE_EVENT(workqueue_destruction,
79
80 TP_PROTO(struct task_struct *wq_thread),
81
82 TP_ARGS(wq_thread),
83
84 TP_STRUCT__entry(
85 __array(char, thread_comm, TASK_COMM_LEN)
86 __field(pid_t, thread_pid)
87 ),
88
89 TP_fast_assign(
90 memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
91 __entry->thread_pid = wq_thread->pid;
92 ),
93
94 TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
95);
96
97#endif /* _TRACE_WORKQUEUE_H */
98
99/* This part must be outside protection */
100#include <trace/define_trace.h>
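
The insertion/execution pair above is meant to bracket queue latency. A hedged sketch of how a consumer of the decoded records might match them (struct wq_rec and its timestamps are hypothetical, supplied by whatever parses the trace):

	struct wq_rec { u64 ts; int thread_pid; void *func; };

	static u64 wq_latency_ns(struct wq_rec ins, struct wq_rec exec)
	{
		if (ins.thread_pid == exec.thread_pid && ins.func == exec.func)
			return exec.ts - ins.ts;	/* queued -> started */
		return 0;
	}
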
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
new file mode 100644
index 000000000000..1867553c61e5
--- /dev/null
+++ b/include/trace/ftrace.h
@@ -0,0 +1,591 @@
1/*
2 * Stage 1 of the trace events.
3 *
4 * Override the macros in <trace/trace_events.h> to include the following:
5 *
6 * struct ftrace_raw_<call> {
7 * struct trace_entry ent;
8 * <type> <item>;
9 * <type2> <item2>[<len>];
10 * [...]
11 * };
12 *
13 * The <type> <item> is created by the __field(type, item) macro or
14 * the __array(type2, item2, len) macro.
15 * We simply do "type item;", and that will create the fields
16 * in the structure.
17 */
18
19#include <linux/ftrace_event.h>
20
21#undef __field
22#define __field(type, item) type item;
23
24#undef __array
25#define __array(type, item, len) type item[len];
26
27#undef __dynamic_array
28#define __dynamic_array(type, item, len) unsigned short __data_loc_##item;
29
30#undef __string
31#define __string(item, src) __dynamic_array(char, item, -1)
32
33#undef TP_STRUCT__entry
34#define TP_STRUCT__entry(args...) args
35
36#undef TRACE_EVENT
37#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
38 struct ftrace_raw_##name { \
39 struct trace_entry ent; \
40 tstruct \
41 char __data[0]; \
42 }; \
43 static struct ftrace_event_call event_##name
44
45#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
46
47
48/*
49 * Stage 2 of the trace events.
50 *
51 * Include the following:
52 *
53 * struct ftrace_data_offsets_<call> {
54 * int <item1>;
55 * int <item2>;
56 * [...]
57 * };
58 *
59 * The __dynamic_array() macro will create each int <item>; this is
60 * to keep the offset of each array from the beginning of the event.
61 */
62
63#undef __field
64#define __field(type, item);
65
66#undef __array
67#define __array(type, item, len)
68
69#undef __dynamic_array
70#define __dynamic_array(type, item, len) int item;
71
72#undef __string
73#define __string(item, src) __dynamic_array(char, item, -1)
74
75#undef TRACE_EVENT
76#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
77 struct ftrace_data_offsets_##call { \
78 tstruct; \
79 };
80
81#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
82
83/*
84 * Set up the output format of the trace point.
85 *
86 * int
87 * ftrace_format_##call(struct trace_seq *s)
88 * {
89 * struct ftrace_raw_##call field;
90 * int ret;
91 *
92 * ret = trace_seq_printf(s, #type " " #item ";"
93 * " offset:%u; size:%u;\n",
94 * offsetof(struct ftrace_raw_##call, item),
95 * sizeof(field.type));
96 *
97 * }
98 */
99
100#undef TP_STRUCT__entry
101#define TP_STRUCT__entry(args...) args
102
103#undef __field
104#define __field(type, item) \
105 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
106 "offset:%u;\tsize:%u;\n", \
107 (unsigned int)offsetof(typeof(field), item), \
108 (unsigned int)sizeof(field.item)); \
109 if (!ret) \
110 return 0;
111
112#undef __array
113#define __array(type, item, len) \
114 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
115 "offset:%u;\tsize:%u;\n", \
116 (unsigned int)offsetof(typeof(field), item), \
117 (unsigned int)sizeof(field.item)); \
118 if (!ret) \
119 return 0;
120
121#undef __dynamic_array
122#define __dynamic_array(type, item, len) \
123 ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \
124 "offset:%u;\tsize:%u;\n", \
125 (unsigned int)offsetof(typeof(field), \
126 __data_loc_##item), \
127 (unsigned int)sizeof(field.__data_loc_##item)); \
128 if (!ret) \
129 return 0;
130
131#undef __string
132#define __string(item, src) __dynamic_array(char, item, -1)
133
134#undef __entry
135#define __entry REC
136
137#undef __print_symbolic
138#undef __get_dynamic_array
139#undef __get_str
140
141#undef TP_printk
142#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
143
144#undef TP_fast_assign
145#define TP_fast_assign(args...) args
146
147#undef TRACE_EVENT
148#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
149static int \
150ftrace_format_##call(struct trace_seq *s) \
151{ \
152 struct ftrace_raw_##call field __attribute__((unused)); \
153 int ret = 0; \
154 \
155 tstruct; \
156 \
157 trace_seq_printf(s, "\nprint fmt: " print); \
158 \
159 return ret; \
160}
161
162#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
163
164/*
165 * Stage 3 of the trace events.
166 *
167 * Override the macros in <trace/trace_events.h> to include the following:
168 *
169 * enum print_line_t
170 * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
171 * {
172 * struct trace_seq *s = &iter->seq;
173 * struct ftrace_raw_<call> *field; <-- defined in stage 1
174 * struct trace_entry *entry;
175 * struct trace_seq *p;
176 * int ret;
177 *
178 * entry = iter->ent;
179 *
180 * if (entry->type != event_<call>.id) {
181 * WARN_ON_ONCE(1);
182 * return TRACE_TYPE_UNHANDLED;
183 * }
184 *
185 * field = (typeof(field))entry;
186 *
187 * p = get_cpu_var(ftrace_event_seq);
188 * trace_seq_init(p);
189 * ret = trace_seq_printf(s, <TP_printk> "\n");
190 * put_cpu();
191 * if (!ret)
192 * return TRACE_TYPE_PARTIAL_LINE;
193 *
194 * return TRACE_TYPE_HANDLED;
195 * }
196 *
197 * This is the method used to print the raw event to the trace
198 * output format. Note, this is not needed if the data is read
199 * in binary.
200 */
201
202#undef __entry
203#define __entry field
204
205#undef TP_printk
206#define TP_printk(fmt, args...) fmt "\n", args
207
208#undef __get_dynamic_array
209#define __get_dynamic_array(field) \
210 ((void *)__entry + __entry->__data_loc_##field)
211
212#undef __get_str
213#define __get_str(field) (char *)__get_dynamic_array(field)
214
215#undef __print_flags
216#define __print_flags(flag, delim, flag_array...) \
217 ({ \
218 static const struct trace_print_flags flags[] = \
219 { flag_array, { -1, NULL }}; \
220 ftrace_print_flags_seq(p, delim, flag, flags); \
221 })
222
223#undef __print_symbolic
224#define __print_symbolic(value, symbol_array...) \
225 ({ \
226 static const struct trace_print_flags symbols[] = \
227 { symbol_array, { -1, NULL }}; \
228 ftrace_print_symbols_seq(p, value, symbols); \
229 })
230
231#undef TRACE_EVENT
232#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
233enum print_line_t \
234ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
235{ \
236 struct trace_seq *s = &iter->seq; \
237 struct ftrace_raw_##call *field; \
238 struct trace_entry *entry; \
239 struct trace_seq *p; \
240 int ret; \
241 \
242 entry = iter->ent; \
243 \
244 if (entry->type != event_##call.id) { \
245 WARN_ON_ONCE(1); \
246 return TRACE_TYPE_UNHANDLED; \
247 } \
248 \
249 field = (typeof(field))entry; \
250 \
251 p = &get_cpu_var(ftrace_event_seq); \
252 trace_seq_init(p); \
253 ret = trace_seq_printf(s, #call ": " print); \
254 put_cpu(); \
255 if (!ret) \
256 return TRACE_TYPE_PARTIAL_LINE; \
257 \
258 return TRACE_TYPE_HANDLED; \
259}
260
261#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
262
263#undef __field
264#define __field(type, item) \
265 ret = trace_define_field(event_call, #type, #item, \
266 offsetof(typeof(field), item), \
267 sizeof(field.item), is_signed_type(type)); \
268 if (ret) \
269 return ret;
270
271#undef __array
272#define __array(type, item, len) \
273 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
274 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
275 offsetof(typeof(field), item), \
276 sizeof(field.item), 0); \
277 if (ret) \
278 return ret;
279
280#undef __dynamic_array
281#define __dynamic_array(type, item, len) \
282 ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\
283 offsetof(typeof(field), __data_loc_##item), \
284 sizeof(field.__data_loc_##item), 0);
285
286#undef __string
287#define __string(item, src) __dynamic_array(char, item, -1)
288
289#undef TRACE_EVENT
290#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
291int \
292ftrace_define_fields_##call(void) \
293{ \
294 struct ftrace_raw_##call field; \
295 struct ftrace_event_call *event_call = &event_##call; \
296 int ret; \
297 \
298 __common_field(int, type, 1); \
299 __common_field(unsigned char, flags, 0); \
300 __common_field(unsigned char, preempt_count, 0); \
301 __common_field(int, pid, 1); \
302 __common_field(int, tgid, 1); \
303 \
304 tstruct; \
305 \
306 return ret; \
307}
308
309#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
310
311/*
312 * Remember the offset of each array from the beginning of the event.
313 */
314
315#undef __entry
316#define __entry entry
317
318#undef __field
319#define __field(type, item)
320
321#undef __array
322#define __array(type, item, len)
323
324#undef __dynamic_array
325#define __dynamic_array(type, item, len) \
326 __data_offsets->item = __data_size + \
327 offsetof(typeof(*entry), __data); \
328 __data_size += (len) * sizeof(type);
329
330#undef __string
331#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \
332
333#undef TRACE_EVENT
334#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
335static inline int ftrace_get_offsets_##call( \
336 struct ftrace_data_offsets_##call *__data_offsets, proto) \
337{ \
338 int __data_size = 0; \
339 struct ftrace_raw_##call __maybe_unused *entry; \
340 \
341 tstruct; \
342 \
343 return __data_size; \
344}
345
346#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
347
348/*
349 * Stage 4 of the trace events.
350 *
351 * Override the macros in <trace/trace_events.h> to include the following:
352 *
353 * static void ftrace_event_<call>(proto)
354 * {
355 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
356 * }
357 *
358 * static int ftrace_reg_event_<call>(void)
359 * {
360 * int ret;
361 *
362 * ret = register_trace_<call>(ftrace_event_<call>);
363 * if (!ret)
364 * pr_info("event trace: Could not activate trace point "
365 * "probe to <call>");
366 * return ret;
367 * }
368 *
369 * static void ftrace_unreg_event_<call>(void)
370 * {
371 * unregister_trace_<call>(ftrace_event_<call>);
372 * }
373 *
374 *
375 * For those macros defined with TRACE_EVENT:
376 *
377 * static struct ftrace_event_call event_<call>;
378 *
379 * static void ftrace_raw_event_<call>(proto)
380 * {
381 * struct ring_buffer_event *event;
382 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
383 * unsigned long irq_flags;
384 * int pc;
385 *
386 * local_save_flags(irq_flags);
387 * pc = preempt_count();
388 *
389 * event = trace_current_buffer_lock_reserve(event_<call>.id,
390 * sizeof(struct ftrace_raw_<call>),
391 * irq_flags, pc);
392 * if (!event)
393 * return;
394 * entry = ring_buffer_event_data(event);
395 *
396 * <assign>; <-- Here we assign the entries by the __field and
397 * __array macros.
398 *
399 * trace_current_buffer_unlock_commit(event, irq_flags, pc);
400 * }
401 *
402 * static int ftrace_raw_reg_event_<call>(void)
403 * {
404 * int ret;
405 *
406 * ret = register_trace_<call>(ftrace_raw_event_<call>);
407 * if (ret)
408 * pr_info("event trace: Could not activate trace point "
409 * "probe to <call>");
410 * return ret;
411 * }
412 *
413 * static void ftrace_raw_unreg_event_<call>(void)
414 * {
415 * unregister_trace_<call>(ftrace_raw_event_<call>);
416 * }
417 *
418 * static struct trace_event ftrace_event_type_<call> = {
419 * .trace = ftrace_raw_output_<call>, <-- stage 2
420 * };
421 *
422 * static int ftrace_raw_init_event_<call>(void)
423 * {
424 * int id;
425 *
426 * id = register_ftrace_event(&ftrace_event_type_<call>);
427 * if (!id)
428 * return -ENODEV;
429 * event_<call>.id = id;
430 * return 0;
431 * }
432 *
433 * static struct ftrace_event_call __used
434 * __attribute__((__aligned__(4)))
435 * __attribute__((section("_ftrace_events"))) event_<call> = {
436 * .name = "<call>",
437 * .system = "<system>",
438 * .raw_init = ftrace_raw_init_event_<call>,
439 * .regfunc = ftrace_raw_reg_event_<call>,
440 * .unregfunc = ftrace_raw_unreg_event_<call>,
441 * .show_format = ftrace_format_<call>,
442 * };
443 *
444 */
445
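For orientation, a TRACE_EVENT definition of the kind these four stages expand might look like the following sketch (a hypothetical event; none of these names appear in this patch):

TRACE_EVENT(my_event,

	TP_PROTO(int id, const char *name),

	TP_ARGS(id, name),

	TP_STRUCT__entry(
		__field(	int,	id	)
		__string(	name,	name	)
	),

	TP_fast_assign(
		__entry->id = id;
		__assign_str(name, name);
	),

	TP_printk("id=%d name=%s", __entry->id, __get_str(name))
);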
446#undef TP_FMT
447#define TP_FMT(fmt, args...) fmt "\n", ##args
448
449#ifdef CONFIG_EVENT_PROFILE
450#define _TRACE_PROFILE(call, proto, args) \
451static void ftrace_profile_##call(proto) \
452{ \
453 extern void perf_tpcounter_event(int); \
454 perf_tpcounter_event(event_##call.id); \
455} \
456 \
457static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
458{ \
459 int ret = 0; \
460 \
461 if (!atomic_inc_return(&event_call->profile_count)) \
462 ret = register_trace_##call(ftrace_profile_##call); \
463 \
464 return ret; \
465} \
466 \
467static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
468{ \
469 if (atomic_add_negative(-1, &event_call->profile_count)) \
470 unregister_trace_##call(ftrace_profile_##call); \
471}
472
473#define _TRACE_PROFILE_INIT(call) \
474 .profile_count = ATOMIC_INIT(-1), \
475 .profile_enable = ftrace_profile_enable_##call, \
476 .profile_disable = ftrace_profile_disable_##call,
477
478#else
479#define _TRACE_PROFILE(call, proto, args)
480#define _TRACE_PROFILE_INIT(call)
481#endif
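The profile hooks above rely on a counter that starts at -1 so plain atomics can detect the first enable and the last disable without a lock: atomic_inc_return() returns 0 only for the first enabler, and atomic_add_negative(-1, ...) returns true only when the count drops back to -1. The same idiom in isolation (register_hook()/unregister_hook() are placeholders):

static atomic_t count = ATOMIC_INIT(-1);

void enable(void)
{
	if (!atomic_inc_return(&count))		/* -1 -> 0: first enabler only */
		register_hook();
}

void disable(void)
{
	if (atomic_add_negative(-1, &count))	/* 0 -> -1: last disabler only */
		unregister_hook();
}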
482
483#undef __entry
484#define __entry entry
485
486#undef __field
487#define __field(type, item)
488
489#undef __array
490#define __array(type, item, len)
491
492#undef __dynamic_array
493#define __dynamic_array(type, item, len) \
494 __entry->__data_loc_##item = __data_offsets.item;
495
496#undef __string
497#define __string(item, src) __dynamic_array(char, item, -1) \
498
499#undef __assign_str
500#define __assign_str(dst, src) \
501 strcpy(__get_str(dst), src);
502
503#undef TRACE_EVENT
504#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
505_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
506 \
507static struct ftrace_event_call event_##call; \
508 \
509static void ftrace_raw_event_##call(proto) \
510{ \
511 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
512 struct ftrace_event_call *event_call = &event_##call; \
513 struct ring_buffer_event *event; \
514 struct ftrace_raw_##call *entry; \
515 unsigned long irq_flags; \
516 int __data_size; \
517 int pc; \
518 \
519 local_save_flags(irq_flags); \
520 pc = preempt_count(); \
521 \
522 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
523 \
524 event = trace_current_buffer_lock_reserve(event_##call.id, \
525 sizeof(*entry) + __data_size, \
526 irq_flags, pc); \
527 if (!event) \
528 return; \
529 entry = ring_buffer_event_data(event); \
530 \
531 \
532 tstruct \
533 \
534 { assign; } \
535 \
536 if (!filter_current_check_discard(event_call, entry, event)) \
537 trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
538} \
539 \
540static int ftrace_raw_reg_event_##call(void) \
541{ \
542 int ret; \
543 \
544 ret = register_trace_##call(ftrace_raw_event_##call); \
545 if (ret) \
546 pr_info("event trace: Could not activate trace point " \
547 "probe to " #call "\n"); \
548 return ret; \
549} \
550 \
551static void ftrace_raw_unreg_event_##call(void) \
552{ \
553 unregister_trace_##call(ftrace_raw_event_##call); \
554} \
555 \
556static struct trace_event ftrace_event_type_##call = { \
557 .trace = ftrace_raw_output_##call, \
558}; \
559 \
560static int ftrace_raw_init_event_##call(void) \
561{ \
562 int id; \
563 \
564 id = register_ftrace_event(&ftrace_event_type_##call); \
565 if (!id) \
566 return -ENODEV; \
567 event_##call.id = id; \
568 INIT_LIST_HEAD(&event_##call.fields); \
569 init_preds(&event_##call); \
570 return 0; \
571} \
572 \
573static struct ftrace_event_call __used \
574__attribute__((__aligned__(4))) \
575__attribute__((section("_ftrace_events"))) event_##call = { \
576 .name = #call, \
577 .system = __stringify(TRACE_SYSTEM), \
578 .event = &ftrace_event_type_##call, \
579 .raw_init = ftrace_raw_init_event_##call, \
580 .regfunc = ftrace_raw_reg_event_##call, \
581 .unregfunc = ftrace_raw_unreg_event_##call, \
582 .show_format = ftrace_format_##call, \
583 .define_fields = ftrace_define_fields_##call, \
584 _TRACE_PROFILE_INIT(call) \
585}
586
587#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
588
589#undef _TRACE_PROFILE
590#undef _TRACE_PROFILE_INIT
591
diff --git a/include/trace/irq.h b/include/trace/irq.h
deleted file mode 100644
index ff5d4495dc37..000000000000
--- a/include/trace/irq.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef _TRACE_IRQ_H
2#define _TRACE_IRQ_H
3
4#include <linux/interrupt.h>
5#include <linux/tracepoint.h>
6
7#include <trace/irq_event_types.h>
8
9#endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
deleted file mode 100644
index 85964ebd47ec..000000000000
--- a/include/trace/irq_event_types.h
+++ /dev/null
@@ -1,55 +0,0 @@
1
2/* use <trace/irq.h> instead */
3#ifndef TRACE_FORMAT
4# error Do not include this file directly.
5# error Unless you know what you are doing.
6#endif
7
8#undef TRACE_SYSTEM
9#define TRACE_SYSTEM irq
10
11/*
12 * Tracepoint for entry of interrupt handler:
13 */
14TRACE_FORMAT(irq_handler_entry,
15 TP_PROTO(int irq, struct irqaction *action),
16 TP_ARGS(irq, action),
17 TP_FMT("irq=%d handler=%s", irq, action->name)
18 );
19
20/*
21 * Tracepoint for return of an interrupt handler:
22 */
23TRACE_EVENT(irq_handler_exit,
24
25 TP_PROTO(int irq, struct irqaction *action, int ret),
26
27 TP_ARGS(irq, action, ret),
28
29 TP_STRUCT__entry(
30 __field( int, irq )
31 __field( int, ret )
32 ),
33
34 TP_fast_assign(
35 __entry->irq = irq;
36 __entry->ret = ret;
37 ),
38
39 TP_printk("irq=%d return=%s",
40 __entry->irq, __entry->ret ? "handled" : "unhandled")
41);
42
43TRACE_FORMAT(softirq_entry,
44 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
45 TP_ARGS(h, vec),
46 TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
47 );
48
49TRACE_FORMAT(softirq_exit,
50 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
51 TP_ARGS(h, vec),
52 TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
53 );
54
55#undef TRACE_SYSTEM
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
deleted file mode 100644
index 28ee69f9cd46..000000000000
--- a/include/trace/kmemtrace.h
+++ /dev/null
@@ -1,63 +0,0 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <linux/tracepoint.h>
13#include <linux/types.h>
14
15#ifdef CONFIG_KMEMTRACE
16extern void kmemtrace_init(void);
17#else
18static inline void kmemtrace_init(void)
19{
20}
21#endif
22
23DECLARE_TRACE(kmalloc,
24 TP_PROTO(unsigned long call_site,
25 const void *ptr,
26 size_t bytes_req,
27 size_t bytes_alloc,
28 gfp_t gfp_flags),
29 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
30DECLARE_TRACE(kmem_cache_alloc,
31 TP_PROTO(unsigned long call_site,
32 const void *ptr,
33 size_t bytes_req,
34 size_t bytes_alloc,
35 gfp_t gfp_flags),
36 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
37DECLARE_TRACE(kmalloc_node,
38 TP_PROTO(unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node),
44 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
45DECLARE_TRACE(kmem_cache_alloc_node,
46 TP_PROTO(unsigned long call_site,
47 const void *ptr,
48 size_t bytes_req,
49 size_t bytes_alloc,
50 gfp_t gfp_flags,
51 int node),
52 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
53DECLARE_TRACE(kfree,
54 TP_PROTO(unsigned long call_site, const void *ptr),
55 TP_ARGS(call_site, ptr));
56DECLARE_TRACE(kmem_cache_free,
57 TP_PROTO(unsigned long call_site, const void *ptr),
58 TP_ARGS(call_site, ptr));
59
60#endif /* __KERNEL__ */
61
62#endif /* _LINUX_KMEMTRACE_H */
63
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
deleted file mode 100644
index 5ca67df87f2a..000000000000
--- a/include/trace/lockdep.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef _TRACE_LOCKDEP_H
2#define _TRACE_LOCKDEP_H
3
4#include <linux/lockdep.h>
5#include <linux/tracepoint.h>
6
7#include <trace/lockdep_event_types.h>
8
9#endif
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
deleted file mode 100644
index adccfcd2ec8f..000000000000
--- a/include/trace/lockdep_event_types.h
+++ /dev/null
@@ -1,44 +0,0 @@
1
2#ifndef TRACE_FORMAT
3# error Do not include this file directly.
4# error Unless you know what you are doing.
5#endif
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM lock
9
10#ifdef CONFIG_LOCKDEP
11
12TRACE_FORMAT(lock_acquire,
13 TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
14 int trylock, int read, int check,
15 struct lockdep_map *next_lock, unsigned long ip),
16 TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
17 TP_FMT("%s%s%s", trylock ? "try " : "",
18 read ? "read " : "", lock->name)
19 );
20
21TRACE_FORMAT(lock_release,
22 TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
23 TP_ARGS(lock, nested, ip),
24 TP_FMT("%s", lock->name)
25 );
26
27#ifdef CONFIG_LOCK_STAT
28
29TRACE_FORMAT(lock_contended,
30 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
31 TP_ARGS(lock, ip),
32 TP_FMT("%s", lock->name)
33 );
34
35TRACE_FORMAT(lock_acquired,
36 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
37 TP_ARGS(lock, ip),
38 TP_FMT("%s", lock->name)
39 );
40
41#endif
42#endif
43
44#undef TRACE_SYSTEM
diff --git a/include/trace/sched.h b/include/trace/sched.h
deleted file mode 100644
index 4e372a1a29bf..000000000000
--- a/include/trace/sched.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef _TRACE_SCHED_H
2#define _TRACE_SCHED_H
3
4#include <linux/sched.h>
5#include <linux/tracepoint.h>
6
7#include <trace/sched_event_types.h>
8
9#endif
diff --git a/include/trace/skb.h b/include/trace/skb.h
deleted file mode 100644
index b66206d9be72..000000000000
--- a/include/trace/skb.h
+++ /dev/null
@@ -1,11 +0,0 @@
1#ifndef _TRACE_SKB_H_
2#define _TRACE_SKB_H_
3
4#include <linux/skbuff.h>
5#include <linux/tracepoint.h>
6
7DECLARE_TRACE(kfree_skb,
8 TP_PROTO(struct sk_buff *skb, void *location),
9 TP_ARGS(skb, location));
10
11#endif
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
deleted file mode 100644
index df56f5694be6..000000000000
--- a/include/trace/trace_event_types.h
+++ /dev/null
@@ -1,5 +0,0 @@
1/* trace/<type>_event_types.h here */
2
3#include <trace/sched_event_types.h>
4#include <trace/irq_event_types.h>
5#include <trace/lockdep_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
deleted file mode 100644
index fd13750ca4ba..000000000000
--- a/include/trace/trace_events.h
+++ /dev/null
@@ -1,5 +0,0 @@
1/* trace/<type>.h here */
2
3#include <trace/sched.h>
4#include <trace/irq.h>
5#include <trace/lockdep.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
deleted file mode 100644
index 7626523deeba..000000000000
--- a/include/trace/workqueue.h
+++ /dev/null
@@ -1,25 +0,0 @@
1#ifndef __TRACE_WORKQUEUE_H
2#define __TRACE_WORKQUEUE_H
3
4#include <linux/tracepoint.h>
5#include <linux/workqueue.h>
6#include <linux/sched.h>
7
8DECLARE_TRACE(workqueue_insertion,
9 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
10 TP_ARGS(wq_thread, work));
11
12DECLARE_TRACE(workqueue_execution,
13 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
14 TP_ARGS(wq_thread, work));
15
16/* Trace the creation of one workqueue thread on a cpu */
17DECLARE_TRACE(workqueue_creation,
18 TP_PROTO(struct task_struct *wq_thread, int cpu),
19 TP_ARGS(wq_thread, cpu));
20
21DECLARE_TRACE(workqueue_destruction,
22 TP_PROTO(struct task_struct *wq_thread),
23 TP_ARGS(wq_thread));
24
25#endif /* __TRACE_WORKQUEUE_H */
diff --git a/include/xen/Kbuild b/include/xen/Kbuild
new file mode 100644
index 000000000000..4e65c16a445b
--- /dev/null
+++ b/include/xen/Kbuild
@@ -0,0 +1 @@
header-y += evtchn.h
diff --git a/include/xen/events.h b/include/xen/events.h
index 0d5f1adc0363..e68d59a90ca8 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq);
53 irq will be disabled so it won't deliver an interrupt. */ 53 irq will be disabled so it won't deliver an interrupt. */
54void xen_poll_irq(int irq); 54void xen_poll_irq(int irq);
55 55
56/* Determine the IRQ which is bound to an event channel */
57unsigned irq_from_evtchn(unsigned int evtchn);
58
56#endif /* _XEN_EVENTS_H */ 59#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h
new file mode 100644
index 000000000000..14e833ee4e0b
--- /dev/null
+++ b/include/xen/evtchn.h
@@ -0,0 +1,88 @@
1/******************************************************************************
2 * evtchn.h
3 *
4 * Interface to /dev/xen/evtchn.
5 *
6 * Copyright (c) 2003-2005, K A Fraser
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#ifndef __LINUX_PUBLIC_EVTCHN_H__
34#define __LINUX_PUBLIC_EVTCHN_H__
35
36/*
37 * Bind a fresh port to VIRQ @virq.
38 * Return allocated port.
39 */
40#define IOCTL_EVTCHN_BIND_VIRQ \
41 _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
42struct ioctl_evtchn_bind_virq {
43 unsigned int virq;
44};
45
46/*
47 * Bind a fresh port to remote <@remote_domain, @remote_port>.
48 * Return allocated port.
49 */
50#define IOCTL_EVTCHN_BIND_INTERDOMAIN \
51 _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
52struct ioctl_evtchn_bind_interdomain {
53 unsigned int remote_domain, remote_port;
54};
55
56/*
57 * Allocate a fresh port for binding to @remote_domain.
58 * Return allocated port.
59 */
60#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \
61 _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
62struct ioctl_evtchn_bind_unbound_port {
63 unsigned int remote_domain;
64};
65
66/*
67 * Unbind previously allocated @port.
68 */
69#define IOCTL_EVTCHN_UNBIND \
70 _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
71struct ioctl_evtchn_unbind {
72 unsigned int port;
73};
74
75/*
76 * Send a notification to the given @port.
77 */
78#define IOCTL_EVTCHN_NOTIFY \
79 _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
80struct ioctl_evtchn_notify {
81 unsigned int port;
82};
83
84/* Clear and reinitialise the event buffer. Clear error condition. */
85#define IOCTL_EVTCHN_RESET \
86 _IOC(_IOC_NONE, 'E', 5, 0)
87
88#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
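These ioctls are issued against an open /dev/xen/evtchn file descriptor, and each binding ioctl returns the allocated port number. A rough userspace sketch (error handling trimmed; the read() semantics, one unsigned int per pending port, are assumed from the driver rather than spelled out in this header):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <xen/evtchn.h>

int bind_virq_and_wait(unsigned int virq)
{
	struct ioctl_evtchn_bind_virq bind = { .virq = virq };
	unsigned int port;
	int fd, rc;

	fd = open("/dev/xen/evtchn", O_RDWR);
	if (fd < 0)
		return fd;
	rc = ioctl(fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);	/* rc is the port */
	if (rc >= 0)
		read(fd, &port, sizeof(port));	/* block until it fires */
	close(fd);
	return rc < 0 ? rc : 0;
}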
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index 453235e923f0..e8b6519d47e9 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -57,4 +57,7 @@ struct xen_feature_info {
57/* Declares the features reported by XENVER_get_features. */ 57/* Declares the features reported by XENVER_get_features. */
58#include "features.h" 58#include "features.h"
59 59
60/* arg == NULL; returns host memory page size. */
61#define XENVER_pagesize 7
62
60#endif /* __XEN_PUBLIC_VERSION_H__ */ 63#endif /* __XEN_PUBLIC_VERSION_H__ */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index f87f9614844d..b9763badbd77 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -91,8 +91,7 @@ struct xenbus_driver {
91 void (*otherend_changed)(struct xenbus_device *dev, 91 void (*otherend_changed)(struct xenbus_device *dev,
92 enum xenbus_state backend_state); 92 enum xenbus_state backend_state);
93 int (*remove)(struct xenbus_device *dev); 93 int (*remove)(struct xenbus_device *dev);
94 int (*suspend)(struct xenbus_device *dev); 94 int (*suspend)(struct xenbus_device *dev, pm_message_t state);
95 int (*suspend_cancel)(struct xenbus_device *dev);
96 int (*resume)(struct xenbus_device *dev); 95 int (*resume)(struct xenbus_device *dev);
97 int (*uevent)(struct xenbus_device *, char **, int, char *, int); 96 int (*uevent)(struct xenbus_device *, char **, int, char *, int);
98 struct device_driver driver; 97 struct device_driver driver;
diff --git a/init/Kconfig b/init/Kconfig
index 7be4d3836745..d4e9671347ee 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -308,7 +308,7 @@ menu "RCU Subsystem"
308 308
309choice 309choice
310 prompt "RCU Implementation" 310 prompt "RCU Implementation"
311 default CLASSIC_RCU 311 default TREE_RCU
312 312
313config CLASSIC_RCU 313config CLASSIC_RCU
314 bool "Classic RCU" 314 bool "Classic RCU"
diff --git a/init/main.c b/init/main.c
index 3bbf93be744c..bb7dc57eee36 100644
--- a/init/main.c
+++ b/init/main.c
@@ -64,6 +64,7 @@
64#include <linux/idr.h> 64#include <linux/idr.h>
65#include <linux/ftrace.h> 65#include <linux/ftrace.h>
66#include <linux/async.h> 66#include <linux/async.h>
67#include <linux/kmemtrace.h>
67#include <trace/boot.h> 68#include <trace/boot.h>
68 69
69#include <asm/io.h> 70#include <asm/io.h>
@@ -71,7 +72,6 @@
71#include <asm/setup.h> 72#include <asm/setup.h>
72#include <asm/sections.h> 73#include <asm/sections.h>
73#include <asm/cacheflush.h> 74#include <asm/cacheflush.h>
74#include <trace/kmemtrace.h>
75 75
76#ifdef CONFIG_X86_LOCAL_APIC 76#ifdef CONFIG_X86_LOCAL_APIC
77#include <asm/smp.h> 77#include <asm/smp.h>
@@ -566,8 +566,7 @@ asmlinkage void __init start_kernel(void)
566 tick_init(); 566 tick_init();
567 boot_cpu_init(); 567 boot_cpu_init();
568 page_address_init(); 568 page_address_init();
569 printk(KERN_NOTICE); 569 printk(KERN_NOTICE "%s", linux_banner);
570 printk(linux_banner);
571 setup_arch(&command_line); 570 setup_arch(&command_line);
572 mm_init_owner(&init_mm, &init_task); 571 mm_init_owner(&init_mm, &init_task);
573 setup_command_line(command_line); 572 setup_command_line(command_line);
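The printk() change above folds two calls into one and, more importantly, stops passing linux_banner as the format string itself, where any '%' sequence would be interpreted. The hazard in miniature:

printk(buf);		/* risky: '%' sequences in buf are interpreted */
printk("%s", buf);	/* safe: buf is printed verbatim */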
diff --git a/ipc/sem.c b/ipc/sem.c
index 16a2189e96f9..87c2b641fd7b 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk)
1290 int i; 1290 int i;
1291 1291
1292 rcu_read_lock(); 1292 rcu_read_lock();
1293 un = list_entry(rcu_dereference(ulp->list_proc.next), 1293 un = list_entry_rcu(ulp->list_proc.next,
1294 struct sem_undo, list_proc); 1294 struct sem_undo, list_proc);
1295 if (&un->list_proc == &ulp->list_proc) 1295 if (&un->list_proc == &ulp->list_proc)
1296 semid = -1; 1296 semid = -1;
1297 else 1297 else
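list_entry_rcu(), used above, is the RCU-aware counterpart of list_entry(): it folds the rcu_dereference() into the container_of() step, essentially:

#define list_entry_rcu(ptr, type, member) \
	container_of(rcu_dereference(ptr), type, member)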
diff --git a/ipc/shm.c b/ipc/shm.c
index faa46da99ebe..425971600485 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -969,10 +969,13 @@ SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
969SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) 969SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
970{ 970{
971 struct mm_struct *mm = current->mm; 971 struct mm_struct *mm = current->mm;
972 struct vm_area_struct *vma, *next; 972 struct vm_area_struct *vma;
973 unsigned long addr = (unsigned long)shmaddr; 973 unsigned long addr = (unsigned long)shmaddr;
974 loff_t size = 0;
975 int retval = -EINVAL; 974 int retval = -EINVAL;
975#ifdef CONFIG_MMU
976 loff_t size = 0;
977 struct vm_area_struct *next;
978#endif
976 979
977 if (addr & ~PAGE_MASK) 980 if (addr & ~PAGE_MASK)
978 return retval; 981 return retval;
diff --git a/kernel/Makefile b/kernel/Makefile
index 42423665660a..a35eee3436de 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o
93obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o 93obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
94obj-$(CONFIG_FUNCTION_TRACER) += trace/ 94obj-$(CONFIG_FUNCTION_TRACER) += trace/
95obj-$(CONFIG_TRACING) += trace/ 95obj-$(CONFIG_TRACING) += trace/
96obj-$(CONFIG_X86_DS) += trace/
96obj-$(CONFIG_SMP) += sched_cpupri.o 97obj-$(CONFIG_SMP) += sched_cpupri.o
97obj-$(CONFIG_SLOW_WORK) += slow-work.o 98obj-$(CONFIG_SLOW_WORK) += slow-work.o
98 99
diff --git a/kernel/async.c b/kernel/async.c
index 968ef9457d4e..27235f5de198 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -92,19 +92,18 @@ extern int initcall_debug;
92static async_cookie_t __lowest_in_progress(struct list_head *running) 92static async_cookie_t __lowest_in_progress(struct list_head *running)
93{ 93{
94 struct async_entry *entry; 94 struct async_entry *entry;
95
95 if (!list_empty(running)) { 96 if (!list_empty(running)) {
96 entry = list_first_entry(running, 97 entry = list_first_entry(running,
97 struct async_entry, list); 98 struct async_entry, list);
98 return entry->cookie; 99 return entry->cookie;
99 } else if (!list_empty(&async_pending)) {
100 entry = list_first_entry(&async_pending,
101 struct async_entry, list);
102 return entry->cookie;
103 } else {
104 /* nothing in progress... next_cookie is "infinity" */
105 return next_cookie;
106 } 100 }
107 101
102 list_for_each_entry(entry, &async_pending, list)
103 if (entry->running == running)
104 return entry->cookie;
105
106 return next_cookie; /* "infinity" value */
108} 107}
109 108
110static async_cookie_t lowest_in_progress(struct list_head *running) 109static async_cookie_t lowest_in_progress(struct list_head *running)
diff --git a/kernel/compat.c b/kernel/compat.c
index 42d56544460f..f6c204f07ea6 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
882 882
883} 883}
884 884
885asmlinkage long
886compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
887 struct compat_siginfo __user *uinfo)
888{
889 siginfo_t info;
890
891 if (copy_siginfo_from_user32(&info, uinfo))
892 return -EFAULT;
893 return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
894}
895
885#ifdef __ARCH_WANT_COMPAT_SYS_TIME 896#ifdef __ARCH_WANT_COMPAT_SYS_TIME
886 897
887/* compat_time_t is a 32 bit "long" and needs to get converted. */ 898/* compat_time_t is a 32 bit "long" and needs to get converted. */
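compat_sys_rt_tgsigqueueinfo() is the 32-bit entry point for the new rt_tgsigqueueinfo syscall, which queues a signal with caller-supplied siginfo to one specific thread of a thread group. A hedged sketch of a raw invocation (no libc wrapper is implied, and SYS_rt_tgsigqueueinfo assumes headers new enough to define it):

#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

int send_info(pid_t tgid, pid_t tid, int sig)
{
	siginfo_t info = { 0 };

	info.si_signo = sig;
	info.si_code = SI_QUEUE;		/* negative si_code, as required */
	info.si_value.sival_int = 42;		/* arbitrary payload */

	return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, &info);
}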
diff --git a/kernel/exit.c b/kernel/exit.c
index abf9cf3b95c6..cab535c427b8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,7 +48,7 @@
48#include <linux/tracehook.h> 48#include <linux/tracehook.h>
49#include <linux/fs_struct.h> 49#include <linux/fs_struct.h>
50#include <linux/init_task.h> 50#include <linux/init_task.h>
51#include <trace/sched.h> 51#include <trace/events/sched.h>
52 52
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <asm/unistd.h> 54#include <asm/unistd.h>
@@ -56,10 +56,6 @@
56#include <asm/mmu_context.h> 56#include <asm/mmu_context.h>
57#include "cred-internals.h" 57#include "cred-internals.h"
58 58
59DEFINE_TRACE(sched_process_free);
60DEFINE_TRACE(sched_process_exit);
61DEFINE_TRACE(sched_process_wait);
62
63static void exit_mm(struct task_struct * tsk); 59static void exit_mm(struct task_struct * tsk);
64 60
65static void __unhash_process(struct task_struct *p) 61static void __unhash_process(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd00726..bb762b4dd217 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,7 +61,6 @@
61#include <linux/proc_fs.h> 61#include <linux/proc_fs.h>
62#include <linux/blkdev.h> 62#include <linux/blkdev.h>
63#include <linux/fs_struct.h> 63#include <linux/fs_struct.h>
64#include <trace/sched.h>
65#include <linux/magic.h> 64#include <linux/magic.h>
66 65
67#include <asm/pgtable.h> 66#include <asm/pgtable.h>
@@ -71,6 +70,8 @@
71#include <asm/cacheflush.h> 70#include <asm/cacheflush.h>
72#include <asm/tlbflush.h> 71#include <asm/tlbflush.h>
73 72
73#include <trace/events/sched.h>
74
74/* 75/*
75 * Protected counters by write_lock_irq(&tasklist_lock) 76 * Protected counters by write_lock_irq(&tasklist_lock)
76 */ 77 */
@@ -83,8 +84,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
83 84
84__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 85__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
85 86
86DEFINE_TRACE(sched_process_fork);
87
88int nr_processes(void) 87int nr_processes(void)
89{ 88{
90 int cpu; 89 int cpu;
@@ -982,6 +981,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
982 if (!p) 981 if (!p)
983 goto fork_out; 982 goto fork_out;
984 983
984 ftrace_graph_init_task(p);
985
985 rt_mutex_init_task(p); 986 rt_mutex_init_task(p);
986 987
987#ifdef CONFIG_PROVE_LOCKING 988#ifdef CONFIG_PROVE_LOCKING
@@ -1089,8 +1090,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1089#ifdef CONFIG_DEBUG_MUTEXES 1090#ifdef CONFIG_DEBUG_MUTEXES
1090 p->blocked_on = NULL; /* not blocked yet */ 1091 p->blocked_on = NULL; /* not blocked yet */
1091#endif 1092#endif
1092 if (unlikely(current->ptrace)) 1093
1093 ptrace_fork(p, clone_flags); 1094 p->bts = NULL;
1094 1095
1095 /* Perform scheduler related setup. Assign this task to a CPU. */ 1096 /* Perform scheduler related setup. Assign this task to a CPU. */
1096 sched_fork(p, clone_flags); 1097 sched_fork(p, clone_flags);
@@ -1131,8 +1132,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1131 } 1132 }
1132 } 1133 }
1133 1134
1134 ftrace_graph_init_task(p);
1135
1136 p->pid = pid_nr(pid); 1135 p->pid = pid_nr(pid);
1137 p->tgid = p->pid; 1136 p->tgid = p->pid;
1138 if (clone_flags & CLONE_THREAD) 1137 if (clone_flags & CLONE_THREAD)
@@ -1141,7 +1140,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1141 if (current->nsproxy != p->nsproxy) { 1140 if (current->nsproxy != p->nsproxy) {
1142 retval = ns_cgroup_clone(p, pid); 1141 retval = ns_cgroup_clone(p, pid);
1143 if (retval) 1142 if (retval)
1144 goto bad_fork_free_graph; 1143 goto bad_fork_free_pid;
1145 } 1144 }
1146 1145
1147 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1146 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1233,7 +1232,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1233 spin_unlock(&current->sighand->siglock); 1232 spin_unlock(&current->sighand->siglock);
1234 write_unlock_irq(&tasklist_lock); 1233 write_unlock_irq(&tasklist_lock);
1235 retval = -ERESTARTNOINTR; 1234 retval = -ERESTARTNOINTR;
1236 goto bad_fork_free_graph; 1235 goto bad_fork_free_pid;
1237 } 1236 }
1238 1237
1239 if (clone_flags & CLONE_THREAD) { 1238 if (clone_flags & CLONE_THREAD) {
@@ -1268,8 +1267,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1268 cgroup_post_fork(p); 1267 cgroup_post_fork(p);
1269 return p; 1268 return p;
1270 1269
1271bad_fork_free_graph:
1272 ftrace_graph_exit_task(p);
1273bad_fork_free_pid: 1270bad_fork_free_pid:
1274 if (pid != &init_struct_pid) 1271 if (pid != &init_struct_pid)
1275 free_pid(pid); 1272 free_pid(pid);
@@ -1409,7 +1406,7 @@ long do_fork(unsigned long clone_flags,
1409 } 1406 }
1410 1407
1411 audit_finish_fork(p); 1408 audit_finish_fork(p);
1412 tracehook_report_clone(trace, regs, clone_flags, nr, p); 1409 tracehook_report_clone(regs, clone_flags, nr, p);
1413 1410
1414 /* 1411 /*
1415 * We set PF_STARTING at creation in case tracing wants to 1412 * We set PF_STARTING at creation in case tracing wants to
diff --git a/kernel/futex.c b/kernel/futex.c
index d546b2d53a62..80b5ce716596 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -19,6 +19,10 @@
19 * PRIVATE futexes by Eric Dumazet 19 * PRIVATE futexes by Eric Dumazet
20 * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> 20 * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
21 * 21 *
22 * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
23 * Copyright (C) IBM Corporation, 2009
24 * Thanks to Thomas Gleixner for conceptual design and careful reviews.
25 *
22 * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly 26 * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
23 * enough at me, Linus for the original (flawed) idea, Matthew 27 * enough at me, Linus for the original (flawed) idea, Matthew
24 * Kirkwood for proof-of-concept implementation. 28 * Kirkwood for proof-of-concept implementation.
@@ -96,8 +100,8 @@ struct futex_pi_state {
96 */ 100 */
97struct futex_q { 101struct futex_q {
98 struct plist_node list; 102 struct plist_node list;
99 /* There can only be a single waiter */ 103 /* Waiter reference */
100 wait_queue_head_t waiter; 104 struct task_struct *task;
101 105
102 /* Which hash list lock to use: */ 106 /* Which hash list lock to use: */
103 spinlock_t *lock_ptr; 107 spinlock_t *lock_ptr;
@@ -107,7 +111,9 @@ struct futex_q {
107 111
108 /* Optional priority inheritance state: */ 112 /* Optional priority inheritance state: */
109 struct futex_pi_state *pi_state; 113 struct futex_pi_state *pi_state;
110 struct task_struct *task; 114
115 /* rt_waiter storage for requeue_pi: */
116 struct rt_mutex_waiter *rt_waiter;
111 117
112 /* Bitset for the optional bitmasked wakeup */ 118 /* Bitset for the optional bitmasked wakeup */
113 u32 bitset; 119 u32 bitset;
@@ -278,6 +284,25 @@ void put_futex_key(int fshared, union futex_key *key)
278 drop_futex_key_refs(key); 284 drop_futex_key_refs(key);
279} 285}
280 286
287/**
288 * futex_top_waiter() - Return the highest priority waiter on a futex
289 * @hb: the hash bucket the futex_q's reside in
290 * @key: the futex key (to distinguish this futex_q from others on the chain)
291 *
292 * Must be called with the hb lock held.
293 */
294static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
295 union futex_key *key)
296{
297 struct futex_q *this;
298
299 plist_for_each_entry(this, &hb->chain, list) {
300 if (match_futex(&this->key, key))
301 return this;
302 }
303 return NULL;
304}
305
281static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 306static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
282{ 307{
283 u32 curval; 308 u32 curval;
@@ -539,28 +564,160 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
539 return 0; 564 return 0;
540} 565}
541 566
567/**
568 * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex
569 * @uaddr: the pi futex user address
570 * @hb: the pi futex hash bucket
571 * @key: the futex key associated with uaddr and hb
572 * @ps: the pi_state pointer where we store the result of the
573 * lookup
574 * @task: the task to perform the atomic lock work for. This will
575 * be "current" except in the case of requeue pi.
576 * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
577 *
578 * Returns:
579 * 0 - ready to wait
580 * 1 - acquired the lock
581 * <0 - error
582 *
583 * The hb->lock and futex_key refs shall be held by the caller.
584 */
585static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
586 union futex_key *key,
587 struct futex_pi_state **ps,
588 struct task_struct *task, int set_waiters)
589{
590 int lock_taken, ret, ownerdied = 0;
591 u32 uval, newval, curval;
592
593retry:
594 ret = lock_taken = 0;
595
596 /*
597 * To avoid races, we attempt to take the lock here again
598 * (by doing a 0 -> TID atomic cmpxchg), while holding all
599 * the locks. It will most likely not succeed.
600 */
601 newval = task_pid_vnr(task);
602 if (set_waiters)
603 newval |= FUTEX_WAITERS;
604
605 curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
606
607 if (unlikely(curval == -EFAULT))
608 return -EFAULT;
609
610 /*
611 * Detect deadlocks.
612 */
613 if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
614 return -EDEADLK;
615
616 /*
617 * Surprise - we got the lock. Just return to userspace:
618 */
619 if (unlikely(!curval))
620 return 1;
621
622 uval = curval;
623
624 /*
625 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
626 * to wake at the next unlock.
627 */
628 newval = curval | FUTEX_WAITERS;
629
630 /*
631 * A futex can end up with no owner (owner TID == 0) in two
632 * cases: the OWNER_DIED bit is already set, or a previous
633 * attempt observed the owner dying (ownerdied). In either
634 * case we take over the futex unconditionally.
635 *
636 * This is safe, as we are protected by the hash bucket lock.
637 */
638 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
639 /* Keep the OWNER_DIED bit */
640 newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
641 ownerdied = 0;
642 lock_taken = 1;
643 }
644
645 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
646
647 if (unlikely(curval == -EFAULT))
648 return -EFAULT;
649 if (unlikely(curval != uval))
650 goto retry;
651
652 /*
653 * We took the lock via the owner-died take-over.
654 */
655 if (unlikely(lock_taken))
656 return 1;
657
658 /*
659 * We don't have the lock. Look up the PI state (or create it if
660 * we are the first waiter):
661 */
662 ret = lookup_pi_state(uval, hb, key, ps);
663
664 if (unlikely(ret)) {
665 switch (ret) {
666 case -ESRCH:
667 /*
668 * No owner found for this futex. Check if the
669 * OWNER_DIED bit is set to figure out whether
670 * this is a robust futex or not.
671 */
672 if (get_futex_value_locked(&curval, uaddr))
673 return -EFAULT;
674
675 /*
676 * We simply start over in case of a robust
677 * futex. The code above will take the futex
678 * and return happy.
679 */
680 if (curval & FUTEX_OWNER_DIED) {
681 ownerdied = 1;
682 goto retry;
683 }
684 default:
685 break;
686 }
687 }
688
689 return ret;
690}
691
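futex_lock_pi_atomic() is the kernel half of the PI-futex protocol; userspace tries the same 0 -> TID transition first and only enters the kernel on contention. A rough sketch of that fast path, using a GCC builtin (glibc's real implementation differs in detail):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

void pi_lock(unsigned int *futex, unsigned int tid)
{
	/* Fast path: uncontended 0 -> TID, no syscall needed. */
	if (__sync_bool_compare_and_swap(futex, 0, tid))
		return;

	/* Slow path: the kernel sets FUTEX_WAITERS and handles PI boosting. */
	syscall(SYS_futex, futex, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}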
542/* 692/*
543 * The hash bucket lock must be held when this is called. 693 * The hash bucket lock must be held when this is called.
544 * Afterwards, the futex_q must not be accessed. 694 * Afterwards, the futex_q must not be accessed.
545 */ 695 */
546static void wake_futex(struct futex_q *q) 696static void wake_futex(struct futex_q *q)
547{ 697{
548 plist_del(&q->list, &q->list.plist); 698 struct task_struct *p = q->task;
699
549 /* 700 /*
550 * The lock in wake_up_all() is a crucial memory barrier after the 701 * We set q->lock_ptr = NULL _before_ we wake up the task. If
551 * plist_del() and also before assigning to q->lock_ptr. 702 * a non-futex wake-up happens on another CPU then the task
703 * might exit and p would then point to a task struct that no
704 * longer exists. Prevent this by holding a reference on p
705 * across the wake-up.
552 */ 706 */
553 wake_up(&q->waiter); 707 get_task_struct(p);
708
709 plist_del(&q->list, &q->list.plist);
554 /* 710 /*
555 * The waiting task can free the futex_q as soon as this is written, 711 * The waiting task can free the futex_q as soon as
556 * without taking any locks. This must come last. 712 * q->lock_ptr = NULL is written, without taking any locks. A
557 * 713 * memory barrier is required here to prevent the following
558 * A memory barrier is required here to prevent the following store to 714 * store to lock_ptr from getting ahead of the plist_del.
559 * lock_ptr from getting ahead of the wakeup. Clearing the lock at the
560 * end of wake_up() does not prevent this store from moving.
561 */ 715 */
562 smp_wmb(); 716 smp_wmb();
563 q->lock_ptr = NULL; 717 q->lock_ptr = NULL;
718
719 wake_up_state(p, TASK_NORMAL);
720 put_task_struct(p);
564} 721}
565 722
566static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) 723static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -689,7 +846,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
689 846
690 plist_for_each_entry_safe(this, next, head, list) { 847 plist_for_each_entry_safe(this, next, head, list) {
691 if (match_futex (&this->key, &key)) { 848 if (match_futex (&this->key, &key)) {
692 if (this->pi_state) { 849 if (this->pi_state || this->rt_waiter) {
693 ret = -EINVAL; 850 ret = -EINVAL;
694 break; 851 break;
695 } 852 }
@@ -802,24 +959,185 @@ out:
802 return ret; 959 return ret;
803} 960}
804 961
805/* 962/**
806 * Requeue all waiters hashed on one physical page to another 963 * requeue_futex() - Requeue a futex_q from one hb to another
807 * physical page. 964 * @q: the futex_q to requeue
965 * @hb1: the source hash_bucket
966 * @hb2: the target hash_bucket
967 * @key2: the new key for the requeued futex_q
968 */
969static inline
970void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
971 struct futex_hash_bucket *hb2, union futex_key *key2)
972{
973
974 /*
975 * If key1 and key2 hash to the same bucket, no need to
976 * requeue.
977 */
978 if (likely(&hb1->chain != &hb2->chain)) {
979 plist_del(&q->list, &hb1->chain);
980 plist_add(&q->list, &hb2->chain);
981 q->lock_ptr = &hb2->lock;
982#ifdef CONFIG_DEBUG_PI_LIST
983 q->list.plist.lock = &hb2->lock;
984#endif
985 }
986 get_futex_key_refs(key2);
987 q->key = *key2;
988}
989
990/**
991 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
992 * q: the futex_q
993 * key: the key of the requeue target futex
994 *
995 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
996 * target futex if it is uncontended or via a lock steal. Set the futex_q key
997 * to the requeue target futex so the waiter can detect the wakeup on the right
998 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
999 * atomic lock acquisition. Must be called with the q->lock_ptr held.
1000 */
1001static inline
1002void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
1003{
1004 drop_futex_key_refs(&q->key);
1005 get_futex_key_refs(key);
1006 q->key = *key;
1007
1008 WARN_ON(plist_node_empty(&q->list));
1009 plist_del(&q->list, &q->list.plist);
1010
1011 WARN_ON(!q->rt_waiter);
1012 q->rt_waiter = NULL;
1013
1014 wake_up_state(q->task, TASK_NORMAL);
1015}
1016
1017/**
1018 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
1019 * @pifutex: the user address of the to futex
1020 * @hb1: the from futex hash bucket, must be locked by the caller
1021 * @hb2: the to futex hash bucket, must be locked by the caller
1022 * @key1: the from futex key
1023 * @key2: the to futex key
1024 * @ps: address to store the pi_state pointer
1025 * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
1026 *
1027 * Try and get the lock on behalf of the top waiter if we can do it atomically.
1028 * Wake the top waiter if we succeed. If the caller specified set_waiters,
1029 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
1030 * hb1 and hb2 must be held by the caller.
1031 *
1032 * Returns:
1033 * 0 - failed to acquire the lock atomically
1034 * 1 - acquired the lock
1035 * <0 - error
1036 */
1037static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1038 struct futex_hash_bucket *hb1,
1039 struct futex_hash_bucket *hb2,
1040 union futex_key *key1, union futex_key *key2,
1041 struct futex_pi_state **ps, int set_waiters)
1042{
1043 struct futex_q *top_waiter = NULL;
1044 u32 curval;
1045 int ret;
1046
1047 if (get_futex_value_locked(&curval, pifutex))
1048 return -EFAULT;
1049
1050 /*
1051 * Find the top_waiter and determine if there are additional waiters.
1052 * If the caller intends to requeue more than 1 waiter to pifutex,
1053 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
1054 * as we have means to handle the possible fault. If not, don't set
1055 * the bit unnecessarily, as it will force the subsequent unlock to enter
1056 * the kernel.
1057 */
1058 top_waiter = futex_top_waiter(hb1, key1);
1059
1060 /* There are no waiters, nothing for us to do. */
1061 if (!top_waiter)
1062 return 0;
1063
1064 /*
1065 * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
1066 * the contended case or if set_waiters is 1. The pi_state is returned
1067 * in ps in contended cases.
1068 */
1069 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1070 set_waiters);
1071 if (ret == 1)
1072 requeue_pi_wake_futex(top_waiter, key2);
1073
1074 return ret;
1075}
1076
1077/**
1078 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
1079 * @uaddr1: source futex user address
1080 * @uaddr2: target futex user address
1081 * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
1082 * @nr_requeue: number of waiters to requeue (0-INT_MAX)
1083 * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
1084 * pi futex (pi-to-pi requeue is not supported)
1085 *
1086 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
1087 * uaddr2 atomically on behalf of the top waiter.
1088 *
1089 * Returns:
1090 * >=0 - on success, the number of tasks requeued or woken
1091 * <0 - on error
808 */ 1092 */
809static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, 1093static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
810 int nr_wake, int nr_requeue, u32 *cmpval) 1094 int nr_wake, int nr_requeue, u32 *cmpval,
1095 int requeue_pi)
811{ 1096{
812 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; 1097 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1098 int drop_count = 0, task_count = 0, ret;
1099 struct futex_pi_state *pi_state = NULL;
813 struct futex_hash_bucket *hb1, *hb2; 1100 struct futex_hash_bucket *hb1, *hb2;
814 struct plist_head *head1; 1101 struct plist_head *head1;
815 struct futex_q *this, *next; 1102 struct futex_q *this, *next;
816 int ret, drop_count = 0; 1103 u32 curval2;
1104
1105 if (requeue_pi) {
1106 /*
1107 * requeue_pi requires a pi_state, try to allocate it now
1108 * without any locks in case it fails.
1109 */
1110 if (refill_pi_state_cache())
1111 return -ENOMEM;
1112 /*
1113 * requeue_pi must wake as many tasks as it can, up to nr_wake
1114 * + nr_requeue, since it acquires the rt_mutex prior to
1115 * returning to userspace, so as to not leave the rt_mutex with
1116 * waiters and no owner. However, second and third wake-ups
1117 * cannot be predicted as they involve race conditions with the
1118 * first wake and a fault while looking up the pi_state. Both
1119 * pthread_cond_signal() and pthread_cond_broadcast() should
1120 * use nr_wake=1.
1121 */
1122 if (nr_wake != 1)
1123 return -EINVAL;
1124 }
817 1125
818retry: 1126retry:
1127 if (pi_state != NULL) {
1128 /*
1129 * We will have to lookup the pi_state again, so free this one
1130 * to keep the accounting correct.
1131 */
1132 free_pi_state(pi_state);
1133 pi_state = NULL;
1134 }
1135
819 ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); 1136 ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
820 if (unlikely(ret != 0)) 1137 if (unlikely(ret != 0))
821 goto out; 1138 goto out;
822 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_READ); 1139 ret = get_futex_key(uaddr2, fshared, &key2,
1140 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
823 if (unlikely(ret != 0)) 1141 if (unlikely(ret != 0))
824 goto out_put_key1; 1142 goto out_put_key1;
825 1143
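With requeue_pi, futex_requeue() becomes the engine behind PI-aware condition variables: waiters block on the non-PI condvar futex and are requeued onto the PI mutex futex. Assuming the opcodes this series introduces (FUTEX_CMP_REQUEUE_PI on the waker side, FUTEX_WAIT_REQUEUE_PI on the waiter side) and hypothetical cond/mutex/cond_val variables, a broadcast looks roughly like:

/* Wake one waiter directly, requeue the rest onto the PI mutex futex.
 * cond_val snapshots *cond so the kernel can detect a racing change;
 * nr_requeue travels in the timeout slot of the futex syscall. */
syscall(SYS_futex, cond, FUTEX_CMP_REQUEUE_PI, 1 /* nr_wake */,
	(void *)(long)INT_MAX /* nr_requeue */, mutex, cond_val);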
@@ -854,32 +1172,99 @@ retry_private:
854 } 1172 }
855 } 1173 }
856 1174
1175 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1176 /*
1177 * Attempt to acquire uaddr2 and wake the top waiter. If we
1178 * intend to requeue waiters, force setting the FUTEX_WAITERS
1179 * bit. We force this here where we are able to easily handle
1180 * faults, rather than in the requeue loop below.
1181 */
1182 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1183 &key2, &pi_state, nr_requeue);
1184
1185 /*
1186 * At this point the top_waiter has either taken uaddr2 or is
1187 * waiting on it. If the former, then the pi_state will not
1188 * exist yet, look it up one more time to ensure we have a
1189 * reference to it.
1190 */
1191 if (ret == 1) {
1192 WARN_ON(pi_state);
1193 task_count++;
1194 ret = get_futex_value_locked(&curval2, uaddr2);
1195 if (!ret)
1196 ret = lookup_pi_state(curval2, hb2, &key2,
1197 &pi_state);
1198 }
1199
1200 switch (ret) {
1201 case 0:
1202 break;
1203 case -EFAULT:
1204 double_unlock_hb(hb1, hb2);
1205 put_futex_key(fshared, &key2);
1206 put_futex_key(fshared, &key1);
1207 ret = get_user(curval2, uaddr2);
1208 if (!ret)
1209 goto retry;
1210 goto out;
1211 case -EAGAIN:
1212 /* The owner was exiting, try again. */
1213 double_unlock_hb(hb1, hb2);
1214 put_futex_key(fshared, &key2);
1215 put_futex_key(fshared, &key1);
1216 cond_resched();
1217 goto retry;
1218 default:
1219 goto out_unlock;
1220 }
1221 }
1222
857 head1 = &hb1->chain; 1223 head1 = &hb1->chain;
858 plist_for_each_entry_safe(this, next, head1, list) { 1224 plist_for_each_entry_safe(this, next, head1, list) {
859 if (!match_futex (&this->key, &key1)) 1225 if (task_count - nr_wake >= nr_requeue)
1226 break;
1227
1228 if (!match_futex(&this->key, &key1))
860 continue; 1229 continue;
861 if (++ret <= nr_wake) { 1230
1231 WARN_ON(!requeue_pi && this->rt_waiter);
1232 WARN_ON(requeue_pi && !this->rt_waiter);
1233
1234 /*
1235 * Wake nr_wake waiters. For requeue_pi, if we acquired the
1236 * lock, we already woke the top_waiter. If not, it will be
1237 * woken by futex_unlock_pi().
1238 */
1239 if (++task_count <= nr_wake && !requeue_pi) {
862 wake_futex(this); 1240 wake_futex(this);
863 } else { 1241 continue;
864 /* 1242 }
865 * If key1 and key2 hash to the same bucket, no need to
866 * requeue.
867 */
868 if (likely(head1 != &hb2->chain)) {
869 plist_del(&this->list, &hb1->chain);
870 plist_add(&this->list, &hb2->chain);
871 this->lock_ptr = &hb2->lock;
872#ifdef CONFIG_DEBUG_PI_LIST
873 this->list.plist.lock = &hb2->lock;
874#endif
875 }
876 this->key = key2;
877 get_futex_key_refs(&key2);
878 drop_count++;
879 1243
880 if (ret - nr_wake >= nr_requeue) 1244 /*
881 break; 1245 * Requeue nr_requeue waiters and possibly one more in the case
1246 * of requeue_pi if we couldn't acquire the lock atomically.
1247 */
1248 if (requeue_pi) {
1249 /* Prepare the waiter to take the rt_mutex. */
1250 atomic_inc(&pi_state->refcount);
1251 this->pi_state = pi_state;
1252 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1253 this->rt_waiter,
1254 this->task, 1);
1255 if (ret == 1) {
1256 /* We got the lock. */
1257 requeue_pi_wake_futex(this, &key2);
1258 continue;
1259 } else if (ret) {
1260 /* -EDEADLK */
1261 this->pi_state = NULL;
1262 free_pi_state(pi_state);
1263 goto out_unlock;
1264 }
882 } 1265 }
1266 requeue_futex(this, hb1, hb2, &key2);
1267 drop_count++;
883 } 1268 }
884 1269
885out_unlock: 1270out_unlock:
@@ -899,7 +1284,9 @@ out_put_keys:
899out_put_key1: 1284out_put_key1:
900 put_futex_key(fshared, &key1); 1285 put_futex_key(fshared, &key1);
901out: 1286out:
902 return ret; 1287 if (pi_state != NULL)
1288 free_pi_state(pi_state);
1289 return ret ? ret : task_count;
903} 1290}
904 1291
905/* The key must be already stored in q->key. */ 1292/* The key must be already stored in q->key. */
@@ -907,8 +1294,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
907{ 1294{
908 struct futex_hash_bucket *hb; 1295 struct futex_hash_bucket *hb;
909 1296
910 init_waitqueue_head(&q->waiter);
911
912 get_futex_key_refs(&q->key); 1297 get_futex_key_refs(&q->key);
913 hb = hash_futex(&q->key); 1298 hb = hash_futex(&q->key);
914 q->lock_ptr = &hb->lock; 1299 q->lock_ptr = &hb->lock;
@@ -1119,35 +1504,149 @@ handle_fault:
1119 */ 1504 */
1120#define FLAGS_SHARED 0x01 1505#define FLAGS_SHARED 0x01
1121#define FLAGS_CLOCKRT 0x02 1506#define FLAGS_CLOCKRT 0x02
1507#define FLAGS_HAS_TIMEOUT 0x04
1122 1508
1123static long futex_wait_restart(struct restart_block *restart); 1509static long futex_wait_restart(struct restart_block *restart);
1124 1510
1125static int futex_wait(u32 __user *uaddr, int fshared, 1511/**
1126 u32 val, ktime_t *abs_time, u32 bitset, int clockrt) 1512 * fixup_owner() - Post lock pi_state and corner case management
1513 * @uaddr: user address of the futex
1514 * @fshared: whether the futex is shared (1) or not (0)
1515 * @q: futex_q (contains pi_state and access to the rt_mutex)
1516 * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
1517 *
1518 * After attempting to lock an rt_mutex, this function is called to clean up
1519 * the pi_state owner as well as handle race conditions that may allow us to
1520 * acquire the lock. Must be called with the hb lock held.
1521 *
1522 * Returns:
1523 * 1 - success, lock taken
1524 * 0 - success, lock not taken
1525 * <0 - on error (-EFAULT)
1526 */
1527static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
1528 int locked)
1127{ 1529{
1128 struct task_struct *curr = current; 1530 struct task_struct *owner;
1129 struct restart_block *restart; 1531 int ret = 0;
1130 DECLARE_WAITQUEUE(wait, curr);
1131 struct futex_hash_bucket *hb;
1132 struct futex_q q;
1133 u32 uval;
1134 int ret;
1135 struct hrtimer_sleeper t;
1136 int rem = 0;
1137 1532
1138 if (!bitset) 1533 if (locked) {
1139 return -EINVAL; 1534 /*
1535 * Got the lock. We might not be the anticipated owner if we
1536 * did a lock-steal - fix up the PI-state in that case:
1537 */
1538 if (q->pi_state->owner != current)
1539 ret = fixup_pi_state_owner(uaddr, q, current, fshared);
1540 goto out;
1541 }
1140 1542
1141 q.pi_state = NULL; 1543 /*
1142 q.bitset = bitset; 1544 * Catch the rare case where the lock was released while we were on the
1143retry: 1545 * way back before we locked the hash bucket.
1144 q.key = FUTEX_KEY_INIT; 1546 */
1145 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ); 1547 if (q->pi_state->owner == current) {
1146 if (unlikely(ret != 0)) 1548 /*
1549 * Try to get the rt_mutex now. This might fail as some other
1550 * task acquired the rt_mutex after we removed ourself from the
1551 * rt_mutex waiters list.
1552 */
1553 if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
1554 locked = 1;
1555 goto out;
1556 }
1557
1558 /*
1559 * pi_state is incorrect, some other task did a lock steal and
1560 * we returned due to timeout or signal without taking the
1561 * rt_mutex. Too late. We can access the rt_mutex_owner without
1562 * locking, as the other task is now blocked on the hash bucket
1563 * lock. Fix the state up.
1564 */
1565 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1566 ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
1147 goto out; 1567 goto out;
1568 }
1148 1569
1149retry_private: 1570 /*
1150 hb = queue_lock(&q); 1571 * Paranoia check. If we did not take the lock, then we should not be
1572 * the owner, nor the pending owner, of the rt_mutex.
1573 */
1574 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1575 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
1576 "pi-state %p\n", ret,
1577 q->pi_state->pi_mutex.owner,
1578 q->pi_state->owner);
1579
1580out:
1581 return ret ? ret : locked;
1582}
1583
1584/**
1585 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
1586 * @hb: the futex hash bucket, must be locked by the caller
1587 * @q: the futex_q to queue up on
1588 * @timeout: the prepared hrtimer_sleeper, or null for no timeout
1589 */
1590static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1591 struct hrtimer_sleeper *timeout)
1592{
1593 queue_me(q, hb);
1594
1595 /*
1596 * There might have been scheduling since the queue_me(), as we
1597 * cannot hold a spinlock across the get_user() in case it
1598 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
1599 * queueing ourselves into the futex hash. This code thus has to
1600 * rely on the futex_wake() code removing us from hash when it
1601 * wakes us up.
1602 */
1603 set_current_state(TASK_INTERRUPTIBLE);
1604
1605 /* Arm the timer */
1606 if (timeout) {
1607 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1608 if (!hrtimer_active(&timeout->timer))
1609 timeout->task = NULL;
1610 }
1611
1612 /*
1613 * !plist_node_empty() is safe here without any lock.
1614 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
1615 */
1616 if (likely(!plist_node_empty(&q->list))) {
1617 /*
1618 * If the timer has already expired, current will already be
1619 * flagged for rescheduling. Only call schedule if there
1620 * is no timeout, or if it has yet to expire.
1621 */
1622 if (!timeout || timeout->task)
1623 schedule();
1624 }
1625 __set_current_state(TASK_RUNNING);
1626}
1627
1628/**
1629 * futex_wait_setup() - Prepare to wait on a futex
1630 * @uaddr: the futex userspace address
1631 * @val: the expected value
1632 * @fshared: whether the futex is shared (1) or not (0)
1633 * @q: the associated futex_q
1634 * @hb: storage for hash_bucket pointer to be returned to caller
1635 *
1636 * Setup the futex_q and locate the hash_bucket. Get the futex value and
1637 * compare it with the expected value. Handle atomic faults internally.
1638 * Return with the hb lock held and a q.key reference on success, and unlocked
1639 * with no q.key reference on failure.
1640 *
1641 * Returns:
1642 * 0 - uaddr contains val and hb has been locked
1643 * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
1644 */
1645static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
1646 struct futex_q *q, struct futex_hash_bucket **hb)
1647{
1648 u32 uval;
1649 int ret;
1151 1650
1152 /* 1651 /*
1153 * Access the page AFTER the hash-bucket is locked. 1652 * Access the page AFTER the hash-bucket is locked.
@@ -1165,95 +1664,83 @@ retry_private:
1165 * A consequence is that futex_wait() can return zero and absorb 1664 * A consequence is that futex_wait() can return zero and absorb
1166 * a wakeup when *uaddr != val on entry to the syscall. This is 1665 * a wakeup when *uaddr != val on entry to the syscall. This is
1167 * rare, but normal. 1666 * rare, but normal.
1168 *
1169 * For shared futexes, we hold the mmap semaphore, so the mapping
1170 * cannot have changed since we looked it up in get_futex_key.
1171 */ 1667 */
1668retry:
1669 q->key = FUTEX_KEY_INIT;
1670 ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ);
1671 if (unlikely(ret != 0))
1672 return ret;
1673
1674retry_private:
1675 *hb = queue_lock(q);
1676
1172 ret = get_futex_value_locked(&uval, uaddr); 1677 ret = get_futex_value_locked(&uval, uaddr);
1173 1678
1174 if (unlikely(ret)) { 1679 if (ret) {
1175 queue_unlock(&q, hb); 1680 queue_unlock(q, *hb);
1176 1681
1177 ret = get_user(uval, uaddr); 1682 ret = get_user(uval, uaddr);
1178 if (ret) 1683 if (ret)
1179 goto out_put_key; 1684 goto out;
1180 1685
1181 if (!fshared) 1686 if (!fshared)
1182 goto retry_private; 1687 goto retry_private;
1183 1688
1184 put_futex_key(fshared, &q.key); 1689 put_futex_key(fshared, &q->key);
1185 goto retry; 1690 goto retry;
1186 } 1691 }
1187 ret = -EWOULDBLOCK;
1188 if (unlikely(uval != val)) {
1189 queue_unlock(&q, hb);
1190 goto out_put_key;
1191 }
1192 1692
1193 /* Only actually queue if *uaddr contained val. */ 1693 if (uval != val) {
1194 queue_me(&q, hb); 1694 queue_unlock(q, *hb);
1695 ret = -EWOULDBLOCK;
1696 }
1195 1697
1196 /* 1698out:
1197 * There might have been scheduling since the queue_me(), as we 1699 if (ret)
1198 * cannot hold a spinlock across the get_user() in case it 1700 put_futex_key(fshared, &q->key);
1199 * faults, and we cannot just set TASK_INTERRUPTIBLE state when 1701 return ret;
1200 * queueing ourselves into the futex hash. This code thus has to 1702}
1201 * rely on the futex_wake() code removing us from hash when it
1202 * wakes us up.
1203 */
1204 1703
1205 /* add_wait_queue is the barrier after __set_current_state. */ 1704static int futex_wait(u32 __user *uaddr, int fshared,
1206 __set_current_state(TASK_INTERRUPTIBLE); 1705 u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
1207 add_wait_queue(&q.waiter, &wait); 1706{
1208 /* 1707 struct hrtimer_sleeper timeout, *to = NULL;
1209 * !plist_node_empty() is safe here without any lock. 1708 struct restart_block *restart;
1210 * q.lock_ptr != 0 is not safe, because of ordering against wakeup. 1709 struct futex_hash_bucket *hb;
1211 */ 1710 struct futex_q q;
1212 if (likely(!plist_node_empty(&q.list))) { 1711 int ret;
1213 if (!abs_time)
1214 schedule();
1215 else {
1216 hrtimer_init_on_stack(&t.timer,
1217 clockrt ? CLOCK_REALTIME :
1218 CLOCK_MONOTONIC,
1219 HRTIMER_MODE_ABS);
1220 hrtimer_init_sleeper(&t, current);
1221 hrtimer_set_expires_range_ns(&t.timer, *abs_time,
1222 current->timer_slack_ns);
1223
1224 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
1225 if (!hrtimer_active(&t.timer))
1226 t.task = NULL;
1227 1712
1228 /* 1713 if (!bitset)
1229 * the timer could have already expired, in which 1714 return -EINVAL;
1230 * case current would be flagged for rescheduling.
1231 * Don't bother calling schedule.
1232 */
1233 if (likely(t.task))
1234 schedule();
1235 1715
1236 hrtimer_cancel(&t.timer); 1716 q.pi_state = NULL;
1717 q.bitset = bitset;
1718 q.rt_waiter = NULL;
1237 1719
1238 /* Flag if a timeout occurred */ 1720 if (abs_time) {
1239 rem = (t.task == NULL); 1721 to = &timeout;
1240 1722
1241 destroy_hrtimer_on_stack(&t.timer); 1723 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
1242 } 1724 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1725 hrtimer_init_sleeper(to, current);
1726 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1727 current->timer_slack_ns);
1243 } 1728 }
1244 __set_current_state(TASK_RUNNING);
1245 1729
1246 /* 1730 /* Prepare to wait on uaddr. */
1247 * NOTE: we don't remove ourselves from the waitqueue because 1731 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
1248 * we are the only user of it. 1732 if (ret)
1249 */ 1733 goto out;
1734
1735 /* queue_me and wait for wakeup, timeout, or a signal. */
1736 futex_wait_queue_me(hb, &q, to);
1250 1737
1251 /* If we were woken (and unqueued), we succeeded, whatever. */ 1738 /* If we were woken (and unqueued), we succeeded, whatever. */
1252 ret = 0; 1739 ret = 0;
1253 if (!unqueue_me(&q)) 1740 if (!unqueue_me(&q))
1254 goto out_put_key; 1741 goto out_put_key;
1255 ret = -ETIMEDOUT; 1742 ret = -ETIMEDOUT;
1256 if (rem) 1743 if (to && !to->task)
1257 goto out_put_key; 1744 goto out_put_key;
1258 1745
1259 /* 1746 /*
@@ -1270,7 +1757,7 @@ retry_private:
1270 restart->futex.val = val; 1757 restart->futex.val = val;
1271 restart->futex.time = abs_time->tv64; 1758 restart->futex.time = abs_time->tv64;
1272 restart->futex.bitset = bitset; 1759 restart->futex.bitset = bitset;
1273 restart->futex.flags = 0; 1760 restart->futex.flags = FLAGS_HAS_TIMEOUT;
1274 1761
1275 if (fshared) 1762 if (fshared)
1276 restart->futex.flags |= FLAGS_SHARED; 1763 restart->futex.flags |= FLAGS_SHARED;
@@ -1282,6 +1769,10 @@ retry_private:
1282out_put_key: 1769out_put_key:
1283 put_futex_key(fshared, &q.key); 1770 put_futex_key(fshared, &q.key);
1284out: 1771out:
1772 if (to) {
1773 hrtimer_cancel(&to->timer);
1774 destroy_hrtimer_on_stack(&to->timer);
1775 }
1285 return ret; 1776 return ret;
1286} 1777}
1287 1778
@@ -1290,13 +1781,16 @@ static long futex_wait_restart(struct restart_block *restart)
1290{ 1781{
1291 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; 1782 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
1292 int fshared = 0; 1783 int fshared = 0;
1293 ktime_t t; 1784 ktime_t t, *tp = NULL;
1294 1785
1295 t.tv64 = restart->futex.time; 1786 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1787 t.tv64 = restart->futex.time;
1788 tp = &t;
1789 }
1296 restart->fn = do_no_restart_syscall; 1790 restart->fn = do_no_restart_syscall;
1297 if (restart->futex.flags & FLAGS_SHARED) 1791 if (restart->futex.flags & FLAGS_SHARED)
1298 fshared = 1; 1792 fshared = 1;
1299 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, 1793 return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
1300 restart->futex.bitset, 1794 restart->futex.bitset,
1301 restart->futex.flags & FLAGS_CLOCKRT); 1795 restart->futex.flags & FLAGS_CLOCKRT);
1302} 1796}
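The point of FLAGS_HAS_TIMEOUT is that restart->futex.time alone cannot distinguish "no timeout" from "absolute timeout"; without the flag a restarted untimed wait would be handed a stale expiry. A self-contained model of the restart decision (all names below are invented for illustration):

#include <stdint.h>
#include <stdio.h>

#define FLAGS_HAS_TIMEOUT 0x4		/* mirrors the new restart flag */

struct restart_state {
	uint64_t time;			/* meaningful only if the flag is set */
	unsigned int flags;
};

/* Stand-in for futex_wait(): report which timeout it was handed. */
static long wait_stub(const uint64_t *timeout)
{
	return timeout ? (long)*timeout : -1;
}

static long wait_restart(const struct restart_state *r)
{
	uint64_t t, *tp = NULL;

	if (r->flags & FLAGS_HAS_TIMEOUT) {
		t = r->time;
		tp = &t;
	}
	return wait_stub(tp);		/* NULL again for an untimed wait */
}

int main(void)
{
	struct restart_state untimed = { .time = 42, .flags = 0 };
	struct restart_state timed = { .time = 42, .flags = FLAGS_HAS_TIMEOUT };

	printf("untimed -> %ld, timed -> %ld\n",
	       wait_restart(&untimed), wait_restart(&timed));
	return 0;
}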
@@ -1312,11 +1806,10 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1312 int detect, ktime_t *time, int trylock) 1806 int detect, ktime_t *time, int trylock)
1313{ 1807{
1314 struct hrtimer_sleeper timeout, *to = NULL; 1808 struct hrtimer_sleeper timeout, *to = NULL;
1315 struct task_struct *curr = current;
1316 struct futex_hash_bucket *hb; 1809 struct futex_hash_bucket *hb;
1317 u32 uval, newval, curval; 1810 u32 uval;
1318 struct futex_q q; 1811 struct futex_q q;
1319 int ret, lock_taken, ownerdied = 0; 1812 int res, ret;
1320 1813
1321 if (refill_pi_state_cache()) 1814 if (refill_pi_state_cache())
1322 return -ENOMEM; 1815 return -ENOMEM;
@@ -1330,6 +1823,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1330 } 1823 }
1331 1824
1332 q.pi_state = NULL; 1825 q.pi_state = NULL;
1826 q.rt_waiter = NULL;
1333retry: 1827retry:
1334 q.key = FUTEX_KEY_INIT; 1828 q.key = FUTEX_KEY_INIT;
1335 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); 1829 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
@@ -1339,81 +1833,15 @@ retry:
1339retry_private: 1833retry_private:
1340 hb = queue_lock(&q); 1834 hb = queue_lock(&q);
1341 1835
1342retry_locked: 1836 ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
1343 ret = lock_taken = 0;
1344
1345 /*
1346 * To avoid races, we attempt to take the lock here again
1347 * (by doing a 0 -> TID atomic cmpxchg), while holding all
1348 * the locks. It will most likely not succeed.
1349 */
1350 newval = task_pid_vnr(current);
1351
1352 curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
1353
1354 if (unlikely(curval == -EFAULT))
1355 goto uaddr_faulted;
1356
1357 /*
1358 * Detect deadlocks. In case of REQUEUE_PI this is a valid
1359 * situation and we return success to user space.
1360 */
1361 if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
1362 ret = -EDEADLK;
1363 goto out_unlock_put_key;
1364 }
1365
1366 /*
1367 * Surprise - we got the lock. Just return to userspace:
1368 */
1369 if (unlikely(!curval))
1370 goto out_unlock_put_key;
1371
1372 uval = curval;
1373
1374 /*
1375 * Set the WAITERS flag, so the owner will know it has someone
1376 * to wake at next unlock
1377 */
1378 newval = curval | FUTEX_WAITERS;
1379
1380 /*
1381 * There are two cases, where a futex might have no owner (the
1382 * owner TID is 0): OWNER_DIED. We take over the futex in this
1383 * case. We also do an unconditional take over, when the owner
1384 * of the futex died.
1385 *
1386 * This is safe as we are protected by the hash bucket lock !
1387 */
1388 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
1389 /* Keep the OWNER_DIED bit */
1390 newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
1391 ownerdied = 0;
1392 lock_taken = 1;
1393 }
1394
1395 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
1396
1397 if (unlikely(curval == -EFAULT))
1398 goto uaddr_faulted;
1399 if (unlikely(curval != uval))
1400 goto retry_locked;
1401
1402 /*
1403 * We took the lock due to owner died take over.
1404 */
1405 if (unlikely(lock_taken))
1406 goto out_unlock_put_key;
1407
1408 /*
1409 * We dont have the lock. Look up the PI state (or create it if
1410 * we are the first waiter):
1411 */
1412 ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
1413
1414 if (unlikely(ret)) { 1837 if (unlikely(ret)) {
1415 switch (ret) { 1838 switch (ret) {
1416 1839 case 1:
1840 /* We got the lock. */
1841 ret = 0;
1842 goto out_unlock_put_key;
1843 case -EFAULT:
1844 goto uaddr_faulted;
1417 case -EAGAIN: 1845 case -EAGAIN:
1418 /* 1846 /*
1419 * Task is exiting and we just wait for the 1847 * Task is exiting and we just wait for the
@@ -1423,25 +1851,6 @@ retry_locked:
1423 put_futex_key(fshared, &q.key); 1851 put_futex_key(fshared, &q.key);
1424 cond_resched(); 1852 cond_resched();
1425 goto retry; 1853 goto retry;
1426
1427 case -ESRCH:
1428 /*
1429 * No owner found for this futex. Check if the
1430 * OWNER_DIED bit is set to figure out whether
1431 * this is a robust futex or not.
1432 */
1433 if (get_futex_value_locked(&curval, uaddr))
1434 goto uaddr_faulted;
1435
1436 /*
1437 * We simply start over in case of a robust
1438 * futex. The code above will take the futex
1439 * and return happy.
1440 */
1441 if (curval & FUTEX_OWNER_DIED) {
1442 ownerdied = 1;
1443 goto retry_locked;
1444 }
1445 default: 1854 default:
1446 goto out_unlock_put_key; 1855 goto out_unlock_put_key;
1447 } 1856 }
@@ -1465,71 +1874,21 @@ retry_locked:
1465 } 1874 }
1466 1875
1467 spin_lock(q.lock_ptr); 1876 spin_lock(q.lock_ptr);
1468 1877 /*
1469 if (!ret) { 1878 * Fixup the pi_state owner and possibly acquire the lock if we
1470 /* 1879 * haven't already.
1471 * Got the lock. We might not be the anticipated owner 1880 */
1472 * if we did a lock-steal - fix up the PI-state in 1881 res = fixup_owner(uaddr, fshared, &q, !ret);
1473 * that case: 1882 /*
 1474 */ 1883 * If fixup_owner() returned an error, propagate that. If it acquired
1475 if (q.pi_state->owner != curr) 1884 * the lock, clear our -ETIMEDOUT or -EINTR.
1476 ret = fixup_pi_state_owner(uaddr, &q, curr, fshared); 1885 */
1477 } else { 1886 if (res)
1478 /* 1887 ret = (res < 0) ? res : 0;
1479 * Catch the rare case, where the lock was released
1480 * when we were on the way back before we locked the
1481 * hash bucket.
1482 */
1483 if (q.pi_state->owner == curr) {
1484 /*
1485 * Try to get the rt_mutex now. This might
1486 * fail as some other task acquired the
1487 * rt_mutex after we removed ourself from the
1488 * rt_mutex waiters list.
1489 */
1490 if (rt_mutex_trylock(&q.pi_state->pi_mutex))
1491 ret = 0;
1492 else {
1493 /*
1494 * pi_state is incorrect, some other
1495 * task did a lock steal and we
1496 * returned due to timeout or signal
1497 * without taking the rt_mutex. Too
1498 * late. We can access the
1499 * rt_mutex_owner without locking, as
1500 * the other task is now blocked on
1501 * the hash bucket lock. Fix the state
1502 * up.
1503 */
1504 struct task_struct *owner;
1505 int res;
1506
1507 owner = rt_mutex_owner(&q.pi_state->pi_mutex);
1508 res = fixup_pi_state_owner(uaddr, &q, owner,
1509 fshared);
1510
1511 /* propagate -EFAULT, if the fixup failed */
1512 if (res)
1513 ret = res;
1514 }
1515 } else {
1516 /*
1517 * Paranoia check. If we did not take the lock
1518 * in the trylock above, then we should not be
1519 * the owner of the rtmutex, neither the real
1520 * nor the pending one:
1521 */
1522 if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
1523 printk(KERN_ERR "futex_lock_pi: ret = %d "
1524 "pi-mutex: %p pi-state %p\n", ret,
1525 q.pi_state->pi_mutex.owner,
1526 q.pi_state->owner);
1527 }
1528 }
1529 1888
1530 /* 1889 /*
1531 * If fixup_pi_state_owner() faulted and was unable to handle the 1890 * If fixup_owner() faulted and was unable to handle the fault, unlock
1532 * fault, unlock it and return the fault to userspace. 1891 * it and return the fault to userspace.
1533 */ 1892 */
1534 if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) 1893 if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
1535 rt_mutex_unlock(&q.pi_state->pi_mutex); 1894 rt_mutex_unlock(&q.pi_state->pi_mutex);
@@ -1537,9 +1896,7 @@ retry_locked:
1537 /* Unqueue and drop the lock */ 1896 /* Unqueue and drop the lock */
1538 unqueue_me_pi(&q); 1897 unqueue_me_pi(&q);
1539 1898
1540 if (to) 1899 goto out;
1541 destroy_hrtimer_on_stack(&to->timer);
1542 return ret != -EINTR ? ret : -ERESTARTNOINTR;
1543 1900
1544out_unlock_put_key: 1901out_unlock_put_key:
1545 queue_unlock(&q, hb); 1902 queue_unlock(&q, hb);
@@ -1549,7 +1906,7 @@ out_put_key:
1549out: 1906out:
1550 if (to) 1907 if (to)
1551 destroy_hrtimer_on_stack(&to->timer); 1908 destroy_hrtimer_on_stack(&to->timer);
1552 return ret; 1909 return ret != -EINTR ? ret : -ERESTARTNOINTR;
1553 1910
1554uaddr_faulted: 1911uaddr_faulted:
1555 /* 1912 /*
@@ -1572,7 +1929,6 @@ uaddr_faulted:
1572 goto retry; 1929 goto retry;
1573} 1930}
1574 1931
1575
1576/* 1932/*
1577 * Userspace attempted a TID -> 0 atomic transition, and failed. 1933 * Userspace attempted a TID -> 0 atomic transition, and failed.
1578 * This is the in-kernel slowpath: we look up the PI state (if any), 1934 * This is the in-kernel slowpath: we look up the PI state (if any),
@@ -1674,6 +2030,229 @@ pi_faulted:
1674 return ret; 2030 return ret;
1675} 2031}
1676 2032
2033/**
2034 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 2035 * @hb: the hash_bucket futex_q was originally enqueued on
2036 * @q: the futex_q woken while waiting to be requeued
2037 * @key2: the futex_key of the requeue target futex
2038 * @timeout: the timeout associated with the wait (NULL if none)
2039 *
2040 * Detect if the task was woken on the initial futex as opposed to the requeue
2041 * target futex. If so, determine if it was a timeout or a signal that caused
2042 * the wakeup and return the appropriate error code to the caller. Must be
2043 * called with the hb lock held.
2044 *
 2045 * Returns:
2046 * 0 - no early wakeup detected
2047 * <0 - -ETIMEDOUT or -ERESTARTNOINTR
2048 */
2049static inline
2050int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2051 struct futex_q *q, union futex_key *key2,
2052 struct hrtimer_sleeper *timeout)
2053{
2054 int ret = 0;
2055
2056 /*
2057 * With the hb lock held, we avoid races while we process the wakeup.
2058 * We only need to hold hb (and not hb2) to ensure atomicity as the
2059 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
2060 * It can't be requeued from uaddr2 to something else since we don't
2061 * support a PI aware source futex for requeue.
2062 */
2063 if (!match_futex(&q->key, key2)) {
2064 WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
2065 /*
2066 * We were woken prior to requeue by a timeout or a signal.
2067 * Unqueue the futex_q and determine which it was.
2068 */
2069 plist_del(&q->list, &q->list.plist);
2070 drop_futex_key_refs(&q->key);
2071
2072 if (timeout && !timeout->task)
2073 ret = -ETIMEDOUT;
2074 else
2075 ret = -ERESTARTNOINTR;
2076 }
2077 return ret;
2078}
2079
2080/**
2081 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
 2082 * @uaddr: the futex we initially wait on (non-pi)
2083 * @fshared: whether the futexes are shared (1) or not (0). They must be
2084 * the same type, no requeueing from private to shared, etc.
2085 * @val: the expected value of uaddr
2086 * @abs_time: absolute timeout
 2087 * @bitset: 32-bit wakeup bitset set by userspace, defaults to all.
2088 * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
2089 * @uaddr2: the pi futex we will take prior to returning to user-space
2090 *
2091 * The caller will wait on uaddr and will be requeued by futex_requeue() to
2092 * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and
2093 * complete the acquisition of the rt_mutex prior to returning to userspace.
2094 * This ensures the rt_mutex maintains an owner when it has waiters; without
2095 * one, the pi logic wouldn't know which task to boost/deboost, if there was a
2096 * need to.
2097 *
2098 * We call schedule in futex_wait_queue_me() when we enqueue and return there
2099 * via the following:
2100 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
2101 * 2) wakeup on uaddr2 after a requeue and subsequent unlock
2102 * 3) signal (before or after requeue)
2103 * 4) timeout (before or after requeue)
2104 *
 2105 * If 3, we set up a restart_block with futex_wait_requeue_pi() as the function.
2106 *
2107 * If 2, we may then block on trying to take the rt_mutex and return via:
2108 * 5) successful lock
2109 * 6) signal
2110 * 7) timeout
2111 * 8) other lock acquisition failure
2112 *
 2113 * If 6, we set up a restart_block with futex_lock_pi() as the function.
2114 *
2115 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
2116 *
2117 * Returns:
2118 * 0 - On success
2119 * <0 - On error
2120 */
2121static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2122 u32 val, ktime_t *abs_time, u32 bitset,
2123 int clockrt, u32 __user *uaddr2)
2124{
2125 struct hrtimer_sleeper timeout, *to = NULL;
2126 struct rt_mutex_waiter rt_waiter;
2127 struct rt_mutex *pi_mutex = NULL;
2128 struct futex_hash_bucket *hb;
2129 union futex_key key2;
2130 struct futex_q q;
2131 int res, ret;
2132
2133 if (!bitset)
2134 return -EINVAL;
2135
2136 if (abs_time) {
2137 to = &timeout;
2138 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
2139 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
2140 hrtimer_init_sleeper(to, current);
2141 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2142 current->timer_slack_ns);
2143 }
2144
2145 /*
2146 * The waiter is allocated on our stack, manipulated by the requeue
2147 * code while we sleep on uaddr.
2148 */
2149 debug_rt_mutex_init_waiter(&rt_waiter);
2150 rt_waiter.task = NULL;
2151
2152 q.pi_state = NULL;
2153 q.bitset = bitset;
2154 q.rt_waiter = &rt_waiter;
2155
2156 key2 = FUTEX_KEY_INIT;
2157 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
2158 if (unlikely(ret != 0))
2159 goto out;
2160
2161 /* Prepare to wait on uaddr. */
2162 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
2163 if (ret)
2164 goto out_key2;
2165
2166 /* Queue the futex_q, drop the hb lock, wait for wakeup. */
2167 futex_wait_queue_me(hb, &q, to);
2168
2169 spin_lock(&hb->lock);
2170 ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
2171 spin_unlock(&hb->lock);
2172 if (ret)
2173 goto out_put_keys;
2174
2175 /*
2176 * In order for us to be here, we know our q.key == key2, and since
2177 * we took the hb->lock above, we also know that futex_requeue() has
2178 * completed and we no longer have to concern ourselves with a wakeup
 2179 * race with the atomic proxy lock acquisition by the requeue code.
2180 */
2181
2182 /* Check if the requeue code acquired the second futex for us. */
2183 if (!q.rt_waiter) {
2184 /*
2185 * Got the lock. We might not be the anticipated owner if we
2186 * did a lock-steal - fix up the PI-state in that case.
2187 */
2188 if (q.pi_state && (q.pi_state->owner != current)) {
2189 spin_lock(q.lock_ptr);
2190 ret = fixup_pi_state_owner(uaddr2, &q, current,
2191 fshared);
2192 spin_unlock(q.lock_ptr);
2193 }
2194 } else {
2195 /*
2196 * We have been woken up by futex_unlock_pi(), a timeout, or a
2197 * signal. futex_unlock_pi() will not destroy the lock_ptr nor
2198 * the pi_state.
2199 */
 2200 WARN_ON(!q.pi_state);
2201 pi_mutex = &q.pi_state->pi_mutex;
2202 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2203 debug_rt_mutex_free_waiter(&rt_waiter);
2204
2205 spin_lock(q.lock_ptr);
2206 /*
2207 * Fixup the pi_state owner and possibly acquire the lock if we
2208 * haven't already.
2209 */
2210 res = fixup_owner(uaddr2, fshared, &q, !ret);
2211 /*
 2212 * If fixup_owner() returned an error, propagate that. If it
2213 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
2214 */
2215 if (res)
2216 ret = (res < 0) ? res : 0;
2217
2218 /* Unqueue and drop the lock. */
2219 unqueue_me_pi(&q);
2220 }
2221
2222 /*
2223 * If fixup_pi_state_owner() faulted and was unable to handle the
2224 * fault, unlock the rt_mutex and return the fault to userspace.
2225 */
2226 if (ret == -EFAULT) {
2227 if (rt_mutex_owner(pi_mutex) == current)
2228 rt_mutex_unlock(pi_mutex);
2229 } else if (ret == -EINTR) {
2230 /*
2231 * We've already been requeued, but we have no way to
2232 * restart by calling futex_lock_pi() directly. We
2233 * could restart the syscall, but that will look at
2234 * the user space value and return right away. So we
2235 * drop back with EWOULDBLOCK to tell user space that
 2236 * "val" has been changed. That's the same as what the
2237 * restart of the syscall would do in
2238 * futex_wait_setup().
2239 */
2240 ret = -EWOULDBLOCK;
2241 }
2242
2243out_put_keys:
2244 put_futex_key(fshared, &q.key);
2245out_key2:
2246 put_futex_key(fshared, &key2);
2247
2248out:
2249 if (to) {
2250 hrtimer_cancel(&to->timer);
2251 destroy_hrtimer_on_stack(&to->timer);
2252 }
2253 return ret;
2254}
2255
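The userspace pattern this function exists for is pthread_cond_wait() on a PRIO_INHERIT mutex: on a glibc and kernel that support it, the wait maps to FUTEX_WAIT_REQUEUE_PI and the signal to FUTEX_CMP_REQUEUE_PI (which ops glibc actually emits depends on the library version, so take the annotations below as assumptions). A minimal sketch, built with -pthread:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t m;
static pthread_cond_t c = PTHREAD_COND_INITIALIZER;
static int ready;

static void *signaller(void *arg)
{
	pthread_mutex_lock(&m);
	ready = 1;
	pthread_cond_signal(&c);	/* candidate FUTEX_CMP_REQUEUE_PI */
	pthread_mutex_unlock(&m);
	return NULL;
}

int main(void)
{
	pthread_mutexattr_t ma;
	pthread_t t;

	pthread_mutexattr_init(&ma);
	pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_INHERIT);
	pthread_mutex_init(&m, &ma);

	pthread_mutex_lock(&m);
	pthread_create(&t, NULL, signaller, NULL);
	while (!ready)
		pthread_cond_wait(&c, &m); /* candidate FUTEX_WAIT_REQUEUE_PI */
	pthread_mutex_unlock(&m);
	pthread_join(t, NULL);
	puts("woken and holding the PI mutex");
	return 0;
}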
1677/* 2256/*
1678 * Support for robust futexes: the kernel cleans up held futexes at 2257 * Support for robust futexes: the kernel cleans up held futexes at
1679 * thread exit time. 2258 * thread exit time.
@@ -1896,7 +2475,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
1896 fshared = 1; 2475 fshared = 1;
1897 2476
1898 clockrt = op & FUTEX_CLOCK_REALTIME; 2477 clockrt = op & FUTEX_CLOCK_REALTIME;
1899 if (clockrt && cmd != FUTEX_WAIT_BITSET) 2478 if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
1900 return -ENOSYS; 2479 return -ENOSYS;
1901 2480
1902 switch (cmd) { 2481 switch (cmd) {
@@ -1911,10 +2490,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
1911 ret = futex_wake(uaddr, fshared, val, val3); 2490 ret = futex_wake(uaddr, fshared, val, val3);
1912 break; 2491 break;
1913 case FUTEX_REQUEUE: 2492 case FUTEX_REQUEUE:
1914 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); 2493 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
1915 break; 2494 break;
1916 case FUTEX_CMP_REQUEUE: 2495 case FUTEX_CMP_REQUEUE:
1917 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); 2496 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
2497 0);
1918 break; 2498 break;
1919 case FUTEX_WAKE_OP: 2499 case FUTEX_WAKE_OP:
1920 ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); 2500 ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
@@ -1931,6 +2511,15 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
1931 if (futex_cmpxchg_enabled) 2511 if (futex_cmpxchg_enabled)
1932 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); 2512 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
1933 break; 2513 break;
2514 case FUTEX_WAIT_REQUEUE_PI:
2515 val3 = FUTEX_BITSET_MATCH_ANY;
2516 ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
2517 clockrt, uaddr2);
2518 break;
2519 case FUTEX_CMP_REQUEUE_PI:
2520 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
2521 1);
2522 break;
1934 default: 2523 default:
1935 ret = -ENOSYS; 2524 ret = -ENOSYS;
1936 } 2525 }
@@ -1948,7 +2537,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
1948 int cmd = op & FUTEX_CMD_MASK; 2537 int cmd = op & FUTEX_CMD_MASK;
1949 2538
1950 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || 2539 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
1951 cmd == FUTEX_WAIT_BITSET)) { 2540 cmd == FUTEX_WAIT_BITSET ||
2541 cmd == FUTEX_WAIT_REQUEUE_PI)) {
1952 if (copy_from_user(&ts, utime, sizeof(ts)) != 0) 2542 if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
1953 return -EFAULT; 2543 return -EFAULT;
1954 if (!timespec_valid(&ts)) 2544 if (!timespec_valid(&ts))
@@ -1960,11 +2550,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
1960 tp = &t; 2550 tp = &t;
1961 } 2551 }
1962 /* 2552 /*
1963 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. 2553 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
1964 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. 2554 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
1965 */ 2555 */
1966 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || 2556 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
1967 cmd == FUTEX_WAKE_OP) 2557 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
1968 val2 = (u32) (unsigned long) utime; 2558 val2 = (u32) (unsigned long) utime;
1969 2559
1970 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); 2560 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
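The subtle part of this entry point is the comment above: for the requeue family the utime argument slot carries an integer (how many waiters to requeue), not a pointer. A hedged userspace sketch of FUTEX_CMP_REQUEUE showing the cast (the wrapper name is ours):

#include <limits.h>
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Wake up to nr_wake waiters on f1 and requeue up to nr_requeue of the
 * rest onto f2, provided *f1 still equals expected. The fourth syscall
 * argument is the timeout slot, reused here as a plain count.
 */
static long futex_cmp_requeue(uint32_t *f1, uint32_t *f2, int nr_wake,
			      int nr_requeue, uint32_t expected)
{
	return syscall(SYS_futex, f1, FUTEX_CMP_REQUEUE, nr_wake,
		       (void *)(unsigned long)nr_requeue, f2, expected);
}

int main(void)
{
	static uint32_t f1, f2;
	long ret = futex_cmp_requeue(&f1, &f2, 1, INT_MAX, 0);

	/* With no waiters queued this reports 0 woken+requeued;
	 * -1/EAGAIN would mean *f1 no longer held the expected value. */
	printf("woken+requeued: %ld\n", ret);
	return ret < 0;
}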
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 3394f8f52964..7d047808419d 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
3obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o 3obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
4obj-$(CONFIG_PROC_FS) += proc.o 4obj-$(CONFIG_PROC_FS) += proc.o
5obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o 5obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
6obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o 6obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o
7obj-$(CONFIG_PM_SLEEP) += pm.o 7obj-$(CONFIG_PM_SLEEP) += pm.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c687ba4363f2..13c68e71b726 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,7 +359,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
359 359
360 spin_lock(&desc->lock); 360 spin_lock(&desc->lock);
361 mask_ack_irq(desc, irq); 361 mask_ack_irq(desc, irq);
362 desc = irq_remap_to_desc(irq, desc);
363 362
364 if (unlikely(desc->status & IRQ_INPROGRESS)) 363 if (unlikely(desc->status & IRQ_INPROGRESS))
365 goto out_unlock; 364 goto out_unlock;
@@ -438,7 +437,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
438 desc->status &= ~IRQ_INPROGRESS; 437 desc->status &= ~IRQ_INPROGRESS;
439out: 438out:
440 desc->chip->eoi(irq); 439 desc->chip->eoi(irq);
441 desc = irq_remap_to_desc(irq, desc);
442 440
443 spin_unlock(&desc->lock); 441 spin_unlock(&desc->lock);
444} 442}
@@ -475,7 +473,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
475 !desc->action)) { 473 !desc->action)) {
476 desc->status |= (IRQ_PENDING | IRQ_MASKED); 474 desc->status |= (IRQ_PENDING | IRQ_MASKED);
477 mask_ack_irq(desc, irq); 475 mask_ack_irq(desc, irq);
478 desc = irq_remap_to_desc(irq, desc);
479 goto out_unlock; 476 goto out_unlock;
480 } 477 }
481 kstat_incr_irqs_this_cpu(irq, desc); 478 kstat_incr_irqs_this_cpu(irq, desc);
@@ -483,7 +480,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
483 /* Start handling the irq */ 480 /* Start handling the irq */
484 if (desc->chip->ack) 481 if (desc->chip->ack)
485 desc->chip->ack(irq); 482 desc->chip->ack(irq);
486 desc = irq_remap_to_desc(irq, desc);
487 483
488 /* Mark the IRQ currently in progress.*/ 484 /* Mark the IRQ currently in progress.*/
489 desc->status |= IRQ_INPROGRESS; 485 desc->status |= IRQ_INPROGRESS;
@@ -544,10 +540,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
544 if (!noirqdebug) 540 if (!noirqdebug)
545 note_interrupt(irq, desc, action_ret); 541 note_interrupt(irq, desc, action_ret);
546 542
547 if (desc->chip->eoi) { 543 if (desc->chip->eoi)
548 desc->chip->eoi(irq); 544 desc->chip->eoi(irq);
549 desc = irq_remap_to_desc(irq, desc);
550 }
551} 545}
552 546
553void 547void
@@ -582,10 +576,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
582 576
583 /* Uninstall? */ 577 /* Uninstall? */
584 if (handle == handle_bad_irq) { 578 if (handle == handle_bad_irq) {
585 if (desc->chip != &no_irq_chip) { 579 if (desc->chip != &no_irq_chip)
586 mask_ack_irq(desc, irq); 580 mask_ack_irq(desc, irq);
587 desc = irq_remap_to_desc(irq, desc);
588 }
589 desc->status |= IRQ_DISABLED; 581 desc->status |= IRQ_DISABLED;
590 desc->depth = 1; 582 desc->depth = 1;
591 } 583 }
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 26e08754744f..a60018402f42 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -11,14 +11,15 @@
11 */ 11 */
12 12
13#include <linux/irq.h> 13#include <linux/irq.h>
14#include <linux/slab.h>
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/random.h> 16#include <linux/random.h>
16#include <linux/interrupt.h> 17#include <linux/interrupt.h>
17#include <linux/kernel_stat.h> 18#include <linux/kernel_stat.h>
18#include <linux/rculist.h> 19#include <linux/rculist.h>
19#include <linux/hash.h> 20#include <linux/hash.h>
20#include <trace/irq.h>
21#include <linux/bootmem.h> 21#include <linux/bootmem.h>
22#include <trace/events/irq.h>
22 23
23#include "internals.h" 24#include "internals.h"
24 25
@@ -81,45 +82,48 @@ static struct irq_desc irq_desc_init = {
81 .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), 82 .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
82}; 83};
83 84
84void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) 85void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
85{ 86{
86 int node;
87 void *ptr; 87 void *ptr;
88 88
89 node = cpu_to_node(cpu); 89 if (slab_is_available())
90 ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node); 90 ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
91 GFP_ATOMIC, node);
92 else
93 ptr = alloc_bootmem_node(NODE_DATA(node),
94 nr * sizeof(*desc->kstat_irqs));
91 95
92 /* 96 /*
 93 * don't overwrite if we cannot get a new one 97 * don't overwrite if we cannot get a new one
 94 * init_copy_kstat_irqs() could still use the old one 98 * init_copy_kstat_irqs() could still use the old one
95 */ 99 */
96 if (ptr) { 100 if (ptr) {
97 printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", 101 printk(KERN_DEBUG " alloc kstat_irqs on node %d\n", node);
98 cpu, node);
99 desc->kstat_irqs = ptr; 102 desc->kstat_irqs = ptr;
100 } 103 }
101} 104}
102 105
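What init_kstat_irqs() gains here is a single entry point that picks its backend by boot phase: bootmem before the slab allocator is up, kzalloc_node() after. A self-contained model of that shape (all names invented; the kernel's bootmem arena is of course not a static buffer):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int slab_available;		/* stand-in for slab_is_available() */

static void *phase_aware_zalloc(size_t size)
{
	static char bootmem[4096];	/* toy boot-time arena */
	static size_t used;
	void *p;

	if (slab_available)
		return calloc(1, size);	/* late: normal allocator */
	if (used + size > sizeof(bootmem))
		return NULL;		/* early arena exhausted */
	p = bootmem + used;
	used += size;
	return memset(p, 0, size);
}

int main(void)
{
	void *early = phase_aware_zalloc(64);	/* served from the arena */

	slab_available = 1;
	void *late = phase_aware_zalloc(64);	/* served by calloc() */

	printf("early=%p late=%p\n", early, late);
	free(late);
	return 0;
}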
103static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) 106static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
104{ 107{
105 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); 108 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
106 109
107 spin_lock_init(&desc->lock); 110 spin_lock_init(&desc->lock);
108 desc->irq = irq; 111 desc->irq = irq;
109#ifdef CONFIG_SMP 112#ifdef CONFIG_SMP
110 desc->cpu = cpu; 113 desc->node = node;
111#endif 114#endif
112 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 115 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
113 init_kstat_irqs(desc, cpu, nr_cpu_ids); 116 init_kstat_irqs(desc, node, nr_cpu_ids);
114 if (!desc->kstat_irqs) { 117 if (!desc->kstat_irqs) {
115 printk(KERN_ERR "can not alloc kstat_irqs\n"); 118 printk(KERN_ERR "can not alloc kstat_irqs\n");
116 BUG_ON(1); 119 BUG_ON(1);
117 } 120 }
118 if (!init_alloc_desc_masks(desc, cpu, false)) { 121 if (!alloc_desc_masks(desc, node, false)) {
119 printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); 122 printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
120 BUG_ON(1); 123 BUG_ON(1);
121 } 124 }
122 arch_init_chip_data(desc, cpu); 125 init_desc_masks(desc);
126 arch_init_chip_data(desc, node);
123} 127}
124 128
125/* 129/*
@@ -169,7 +173,8 @@ int __init early_irq_init(void)
169 desc[i].irq = i; 173 desc[i].irq = i;
170 desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; 174 desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
171 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 175 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
172 init_alloc_desc_masks(&desc[i], 0, true); 176 alloc_desc_masks(&desc[i], 0, true);
177 init_desc_masks(&desc[i]);
173 irq_desc_ptrs[i] = desc + i; 178 irq_desc_ptrs[i] = desc + i;
174 } 179 }
175 180
@@ -187,11 +192,10 @@ struct irq_desc *irq_to_desc(unsigned int irq)
187 return NULL; 192 return NULL;
188} 193}
189 194
190struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) 195struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
191{ 196{
192 struct irq_desc *desc; 197 struct irq_desc *desc;
193 unsigned long flags; 198 unsigned long flags;
194 int node;
195 199
196 if (irq >= nr_irqs) { 200 if (irq >= nr_irqs) {
197 WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", 201 WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
@@ -210,15 +214,17 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
210 if (desc) 214 if (desc)
211 goto out_unlock; 215 goto out_unlock;
212 216
213 node = cpu_to_node(cpu); 217 if (slab_is_available())
214 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); 218 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
215 printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n", 219 else
216 irq, cpu, node); 220 desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
221
222 printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node);
217 if (!desc) { 223 if (!desc) {
218 printk(KERN_ERR "can not alloc irq_desc\n"); 224 printk(KERN_ERR "can not alloc irq_desc\n");
219 BUG_ON(1); 225 BUG_ON(1);
220 } 226 }
221 init_one_irq_desc(irq, desc, cpu); 227 init_one_irq_desc(irq, desc, node);
222 228
223 irq_desc_ptrs[irq] = desc; 229 irq_desc_ptrs[irq] = desc;
224 230
@@ -256,7 +262,8 @@ int __init early_irq_init(void)
256 262
257 for (i = 0; i < count; i++) { 263 for (i = 0; i < count; i++) {
258 desc[i].irq = i; 264 desc[i].irq = i;
259 init_alloc_desc_masks(&desc[i], 0, true); 265 alloc_desc_masks(&desc[i], 0, true);
266 init_desc_masks(&desc[i]);
260 desc[i].kstat_irqs = kstat_irqs_all[i]; 267 desc[i].kstat_irqs = kstat_irqs_all[i];
261 } 268 }
262 return arch_early_irq_init(); 269 return arch_early_irq_init();
@@ -267,7 +274,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
267 return (irq < NR_IRQS) ? irq_desc + irq : NULL; 274 return (irq < NR_IRQS) ? irq_desc + irq : NULL;
268} 275}
269 276
270struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) 277struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
271{ 278{
272 return irq_to_desc(irq); 279 return irq_to_desc(irq);
273} 280}
@@ -348,9 +355,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
348 "but no thread function available.", irq, action->name); 355 "but no thread function available.", irq, action->name);
349} 356}
350 357
351DEFINE_TRACE(irq_handler_entry);
352DEFINE_TRACE(irq_handler_exit);
353
354/** 358/**
355 * handle_IRQ_event - irq action chain handler 359 * handle_IRQ_event - irq action chain handler
356 * @irq: the interrupt number 360 * @irq: the interrupt number
@@ -453,11 +457,8 @@ unsigned int __do_IRQ(unsigned int irq)
453 /* 457 /*
454 * No locking required for CPU-local interrupts: 458 * No locking required for CPU-local interrupts:
455 */ 459 */
456 if (desc->chip->ack) { 460 if (desc->chip->ack)
457 desc->chip->ack(irq); 461 desc->chip->ack(irq);
458 /* get new one */
459 desc = irq_remap_to_desc(irq, desc);
460 }
461 if (likely(!(desc->status & IRQ_DISABLED))) { 462 if (likely(!(desc->status & IRQ_DISABLED))) {
462 action_ret = handle_IRQ_event(irq, desc->action); 463 action_ret = handle_IRQ_event(irq, desc->action);
463 if (!noirqdebug) 464 if (!noirqdebug)
@@ -468,10 +469,8 @@ unsigned int __do_IRQ(unsigned int irq)
468 } 469 }
469 470
470 spin_lock(&desc->lock); 471 spin_lock(&desc->lock);
471 if (desc->chip->ack) { 472 if (desc->chip->ack)
472 desc->chip->ack(irq); 473 desc->chip->ack(irq);
473 desc = irq_remap_to_desc(irq, desc);
474 }
475 /* 474 /*
476 * REPLAY is when Linux resends an IRQ that was dropped earlier 475 * REPLAY is when Linux resends an IRQ that was dropped earlier
477 * WAITING is used by probe to mark irqs that are being tested 476 * WAITING is used by probe to mark irqs that are being tested
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 01ce20eab38f..73468253143b 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -16,7 +16,7 @@ extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
16extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); 16extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
17 17
18extern struct lock_class_key irq_desc_lock_class; 18extern struct lock_class_key irq_desc_lock_class;
19extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); 19extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
20extern void clear_kstat_irqs(struct irq_desc *desc); 20extern void clear_kstat_irqs(struct irq_desc *desc);
21extern spinlock_t sparse_irq_lock; 21extern spinlock_t sparse_irq_lock;
22 22
@@ -42,6 +42,9 @@ static inline void unregister_handler_proc(unsigned int irq,
42 42
43extern int irq_select_affinity_usr(unsigned int irq); 43extern int irq_select_affinity_usr(unsigned int irq);
44 44
45extern void
46irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask);
47
45/* 48/*
46 * Debugging printout: 49 * Debugging printout:
47 */ 50 */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2734eca59243..aaf5c9d05770 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -80,7 +80,7 @@ int irq_can_set_affinity(unsigned int irq)
80 return 1; 80 return 1;
81} 81}
82 82
83static void 83void
84irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) 84irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
85{ 85{
86 struct irqaction *action = desc->action; 86 struct irqaction *action = desc->action;
@@ -109,17 +109,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
109 spin_lock_irqsave(&desc->lock, flags); 109 spin_lock_irqsave(&desc->lock, flags);
110 110
111#ifdef CONFIG_GENERIC_PENDING_IRQ 111#ifdef CONFIG_GENERIC_PENDING_IRQ
112 if (desc->status & IRQ_MOVE_PCNTXT) 112 if (desc->status & IRQ_MOVE_PCNTXT) {
113 desc->chip->set_affinity(irq, cpumask); 113 if (!desc->chip->set_affinity(irq, cpumask)) {
114 cpumask_copy(desc->affinity, cpumask);
115 irq_set_thread_affinity(desc, cpumask);
116 }
117 }
114 else { 118 else {
115 desc->status |= IRQ_MOVE_PENDING; 119 desc->status |= IRQ_MOVE_PENDING;
116 cpumask_copy(desc->pending_mask, cpumask); 120 cpumask_copy(desc->pending_mask, cpumask);
117 } 121 }
118#else 122#else
119 cpumask_copy(desc->affinity, cpumask); 123 if (!desc->chip->set_affinity(irq, cpumask)) {
120 desc->chip->set_affinity(irq, cpumask); 124 cpumask_copy(desc->affinity, cpumask);
125 irq_set_thread_affinity(desc, cpumask);
126 }
121#endif 127#endif
122 irq_set_thread_affinity(desc, cpumask);
123 desc->status |= IRQ_AFFINITY_SET; 128 desc->status |= IRQ_AFFINITY_SET;
124 spin_unlock_irqrestore(&desc->lock, flags); 129 spin_unlock_irqrestore(&desc->lock, flags);
125 return 0; 130 return 0;
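The behavioral change in this hunk (and the matching one in kernel/irq/migration.c below) is that desc->affinity is now copied only when the chip's set_affinity() callback reports success, so the cached mask can no longer drift from what the hardware accepted. A self-contained model, all names invented:

#include <stdio.h>

static unsigned long cached_affinity = 0x1;

/* Stand-in for desc->chip->set_affinity(): 0 means success. */
static int chip_set_affinity(unsigned int irq, unsigned long mask)
{
	return mask ? 0 : -1;		/* reject an empty mask */
}

static void set_affinity(unsigned int irq, unsigned long mask)
{
	/* Commit the cached copy only after the hardware accepted it. */
	if (!chip_set_affinity(irq, mask))
		cached_affinity = mask;
}

int main(void)
{
	set_affinity(10, 0x0);	/* rejected: cache keeps 0x1 */
	set_affinity(10, 0xc);	/* accepted: cache becomes 0xc */
	printf("cached affinity: %#lx\n", cached_affinity);
	return 0;
}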
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index e05ad9be43b7..cfe767ca1545 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -1,5 +1,8 @@
1 1
2#include <linux/irq.h> 2#include <linux/irq.h>
3#include <linux/interrupt.h>
4
5#include "internals.h"
3 6
4void move_masked_irq(int irq) 7void move_masked_irq(int irq)
5{ 8{
@@ -39,11 +42,12 @@ void move_masked_irq(int irq)
39 * masking the irqs. 42 * masking the irqs.
40 */ 43 */
41 if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) 44 if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
42 < nr_cpu_ids)) { 45 < nr_cpu_ids))
43 cpumask_and(desc->affinity, 46 if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
44 desc->pending_mask, cpu_online_mask); 47 cpumask_copy(desc->affinity, desc->pending_mask);
45 desc->chip->set_affinity(irq, desc->affinity); 48 irq_set_thread_affinity(desc, desc->pending_mask);
46 } 49 }
50
47 cpumask_clear(desc->pending_mask); 51 cpumask_clear(desc->pending_mask);
48} 52}
49 53
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 44bbdcbaf8d2..2f69bee57bf2 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -15,9 +15,9 @@
15 15
16static void init_copy_kstat_irqs(struct irq_desc *old_desc, 16static void init_copy_kstat_irqs(struct irq_desc *old_desc,
17 struct irq_desc *desc, 17 struct irq_desc *desc,
18 int cpu, int nr) 18 int node, int nr)
19{ 19{
20 init_kstat_irqs(desc, cpu, nr); 20 init_kstat_irqs(desc, node, nr);
21 21
22 if (desc->kstat_irqs != old_desc->kstat_irqs) 22 if (desc->kstat_irqs != old_desc->kstat_irqs)
23 memcpy(desc->kstat_irqs, old_desc->kstat_irqs, 23 memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
@@ -34,20 +34,20 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
34} 34}
35 35
36static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, 36static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
37 struct irq_desc *desc, int cpu) 37 struct irq_desc *desc, int node)
38{ 38{
39 memcpy(desc, old_desc, sizeof(struct irq_desc)); 39 memcpy(desc, old_desc, sizeof(struct irq_desc));
40 if (!init_alloc_desc_masks(desc, cpu, false)) { 40 if (!alloc_desc_masks(desc, node, false)) {
41 printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " 41 printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
42 "for migration.\n", irq); 42 "for migration.\n", irq);
43 return false; 43 return false;
44 } 44 }
45 spin_lock_init(&desc->lock); 45 spin_lock_init(&desc->lock);
46 desc->cpu = cpu; 46 desc->node = node;
47 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 47 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
48 init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); 48 init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
49 init_copy_desc_masks(old_desc, desc); 49 init_copy_desc_masks(old_desc, desc);
50 arch_init_copy_chip_data(old_desc, desc, cpu); 50 arch_init_copy_chip_data(old_desc, desc, node);
51 return true; 51 return true;
52} 52}
53 53
@@ -59,12 +59,11 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
59} 59}
60 60
61static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, 61static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
62 int cpu) 62 int node)
63{ 63{
64 struct irq_desc *desc; 64 struct irq_desc *desc;
65 unsigned int irq; 65 unsigned int irq;
66 unsigned long flags; 66 unsigned long flags;
67 int node;
68 67
69 irq = old_desc->irq; 68 irq = old_desc->irq;
70 69
@@ -76,7 +75,6 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
76 if (desc && old_desc != desc) 75 if (desc && old_desc != desc)
77 goto out_unlock; 76 goto out_unlock;
78 77
79 node = cpu_to_node(cpu);
80 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); 78 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
81 if (!desc) { 79 if (!desc) {
82 printk(KERN_ERR "irq %d: can not get new irq_desc " 80 printk(KERN_ERR "irq %d: can not get new irq_desc "
@@ -85,7 +83,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
85 desc = old_desc; 83 desc = old_desc;
86 goto out_unlock; 84 goto out_unlock;
87 } 85 }
88 if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) { 86 if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) {
89 /* still use old one */ 87 /* still use old one */
90 kfree(desc); 88 kfree(desc);
91 desc = old_desc; 89 desc = old_desc;
@@ -97,9 +95,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
97 95
98 /* free the old one */ 96 /* free the old one */
99 free_one_irq_desc(old_desc, desc); 97 free_one_irq_desc(old_desc, desc);
100 spin_unlock(&old_desc->lock);
101 kfree(old_desc); 98 kfree(old_desc);
102 spin_lock(&desc->lock);
103 99
104 return desc; 100 return desc;
105 101
@@ -109,24 +105,14 @@ out_unlock:
109 return desc; 105 return desc;
110} 106}
111 107
112struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) 108struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
113{ 109{
114 int old_cpu;
115 int node, old_node;
116
117 /* those all static, do move them */ 110 /* those all static, do move them */
118 if (desc->irq < NR_IRQS_LEGACY) 111 if (desc->irq < NR_IRQS_LEGACY)
119 return desc; 112 return desc;
120 113
121 old_cpu = desc->cpu; 114 if (desc->node != node)
122 if (old_cpu != cpu) { 115 desc = __real_move_irq_desc(desc, node);
123 node = cpu_to_node(cpu);
124 old_node = cpu_to_node(old_cpu);
125 if (old_node != node)
126 desc = __real_move_irq_desc(desc, cpu);
127 else
128 desc->cpu = cpu;
129 }
130 116
131 return desc; 117 return desc;
132} 118}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 5a758c6e4950..e4983770913b 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1451,7 +1451,6 @@ int kernel_kexec(void)
1451 error = device_suspend(PMSG_FREEZE); 1451 error = device_suspend(PMSG_FREEZE);
1452 if (error) 1452 if (error)
1453 goto Resume_console; 1453 goto Resume_console;
1454 device_pm_lock();
1455 /* At this point, device_suspend() has been called, 1454 /* At this point, device_suspend() has been called,
1456 * but *not* device_power_down(). We *must* 1455 * but *not* device_power_down(). We *must*
1457 * device_power_down() now. Otherwise, drivers for 1456 * device_power_down() now. Otherwise, drivers for
@@ -1489,7 +1488,6 @@ int kernel_kexec(void)
1489 enable_nonboot_cpus(); 1488 enable_nonboot_cpus();
1490 device_power_up(PMSG_RESTORE); 1489 device_power_up(PMSG_RESTORE);
1491 Resume_devices: 1490 Resume_devices:
1492 device_pm_unlock();
1493 device_resume(PMSG_RESTORE); 1491 device_resume(PMSG_RESTORE);
1494 Resume_console: 1492 Resume_console:
1495 resume_console(); 1493 resume_console();
diff --git a/kernel/kmod.c b/kernel/kmod.c
index b750675251e5..7e95bedb2bfc 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -370,8 +370,10 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
370 sub_info->argv = argv; 370 sub_info->argv = argv;
371 sub_info->envp = envp; 371 sub_info->envp = envp;
372 sub_info->cred = prepare_usermodehelper_creds(); 372 sub_info->cred = prepare_usermodehelper_creds();
373 if (!sub_info->cred) 373 if (!sub_info->cred) {
374 kfree(sub_info);
374 return NULL; 375 return NULL;
376 }
375 377
376 out: 378 out:
377 return sub_info; 379 return sub_info;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ebaf8519abf..41c88fe40500 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,7 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/mutex.h> 15#include <linux/mutex.h>
16#include <trace/sched.h> 16#include <trace/events/sched.h>
17 17
18#define KTHREAD_NICE_LEVEL (-5) 18#define KTHREAD_NICE_LEVEL (-5)
19 19
@@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock);
21static LIST_HEAD(kthread_create_list); 21static LIST_HEAD(kthread_create_list);
22struct task_struct *kthreadd_task; 22struct task_struct *kthreadd_task;
23 23
24DEFINE_TRACE(sched_kthread_stop);
25DEFINE_TRACE(sched_kthread_stop_ret);
26
27struct kthread_create_info 24struct kthread_create_info
28{ 25{
29 /* Information passed to kthread() from kthreadd. */ 26 /* Information passed to kthread() from kthreadd. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index accb40cdb12a..8bbeef996c76 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -42,12 +42,14 @@
42#include <linux/hash.h> 42#include <linux/hash.h>
43#include <linux/ftrace.h> 43#include <linux/ftrace.h>
44#include <linux/stringify.h> 44#include <linux/stringify.h>
45#include <trace/lockdep.h>
46 45
47#include <asm/sections.h> 46#include <asm/sections.h>
48 47
49#include "lockdep_internals.h" 48#include "lockdep_internals.h"
50 49
50#define CREATE_TRACE_POINTS
51#include <trace/events/lockdep.h>
52
51#ifdef CONFIG_PROVE_LOCKING 53#ifdef CONFIG_PROVE_LOCKING
52int prove_locking = 1; 54int prove_locking = 1;
53module_param(prove_locking, int, 0644); 55module_param(prove_locking, int, 0644);
@@ -2935,8 +2937,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
2935} 2937}
2936EXPORT_SYMBOL_GPL(lock_set_class); 2938EXPORT_SYMBOL_GPL(lock_set_class);
2937 2939
2938DEFINE_TRACE(lock_acquire);
2939
2940/* 2940/*
2941 * We are not always called with irqs disabled - do that here, 2941 * We are not always called with irqs disabled - do that here,
2942 * and also avoid lockdep recursion: 2942 * and also avoid lockdep recursion:
@@ -2963,8 +2963,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2963} 2963}
2964EXPORT_SYMBOL_GPL(lock_acquire); 2964EXPORT_SYMBOL_GPL(lock_acquire);
2965 2965
2966DEFINE_TRACE(lock_release);
2967
2968void lock_release(struct lockdep_map *lock, int nested, 2966void lock_release(struct lockdep_map *lock, int nested,
2969 unsigned long ip) 2967 unsigned long ip)
2970{ 2968{
@@ -3105,6 +3103,8 @@ found_it:
3105 hlock->holdtime_stamp = now; 3103 hlock->holdtime_stamp = now;
3106 } 3104 }
3107 3105
3106 trace_lock_acquired(lock, ip, waittime);
3107
3108 stats = get_lock_stats(hlock_class(hlock)); 3108 stats = get_lock_stats(hlock_class(hlock));
3109 if (waittime) { 3109 if (waittime) {
3110 if (hlock->read) 3110 if (hlock->read)
@@ -3120,8 +3120,6 @@ found_it:
3120 lock->ip = ip; 3120 lock->ip = ip;
3121} 3121}
3122 3122
3123DEFINE_TRACE(lock_contended);
3124
3125void lock_contended(struct lockdep_map *lock, unsigned long ip) 3123void lock_contended(struct lockdep_map *lock, unsigned long ip)
3126{ 3124{
3127 unsigned long flags; 3125 unsigned long flags;
@@ -3143,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3143} 3141}
3144EXPORT_SYMBOL_GPL(lock_contended); 3142EXPORT_SYMBOL_GPL(lock_contended);
3145 3143
3146DEFINE_TRACE(lock_acquired);
3147
3148void lock_acquired(struct lockdep_map *lock, unsigned long ip) 3144void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3149{ 3145{
3150 unsigned long flags; 3146 unsigned long flags;
3151 3147
3152 trace_lock_acquired(lock, ip);
3153
3154 if (unlikely(!lock_stat)) 3148 if (unlikely(!lock_stat))
3155 return; 3149 return;
3156 3150
diff --git a/kernel/module.c b/kernel/module.c
index e797812a4d95..2383e60fcf3f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -18,6 +18,7 @@
18*/ 18*/
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/moduleloader.h> 20#include <linux/moduleloader.h>
21#include <linux/ftrace_event.h>
21#include <linux/init.h> 22#include <linux/init.h>
22#include <linux/kallsyms.h> 23#include <linux/kallsyms.h>
23#include <linux/fs.h> 24#include <linux/fs.h>
@@ -1489,9 +1490,6 @@ static void free_module(struct module *mod)
1489 /* Free any allocated parameters. */ 1490 /* Free any allocated parameters. */
1490 destroy_params(mod->kp, mod->num_kp); 1491 destroy_params(mod->kp, mod->num_kp);
1491 1492
1492 /* release any pointers to mcount in this module */
1493 ftrace_release(mod->module_core, mod->core_size);
1494
1495 /* This may be NULL, but that's OK */ 1493 /* This may be NULL, but that's OK */
1496 module_free(mod, mod->module_init); 1494 module_free(mod, mod->module_init);
1497 kfree(mod->args); 1495 kfree(mod->args);
@@ -1892,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod,
1892 unsigned int symindex = 0; 1890 unsigned int symindex = 0;
1893 unsigned int strindex = 0; 1891 unsigned int strindex = 0;
1894 unsigned int modindex, versindex, infoindex, pcpuindex; 1892 unsigned int modindex, versindex, infoindex, pcpuindex;
1895 unsigned int num_mcount;
1896 struct module *mod; 1893 struct module *mod;
1897 long err = 0; 1894 long err = 0;
1898 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1895 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
1899 unsigned long *mseg;
1900 mm_segment_t old_fs; 1896 mm_segment_t old_fs;
1901 1897
1902 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", 1898 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2172,7 +2168,19 @@ static noinline struct module *load_module(void __user *umod,
2172 sizeof(*mod->tracepoints), 2168 sizeof(*mod->tracepoints),
2173 &mod->num_tracepoints); 2169 &mod->num_tracepoints);
2174#endif 2170#endif
2175 2171#ifdef CONFIG_EVENT_TRACING
2172 mod->trace_events = section_objs(hdr, sechdrs, secstrings,
2173 "_ftrace_events",
2174 sizeof(*mod->trace_events),
2175 &mod->num_trace_events);
2176#endif
2177#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2178 /* sechdrs[0].sh_size is always zero */
2179 mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
2180 "__mcount_loc",
2181 sizeof(*mod->ftrace_callsites),
2182 &mod->num_ftrace_callsites);
2183#endif
2176#ifdef CONFIG_MODVERSIONS 2184#ifdef CONFIG_MODVERSIONS
2177 if ((mod->num_syms && !mod->crcs) 2185 if ((mod->num_syms && !mod->crcs)
2178 || (mod->num_gpl_syms && !mod->gpl_crcs) 2186 || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2237,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod,
2237 dynamic_debug_setup(debug, num_debug); 2245 dynamic_debug_setup(debug, num_debug);
2238 } 2246 }
2239 2247
2240 /* sechdrs[0].sh_size is always zero */
2241 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
2242 sizeof(*mseg), &num_mcount);
2243 ftrace_init_module(mod, mseg, mseg + num_mcount);
2244
2245 err = module_finalize(hdr, sechdrs, mod); 2248 err = module_finalize(hdr, sechdrs, mod);
2246 if (err < 0) 2249 if (err < 0)
2247 goto cleanup; 2250 goto cleanup;
@@ -2302,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod,
2302 cleanup: 2305 cleanup:
2303 kobject_del(&mod->mkobj.kobj); 2306 kobject_del(&mod->mkobj.kobj);
2304 kobject_put(&mod->mkobj.kobj); 2307 kobject_put(&mod->mkobj.kobj);
2305 ftrace_release(mod->module_core, mod->core_size);
2306 free_unload: 2308 free_unload:
2307 module_unload_free(mod); 2309 module_unload_free(mod);
2308#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) 2310#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 507cf2b5e9f1..e5cc0cd28d54 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -249,7 +249,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
249 249
 250 /* didn't get the lock, go to sleep: */ 250 /* didn't get the lock, go to sleep: */
251 spin_unlock_mutex(&lock->wait_lock, flags); 251 spin_unlock_mutex(&lock->wait_lock, flags);
252 __schedule(); 252 preempt_enable_no_resched();
253 schedule();
254 preempt_disable();
253 spin_lock_mutex(&lock->wait_lock, flags); 255 spin_lock_mutex(&lock->wait_lock, flags);
254 } 256 }
255 257
@@ -471,5 +473,28 @@ int __sched mutex_trylock(struct mutex *lock)
471 473
472 return ret; 474 return ret;
473} 475}
474
475EXPORT_SYMBOL(mutex_trylock); 476EXPORT_SYMBOL(mutex_trylock);
477
478/**
479 * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
480 * @cnt: the atomic which we are to dec
481 * @lock: the mutex to return holding if we dec to 0
482 *
483 * return true and hold lock if we dec to 0, return false otherwise
484 */
485int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
486{
487 /* dec if we can't possibly hit 0 */
488 if (atomic_add_unless(cnt, -1, 1))
489 return 0;
490 /* we might hit 0, so take the lock */
491 mutex_lock(lock);
492 if (!atomic_dec_and_test(cnt)) {
493 /* when we actually did the dec, we didn't hit 0 */
494 mutex_unlock(lock);
495 return 0;
496 }
497 /* we hit 0, and we hold the lock */
498 return 1;
499}
500EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
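A userspace analog of the helper above, as a sketch with C11 atomics and pthreads rather than the kernel implementation: decrements stay lock-free while the counter cannot reach zero, and only the potentially-final decrement takes the mutex, returning with it held so the caller can tear down safely.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int dec_and_mutex_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* Fast path: decrement without the lock while we cannot hit 0. */
	while (old > 1)
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return 0;

	/* We might hit 0: serialize against the final teardown. */
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) != 1) {
		pthread_mutex_unlock(lock);
		return 0;
	}
	return 1;	/* count hit 0 and the caller holds the lock */
}

int main(void)
{
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	atomic_int refs = 2;

	if (!dec_and_mutex_lock(&refs, &lock))
		puts("first drop: no teardown");
	if (dec_and_mutex_lock(&refs, &lock)) {
		puts("last drop: teardown under lock");
		pthread_mutex_unlock(&lock);
	}
	return 0;
}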
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b0dc9e7a0d17..5cb080e7eebd 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -215,8 +215,6 @@ static int create_image(int platform_mode)
215 if (error) 215 if (error)
216 return error; 216 return error;
217 217
218 device_pm_lock();
219
220 /* At this point, device_suspend() has been called, but *not* 218 /* At this point, device_suspend() has been called, but *not*
221 * device_power_down(). We *must* call device_power_down() now. 219 * device_power_down(). We *must* call device_power_down() now.
222 * Otherwise, drivers for some devices (e.g. interrupt controllers) 220 * Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -227,7 +225,7 @@ static int create_image(int platform_mode)
227 if (error) { 225 if (error) {
228 printk(KERN_ERR "PM: Some devices failed to power down, " 226 printk(KERN_ERR "PM: Some devices failed to power down, "
229 "aborting hibernation\n"); 227 "aborting hibernation\n");
230 goto Unlock; 228 return error;
231 } 229 }
232 230
233 error = platform_pre_snapshot(platform_mode); 231 error = platform_pre_snapshot(platform_mode);
@@ -280,9 +278,6 @@ static int create_image(int platform_mode)
280 device_power_up(in_suspend ? 278 device_power_up(in_suspend ?
281 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 279 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
282 280
283 Unlock:
284 device_pm_unlock();
285
286 return error; 281 return error;
287} 282}
288 283
@@ -344,13 +339,11 @@ static int resume_target_kernel(bool platform_mode)
344{ 339{
345 int error; 340 int error;
346 341
347 device_pm_lock();
348
349 error = device_power_down(PMSG_QUIESCE); 342 error = device_power_down(PMSG_QUIESCE);
350 if (error) { 343 if (error) {
351 printk(KERN_ERR "PM: Some devices failed to power down, " 344 printk(KERN_ERR "PM: Some devices failed to power down, "
352 "aborting resume\n"); 345 "aborting resume\n");
353 goto Unlock; 346 return error;
354 } 347 }
355 348
356 error = platform_pre_restore(platform_mode); 349 error = platform_pre_restore(platform_mode);
@@ -403,9 +396,6 @@ static int resume_target_kernel(bool platform_mode)
403 396
404 device_power_up(PMSG_RECOVER); 397 device_power_up(PMSG_RECOVER);
405 398
406 Unlock:
407 device_pm_unlock();
408
409 return error; 399 return error;
410} 400}
411 401
@@ -464,11 +454,9 @@ int hibernation_platform_enter(void)
464 goto Resume_devices; 454 goto Resume_devices;
465 } 455 }
466 456
467 device_pm_lock();
468
469 error = device_power_down(PMSG_HIBERNATE); 457 error = device_power_down(PMSG_HIBERNATE);
470 if (error) 458 if (error)
471 goto Unlock; 459 goto Resume_devices;
472 460
473 error = hibernation_ops->prepare(); 461 error = hibernation_ops->prepare();
474 if (error) 462 if (error)
@@ -493,9 +481,6 @@ int hibernation_platform_enter(void)
493 481
494 device_power_up(PMSG_RESTORE); 482 device_power_up(PMSG_RESTORE);
495 483
496 Unlock:
497 device_pm_unlock();
498
499 Resume_devices: 484 Resume_devices:
500 entering_platform_hibernation = false; 485 entering_platform_hibernation = false;
501 device_resume(PMSG_RESTORE); 486 device_resume(PMSG_RESTORE);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f99ed6a75eac..868028280d13 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -289,12 +289,10 @@ static int suspend_enter(suspend_state_t state)
289{ 289{
290 int error; 290 int error;
291 291
292 device_pm_lock();
293
294 if (suspend_ops->prepare) { 292 if (suspend_ops->prepare) {
295 error = suspend_ops->prepare(); 293 error = suspend_ops->prepare();
296 if (error) 294 if (error)
297 goto Done; 295 return error;
298 } 296 }
299 297
300 error = device_power_down(PMSG_SUSPEND); 298 error = device_power_down(PMSG_SUSPEND);
@@ -343,9 +341,6 @@ static int suspend_enter(suspend_state_t state)
343 if (suspend_ops->finish) 341 if (suspend_ops->finish)
344 suspend_ops->finish(); 342 suspend_ops->finish();
345 343
346 Done:
347 device_pm_unlock();
348
349 return error; 344 return error;
350} 345}
351 346
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0692ab5a0d67..2442d140bd9a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -25,16 +25,6 @@
25 25
26 26
27/* 27/*
28 * Initialize a new task whose father had been ptraced.
29 *
30 * Called from copy_process().
31 */
32void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
33{
34 arch_ptrace_fork(child, clone_flags);
35}
36
37/*
38 * ptrace a task: make the debugger its new parent and 28 * ptrace a task: make the debugger its new parent and
39 * move it to the ptrace list. 29 * move it to the ptrace list.
40 * 30 *
@@ -304,6 +294,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
304 if (child->ptrace) { 294 if (child->ptrace) {
305 child->exit_code = data; 295 child->exit_code = data;
306 dead = __ptrace_detach(current, child); 296 dead = __ptrace_detach(current, child);
297 if (!child->exit_state)
298 wake_up_process(child);
307 } 299 }
308 write_unlock_irq(&tasklist_lock); 300 write_unlock_irq(&tasklist_lock);
309 301
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ce97a4df64d3..beb0e659adcc 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1356,17 +1356,11 @@ static int rcu_sched_grace_period(void *arg)
1356 1356
1357 rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; 1357 rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
1358 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); 1358 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1359 ret = 0; 1359 ret = 0; /* unused */
1360 __wait_event_interruptible(rcu_ctrlblk.sched_wq, 1360 __wait_event_interruptible(rcu_ctrlblk.sched_wq,
1361 rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, 1361 rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
1362 ret); 1362 ret);
1363 1363
1364 /*
1365 * Signals would prevent us from sleeping, and we cannot
1366 * do much with them in any case. So flush them.
1367 */
1368 if (ret)
1369 flush_signals(current);
1370 couldsleepnext = 0; 1364 couldsleepnext = 0;
1371 1365
1372 } while (!kthread_should_stop()); 1366 } while (!kthread_should_stop());
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d2a372fb0b9b..0dccfbba6d26 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1259 check_cpu_stall(rsp, rdp); 1259 check_cpu_stall(rsp, rdp);
1260 1260
1261 /* Is the RCU core waiting for a quiescent state from this CPU? */ 1261 /* Is the RCU core waiting for a quiescent state from this CPU? */
1262 if (rdp->qs_pending) 1262 if (rdp->qs_pending) {
1263 rdp->n_rp_qs_pending++;
1263 return 1; 1264 return 1;
1265 }
1264 1266
1265 /* Does this CPU have callbacks ready to invoke? */ 1267 /* Does this CPU have callbacks ready to invoke? */
1266 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1268 if (cpu_has_callbacks_ready_to_invoke(rdp)) {
1269 rdp->n_rp_cb_ready++;
1267 return 1; 1270 return 1;
1271 }
1268 1272
1269 /* Has RCU gone idle with this CPU needing another grace period? */ 1273 /* Has RCU gone idle with this CPU needing another grace period? */
1270 if (cpu_needs_another_gp(rsp, rdp)) 1274 if (cpu_needs_another_gp(rsp, rdp)) {
1275 rdp->n_rp_cpu_needs_gp++;
1271 return 1; 1276 return 1;
1277 }
1272 1278
1273 /* Has another RCU grace period completed? */ 1279 /* Has another RCU grace period completed? */
1274 if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */ 1280 if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */
1281 rdp->n_rp_gp_completed++;
1275 return 1; 1282 return 1;
1283 }
1276 1284
1277 /* Has a new RCU grace period started? */ 1285 /* Has a new RCU grace period started? */
1278 if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */ 1286 if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */
1287 rdp->n_rp_gp_started++;
1279 return 1; 1288 return 1;
1289 }
1280 1290
1281 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 1291 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1282 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && 1292 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
1283 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) 1293 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
1294 rdp->n_rp_need_fqs++;
1284 return 1; 1295 return 1;
1296 }
1285 1297
1286 /* nothing to do */ 1298 /* nothing to do */
1299 rdp->n_rp_need_nothing++;
1287 return 0; 1300 return 0;
1288} 1301}
1289 1302
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4b1875ba9404..fe1dcdbf1ca3 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = {
213 .release = single_release, 213 .release = single_release,
214}; 214};
215 215
216static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir; 216static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
217{
218 seq_printf(m, "%3d%cnp=%ld "
219 "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
220 rdp->cpu,
221 cpu_is_offline(rdp->cpu) ? '!' : ' ',
222 rdp->n_rcu_pending,
223 rdp->n_rp_qs_pending,
224 rdp->n_rp_cb_ready,
225 rdp->n_rp_cpu_needs_gp,
226 rdp->n_rp_gp_completed,
227 rdp->n_rp_gp_started,
228 rdp->n_rp_need_fqs,
229 rdp->n_rp_need_nothing);
230}
231
232static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
233{
234 int cpu;
235 struct rcu_data *rdp;
236
237 for_each_possible_cpu(cpu) {
238 rdp = rsp->rda[cpu];
239 if (rdp->beenonline)
240 print_one_rcu_pending(m, rdp);
241 }
242}
243
244static int show_rcu_pending(struct seq_file *m, void *unused)
245{
246 seq_puts(m, "rcu:\n");
247 print_rcu_pendings(m, &rcu_state);
248 seq_puts(m, "rcu_bh:\n");
249 print_rcu_pendings(m, &rcu_bh_state);
250 return 0;
251}
252
253static int rcu_pending_open(struct inode *inode, struct file *file)
254{
255 return single_open(file, show_rcu_pending, NULL);
256}
257
258static struct file_operations rcu_pending_fops = {
259 .owner = THIS_MODULE,
260 .open = rcu_pending_open,
261 .read = seq_read,
262 .llseek = seq_lseek,
263 .release = single_release,
264};
265
266static struct dentry *rcudir;
267static struct dentry *datadir;
268static struct dentry *datadir_csv;
269static struct dentry *gpdir;
270static struct dentry *hierdir;
271static struct dentry *rcu_pendingdir;
272
217static int __init rcuclassic_trace_init(void) 273static int __init rcuclassic_trace_init(void)
218{ 274{
219 rcudir = debugfs_create_dir("rcu", NULL); 275 rcudir = debugfs_create_dir("rcu", NULL);
@@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void)
238 NULL, &rcuhier_fops); 294 NULL, &rcuhier_fops);
239 if (!hierdir) 295 if (!hierdir)
240 goto free_out; 296 goto free_out;
297
298 rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir,
299 NULL, &rcu_pending_fops);
300 if (!rcu_pendingdir)
301 goto free_out;
241 return 0; 302 return 0;
242free_out: 303free_out:
243 if (datadir) 304 if (datadir)
@@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void)
257 debugfs_remove(datadir_csv); 318 debugfs_remove(datadir_csv);
258 debugfs_remove(gpdir); 319 debugfs_remove(gpdir);
259 debugfs_remove(hierdir); 320 debugfs_remove(hierdir);
321 debugfs_remove(rcu_pendingdir);
260 debugfs_remove(rcudir); 322 debugfs_remove(rcudir);
261} 323}
262 324
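
The qsp/cbr/cng/gpc/gps/nf/nn fields in the seq_printf() format above map one-to-one onto the n_rp_* counters incremented in __rcu_pending() (see the kernel/rcutree.c hunk earlier in this merge). A hedged userspace sketch for dumping the new file, assuming debugfs is mounted at the conventional /sys/kernel/debug:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/debug/rcu/rcu_pending", "r");

	if (!f) {
		perror("rcu_pending");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* one line per CPU that has been online */
	fclose(f);
	return 0;
}
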
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 69d9cb921ffa..820c5af44f3e 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -300,7 +300,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
300 * assigned pending owner [which might not have taken the 300 * assigned pending owner [which might not have taken the
301 * lock yet]: 301 * lock yet]:
302 */ 302 */
303static inline int try_to_steal_lock(struct rt_mutex *lock) 303static inline int try_to_steal_lock(struct rt_mutex *lock,
304 struct task_struct *task)
304{ 305{
305 struct task_struct *pendowner = rt_mutex_owner(lock); 306 struct task_struct *pendowner = rt_mutex_owner(lock);
306 struct rt_mutex_waiter *next; 307 struct rt_mutex_waiter *next;
@@ -309,11 +310,11 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
309 if (!rt_mutex_owner_pending(lock)) 310 if (!rt_mutex_owner_pending(lock))
310 return 0; 311 return 0;
311 312
312 if (pendowner == current) 313 if (pendowner == task)
313 return 1; 314 return 1;
314 315
315 spin_lock_irqsave(&pendowner->pi_lock, flags); 316 spin_lock_irqsave(&pendowner->pi_lock, flags);
316 if (current->prio >= pendowner->prio) { 317 if (task->prio >= pendowner->prio) {
317 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 318 spin_unlock_irqrestore(&pendowner->pi_lock, flags);
318 return 0; 319 return 0;
319 } 320 }
@@ -338,21 +339,21 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
338 * We are going to steal the lock and a waiter was 339 * We are going to steal the lock and a waiter was
339 * enqueued on the pending owners pi_waiters queue. So 340 * enqueued on the pending owners pi_waiters queue. So
340 * we have to enqueue this waiter into 341 * we have to enqueue this waiter into
341 * current->pi_waiters list. This covers the case, 342 * task->pi_waiters list. This covers the case,
342 * where current is boosted because it holds another 343 * where task is boosted because it holds another
343 * lock and gets unboosted because the booster is 344 * lock and gets unboosted because the booster is
344 * interrupted, so we would delay a waiter with higher 345 * interrupted, so we would delay a waiter with higher
345 * priority as current->normal_prio. 346 * priority as task->normal_prio.
346 * 347 *
347 * Note: in the rare case of a SCHED_OTHER task changing 348 * Note: in the rare case of a SCHED_OTHER task changing
348 * its priority and thus stealing the lock, next->task 349 * its priority and thus stealing the lock, next->task
349 * might be current: 350 * might be task:
350 */ 351 */
351 if (likely(next->task != current)) { 352 if (likely(next->task != task)) {
352 spin_lock_irqsave(&current->pi_lock, flags); 353 spin_lock_irqsave(&task->pi_lock, flags);
353 plist_add(&next->pi_list_entry, &current->pi_waiters); 354 plist_add(&next->pi_list_entry, &task->pi_waiters);
354 __rt_mutex_adjust_prio(current); 355 __rt_mutex_adjust_prio(task);
355 spin_unlock_irqrestore(&current->pi_lock, flags); 356 spin_unlock_irqrestore(&task->pi_lock, flags);
356 } 357 }
357 return 1; 358 return 1;
358} 359}
@@ -389,7 +390,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
389 */ 390 */
390 mark_rt_mutex_waiters(lock); 391 mark_rt_mutex_waiters(lock);
391 392
392 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock)) 393 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
393 return 0; 394 return 0;
394 395
395 /* We got the lock. */ 396 /* We got the lock. */
@@ -411,6 +412,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
411 */ 412 */
412static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 413static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
413 struct rt_mutex_waiter *waiter, 414 struct rt_mutex_waiter *waiter,
415 struct task_struct *task,
414 int detect_deadlock) 416 int detect_deadlock)
415{ 417{
416 struct task_struct *owner = rt_mutex_owner(lock); 418 struct task_struct *owner = rt_mutex_owner(lock);
@@ -418,21 +420,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
418 unsigned long flags; 420 unsigned long flags;
419 int chain_walk = 0, res; 421 int chain_walk = 0, res;
420 422
421 spin_lock_irqsave(&current->pi_lock, flags); 423 spin_lock_irqsave(&task->pi_lock, flags);
422 __rt_mutex_adjust_prio(current); 424 __rt_mutex_adjust_prio(task);
423 waiter->task = current; 425 waiter->task = task;
424 waiter->lock = lock; 426 waiter->lock = lock;
425 plist_node_init(&waiter->list_entry, current->prio); 427 plist_node_init(&waiter->list_entry, task->prio);
426 plist_node_init(&waiter->pi_list_entry, current->prio); 428 plist_node_init(&waiter->pi_list_entry, task->prio);
427 429
428 /* Get the top priority waiter on the lock */ 430 /* Get the top priority waiter on the lock */
429 if (rt_mutex_has_waiters(lock)) 431 if (rt_mutex_has_waiters(lock))
430 top_waiter = rt_mutex_top_waiter(lock); 432 top_waiter = rt_mutex_top_waiter(lock);
431 plist_add(&waiter->list_entry, &lock->wait_list); 433 plist_add(&waiter->list_entry, &lock->wait_list);
432 434
433 current->pi_blocked_on = waiter; 435 task->pi_blocked_on = waiter;
434 436
435 spin_unlock_irqrestore(&current->pi_lock, flags); 437 spin_unlock_irqrestore(&task->pi_lock, flags);
436 438
437 if (waiter == rt_mutex_top_waiter(lock)) { 439 if (waiter == rt_mutex_top_waiter(lock)) {
438 spin_lock_irqsave(&owner->pi_lock, flags); 440 spin_lock_irqsave(&owner->pi_lock, flags);
@@ -460,7 +462,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
460 spin_unlock(&lock->wait_lock); 462 spin_unlock(&lock->wait_lock);
461 463
462 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 464 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
463 current); 465 task);
464 466
465 spin_lock(&lock->wait_lock); 467 spin_lock(&lock->wait_lock);
466 468
@@ -605,37 +607,25 @@ void rt_mutex_adjust_pi(struct task_struct *task)
605 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); 607 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
606} 608}
607 609
608/* 610/**
609 * Slow path lock function: 611 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
612 * @lock: the rt_mutex to take
613 * @state: the state the task should block in (TASK_INTERRUPTIBLE
614 * or TASK_UNINTERRUPTIBLE)
615 * @timeout: the pre-initialized and started timer, or NULL for none
616 * @waiter: the pre-initialized rt_mutex_waiter
617 * @detect_deadlock: passed to task_blocks_on_rt_mutex
618 *
619 * lock->wait_lock must be held by the caller.
610 */ 620 */
611static int __sched 621static int __sched
612rt_mutex_slowlock(struct rt_mutex *lock, int state, 622__rt_mutex_slowlock(struct rt_mutex *lock, int state,
613 struct hrtimer_sleeper *timeout, 623 struct hrtimer_sleeper *timeout,
614 int detect_deadlock) 624 struct rt_mutex_waiter *waiter,
625 int detect_deadlock)
615{ 626{
616 struct rt_mutex_waiter waiter;
617 int ret = 0; 627 int ret = 0;
618 628
619 debug_rt_mutex_init_waiter(&waiter);
620 waiter.task = NULL;
621
622 spin_lock(&lock->wait_lock);
623
624 /* Try to acquire the lock again: */
625 if (try_to_take_rt_mutex(lock)) {
626 spin_unlock(&lock->wait_lock);
627 return 0;
628 }
629
630 set_current_state(state);
631
632 /* Setup the timer, when timeout != NULL */
633 if (unlikely(timeout)) {
634 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
635 if (!hrtimer_active(&timeout->timer))
636 timeout->task = NULL;
637 }
638
639 for (;;) { 629 for (;;) {
640 /* Try to acquire the lock: */ 630 /* Try to acquire the lock: */
641 if (try_to_take_rt_mutex(lock)) 631 if (try_to_take_rt_mutex(lock))
@@ -656,19 +646,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
656 } 646 }
657 647
658 /* 648 /*
659 * waiter.task is NULL the first time we come here and 649 * waiter->task is NULL the first time we come here and
660 * when we have been woken up by the previous owner 650 * when we have been woken up by the previous owner
661 * but the lock got stolen by a higher prio task. 651 * but the lock got stolen by a higher prio task.
662 */ 652 */
663 if (!waiter.task) { 653 if (!waiter->task) {
664 ret = task_blocks_on_rt_mutex(lock, &waiter, 654 ret = task_blocks_on_rt_mutex(lock, waiter, current,
665 detect_deadlock); 655 detect_deadlock);
666 /* 656 /*
667 * If we got woken up by the owner then start loop 657 * If we got woken up by the owner then start loop
668 * all over without going into schedule to try 658 * all over without going into schedule to try
669 * to get the lock now: 659 * to get the lock now:
670 */ 660 */
671 if (unlikely(!waiter.task)) { 661 if (unlikely(!waiter->task)) {
672 /* 662 /*
673 * Reset the return value. We might 663 * Reset the return value. We might
674 * have returned with -EDEADLK and the 664 * have returned with -EDEADLK and the
@@ -684,15 +674,52 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
684 674
685 spin_unlock(&lock->wait_lock); 675 spin_unlock(&lock->wait_lock);
686 676
687 debug_rt_mutex_print_deadlock(&waiter); 677 debug_rt_mutex_print_deadlock(waiter);
688 678
689 if (waiter.task) 679 if (waiter->task)
690 schedule_rt_mutex(lock); 680 schedule_rt_mutex(lock);
691 681
692 spin_lock(&lock->wait_lock); 682 spin_lock(&lock->wait_lock);
693 set_current_state(state); 683 set_current_state(state);
694 } 684 }
695 685
686 return ret;
687}
688
689/*
690 * Slow path lock function:
691 */
692static int __sched
693rt_mutex_slowlock(struct rt_mutex *lock, int state,
694 struct hrtimer_sleeper *timeout,
695 int detect_deadlock)
696{
697 struct rt_mutex_waiter waiter;
698 int ret = 0;
699
700 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL;
702
703 spin_lock(&lock->wait_lock);
704
705 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) {
707 spin_unlock(&lock->wait_lock);
708 return 0;
709 }
710
711 set_current_state(state);
712
713 /* Setup the timer, when timeout != NULL */
714 if (unlikely(timeout)) {
715 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
716 if (!hrtimer_active(&timeout->timer))
717 timeout->task = NULL;
718 }
719
720 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
721 detect_deadlock);
722
696 set_current_state(TASK_RUNNING); 723 set_current_state(TASK_RUNNING);
697 724
698 if (unlikely(waiter.task)) 725 if (unlikely(waiter.task))
@@ -864,9 +891,9 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
864EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); 891EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
865 892
866/** 893/**
867 * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible 894 * rt_mutex_timed_lock - lock an rt_mutex interruptibly
868 * the timeout structure is provided 895 * the timeout structure is provided
869 * by the caller 896 * by the caller
870 * 897 *
871 * @lock: the rt_mutex to be locked 898 * @lock: the rt_mutex to be locked
872 * @timeout: timeout structure or NULL (no timeout) 899 * @timeout: timeout structure or NULL (no timeout)
@@ -913,7 +940,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
913} 940}
914EXPORT_SYMBOL_GPL(rt_mutex_unlock); 941EXPORT_SYMBOL_GPL(rt_mutex_unlock);
915 942
916/*** 943/**
917 * rt_mutex_destroy - mark a mutex unusable 944 * rt_mutex_destroy - mark a mutex unusable
918 * @lock: the mutex to be destroyed 945 * @lock: the mutex to be destroyed
919 * 946 *
@@ -986,6 +1013,59 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
986} 1013}
987 1014
988/** 1015/**
1016 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1017 * @lock: the rt_mutex to take
1018 * @waiter: the pre-initialized rt_mutex_waiter
1019 * @task: the task to prepare
1020 * @detect_deadlock: perform deadlock detection (1) or not (0)
1021 *
1022 * Returns:
1023 * 0 - task blocked on lock
1024 * 1 - acquired the lock for task, caller should wake it up
1025 * <0 - error
1026 *
1027 * Special API call for FUTEX_REQUEUE_PI support.
1028 */
1029int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1030 struct rt_mutex_waiter *waiter,
1031 struct task_struct *task, int detect_deadlock)
1032{
1033 int ret;
1034
1035 spin_lock(&lock->wait_lock);
1036
1037 mark_rt_mutex_waiters(lock);
1038
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock);
1042
1043 rt_mutex_set_owner(lock, task, 0);
1044
1045 rt_mutex_deadlock_account_lock(lock, task);
1046 return 1;
1047 }
1048
1049 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1050
1051
1052 if (ret && !waiter->task) {
1053 /*
1054 * Reset the return value. We might have
1055 * returned with -EDEADLK and the owner
1056 * released the lock while we were walking the
1057 * pi chain. Let the waiter sort it out.
1058 */
1059 ret = 0;
1060 }
1061 spin_unlock(&lock->wait_lock);
1062
1063 debug_rt_mutex_print_deadlock(waiter);
1064
1065 return ret;
1066}
1067
1068/**
989 * rt_mutex_next_owner - return the next owner of the lock 1069 * rt_mutex_next_owner - return the next owner of the lock
990 * 1070 *
991 * @lock: the rt lock query 1071 * @lock: the rt lock query
@@ -1004,3 +1084,57 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1004 1084
1005 return rt_mutex_top_waiter(lock)->task; 1085 return rt_mutex_top_waiter(lock)->task;
1006} 1086}
1087
1088/**
1089 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
1090 * @lock: the rt_mutex we were woken on
1091 * @to: the timeout, NULL if none. The hrtimer should already have
1092 * been started.
1093 * @waiter: the pre-initialized rt_mutex_waiter
1094 * @detect_deadlock: perform deadlock detection (1) or not (0)
1095 *
1096 * Complete the lock acquisition started on our behalf by another thread.
1097 *
1098 * Returns:
1099 * 0 - success
1100 * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
1101 *
1102 * Special API call for PI-futex requeue support
1103 */
1104int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1105 struct hrtimer_sleeper *to,
1106 struct rt_mutex_waiter *waiter,
1107 int detect_deadlock)
1108{
1109 int ret;
1110
1111 spin_lock(&lock->wait_lock);
1112
1113 set_current_state(TASK_INTERRUPTIBLE);
1114
1115 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
1116 detect_deadlock);
1117
1118 set_current_state(TASK_RUNNING);
1119
1120 if (unlikely(waiter->task))
1121 remove_waiter(lock, waiter);
1122
1123 /*
1124 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1125 * have to fix that up.
1126 */
1127 fixup_rt_mutex_waiters(lock);
1128
1129 spin_unlock(&lock->wait_lock);
1130
1131 /*
1132 * Readjust priority, when we did not get the lock. We might have been
1133 * the pending owner and boosted. Since we did not take the lock, the
1134 * PI boost has to go.
1135 */
1136 if (unlikely(ret))
1137 rt_mutex_adjust_prio(current);
1138
1139 return ret;
1140}
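
Taken together, rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock() split one PI-mutex acquisition across two tasks, which is exactly what FUTEX_REQUEUE_PI needs: the waker enqueues the waiter, and the woken task finishes the job itself. A condensed sketch of the pairing (error handling trimmed; pi_mutex, target and to are hypothetical placeholders, not names from this patch):

	/* In the requeueing task: block "target" on the pi mutex. */
	struct rt_mutex_waiter waiter;
	int ret;

	debug_rt_mutex_init_waiter(&waiter);
	waiter.task = NULL;

	ret = rt_mutex_start_proxy_lock(&pi_mutex, &waiter, target, 0);
	if (ret == 1)
		wake_up_process(target);	/* lock already taken for it */

	/* Later, in "target" itself, once it wakes up: */
	ret = rt_mutex_finish_proxy_lock(&pi_mutex, to, &waiter, 0);
	/* 0: pi_mutex is now held; <0: -EINTR, -ETIMEDOUT or -EDEADLK */
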
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index e124bf5800ea..97a2f81866af 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -120,6 +120,14 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
120 struct task_struct *proxy_owner); 120 struct task_struct *proxy_owner);
121extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, 121extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
122 struct task_struct *proxy_owner); 122 struct task_struct *proxy_owner);
123extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
124 struct rt_mutex_waiter *waiter,
125 struct task_struct *task,
126 int detect_deadlock);
127extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
128 struct hrtimer_sleeper *to,
129 struct rt_mutex_waiter *waiter,
130 int detect_deadlock);
123 131
124#ifdef CONFIG_DEBUG_RT_MUTEXES 132#ifdef CONFIG_DEBUG_RT_MUTEXES
125# include "rtmutex-debug.h" 133# include "rtmutex-debug.h"
diff --git a/kernel/sched.c b/kernel/sched.c
index 26efa475bdc1..14c447ae5d53 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -72,13 +72,15 @@
72#include <linux/debugfs.h> 72#include <linux/debugfs.h>
73#include <linux/ctype.h> 73#include <linux/ctype.h>
74#include <linux/ftrace.h> 74#include <linux/ftrace.h>
75#include <trace/sched.h>
76 75
77#include <asm/tlb.h> 76#include <asm/tlb.h>
78#include <asm/irq_regs.h> 77#include <asm/irq_regs.h>
79 78
80#include "sched_cpupri.h" 79#include "sched_cpupri.h"
81 80
81#define CREATE_TRACE_POINTS
82#include <trace/events/sched.h>
83
82/* 84/*
83 * Convert user-nice values [ -20 ... 0 ... 19 ] 85 * Convert user-nice values [ -20 ... 0 ... 19 ]
84 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], 86 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -118,12 +120,6 @@
118 */ 120 */
119#define RUNTIME_INF ((u64)~0ULL) 121#define RUNTIME_INF ((u64)~0ULL)
120 122
121DEFINE_TRACE(sched_wait_task);
122DEFINE_TRACE(sched_wakeup);
123DEFINE_TRACE(sched_wakeup_new);
124DEFINE_TRACE(sched_switch);
125DEFINE_TRACE(sched_migrate_task);
126
127#ifdef CONFIG_SMP 123#ifdef CONFIG_SMP
128 124
129static void double_rq_lock(struct rq *rq1, struct rq *rq2); 125static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -630,6 +626,10 @@ struct rq {
630 struct list_head migration_queue; 626 struct list_head migration_queue;
631#endif 627#endif
632 628
629 /* calc_load related fields */
630 unsigned long calc_load_update;
631 long calc_load_active;
632
633#ifdef CONFIG_SCHED_HRTICK 633#ifdef CONFIG_SCHED_HRTICK
634#ifdef CONFIG_SMP 634#ifdef CONFIG_SMP
635 int hrtick_csd_pending; 635 int hrtick_csd_pending;
@@ -1728,6 +1728,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
1728} 1728}
1729#endif 1729#endif
1730 1730
1731static void calc_load_account_active(struct rq *this_rq);
1732
1731#include "sched_stats.h" 1733#include "sched_stats.h"
1732#include "sched_idletask.c" 1734#include "sched_idletask.c"
1733#include "sched_fair.c" 1735#include "sched_fair.c"
@@ -1958,7 +1960,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1958 1960
1959 clock_offset = old_rq->clock - new_rq->clock; 1961 clock_offset = old_rq->clock - new_rq->clock;
1960 1962
1961 trace_sched_migrate_task(p, task_cpu(p), new_cpu); 1963 trace_sched_migrate_task(p, new_cpu);
1962 1964
1963#ifdef CONFIG_SCHEDSTATS 1965#ifdef CONFIG_SCHEDSTATS
1964 if (p->se.wait_start) 1966 if (p->se.wait_start)
@@ -2015,6 +2017,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2015} 2017}
2016 2018
2017/* 2019/*
2020 * wait_task_context_switch - wait for a thread to complete at least one
2021 * context switch.
2022 *
2023 * @p must not be current.
2024 */
2025void wait_task_context_switch(struct task_struct *p)
2026{
2027 unsigned long nvcsw, nivcsw, flags;
2028 int running;
2029 struct rq *rq;
2030
2031 nvcsw = p->nvcsw;
2032 nivcsw = p->nivcsw;
2033 for (;;) {
2034 /*
2035 * The runqueue is assigned before the actual context
2036 * switch. We need to take the runqueue lock.
2037 *
2038 * We could check initially without the lock but it is
2039 * very likely that we need to take the lock in every
2040 * iteration.
2041 */
2042 rq = task_rq_lock(p, &flags);
2043 running = task_running(rq, p);
2044 task_rq_unlock(rq, &flags);
2045
2046 if (likely(!running))
2047 break;
2048 /*
2049 * The switch count is incremented before the actual
2050 * context switch. We thus wait for two switches to be
2051 * sure at least one completed.
2052 */
2053 if ((p->nvcsw - nvcsw) > 1)
2054 break;
2055 if ((p->nivcsw - nivcsw) > 1)
2056 break;
2057
2058 cpu_relax();
2059 }
2060}
2061
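
A hedged usage note: because nvcsw/nivcsw are bumped before the actual switch, waiting for a delta of two guarantees at least one completed switch. A caller (tracee is a hypothetical task pointer) would simply do:

	/* Make sure the target has really been off the CPU at least once. */
	if (tracee != current)
		wait_task_context_switch(tracee);
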
2062/*
2018 * wait_task_inactive - wait for a thread to unschedule. 2063 * wait_task_inactive - wait for a thread to unschedule.
2019 * 2064 *
2020 * If @match_state is nonzero, it's the @p->state value just checked and 2065 * If @match_state is nonzero, it's the @p->state value just checked and
@@ -2458,6 +2503,17 @@ out:
2458 return success; 2503 return success;
2459} 2504}
2460 2505
2506/**
2507 * wake_up_process - Wake up a specific process
2508 * @p: The process to be woken up.
2509 *
2510 * Attempt to wake up the nominated process and move it to the set of runnable
2511 * processes. Returns 1 if the process was woken up, 0 if it was already
2512 * running.
2513 *
2514 * It may be assumed that this function implies a write memory barrier before
2515 * changing the task state if and only if any tasks are woken up.
2516 */
2461int wake_up_process(struct task_struct *p) 2517int wake_up_process(struct task_struct *p)
2462{ 2518{
2463 return try_to_wake_up(p, TASK_ALL, 0); 2519 return try_to_wake_up(p, TASK_ALL, 0);
@@ -2766,7 +2822,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2766 * combine the page table reload and the switch backend into 2822 * combine the page table reload and the switch backend into
2767 * one hypercall. 2823 * one hypercall.
2768 */ 2824 */
2769 arch_enter_lazy_cpu_mode(); 2825 arch_start_context_switch(prev);
2770 2826
2771 if (unlikely(!mm)) { 2827 if (unlikely(!mm)) {
2772 next->active_mm = oldmm; 2828 next->active_mm = oldmm;
@@ -2856,19 +2912,72 @@ unsigned long nr_iowait(void)
2856 return sum; 2912 return sum;
2857} 2913}
2858 2914
2859unsigned long nr_active(void) 2915/* Variables and functions for calc_load */
2916static atomic_long_t calc_load_tasks;
2917static unsigned long calc_load_update;
2918unsigned long avenrun[3];
2919EXPORT_SYMBOL(avenrun);
2920
2921/**
2922 * get_avenrun - get the load average array
2923 * @loads: pointer to dest load array
2924 * @offset: offset to add
2925 * @shift: shift count to shift the result left
2926 *
2927 * These values are estimates at best, so no need for locking.
2928 */
2929void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
2930{
2931 loads[0] = (avenrun[0] + offset) << shift;
2932 loads[1] = (avenrun[1] + offset) << shift;
2933 loads[2] = (avenrun[2] + offset) << shift;
2934}
2935
2936static unsigned long
2937calc_load(unsigned long load, unsigned long exp, unsigned long active)
2860{ 2938{
2861 unsigned long i, running = 0, uninterruptible = 0; 2939 load *= exp;
2940 load += active * (FIXED_1 - exp);
2941 return load >> FSHIFT;
2942}
2862 2943
2863 for_each_online_cpu(i) { 2944/*
2864 running += cpu_rq(i)->nr_running; 2945 * calc_load - update the avenrun load estimates 10 ticks after the
2865 uninterruptible += cpu_rq(i)->nr_uninterruptible; 2946 * CPUs have updated calc_load_tasks.
2866 } 2947 */
2948void calc_global_load(void)
2949{
2950 unsigned long upd = calc_load_update + 10;
2951 long active;
2867 2952
2868 if (unlikely((long)uninterruptible < 0)) 2953 if (time_before(jiffies, upd))
2869 uninterruptible = 0; 2954 return;
2870 2955
2871 return running + uninterruptible; 2956 active = atomic_long_read(&calc_load_tasks);
2957 active = active > 0 ? active * FIXED_1 : 0;
2958
2959 avenrun[0] = calc_load(avenrun[0], EXP_1, active);
2960 avenrun[1] = calc_load(avenrun[1], EXP_5, active);
2961 avenrun[2] = calc_load(avenrun[2], EXP_15, active);
2962
2963 calc_load_update += LOAD_FREQ;
2964}
2965
2966/*
2967 * Either called from update_cpu_load() or from a cpu going idle
2968 */
2969static void calc_load_account_active(struct rq *this_rq)
2970{
2971 long nr_active, delta;
2972
2973 nr_active = this_rq->nr_running;
2974 nr_active += (long) this_rq->nr_uninterruptible;
2975
2976 if (nr_active != this_rq->calc_load_active) {
2977 delta = nr_active - this_rq->calc_load_active;
2978 this_rq->calc_load_active = nr_active;
2979 atomic_long_add(delta, &calc_load_tasks);
2980 }
2872} 2981}
2873 2982
2874/* 2983/*
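
The update in calc_global_load() is the classic fixed-point exponential decay behind /proc/loadavg: with FSHIFT = 11, FIXED_1 = 1 << 11 and EXP_1 = 1884 (the constants from include/linux/sched.h), each 5-second LOAD_FREQ sample pulls avenrun[0] a factor EXP_1/FIXED_1 toward the instantaneous active count. A standalone userspace sketch of the arithmetic (values hardcoded for illustration):

#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1UL << FSHIFT)
#define EXP_1	1884	/* 1/exp(5sec/1min) in fixed-point */

static unsigned long calc_load(unsigned long load,
			       unsigned long exp, unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long avg = 0;			/* boot: idle */
	unsigned long active = 1 * FIXED_1;	/* one runnable task */
	int i;

	for (i = 0; i < 12; i++)		/* 12 x 5s = one minute */
		avg = calc_load(avg, EXP_1, active);

	/* Prints roughly 0.63, i.e. 1 - 1/e after one time constant. */
	printf("1-min loadavg: %lu.%02lu\n",
	       avg >> FSHIFT, ((avg & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}
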
@@ -2899,6 +3008,11 @@ static void update_cpu_load(struct rq *this_rq)
2899 new_load += scale-1; 3008 new_load += scale-1;
2900 this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; 3009 this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
2901 } 3010 }
3011
3012 if (time_after_eq(jiffies, this_rq->calc_load_update)) {
3013 this_rq->calc_load_update += LOAD_FREQ;
3014 calc_load_account_active(this_rq);
3015 }
2902} 3016}
2903 3017
2904#ifdef CONFIG_SMP 3018#ifdef CONFIG_SMP
@@ -4240,10 +4354,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
4240static struct { 4354static struct {
4241 atomic_t load_balancer; 4355 atomic_t load_balancer;
4242 cpumask_var_t cpu_mask; 4356 cpumask_var_t cpu_mask;
4357 cpumask_var_t ilb_grp_nohz_mask;
4243} nohz ____cacheline_aligned = { 4358} nohz ____cacheline_aligned = {
4244 .load_balancer = ATOMIC_INIT(-1), 4359 .load_balancer = ATOMIC_INIT(-1),
4245}; 4360};
4246 4361
4362#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
4363/**
4364 * lowest_flag_domain - Return lowest sched_domain containing flag.
4365 * @cpu: The cpu whose lowest level of sched domain is to
4366 * be returned.
4367 * @flag: The flag to check for the lowest sched_domain
4368 * for the given cpu.
4369 *
4370 * Returns the lowest sched_domain of a cpu which contains the given flag.
4371 */
4372static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
4373{
4374 struct sched_domain *sd;
4375
4376 for_each_domain(cpu, sd)
4377 if (sd && (sd->flags & flag))
4378 break;
4379
4380 return sd;
4381}
4382
4383/**
4384 * for_each_flag_domain - Iterates over sched_domains containing the flag.
4385 * @cpu: The cpu whose domains we're iterating over.
4386 * @sd: variable holding the value of the power_savings_sd
4387 * for cpu.
4388 * @flag: The flag to filter the sched_domains to be iterated.
4389 *
4390 * Iterates over all the scheduler domains for a given cpu that has the 'flag'
4391 * set, starting from the lowest sched_domain to the highest.
4392 */
4393#define for_each_flag_domain(cpu, sd, flag) \
4394 for (sd = lowest_flag_domain(cpu, flag); \
4395 (sd && (sd->flags & flag)); sd = sd->parent)
4396
4397/**
4398 * is_semi_idle_group - Checks if the given sched_group is semi-idle.
4399 * @ilb_group: group to be checked for semi-idleness
4400 *
4401 * Returns: 1 if the group is semi-idle. 0 otherwise.
4402 *
4403 * We define a sched_group to be semi-idle if it has at least one idle CPU
4404 * and at least one non-idle CPU. This helper function checks if the given
4405 * sched_group is semi-idle or not.
4406 */
4407static inline int is_semi_idle_group(struct sched_group *ilb_group)
4408{
4409 cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
4410 sched_group_cpus(ilb_group));
4411
4412 /*
4413 * A sched_group is semi-idle when it has at least one busy CPU
4414 * and at least one idle CPU.
4415 */
4416 if (cpumask_empty(nohz.ilb_grp_nohz_mask))
4417 return 0;
4418
4419 if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
4420 return 0;
4421
4422 return 1;
4423}
4424/**
4425 * find_new_ilb - Finds the optimum idle load balancer for nomination.
4426 * @cpu: The cpu which is nominating a new idle_load_balancer.
4427 *
4428 * Returns: Returns the id of the idle load balancer if it exists,
4429 * Else, returns >= nr_cpu_ids.
4430 *
4431 * This algorithm picks the idle load balancer such that it belongs to a
4432 * semi-idle powersavings sched_domain. The idea is to try to avoid
4433 * completely idle packages/cores just for the purpose of idle load balancing
4434 * when there are other idle CPUs which are better suited for that job.
4435 */
4436static int find_new_ilb(int cpu)
4437{
4438 struct sched_domain *sd;
4439 struct sched_group *ilb_group;
4440
4441 /*
4442 * Have idle load balancer selection from semi-idle packages only
4443 * when power-aware load balancing is enabled
4444 */
4445 if (!(sched_smt_power_savings || sched_mc_power_savings))
4446 goto out_done;
4447
4448 /*
4449 * Optimize for the case when we have no idle CPUs or only one
4450 * idle CPU. Don't walk the sched_domain hierarchy in such cases
4451 */
4452 if (cpumask_weight(nohz.cpu_mask) < 2)
4453 goto out_done;
4454
4455 for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
4456 ilb_group = sd->groups;
4457
4458 do {
4459 if (is_semi_idle_group(ilb_group))
4460 return cpumask_first(nohz.ilb_grp_nohz_mask);
4461
4462 ilb_group = ilb_group->next;
4463
4464 } while (ilb_group != sd->groups);
4465 }
4466
4467out_done:
4468 return cpumask_first(nohz.cpu_mask);
4469}
4470#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
4471static inline int find_new_ilb(int call_cpu)
4472{
4473 return cpumask_first(nohz.cpu_mask);
4474}
4475#endif
4476
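
The cpumask dance in is_semi_idle_group() boils down to: the group's idle subset must be neither empty nor the whole group. The same predicate expressed on plain bitmasks (a toy model for illustration, one bit per CPU, not kernel code):

/* group_mask: CPUs in the sched_group; nohz_mask: tick-stopped CPUs. */
static int is_semi_idle(unsigned long group_mask, unsigned long nohz_mask)
{
	unsigned long idle = group_mask & nohz_mask;

	return idle != 0 && idle != group_mask;
}
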
4247/* 4477/*
4248 * This routine will try to nominate the ilb (idle load balancing) 4478 * This routine will try to nominate the ilb (idle load balancing)
4249 * owner among the cpus whose ticks are stopped. ilb owner will do the idle 4479 * owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -4298,8 +4528,24 @@ int select_nohz_load_balancer(int stop_tick)
4298 /* make me the ilb owner */ 4528 /* make me the ilb owner */
4299 if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1) 4529 if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
4300 return 1; 4530 return 1;
4301 } else if (atomic_read(&nohz.load_balancer) == cpu) 4531 } else if (atomic_read(&nohz.load_balancer) == cpu) {
4532 int new_ilb;
4533
4534 if (!(sched_smt_power_savings ||
4535 sched_mc_power_savings))
4536 return 1;
4537 /*
4538 * Check to see if there is a more power-efficient
4539 * ilb.
4540 */
4541 new_ilb = find_new_ilb(cpu);
4542 if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
4543 atomic_set(&nohz.load_balancer, -1);
4544 resched_cpu(new_ilb);
4545 return 0;
4546 }
4302 return 1; 4547 return 1;
4548 }
4303 } else { 4549 } else {
4304 if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) 4550 if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
4305 return 0; 4551 return 0;
@@ -4468,15 +4714,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4468 } 4714 }
4469 4715
4470 if (atomic_read(&nohz.load_balancer) == -1) { 4716 if (atomic_read(&nohz.load_balancer) == -1) {
4471 /* 4717 int ilb = find_new_ilb(cpu);
4472 * simple selection for now: Nominate the
4473 * first cpu in the nohz list to be the next
4474 * ilb owner.
4475 *
4476 * TBD: Traverse the sched domains and nominate
4477 * the nearest cpu in the nohz.cpu_mask.
4478 */
4479 int ilb = cpumask_first(nohz.cpu_mask);
4480 4718
4481 if (ilb < nr_cpu_ids) 4719 if (ilb < nr_cpu_ids)
4482 resched_cpu(ilb); 4720 resched_cpu(ilb);
@@ -5007,13 +5245,15 @@ pick_next_task(struct rq *rq)
5007/* 5245/*
5008 * schedule() is the main scheduler function. 5246 * schedule() is the main scheduler function.
5009 */ 5247 */
5010asmlinkage void __sched __schedule(void) 5248asmlinkage void __sched schedule(void)
5011{ 5249{
5012 struct task_struct *prev, *next; 5250 struct task_struct *prev, *next;
5013 unsigned long *switch_count; 5251 unsigned long *switch_count;
5014 struct rq *rq; 5252 struct rq *rq;
5015 int cpu; 5253 int cpu;
5016 5254
5255need_resched:
5256 preempt_disable();
5017 cpu = smp_processor_id(); 5257 cpu = smp_processor_id();
5018 rq = cpu_rq(cpu); 5258 rq = cpu_rq(cpu);
5019 rcu_qsctr_inc(cpu); 5259 rcu_qsctr_inc(cpu);
@@ -5070,15 +5310,9 @@ need_resched_nonpreemptible:
5070 5310
5071 if (unlikely(reacquire_kernel_lock(current) < 0)) 5311 if (unlikely(reacquire_kernel_lock(current) < 0))
5072 goto need_resched_nonpreemptible; 5312 goto need_resched_nonpreemptible;
5073}
5074 5313
5075asmlinkage void __sched schedule(void)
5076{
5077need_resched:
5078 preempt_disable();
5079 __schedule();
5080 preempt_enable_no_resched(); 5314 preempt_enable_no_resched();
5081 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 5315 if (need_resched())
5082 goto need_resched; 5316 goto need_resched;
5083} 5317}
5084EXPORT_SYMBOL(schedule); 5318EXPORT_SYMBOL(schedule);
@@ -5221,7 +5455,7 @@ EXPORT_SYMBOL(default_wake_function);
5221 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 5455 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
5222 * zero in this (rare) case, and we handle it by continuing to scan the queue. 5456 * zero in this (rare) case, and we handle it by continuing to scan the queue.
5223 */ 5457 */
5224void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 5458static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
5225 int nr_exclusive, int sync, void *key) 5459 int nr_exclusive, int sync, void *key)
5226{ 5460{
5227 wait_queue_t *curr, *next; 5461 wait_queue_t *curr, *next;
@@ -5241,6 +5475,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
5241 * @mode: which threads 5475 * @mode: which threads
5242 * @nr_exclusive: how many wake-one or wake-many threads to wake up 5476 * @nr_exclusive: how many wake-one or wake-many threads to wake up
5243 * @key: is directly passed to the wakeup function 5477 * @key: is directly passed to the wakeup function
5478 *
5479 * It may be assumed that this function implies a write memory barrier before
5480 * changing the task state if and only if any tasks are woken up.
5244 */ 5481 */
5245void __wake_up(wait_queue_head_t *q, unsigned int mode, 5482void __wake_up(wait_queue_head_t *q, unsigned int mode,
5246 int nr_exclusive, void *key) 5483 int nr_exclusive, void *key)
@@ -5279,6 +5516,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
5279 * with each other. This can prevent needless bouncing between CPUs. 5516 * with each other. This can prevent needless bouncing between CPUs.
5280 * 5517 *
5281 * On UP it can prevent extra preemption. 5518 * On UP it can prevent extra preemption.
5519 *
5520 * It may be assumed that this function implies a write memory barrier before
5521 * changing the task state if and only if any tasks are woken up.
5282 */ 5522 */
5283void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, 5523void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
5284 int nr_exclusive, void *key) 5524 int nr_exclusive, void *key)
@@ -5315,6 +5555,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
5315 * awakened in the same order in which they were queued. 5555 * awakened in the same order in which they were queued.
5316 * 5556 *
5317 * See also complete_all(), wait_for_completion() and related routines. 5557 * See also complete_all(), wait_for_completion() and related routines.
5558 *
5559 * It may be assumed that this function implies a write memory barrier before
5560 * changing the task state if and only if any tasks are woken up.
5318 */ 5561 */
5319void complete(struct completion *x) 5562void complete(struct completion *x)
5320{ 5563{
@@ -5332,6 +5575,9 @@ EXPORT_SYMBOL(complete);
5332 * @x: holds the state of this particular completion 5575 * @x: holds the state of this particular completion
5333 * 5576 *
5334 * This will wake up all threads waiting on this particular completion event. 5577 * This will wake up all threads waiting on this particular completion event.
5578 *
5579 * It may be assumed that this function implies a write memory barrier before
5580 * changing the task state if and only if any tasks are woken up.
5335 */ 5581 */
5336void complete_all(struct completion *x) 5582void complete_all(struct completion *x)
5337{ 5583{
@@ -6490,8 +6736,9 @@ void sched_show_task(struct task_struct *p)
6490#ifdef CONFIG_DEBUG_STACK_USAGE 6736#ifdef CONFIG_DEBUG_STACK_USAGE
6491 free = stack_not_used(p); 6737 free = stack_not_used(p);
6492#endif 6738#endif
6493 printk(KERN_CONT "%5lu %5d %6d\n", free, 6739 printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
6494 task_pid_nr(p), task_pid_nr(p->real_parent)); 6740 task_pid_nr(p), task_pid_nr(p->real_parent),
6741 (unsigned long)task_thread_info(p)->flags);
6495 6742
6496 show_stack(p, NULL); 6743 show_stack(p, NULL);
6497} 6744}
@@ -6970,6 +7217,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
6970 7217
6971 } 7218 }
6972} 7219}
7220
7221/*
7222 * remove the tasks which were accounted by rq from calc_load_tasks.
7223 */
7224static void calc_global_load_remove(struct rq *rq)
7225{
7226 atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
7227}
6973#endif /* CONFIG_HOTPLUG_CPU */ 7228#endif /* CONFIG_HOTPLUG_CPU */
6974 7229
6975#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) 7230#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7204,6 +7459,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7204 /* Update our root-domain */ 7459 /* Update our root-domain */
7205 rq = cpu_rq(cpu); 7460 rq = cpu_rq(cpu);
7206 spin_lock_irqsave(&rq->lock, flags); 7461 spin_lock_irqsave(&rq->lock, flags);
7462 rq->calc_load_update = calc_load_update;
7463 rq->calc_load_active = 0;
7207 if (rq->rd) { 7464 if (rq->rd) {
7208 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7465 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7209 7466
@@ -7243,7 +7500,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7243 cpuset_unlock(); 7500 cpuset_unlock();
7244 migrate_nr_uninterruptible(rq); 7501 migrate_nr_uninterruptible(rq);
7245 BUG_ON(rq->nr_running != 0); 7502 BUG_ON(rq->nr_running != 0);
7246 7503 calc_global_load_remove(rq);
7247 /* 7504 /*
7248 * No need to migrate the tasks: it was best-effort if 7505 * No need to migrate the tasks: it was best-effort if
7249 * they didn't take sched_hotcpu_mutex. Just wake up 7506 * they didn't take sched_hotcpu_mutex. Just wake up
@@ -7753,8 +8010,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
7753 8010
7754/* 8011/*
7755 * The cpus mask in sched_group and sched_domain hangs off the end. 8012 * The cpus mask in sched_group and sched_domain hangs off the end.
7756 * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space 8013 *
7757 * for nr_cpu_ids < CONFIG_NR_CPUS. 8014 * ( See the comments in include/linux/sched.h:struct sched_group
8015 * and struct sched_domain. )
7758 */ 8016 */
7759struct static_sched_group { 8017struct static_sched_group {
7760 struct sched_group sg; 8018 struct sched_group sg;
@@ -7875,7 +8133,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
7875 struct sched_domain *sd; 8133 struct sched_domain *sd;
7876 8134
7877 sd = &per_cpu(phys_domains, j).sd; 8135 sd = &per_cpu(phys_domains, j).sd;
7878 if (j != cpumask_first(sched_group_cpus(sd->groups))) { 8136 if (j != group_first_cpu(sd->groups)) {
7879 /* 8137 /*
7880 * Only add "power" once for each 8138 * Only add "power" once for each
7881 * physical package. 8139 * physical package.
@@ -7953,7 +8211,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
7953 8211
7954 WARN_ON(!sd || !sd->groups); 8212 WARN_ON(!sd || !sd->groups);
7955 8213
7956 if (cpu != cpumask_first(sched_group_cpus(sd->groups))) 8214 if (cpu != group_first_cpu(sd->groups))
7957 return; 8215 return;
7958 8216
7959 child = sd->child; 8217 child = sd->child;
@@ -8938,6 +9196,8 @@ void __init sched_init(void)
8938 rq = cpu_rq(i); 9196 rq = cpu_rq(i);
8939 spin_lock_init(&rq->lock); 9197 spin_lock_init(&rq->lock);
8940 rq->nr_running = 0; 9198 rq->nr_running = 0;
9199 rq->calc_load_active = 0;
9200 rq->calc_load_update = jiffies + LOAD_FREQ;
8941 init_cfs_rq(&rq->cfs, rq); 9201 init_cfs_rq(&rq->cfs, rq);
8942 init_rt_rq(&rq->rt, rq); 9202 init_rt_rq(&rq->rt, rq);
8943#ifdef CONFIG_FAIR_GROUP_SCHED 9203#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -9045,6 +9305,9 @@ void __init sched_init(void)
9045 * when this runqueue becomes "idle". 9305 * when this runqueue becomes "idle".
9046 */ 9306 */
9047 init_idle(current, smp_processor_id()); 9307 init_idle(current, smp_processor_id());
9308
9309 calc_load_update = jiffies + LOAD_FREQ;
9310
9048 /* 9311 /*
9049 * During early bootup we pretend to be a normal task: 9312 * During early bootup we pretend to be a normal task:
9050 */ 9313 */
@@ -9055,6 +9318,7 @@ void __init sched_init(void)
9055#ifdef CONFIG_SMP 9318#ifdef CONFIG_SMP
9056#ifdef CONFIG_NO_HZ 9319#ifdef CONFIG_NO_HZ
9057 alloc_bootmem_cpumask_var(&nohz.cpu_mask); 9320 alloc_bootmem_cpumask_var(&nohz.cpu_mask);
9321 alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask);
9058#endif 9322#endif
9059 alloc_bootmem_cpumask_var(&cpu_isolated_map); 9323 alloc_bootmem_cpumask_var(&cpu_isolated_map);
9060#endif /* SMP */ 9324#endif /* SMP */
@@ -9800,6 +10064,13 @@ static int sched_rt_global_constraints(void)
9800 if (sysctl_sched_rt_period <= 0) 10064 if (sysctl_sched_rt_period <= 0)
9801 return -EINVAL; 10065 return -EINVAL;
9802 10066
10067 /*
10068 * There's always some RT tasks in the root group
10069 * -- migration, kstopmachine etc..
10070 */
10071 if (sysctl_sched_rt_runtime == 0)
10072 return -EBUSY;
10073
9803 spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); 10074 spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
9804 for_each_possible_cpu(i) { 10075 for_each_possible_cpu(i) {
9805 struct rt_rq *rt_rq = &cpu_rq(i)->rt; 10076 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index cdd3c89574cd..344712a5e3ed 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -165,7 +165,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
165 vec->count = 0; 165 vec->count = 0;
166 if (bootmem) 166 if (bootmem)
167 alloc_bootmem_cpumask_var(&vec->mask); 167 alloc_bootmem_cpumask_var(&vec->mask);
168 else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) 168 else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
169 goto cleanup; 169 goto cleanup;
170 } 170 }
171 171
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3816f217f119..5f9650e8fe75 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1487,17 +1487,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1487 1487
1488 find_matching_se(&se, &pse); 1488 find_matching_se(&se, &pse);
1489 1489
1490 while (se) { 1490 BUG_ON(!pse);
1491 BUG_ON(!pse);
1492 1491
1493 if (wakeup_preempt_entity(se, pse) == 1) { 1492 if (wakeup_preempt_entity(se, pse) == 1)
1494 resched_task(curr); 1493 resched_task(curr);
1495 break;
1496 }
1497
1498 se = parent_entity(se);
1499 pse = parent_entity(pse);
1500 }
1501} 1494}
1502 1495
1503static struct task_struct *pick_next_task_fair(struct rq *rq) 1496static struct task_struct *pick_next_task_fair(struct rq *rq)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 8a21a2e28c13..499672c10cbd 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy
22static struct task_struct *pick_next_task_idle(struct rq *rq) 22static struct task_struct *pick_next_task_idle(struct rq *rq)
23{ 23{
24 schedstat_inc(rq, sched_goidle); 24 schedstat_inc(rq, sched_goidle);
25 25 /* adjust the active tasks as we might go into a long sleep */
26 calc_load_account_active(rq);
26 return rq->idle; 27 return rq->idle;
27} 28}
28 29
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f2c66f8f9712..9bf0d2a73045 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1591,7 +1591,7 @@ static inline void init_sched_rt_class(void)
1591 unsigned int i; 1591 unsigned int i;
1592 1592
1593 for_each_possible_cpu(i) 1593 for_each_possible_cpu(i)
1594 alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), 1594 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
1595 GFP_KERNEL, cpu_to_node(i)); 1595 GFP_KERNEL, cpu_to_node(i));
1596} 1596}
1597#endif /* CONFIG_SMP */ 1597#endif /* CONFIG_SMP */
diff --git a/kernel/signal.c b/kernel/signal.c
index d8034737db4c..dba6ae99978a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,7 +27,7 @@
27#include <linux/freezer.h> 27#include <linux/freezer.h>
28#include <linux/pid_namespace.h> 28#include <linux/pid_namespace.h>
29#include <linux/nsproxy.h> 29#include <linux/nsproxy.h>
30#include <trace/sched.h> 30#include <trace/events/sched.h>
31 31
32#include <asm/param.h> 32#include <asm/param.h>
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
@@ -41,8 +41,6 @@
41 41
42static struct kmem_cache *sigqueue_cachep; 42static struct kmem_cache *sigqueue_cachep;
43 43
44DEFINE_TRACE(sched_signal_send);
45
46static void __user *sig_handler(struct task_struct *t, int sig) 44static void __user *sig_handler(struct task_struct *t, int sig)
47{ 45{
48 return t->sighand->action[sig - 1].sa.sa_handler; 46 return t->sighand->action[sig - 1].sa.sa_handler;
@@ -2278,24 +2276,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
2278 return kill_something_info(sig, &info, pid); 2276 return kill_something_info(sig, &info, pid);
2279} 2277}
2280 2278
2281static int do_tkill(pid_t tgid, pid_t pid, int sig) 2279static int
2280do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
2282{ 2281{
2283 int error;
2284 struct siginfo info;
2285 struct task_struct *p; 2282 struct task_struct *p;
2286 unsigned long flags; 2283 unsigned long flags;
2287 2284 int error = -ESRCH;
2288 error = -ESRCH;
2289 info.si_signo = sig;
2290 info.si_errno = 0;
2291 info.si_code = SI_TKILL;
2292 info.si_pid = task_tgid_vnr(current);
2293 info.si_uid = current_uid();
2294 2285
2295 rcu_read_lock(); 2286 rcu_read_lock();
2296 p = find_task_by_vpid(pid); 2287 p = find_task_by_vpid(pid);
2297 if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { 2288 if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
2298 error = check_kill_permission(sig, &info, p); 2289 error = check_kill_permission(sig, info, p);
2299 /* 2290 /*
2300 * The null signal is a permissions and process existence 2291 * The null signal is a permissions and process existence
2301 * probe. No signal is actually delivered. 2292 * probe. No signal is actually delivered.
@@ -2305,7 +2296,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
2305 * signal is private anyway. 2296 * signal is private anyway.
2306 */ 2297 */
2307 if (!error && sig && lock_task_sighand(p, &flags)) { 2298 if (!error && sig && lock_task_sighand(p, &flags)) {
2308 error = specific_send_sig_info(sig, &info, p); 2299 error = specific_send_sig_info(sig, info, p);
2309 unlock_task_sighand(p, &flags); 2300 unlock_task_sighand(p, &flags);
2310 } 2301 }
2311 } 2302 }
@@ -2314,6 +2305,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
2314 return error; 2305 return error;
2315} 2306}
2316 2307
2308static int do_tkill(pid_t tgid, pid_t pid, int sig)
2309{
2310 struct siginfo info;
2311
2312 info.si_signo = sig;
2313 info.si_errno = 0;
2314 info.si_code = SI_TKILL;
2315 info.si_pid = task_tgid_vnr(current);
2316 info.si_uid = current_uid();
2317
2318 return do_send_specific(tgid, pid, sig, &info);
2319}
2320
2317/** 2321/**
2318 * sys_tgkill - send signal to one specific thread 2322 * sys_tgkill - send signal to one specific thread
2319 * @tgid: the thread group ID of the thread 2323 * @tgid: the thread group ID of the thread
@@ -2363,6 +2367,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2363 return kill_proc_info(sig, &info, pid); 2367 return kill_proc_info(sig, &info, pid);
2364} 2368}
2365 2369
2370long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
2371{
2372 /* This is only valid for single tasks */
2373 if (pid <= 0 || tgid <= 0)
2374 return -EINVAL;
2375
2376 /* Not even root can pretend to send signals from the kernel.
2377 Nor can they impersonate a kill(), which adds source info. */
2378 if (info->si_code >= 0)
2379 return -EPERM;
2380 info->si_signo = sig;
2381
2382 return do_send_specific(tgid, pid, sig, info);
2383}
2384
2385SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
2386 siginfo_t __user *, uinfo)
2387{
2388 siginfo_t info;
2389
2390 if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2391 return -EFAULT;
2392
2393 return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
2394}
2395
2366int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) 2396int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2367{ 2397{
2368 struct task_struct *t = current; 2398 struct task_struct *t = current;
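The signal.c hunks above split the body of do_tkill() into do_send_specific() so that the new rt_tgsigqueueinfo syscall can queue caller-supplied siginfo to one specific thread. A minimal userspace sketch follows; it assumes a kernel carrying this patch and a __NR_rt_tgsigqueueinfo definition for the build architecture (glibc has no wrapper at this point), so it is illustrative rather than part of the patch.

/* Queue SIGUSR1 with a payload to our own main thread.  si_code must be
 * negative, or the kernel's "info->si_code >= 0" check above returns
 * -EPERM. */
#include <signal.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

static int rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, siginfo_t *si)
{
        return syscall(__NR_rt_tgsigqueueinfo, tgid, tid, sig, si);
}

int main(void)
{
        siginfo_t si;

        signal(SIGUSR1, SIG_IGN);       /* don't die on delivery */

        memset(&si, 0, sizeof(si));
        si.si_code = SI_QUEUE;          /* negative: not "from the kernel" */
        si.si_pid = getpid();
        si.si_uid = getuid();
        si.si_value.sival_int = 42;

        return rt_tgsigqueueinfo(getpid(), getpid(), SIGUSR1, &si);
}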
diff --git a/kernel/smp.c b/kernel/smp.c
index 858baac568ee..ad63d8501207 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
52 switch (action) { 52 switch (action) {
53 case CPU_UP_PREPARE: 53 case CPU_UP_PREPARE:
54 case CPU_UP_PREPARE_FROZEN: 54 case CPU_UP_PREPARE_FROZEN:
55 if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, 55 if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
56 cpu_to_node(cpu))) 56 cpu_to_node(cpu)))
57 return NOTIFY_BAD; 57 return NOTIFY_BAD;
58 break; 58 break;
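The alloc -> zalloc conversions in sched_rt.c and smp.c above swap in cpumasks that are guaranteed to start zeroed. A sketch of the equivalence, assuming this kernel's cpumask API (with CONFIG_CPUMASK_OFFSTACK=n the allocation is a no-op and only the clearing matters):

        /* zalloc_cpumask_var_node() behaves like this open-coded pair: */
        if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
                                    cpu_to_node(cpu)))
                return NOTIFY_BAD;
        cpumask_clear(cfd->cpumask);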
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b525dd348511..258885a543db 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -24,7 +24,9 @@
24#include <linux/ftrace.h> 24#include <linux/ftrace.h>
25#include <linux/smp.h> 25#include <linux/smp.h>
26#include <linux/tick.h> 26#include <linux/tick.h>
27#include <trace/irq.h> 27
28#define CREATE_TRACE_POINTS
29#include <trace/events/irq.h>
28 30
29#include <asm/irq.h> 31#include <asm/irq.h>
30/* 32/*
@@ -186,9 +188,6 @@ EXPORT_SYMBOL(local_bh_enable_ip);
186 */ 188 */
187#define MAX_SOFTIRQ_RESTART 10 189#define MAX_SOFTIRQ_RESTART 10
188 190
189DEFINE_TRACE(softirq_entry);
190DEFINE_TRACE(softirq_exit);
191
192asmlinkage void __do_softirq(void) 191asmlinkage void __do_softirq(void)
193{ 192{
194 struct softirq_action *h; 193 struct softirq_action *h;
@@ -828,7 +827,7 @@ int __init __weak arch_early_irq_init(void)
828 return 0; 827 return 0;
829} 828}
830 829
831int __weak arch_init_chip_data(struct irq_desc *desc, int cpu) 830int __weak arch_init_chip_data(struct irq_desc *desc, int node)
832{ 831{
833 return 0; 832 return 0;
834} 833}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2970d56fb76..6a463716ecbf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -731,6 +731,14 @@ static struct ctl_table kern_table[] = {
731 }, 731 },
732 { 732 {
733 .ctl_name = CTL_UNNUMBERED, 733 .ctl_name = CTL_UNNUMBERED,
734 .procname = "bootloader_version",
735 .data = &bootloader_version,
736 .maxlen = sizeof (int),
737 .mode = 0444,
738 .proc_handler = &proc_dointvec,
739 },
740 {
741 .ctl_name = CTL_UNNUMBERED,
734 .procname = "kstack_depth_to_print", 742 .procname = "kstack_depth_to_print",
735 .data = &kstack_depth_to_print, 743 .data = &kstack_depth_to_print,
736 .maxlen = sizeof(int), 744 .maxlen = sizeof(int),
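The new kern_table entry exposes the bootloader version reported at boot as a read-only integer (mode 0444, proc_dointvec). A hedged sketch of reading it from userspace; the /proc path follows the procname above, and the file only exists where the architecture actually fills in bootloader_version:

#include <stdio.h>

int main(void)
{
        int version = 0;
        FILE *f = fopen("/proc/sys/kernel/bootloader_version", "r");

        if (!f)
                return 1;       /* not exposed on this kernel/arch */
        if (fscanf(f, "%d", &version) == 1)
                printf("bootloader version: %d\n", version);
        fclose(f);
        return 0;
}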
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e97c50f8458b..e8c77d9c633a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -22,7 +22,7 @@
22 22
23/* 23/*
24 * This read-write spinlock protects us from races in SMP while 24 * This read-write spinlock protects us from races in SMP while
25 * playing with xtime and avenrun. 25 * playing with xtime.
26 */ 26 */
27__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); 27__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
28 28
diff --git a/kernel/timer.c b/kernel/timer.c
index cffffad01c31..a26ed294f938 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1123,47 +1123,6 @@ void update_process_times(int user_tick)
1123} 1123}
1124 1124
1125/* 1125/*
1126 * Nr of active tasks - counted in fixed-point numbers
1127 */
1128static unsigned long count_active_tasks(void)
1129{
1130 return nr_active() * FIXED_1;
1131}
1132
1133/*
1134 * Hmm.. Changed this, as the GNU make sources (load.c) seems to
1135 * imply that avenrun[] is the standard name for this kind of thing.
1136 * Nothing else seems to be standardized: the fractional size etc
1137 * all seem to differ on different machines.
1138 *
1139 * Requires xtime_lock to access.
1140 */
1141unsigned long avenrun[3];
1142
1143EXPORT_SYMBOL(avenrun);
1144
1145/*
1146 * calc_load - given tick count, update the avenrun load estimates.
1147 * This is called while holding a write_lock on xtime_lock.
1148 */
1149static inline void calc_load(unsigned long ticks)
1150{
1151 unsigned long active_tasks; /* fixed-point */
1152 static int count = LOAD_FREQ;
1153
1154 count -= ticks;
1155 if (unlikely(count < 0)) {
1156 active_tasks = count_active_tasks();
1157 do {
1158 CALC_LOAD(avenrun[0], EXP_1, active_tasks);
1159 CALC_LOAD(avenrun[1], EXP_5, active_tasks);
1160 CALC_LOAD(avenrun[2], EXP_15, active_tasks);
1161 count += LOAD_FREQ;
1162 } while (count < 0);
1163 }
1164}
1165
1166/*
1167 * This function runs timers and the timer-tq in bottom half context. 1126 * This function runs timers and the timer-tq in bottom half context.
1168 */ 1127 */
1169static void run_timer_softirq(struct softirq_action *h) 1128static void run_timer_softirq(struct softirq_action *h)
@@ -1187,16 +1146,6 @@ void run_local_timers(void)
1187} 1146}
1188 1147
1189/* 1148/*
1190 * Called by the timer interrupt. xtime_lock must already be taken
1191 * by the timer IRQ!
1192 */
1193static inline void update_times(unsigned long ticks)
1194{
1195 update_wall_time();
1196 calc_load(ticks);
1197}
1198
1199/*
1200 * The 64-bit jiffies value is not atomic - you MUST NOT read it 1149 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1201 * without sampling the sequence number in xtime_lock. 1150 * without sampling the sequence number in xtime_lock.
1202 * jiffies is defined in the linker script... 1151 * jiffies is defined in the linker script...
@@ -1205,7 +1154,8 @@ static inline void update_times(unsigned long ticks)
1205void do_timer(unsigned long ticks) 1154void do_timer(unsigned long ticks)
1206{ 1155{
1207 jiffies_64 += ticks; 1156 jiffies_64 += ticks;
1208 update_times(ticks); 1157 update_wall_time();
1158 calc_global_load();
1209} 1159}
1210 1160
1211#ifdef __ARCH_WANT_SYS_ALARM 1161#ifdef __ARCH_WANT_SYS_ALARM
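do_timer() now defers the load-average bookkeeping to the scheduler's calc_global_load() instead of the removed update_times()/calc_load() pair, but the fixed-point recurrence itself is unchanged. A standalone sketch of that math, using the usual sched.h constants (FSHIFT=11, FIXED_1=2048, EXP_1=1884 for the 1-minute average); these values are assumptions about the headers, not part of this diff:

#include <stdio.h>

#define FSHIFT  11
#define FIXED_1 (1 << FSHIFT)   /* 2048 == 1.0 in fixed point */
#define EXP_1   1884            /* 2048/exp(5sec/1min) */

/* Same shape as the kernel macro: an exponentially decaying average,
 * updated once per LOAD_FREQ (~5 seconds). */
#define CALC_LOAD(load, exp, n)          \
        load *= exp;                     \
        load += (n) * (FIXED_1 - (exp)); \
        load >>= FSHIFT;

int main(void)
{
        unsigned long load = 0;
        int i;

        /* Two always-runnable tasks: load converges toward 2.0 */
        for (i = 0; i < 120; i++) {     /* 120 updates == 10 minutes */
                CALC_LOAD(load, EXP_1, 2 * FIXED_1);
        }
        printf("loadavg ~ %lu.%02lu\n", load >> FSHIFT,
               (load & (FIXED_1 - 1)) * 100 / FIXED_1);
        return 0;
}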
@@ -1406,37 +1356,17 @@ int do_sysinfo(struct sysinfo *info)
1406{ 1356{
1407 unsigned long mem_total, sav_total; 1357 unsigned long mem_total, sav_total;
1408 unsigned int mem_unit, bitcount; 1358 unsigned int mem_unit, bitcount;
1409 unsigned long seq; 1359 struct timespec tp;
1410 1360
1411 memset(info, 0, sizeof(struct sysinfo)); 1361 memset(info, 0, sizeof(struct sysinfo));
1412 1362
1413 do { 1363 ktime_get_ts(&tp);
1414 struct timespec tp; 1364 monotonic_to_bootbased(&tp);
1415 seq = read_seqbegin(&xtime_lock); 1365 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1416
1417 /*
1418 * This is annoying. The below is the same thing
1419 * posix_get_clock_monotonic() does, but it wants to
1420 * take the lock which we want to cover the loads stuff
1421 * too.
1422 */
1423
1424 getnstimeofday(&tp);
1425 tp.tv_sec += wall_to_monotonic.tv_sec;
1426 tp.tv_nsec += wall_to_monotonic.tv_nsec;
1427 monotonic_to_bootbased(&tp);
1428 if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
1429 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
1430 tp.tv_sec++;
1431 }
1432 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1433 1366
1434 info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); 1367 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
1435 info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
1436 info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
1437 1368
1438 info->procs = nr_threads; 1369 info->procs = nr_threads;
1439 } while (read_seqretry(&xtime_lock, seq));
1440 1370
1441 si_meminfo(info); 1371 si_meminfo(info);
1442 si_swapinfo(info); 1372 si_swapinfo(info);
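With the seqlock loop gone, do_sysinfo() takes its uptime from the bootbased monotonic clock and its load averages from get_avenrun(), still shifted left by SI_LOAD_SHIFT - FSHIFT so userspace sees 16-bit fixed point. A sketch of the userspace decode (SI_LOAD_SHIFT == 16 is an assumption about <linux/kernel.h>, not something this hunk defines):

#include <stdio.h>
#include <sys/sysinfo.h>

#define SI_LOAD_SHIFT 16        /* scale of sysinfo.loads[] */

int main(void)
{
        struct sysinfo si;

        if (sysinfo(&si) != 0)
                return 1;
        printf("up %lds, load %.2f %.2f %.2f\n", si.uptime,
               si.loads[0] / (double)(1 << SI_LOAD_SHIFT),
               si.loads[1] / (double)(1 << SI_LOAD_SHIFT),
               si.loads[2] / (double)(1 << SI_LOAD_SHIFT));
        return 0;
}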
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 417d1985e299..4a13e5a01ce3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -48,6 +48,21 @@ config FTRACE_NMI_ENTER
48 depends on HAVE_FTRACE_NMI_ENTER 48 depends on HAVE_FTRACE_NMI_ENTER
49 default y 49 default y
50 50
51config EVENT_TRACING
52 select CONTEXT_SWITCH_TRACER
53 bool
54
55config CONTEXT_SWITCH_TRACER
56 select MARKERS
57 bool
58
 59# All tracer options should select GENERIC_TRACER. Options that are
 60# enabled by all tracers (context switch and event tracer) select TRACING.
 61# This allows those options to appear when no other tracer is selected. But the
 62# options do not appear when something else selects them. We need the two options
 63# GENERIC_TRACER and TRACING to avoid circular dependencies and to accomplish
 64# the hiding of the automatic options.
65
51config TRACING 66config TRACING
52 bool 67 bool
53 select DEBUG_FS 68 select DEBUG_FS
@@ -56,6 +71,11 @@ config TRACING
56 select TRACEPOINTS 71 select TRACEPOINTS
57 select NOP_TRACER 72 select NOP_TRACER
58 select BINARY_PRINTF 73 select BINARY_PRINTF
74 select EVENT_TRACING
75
76config GENERIC_TRACER
77 bool
78 select TRACING
59 79
60# 80#
61# Minimum requirements an architecture has to meet for us to 81# Minimum requirements an architecture has to meet for us to
@@ -73,14 +93,20 @@ config TRACING_SUPPORT
73 93
74if TRACING_SUPPORT 94if TRACING_SUPPORT
75 95
76menu "Tracers" 96menuconfig FTRACE
97 bool "Tracers"
98 default y if DEBUG_KERNEL
99 help
100 Enable the kernel tracing infrastructure.
101
102if FTRACE
77 103
78config FUNCTION_TRACER 104config FUNCTION_TRACER
79 bool "Kernel Function Tracer" 105 bool "Kernel Function Tracer"
80 depends on HAVE_FUNCTION_TRACER 106 depends on HAVE_FUNCTION_TRACER
81 select FRAME_POINTER 107 select FRAME_POINTER
82 select KALLSYMS 108 select KALLSYMS
83 select TRACING 109 select GENERIC_TRACER
84 select CONTEXT_SWITCH_TRACER 110 select CONTEXT_SWITCH_TRACER
85 help 111 help
86 Enable the kernel to trace every kernel function. This is done 112 Enable the kernel to trace every kernel function. This is done
@@ -104,13 +130,14 @@ config FUNCTION_GRAPH_TRACER
104 the return value. This is done by setting the current return 130 the return value. This is done by setting the current return
105 address on the current task structure into a stack of calls. 131 address on the current task structure into a stack of calls.
106 132
133
107config IRQSOFF_TRACER 134config IRQSOFF_TRACER
108 bool "Interrupts-off Latency Tracer" 135 bool "Interrupts-off Latency Tracer"
109 default n 136 default n
110 depends on TRACE_IRQFLAGS_SUPPORT 137 depends on TRACE_IRQFLAGS_SUPPORT
111 depends on GENERIC_TIME 138 depends on GENERIC_TIME
112 select TRACE_IRQFLAGS 139 select TRACE_IRQFLAGS
113 select TRACING 140 select GENERIC_TRACER
114 select TRACER_MAX_TRACE 141 select TRACER_MAX_TRACE
115 help 142 help
116 This option measures the time spent in irqs-off critical 143 This option measures the time spent in irqs-off critical
@@ -131,7 +158,7 @@ config PREEMPT_TRACER
131 default n 158 default n
132 depends on GENERIC_TIME 159 depends on GENERIC_TIME
133 depends on PREEMPT 160 depends on PREEMPT
134 select TRACING 161 select GENERIC_TRACER
135 select TRACER_MAX_TRACE 162 select TRACER_MAX_TRACE
136 help 163 help
137 This option measures the time spent in preemption off critical 164 This option measures the time spent in preemption off critical
@@ -150,7 +177,7 @@ config PREEMPT_TRACER
150config SYSPROF_TRACER 177config SYSPROF_TRACER
151 bool "Sysprof Tracer" 178 bool "Sysprof Tracer"
152 depends on X86 179 depends on X86
153 select TRACING 180 select GENERIC_TRACER
154 select CONTEXT_SWITCH_TRACER 181 select CONTEXT_SWITCH_TRACER
155 help 182 help
156 This tracer provides the trace needed by the 'Sysprof' userspace 183 This tracer provides the trace needed by the 'Sysprof' userspace
@@ -158,40 +185,33 @@ config SYSPROF_TRACER
158 185
159config SCHED_TRACER 186config SCHED_TRACER
160 bool "Scheduling Latency Tracer" 187 bool "Scheduling Latency Tracer"
161 select TRACING 188 select GENERIC_TRACER
162 select CONTEXT_SWITCH_TRACER 189 select CONTEXT_SWITCH_TRACER
163 select TRACER_MAX_TRACE 190 select TRACER_MAX_TRACE
164 help 191 help
165 This tracer tracks the latency of the highest priority task 192 This tracer tracks the latency of the highest priority task
166 to be scheduled in, starting from the point it has woken up. 193 to be scheduled in, starting from the point it has woken up.
167 194
168config CONTEXT_SWITCH_TRACER 195config ENABLE_DEFAULT_TRACERS
169 bool "Trace process context switches" 196 bool "Trace process context switches and events"
170 select TRACING 197 depends on !GENERIC_TRACER
171 select MARKERS
172 help
173 This tracer gets called from the context switch and records
174 all switching of tasks.
175
176config EVENT_TRACER
177 bool "Trace various events in the kernel"
178 select TRACING 198 select TRACING
179 help 199 help
180 This tracer hooks to various trace points in the kernel 200 This tracer hooks to various trace points in the kernel
181 allowing the user to pick and choose which trace point they 201 allowing the user to pick and choose which trace point they
182 want to trace. 202 want to trace. It also includes the sched_switch tracer plugin.
183 203
184config FTRACE_SYSCALLS 204config FTRACE_SYSCALLS
185 bool "Trace syscalls" 205 bool "Trace syscalls"
186 depends on HAVE_FTRACE_SYSCALLS 206 depends on HAVE_FTRACE_SYSCALLS
187 select TRACING 207 select GENERIC_TRACER
188 select KALLSYMS 208 select KALLSYMS
189 help 209 help
190 Basic tracer to catch the syscall entry and exit events. 210 Basic tracer to catch the syscall entry and exit events.
191 211
192config BOOT_TRACER 212config BOOT_TRACER
193 bool "Trace boot initcalls" 213 bool "Trace boot initcalls"
194 select TRACING 214 select GENERIC_TRACER
195 select CONTEXT_SWITCH_TRACER 215 select CONTEXT_SWITCH_TRACER
196 help 216 help
197 This tracer helps developers to optimize boot times: it records 217 This tracer helps developers to optimize boot times: it records
@@ -207,8 +227,36 @@ config BOOT_TRACER
207 to enable this on bootup. 227 to enable this on bootup.
208 228
209config TRACE_BRANCH_PROFILING 229config TRACE_BRANCH_PROFILING
230 bool
231 select GENERIC_TRACER
232
233choice
234 prompt "Branch Profiling"
235 default BRANCH_PROFILE_NONE
236 help
 237 Branch profiling is a software profiler. It will add hooks
238 into the C conditionals to test which path a branch takes.
239
240 The likely/unlikely profiler only looks at the conditions that
241 are annotated with a likely or unlikely macro.
242
243 The "all branch" profiler will profile every if statement in the
 244 kernel. This profiler also enables the likely/unlikely
 245 profiler.
246
 247 Either of the above profilers adds a bit of overhead to the system.
 248 If unsure, choose "No branch profiling".
249
250config BRANCH_PROFILE_NONE
251 bool "No branch profiling"
252 help
253 No branch profiling. Branch profiling adds a bit of overhead.
 254 Only enable it if you want to analyze the branching behavior.
255 Otherwise keep it disabled.
256
257config PROFILE_ANNOTATED_BRANCHES
210 bool "Trace likely/unlikely profiler" 258 bool "Trace likely/unlikely profiler"
211 select TRACING 259 select TRACE_BRANCH_PROFILING
212 help 260 help
 213 This tracer profiles all the likely and unlikely macros 261
214 in the kernel. It will display the results in: 262 in the kernel. It will display the results in:
@@ -218,11 +266,9 @@ config TRACE_BRANCH_PROFILING
218 Note: this will add a significant overhead, only turn this 266 Note: this will add a significant overhead, only turn this
219 on if you need to profile the system's use of these macros. 267 on if you need to profile the system's use of these macros.
220 268
221 Say N if unsure.
222
223config PROFILE_ALL_BRANCHES 269config PROFILE_ALL_BRANCHES
224 bool "Profile all if conditionals" 270 bool "Profile all if conditionals"
225 depends on TRACE_BRANCH_PROFILING 271 select TRACE_BRANCH_PROFILING
226 help 272 help
227 This tracer profiles all branch conditions. Every if () 273 This tracer profiles all branch conditions. Every if ()
 228 in the kernel is recorded, whether it was taken or not. 274
@@ -230,11 +276,12 @@ config PROFILE_ALL_BRANCHES
230 276
231 /debugfs/tracing/profile_branch 277 /debugfs/tracing/profile_branch
232 278
279 This option also enables the likely/unlikely profiler.
280
233 This configuration, when enabled, will impose a great overhead 281 This configuration, when enabled, will impose a great overhead
234 on the system. This should only be enabled when the system 282 on the system. This should only be enabled when the system
 235 is to be analyzed. 283
236 284endchoice
237 Say N if unsure.
238 285
239config TRACING_BRANCHES 286config TRACING_BRANCHES
240 bool 287 bool
@@ -261,7 +308,7 @@ config BRANCH_TRACER
261config POWER_TRACER 308config POWER_TRACER
262 bool "Trace power consumption behavior" 309 bool "Trace power consumption behavior"
263 depends on X86 310 depends on X86
264 select TRACING 311 select GENERIC_TRACER
265 help 312 help
266 This tracer helps developers to analyze and optimize the kernels 313 This tracer helps developers to analyze and optimize the kernels
267 power management decisions, specifically the C-state and P-state 314 power management decisions, specifically the C-state and P-state
@@ -295,14 +342,14 @@ config STACK_TRACER
295config HW_BRANCH_TRACER 342config HW_BRANCH_TRACER
296 depends on HAVE_HW_BRANCH_TRACER 343 depends on HAVE_HW_BRANCH_TRACER
297 bool "Trace hw branches" 344 bool "Trace hw branches"
298 select TRACING 345 select GENERIC_TRACER
299 help 346 help
300 This tracer records all branches on the system in a circular 347 This tracer records all branches on the system in a circular
301 buffer giving access to the last N branches for each cpu. 348 buffer giving access to the last N branches for each cpu.
302 349
303config KMEMTRACE 350config KMEMTRACE
304 bool "Trace SLAB allocations" 351 bool "Trace SLAB allocations"
305 select TRACING 352 select GENERIC_TRACER
306 help 353 help
307 kmemtrace provides tracing for slab allocator functions, such as 354 kmemtrace provides tracing for slab allocator functions, such as
308 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected 355 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
@@ -322,7 +369,7 @@ config KMEMTRACE
322 369
323config WORKQUEUE_TRACER 370config WORKQUEUE_TRACER
324 bool "Trace workqueues" 371 bool "Trace workqueues"
325 select TRACING 372 select GENERIC_TRACER
326 help 373 help
 327 The workqueue tracer provides some statistical information 374
328 about each cpu workqueue thread such as the number of the 375 about each cpu workqueue thread such as the number of the
@@ -338,7 +385,7 @@ config BLK_DEV_IO_TRACE
338 select RELAY 385 select RELAY
339 select DEBUG_FS 386 select DEBUG_FS
340 select TRACEPOINTS 387 select TRACEPOINTS
341 select TRACING 388 select GENERIC_TRACER
342 select STACKTRACE 389 select STACKTRACE
343 help 390 help
344 Say Y here if you want to be able to trace the block layer actions 391 Say Y here if you want to be able to trace the block layer actions
@@ -375,6 +422,20 @@ config DYNAMIC_FTRACE
375 were made. If so, it runs stop_machine (stops all CPUS) 422 were made. If so, it runs stop_machine (stops all CPUS)
376 and modifies the code to jump over the call to ftrace. 423 and modifies the code to jump over the call to ftrace.
377 424
425config FUNCTION_PROFILER
426 bool "Kernel function profiler"
427 depends on FUNCTION_TRACER
428 default n
429 help
430 This option enables the kernel function profiler. A file is created
 431 in debugfs called function_profile_enabled, which defaults to zero.
 432 When a 1 is echoed into this file, profiling begins, and when a
 433 zero is entered, profiling stops. Per-cpu files in the trace_stat
 434 directory, named function<n>, show the list of functions that
 435 have been hit and their counters. (See the usage sketch after
 436 this file's diff.)
436
437 If in doubt, say N
438
378config FTRACE_MCOUNT_RECORD 439config FTRACE_MCOUNT_RECORD
379 def_bool y 440 def_bool y
380 depends on DYNAMIC_FTRACE 441 depends on DYNAMIC_FTRACE
@@ -385,7 +446,7 @@ config FTRACE_SELFTEST
385 446
386config FTRACE_STARTUP_TEST 447config FTRACE_STARTUP_TEST
387 bool "Perform a startup test on ftrace" 448 bool "Perform a startup test on ftrace"
388 depends on TRACING 449 depends on GENERIC_TRACER
389 select FTRACE_SELFTEST 450 select FTRACE_SELFTEST
390 help 451 help
391 This option performs a series of startup tests on ftrace. On bootup 452 This option performs a series of startup tests on ftrace. On bootup
@@ -396,7 +457,7 @@ config FTRACE_STARTUP_TEST
396config MMIOTRACE 457config MMIOTRACE
397 bool "Memory mapped IO tracing" 458 bool "Memory mapped IO tracing"
398 depends on HAVE_MMIOTRACE_SUPPORT && PCI 459 depends on HAVE_MMIOTRACE_SUPPORT && PCI
399 select TRACING 460 select GENERIC_TRACER
400 help 461 help
401 Mmiotrace traces Memory Mapped I/O access and is meant for 462 Mmiotrace traces Memory Mapped I/O access and is meant for
402 debugging and reverse engineering. It is called from the ioremap 463 debugging and reverse engineering. It is called from the ioremap
@@ -416,7 +477,23 @@ config MMIOTRACE_TEST
416 477
417 Say N, unless you absolutely know what you are doing. 478 Say N, unless you absolutely know what you are doing.
418 479
419endmenu 480config RING_BUFFER_BENCHMARK
481 tristate "Ring buffer benchmark stress tester"
482 depends on RING_BUFFER
483 help
 484 This option creates a test to stress the ring buffer and benchmark it.
 485 It creates its own ring buffer such that it will not interfere with
486 any other users of the ring buffer (such as ftrace). It then creates
487 a producer and consumer that will run for 10 seconds and sleep for
488 10 seconds. Each interval it will print out the number of events
489 it recorded and give a rough estimate of how long each iteration took.
490
491 It does not disable interrupts or raise its priority, so it may be
492 affected by processes that are running.
493
494 If unsure, say N
495
496endif # FTRACE
420 497
421endif # TRACING_SUPPORT 498endif # TRACING_SUPPORT
422 499
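The new FUNCTION_PROFILER option is driven entirely through debugfs; the control file and the per-cpu stat files are created by the ftrace.c code later in this patch. A sketch of toggling it and dumping CPU 0's table from userspace; the /sys/kernel/debug mount point is an environment assumption, and the function0 name follows the snprintf(name, 32, "function%d", cpu) in the patch:

#include <stdio.h>

int main(void)
{
        FILE *ctl, *stats;
        char line[256];

        ctl = fopen("/sys/kernel/debug/tracing/function_profile_enabled", "w");
        if (!ctl)
                return 1;
        fputs("1", ctl);                /* start profiling */
        fclose(ctl);

        stats = fopen("/sys/kernel/debug/tracing/trace_stat/function0", "r");
        if (!stats)
                return 1;
        while (fgets(line, sizeof(line), stats))
                fputs(line, stdout);    /* "  Function    Hit ..." table */
        fclose(stats);
        return 0;
}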
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2630f5121ec1..844164dca90a 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -15,11 +15,17 @@ ifdef CONFIG_TRACING_BRANCHES
15KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING 15KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
16endif 16endif
17 17
18#
19# Make the trace clocks available generally: it's infrastructure
20# relied on by ptrace for example:
21#
22obj-y += trace_clock.o
23
18obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o 24obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 25obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
26obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
20 27
21obj-$(CONFIG_TRACING) += trace.o 28obj-$(CONFIG_TRACING) += trace.o
22obj-$(CONFIG_TRACING) += trace_clock.o
23obj-$(CONFIG_TRACING) += trace_output.o 29obj-$(CONFIG_TRACING) += trace_output.o
24obj-$(CONFIG_TRACING) += trace_stat.o 30obj-$(CONFIG_TRACING) += trace_stat.o
25obj-$(CONFIG_TRACING) += trace_printk.o 31obj-$(CONFIG_TRACING) += trace_printk.o
@@ -39,12 +45,14 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
39obj-$(CONFIG_POWER_TRACER) += trace_power.o 45obj-$(CONFIG_POWER_TRACER) += trace_power.o
40obj-$(CONFIG_KMEMTRACE) += kmemtrace.o 46obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
41obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 47obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
42obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 48obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
43obj-$(CONFIG_EVENT_TRACER) += trace_events.o 49ifeq ($(CONFIG_BLOCK),y)
44obj-$(CONFIG_EVENT_TRACER) += events.o 50obj-$(CONFIG_EVENT_TRACING) += blktrace.o
45obj-$(CONFIG_EVENT_TRACER) += trace_export.o 51endif
52obj-$(CONFIG_EVENT_TRACING) += trace_events.o
53obj-$(CONFIG_EVENT_TRACING) += trace_export.o
46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
47obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
48obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o 56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
49 57
50libftrace-y := ftrace.o 58libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 921ef5d1f0ba..7bd6a9893c24 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,10 +23,14 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/time.h> 25#include <linux/time.h>
26#include <trace/block.h>
27#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27
28#include <trace/events/block.h>
29
28#include "trace_output.h" 30#include "trace_output.h"
29 31
32#ifdef CONFIG_BLK_DEV_IO_TRACE
33
30static unsigned int blktrace_seq __read_mostly = 1; 34static unsigned int blktrace_seq __read_mostly = 1;
31 35
32static struct trace_array *blk_tr; 36static struct trace_array *blk_tr;
@@ -147,7 +151,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
147{ 151{
148 if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) 152 if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
149 return 1; 153 return 1;
150 if (sector < bt->start_lba || sector > bt->end_lba) 154 if (sector && (sector < bt->start_lba || sector > bt->end_lba))
151 return 1; 155 return 1;
152 if (bt->pid && pid != bt->pid) 156 if (bt->pid && pid != bt->pid)
153 return 1; 157 return 1;
@@ -192,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
192 what |= MASK_TC_BIT(rw, DISCARD); 196 what |= MASK_TC_BIT(rw, DISCARD);
193 197
194 pid = tsk->pid; 198 pid = tsk->pid;
195 if (unlikely(act_log_check(bt, what, sector, pid))) 199 if (act_log_check(bt, what, sector, pid))
196 return; 200 return;
197 cpu = raw_smp_processor_id(); 201 cpu = raw_smp_processor_id();
198 202
@@ -262,6 +266,7 @@ static void blk_trace_free(struct blk_trace *bt)
262{ 266{
263 debugfs_remove(bt->msg_file); 267 debugfs_remove(bt->msg_file);
264 debugfs_remove(bt->dropped_file); 268 debugfs_remove(bt->dropped_file);
269 debugfs_remove(bt->dir);
265 relay_close(bt->rchan); 270 relay_close(bt->rchan);
266 free_percpu(bt->sequence); 271 free_percpu(bt->sequence);
267 free_percpu(bt->msg_data); 272 free_percpu(bt->msg_data);
@@ -403,11 +408,29 @@ static struct rchan_callbacks blk_relay_callbacks = {
403 .remove_buf_file = blk_remove_buf_file_callback, 408 .remove_buf_file = blk_remove_buf_file_callback,
404}; 409};
405 410
411static void blk_trace_setup_lba(struct blk_trace *bt,
412 struct block_device *bdev)
413{
414 struct hd_struct *part = NULL;
415
416 if (bdev)
417 part = bdev->bd_part;
418
419 if (part) {
420 bt->start_lba = part->start_sect;
421 bt->end_lba = part->start_sect + part->nr_sects;
422 } else {
423 bt->start_lba = 0;
424 bt->end_lba = -1ULL;
425 }
426}
427
406/* 428/*
407 * Setup everything required to start tracing 429 * Setup everything required to start tracing
408 */ 430 */
409int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 431int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
410 struct blk_user_trace_setup *buts) 432 struct block_device *bdev,
433 struct blk_user_trace_setup *buts)
411{ 434{
412 struct blk_trace *old_bt, *bt = NULL; 435 struct blk_trace *old_bt, *bt = NULL;
413 struct dentry *dir = NULL; 436 struct dentry *dir = NULL;
@@ -480,10 +503,13 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
480 if (!bt->act_mask) 503 if (!bt->act_mask)
481 bt->act_mask = (u16) -1; 504 bt->act_mask = (u16) -1;
482 505
483 bt->start_lba = buts->start_lba; 506 blk_trace_setup_lba(bt, bdev);
484 bt->end_lba = buts->end_lba; 507
485 if (!bt->end_lba) 508 /* overwrite with user settings */
486 bt->end_lba = -1ULL; 509 if (buts->start_lba)
510 bt->start_lba = buts->start_lba;
511 if (buts->end_lba)
512 bt->end_lba = buts->end_lba;
487 513
488 bt->pid = buts->pid; 514 bt->pid = buts->pid;
489 bt->trace_state = Blktrace_setup; 515 bt->trace_state = Blktrace_setup;
@@ -505,6 +531,7 @@ err:
505} 531}
506 532
507int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 533int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
534 struct block_device *bdev,
508 char __user *arg) 535 char __user *arg)
509{ 536{
510 struct blk_user_trace_setup buts; 537 struct blk_user_trace_setup buts;
@@ -514,7 +541,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
514 if (ret) 541 if (ret)
515 return -EFAULT; 542 return -EFAULT;
516 543
517 ret = do_blk_trace_setup(q, name, dev, &buts); 544 ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
518 if (ret) 545 if (ret)
519 return ret; 546 return ret;
520 547
@@ -582,7 +609,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
582 switch (cmd) { 609 switch (cmd) {
583 case BLKTRACESETUP: 610 case BLKTRACESETUP:
584 bdevname(bdev, b); 611 bdevname(bdev, b);
585 ret = blk_trace_setup(q, b, bdev->bd_dev, arg); 612 ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
586 break; 613 break;
587 case BLKTRACESTART: 614 case BLKTRACESTART:
588 start = 1; 615 start = 1;
@@ -809,7 +836,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
809 * @bio: the source bio 836 * @bio: the source bio
810 * @dev: target device 837 * @dev: target device
811 * @from: source sector 838 * @from: source sector
812 * @to: target sector
813 * 839 *
814 * Description: 840 * Description:
 815 * Device mapper or a raid target sometimes needs to split a bio because 841
@@ -817,7 +843,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
817 * 843 *
818 **/ 844 **/
819static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, 845static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
820 dev_t dev, sector_t from, sector_t to) 846 dev_t dev, sector_t from)
821{ 847{
822 struct blk_trace *bt = q->blk_trace; 848 struct blk_trace *bt = q->blk_trace;
823 struct blk_io_trace_remap r; 849 struct blk_io_trace_remap r;
@@ -825,12 +851,13 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
825 if (likely(!bt)) 851 if (likely(!bt))
826 return; 852 return;
827 853
828 r.device = cpu_to_be32(dev); 854 r.device_from = cpu_to_be32(dev);
829 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); 855 r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev);
830 r.sector = cpu_to_be64(to); 856 r.sector_from = cpu_to_be64(from);
831 857
832 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, 858 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
833 !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); 859 BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
860 sizeof(r), &r);
834} 861}
835 862
836/** 863/**
@@ -971,6 +998,16 @@ static inline const void *pdu_start(const struct trace_entry *ent)
971 return te_blk_io_trace(ent) + 1; 998 return te_blk_io_trace(ent) + 1;
972} 999}
973 1000
1001static inline u32 t_action(const struct trace_entry *ent)
1002{
1003 return te_blk_io_trace(ent)->action;
1004}
1005
1006static inline u32 t_bytes(const struct trace_entry *ent)
1007{
1008 return te_blk_io_trace(ent)->bytes;
1009}
1010
974static inline u32 t_sec(const struct trace_entry *ent) 1011static inline u32 t_sec(const struct trace_entry *ent)
975{ 1012{
976 return te_blk_io_trace(ent)->bytes >> 9; 1013 return te_blk_io_trace(ent)->bytes >> 9;
@@ -996,11 +1033,11 @@ static void get_pdu_remap(const struct trace_entry *ent,
996 struct blk_io_trace_remap *r) 1033 struct blk_io_trace_remap *r)
997{ 1034{
998 const struct blk_io_trace_remap *__r = pdu_start(ent); 1035 const struct blk_io_trace_remap *__r = pdu_start(ent);
999 __u64 sector = __r->sector; 1036 __u64 sector_from = __r->sector_from;
1000 1037
1001 r->device = be32_to_cpu(__r->device);
1002 r->device_from = be32_to_cpu(__r->device_from); 1038 r->device_from = be32_to_cpu(__r->device_from);
1003 r->sector = be64_to_cpu(sector); 1039 r->device_to = be32_to_cpu(__r->device_to);
1040 r->sector_from = be64_to_cpu(sector_from);
1004} 1041}
1005 1042
1006typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); 1043typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
@@ -1031,36 +1068,98 @@ static int blk_log_action(struct trace_iterator *iter, const char *act)
1031 MAJOR(t->device), MINOR(t->device), act, rwbs); 1068 MAJOR(t->device), MINOR(t->device), act, rwbs);
1032} 1069}
1033 1070
1071static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
1072{
1073 const unsigned char *pdu_buf;
1074 int pdu_len;
1075 int i, end, ret;
1076
1077 pdu_buf = pdu_start(ent);
1078 pdu_len = te_blk_io_trace(ent)->pdu_len;
1079
1080 if (!pdu_len)
1081 return 1;
1082
1083 /* find the last zero that needs to be printed */
1084 for (end = pdu_len - 1; end >= 0; end--)
1085 if (pdu_buf[end])
1086 break;
1087 end++;
1088
1089 if (!trace_seq_putc(s, '('))
1090 return 0;
1091
1092 for (i = 0; i < pdu_len; i++) {
1093
1094 ret = trace_seq_printf(s, "%s%02x",
1095 i == 0 ? "" : " ", pdu_buf[i]);
1096 if (!ret)
1097 return ret;
1098
1099 /*
1100 * stop when the rest is just zeroes and indicate so
1101 * with a ".." appended
1102 */
1103 if (i == end && end != pdu_len - 1)
1104 return trace_seq_puts(s, " ..) ");
1105 }
1106
1107 return trace_seq_puts(s, ") ");
1108}
1109
1034static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) 1110static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
1035{ 1111{
1036 char cmd[TASK_COMM_LEN]; 1112 char cmd[TASK_COMM_LEN];
1037 1113
1038 trace_find_cmdline(ent->pid, cmd); 1114 trace_find_cmdline(ent->pid, cmd);
1039 1115
1040 if (t_sec(ent)) 1116 if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
1041 return trace_seq_printf(s, "%llu + %u [%s]\n", 1117 int ret;
1042 t_sector(ent), t_sec(ent), cmd); 1118
1043 return trace_seq_printf(s, "[%s]\n", cmd); 1119 ret = trace_seq_printf(s, "%u ", t_bytes(ent));
1120 if (!ret)
1121 return 0;
1122 ret = blk_log_dump_pdu(s, ent);
1123 if (!ret)
1124 return 0;
1125 return trace_seq_printf(s, "[%s]\n", cmd);
1126 } else {
1127 if (t_sec(ent))
1128 return trace_seq_printf(s, "%llu + %u [%s]\n",
1129 t_sector(ent), t_sec(ent), cmd);
1130 return trace_seq_printf(s, "[%s]\n", cmd);
1131 }
1044} 1132}
1045 1133
1046static int blk_log_with_error(struct trace_seq *s, 1134static int blk_log_with_error(struct trace_seq *s,
1047 const struct trace_entry *ent) 1135 const struct trace_entry *ent)
1048{ 1136{
1049 if (t_sec(ent)) 1137 if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
1050 return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), 1138 int ret;
1051 t_sec(ent), t_error(ent)); 1139
1052 return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); 1140 ret = blk_log_dump_pdu(s, ent);
1141 if (ret)
1142 return trace_seq_printf(s, "[%d]\n", t_error(ent));
1143 return 0;
1144 } else {
1145 if (t_sec(ent))
1146 return trace_seq_printf(s, "%llu + %u [%d]\n",
1147 t_sector(ent),
1148 t_sec(ent), t_error(ent));
1149 return trace_seq_printf(s, "%llu [%d]\n",
1150 t_sector(ent), t_error(ent));
1151 }
1053} 1152}
1054 1153
1055static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) 1154static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
1056{ 1155{
1057 struct blk_io_trace_remap r = { .device = 0, }; 1156 struct blk_io_trace_remap r = { .device_from = 0, };
1058 1157
1059 get_pdu_remap(ent, &r); 1158 get_pdu_remap(ent, &r);
1060 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", 1159 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
1061 t_sector(ent), 1160 t_sector(ent), t_sec(ent),
1062 t_sec(ent), MAJOR(r.device), MINOR(r.device), 1161 MAJOR(r.device_from), MINOR(r.device_from),
1063 (unsigned long long)r.sector); 1162 (unsigned long long)r.sector_from);
1064} 1163}
1065 1164
1066static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) 1165static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
@@ -1117,7 +1216,6 @@ static void blk_tracer_print_header(struct seq_file *m)
1117static void blk_tracer_start(struct trace_array *tr) 1216static void blk_tracer_start(struct trace_array *tr)
1118{ 1217{
1119 blk_tracer_enabled = true; 1218 blk_tracer_enabled = true;
1120 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1121} 1219}
1122 1220
1123static int blk_tracer_init(struct trace_array *tr) 1221static int blk_tracer_init(struct trace_array *tr)
@@ -1130,7 +1228,6 @@ static int blk_tracer_init(struct trace_array *tr)
1130static void blk_tracer_stop(struct trace_array *tr) 1228static void blk_tracer_stop(struct trace_array *tr)
1131{ 1229{
1132 blk_tracer_enabled = false; 1230 blk_tracer_enabled = false;
1133 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1134} 1231}
1135 1232
1136static void blk_tracer_reset(struct trace_array *tr) 1233static void blk_tracer_reset(struct trace_array *tr)
@@ -1182,7 +1279,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter,
1182 } 1279 }
1183 1280
1184 if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) 1281 if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
1185 ret = trace_seq_printf(s, "Bad pc action %x\n", what); 1282 ret = trace_seq_printf(s, "Unknown action %x\n", what);
1186 else { 1283 else {
1187 ret = log_action(iter, what2act[what].act[long_act]); 1284 ret = log_action(iter, what2act[what].act[long_act]);
1188 if (ret) 1285 if (ret)
@@ -1195,9 +1292,6 @@ out:
1195static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1292static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1196 int flags) 1293 int flags)
1197{ 1294{
1198 if (!trace_print_context(iter))
1199 return TRACE_TYPE_PARTIAL_LINE;
1200
1201 return print_one_line(iter, false); 1295 return print_one_line(iter, false);
1202} 1296}
1203 1297
@@ -1232,6 +1326,18 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1232 return print_one_line(iter, true); 1326 return print_one_line(iter, true);
1233} 1327}
1234 1328
1329static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
1330{
1331 /* don't output context-info for blk_classic output */
1332 if (bit == TRACE_BLK_OPT_CLASSIC) {
1333 if (set)
1334 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1335 else
1336 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1337 }
1338 return 0;
1339}
1340
1235static struct tracer blk_tracer __read_mostly = { 1341static struct tracer blk_tracer __read_mostly = {
1236 .name = "blk", 1342 .name = "blk",
1237 .init = blk_tracer_init, 1343 .init = blk_tracer_init,
@@ -1241,6 +1347,7 @@ static struct tracer blk_tracer __read_mostly = {
1241 .print_header = blk_tracer_print_header, 1347 .print_header = blk_tracer_print_header,
1242 .print_line = blk_tracer_print_line, 1348 .print_line = blk_tracer_print_line,
1243 .flags = &blk_tracer_flags, 1349 .flags = &blk_tracer_flags,
1350 .set_flag = blk_tracer_set_flag,
1244}; 1351};
1245 1352
1246static struct trace_event trace_blk_event = { 1353static struct trace_event trace_blk_event = {
@@ -1285,7 +1392,8 @@ static int blk_trace_remove_queue(struct request_queue *q)
1285/* 1392/*
1286 * Setup everything required to start tracing 1393 * Setup everything required to start tracing
1287 */ 1394 */
1288static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) 1395static int blk_trace_setup_queue(struct request_queue *q,
1396 struct block_device *bdev)
1289{ 1397{
1290 struct blk_trace *old_bt, *bt = NULL; 1398 struct blk_trace *old_bt, *bt = NULL;
1291 int ret = -ENOMEM; 1399 int ret = -ENOMEM;
@@ -1298,9 +1406,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1298 if (!bt->msg_data) 1406 if (!bt->msg_data)
1299 goto free_bt; 1407 goto free_bt;
1300 1408
1301 bt->dev = dev; 1409 bt->dev = bdev->bd_dev;
1302 bt->act_mask = (u16)-1; 1410 bt->act_mask = (u16)-1;
1303 bt->end_lba = -1ULL; 1411
1412 blk_trace_setup_lba(bt, bdev);
1304 1413
1305 old_bt = xchg(&q->blk_trace, bt); 1414 old_bt = xchg(&q->blk_trace, bt);
1306 if (old_bt != NULL) { 1415 if (old_bt != NULL) {
@@ -1517,7 +1626,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1517 1626
1518 if (attr == &dev_attr_enable) { 1627 if (attr == &dev_attr_enable) {
1519 if (value) 1628 if (value)
1520 ret = blk_trace_setup_queue(q, bdev->bd_dev); 1629 ret = blk_trace_setup_queue(q, bdev);
1521 else 1630 else
1522 ret = blk_trace_remove_queue(q); 1631 ret = blk_trace_remove_queue(q);
1523 goto out_unlock_bdev; 1632 goto out_unlock_bdev;
@@ -1525,7 +1634,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1525 1634
1526 ret = 0; 1635 ret = 0;
1527 if (q->blk_trace == NULL) 1636 if (q->blk_trace == NULL)
1528 ret = blk_trace_setup_queue(q, bdev->bd_dev); 1637 ret = blk_trace_setup_queue(q, bdev);
1529 1638
1530 if (ret == 0) { 1639 if (ret == 0) {
1531 if (attr == &dev_attr_act_mask) 1640 if (attr == &dev_attr_act_mask)
@@ -1548,3 +1657,80 @@ out:
1548 return ret ? ret : count; 1657 return ret ? ret : count;
1549} 1658}
1550 1659
1660int blk_trace_init_sysfs(struct device *dev)
1661{
1662 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
1663}
1664
1665#endif /* CONFIG_BLK_DEV_IO_TRACE */
1666
1667#ifdef CONFIG_EVENT_TRACING
1668
1669void blk_dump_cmd(char *buf, struct request *rq)
1670{
1671 int i, end;
1672 int len = rq->cmd_len;
1673 unsigned char *cmd = rq->cmd;
1674
1675 if (!blk_pc_request(rq)) {
1676 buf[0] = '\0';
1677 return;
1678 }
1679
1680 for (end = len - 1; end >= 0; end--)
1681 if (cmd[end])
1682 break;
1683 end++;
1684
1685 for (i = 0; i < len; i++) {
1686 buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
1687 if (i == end && end != len - 1) {
1688 sprintf(buf, " ..");
1689 break;
1690 }
1691 }
1692}
1693
1694void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1695{
1696 int i = 0;
1697
1698 if (rw & WRITE)
1699 rwbs[i++] = 'W';
1700 else if (rw & 1 << BIO_RW_DISCARD)
1701 rwbs[i++] = 'D';
1702 else if (bytes)
1703 rwbs[i++] = 'R';
1704 else
1705 rwbs[i++] = 'N';
1706
1707 if (rw & 1 << BIO_RW_AHEAD)
1708 rwbs[i++] = 'A';
1709 if (rw & 1 << BIO_RW_BARRIER)
1710 rwbs[i++] = 'B';
1711 if (rw & 1 << BIO_RW_SYNCIO)
1712 rwbs[i++] = 'S';
1713 if (rw & 1 << BIO_RW_META)
1714 rwbs[i++] = 'M';
1715
1716 rwbs[i] = '\0';
1717}
1718
1719void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
1720{
1721 int rw = rq->cmd_flags & 0x03;
1722 int bytes;
1723
1724 if (blk_discard_rq(rq))
1725 rw |= (1 << BIO_RW_DISCARD);
1726
1727 if (blk_pc_request(rq))
1728 bytes = rq->data_len;
1729 else
1730 bytes = rq->hard_nr_sectors << 9;
1731
1732 blk_fill_rwbs(rwbs, rw, bytes);
1733}
1734
1735#endif /* CONFIG_EVENT_TRACING */
1736
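blk_fill_rwbs() above condenses a request's direction and flags into the short "rwbs" string printed by the block trace events. A sketch of what it yields for a few flag mixes (kernel-side pseudocalls; the WRITE and BIO_RW_* names are taken from this era's block headers):

        char rwbs[8];   /* at most 5 flag characters plus NUL */

        blk_fill_rwbs(rwbs, WRITE | (1 << BIO_RW_SYNCIO), 4096);
        /* rwbs == "WS": a synchronous write */

        blk_fill_rwbs(rwbs, 0, 4096);
        /* rwbs == "R": data moved, no write/discard bit -> read */

        blk_fill_rwbs(rwbs, 0, 0);
        /* rwbs == "N": no data transferred */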
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
deleted file mode 100644
index 246f2aa6dc46..000000000000
--- a/kernel/trace/events.c
+++ /dev/null
@@ -1,14 +0,0 @@
1/*
2 * This is the place to register all trace points as events.
3 */
4
5#include <linux/stringify.h>
6
7#include <trace/trace_events.h>
8
9#include "trace_output.h"
10
11#include "trace_events_stage_1.h"
12#include "trace_events_stage_2.h"
13#include "trace_events_stage_3.h"
14
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f1ed080406c3..bb60732ade0c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,11 +29,13 @@
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h> 30#include <linux/hash.h>
31 31
32#include <trace/sched.h> 32#include <trace/events/sched.h>
33 33
34#include <asm/ftrace.h> 34#include <asm/ftrace.h>
35#include <asm/setup.h>
35 36
36#include "trace.h" 37#include "trace_output.h"
38#include "trace_stat.h"
37 39
38#define FTRACE_WARN_ON(cond) \ 40#define FTRACE_WARN_ON(cond) \
39 do { \ 41 do { \
@@ -68,7 +70,7 @@ static DEFINE_MUTEX(ftrace_lock);
68 70
69static struct ftrace_ops ftrace_list_end __read_mostly = 71static struct ftrace_ops ftrace_list_end __read_mostly =
70{ 72{
71 .func = ftrace_stub, 73 .func = ftrace_stub,
72}; 74};
73 75
74static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 76static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
@@ -240,6 +242,580 @@ static void ftrace_update_pid_func(void)
240#endif 242#endif
241} 243}
242 244
245#ifdef CONFIG_FUNCTION_PROFILER
246struct ftrace_profile {
247 struct hlist_node node;
248 unsigned long ip;
249 unsigned long counter;
250#ifdef CONFIG_FUNCTION_GRAPH_TRACER
251 unsigned long long time;
252#endif
253};
254
255struct ftrace_profile_page {
256 struct ftrace_profile_page *next;
257 unsigned long index;
258 struct ftrace_profile records[];
259};
260
261struct ftrace_profile_stat {
262 atomic_t disabled;
263 struct hlist_head *hash;
264 struct ftrace_profile_page *pages;
265 struct ftrace_profile_page *start;
266 struct tracer_stat stat;
267};
268
269#define PROFILE_RECORDS_SIZE \
270 (PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
271
272#define PROFILES_PER_PAGE \
273 (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
274
275static int ftrace_profile_bits __read_mostly;
276static int ftrace_profile_enabled __read_mostly;
277
278/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
279static DEFINE_MUTEX(ftrace_profile_lock);
280
281static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
282
283#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
284
285static void *
286function_stat_next(void *v, int idx)
287{
288 struct ftrace_profile *rec = v;
289 struct ftrace_profile_page *pg;
290
291 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
292
293 again:
294 rec++;
295 if ((void *)rec >= (void *)&pg->records[pg->index]) {
296 pg = pg->next;
297 if (!pg)
298 return NULL;
299 rec = &pg->records[0];
300 if (!rec->counter)
301 goto again;
302 }
303
304 return rec;
305}
306
307static void *function_stat_start(struct tracer_stat *trace)
308{
309 struct ftrace_profile_stat *stat =
310 container_of(trace, struct ftrace_profile_stat, stat);
311
312 if (!stat || !stat->start)
313 return NULL;
314
315 return function_stat_next(&stat->start->records[0], 0);
316}
317
318#ifdef CONFIG_FUNCTION_GRAPH_TRACER
319/* function graph compares on total time */
320static int function_stat_cmp(void *p1, void *p2)
321{
322 struct ftrace_profile *a = p1;
323 struct ftrace_profile *b = p2;
324
325 if (a->time < b->time)
326 return -1;
327 if (a->time > b->time)
328 return 1;
329 else
330 return 0;
331}
332#else
333/* not function graph compares against hits */
334static int function_stat_cmp(void *p1, void *p2)
335{
336 struct ftrace_profile *a = p1;
337 struct ftrace_profile *b = p2;
338
339 if (a->counter < b->counter)
340 return -1;
341 if (a->counter > b->counter)
342 return 1;
343 else
344 return 0;
345}
346#endif
347
348static int function_stat_headers(struct seq_file *m)
349{
350#ifdef CONFIG_FUNCTION_GRAPH_TRACER
351 seq_printf(m, " Function "
352 "Hit Time Avg\n"
353 " -------- "
354 "--- ---- ---\n");
355#else
356 seq_printf(m, " Function Hit\n"
357 " -------- ---\n");
358#endif
359 return 0;
360}
361
362static int function_stat_show(struct seq_file *m, void *v)
363{
364 struct ftrace_profile *rec = v;
365 char str[KSYM_SYMBOL_LEN];
366#ifdef CONFIG_FUNCTION_GRAPH_TRACER
367 static DEFINE_MUTEX(mutex);
368 static struct trace_seq s;
369 unsigned long long avg;
370#endif
371
372 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
373 seq_printf(m, " %-30.30s %10lu", str, rec->counter);
374
375#ifdef CONFIG_FUNCTION_GRAPH_TRACER
376 seq_printf(m, " ");
377 avg = rec->time;
378 do_div(avg, rec->counter);
379
380 mutex_lock(&mutex);
381 trace_seq_init(&s);
382 trace_print_graph_duration(rec->time, &s);
383 trace_seq_puts(&s, " ");
384 trace_print_graph_duration(avg, &s);
385 trace_print_seq(m, &s);
386 mutex_unlock(&mutex);
387#endif
388 seq_putc(m, '\n');
389
390 return 0;
391}
392
393static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
394{
395 struct ftrace_profile_page *pg;
396
397 pg = stat->pages = stat->start;
398
399 while (pg) {
400 memset(pg->records, 0, PROFILE_RECORDS_SIZE);
401 pg->index = 0;
402 pg = pg->next;
403 }
404
405 memset(stat->hash, 0,
406 FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
407}
408
409int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
410{
411 struct ftrace_profile_page *pg;
412 int functions;
413 int pages;
414 int i;
415
416 /* If we already allocated, do nothing */
417 if (stat->pages)
418 return 0;
419
420 stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
421 if (!stat->pages)
422 return -ENOMEM;
423
424#ifdef CONFIG_DYNAMIC_FTRACE
425 functions = ftrace_update_tot_cnt;
426#else
427 /*
428 * We do not know the number of functions that exist because
 429 * dynamic tracing is what counts them. From past experience,
430 * we have around 20K functions. That should be more than enough.
431 * It is highly unlikely we will execute every function in
432 * the kernel.
433 */
434 functions = 20000;
435#endif
436
437 pg = stat->start = stat->pages;
438
439 pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
440
441 for (i = 0; i < pages; i++) {
442 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
443 if (!pg->next)
444 goto out_free;
445 pg = pg->next;
446 }
447
448 return 0;
449
450 out_free:
451 pg = stat->start;
452 while (pg) {
453 unsigned long tmp = (unsigned long)pg;
454
455 pg = pg->next;
456 free_page(tmp);
457 }
458
459 free_page((unsigned long)stat->pages);
460 stat->pages = NULL;
461 stat->start = NULL;
462
463 return -ENOMEM;
464}
465
466static int ftrace_profile_init_cpu(int cpu)
467{
468 struct ftrace_profile_stat *stat;
469 int size;
470
471 stat = &per_cpu(ftrace_profile_stats, cpu);
472
473 if (stat->hash) {
474 /* If the profile is already created, simply reset it */
475 ftrace_profile_reset(stat);
476 return 0;
477 }
478
479 /*
480 * We are profiling all functions, but usually only a few thousand
481 * functions are hit. We'll make a hash of 1024 items.
482 */
483 size = FTRACE_PROFILE_HASH_SIZE;
484
485 stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
486
487 if (!stat->hash)
488 return -ENOMEM;
489
490 if (!ftrace_profile_bits) {
491 size--;
492
493 for (; size; size >>= 1)
494 ftrace_profile_bits++;
495 }
496
497 /* Preallocate the function profiling pages */
498 if (ftrace_profile_pages_init(stat) < 0) {
499 kfree(stat->hash);
500 stat->hash = NULL;
501 return -ENOMEM;
502 }
503
504 return 0;
505}
506
507static int ftrace_profile_init(void)
508{
509 int cpu;
510 int ret = 0;
511
512 for_each_online_cpu(cpu) {
513 ret = ftrace_profile_init_cpu(cpu);
514 if (ret)
515 break;
516 }
517
518 return ret;
519}
520
521/* interrupts must be disabled */
522static struct ftrace_profile *
523ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
524{
525 struct ftrace_profile *rec;
526 struct hlist_head *hhd;
527 struct hlist_node *n;
528 unsigned long key;
529
530 key = hash_long(ip, ftrace_profile_bits);
531 hhd = &stat->hash[key];
532
533 if (hlist_empty(hhd))
534 return NULL;
535
536 hlist_for_each_entry_rcu(rec, n, hhd, node) {
537 if (rec->ip == ip)
538 return rec;
539 }
540
541 return NULL;
542}
543
544static void ftrace_add_profile(struct ftrace_profile_stat *stat,
545 struct ftrace_profile *rec)
546{
547 unsigned long key;
548
549 key = hash_long(rec->ip, ftrace_profile_bits);
550 hlist_add_head_rcu(&rec->node, &stat->hash[key]);
551}
552
553/*
 554 * The memory is already allocated; this simply finds a new record to use.
555 */
556static struct ftrace_profile *
557ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
558{
559 struct ftrace_profile *rec = NULL;
560
561 /* prevent recursion (from NMIs) */
562 if (atomic_inc_return(&stat->disabled) != 1)
563 goto out;
564
565 /*
566 * Try to find the function again since an NMI
567 * could have added it
568 */
569 rec = ftrace_find_profiled_func(stat, ip);
570 if (rec)
571 goto out;
572
573 if (stat->pages->index == PROFILES_PER_PAGE) {
574 if (!stat->pages->next)
575 goto out;
576 stat->pages = stat->pages->next;
577 }
578
579 rec = &stat->pages->records[stat->pages->index++];
580 rec->ip = ip;
581 ftrace_add_profile(stat, rec);
582
583 out:
584 atomic_dec(&stat->disabled);
585
586 return rec;
587}
588
589static void
590function_profile_call(unsigned long ip, unsigned long parent_ip)
591{
592 struct ftrace_profile_stat *stat;
593 struct ftrace_profile *rec;
594 unsigned long flags;
595
596 if (!ftrace_profile_enabled)
597 return;
598
599 local_irq_save(flags);
600
601 stat = &__get_cpu_var(ftrace_profile_stats);
602 if (!stat->hash || !ftrace_profile_enabled)
603 goto out;
604
605 rec = ftrace_find_profiled_func(stat, ip);
606 if (!rec) {
607 rec = ftrace_profile_alloc(stat, ip);
608 if (!rec)
609 goto out;
610 }
611
612 rec->counter++;
613 out:
614 local_irq_restore(flags);
615}
616
617#ifdef CONFIG_FUNCTION_GRAPH_TRACER
618static int profile_graph_entry(struct ftrace_graph_ent *trace)
619{
620 function_profile_call(trace->func, 0);
621 return 1;
622}
623
624static void profile_graph_return(struct ftrace_graph_ret *trace)
625{
626 struct ftrace_profile_stat *stat;
627 unsigned long long calltime;
628 struct ftrace_profile *rec;
629 unsigned long flags;
630
631 local_irq_save(flags);
632 stat = &__get_cpu_var(ftrace_profile_stats);
633 if (!stat->hash || !ftrace_profile_enabled)
634 goto out;
635
636 calltime = trace->rettime - trace->calltime;
637
638 if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
639 int index;
640
641 index = trace->depth;
642
643 /* Append this call time to the parent time to subtract */
644 if (index)
645 current->ret_stack[index - 1].subtime += calltime;
646
647 if (current->ret_stack[index].subtime < calltime)
648 calltime -= current->ret_stack[index].subtime;
649 else
650 calltime = 0;
651 }
652
653 rec = ftrace_find_profiled_func(stat, trace->func);
654 if (rec)
655 rec->time += calltime;
656
657 out:
658 local_irq_restore(flags);
659}
660
661static int register_ftrace_profiler(void)
662{
663 return register_ftrace_graph(&profile_graph_return,
664 &profile_graph_entry);
665}
666
667static void unregister_ftrace_profiler(void)
668{
669 unregister_ftrace_graph();
670}
671#else
672static struct ftrace_ops ftrace_profile_ops __read_mostly =
673{
674 .func = function_profile_call,
675};
676
677static int register_ftrace_profiler(void)
678{
679 return register_ftrace_function(&ftrace_profile_ops);
680}
681
682static void unregister_ftrace_profiler(void)
683{
684 unregister_ftrace_function(&ftrace_profile_ops);
685}
686#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
687
688static ssize_t
689ftrace_profile_write(struct file *filp, const char __user *ubuf,
690 size_t cnt, loff_t *ppos)
691{
692 unsigned long val;
693 char buf[64]; /* big enough to hold a number */
694 int ret;
695
696 if (cnt >= sizeof(buf))
697 return -EINVAL;
698
699 if (copy_from_user(&buf, ubuf, cnt))
700 return -EFAULT;
701
702 buf[cnt] = 0;
703
704 ret = strict_strtoul(buf, 10, &val);
705 if (ret < 0)
706 return ret;
707
708 val = !!val;
709
710 mutex_lock(&ftrace_profile_lock);
711 if (ftrace_profile_enabled ^ val) {
712 if (val) {
713 ret = ftrace_profile_init();
714 if (ret < 0) {
715 cnt = ret;
716 goto out;
717 }
718
719 ret = register_ftrace_profiler();
720 if (ret < 0) {
721 cnt = ret;
722 goto out;
723 }
724 ftrace_profile_enabled = 1;
725 } else {
726 ftrace_profile_enabled = 0;
727 /*
728 * unregister_ftrace_profiler calls stop_machine
 729 * so this acts like a synchronize_sched.
730 */
731 unregister_ftrace_profiler();
732 }
733 }
734 out:
735 mutex_unlock(&ftrace_profile_lock);
736
737 filp->f_pos += cnt;
738
739 return cnt;
740}
741
742static ssize_t
743ftrace_profile_read(struct file *filp, char __user *ubuf,
744 size_t cnt, loff_t *ppos)
745{
746 char buf[64]; /* big enough to hold a number */
747 int r;
748
749 r = sprintf(buf, "%u\n", ftrace_profile_enabled);
750 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
751}
752
753static const struct file_operations ftrace_profile_fops = {
754 .open = tracing_open_generic,
755 .read = ftrace_profile_read,
756 .write = ftrace_profile_write,
757};
758
759/* used to initialize the real stat files */
760static struct tracer_stat function_stats __initdata = {
761 .name = "functions",
762 .stat_start = function_stat_start,
763 .stat_next = function_stat_next,
764 .stat_cmp = function_stat_cmp,
765 .stat_headers = function_stat_headers,
766 .stat_show = function_stat_show
767};
768
769static void ftrace_profile_debugfs(struct dentry *d_tracer)
770{
771 struct ftrace_profile_stat *stat;
772 struct dentry *entry;
773 char *name;
774 int ret;
775 int cpu;
776
777 for_each_possible_cpu(cpu) {
778 stat = &per_cpu(ftrace_profile_stats, cpu);
779
780 /* allocate enough for function name + cpu number */
781 name = kmalloc(32, GFP_KERNEL);
782 if (!name) {
783 /*
 784 * The files created are permanent; if something goes wrong later
 785 * we still do not free this memory.
786 */
787 kfree(stat);
788 WARN(1,
789 "Could not allocate stat file for cpu %d\n",
790 cpu);
791 return;
792 }
793 stat->stat = function_stats;
794 snprintf(name, 32, "function%d", cpu);
795 stat->stat.name = name;
796 ret = register_stat_tracer(&stat->stat);
797 if (ret) {
798 WARN(1,
799 "Could not register function stat for cpu %d\n",
800 cpu);
801 kfree(name);
802 return;
803 }
804 }
805
806 entry = debugfs_create_file("function_profile_enabled", 0644,
807 d_tracer, NULL, &ftrace_profile_fops);
808 if (!entry)
809 pr_warning("Could not create debugfs "
810 "'function_profile_enabled' entry\n");
811}
812
813#else /* CONFIG_FUNCTION_PROFILER */
814static void ftrace_profile_debugfs(struct dentry *d_tracer)
815{
816}
817#endif /* CONFIG_FUNCTION_PROFILER */
818
243/* set when tracing only a pid */ 819/* set when tracing only a pid */
244struct pid *ftrace_pid_trace; 820struct pid *ftrace_pid_trace;
245static struct pid * const ftrace_swapper_pid = &init_struct_pid; 821static struct pid * const ftrace_swapper_pid = &init_struct_pid;
@@ -261,7 +837,6 @@ struct ftrace_func_probe {
261 struct rcu_head rcu; 837 struct rcu_head rcu;
262}; 838};
263 839
264
265enum { 840enum {
266 FTRACE_ENABLE_CALLS = (1 << 0), 841 FTRACE_ENABLE_CALLS = (1 << 0),
267 FTRACE_DISABLE_CALLS = (1 << 1), 842 FTRACE_DISABLE_CALLS = (1 << 1),
@@ -346,30 +921,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
346 rec->flags |= FTRACE_FL_FREE; 921 rec->flags |= FTRACE_FL_FREE;
347} 922}
348 923
349void ftrace_release(void *start, unsigned long size)
350{
351 struct dyn_ftrace *rec;
352 struct ftrace_page *pg;
353 unsigned long s = (unsigned long)start;
354 unsigned long e = s + size;
355
356 if (ftrace_disabled || !start)
357 return;
358
359 mutex_lock(&ftrace_lock);
360 do_for_each_ftrace_rec(pg, rec) {
361 if ((rec->ip >= s) && (rec->ip < e)) {
362 /*
363 * rec->ip is changed in ftrace_free_rec()
 364 * It should not be between s and e if the record was freed.
365 */
366 FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
367 ftrace_free_rec(rec);
368 }
369 } while_for_each_ftrace_rec();
370 mutex_unlock(&ftrace_lock);
371}
372
373static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 924static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
374{ 925{
375 struct dyn_ftrace *rec; 926 struct dyn_ftrace *rec;
@@ -1408,7 +1959,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1408 1959
1409static struct ftrace_ops trace_probe_ops __read_mostly = 1960static struct ftrace_ops trace_probe_ops __read_mostly =
1410{ 1961{
1411 .func = function_trace_probe_call, 1962 .func = function_trace_probe_call,
1412}; 1963};
1413 1964
1414static int ftrace_probe_registered; 1965static int ftrace_probe_registered;
@@ -1823,6 +2374,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset)
1823 ftrace_set_regex(buf, len, reset, 0); 2374 ftrace_set_regex(buf, len, reset, 0);
1824} 2375}
1825 2376
2377/*
2378 * command line interface to allow users to set filters on boot up.
2379 */
2380#define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE
2381static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
2382static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
2383
2384static int __init set_ftrace_notrace(char *str)
2385{
2386 strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
2387 return 1;
2388}
2389__setup("ftrace_notrace=", set_ftrace_notrace);
2390
2391static int __init set_ftrace_filter(char *str)
2392{
2393 strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
2394 return 1;
2395}
2396__setup("ftrace_filter=", set_ftrace_filter);
2397
2398static void __init set_ftrace_early_filter(char *buf, int enable)
2399{
2400 char *func;
2401
2402 while (buf) {
2403 func = strsep(&buf, ",");
2404 ftrace_set_regex(func, strlen(func), 0, enable);
2405 }
2406}
2407
2408static void __init set_ftrace_early_filters(void)
2409{
2410 if (ftrace_filter_buf[0])
2411 set_ftrace_early_filter(ftrace_filter_buf, 1);
2412 if (ftrace_notrace_buf[0])
2413 set_ftrace_early_filter(ftrace_notrace_buf, 0);
2414}
2415
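With these hooks, booting with something like ftrace_filter=sys_open,sys_close applies filters before debugfs is even mounted. set_ftrace_early_filter() splits the list with strsep(); a userspace sketch of the same loop (strsep() is a BSD/glibc extension):

#define _DEFAULT_SOURCE		/* for strsep() on glibc */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[] = "sys_open,sys_close,schedule";	/* sample boot argument */
	char *list = buf;

	/* strsep() NUL-terminates each token in place and advances 'list',
	 * the same pattern set_ftrace_early_filter() uses above. */
	while (list) {
		char *func = strsep(&list, ",");
		printf("filter: %s\n", func);
	}
	return 0;
}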
1826static int 2416static int
1827ftrace_regex_release(struct inode *inode, struct file *file, int enable) 2417ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1828{ 2418{
@@ -2128,38 +2718,23 @@ static const struct file_operations ftrace_graph_fops = {
2128 2718
2129static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) 2719static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
2130{ 2720{
2131 struct dentry *entry;
2132 2721
2133 entry = debugfs_create_file("available_filter_functions", 0444, 2722 trace_create_file("available_filter_functions", 0444,
2134 d_tracer, NULL, &ftrace_avail_fops); 2723 d_tracer, NULL, &ftrace_avail_fops);
2135 if (!entry)
2136 pr_warning("Could not create debugfs "
2137 "'available_filter_functions' entry\n");
2138 2724
2139 entry = debugfs_create_file("failures", 0444, 2725 trace_create_file("failures", 0444,
2140 d_tracer, NULL, &ftrace_failures_fops); 2726 d_tracer, NULL, &ftrace_failures_fops);
2141 if (!entry)
2142 pr_warning("Could not create debugfs 'failures' entry\n");
2143 2727
2144 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, 2728 trace_create_file("set_ftrace_filter", 0644, d_tracer,
2145 NULL, &ftrace_filter_fops); 2729 NULL, &ftrace_filter_fops);
2146 if (!entry)
2147 pr_warning("Could not create debugfs "
2148 "'set_ftrace_filter' entry\n");
2149 2730
2150 entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, 2731 trace_create_file("set_ftrace_notrace", 0644, d_tracer,
2151 NULL, &ftrace_notrace_fops); 2732 NULL, &ftrace_notrace_fops);
2152 if (!entry)
2153 pr_warning("Could not create debugfs "
2154 "'set_ftrace_notrace' entry\n");
2155 2733
2156#ifdef CONFIG_FUNCTION_GRAPH_TRACER 2734#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2157 entry = debugfs_create_file("set_graph_function", 0444, d_tracer, 2735 trace_create_file("set_graph_function", 0444, d_tracer,
2158 NULL, 2736 NULL,
2159 &ftrace_graph_fops); 2737 &ftrace_graph_fops);
2160 if (!entry)
2161 pr_warning("Could not create debugfs "
2162 "'set_graph_function' entry\n");
2163#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2738#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2164 2739
2165 return 0; 2740 return 0;
@@ -2197,14 +2772,72 @@ static int ftrace_convert_nops(struct module *mod,
2197 return 0; 2772 return 0;
2198} 2773}
2199 2774
2200void ftrace_init_module(struct module *mod, 2775#ifdef CONFIG_MODULES
2201 unsigned long *start, unsigned long *end) 2776void ftrace_release(void *start, void *end)
2777{
2778 struct dyn_ftrace *rec;
2779 struct ftrace_page *pg;
2780 unsigned long s = (unsigned long)start;
2781 unsigned long e = (unsigned long)end;
2782
2783 if (ftrace_disabled || !start || start == end)
2784 return;
2785
2786 mutex_lock(&ftrace_lock);
2787 do_for_each_ftrace_rec(pg, rec) {
2788 if ((rec->ip >= s) && (rec->ip < e)) {
2789 /*
2790 * rec->ip is changed in ftrace_free_rec()
2791 * It should not be between s and e if the record was freed.
2792 */
2793 FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
2794 ftrace_free_rec(rec);
2795 }
2796 } while_for_each_ftrace_rec();
2797 mutex_unlock(&ftrace_lock);
2798}
2799
2800static void ftrace_init_module(struct module *mod,
2801 unsigned long *start, unsigned long *end)
2202{ 2802{
2203 if (ftrace_disabled || start == end) 2803 if (ftrace_disabled || start == end)
2204 return; 2804 return;
2205 ftrace_convert_nops(mod, start, end); 2805 ftrace_convert_nops(mod, start, end);
2206} 2806}
2207 2807
2808static int ftrace_module_notify(struct notifier_block *self,
2809 unsigned long val, void *data)
2810{
2811 struct module *mod = data;
2812
2813 switch (val) {
2814 case MODULE_STATE_COMING:
2815 ftrace_init_module(mod, mod->ftrace_callsites,
2816 mod->ftrace_callsites +
2817 mod->num_ftrace_callsites);
2818 break;
2819 case MODULE_STATE_GOING:
2820 ftrace_release(mod->ftrace_callsites,
2821 mod->ftrace_callsites +
2822 mod->num_ftrace_callsites);
2823 break;
2824 }
2825
2826 return 0;
2827}
2828#else
2829static int ftrace_module_notify(struct notifier_block *self,
2830 unsigned long val, void *data)
2831{
2832 return 0;
2833}
2834#endif /* CONFIG_MODULES */
2835
2836struct notifier_block ftrace_module_nb = {
2837 .notifier_call = ftrace_module_notify,
2838 .priority = 0,
2839};
2840
2208extern unsigned long __start_mcount_loc[]; 2841extern unsigned long __start_mcount_loc[];
2209extern unsigned long __stop_mcount_loc[]; 2842extern unsigned long __stop_mcount_loc[];
2210 2843
@@ -2236,6 +2869,12 @@ void __init ftrace_init(void)
2236 __start_mcount_loc, 2869 __start_mcount_loc,
2237 __stop_mcount_loc); 2870 __stop_mcount_loc);
2238 2871
2872 ret = register_module_notifier(&ftrace_module_nb);
2873 if (ret)
2874 pr_warning("Failed to register ftrace module notifier\n");
2875
2876 set_ftrace_early_filters();
2877
2239 return; 2878 return;
2240 failed: 2879 failed:
2241 ftrace_disabled = 1; 2880 ftrace_disabled = 1;
@@ -2417,7 +3056,6 @@ static const struct file_operations ftrace_pid_fops = {
2417static __init int ftrace_init_debugfs(void) 3056static __init int ftrace_init_debugfs(void)
2418{ 3057{
2419 struct dentry *d_tracer; 3058 struct dentry *d_tracer;
2420 struct dentry *entry;
2421 3059
2422 d_tracer = tracing_init_dentry(); 3060 d_tracer = tracing_init_dentry();
2423 if (!d_tracer) 3061 if (!d_tracer)
@@ -2425,11 +3063,11 @@ static __init int ftrace_init_debugfs(void)
2425 3063
2426 ftrace_init_dyn_debugfs(d_tracer); 3064 ftrace_init_dyn_debugfs(d_tracer);
2427 3065
2428 entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, 3066 trace_create_file("set_ftrace_pid", 0644, d_tracer,
2429 NULL, &ftrace_pid_fops); 3067 NULL, &ftrace_pid_fops);
2430 if (!entry) 3068
2431 pr_warning("Could not create debugfs " 3069 ftrace_profile_debugfs(d_tracer);
2432 "'set_ftrace_pid' entry\n"); 3070
2433 return 0; 3071 return 0;
2434} 3072}
2435fs_initcall(ftrace_init_debugfs); 3073fs_initcall(ftrace_init_debugfs);
@@ -2538,7 +3176,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
2538 3176
2539#ifdef CONFIG_FUNCTION_GRAPH_TRACER 3177#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2540 3178
2541static atomic_t ftrace_graph_active; 3179static int ftrace_graph_active;
2542static struct notifier_block ftrace_suspend_notifier; 3180static struct notifier_block ftrace_suspend_notifier;
2543 3181
2544int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) 3182int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
@@ -2580,12 +3218,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
2580 } 3218 }
2581 3219
2582 if (t->ret_stack == NULL) { 3220 if (t->ret_stack == NULL) {
2583 t->curr_ret_stack = -1;
2584 /* Make sure IRQs see the -1 first: */
2585 barrier();
2586 t->ret_stack = ret_stack_list[start++];
2587 atomic_set(&t->tracing_graph_pause, 0); 3221 atomic_set(&t->tracing_graph_pause, 0);
2588 atomic_set(&t->trace_overrun, 0); 3222 atomic_set(&t->trace_overrun, 0);
3223 t->curr_ret_stack = -1;
3224 /* Make sure the tasks see the -1 first: */
3225 smp_wmb();
3226 t->ret_stack = ret_stack_list[start++];
2589 } 3227 }
2590 } while_each_thread(g, t); 3228 } while_each_thread(g, t);
2591 3229
@@ -2643,8 +3281,10 @@ static int start_graph_tracing(void)
2643 return -ENOMEM; 3281 return -ENOMEM;
2644 3282
2645 /* The cpu_boot init_task->ret_stack will never be freed */ 3283 /* The cpu_boot init_task->ret_stack will never be freed */
2646 for_each_online_cpu(cpu) 3284 for_each_online_cpu(cpu) {
2647 ftrace_graph_init_task(idle_task(cpu)); 3285 if (!idle_task(cpu)->ret_stack)
3286 ftrace_graph_init_task(idle_task(cpu));
3287 }
2648 3288
2649 do { 3289 do {
2650 ret = alloc_retstack_tasklist(ret_stack_list); 3290 ret = alloc_retstack_tasklist(ret_stack_list);
@@ -2690,7 +3330,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2690 mutex_lock(&ftrace_lock); 3330 mutex_lock(&ftrace_lock);
2691 3331
2692 /* we currently allow only one tracer registered at a time */ 3332 /* we currently allow only one tracer registered at a time */
2693 if (atomic_read(&ftrace_graph_active)) { 3333 if (ftrace_graph_active) {
2694 ret = -EBUSY; 3334 ret = -EBUSY;
2695 goto out; 3335 goto out;
2696 } 3336 }
@@ -2698,10 +3338,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2698 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; 3338 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2699 register_pm_notifier(&ftrace_suspend_notifier); 3339 register_pm_notifier(&ftrace_suspend_notifier);
2700 3340
2701 atomic_inc(&ftrace_graph_active); 3341 ftrace_graph_active++;
2702 ret = start_graph_tracing(); 3342 ret = start_graph_tracing();
2703 if (ret) { 3343 if (ret) {
2704 atomic_dec(&ftrace_graph_active); 3344 ftrace_graph_active--;
2705 goto out; 3345 goto out;
2706 } 3346 }
2707 3347
@@ -2719,10 +3359,10 @@ void unregister_ftrace_graph(void)
2719{ 3359{
2720 mutex_lock(&ftrace_lock); 3360 mutex_lock(&ftrace_lock);
2721 3361
2722 if (!unlikely(atomic_read(&ftrace_graph_active))) 3362 if (unlikely(!ftrace_graph_active))
2723 goto out; 3363 goto out;
2724 3364
2725 atomic_dec(&ftrace_graph_active); 3365 ftrace_graph_active--;
2726 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); 3366 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
2727 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 3367 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2728 ftrace_graph_entry = ftrace_graph_entry_stub; 3368 ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -2736,18 +3376,25 @@ void unregister_ftrace_graph(void)
2736/* Allocate a return stack for newly created task */ 3376/* Allocate a return stack for newly created task */
2737void ftrace_graph_init_task(struct task_struct *t) 3377void ftrace_graph_init_task(struct task_struct *t)
2738{ 3378{
2739 if (atomic_read(&ftrace_graph_active)) { 3379 /* Make sure we do not use the parent ret_stack */
2740 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH 3380 t->ret_stack = NULL;
3381
3382 if (ftrace_graph_active) {
3383 struct ftrace_ret_stack *ret_stack;
3384
3385 ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
2741 * sizeof(struct ftrace_ret_stack), 3386 * sizeof(struct ftrace_ret_stack),
2742 GFP_KERNEL); 3387 GFP_KERNEL);
2743 if (!t->ret_stack) 3388 if (!ret_stack)
2744 return; 3389 return;
2745 t->curr_ret_stack = -1; 3390 t->curr_ret_stack = -1;
2746 atomic_set(&t->tracing_graph_pause, 0); 3391 atomic_set(&t->tracing_graph_pause, 0);
2747 atomic_set(&t->trace_overrun, 0); 3392 atomic_set(&t->trace_overrun, 0);
2748 t->ftrace_timestamp = 0; 3393 t->ftrace_timestamp = 0;
2749 } else 3394 /* make curr_ret_stack visible before we add the ret_stack */
2750 t->ret_stack = NULL; 3395 smp_wmb();
3396 t->ret_stack = ret_stack;
3397 }
2751} 3398}
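The ordering in ftrace_graph_init_task() matters: curr_ret_stack must be visible before the ret_stack pointer is published, because a tracer that observes a non-NULL ret_stack will use the index immediately. In C11 terms this is a release/acquire pairing; a hedged sketch, where the plain curr_index stands in for the fields ordered by smp_wmb():

#include <stdatomic.h>
#include <stddef.h>

struct frame { long ip; };

static int curr_index;
static struct frame *_Atomic pub_stack;

/* Writer: initialize everything, then publish the pointer last. */
static void publish(struct frame *stack)
{
	curr_index = -1;	/* must be initialized before publication */
	atomic_store_explicit(&pub_stack, stack,
			      memory_order_release);	/* the smp_wmb() role */
}

/* Reader: a non-NULL pointer guarantees the index is already valid. */
static int reader_index(void)
{
	struct frame *s = atomic_load_explicit(&pub_stack,
					       memory_order_acquire);
	return s ? curr_index : -1;
}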
2752 3399
2753void ftrace_graph_exit_task(struct task_struct *t) 3400void ftrace_graph_exit_task(struct task_struct *t)
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 5011f4d91e37..86cdf671d7e2 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -12,7 +12,7 @@
12#include <linux/dcache.h> 12#include <linux/dcache.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14 14
15#include <trace/kmemtrace.h> 15#include <linux/kmemtrace.h>
16 16
17#include "trace_output.h" 17#include "trace_output.h"
18#include "trace.h" 18#include "trace.h"
@@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
42 gfp_t gfp_flags, 42 gfp_t gfp_flags,
43 int node) 43 int node)
44{ 44{
45 struct ftrace_event_call *call = &event_kmem_alloc;
45 struct trace_array *tr = kmemtrace_array; 46 struct trace_array *tr = kmemtrace_array;
46 struct kmemtrace_alloc_entry *entry; 47 struct kmemtrace_alloc_entry *entry;
47 struct ring_buffer_event *event; 48 struct ring_buffer_event *event;
@@ -62,7 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
62 entry->gfp_flags = gfp_flags; 63 entry->gfp_flags = gfp_flags;
63 entry->node = node; 64 entry->node = node;
64 65
65 ring_buffer_unlock_commit(tr->buffer, event); 66 if (!filter_check_discard(call, entry, tr->buffer, event))
67 ring_buffer_unlock_commit(tr->buffer, event);
66 68
67 trace_wake_up(); 69 trace_wake_up();
68} 70}
@@ -71,6 +73,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
71 unsigned long call_site, 73 unsigned long call_site,
72 const void *ptr) 74 const void *ptr)
73{ 75{
76 struct ftrace_event_call *call = &event_kmem_free;
74 struct trace_array *tr = kmemtrace_array; 77 struct trace_array *tr = kmemtrace_array;
75 struct kmemtrace_free_entry *entry; 78 struct kmemtrace_free_entry *entry;
76 struct ring_buffer_event *event; 79 struct ring_buffer_event *event;
@@ -86,7 +89,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
86 entry->call_site = call_site; 89 entry->call_site = call_site;
87 entry->ptr = ptr; 90 entry->ptr = ptr;
88 91
89 ring_buffer_unlock_commit(tr->buffer, event); 92 if (!filter_check_discard(call, entry, tr->buffer, event))
93 ring_buffer_unlock_commit(tr->buffer, event);
90 94
91 trace_wake_up(); 95 trace_wake_up();
92} 96}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 960cbf44c844..2e642b2b7253 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -22,6 +22,28 @@
22#include "trace.h" 22#include "trace.h"
23 23
24/* 24/*
 25 * The ring buffer header is special. We must manually keep it up to date.
26 */
27int ring_buffer_print_entry_header(struct trace_seq *s)
28{
29 int ret;
30
31 ret = trace_seq_printf(s, "# compressed entry header\n");
32 ret = trace_seq_printf(s, "\ttype_len : 5 bits\n");
33 ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n");
34 ret = trace_seq_printf(s, "\tarray : 32 bits\n");
35 ret = trace_seq_printf(s, "\n");
36 ret = trace_seq_printf(s, "\tpadding : type == %d\n",
37 RINGBUF_TYPE_PADDING);
38 ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
39 RINGBUF_TYPE_TIME_EXTEND);
40 ret = trace_seq_printf(s, "\tdata max type_len == %d\n",
41 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
42
43 return ret;
44}
45
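The "compressed entry header" printed above describes the new event layout: a 5-bit type_len and a 27-bit time_delta share one 32-bit word, followed by array[]. A sketch of that layout (the authoritative definition lives in the ring buffer headers):

#include <stdint.h>

/* Sketch of the compressed header: 5 + 27 bits in one u32. A type_len
 * up to RINGBUF_TYPE_DATA_TYPE_LEN_MAX encodes a data length in 4-byte
 * units; the values above that are the padding/time-extend/time-stamp
 * type codes printed by ring_buffer_print_entry_header(). */
struct rb_event_sketch {
	uint32_t type_len   : 5;
	uint32_t time_delta : 27;
	uint32_t array[];
};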
46/*
25 * The ring buffer is made up of a list of pages. A separate list of pages is 47 * The ring buffer is made up of a list of pages. A separate list of pages is
26 * allocated for each CPU. A writer may only write to a buffer that is 48 * allocated for each CPU. A writer may only write to a buffer that is
27 * associated with the CPU it is currently executing on. A reader may read 49 * associated with the CPU it is currently executing on. A reader may read
@@ -182,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
182 204
183#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 205#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
184#define RB_ALIGNMENT 4U 206#define RB_ALIGNMENT 4U
185#define RB_MAX_SMALL_DATA 28 207#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
208
209/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
210#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
186 211
187enum { 212enum {
188 RB_LEN_TIME_EXTEND = 8, 213 RB_LEN_TIME_EXTEND = 8,
@@ -191,48 +216,28 @@ enum {
191 216
192static inline int rb_null_event(struct ring_buffer_event *event) 217static inline int rb_null_event(struct ring_buffer_event *event)
193{ 218{
194 return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; 219 return event->type_len == RINGBUF_TYPE_PADDING
220 && event->time_delta == 0;
195} 221}
196 222
197static inline int rb_discarded_event(struct ring_buffer_event *event) 223static inline int rb_discarded_event(struct ring_buffer_event *event)
198{ 224{
199 return event->type == RINGBUF_TYPE_PADDING && event->time_delta; 225 return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
200} 226}
201 227
202static void rb_event_set_padding(struct ring_buffer_event *event) 228static void rb_event_set_padding(struct ring_buffer_event *event)
203{ 229{
204 event->type = RINGBUF_TYPE_PADDING; 230 event->type_len = RINGBUF_TYPE_PADDING;
205 event->time_delta = 0; 231 event->time_delta = 0;
206} 232}
207 233
208/**
209 * ring_buffer_event_discard - discard an event in the ring buffer
210 * @buffer: the ring buffer
211 * @event: the event to discard
212 *
 213 * Sometimes an event that is in the ring buffer needs to be ignored.
214 * This function lets the user discard an event in the ring buffer
215 * and then that event will not be read later.
216 *
217 * Note, it is up to the user to be careful with this, and protect
218 * against races. If the user discards an event that has been consumed
219 * it is possible that it could corrupt the ring buffer.
220 */
221void ring_buffer_event_discard(struct ring_buffer_event *event)
222{
223 event->type = RINGBUF_TYPE_PADDING;
224 /* time delta must be non zero */
225 if (!event->time_delta)
226 event->time_delta = 1;
227}
228
229static unsigned 234static unsigned
230rb_event_data_length(struct ring_buffer_event *event) 235rb_event_data_length(struct ring_buffer_event *event)
231{ 236{
232 unsigned length; 237 unsigned length;
233 238
234 if (event->len) 239 if (event->type_len)
235 length = event->len * RB_ALIGNMENT; 240 length = event->type_len * RB_ALIGNMENT;
236 else 241 else
237 length = event->array[0]; 242 length = event->array[0];
238 return length + RB_EVNT_HDR_SIZE; 243 return length + RB_EVNT_HDR_SIZE;
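With type and length merged, rb_event_data_length() keys off type_len: non-zero means the payload length is type_len in RB_ALIGNMENT (4-byte) units, zero means the byte count overflowed into array[0]. A condensed standalone sketch:

#include <stdint.h>

#define RB_ALIGNMENT	4U
#define RB_HDR_SIZE	4U	/* offsetof(event, array); sketch value */

struct rb_event {
	uint32_t type_len : 5, time_delta : 27;
	uint32_t array[];
};

/* Mirror of rb_event_data_length(): small payloads fit in type_len,
 * large payloads spill their byte count into array[0]. */
static unsigned data_length(const struct rb_event *e)
{
	unsigned len = e->type_len ? e->type_len * RB_ALIGNMENT
				   : e->array[0];
	return len + RB_HDR_SIZE;
}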
@@ -242,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event)
242static unsigned 247static unsigned
243rb_event_length(struct ring_buffer_event *event) 248rb_event_length(struct ring_buffer_event *event)
244{ 249{
245 switch (event->type) { 250 switch (event->type_len) {
246 case RINGBUF_TYPE_PADDING: 251 case RINGBUF_TYPE_PADDING:
247 if (rb_null_event(event)) 252 if (rb_null_event(event))
248 /* undefined */ 253 /* undefined */
249 return -1; 254 return -1;
250 return rb_event_data_length(event); 255 return event->array[0] + RB_EVNT_HDR_SIZE;
251 256
252 case RINGBUF_TYPE_TIME_EXTEND: 257 case RINGBUF_TYPE_TIME_EXTEND:
253 return RB_LEN_TIME_EXTEND; 258 return RB_LEN_TIME_EXTEND;
@@ -271,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event)
271unsigned ring_buffer_event_length(struct ring_buffer_event *event) 276unsigned ring_buffer_event_length(struct ring_buffer_event *event)
272{ 277{
273 unsigned length = rb_event_length(event); 278 unsigned length = rb_event_length(event);
274 if (event->type != RINGBUF_TYPE_DATA) 279 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
275 return length; 280 return length;
276 length -= RB_EVNT_HDR_SIZE; 281 length -= RB_EVNT_HDR_SIZE;
277 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) 282 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -284,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
284static void * 289static void *
285rb_event_data(struct ring_buffer_event *event) 290rb_event_data(struct ring_buffer_event *event)
286{ 291{
287 BUG_ON(event->type != RINGBUF_TYPE_DATA); 292 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
288 /* If length is in len field, then array[0] has the data */ 293 /* If length is in len field, then array[0] has the data */
289 if (event->len) 294 if (event->type_len)
290 return (void *)&event->array[0]; 295 return (void *)&event->array[0];
291 /* Otherwise length is in array[0] and array[1] has the data */ 296 /* Otherwise length is in array[0] and array[1] has the data */
292 return (void *)&event->array[1]; 297 return (void *)&event->array[1];
@@ -316,9 +321,10 @@ struct buffer_data_page {
316}; 321};
317 322
318struct buffer_page { 323struct buffer_page {
324 struct list_head list; /* list of buffer pages */
319 local_t write; /* index for next write */ 325 local_t write; /* index for next write */
320 unsigned read; /* index for next read */ 326 unsigned read; /* index for next read */
321 struct list_head list; /* list of free pages */ 327 local_t entries; /* entries on this page */
322 struct buffer_data_page *page; /* Actual data page */ 328 struct buffer_data_page *page; /* Actual data page */
323}; 329};
324 330
@@ -361,6 +367,34 @@ static inline int test_time_stamp(u64 delta)
361 367
362#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) 368#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
363 369
 370/* Max payload is BUF_PAGE_SIZE - header (8 bytes) */
371#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
372
373/* Max number of timestamps that can fit on a page */
374#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
375
376int ring_buffer_print_page_header(struct trace_seq *s)
377{
378 struct buffer_data_page field;
379 int ret;
380
381 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
382 "offset:0;\tsize:%u;\n",
383 (unsigned int)sizeof(field.time_stamp));
384
385 ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
386 "offset:%u;\tsize:%u;\n",
387 (unsigned int)offsetof(typeof(field), commit),
388 (unsigned int)sizeof(field.commit));
389
390 ret = trace_seq_printf(s, "\tfield: char data;\t"
391 "offset:%u;\tsize:%u;\n",
392 (unsigned int)offsetof(typeof(field), data),
393 (unsigned int)BUF_PAGE_SIZE);
394
395 return ret;
396}
397
364/* 398/*
365 * head_page == tail_page && head == tail then buffer is empty. 399 * head_page == tail_page && head == tail then buffer is empty.
366 */ 400 */
@@ -375,8 +409,11 @@ struct ring_buffer_per_cpu {
375 struct buffer_page *tail_page; /* write to tail */ 409 struct buffer_page *tail_page; /* write to tail */
376 struct buffer_page *commit_page; /* committed pages */ 410 struct buffer_page *commit_page; /* committed pages */
377 struct buffer_page *reader_page; 411 struct buffer_page *reader_page;
412 unsigned long nmi_dropped;
413 unsigned long commit_overrun;
378 unsigned long overrun; 414 unsigned long overrun;
379 unsigned long entries; 415 unsigned long read;
416 local_t entries;
380 u64 write_stamp; 417 u64 write_stamp;
381 u64 read_stamp; 418 u64 read_stamp;
382 atomic_t record_disabled; 419 atomic_t record_disabled;
@@ -389,6 +426,8 @@ struct ring_buffer {
389 atomic_t record_disabled; 426 atomic_t record_disabled;
390 cpumask_var_t cpumask; 427 cpumask_var_t cpumask;
391 428
429 struct lock_class_key *reader_lock_key;
430
392 struct mutex mutex; 431 struct mutex mutex;
393 432
394 struct ring_buffer_per_cpu **buffers; 433 struct ring_buffer_per_cpu **buffers;
@@ -420,13 +459,18 @@ struct ring_buffer_iter {
420/* Up this if you want to test the TIME_EXTENTS and normalization */ 459/* Up this if you want to test the TIME_EXTENTS and normalization */
421#define DEBUG_SHIFT 0 460#define DEBUG_SHIFT 0
422 461
462static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
463{
464 /* shift to debug/test normalization and TIME_EXTENTS */
465 return buffer->clock() << DEBUG_SHIFT;
466}
467
423u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) 468u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
424{ 469{
425 u64 time; 470 u64 time;
426 471
427 preempt_disable_notrace(); 472 preempt_disable_notrace();
428 /* shift to debug/test normalization and TIME_EXTENTS */ 473 time = rb_time_stamp(buffer, cpu);
429 time = buffer->clock() << DEBUG_SHIFT;
430 preempt_enable_no_resched_notrace(); 474 preempt_enable_no_resched_notrace();
431 475
432 return time; 476 return time;
@@ -523,6 +567,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
523 cpu_buffer->cpu = cpu; 567 cpu_buffer->cpu = cpu;
524 cpu_buffer->buffer = buffer; 568 cpu_buffer->buffer = buffer;
525 spin_lock_init(&cpu_buffer->reader_lock); 569 spin_lock_init(&cpu_buffer->reader_lock);
570 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
526 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 571 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
527 INIT_LIST_HEAD(&cpu_buffer->pages); 572 INIT_LIST_HEAD(&cpu_buffer->pages);
528 573
@@ -593,7 +638,8 @@ static int rb_cpu_notify(struct notifier_block *self,
593 * when the buffer wraps. If this flag is not set, the buffer will 638 * when the buffer wraps. If this flag is not set, the buffer will
594 * drop data when the tail hits the head. 639 * drop data when the tail hits the head.
595 */ 640 */
596struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) 641struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
642 struct lock_class_key *key)
597{ 643{
598 struct ring_buffer *buffer; 644 struct ring_buffer *buffer;
599 int bsize; 645 int bsize;
@@ -616,6 +662,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
616 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 662 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
617 buffer->flags = flags; 663 buffer->flags = flags;
618 buffer->clock = trace_clock_local; 664 buffer->clock = trace_clock_local;
665 buffer->reader_lock_key = key;
619 666
620 /* need at least two pages */ 667 /* need at least two pages */
621 if (buffer->pages == 1) 668 if (buffer->pages == 1)
@@ -673,7 +720,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
673 kfree(buffer); 720 kfree(buffer);
674 return NULL; 721 return NULL;
675} 722}
676EXPORT_SYMBOL_GPL(ring_buffer_alloc); 723EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
677 724
678/** 725/**
679 * ring_buffer_free - free a ring buffer. 726 * ring_buffer_free - free a ring buffer.
@@ -947,31 +994,6 @@ static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
947 return rb_page_commit(cpu_buffer->head_page); 994 return rb_page_commit(cpu_buffer->head_page);
948} 995}
949 996
950/*
951 * When the tail hits the head and the buffer is in overwrite mode,
952 * the head jumps to the next page and all content on the previous
953 * page is discarded. But before doing so, we update the overrun
954 * variable of the buffer.
955 */
956static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
957{
958 struct ring_buffer_event *event;
959 unsigned long head;
960
961 for (head = 0; head < rb_head_size(cpu_buffer);
962 head += rb_event_length(event)) {
963
964 event = __rb_page_index(cpu_buffer->head_page, head);
965 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
966 return;
967 /* Only count data entries */
968 if (event->type != RINGBUF_TYPE_DATA)
969 continue;
970 cpu_buffer->overrun++;
971 cpu_buffer->entries--;
972 }
973}
974
975static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 997static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
976 struct buffer_page **bpage) 998 struct buffer_page **bpage)
977{ 999{
@@ -991,7 +1013,7 @@ rb_event_index(struct ring_buffer_event *event)
991 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 1013 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
992} 1014}
993 1015
994static int 1016static inline int
995rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 1017rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
996 struct ring_buffer_event *event) 1018 struct ring_buffer_event *event)
997{ 1019{
@@ -1110,28 +1132,21 @@ static void
1110rb_update_event(struct ring_buffer_event *event, 1132rb_update_event(struct ring_buffer_event *event,
1111 unsigned type, unsigned length) 1133 unsigned type, unsigned length)
1112{ 1134{
1113 event->type = type; 1135 event->type_len = type;
1114 1136
1115 switch (type) { 1137 switch (type) {
1116 1138
1117 case RINGBUF_TYPE_PADDING: 1139 case RINGBUF_TYPE_PADDING:
1118 break;
1119
1120 case RINGBUF_TYPE_TIME_EXTEND: 1140 case RINGBUF_TYPE_TIME_EXTEND:
1121 event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
1122 break;
1123
1124 case RINGBUF_TYPE_TIME_STAMP: 1141 case RINGBUF_TYPE_TIME_STAMP:
1125 event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
1126 break; 1142 break;
1127 1143
1128 case RINGBUF_TYPE_DATA: 1144 case 0:
1129 length -= RB_EVNT_HDR_SIZE; 1145 length -= RB_EVNT_HDR_SIZE;
1130 if (length > RB_MAX_SMALL_DATA) { 1146 if (length > RB_MAX_SMALL_DATA)
1131 event->len = 0;
1132 event->array[0] = length; 1147 event->array[0] = length;
1133 } else 1148 else
1134 event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1149 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1135 break; 1150 break;
1136 default: 1151 default:
1137 BUG(); 1152 BUG();
@@ -1155,131 +1170,156 @@ static unsigned rb_calculate_event_length(unsigned length)
1155 return length; 1170 return length;
1156} 1171}
1157 1172
1173
1158static struct ring_buffer_event * 1174static struct ring_buffer_event *
1159__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1175rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1160 unsigned type, unsigned long length, u64 *ts) 1176 unsigned long length, unsigned long tail,
1177 struct buffer_page *commit_page,
1178 struct buffer_page *tail_page, u64 *ts)
1161{ 1179{
1162 struct buffer_page *tail_page, *head_page, *reader_page, *commit_page; 1180 struct buffer_page *next_page, *head_page, *reader_page;
1163 unsigned long tail, write;
1164 struct ring_buffer *buffer = cpu_buffer->buffer; 1181 struct ring_buffer *buffer = cpu_buffer->buffer;
1165 struct ring_buffer_event *event; 1182 struct ring_buffer_event *event;
1166 unsigned long flags;
1167 bool lock_taken = false; 1183 bool lock_taken = false;
1184 unsigned long flags;
1168 1185
1169 commit_page = cpu_buffer->commit_page; 1186 next_page = tail_page;
1170 /* we just need to protect against interrupts */
1171 barrier();
1172 tail_page = cpu_buffer->tail_page;
1173 write = local_add_return(length, &tail_page->write);
1174 tail = write - length;
1175 1187
1176 /* See if we shot past the end of this buffer page */ 1188 local_irq_save(flags);
1177 if (write > BUF_PAGE_SIZE) { 1189 /*
1178 struct buffer_page *next_page = tail_page; 1190 * Since the write to the buffer is still not
1191 * fully lockless, we must be careful with NMIs.
1192 * The locks in the writers are taken when a write
1193 * crosses to a new page. The locks protect against
1194 * races with the readers (this will soon be fixed
1195 * with a lockless solution).
1196 *
1197 * Because we can not protect against NMIs, and we
1198 * want to keep traces reentrant, we need to manage
1199 * what happens when we are in an NMI.
1200 *
1201 * NMIs can happen after we take the lock.
1202 * If we are in an NMI, only take the lock
1203 * if it is not already taken. Otherwise
1204 * simply fail.
1205 */
1206 if (unlikely(in_nmi())) {
1207 if (!__raw_spin_trylock(&cpu_buffer->lock)) {
1208 cpu_buffer->nmi_dropped++;
1209 goto out_reset;
1210 }
1211 } else
1212 __raw_spin_lock(&cpu_buffer->lock);
1179 1213
1180 local_irq_save(flags); 1214 lock_taken = true;
1181 /*
1182 * Since the write to the buffer is still not
1183 * fully lockless, we must be careful with NMIs.
1184 * The locks in the writers are taken when a write
1185 * crosses to a new page. The locks protect against
1186 * races with the readers (this will soon be fixed
1187 * with a lockless solution).
1188 *
1189 * Because we can not protect against NMIs, and we
1190 * want to keep traces reentrant, we need to manage
1191 * what happens when we are in an NMI.
1192 *
1193 * NMIs can happen after we take the lock.
1194 * If we are in an NMI, only take the lock
1195 * if it is not already taken. Otherwise
1196 * simply fail.
1197 */
1198 if (unlikely(in_nmi())) {
1199 if (!__raw_spin_trylock(&cpu_buffer->lock))
1200 goto out_reset;
1201 } else
1202 __raw_spin_lock(&cpu_buffer->lock);
1203 1215
1204 lock_taken = true; 1216 rb_inc_page(cpu_buffer, &next_page);
1205 1217
1206 rb_inc_page(cpu_buffer, &next_page); 1218 head_page = cpu_buffer->head_page;
1219 reader_page = cpu_buffer->reader_page;
1207 1220
1208 head_page = cpu_buffer->head_page; 1221 /* we grabbed the lock before incrementing */
1209 reader_page = cpu_buffer->reader_page; 1222 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1223 goto out_reset;
1210 1224
1211 /* we grabbed the lock before incrementing */ 1225 /*
1212 if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) 1226 * If for some reason, we had an interrupt storm that made
1213 goto out_reset; 1227 * it all the way around the buffer, bail, and warn
1228 * about it.
1229 */
1230 if (unlikely(next_page == commit_page)) {
1231 cpu_buffer->commit_overrun++;
1232 goto out_reset;
1233 }
1214 1234
1215 /* 1235 if (next_page == head_page) {
1216 * If for some reason, we had an interrupt storm that made 1236 if (!(buffer->flags & RB_FL_OVERWRITE))
1217 * it all the way around the buffer, bail, and warn
1218 * about it.
1219 */
1220 if (unlikely(next_page == commit_page)) {
1221 WARN_ON_ONCE(1);
1222 goto out_reset; 1237 goto out_reset;
1223 }
1224 1238
1225 if (next_page == head_page) { 1239 /* tail_page has not moved yet? */
1226 if (!(buffer->flags & RB_FL_OVERWRITE)) 1240 if (tail_page == cpu_buffer->tail_page) {
1227 goto out_reset; 1241 /* count overflows */
1228 1242 cpu_buffer->overrun +=
1229 /* tail_page has not moved yet? */ 1243 local_read(&head_page->entries);
1230 if (tail_page == cpu_buffer->tail_page) {
1231 /* count overflows */
1232 rb_update_overflow(cpu_buffer);
1233 1244
1234 rb_inc_page(cpu_buffer, &head_page); 1245 rb_inc_page(cpu_buffer, &head_page);
1235 cpu_buffer->head_page = head_page; 1246 cpu_buffer->head_page = head_page;
1236 cpu_buffer->head_page->read = 0; 1247 cpu_buffer->head_page->read = 0;
1237 }
1238 } 1248 }
1249 }
1239 1250
1240 /* 1251 /*
1241 * If the tail page is still the same as what we think 1252 * If the tail page is still the same as what we think
1242 * it is, then it is up to us to update the tail 1253 * it is, then it is up to us to update the tail
1243 * pointer. 1254 * pointer.
1244 */ 1255 */
1245 if (tail_page == cpu_buffer->tail_page) { 1256 if (tail_page == cpu_buffer->tail_page) {
1246 local_set(&next_page->write, 0); 1257 local_set(&next_page->write, 0);
1247 local_set(&next_page->page->commit, 0); 1258 local_set(&next_page->entries, 0);
1248 cpu_buffer->tail_page = next_page; 1259 local_set(&next_page->page->commit, 0);
1260 cpu_buffer->tail_page = next_page;
1261
1262 /* reread the time stamp */
1263 *ts = rb_time_stamp(buffer, cpu_buffer->cpu);
1264 cpu_buffer->tail_page->page->time_stamp = *ts;
1265 }
1249 1266
1250 /* reread the time stamp */ 1267 /*
1251 *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu); 1268 * The actual tail page has moved forward.
1252 cpu_buffer->tail_page->page->time_stamp = *ts; 1269 */
1253 } 1270 if (tail < BUF_PAGE_SIZE) {
1271 /* Mark the rest of the page with padding */
1272 event = __rb_page_index(tail_page, tail);
1273 rb_event_set_padding(event);
1274 }
1254 1275
1255 /* 1276 /* Set the write back to the previous setting */
1256 * The actual tail page has moved forward. 1277 local_sub(length, &tail_page->write);
1257 */
1258 if (tail < BUF_PAGE_SIZE) {
1259 /* Mark the rest of the page with padding */
1260 event = __rb_page_index(tail_page, tail);
1261 rb_event_set_padding(event);
1262 }
1263 1278
1264 if (tail <= BUF_PAGE_SIZE) 1279 /*
1265 /* Set the write back to the previous setting */ 1280 * If this was a commit entry that failed,
1266 local_set(&tail_page->write, tail); 1281 * increment that too
1282 */
1283 if (tail_page == cpu_buffer->commit_page &&
1284 tail == rb_commit_index(cpu_buffer)) {
1285 rb_set_commit_to_write(cpu_buffer);
1286 }
1267 1287
1268 /* 1288 __raw_spin_unlock(&cpu_buffer->lock);
1269 * If this was a commit entry that failed, 1289 local_irq_restore(flags);
1270 * increment that too 1290
1271 */ 1291 /* fail and let the caller try again */
1272 if (tail_page == cpu_buffer->commit_page && 1292 return ERR_PTR(-EAGAIN);
1273 tail == rb_commit_index(cpu_buffer)) { 1293
1274 rb_set_commit_to_write(cpu_buffer); 1294 out_reset:
1275 } 1295 /* reset write */
1296 local_sub(length, &tail_page->write);
1276 1297
1298 if (likely(lock_taken))
1277 __raw_spin_unlock(&cpu_buffer->lock); 1299 __raw_spin_unlock(&cpu_buffer->lock);
1278 local_irq_restore(flags); 1300 local_irq_restore(flags);
1301 return NULL;
1302}
1279 1303
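rb_move_tail() keeps the page-crossing slow path NMI-safe: a normal writer spins on the lock, but an NMI may have interrupted the lock holder on this very CPU, so it only trylocks and drops the event (counted in nmi_dropped) on failure. A sketch of the shape, with a pthread spinlock standing in for the raw kernel lock:

#include <pthread.h>
#include <stdbool.h>

static pthread_spinlock_t buf_lock;	/* pthread_spin_init() at setup */
static unsigned long nmi_dropped;

/* 'in_nmi' stands in for the kernel's in_nmi() test. */
static bool take_buf_lock(bool in_nmi)
{
	if (in_nmi) {
		if (pthread_spin_trylock(&buf_lock) != 0) {
			nmi_dropped++;	/* cannot spin in NMI: drop event */
			return false;
		}
		return true;
	}
	pthread_spin_lock(&buf_lock);
	return true;
}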
1280 /* fail and let the caller try again */ 1304static struct ring_buffer_event *
1281 return ERR_PTR(-EAGAIN); 1305__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1282 } 1306 unsigned type, unsigned long length, u64 *ts)
1307{
1308 struct buffer_page *tail_page, *commit_page;
1309 struct ring_buffer_event *event;
1310 unsigned long tail, write;
1311
1312 commit_page = cpu_buffer->commit_page;
1313 /* we just need to protect against interrupts */
1314 barrier();
1315 tail_page = cpu_buffer->tail_page;
1316 write = local_add_return(length, &tail_page->write);
1317 tail = write - length;
1318
1319 /* See if we shot past the end of this buffer page */
1320 if (write > BUF_PAGE_SIZE)
1321 return rb_move_tail(cpu_buffer, length, tail,
1322 commit_page, tail_page, ts);
1283 1323
1284 /* We reserved something on the buffer */ 1324 /* We reserved something on the buffer */
1285 1325
@@ -1289,6 +1329,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1289 event = __rb_page_index(tail_page, tail); 1329 event = __rb_page_index(tail_page, tail);
1290 rb_update_event(event, type, length); 1330 rb_update_event(event, type, length);
1291 1331
1332 /* The passed in type is zero for DATA */
1333 if (likely(!type))
1334 local_inc(&tail_page->entries);
1335
1292 /* 1336 /*
1293 * If this is a commit and the tail is zero, then update 1337 * If this is a commit and the tail is zero, then update
1294 * this page's time stamp. 1338 * this page's time stamp.
@@ -1297,16 +1341,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1297 cpu_buffer->commit_page->page->time_stamp = *ts; 1341 cpu_buffer->commit_page->page->time_stamp = *ts;
1298 1342
1299 return event; 1343 return event;
1344}
1300 1345
1301 out_reset: 1346static inline int
1302 /* reset write */ 1347rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1303 if (tail <= BUF_PAGE_SIZE) 1348 struct ring_buffer_event *event)
1304 local_set(&tail_page->write, tail); 1349{
1350 unsigned long new_index, old_index;
1351 struct buffer_page *bpage;
1352 unsigned long index;
1353 unsigned long addr;
1305 1354
1306 if (likely(lock_taken)) 1355 new_index = rb_event_index(event);
1307 __raw_spin_unlock(&cpu_buffer->lock); 1356 old_index = new_index + rb_event_length(event);
1308 local_irq_restore(flags); 1357 addr = (unsigned long)event;
1309 return NULL; 1358 addr &= PAGE_MASK;
1359
1360 bpage = cpu_buffer->tail_page;
1361
1362 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
1363 /*
1364 * This is on the tail page. It is possible that
1365 * a write could come in and move the tail page
1366 * and write to the next page. That is fine
1367 * because we just shorten what is on this page.
1368 */
1369 index = local_cmpxchg(&bpage->write, old_index, new_index);
1370 if (index == old_index)
1371 return 1;
1372 }
1373
1374 /* could not discard */
1375 return 0;
1310} 1376}
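rb_try_to_discard() reclaims space only while the event is still the last thing on the tail page: a cmpxchg rolls the write index back from old_index to new_index, and if another writer appended in the meantime the compare fails and the event is left behind as padding. A userspace sketch with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_ulong write_index;

/* Roll the write cursor back iff nothing was appended after us;
 * on failure the caller must leave the event in place as padding. */
static bool try_discard(unsigned long old_index, unsigned long new_index)
{
	unsigned long expected = old_index;

	return atomic_compare_exchange_strong(&write_index, &expected,
					      new_index);
}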
1311 1377
1312static int 1378static int
@@ -1351,16 +1417,23 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1351 event->array[0] = *delta >> TS_SHIFT; 1417 event->array[0] = *delta >> TS_SHIFT;
1352 } else { 1418 } else {
1353 cpu_buffer->commit_page->page->time_stamp = *ts; 1419 cpu_buffer->commit_page->page->time_stamp = *ts;
1354 event->time_delta = 0; 1420 /* try to discard, since we do not need this */
1355 event->array[0] = 0; 1421 if (!rb_try_to_discard(cpu_buffer, event)) {
1422 /* nope, just zero it */
1423 event->time_delta = 0;
1424 event->array[0] = 0;
1425 }
1356 } 1426 }
1357 cpu_buffer->write_stamp = *ts; 1427 cpu_buffer->write_stamp = *ts;
1358 /* let the caller know this was the commit */ 1428 /* let the caller know this was the commit */
1359 ret = 1; 1429 ret = 1;
1360 } else { 1430 } else {
1361 /* Darn, this is just wasted space */ 1431 /* Try to discard the event */
1362 event->time_delta = 0; 1432 if (!rb_try_to_discard(cpu_buffer, event)) {
1363 event->array[0] = 0; 1433 /* Darn, this is just wasted space */
1434 event->time_delta = 0;
1435 event->array[0] = 0;
1436 }
1364 ret = 0; 1437 ret = 0;
1365 } 1438 }
1366 1439
@@ -1371,13 +1444,14 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1371 1444
1372static struct ring_buffer_event * 1445static struct ring_buffer_event *
1373rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, 1446rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1374 unsigned type, unsigned long length) 1447 unsigned long length)
1375{ 1448{
1376 struct ring_buffer_event *event; 1449 struct ring_buffer_event *event;
1377 u64 ts, delta; 1450 u64 ts, delta = 0;
1378 int commit = 0; 1451 int commit = 0;
1379 int nr_loops = 0; 1452 int nr_loops = 0;
1380 1453
1454 length = rb_calculate_event_length(length);
1381 again: 1455 again:
1382 /* 1456 /*
1383 * We allow for interrupts to reenter here and do a trace. 1457 * We allow for interrupts to reenter here and do a trace.
@@ -1391,7 +1465,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1391 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 1465 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1392 return NULL; 1466 return NULL;
1393 1467
1394 ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); 1468 ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
1395 1469
1396 /* 1470 /*
1397 * Only the first commit can update the timestamp. 1471 * Only the first commit can update the timestamp.
@@ -1401,23 +1475,24 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1401 * also be made. But only the entry that did the actual 1475 * also be made. But only the entry that did the actual
1402 * commit will be something other than zero. 1476 * commit will be something other than zero.
1403 */ 1477 */
1404 if (cpu_buffer->tail_page == cpu_buffer->commit_page && 1478 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
1405 rb_page_write(cpu_buffer->tail_page) == 1479 rb_page_write(cpu_buffer->tail_page) ==
1406 rb_commit_index(cpu_buffer)) { 1480 rb_commit_index(cpu_buffer))) {
1481 u64 diff;
1407 1482
1408 delta = ts - cpu_buffer->write_stamp; 1483 diff = ts - cpu_buffer->write_stamp;
1409 1484
1410 /* make sure this delta is calculated here */ 1485 /* make sure this diff is calculated here */
1411 barrier(); 1486 barrier();
1412 1487
1413 /* Did the write stamp get updated already? */ 1488 /* Did the write stamp get updated already? */
1414 if (unlikely(ts < cpu_buffer->write_stamp)) 1489 if (unlikely(ts < cpu_buffer->write_stamp))
1415 delta = 0; 1490 goto get_event;
1416 1491
1417 if (test_time_stamp(delta)) { 1492 delta = diff;
1493 if (unlikely(test_time_stamp(delta))) {
1418 1494
1419 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 1495 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1420
1421 if (commit == -EBUSY) 1496 if (commit == -EBUSY)
1422 return NULL; 1497 return NULL;
1423 1498
@@ -1426,12 +1501,11 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1426 1501
1427 RB_WARN_ON(cpu_buffer, commit < 0); 1502 RB_WARN_ON(cpu_buffer, commit < 0);
1428 } 1503 }
1429 } else 1504 }
1430 /* Non commits have zero deltas */
1431 delta = 0;
1432 1505
1433 event = __rb_reserve_next(cpu_buffer, type, length, &ts); 1506 get_event:
1434 if (PTR_ERR(event) == -EAGAIN) 1507 event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
1508 if (unlikely(PTR_ERR(event) == -EAGAIN))
1435 goto again; 1509 goto again;
1436 1510
1437 if (!event) { 1511 if (!event) {
@@ -1448,7 +1522,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1448 * If the timestamp was committed, make the commit our entry 1522 * If the timestamp was committed, make the commit our entry
1449 * now so that we will update it when needed. 1523 * now so that we will update it when needed.
1450 */ 1524 */
1451 if (commit) 1525 if (unlikely(commit))
1452 rb_set_commit_event(cpu_buffer, event); 1526 rb_set_commit_event(cpu_buffer, event);
1453 else if (!rb_is_commit(cpu_buffer, event)) 1527 else if (!rb_is_commit(cpu_buffer, event))
1454 delta = 0; 1528 delta = 0;
@@ -1458,6 +1532,36 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1458 return event; 1532 return event;
1459} 1533}
1460 1534
1535#define TRACE_RECURSIVE_DEPTH 16
1536
1537static int trace_recursive_lock(void)
1538{
1539 current->trace_recursion++;
1540
1541 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
1542 return 0;
1543
1544 /* Disable all tracing before we do anything else */
1545 tracing_off_permanent();
1546
1547 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
1548 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
1549 current->trace_recursion,
1550 hardirq_count() >> HARDIRQ_SHIFT,
1551 softirq_count() >> SOFTIRQ_SHIFT,
1552 in_nmi());
1553
1554 WARN_ON_ONCE(1);
1555 return -1;
1556}
1557
1558static void trace_recursive_unlock(void)
1559{
1560 WARN_ON_ONCE(!current->trace_recursion);
1561
1562 current->trace_recursion--;
1563}
1564
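trace_recursive_lock() is a per-task depth counter rather than a lock: each nested entry bumps it, and crossing TRACE_RECURSIVE_DEPTH permanently shuts tracing off instead of recursing without bound. A minimal sketch using thread-local state:

#include <stdio.h>

#define RECURSIVE_DEPTH	16

static _Thread_local int recursion;

/* Returns 0 when it is safe to proceed, -1 on runaway recursion. */
static int recursive_lock(void)
{
	if (++recursion < RECURSIVE_DEPTH)
		return 0;
	fprintf(stderr, "runaway recursion: depth %d\n", recursion);
	return -1;	/* the kernel also disables tracing at this point */
}

static void recursive_unlock(void)
{
	recursion--;
}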
1461static DEFINE_PER_CPU(int, rb_need_resched); 1565static DEFINE_PER_CPU(int, rb_need_resched);
1462 1566
1463/** 1567/**
@@ -1491,6 +1595,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1491 /* If we are tracing schedule, we don't want to recurse */ 1595 /* If we are tracing schedule, we don't want to recurse */
1492 resched = ftrace_preempt_disable(); 1596 resched = ftrace_preempt_disable();
1493 1597
1598 if (trace_recursive_lock())
1599 goto out_nocheck;
1600
1494 cpu = raw_smp_processor_id(); 1601 cpu = raw_smp_processor_id();
1495 1602
1496 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1603 if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1501,11 +1608,10 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1501 if (atomic_read(&cpu_buffer->record_disabled)) 1608 if (atomic_read(&cpu_buffer->record_disabled))
1502 goto out; 1609 goto out;
1503 1610
1504 length = rb_calculate_event_length(length); 1611 if (length > BUF_MAX_DATA_SIZE)
1505 if (length > BUF_PAGE_SIZE)
1506 goto out; 1612 goto out;
1507 1613
1508 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); 1614 event = rb_reserve_next_event(cpu_buffer, length);
1509 if (!event) 1615 if (!event)
1510 goto out; 1616 goto out;
1511 1617
@@ -1520,6 +1626,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1520 return event; 1626 return event;
1521 1627
1522 out: 1628 out:
1629 trace_recursive_unlock();
1630
1631 out_nocheck:
1523 ftrace_preempt_enable(resched); 1632 ftrace_preempt_enable(resched);
1524 return NULL; 1633 return NULL;
1525} 1634}
@@ -1528,7 +1637,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
1528static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 1637static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1529 struct ring_buffer_event *event) 1638 struct ring_buffer_event *event)
1530{ 1639{
1531 cpu_buffer->entries++; 1640 local_inc(&cpu_buffer->entries);
1532 1641
1533 /* Only process further if we own the commit */ 1642 /* Only process further if we own the commit */
1534 if (!rb_is_commit(cpu_buffer, event)) 1643 if (!rb_is_commit(cpu_buffer, event))
@@ -1558,6 +1667,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1558 1667
1559 rb_commit(cpu_buffer, event); 1668 rb_commit(cpu_buffer, event);
1560 1669
1670 trace_recursive_unlock();
1671
1561 /* 1672 /*
1562 * Only the last preempt count needs to restore preemption. 1673 * Only the last preempt count needs to restore preemption.
1563 */ 1674 */
@@ -1570,6 +1681,99 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1570} 1681}
1571EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); 1682EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
1572 1683
1684static inline void rb_event_discard(struct ring_buffer_event *event)
1685{
1686 /* array[0] holds the actual length for the discarded event */
1687 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
1688 event->type_len = RINGBUF_TYPE_PADDING;
1689 /* time delta must be non zero */
1690 if (!event->time_delta)
1691 event->time_delta = 1;
1692}
1693
1694/**
1695 * ring_buffer_event_discard - discard any event in the ring buffer
1696 * @event: the event to discard
1697 *
1698 * Sometimes an event that is in the ring buffer needs to be ignored.
1699 * This function lets the user discard an event in the ring buffer
1700 * and then that event will not be read later.
1701 *
1702 * Note, it is up to the user to be careful with this, and protect
1703 * against races. If the user discards an event that has been consumed
1704 * it is possible that it could corrupt the ring buffer.
1705 */
1706void ring_buffer_event_discard(struct ring_buffer_event *event)
1707{
1708 rb_event_discard(event);
1709}
1710EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
1711
1712/**
1713 * ring_buffer_discard_commit - discard an event that has not been committed
1714 * @buffer: the ring buffer
1715 * @event: non committed event to discard
1716 *
1717 * This is similar to ring_buffer_event_discard but must only be
1718 * performed on an event that has not been committed yet. The difference
1719 * is that this will also try to free the event from the ring buffer
1720 * if another event has not been added behind it.
1721 *
1722 * If another event has been added behind it, it will set the event
1723 * up as discarded, and perform the commit.
1724 *
1725 * If this function is called, do not call ring_buffer_unlock_commit on
1726 * the event.
1727 */
1728void ring_buffer_discard_commit(struct ring_buffer *buffer,
1729 struct ring_buffer_event *event)
1730{
1731 struct ring_buffer_per_cpu *cpu_buffer;
1732 int cpu;
1733
1734 /* The event is discarded regardless */
1735 rb_event_discard(event);
1736
1737 /*
1738 * This must only be called if the event has not been
1739 * committed yet. Thus we can assume that preemption
1740 * is still disabled.
1741 */
1742 RB_WARN_ON(buffer, preemptible());
1743
1744 cpu = smp_processor_id();
1745 cpu_buffer = buffer->buffers[cpu];
1746
1747 if (!rb_try_to_discard(cpu_buffer, event))
1748 goto out;
1749
1750 /*
1751 * The commit is still visible by the reader, so we
1752 * must increment entries.
1753 */
1754 local_inc(&cpu_buffer->entries);
1755 out:
1756 /*
1757 * If a write came in and pushed the tail page
1758 * we still need to update the commit pointer
1759 * if we were the commit.
1760 */
1761 if (rb_is_commit(cpu_buffer, event))
1762 rb_set_commit_to_write(cpu_buffer);
1763
1764 trace_recursive_unlock();
1765
1766 /*
1767 * Only the last preempt count needs to restore preemption.
1768 */
1769 if (preempt_count() == 1)
1770 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1771 else
1772 preempt_enable_no_resched_notrace();
1773
1774}
1775EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
1776
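The contract is that a discarded reservation is never also committed. A hedged kernel-context sketch of the reserve/fill/decide flow ('should_keep' is an illustrative stand-in for whatever filtering the tracer applies):

#include <linux/ring_buffer.h>
#include <linux/string.h>

static void emit_event(struct ring_buffer *buffer, const void *payload,
		       unsigned long len, int should_keep)
{
	struct ring_buffer_event *event;

	event = ring_buffer_lock_reserve(buffer, len);
	if (!event)
		return;

	memcpy(ring_buffer_event_data(event), payload, len);

	if (should_keep)
		ring_buffer_unlock_commit(buffer, event);
	else
		/* frees the slot if possible, otherwise pads it out */
		ring_buffer_discard_commit(buffer, event);
}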
1573/** 1777/**
1574 * ring_buffer_write - write data to the buffer without reserving 1778 * ring_buffer_write - write data to the buffer without reserving
1575 * @buffer: The ring buffer to write to. 1779 * @buffer: The ring buffer to write to.
@@ -1589,7 +1793,6 @@ int ring_buffer_write(struct ring_buffer *buffer,
1589{ 1793{
1590 struct ring_buffer_per_cpu *cpu_buffer; 1794 struct ring_buffer_per_cpu *cpu_buffer;
1591 struct ring_buffer_event *event; 1795 struct ring_buffer_event *event;
1592 unsigned long event_length;
1593 void *body; 1796 void *body;
1594 int ret = -EBUSY; 1797 int ret = -EBUSY;
1595 int cpu, resched; 1798 int cpu, resched;
@@ -1612,9 +1815,10 @@ int ring_buffer_write(struct ring_buffer *buffer,
1612 if (atomic_read(&cpu_buffer->record_disabled)) 1815 if (atomic_read(&cpu_buffer->record_disabled))
1613 goto out; 1816 goto out;
1614 1817
1615 event_length = rb_calculate_event_length(length); 1818 if (length > BUF_MAX_DATA_SIZE)
1616 event = rb_reserve_next_event(cpu_buffer, 1819 goto out;
1617 RINGBUF_TYPE_DATA, event_length); 1820
1821 event = rb_reserve_next_event(cpu_buffer, length);
1618 if (!event) 1822 if (!event)
1619 goto out; 1823 goto out;
1620 1824
@@ -1728,7 +1932,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1728 return 0; 1932 return 0;
1729 1933
1730 cpu_buffer = buffer->buffers[cpu]; 1934 cpu_buffer = buffer->buffers[cpu];
1731 ret = cpu_buffer->entries; 1935 ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
1936 - cpu_buffer->read;
1732 1937
1733 return ret; 1938 return ret;
1734} 1939}
@@ -1755,6 +1960,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1755EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 1960EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1756 1961
1757/** 1962/**
1963 * ring_buffer_nmi_dropped_cpu - get the number of NMIs that were dropped
1964 * @buffer: The ring buffer
1965 * @cpu: The per CPU buffer to get the number of dropped NMIs from
1966 */
1967unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
1968{
1969 struct ring_buffer_per_cpu *cpu_buffer;
1970 unsigned long ret;
1971
1972 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1973 return 0;
1974
1975 cpu_buffer = buffer->buffers[cpu];
1976 ret = cpu_buffer->nmi_dropped;
1977
1978 return ret;
1979}
1980EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
1981
1982/**
1983 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
1984 * @buffer: The ring buffer
1985 * @cpu: The per CPU buffer to get the number of overruns from
1986 */
1987unsigned long
1988ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
1989{
1990 struct ring_buffer_per_cpu *cpu_buffer;
1991 unsigned long ret;
1992
1993 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1994 return 0;
1995
1996 cpu_buffer = buffer->buffers[cpu];
1997 ret = cpu_buffer->commit_overrun;
1998
1999 return ret;
2000}
2001EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
2002
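/*
 * Editor's sketch (not part of this patch): how the two new per-cpu
 * drop counters might be summed over a buffer with the existing
 * for_each_buffer_cpu() iterator. Hypothetical helper, for
 * illustration only; lock the buffer if the total must be exact.
 */
static unsigned long example_total_dropped(struct ring_buffer *buffer)
{
        unsigned long total = 0;
        int cpu;

        for_each_buffer_cpu(buffer, cpu)
                total += ring_buffer_nmi_dropped_cpu(buffer, cpu) +
                         ring_buffer_commit_overrun_cpu(buffer, cpu);

        return total;
}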
2003/**
1758 * ring_buffer_entries - get the number of entries in a buffer 2004 * ring_buffer_entries - get the number of entries in a buffer
1759 * @buffer: The ring buffer 2005 * @buffer: The ring buffer
1760 * 2006 *
@@ -1770,7 +2016,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1770 /* if you care about this being correct, lock the buffer */ 2016 /* if you care about this being correct, lock the buffer */
1771 for_each_buffer_cpu(buffer, cpu) { 2017 for_each_buffer_cpu(buffer, cpu) {
1772 cpu_buffer = buffer->buffers[cpu]; 2018 cpu_buffer = buffer->buffers[cpu];
1773 entries += cpu_buffer->entries; 2019 entries += (local_read(&cpu_buffer->entries) -
2020 cpu_buffer->overrun) - cpu_buffer->read;
1774 } 2021 }
1775 2022
1776 return entries; 2023 return entries;
@@ -1862,7 +2109,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1862{ 2109{
1863 u64 delta; 2110 u64 delta;
1864 2111
1865 switch (event->type) { 2112 switch (event->type_len) {
1866 case RINGBUF_TYPE_PADDING: 2113 case RINGBUF_TYPE_PADDING:
1867 return; 2114 return;
1868 2115
@@ -1893,7 +2140,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1893{ 2140{
1894 u64 delta; 2141 u64 delta;
1895 2142
1896 switch (event->type) { 2143 switch (event->type_len) {
1897 case RINGBUF_TYPE_PADDING: 2144 case RINGBUF_TYPE_PADDING:
1898 return; 2145 return;
1899 2146
@@ -1966,6 +2213,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1966 cpu_buffer->reader_page->list.prev = reader->list.prev; 2213 cpu_buffer->reader_page->list.prev = reader->list.prev;
1967 2214
1968 local_set(&cpu_buffer->reader_page->write, 0); 2215 local_set(&cpu_buffer->reader_page->write, 0);
2216 local_set(&cpu_buffer->reader_page->entries, 0);
1969 local_set(&cpu_buffer->reader_page->page->commit, 0); 2217 local_set(&cpu_buffer->reader_page->page->commit, 0);
1970 2218
1971 /* Make the reader page now replace the head */ 2219 /* Make the reader page now replace the head */
@@ -2008,8 +2256,9 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2008 2256
2009 event = rb_reader_event(cpu_buffer); 2257 event = rb_reader_event(cpu_buffer);
2010 2258
2011 if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) 2259 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
2012 cpu_buffer->entries--; 2260 || rb_discarded_event(event))
2261 cpu_buffer->read++;
2013 2262
2014 rb_update_read_stamp(cpu_buffer, event); 2263 rb_update_read_stamp(cpu_buffer, event);
2015 2264
@@ -2031,8 +2280,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
2031 * Check if we are at the end of the buffer. 2280 * Check if we are at the end of the buffer.
2032 */ 2281 */
2033 if (iter->head >= rb_page_size(iter->head_page)) { 2282 if (iter->head >= rb_page_size(iter->head_page)) {
2034 if (RB_WARN_ON(buffer, 2283 /* discarded commits can make the page empty */
2035 iter->head_page == cpu_buffer->commit_page)) 2284 if (iter->head_page == cpu_buffer->commit_page)
2036 return; 2285 return;
2037 rb_inc_iter(iter); 2286 rb_inc_iter(iter);
2038 return; 2287 return;
@@ -2075,12 +2324,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2075 /* 2324 /*
2076 * We repeat when a timestamp is encountered. It is possible 2325 * We repeat when a timestamp is encountered. It is possible
2077 * to get multiple timestamps from an interrupt entering just 2326 * to get multiple timestamps from an interrupt entering just
2078 * as one timestamp is about to be written. The max times 2327 * as one timestamp is about to be written, or from discarded
2079 * that this can happen is the number of nested interrupts we 2328 * commits. The most that we can have is the number on a single page.
2080 * can have. Nesting 10 deep of interrupts is clearly
2081 * an anomaly.
2082 */ 2329 */
2083 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) 2330 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
2084 return NULL; 2331 return NULL;
2085 2332
2086 reader = rb_get_reader_page(cpu_buffer); 2333 reader = rb_get_reader_page(cpu_buffer);
@@ -2089,7 +2336,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2089 2336
2090 event = rb_reader_event(cpu_buffer); 2337 event = rb_reader_event(cpu_buffer);
2091 2338
2092 switch (event->type) { 2339 switch (event->type_len) {
2093 case RINGBUF_TYPE_PADDING: 2340 case RINGBUF_TYPE_PADDING:
2094 if (rb_null_event(event)) 2341 if (rb_null_event(event))
2095 RB_WARN_ON(cpu_buffer, 1); 2342 RB_WARN_ON(cpu_buffer, 1);
@@ -2146,14 +2393,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2146 2393
2147 again: 2394 again:
2148 /* 2395 /*
2149 * We repeat when a timestamp is encountered. It is possible 2396 * We repeat when a timestamp is encountered.
2150 * to get multiple timestamps from an interrupt entering just 2397 * We can get multiple timestamps by nested interrupts or also
2151 * as one timestamp is about to be written. The max times 2398 * if filtering is on (discarding commits). Since discarding
2152 * that this can happen is the number of nested interrupts we 2399 * commits can be frequent we can get a lot of timestamps.
2153 * can have. Nesting 10 deep of interrupts is clearly 2400 * But we limit them by not adding timestamps if they begin
2154 * an anomaly. 2401 * at the start of a page.
2155 */ 2402 */
2156 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) 2403 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
2157 return NULL; 2404 return NULL;
2158 2405
2159 if (rb_per_cpu_empty(cpu_buffer)) 2406 if (rb_per_cpu_empty(cpu_buffer))
@@ -2161,7 +2408,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2161 2408
2162 event = rb_iter_head_event(iter); 2409 event = rb_iter_head_event(iter);
2163 2410
2164 switch (event->type) { 2411 switch (event->type_len) {
2165 case RINGBUF_TYPE_PADDING: 2412 case RINGBUF_TYPE_PADDING:
2166 if (rb_null_event(event)) { 2413 if (rb_null_event(event)) {
2167 rb_inc_iter(iter); 2414 rb_inc_iter(iter);
@@ -2220,7 +2467,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2220 event = rb_buffer_peek(buffer, cpu, ts); 2467 event = rb_buffer_peek(buffer, cpu, ts);
2221 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2468 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2222 2469
2223 if (event && event->type == RINGBUF_TYPE_PADDING) { 2470 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2224 cpu_relax(); 2471 cpu_relax();
2225 goto again; 2472 goto again;
2226 } 2473 }
@@ -2248,7 +2495,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2248 event = rb_iter_peek(iter, ts); 2495 event = rb_iter_peek(iter, ts);
2249 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2496 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2250 2497
2251 if (event && event->type == RINGBUF_TYPE_PADDING) { 2498 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2252 cpu_relax(); 2499 cpu_relax();
2253 goto again; 2500 goto again;
2254 } 2501 }
@@ -2293,7 +2540,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2293 out: 2540 out:
2294 preempt_enable(); 2541 preempt_enable();
2295 2542
2296 if (event && event->type == RINGBUF_TYPE_PADDING) { 2543 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2297 cpu_relax(); 2544 cpu_relax();
2298 goto again; 2545 goto again;
2299 } 2546 }
@@ -2386,7 +2633,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2386 out: 2633 out:
2387 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2634 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2388 2635
2389 if (event && event->type == RINGBUF_TYPE_PADDING) { 2636 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2390 cpu_relax(); 2637 cpu_relax();
2391 goto again; 2638 goto again;
2392 } 2639 }
@@ -2411,6 +2658,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2411 cpu_buffer->head_page 2658 cpu_buffer->head_page
2412 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2659 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
2413 local_set(&cpu_buffer->head_page->write, 0); 2660 local_set(&cpu_buffer->head_page->write, 0);
2661 local_set(&cpu_buffer->head_page->entries, 0);
2414 local_set(&cpu_buffer->head_page->page->commit, 0); 2662 local_set(&cpu_buffer->head_page->page->commit, 0);
2415 2663
2416 cpu_buffer->head_page->read = 0; 2664 cpu_buffer->head_page->read = 0;
@@ -2420,11 +2668,15 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2420 2668
2421 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2669 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
2422 local_set(&cpu_buffer->reader_page->write, 0); 2670 local_set(&cpu_buffer->reader_page->write, 0);
2671 local_set(&cpu_buffer->reader_page->entries, 0);
2423 local_set(&cpu_buffer->reader_page->page->commit, 0); 2672 local_set(&cpu_buffer->reader_page->page->commit, 0);
2424 cpu_buffer->reader_page->read = 0; 2673 cpu_buffer->reader_page->read = 0;
2425 2674
2675 cpu_buffer->nmi_dropped = 0;
2676 cpu_buffer->commit_overrun = 0;
2426 cpu_buffer->overrun = 0; 2677 cpu_buffer->overrun = 0;
2427 cpu_buffer->entries = 0; 2678 cpu_buffer->read = 0;
2679 local_set(&cpu_buffer->entries, 0);
2428 2680
2429 cpu_buffer->write_stamp = 0; 2681 cpu_buffer->write_stamp = 0;
2430 cpu_buffer->read_stamp = 0; 2682 cpu_buffer->read_stamp = 0;
@@ -2443,6 +2695,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2443 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2695 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2444 return; 2696 return;
2445 2697
2698 atomic_inc(&cpu_buffer->record_disabled);
2699
2446 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2700 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2447 2701
2448 __raw_spin_lock(&cpu_buffer->lock); 2702 __raw_spin_lock(&cpu_buffer->lock);
@@ -2452,6 +2706,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2452 __raw_spin_unlock(&cpu_buffer->lock); 2706 __raw_spin_unlock(&cpu_buffer->lock);
2453 2707
2454 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2708 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2709
2710 atomic_dec(&cpu_buffer->record_disabled);
2455} 2711}
2456EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); 2712EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
2457 2713
@@ -2578,28 +2834,6 @@ out:
2578} 2834}
2579EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2835EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
2580 2836
2581static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2582 struct buffer_data_page *bpage,
2583 unsigned int offset)
2584{
2585 struct ring_buffer_event *event;
2586 unsigned long head;
2587
2588 __raw_spin_lock(&cpu_buffer->lock);
2589 for (head = offset; head < local_read(&bpage->commit);
2590 head += rb_event_length(event)) {
2591
2592 event = __rb_data_page_index(bpage, head);
2593 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2594 return;
2595 /* Only count data entries */
2596 if (event->type != RINGBUF_TYPE_DATA)
2597 continue;
2598 cpu_buffer->entries--;
2599 }
2600 __raw_spin_unlock(&cpu_buffer->lock);
2601}
2602
2603/** 2837/**
2604 * ring_buffer_alloc_read_page - allocate a page to read from buffer 2838 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2605 * @buffer: the buffer to allocate for. 2839 * @buffer: the buffer to allocate for.
@@ -2630,6 +2864,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2630 2864
2631 return bpage; 2865 return bpage;
2632} 2866}
2867EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
2633 2868
2634/** 2869/**
2635 * ring_buffer_free_read_page - free an allocated read page 2870 * ring_buffer_free_read_page - free an allocated read page
@@ -2642,6 +2877,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2642{ 2877{
2643 free_page((unsigned long)data); 2878 free_page((unsigned long)data);
2644} 2879}
2880EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
2645 2881
2646/** 2882/**
2647 * ring_buffer_read_page - extract a page from the ring buffer 2883 * ring_buffer_read_page - extract a page from the ring buffer
@@ -2768,16 +3004,17 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2768 /* we copied everything to the beginning */ 3004 /* we copied everything to the beginning */
2769 read = 0; 3005 read = 0;
2770 } else { 3006 } else {
3007 /* update the entry counter */
3008 cpu_buffer->read += local_read(&reader->entries);
3009
2771 /* swap the pages */ 3010 /* swap the pages */
2772 rb_init_page(bpage); 3011 rb_init_page(bpage);
2773 bpage = reader->page; 3012 bpage = reader->page;
2774 reader->page = *data_page; 3013 reader->page = *data_page;
2775 local_set(&reader->write, 0); 3014 local_set(&reader->write, 0);
3015 local_set(&reader->entries, 0);
2776 reader->read = 0; 3016 reader->read = 0;
2777 *data_page = bpage; 3017 *data_page = bpage;
2778
2779 /* update the entry counter */
2780 rb_remove_entries(cpu_buffer, bpage, read);
2781 } 3018 }
2782 ret = read; 3019 ret = read;
2783 3020
@@ -2787,6 +3024,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2787 out: 3024 out:
2788 return ret; 3025 return ret;
2789} 3026}
3027EXPORT_SYMBOL_GPL(ring_buffer_read_page);
2790 3028
2791static ssize_t 3029static ssize_t
2792rb_simple_read(struct file *filp, char __user *ubuf, 3030rb_simple_read(struct file *filp, char __user *ubuf,
@@ -2845,14 +3083,11 @@ static const struct file_operations rb_simple_fops = {
2845static __init int rb_init_debugfs(void) 3083static __init int rb_init_debugfs(void)
2846{ 3084{
2847 struct dentry *d_tracer; 3085 struct dentry *d_tracer;
2848 struct dentry *entry;
2849 3086
2850 d_tracer = tracing_init_dentry(); 3087 d_tracer = tracing_init_dentry();
2851 3088
2852 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 3089 trace_create_file("tracing_on", 0644, d_tracer,
2853 &ring_buffer_flags, &rb_simple_fops); 3090 &ring_buffer_flags, &rb_simple_fops);
2854 if (!entry)
2855 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2856 3091
2857 return 0; 3092 return 0;
2858} 3093}
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
new file mode 100644
index 000000000000..8d68e149a8b3
--- /dev/null
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -0,0 +1,416 @@
1/*
2 * ring buffer tester and benchmark
3 *
4 * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/ring_buffer.h>
7#include <linux/completion.h>
8#include <linux/kthread.h>
9#include <linux/module.h>
10#include <linux/time.h>
11
12struct rb_page {
13 u64 ts;
14 local_t commit;
15 char data[4080];
16};
17
18/* run time and sleep time in seconds */
19#define RUN_TIME 10
20#define SLEEP_TIME 10
21
22/* number of events for writer to wake up the reader */
23static int wakeup_interval = 100;
24
25static int reader_finish;
26static struct completion read_start;
27static struct completion read_done;
28
29static struct ring_buffer *buffer;
30static struct task_struct *producer;
31static struct task_struct *consumer;
32static unsigned long read;
33
34static int disable_reader;
35module_param(disable_reader, uint, 0644);
36MODULE_PARM_DESC(disable_reader, "only run producer");
37
38static int read_events;
39
40static int kill_test;
41
42#define KILL_TEST() \
43 do { \
44 if (!kill_test) { \
45 kill_test = 1; \
46 WARN_ON(1); \
47 } \
48 } while (0)
49
50enum event_status {
51 EVENT_FOUND,
52 EVENT_DROPPED,
53};
54
55static enum event_status read_event(int cpu)
56{
57 struct ring_buffer_event *event;
58 int *entry;
59 u64 ts;
60
61 event = ring_buffer_consume(buffer, cpu, &ts);
62 if (!event)
63 return EVENT_DROPPED;
64
65 entry = ring_buffer_event_data(event);
66 if (*entry != cpu) {
67 KILL_TEST();
68 return EVENT_DROPPED;
69 }
70
71 read++;
72 return EVENT_FOUND;
73}
74
75static enum event_status read_page(int cpu)
76{
77 struct ring_buffer_event *event;
78 struct rb_page *rpage;
79 unsigned long commit;
80 void *bpage;
81 int *entry;
82 int ret;
83 int inc;
84 int i;
85
86 bpage = ring_buffer_alloc_read_page(buffer);
87 if (!bpage)
88 return EVENT_DROPPED;
89
90 ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
91 if (ret >= 0) {
92 rpage = bpage;
93 commit = local_read(&rpage->commit);
94 for (i = 0; i < commit && !kill_test; i += inc) {
95
96 if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
97 KILL_TEST();
98 break;
99 }
100
101 inc = -1;
102 event = (void *)&rpage->data[i];
103 switch (event->type_len) {
104 case RINGBUF_TYPE_PADDING:
105 /* We don't expect any padding */
106 KILL_TEST();
107 break;
108 case RINGBUF_TYPE_TIME_EXTEND:
109 inc = 8;
110 break;
111 case 0:
112 entry = ring_buffer_event_data(event);
113 if (*entry != cpu) {
114 KILL_TEST();
115 break;
116 }
117 read++;
118 if (!event->array[0]) {
119 KILL_TEST();
120 break;
121 }
122 inc = event->array[0];
123 break;
124 default:
125 entry = ring_buffer_event_data(event);
126 if (*entry != cpu) {
127 KILL_TEST();
128 break;
129 }
130 read++;
131 inc = ((event->type_len + 1) * 4);
132 }
133 if (kill_test)
134 break;
135
136 if (inc <= 0) {
137 KILL_TEST();
138 break;
139 }
140 }
141 }
142 ring_buffer_free_read_page(buffer, bpage);
143
144 if (ret < 0)
145 return EVENT_DROPPED;
146 return EVENT_FOUND;
147}
148
149static void ring_buffer_consumer(void)
150{
151 /* toggle between reading pages and events */
152 read_events ^= 1;
153
154 read = 0;
155 while (!reader_finish && !kill_test) {
156 int found;
157
158 do {
159 int cpu;
160
161 found = 0;
162 for_each_online_cpu(cpu) {
163 enum event_status stat;
164
165 if (read_events)
166 stat = read_event(cpu);
167 else
168 stat = read_page(cpu);
169
170 if (kill_test)
171 break;
172 if (stat == EVENT_FOUND)
173 found = 1;
174 }
175 } while (found && !kill_test);
176
177 set_current_state(TASK_INTERRUPTIBLE);
178 if (reader_finish)
179 break;
180
181 schedule();
182 __set_current_state(TASK_RUNNING);
183 }
184 reader_finish = 0;
185 complete(&read_done);
186}
187
188static void ring_buffer_producer(void)
189{
190 struct timeval start_tv;
191 struct timeval end_tv;
192 unsigned long long time;
193 unsigned long long entries;
194 unsigned long long overruns;
195 unsigned long missed = 0;
196 unsigned long hit = 0;
197 unsigned long avg;
198 int cnt = 0;
199
200 /*
201 * Hammer the buffer for 10 secs (this may
202 * make the system stall)
203 */
204 pr_info("Starting ring buffer hammer\n");
205 do_gettimeofday(&start_tv);
206 do {
207 struct ring_buffer_event *event;
208 int *entry;
209
210 event = ring_buffer_lock_reserve(buffer, 10);
211 if (!event) {
212 missed++;
213 } else {
214 hit++;
215 entry = ring_buffer_event_data(event);
216 *entry = smp_processor_id();
217 ring_buffer_unlock_commit(buffer, event);
218 }
219 do_gettimeofday(&end_tv);
220
221 cnt++;
222 if (consumer && !(cnt % wakeup_interval))
223 wake_up_process(consumer);
224
225#ifndef CONFIG_PREEMPT
226 /*
226 * If we are a non-preempt kernel, the 10 second run will
227 * stop everything while it runs. Instead, we will call
228 * cond_resched and also add back any time that was lost to a
229 * reschedule.
230 *
231 * Do a cond_resched at the same frequency we would wake up
232 * the reader.
234 */
235 if (cnt % wakeup_interval)
236 cond_resched();
237#endif
238
239 } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
240 pr_info("End ring buffer hammer\n");
241
242 if (consumer) {
243 /* Init both completions here to avoid races */
244 init_completion(&read_start);
245 init_completion(&read_done);
246 /* the completions must be visible before the finish var */
247 smp_wmb();
248 reader_finish = 1;
249 /* finish var visible before waking up the consumer */
250 smp_wmb();
251 wake_up_process(consumer);
252 wait_for_completion(&read_done);
253 }
254
255 time = end_tv.tv_sec - start_tv.tv_sec;
256 time *= USEC_PER_SEC;
257 time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
258
259 entries = ring_buffer_entries(buffer);
260 overruns = ring_buffer_overruns(buffer);
261
262 if (kill_test)
263 pr_info("ERROR!\n");
264 pr_info("Time: %lld (usecs)\n", time);
265 pr_info("Overruns: %lld\n", overruns);
266 if (disable_reader)
267 pr_info("Read: (reader disabled)\n");
268 else
269 pr_info("Read: %ld (by %s)\n", read,
270 read_events ? "events" : "pages");
271 pr_info("Entries: %lld\n", entries);
272 pr_info("Total: %lld\n", entries + overruns + read);
273 pr_info("Missed: %ld\n", missed);
274 pr_info("Hit: %ld\n", hit);
275
276 /* Convert time from usecs to millisecs */
277 do_div(time, USEC_PER_MSEC);
278 if (time)
279 hit /= (long)time;
280 else
281 pr_info("TIME IS ZERO??\n");
282
283 pr_info("Entries per millisec: %ld\n", hit);
284
285 if (hit) {
286 /* Calculate the average time in nanosecs */
287 avg = NSEC_PER_MSEC / hit;
288 pr_info("%ld ns per entry\n", avg);
289 }
290
291 if (missed) {
292 if (time)
293 missed /= (long)time;
294
295 pr_info("Total iterations per millisec: %ld\n", hit + missed);
296
297 /* it is possible that hit + missed will overflow and be zero */
298 if (!(hit + missed)) {
299 pr_info("hit + missed overflowed and totalled zero!\n");
300 hit--; /* make it non zero */
301 }
302
303 /* Calculate the average time in nanosecs */
304 avg = NSEC_PER_MSEC / (hit + missed);
305 pr_info("%ld ns per entry\n", avg);
306 }
307}
308
309static void wait_to_die(void)
310{
311 set_current_state(TASK_INTERRUPTIBLE);
312 while (!kthread_should_stop()) {
313 schedule();
314 set_current_state(TASK_INTERRUPTIBLE);
315 }
316 __set_current_state(TASK_RUNNING);
317}
318
319static int ring_buffer_consumer_thread(void *arg)
320{
321 while (!kthread_should_stop() && !kill_test) {
322 complete(&read_start);
323
324 ring_buffer_consumer();
325
326 set_current_state(TASK_INTERRUPTIBLE);
327 if (kthread_should_stop() || kill_test)
328 break;
329
330 schedule();
331 __set_current_state(TASK_RUNNING);
332 }
333 __set_current_state(TASK_RUNNING);
334
335 if (kill_test)
336 wait_to_die();
337
338 return 0;
339}
340
341static int ring_buffer_producer_thread(void *arg)
342{
343 init_completion(&read_start);
344
345 while (!kthread_should_stop() && !kill_test) {
346 ring_buffer_reset(buffer);
347
348 if (consumer) {
349 smp_wmb();
350 wake_up_process(consumer);
351 wait_for_completion(&read_start);
352 }
353
354 ring_buffer_producer();
355
356 pr_info("Sleeping for 10 secs\n");
357 set_current_state(TASK_INTERRUPTIBLE);
358 schedule_timeout(HZ * SLEEP_TIME);
359 __set_current_state(TASK_RUNNING);
360 }
361
362 if (kill_test)
363 wait_to_die();
364
365 return 0;
366}
367
368static int __init ring_buffer_benchmark_init(void)
369{
370 int ret;
371
372 /* make a one meg buffer in overwrite mode */
373 buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE);
374 if (!buffer)
375 return -ENOMEM;
376
377 if (!disable_reader) {
378 consumer = kthread_create(ring_buffer_consumer_thread,
379 NULL, "rb_consumer");
380 ret = PTR_ERR(consumer);
381 if (IS_ERR(consumer))
382 goto out_fail;
383 }
384
385 producer = kthread_run(ring_buffer_producer_thread,
386 NULL, "rb_producer");
387 ret = PTR_ERR(producer);
388
389 if (IS_ERR(producer))
390 goto out_kill;
391
392 return 0;
393
394 out_kill:
395 if (consumer)
396 kthread_stop(consumer);
397
398 out_fail:
399 ring_buffer_free(buffer);
400 return ret;
401}
402
403static void __exit ring_buffer_benchmark_exit(void)
404{
405 kthread_stop(producer);
406 if (consumer)
407 kthread_stop(consumer);
408 ring_buffer_free(buffer);
409}
410
411module_init(ring_buffer_benchmark_init);
412module_exit(ring_buffer_benchmark_exit);
413
414MODULE_AUTHOR("Steven Rostedt");
415MODULE_DESCRIPTION("ring_buffer_benchmark");
416MODULE_LICENSE("GPL");
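/*
 * Editor's usage note (not part of the patch): assuming the file is
 * wired into the tracing Makefile as a module named after the file,
 * the benchmark can be exercised with something like
 *
 *      modprobe ring_buffer_benchmark
 *      modprobe ring_buffer_benchmark disable_reader=1  (producer only)
 *
 * It alternates RUN_TIME-second runs with SLEEP_TIME-second sleeps and
 * reports hit/miss counts and ns-per-entry figures through pr_info(),
 * so the results land in the kernel log.
 */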
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cda81ec58d9f..8acd9b81a5d7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -171,6 +171,13 @@ static struct trace_array global_trace;
171 171
172static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); 172static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
173 173
174int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
175 struct ring_buffer_event *event)
176{
177 return filter_check_discard(call, rec, global_trace.buffer, event);
178}
179EXPORT_SYMBOL_GPL(filter_current_check_discard);
180
174cycle_t ftrace_now(int cpu) 181cycle_t ftrace_now(int cpu)
175{ 182{
176 u64 ts; 183 u64 ts;
@@ -255,7 +262,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
255 262
256/* trace_flags holds trace_options default values */ 263/* trace_flags holds trace_options default values */
257unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 264unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
258 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; 265 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
266 TRACE_ITER_GRAPH_TIME;
259 267
260/** 268/**
261 * trace_wake_up - wake up tasks waiting for trace input 269 * trace_wake_up - wake up tasks waiting for trace input
@@ -317,6 +325,7 @@ static const char *trace_options[] = {
317 "latency-format", 325 "latency-format",
318 "global-clock", 326 "global-clock",
319 "sleep-time", 327 "sleep-time",
328 "graph-time",
320 NULL 329 NULL
321}; 330};
322 331
@@ -402,17 +411,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
402 return cnt; 411 return cnt;
403} 412}
404 413
405static void
406trace_print_seq(struct seq_file *m, struct trace_seq *s)
407{
408 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
409
410 s->buffer[len] = 0;
411 seq_puts(m, s->buffer);
412
413 trace_seq_init(s);
414}
415
416/** 414/**
417 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 415 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
418 * @tr: tracer 416 * @tr: tracer
@@ -641,6 +639,16 @@ void tracing_reset_online_cpus(struct trace_array *tr)
641 tracing_reset(tr, cpu); 639 tracing_reset(tr, cpu);
642} 640}
643 641
642void tracing_reset_current(int cpu)
643{
644 tracing_reset(&global_trace, cpu);
645}
646
647void tracing_reset_current_online_cpus(void)
648{
649 tracing_reset_online_cpus(&global_trace);
650}
651
644#define SAVED_CMDLINES 128 652#define SAVED_CMDLINES 128
645#define NO_CMDLINE_MAP UINT_MAX 653#define NO_CMDLINE_MAP UINT_MAX
646static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; 654static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -800,6 +808,7 @@ void trace_find_cmdline(int pid, char comm[])
800 return; 808 return;
801 } 809 }
802 810
811 preempt_disable();
803 __raw_spin_lock(&trace_cmdline_lock); 812 __raw_spin_lock(&trace_cmdline_lock);
804 map = map_pid_to_cmdline[pid]; 813 map = map_pid_to_cmdline[pid];
805 if (map != NO_CMDLINE_MAP) 814 if (map != NO_CMDLINE_MAP)
@@ -808,6 +817,7 @@ void trace_find_cmdline(int pid, char comm[])
808 strcpy(comm, "<...>"); 817 strcpy(comm, "<...>");
809 818
810 __raw_spin_unlock(&trace_cmdline_lock); 819 __raw_spin_unlock(&trace_cmdline_lock);
820 preempt_enable();
811} 821}
812 822
813void tracing_record_cmdline(struct task_struct *tsk) 823void tracing_record_cmdline(struct task_struct *tsk)
@@ -840,7 +850,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
840} 850}
841 851
842struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 852struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
843 unsigned char type, 853 int type,
844 unsigned long len, 854 unsigned long len,
845 unsigned long flags, int pc) 855 unsigned long flags, int pc)
846{ 856{
@@ -883,30 +893,40 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
883} 893}
884 894
885struct ring_buffer_event * 895struct ring_buffer_event *
886trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, 896trace_current_buffer_lock_reserve(int type, unsigned long len,
887 unsigned long flags, int pc) 897 unsigned long flags, int pc)
888{ 898{
889 return trace_buffer_lock_reserve(&global_trace, 899 return trace_buffer_lock_reserve(&global_trace,
890 type, len, flags, pc); 900 type, len, flags, pc);
891} 901}
902EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
892 903
893void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 904void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
894 unsigned long flags, int pc) 905 unsigned long flags, int pc)
895{ 906{
896 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); 907 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
897} 908}
909EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
898 910
899void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, 911void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
900 unsigned long flags, int pc) 912 unsigned long flags, int pc)
901{ 913{
902 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); 914 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
915}
916EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
917
918void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
919{
920 ring_buffer_discard_commit(global_trace.buffer, event);
903} 921}
922EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
904 923
905void 924void
906trace_function(struct trace_array *tr, 925trace_function(struct trace_array *tr,
907 unsigned long ip, unsigned long parent_ip, unsigned long flags, 926 unsigned long ip, unsigned long parent_ip, unsigned long flags,
908 int pc) 927 int pc)
909{ 928{
929 struct ftrace_event_call *call = &event_function;
910 struct ring_buffer_event *event; 930 struct ring_buffer_event *event;
911 struct ftrace_entry *entry; 931 struct ftrace_entry *entry;
912 932
@@ -921,7 +941,9 @@ trace_function(struct trace_array *tr,
921 entry = ring_buffer_event_data(event); 941 entry = ring_buffer_event_data(event);
922 entry->ip = ip; 942 entry->ip = ip;
923 entry->parent_ip = parent_ip; 943 entry->parent_ip = parent_ip;
924 ring_buffer_unlock_commit(tr->buffer, event); 944
945 if (!filter_check_discard(call, entry, tr->buffer, event))
946 ring_buffer_unlock_commit(tr->buffer, event);
925} 947}
926 948
927#ifdef CONFIG_FUNCTION_GRAPH_TRACER 949#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -930,6 +952,7 @@ static int __trace_graph_entry(struct trace_array *tr,
930 unsigned long flags, 952 unsigned long flags,
931 int pc) 953 int pc)
932{ 954{
955 struct ftrace_event_call *call = &event_funcgraph_entry;
933 struct ring_buffer_event *event; 956 struct ring_buffer_event *event;
934 struct ftrace_graph_ent_entry *entry; 957 struct ftrace_graph_ent_entry *entry;
935 958
@@ -942,7 +965,8 @@ static int __trace_graph_entry(struct trace_array *tr,
942 return 0; 965 return 0;
943 entry = ring_buffer_event_data(event); 966 entry = ring_buffer_event_data(event);
944 entry->graph_ent = *trace; 967 entry->graph_ent = *trace;
945 ring_buffer_unlock_commit(global_trace.buffer, event); 968 if (!filter_current_check_discard(call, entry, event))
969 ring_buffer_unlock_commit(global_trace.buffer, event);
946 970
947 return 1; 971 return 1;
948} 972}
@@ -952,6 +976,7 @@ static void __trace_graph_return(struct trace_array *tr,
952 unsigned long flags, 976 unsigned long flags,
953 int pc) 977 int pc)
954{ 978{
979 struct ftrace_event_call *call = &event_funcgraph_exit;
955 struct ring_buffer_event *event; 980 struct ring_buffer_event *event;
956 struct ftrace_graph_ret_entry *entry; 981 struct ftrace_graph_ret_entry *entry;
957 982
@@ -964,7 +989,8 @@ static void __trace_graph_return(struct trace_array *tr,
964 return; 989 return;
965 entry = ring_buffer_event_data(event); 990 entry = ring_buffer_event_data(event);
966 entry->ret = *trace; 991 entry->ret = *trace;
967 ring_buffer_unlock_commit(global_trace.buffer, event); 992 if (!filter_current_check_discard(call, entry, event))
993 ring_buffer_unlock_commit(global_trace.buffer, event);
968} 994}
969#endif 995#endif
970 996
@@ -982,6 +1008,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
982 int skip, int pc) 1008 int skip, int pc)
983{ 1009{
984#ifdef CONFIG_STACKTRACE 1010#ifdef CONFIG_STACKTRACE
1011 struct ftrace_event_call *call = &event_kernel_stack;
985 struct ring_buffer_event *event; 1012 struct ring_buffer_event *event;
986 struct stack_entry *entry; 1013 struct stack_entry *entry;
987 struct stack_trace trace; 1014 struct stack_trace trace;
@@ -999,7 +1026,8 @@ static void __ftrace_trace_stack(struct trace_array *tr,
999 trace.entries = entry->caller; 1026 trace.entries = entry->caller;
1000 1027
1001 save_stack_trace(&trace); 1028 save_stack_trace(&trace);
1002 ring_buffer_unlock_commit(tr->buffer, event); 1029 if (!filter_check_discard(call, entry, tr->buffer, event))
1030 ring_buffer_unlock_commit(tr->buffer, event);
1003#endif 1031#endif
1004} 1032}
1005 1033
@@ -1024,6 +1052,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1024 unsigned long flags, int pc) 1052 unsigned long flags, int pc)
1025{ 1053{
1026#ifdef CONFIG_STACKTRACE 1054#ifdef CONFIG_STACKTRACE
1055 struct ftrace_event_call *call = &event_user_stack;
1027 struct ring_buffer_event *event; 1056 struct ring_buffer_event *event;
1028 struct userstack_entry *entry; 1057 struct userstack_entry *entry;
1029 struct stack_trace trace; 1058 struct stack_trace trace;
@@ -1045,7 +1074,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1045 trace.entries = entry->caller; 1074 trace.entries = entry->caller;
1046 1075
1047 save_stack_trace_user(&trace); 1076 save_stack_trace_user(&trace);
1048 ring_buffer_unlock_commit(tr->buffer, event); 1077 if (!filter_check_discard(call, entry, tr->buffer, event))
1078 ring_buffer_unlock_commit(tr->buffer, event);
1049#endif 1079#endif
1050} 1080}
1051 1081
@@ -1089,6 +1119,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
1089 struct task_struct *next, 1119 struct task_struct *next,
1090 unsigned long flags, int pc) 1120 unsigned long flags, int pc)
1091{ 1121{
1122 struct ftrace_event_call *call = &event_context_switch;
1092 struct ring_buffer_event *event; 1123 struct ring_buffer_event *event;
1093 struct ctx_switch_entry *entry; 1124 struct ctx_switch_entry *entry;
1094 1125
@@ -1104,7 +1135,9 @@ tracing_sched_switch_trace(struct trace_array *tr,
1104 entry->next_prio = next->prio; 1135 entry->next_prio = next->prio;
1105 entry->next_state = next->state; 1136 entry->next_state = next->state;
1106 entry->next_cpu = task_cpu(next); 1137 entry->next_cpu = task_cpu(next);
1107 trace_buffer_unlock_commit(tr, event, flags, pc); 1138
1139 if (!filter_check_discard(call, entry, tr->buffer, event))
1140 trace_buffer_unlock_commit(tr, event, flags, pc);
1108} 1141}
1109 1142
1110void 1143void
@@ -1113,6 +1146,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1113 struct task_struct *curr, 1146 struct task_struct *curr,
1114 unsigned long flags, int pc) 1147 unsigned long flags, int pc)
1115{ 1148{
1149 struct ftrace_event_call *call = &event_wakeup;
1116 struct ring_buffer_event *event; 1150 struct ring_buffer_event *event;
1117 struct ctx_switch_entry *entry; 1151 struct ctx_switch_entry *entry;
1118 1152
@@ -1129,7 +1163,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1129 entry->next_state = wakee->state; 1163 entry->next_state = wakee->state;
1130 entry->next_cpu = task_cpu(wakee); 1164 entry->next_cpu = task_cpu(wakee);
1131 1165
1132 ring_buffer_unlock_commit(tr->buffer, event); 1166 if (!filter_check_discard(call, entry, tr->buffer, event))
1167 ring_buffer_unlock_commit(tr->buffer, event);
1133 ftrace_trace_stack(tr, flags, 6, pc); 1168 ftrace_trace_stack(tr, flags, 6, pc);
1134 ftrace_trace_userstack(tr, flags, pc); 1169 ftrace_trace_userstack(tr, flags, pc);
1135} 1170}
@@ -1230,11 +1265,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1230 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1265 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
1231 static u32 trace_buf[TRACE_BUF_SIZE]; 1266 static u32 trace_buf[TRACE_BUF_SIZE];
1232 1267
1268 struct ftrace_event_call *call = &event_bprint;
1233 struct ring_buffer_event *event; 1269 struct ring_buffer_event *event;
1234 struct trace_array *tr = &global_trace; 1270 struct trace_array *tr = &global_trace;
1235 struct trace_array_cpu *data; 1271 struct trace_array_cpu *data;
1236 struct bprint_entry *entry; 1272 struct bprint_entry *entry;
1237 unsigned long flags; 1273 unsigned long flags;
1274 int disable;
1238 int resched; 1275 int resched;
1239 int cpu, len = 0, size, pc; 1276 int cpu, len = 0, size, pc;
1240 1277
@@ -1249,7 +1286,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1249 cpu = raw_smp_processor_id(); 1286 cpu = raw_smp_processor_id();
1250 data = tr->data[cpu]; 1287 data = tr->data[cpu];
1251 1288
1252 if (unlikely(atomic_read(&data->disabled))) 1289 disable = atomic_inc_return(&data->disabled);
1290 if (unlikely(disable != 1))
1253 goto out; 1291 goto out;
1254 1292
1255 /* Lockdep uses trace_printk for lock tracing */ 1293 /* Lockdep uses trace_printk for lock tracing */
@@ -1269,13 +1307,15 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1269 entry->fmt = fmt; 1307 entry->fmt = fmt;
1270 1308
1271 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1309 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1272 ring_buffer_unlock_commit(tr->buffer, event); 1310 if (!filter_check_discard(call, entry, tr->buffer, event))
1311 ring_buffer_unlock_commit(tr->buffer, event);
1273 1312
1274out_unlock: 1313out_unlock:
1275 __raw_spin_unlock(&trace_buf_lock); 1314 __raw_spin_unlock(&trace_buf_lock);
1276 local_irq_restore(flags); 1315 local_irq_restore(flags);
1277 1316
1278out: 1317out:
1318 atomic_dec_return(&data->disabled);
1279 ftrace_preempt_enable(resched); 1319 ftrace_preempt_enable(resched);
1280 unpause_graph_tracing(); 1320 unpause_graph_tracing();
1281 1321
@@ -1288,12 +1328,14 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1288 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1328 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1289 static char trace_buf[TRACE_BUF_SIZE]; 1329 static char trace_buf[TRACE_BUF_SIZE];
1290 1330
1331 struct ftrace_event_call *call = &event_print;
1291 struct ring_buffer_event *event; 1332 struct ring_buffer_event *event;
1292 struct trace_array *tr = &global_trace; 1333 struct trace_array *tr = &global_trace;
1293 struct trace_array_cpu *data; 1334 struct trace_array_cpu *data;
1294 int cpu, len = 0, size, pc; 1335 int cpu, len = 0, size, pc;
1295 struct print_entry *entry; 1336 struct print_entry *entry;
1296 unsigned long irq_flags; 1337 unsigned long irq_flags;
1338 int disable;
1297 1339
1298 if (tracing_disabled || tracing_selftest_running) 1340 if (tracing_disabled || tracing_selftest_running)
1299 return 0; 1341 return 0;
@@ -1303,7 +1345,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1303 cpu = raw_smp_processor_id(); 1345 cpu = raw_smp_processor_id();
1304 data = tr->data[cpu]; 1346 data = tr->data[cpu];
1305 1347
1306 if (unlikely(atomic_read(&data->disabled))) 1348 disable = atomic_inc_return(&data->disabled);
1349 if (unlikely(disable != 1))
1307 goto out; 1350 goto out;
1308 1351
1309 pause_graph_tracing(); 1352 pause_graph_tracing();
@@ -1323,13 +1366,15 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1323 1366
1324 memcpy(&entry->buf, trace_buf, len); 1367 memcpy(&entry->buf, trace_buf, len);
1325 entry->buf[len] = 0; 1368 entry->buf[len] = 0;
1326 ring_buffer_unlock_commit(tr->buffer, event); 1369 if (!filter_check_discard(call, entry, tr->buffer, event))
1370 ring_buffer_unlock_commit(tr->buffer, event);
1327 1371
1328 out_unlock: 1372 out_unlock:
1329 __raw_spin_unlock(&trace_buf_lock); 1373 __raw_spin_unlock(&trace_buf_lock);
1330 raw_local_irq_restore(irq_flags); 1374 raw_local_irq_restore(irq_flags);
1331 unpause_graph_tracing(); 1375 unpause_graph_tracing();
1332 out: 1376 out:
1377 atomic_dec_return(&data->disabled);
1333 preempt_enable_notrace(); 1378 preempt_enable_notrace();
1334 1379
1335 return len; 1380 return len;
@@ -1526,12 +1571,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1526 p = s_next(m, p, &l); 1571 p = s_next(m, p, &l);
1527 } 1572 }
1528 1573
1574 trace_event_read_lock();
1529 return p; 1575 return p;
1530} 1576}
1531 1577
1532static void s_stop(struct seq_file *m, void *p) 1578static void s_stop(struct seq_file *m, void *p)
1533{ 1579{
1534 atomic_dec(&trace_record_cmdline_disabled); 1580 atomic_dec(&trace_record_cmdline_disabled);
1581 trace_event_read_unlock();
1535} 1582}
1536 1583
1537static void print_lat_help_header(struct seq_file *m) 1584static void print_lat_help_header(struct seq_file *m)
@@ -1774,6 +1821,7 @@ static int trace_empty(struct trace_iterator *iter)
1774 return 1; 1821 return 1;
1775} 1822}
1776 1823
1824/* Called with trace_event_read_lock() held. */
1777static enum print_line_t print_trace_line(struct trace_iterator *iter) 1825static enum print_line_t print_trace_line(struct trace_iterator *iter)
1778{ 1826{
1779 enum print_line_t ret; 1827 enum print_line_t ret;
@@ -2397,6 +2445,56 @@ static const struct file_operations tracing_readme_fops = {
2397}; 2445};
2398 2446
2399static ssize_t 2447static ssize_t
2448tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2449 size_t cnt, loff_t *ppos)
2450{
2451 char *buf_comm;
2452 char *file_buf;
2453 char *buf;
2454 int len = 0;
2455 int pid;
2456 int i;
2457
2458 file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
2459 if (!file_buf)
2460 return -ENOMEM;
2461
2462 buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
2463 if (!buf_comm) {
2464 kfree(file_buf);
2465 return -ENOMEM;
2466 }
2467
2468 buf = file_buf;
2469
2470 for (i = 0; i < SAVED_CMDLINES; i++) {
2471 int r;
2472
2473 pid = map_cmdline_to_pid[i];
2474 if (pid == -1 || pid == NO_CMDLINE_MAP)
2475 continue;
2476
2477 trace_find_cmdline(pid, buf_comm);
2478 r = sprintf(buf, "%d %s\n", pid, buf_comm);
2479 buf += r;
2480 len += r;
2481 }
2482
2483 len = simple_read_from_buffer(ubuf, cnt, ppos,
2484 file_buf, len);
2485
2486 kfree(file_buf);
2487 kfree(buf_comm);
2488
2489 return len;
2490}
2491
2492static const struct file_operations tracing_saved_cmdlines_fops = {
2493 .open = tracing_open_generic,
2494 .read = tracing_saved_cmdlines_read,
2495};
2496
2497static ssize_t
2400tracing_ctrl_read(struct file *filp, char __user *ubuf, 2498tracing_ctrl_read(struct file *filp, char __user *ubuf,
2401 size_t cnt, loff_t *ppos) 2499 size_t cnt, loff_t *ppos)
2402{ 2500{
@@ -2728,6 +2826,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2728 /* trace pipe does not show start of buffer */ 2826 /* trace pipe does not show start of buffer */
2729 cpumask_setall(iter->started); 2827 cpumask_setall(iter->started);
2730 2828
2829 if (trace_flags & TRACE_ITER_LATENCY_FMT)
2830 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2831
2731 iter->cpu_file = cpu_file; 2832 iter->cpu_file = cpu_file;
2732 iter->tr = &global_trace; 2833 iter->tr = &global_trace;
2733 mutex_init(&iter->mutex); 2834 mutex_init(&iter->mutex);
@@ -2915,6 +3016,7 @@ waitagain:
2915 offsetof(struct trace_iterator, seq)); 3016 offsetof(struct trace_iterator, seq));
2916 iter->pos = -1; 3017 iter->pos = -1;
2917 3018
3019 trace_event_read_lock();
2918 while (find_next_entry_inc(iter) != NULL) { 3020 while (find_next_entry_inc(iter) != NULL) {
2919 enum print_line_t ret; 3021 enum print_line_t ret;
2920 int len = iter->seq.len; 3022 int len = iter->seq.len;
@@ -2931,6 +3033,7 @@ waitagain:
2931 if (iter->seq.len >= cnt) 3033 if (iter->seq.len >= cnt)
2932 break; 3034 break;
2933 } 3035 }
3036 trace_event_read_unlock();
2934 3037
2935 /* Now copy what we have to the user */ 3038 /* Now copy what we have to the user */
2936 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 3039 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -3053,6 +3156,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3053 goto out_err; 3156 goto out_err;
3054 } 3157 }
3055 3158
3159 trace_event_read_lock();
3160
3056 /* Fill as many pages as possible. */ 3161 /* Fill as many pages as possible. */
3057 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3162 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
3058 pages[i] = alloc_page(GFP_KERNEL); 3163 pages[i] = alloc_page(GFP_KERNEL);
@@ -3075,6 +3180,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3075 trace_seq_init(&iter->seq); 3180 trace_seq_init(&iter->seq);
3076 } 3181 }
3077 3182
3183 trace_event_read_unlock();
3078 mutex_unlock(&iter->mutex); 3184 mutex_unlock(&iter->mutex);
3079 3185
3080 spd.nr_pages = i; 3186 spd.nr_pages = i;
@@ -3425,7 +3531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3425 .spd_release = buffer_spd_release, 3531 .spd_release = buffer_spd_release,
3426 }; 3532 };
3427 struct buffer_ref *ref; 3533 struct buffer_ref *ref;
3428 int size, i; 3534 int entries, size, i;
3429 size_t ret; 3535 size_t ret;
3430 3536
3431 if (*ppos & (PAGE_SIZE - 1)) { 3537 if (*ppos & (PAGE_SIZE - 1)) {
@@ -3440,7 +3546,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3440 len &= PAGE_MASK; 3546 len &= PAGE_MASK;
3441 } 3547 }
3442 3548
3443 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { 3549 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3550
3551 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
3444 struct page *page; 3552 struct page *page;
3445 int r; 3553 int r;
3446 3554
@@ -3457,7 +3565,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3457 } 3565 }
3458 3566
3459 r = ring_buffer_read_page(ref->buffer, &ref->page, 3567 r = ring_buffer_read_page(ref->buffer, &ref->page,
3460 len, info->cpu, 0); 3568 len, info->cpu, 1);
3461 if (r < 0) { 3569 if (r < 0) {
3462 ring_buffer_free_read_page(ref->buffer, 3570 ring_buffer_free_read_page(ref->buffer,
3463 ref->page); 3571 ref->page);
@@ -3481,6 +3589,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3481 spd.partial[i].private = (unsigned long)ref; 3589 spd.partial[i].private = (unsigned long)ref;
3482 spd.nr_pages++; 3590 spd.nr_pages++;
3483 *ppos += PAGE_SIZE; 3591 *ppos += PAGE_SIZE;
3592
3593 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3484 } 3594 }
3485 3595
3486 spd.nr_pages = i; 3596 spd.nr_pages = i;
@@ -3508,6 +3618,45 @@ static const struct file_operations tracing_buffers_fops = {
3508 .llseek = no_llseek, 3618 .llseek = no_llseek,
3509}; 3619};
3510 3620
3621static ssize_t
3622tracing_stats_read(struct file *filp, char __user *ubuf,
3623 size_t count, loff_t *ppos)
3624{
3625 unsigned long cpu = (unsigned long)filp->private_data;
3626 struct trace_array *tr = &global_trace;
3627 struct trace_seq *s;
3628 unsigned long cnt;
3629
3630 s = kmalloc(sizeof(*s), GFP_ATOMIC);
3631 if (!s)
3632 return -ENOMEM;
3633
3634 trace_seq_init(s);
3635
3636 cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
3637 trace_seq_printf(s, "entries: %ld\n", cnt);
3638
3639 cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
3640 trace_seq_printf(s, "overrun: %ld\n", cnt);
3641
3642 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3643 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3644
3645 cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
3646 trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
3647
3648 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3649
3650 kfree(s);
3651
3652 return count;
3653}
3654
3655static const struct file_operations tracing_stats_fops = {
3656 .open = tracing_open_generic,
3657 .read = tracing_stats_read,
3658};
3659
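/*
 * Editor's note (not part of the patch): tracing_stats_fops is wired
 * up per cpu in tracing_init_debugfs_percpu() below, so the counters
 * can be read from userspace with something like
 *
 *      cat /sys/kernel/debug/tracing/per_cpu/cpu0/stats
 *
 * The exact path is an assumption; it depends on the debugfs mount
 * point and the per-cpu directory naming.
 */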
3511#ifdef CONFIG_DYNAMIC_FTRACE 3660#ifdef CONFIG_DYNAMIC_FTRACE
3512 3661
3513int __weak ftrace_arch_read_dyn_info(char *buf, int size) 3662int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3597,7 +3746,7 @@ struct dentry *tracing_dentry_percpu(void)
3597static void tracing_init_debugfs_percpu(long cpu) 3746static void tracing_init_debugfs_percpu(long cpu)
3598{ 3747{
3599 struct dentry *d_percpu = tracing_dentry_percpu(); 3748 struct dentry *d_percpu = tracing_dentry_percpu();
3600 struct dentry *entry, *d_cpu; 3749 struct dentry *d_cpu;
3601 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 3750 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
3602 char cpu_dir[7]; 3751 char cpu_dir[7];
3603 3752
@@ -3612,21 +3761,18 @@ static void tracing_init_debugfs_percpu(long cpu)
3612 } 3761 }
3613 3762
3614 /* per cpu trace_pipe */ 3763 /* per cpu trace_pipe */
3615 entry = debugfs_create_file("trace_pipe", 0444, d_cpu, 3764 trace_create_file("trace_pipe", 0444, d_cpu,
3616 (void *) cpu, &tracing_pipe_fops); 3765 (void *) cpu, &tracing_pipe_fops);
3617 if (!entry)
3618 pr_warning("Could not create debugfs 'trace_pipe' entry\n");
3619 3766
3620 /* per cpu trace */ 3767 /* per cpu trace */
3621 entry = debugfs_create_file("trace", 0644, d_cpu, 3768 trace_create_file("trace", 0644, d_cpu,
3622 (void *) cpu, &tracing_fops); 3769 (void *) cpu, &tracing_fops);
3623 if (!entry) 3770
3624 pr_warning("Could not create debugfs 'trace' entry\n"); 3771 trace_create_file("trace_pipe_raw", 0444, d_cpu,
3772 (void *) cpu, &tracing_buffers_fops);
3625 3773
3626 entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu, 3774 trace_create_file("stats", 0444, d_cpu,
3627 (void *) cpu, &tracing_buffers_fops); 3775 (void *) cpu, &tracing_stats_fops);
3628 if (!entry)
3629 pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
3630} 3776}
3631 3777
3632#ifdef CONFIG_FTRACE_SELFTEST 3778#ifdef CONFIG_FTRACE_SELFTEST
@@ -3782,6 +3928,22 @@ static const struct file_operations trace_options_core_fops = {
3782 .write = trace_options_core_write, 3928 .write = trace_options_core_write,
3783}; 3929};
3784 3930
3931struct dentry *trace_create_file(const char *name,
3932 mode_t mode,
3933 struct dentry *parent,
3934 void *data,
3935 const struct file_operations *fops)
3936{
3937 struct dentry *ret;
3938
3939 ret = debugfs_create_file(name, mode, parent, data, fops);
3940 if (!ret)
3941 pr_warning("Could not create debugfs '%s' entry\n", name);
3942
3943 return ret;
3944}
3945
3946
3785static struct dentry *trace_options_init_dentry(void) 3947static struct dentry *trace_options_init_dentry(void)
3786{ 3948{
3787 struct dentry *d_tracer; 3949 struct dentry *d_tracer;
@@ -3809,7 +3971,6 @@ create_trace_option_file(struct trace_option_dentry *topt,
3809 struct tracer_opt *opt) 3971 struct tracer_opt *opt)
3810{ 3972{
3811 struct dentry *t_options; 3973 struct dentry *t_options;
3812 struct dentry *entry;
3813 3974
3814 t_options = trace_options_init_dentry(); 3975 t_options = trace_options_init_dentry();
3815 if (!t_options) 3976 if (!t_options)
@@ -3818,11 +3979,9 @@ create_trace_option_file(struct trace_option_dentry *topt,
3818 topt->flags = flags; 3979 topt->flags = flags;
3819 topt->opt = opt; 3980 topt->opt = opt;
3820 3981
3821 entry = debugfs_create_file(opt->name, 0644, t_options, topt, 3982 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
3822 &trace_options_fops); 3983 &trace_options_fops);
3823 3984
3824 topt->entry = entry;
3825
3826} 3985}
3827 3986
3828static struct trace_option_dentry * 3987static struct trace_option_dentry *
@@ -3877,123 +4036,84 @@ static struct dentry *
3877create_trace_option_core_file(const char *option, long index) 4036create_trace_option_core_file(const char *option, long index)
3878{ 4037{
3879 struct dentry *t_options; 4038 struct dentry *t_options;
3880 struct dentry *entry;
3881 4039
3882 t_options = trace_options_init_dentry(); 4040 t_options = trace_options_init_dentry();
3883 if (!t_options) 4041 if (!t_options)
3884 return NULL; 4042 return NULL;
3885 4043
3886 entry = debugfs_create_file(option, 0644, t_options, (void *)index, 4044 return trace_create_file(option, 0644, t_options, (void *)index,
3887 &trace_options_core_fops); 4045 &trace_options_core_fops);
3888
3889 return entry;
3890} 4046}
3891 4047
3892static __init void create_trace_options_dir(void) 4048static __init void create_trace_options_dir(void)
3893{ 4049{
3894 struct dentry *t_options; 4050 struct dentry *t_options;
3895 struct dentry *entry;
3896 int i; 4051 int i;
3897 4052
3898 t_options = trace_options_init_dentry(); 4053 t_options = trace_options_init_dentry();
3899 if (!t_options) 4054 if (!t_options)
3900 return; 4055 return;
3901 4056
3902 for (i = 0; trace_options[i]; i++) { 4057 for (i = 0; trace_options[i]; i++)
3903 entry = create_trace_option_core_file(trace_options[i], i); 4058 create_trace_option_core_file(trace_options[i], i);
3904 if (!entry)
3905 pr_warning("Could not create debugfs %s entry\n",
3906 trace_options[i]);
3907 }
3908} 4059}
3909 4060
3910static __init int tracer_init_debugfs(void) 4061static __init int tracer_init_debugfs(void)
3911{ 4062{
3912 struct dentry *d_tracer; 4063 struct dentry *d_tracer;
3913 struct dentry *entry;
3914 int cpu; 4064 int cpu;
3915 4065
3916 d_tracer = tracing_init_dentry(); 4066 d_tracer = tracing_init_dentry();
3917 4067
3918 entry = debugfs_create_file("tracing_enabled", 0644, d_tracer, 4068 trace_create_file("tracing_enabled", 0644, d_tracer,
3919 &global_trace, &tracing_ctrl_fops); 4069 &global_trace, &tracing_ctrl_fops);
3920 if (!entry)
3921 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
3922 4070
3923 entry = debugfs_create_file("trace_options", 0644, d_tracer, 4071 trace_create_file("trace_options", 0644, d_tracer,
3924 NULL, &tracing_iter_fops); 4072 NULL, &tracing_iter_fops);
3925 if (!entry)
3926 pr_warning("Could not create debugfs 'trace_options' entry\n");
3927 4073
3928 create_trace_options_dir(); 4074 trace_create_file("tracing_cpumask", 0644, d_tracer,
4075 NULL, &tracing_cpumask_fops);
4076
4077 trace_create_file("trace", 0644, d_tracer,
4078 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
3929 4079
3930 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 4080 trace_create_file("available_tracers", 0444, d_tracer,
3931 NULL, &tracing_cpumask_fops); 4081 &global_trace, &show_traces_fops);
3932 if (!entry) 4082
3933 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); 4083 trace_create_file("current_tracer", 0644, d_tracer,
3934 4084 &global_trace, &set_tracer_fops);
3935 entry = debugfs_create_file("trace", 0644, d_tracer, 4085
3936 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); 4086 trace_create_file("tracing_max_latency", 0644, d_tracer,
3937 if (!entry) 4087 &tracing_max_latency, &tracing_max_lat_fops);
3938 pr_warning("Could not create debugfs 'trace' entry\n"); 4088
3939 4089 trace_create_file("tracing_thresh", 0644, d_tracer,
3940 entry = debugfs_create_file("available_tracers", 0444, d_tracer, 4090 &tracing_thresh, &tracing_max_lat_fops);
3941 &global_trace, &show_traces_fops); 4091
3942 if (!entry) 4092 trace_create_file("README", 0444, d_tracer,
3943 pr_warning("Could not create debugfs 'available_tracers' entry\n"); 4093 NULL, &tracing_readme_fops);
3944 4094
3945 entry = debugfs_create_file("current_tracer", 0444, d_tracer, 4095 trace_create_file("trace_pipe", 0444, d_tracer,
3946 &global_trace, &set_tracer_fops);
3947 if (!entry)
3948 pr_warning("Could not create debugfs 'current_tracer' entry\n");
3949
3950 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
3951 &tracing_max_latency,
3952 &tracing_max_lat_fops);
3953 if (!entry)
3954 pr_warning("Could not create debugfs "
3955 "'tracing_max_latency' entry\n");
3956
3957 entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
3958 &tracing_thresh, &tracing_max_lat_fops);
3959 if (!entry)
3960 pr_warning("Could not create debugfs "
3961 "'tracing_thresh' entry\n");
3962 entry = debugfs_create_file("README", 0644, d_tracer,
3963 NULL, &tracing_readme_fops);
3964 if (!entry)
3965 pr_warning("Could not create debugfs 'README' entry\n");
3966
3967 entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
3968 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); 4096 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
3969 if (!entry) 4097
3970 pr_warning("Could not create debugfs " 4098 trace_create_file("buffer_size_kb", 0644, d_tracer,
3971 "'trace_pipe' entry\n"); 4099 &global_trace, &tracing_entries_fops);
3972 4100
3973 entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, 4101 trace_create_file("trace_marker", 0220, d_tracer,
3974 &global_trace, &tracing_entries_fops); 4102 NULL, &tracing_mark_fops);
3975 if (!entry) 4103
3976 pr_warning("Could not create debugfs " 4104 trace_create_file("saved_cmdlines", 0444, d_tracer,
3977 "'buffer_size_kb' entry\n"); 4105 NULL, &tracing_saved_cmdlines_fops);
3978
3979 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
3980 NULL, &tracing_mark_fops);
3981 if (!entry)
3982 pr_warning("Could not create debugfs "
3983 "'trace_marker' entry\n");
3984 4106
3985#ifdef CONFIG_DYNAMIC_FTRACE 4107#ifdef CONFIG_DYNAMIC_FTRACE
3986 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4108 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
3987 &ftrace_update_tot_cnt, 4109 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
3988 &tracing_dyn_info_fops);
3989 if (!entry)
3990 pr_warning("Could not create debugfs "
3991 "'dyn_ftrace_total_info' entry\n");
3992#endif 4110#endif
3993#ifdef CONFIG_SYSPROF_TRACER 4111#ifdef CONFIG_SYSPROF_TRACER
3994 init_tracer_sysprof_debugfs(d_tracer); 4112 init_tracer_sysprof_debugfs(d_tracer);
3995#endif 4113#endif
3996 4114
4115 create_trace_options_dir();
4116
3997 for_each_tracing_cpu(cpu) 4117 for_each_tracing_cpu(cpu)
3998 tracing_init_debugfs_percpu(cpu); 4118 tracing_init_debugfs_percpu(cpu);
3999 4119
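The hunk above is a mechanical cleanup: every debugfs_create_file()-plus-pr_warning() pair collapses into a single trace_create_file() call. The helper itself is added elsewhere in trace.c; judging from the prototype this same commit adds to trace.h further down, it is presumably a thin wrapper along these lines (a sketch, not the verbatim implementation):

	struct dentry *trace_create_file(const char *name,
					 mode_t mode,
					 struct dentry *parent,
					 void *data,
					 const struct file_operations *fops)
	{
		struct dentry *ret;

		/* debugfs_create_file() returns NULL on failure */
		ret = debugfs_create_file(name, mode, parent, data, fops);
		if (!ret)
			pr_warning("Could not create debugfs '%s' entry\n", name);

		return ret;
	}

Centralizing the warning is what lets every call site above drop its "if (!entry)" block.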
@@ -4064,7 +4184,8 @@ trace_printk_seq(struct trace_seq *s)
4064 4184
4065static void __ftrace_dump(bool disable_tracing) 4185static void __ftrace_dump(bool disable_tracing)
4066{ 4186{
4067 static DEFINE_SPINLOCK(ftrace_dump_lock); 4187 static raw_spinlock_t ftrace_dump_lock =
4188 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
4068 /* use static because iter can be a bit big for the stack */ 4189 /* use static because iter can be a bit big for the stack */
4069 static struct trace_iterator iter; 4190 static struct trace_iterator iter;
4070 unsigned int old_userobj; 4191 unsigned int old_userobj;
@@ -4073,7 +4194,8 @@ static void __ftrace_dump(bool disable_tracing)
4073 int cnt = 0, cpu; 4194 int cnt = 0, cpu;
4074 4195
4075 /* only one dump */ 4196 /* only one dump */
4076 spin_lock_irqsave(&ftrace_dump_lock, flags); 4197 local_irq_save(flags);
4198 __raw_spin_lock(&ftrace_dump_lock);
4077 if (dump_ran) 4199 if (dump_ran)
4078 goto out; 4200 goto out;
4079 4201
@@ -4145,7 +4267,8 @@ static void __ftrace_dump(bool disable_tracing)
4145 } 4267 }
4146 4268
4147 out: 4269 out:
4148 spin_unlock_irqrestore(&ftrace_dump_lock, flags); 4270 __raw_spin_unlock(&ftrace_dump_lock);
4271 local_irq_restore(flags);
4149} 4272}
4150 4273
4151/* By default: disable tracing after the dump */ 4274/* By default: disable tracing after the dump */
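The locking change in __ftrace_dump() above swaps a normal spinlock for a hand-unlocked raw spinlock, presumably so the dump path cannot recurse into the lock debugging and tracing machinery it is in the middle of dumping. __raw_spin_lock() bypasses lockdep and the spinlock hooks but does not mask interrupts, hence the explicit local_irq_save(). The resulting pattern in isolation (a minimal sketch with illustrative names):

	static raw_spinlock_t dump_lock =
		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;

	static void dump_once(void)
	{
		unsigned long flags;

		local_irq_save(flags);		/* raw locks do not mask IRQs */
		__raw_spin_lock(&dump_lock);
		/* ... one-shot dump work ... */
		__raw_spin_unlock(&dump_lock);
		local_irq_restore(flags);
	}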
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e685ac2b2ba1..6e735d4771f8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,9 +9,12 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h> 11#include <trace/boot.h>
12#include <trace/kmemtrace.h> 12#include <linux/kmemtrace.h>
13#include <trace/power.h> 13#include <trace/power.h>
14 14
15#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h>
17
15enum trace_type { 18enum trace_type {
16 __TRACE_FIRST_TYPE = 0, 19 __TRACE_FIRST_TYPE = 0,
17 20
@@ -42,20 +45,6 @@ enum trace_type {
42}; 45};
43 46
44/* 47/*
45 * The trace entry - the most basic unit of tracing. This is what
46 * is printed in the end as a single line in the trace output, such as:
47 *
48 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
49 */
50struct trace_entry {
51 unsigned char type;
52 unsigned char flags;
53 unsigned char preempt_count;
54 int pid;
55 int tgid;
56};
57
58/*
59 * Function trace entry - function address and parent function address: 48 * Function trace entry - function address and parent function address:
60 */ 49 */
61struct ftrace_entry { 50struct ftrace_entry {
@@ -263,8 +252,6 @@ struct trace_array_cpu {
263 char comm[TASK_COMM_LEN]; 252 char comm[TASK_COMM_LEN];
264}; 253};
265 254
266struct trace_iterator;
267
268/* 255/*
269 * The trace array - an array of per-CPU trace arrays. This is the 256 * The trace array - an array of per-CPU trace arrays. This is the
270 * highest level data structure that individual tracers deal with. 257 * highest level data structure that individual tracers deal with.
@@ -339,15 +326,6 @@ extern void __ftrace_bad_type(void);
339 __ftrace_bad_type(); \ 326 __ftrace_bad_type(); \
340 } while (0) 327 } while (0)
341 328
342/* Return values for print_line callback */
343enum print_line_t {
344 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
345 TRACE_TYPE_HANDLED = 1,
346 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
347 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
348};
349
350
351/* 329/*
352 * An option specific to a tracer. This is a boolean value. 330 * An option specific to a tracer. This is a boolean value.
353 * The bit is the bit index that sets its value on the 331 * The bit is the bit index that sets its value on the
@@ -423,60 +401,30 @@ struct tracer {
423 struct tracer_stat *stats; 401 struct tracer_stat *stats;
424}; 402};
425 403
426struct trace_seq {
427 unsigned char buffer[PAGE_SIZE];
428 unsigned int len;
429 unsigned int readpos;
430};
431
432static inline void
433trace_seq_init(struct trace_seq *s)
434{
435 s->len = 0;
436 s->readpos = 0;
437}
438
439 404
440#define TRACE_PIPE_ALL_CPU -1 405#define TRACE_PIPE_ALL_CPU -1
441 406
442/*
443 * Trace iterator - used by printout routines that present trace
444 * results to users and which routines might sleep, etc:
445 */
446struct trace_iterator {
447 struct trace_array *tr;
448 struct tracer *trace;
449 void *private;
450 int cpu_file;
451 struct mutex mutex;
452 struct ring_buffer_iter *buffer_iter[NR_CPUS];
453
454 /* The below is zeroed out in pipe_read */
455 struct trace_seq seq;
456 struct trace_entry *ent;
457 int cpu;
458 u64 ts;
459
460 unsigned long iter_flags;
461 loff_t pos;
462 long idx;
463
464 cpumask_var_t started;
465};
466
467int tracer_init(struct tracer *t, struct trace_array *tr); 407int tracer_init(struct tracer *t, struct trace_array *tr);
468int tracing_is_enabled(void); 408int tracing_is_enabled(void);
469void trace_wake_up(void); 409void trace_wake_up(void);
470void tracing_reset(struct trace_array *tr, int cpu); 410void tracing_reset(struct trace_array *tr, int cpu);
471void tracing_reset_online_cpus(struct trace_array *tr); 411void tracing_reset_online_cpus(struct trace_array *tr);
412void tracing_reset_current(int cpu);
413void tracing_reset_current_online_cpus(void);
472int tracing_open_generic(struct inode *inode, struct file *filp); 414int tracing_open_generic(struct inode *inode, struct file *filp);
415struct dentry *trace_create_file(const char *name,
416 mode_t mode,
417 struct dentry *parent,
418 void *data,
419 const struct file_operations *fops);
420
473struct dentry *tracing_init_dentry(void); 421struct dentry *tracing_init_dentry(void);
474void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 422void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
475 423
476struct ring_buffer_event; 424struct ring_buffer_event;
477 425
478struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 426struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
479 unsigned char type, 427 int type,
480 unsigned long len, 428 unsigned long len,
481 unsigned long flags, 429 unsigned long flags,
482 int pc); 430 int pc);
@@ -484,14 +432,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
484 struct ring_buffer_event *event, 432 struct ring_buffer_event *event,
485 unsigned long flags, int pc); 433 unsigned long flags, int pc);
486 434
487struct ring_buffer_event *
488trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
489 unsigned long flags, int pc);
490void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
491 unsigned long flags, int pc);
492void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
493 unsigned long flags, int pc);
494
495struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 435struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
496 struct trace_array_cpu *data); 436 struct trace_array_cpu *data);
497 437
@@ -514,7 +454,6 @@ void tracing_sched_switch_trace(struct trace_array *tr,
514 struct task_struct *prev, 454 struct task_struct *prev,
515 struct task_struct *next, 455 struct task_struct *next,
516 unsigned long flags, int pc); 456 unsigned long flags, int pc);
517void tracing_record_cmdline(struct task_struct *tsk);
518 457
519void tracing_sched_wakeup_trace(struct trace_array *tr, 458void tracing_sched_wakeup_trace(struct trace_array *tr,
520 struct task_struct *wakee, 459 struct task_struct *wakee,
@@ -599,6 +538,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
599 struct trace_array *tr); 538 struct trace_array *tr);
600extern int trace_selftest_startup_branch(struct tracer *trace, 539extern int trace_selftest_startup_branch(struct tracer *trace,
601 struct trace_array *tr); 540 struct trace_array *tr);
541extern int trace_selftest_startup_hw_branches(struct tracer *trace,
542 struct trace_array *tr);
602#endif /* CONFIG_FTRACE_STARTUP_TEST */ 543#endif /* CONFIG_FTRACE_STARTUP_TEST */
603 544
604extern void *head_page(struct trace_array_cpu *data); 545extern void *head_page(struct trace_array_cpu *data);
@@ -613,6 +554,8 @@ extern unsigned long trace_flags;
613/* Standard output formatting function used for function return traces */ 554/* Standard output formatting function used for function return traces */
614#ifdef CONFIG_FUNCTION_GRAPH_TRACER 555#ifdef CONFIG_FUNCTION_GRAPH_TRACER
615extern enum print_line_t print_graph_function(struct trace_iterator *iter); 556extern enum print_line_t print_graph_function(struct trace_iterator *iter);
557extern enum print_line_t
558trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
616 559
617#ifdef CONFIG_DYNAMIC_FTRACE 560#ifdef CONFIG_DYNAMIC_FTRACE
618/* TODO: make this variable */ 561/* TODO: make this variable */
@@ -644,7 +587,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
644 return 1; 587 return 1;
645} 588}
646#endif /* CONFIG_DYNAMIC_FTRACE */ 589#endif /* CONFIG_DYNAMIC_FTRACE */
647
648#else /* CONFIG_FUNCTION_GRAPH_TRACER */ 590#else /* CONFIG_FUNCTION_GRAPH_TRACER */
649static inline enum print_line_t 591static inline enum print_line_t
650print_graph_function(struct trace_iterator *iter) 592print_graph_function(struct trace_iterator *iter)
@@ -692,6 +634,7 @@ enum trace_iterator_flags {
692 TRACE_ITER_LATENCY_FMT = 0x40000, 634 TRACE_ITER_LATENCY_FMT = 0x40000,
693 TRACE_ITER_GLOBAL_CLK = 0x80000, 635 TRACE_ITER_GLOBAL_CLK = 0x80000,
694 TRACE_ITER_SLEEP_TIME = 0x100000, 636 TRACE_ITER_SLEEP_TIME = 0x100000,
637 TRACE_ITER_GRAPH_TIME = 0x200000,
695}; 638};
696 639
697/* 640/*
@@ -790,103 +733,113 @@ struct ftrace_event_field {
790 char *type; 733 char *type;
791 int offset; 734 int offset;
792 int size; 735 int size;
736 int is_signed;
793}; 737};
794 738
795struct ftrace_event_call { 739struct event_filter {
796 char *name; 740 int n_preds;
797 char *system;
798 struct dentry *dir;
799 int enabled;
800 int (*regfunc)(void);
801 void (*unregfunc)(void);
802 int id;
803 int (*raw_init)(void);
804 int (*show_format)(struct trace_seq *s);
805 int (*define_fields)(void);
806 struct list_head fields;
807 struct filter_pred **preds; 741 struct filter_pred **preds;
808 742 char *filter_string;
809#ifdef CONFIG_EVENT_PROFILE
810 atomic_t profile_count;
811 int (*profile_enable)(struct ftrace_event_call *);
812 void (*profile_disable)(struct ftrace_event_call *);
813#endif
814}; 743};
815 744
816struct event_subsystem { 745struct event_subsystem {
817 struct list_head list; 746 struct list_head list;
818 const char *name; 747 const char *name;
819 struct dentry *entry; 748 struct dentry *entry;
820 struct filter_pred **preds; 749 void *filter;
821}; 750};
822 751
823#define events_for_each(event) \
824 for (event = __start_ftrace_events; \
825 (unsigned long)event < (unsigned long)__stop_ftrace_events; \
826 event++)
827
828#define MAX_FILTER_PRED 8
829
830struct filter_pred; 752struct filter_pred;
831 753
832typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); 754typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
755 int val1, int val2);
833 756
834struct filter_pred { 757struct filter_pred {
835 filter_pred_fn_t fn; 758 filter_pred_fn_t fn;
836 u64 val; 759 u64 val;
837 char *str_val; 760 char str_val[MAX_FILTER_STR_VAL];
838 int str_len; 761 int str_len;
839 char *field_name; 762 char *field_name;
840 int offset; 763 int offset;
841 int not; 764 int not;
842 int or; 765 int op;
843 int compound; 766 int pop_n;
844 int clear;
845}; 767};
846 768
847int trace_define_field(struct ftrace_event_call *call, char *type, 769extern void print_event_filter(struct ftrace_event_call *call,
848 char *name, int offset, int size);
849extern void filter_free_pred(struct filter_pred *pred);
850extern void filter_print_preds(struct filter_pred **preds,
851 struct trace_seq *s); 770 struct trace_seq *s);
852extern int filter_parse(char **pbuf, struct filter_pred *pred); 771extern int apply_event_filter(struct ftrace_event_call *call,
853extern int filter_add_pred(struct ftrace_event_call *call, 772 char *filter_string);
854 struct filter_pred *pred); 773extern int apply_subsystem_event_filter(struct event_subsystem *system,
855extern void filter_free_preds(struct ftrace_event_call *call); 774 char *filter_string);
856extern int filter_match_preds(struct ftrace_event_call *call, void *rec); 775extern void print_subsystem_event_filter(struct event_subsystem *system,
857extern void filter_free_subsystem_preds(struct event_subsystem *system); 776 struct trace_seq *s);
858extern int filter_add_subsystem_pred(struct event_subsystem *system, 777
859 struct filter_pred *pred); 778static inline int
860 779filter_check_discard(struct ftrace_event_call *call, void *rec,
861void event_trace_printk(unsigned long ip, const char *fmt, ...); 780 struct ring_buffer *buffer,
862extern struct ftrace_event_call __start_ftrace_events[]; 781 struct ring_buffer_event *event)
863extern struct ftrace_event_call __stop_ftrace_events[]; 782{
864 783 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
865#define for_each_event(event) \ 784 ring_buffer_discard_commit(buffer, event);
866 for (event = __start_ftrace_events; \ 785 return 1;
867 (unsigned long)event < (unsigned long)__stop_ftrace_events; \ 786 }
868 event++) 787
788 return 0;
789}
790
791#define DEFINE_COMPARISON_PRED(type) \
792static int filter_pred_##type(struct filter_pred *pred, void *event, \
793 int val1, int val2) \
794{ \
795 type *addr = (type *)(event + pred->offset); \
796 type val = (type)pred->val; \
797 int match = 0; \
798 \
799 switch (pred->op) { \
800 case OP_LT: \
801 match = (*addr < val); \
802 break; \
803 case OP_LE: \
804 match = (*addr <= val); \
805 break; \
806 case OP_GT: \
807 match = (*addr > val); \
808 break; \
809 case OP_GE: \
810 match = (*addr >= val); \
811 break; \
812 default: \
813 break; \
814 } \
815 \
816 return match; \
817}
818
819#define DEFINE_EQUALITY_PRED(size) \
820static int filter_pred_##size(struct filter_pred *pred, void *event, \
821 int val1, int val2) \
822{ \
823 u##size *addr = (u##size *)(event + pred->offset); \
824 u##size val = (u##size)pred->val; \
825 int match; \
826 \
827 match = (val == *addr) ^ pred->not; \
828 \
829 return match; \
830}
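DEFINE_COMPARISON_PRED() and DEFINE_EQUALITY_PRED() stamp out one predicate function per field type, matching the new four-argument filter_pred_fn_t signature above. Hand-expanding DEFINE_COMPARISON_PRED(s64), for illustration, yields roughly:

	static int filter_pred_s64(struct filter_pred *pred, void *event,
				   int val1, int val2)
	{
		s64 *addr = (s64 *)(event + pred->offset);
		s64 val = (s64)pred->val;
		int match = 0;

		switch (pred->op) {
		case OP_LT:
			match = (*addr < val);
			break;
		case OP_LE:
			match = (*addr <= val);
			break;
		case OP_GT:
			match = (*addr > val);
			break;
		case OP_GE:
			match = (*addr >= val);
			break;
		default:
			break;
		}

		return match;
	}

The equality variant folds negation in with an XOR: (val == *addr) ^ pred->not returns 1 for a "!=" predicate exactly when the values differ.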
831
832extern struct mutex event_mutex;
833extern struct list_head ftrace_events;
869 834
870extern const char *__start___trace_bprintk_fmt[]; 835extern const char *__start___trace_bprintk_fmt[];
871extern const char *__stop___trace_bprintk_fmt[]; 836extern const char *__stop___trace_bprintk_fmt[];
872 837
873/* 838#undef TRACE_EVENT_FORMAT
874 * The double __builtin_constant_p is because gcc will give us an error 839#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
875 * if we try to allocate the static variable to fmt if it is not a 840 extern struct ftrace_event_call event_##call;
876 * constant. Even with the outer if statement optimizing out. 841#undef TRACE_EVENT_FORMAT_NOFILTER
877 */ 842#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
878#define event_trace_printk(ip, fmt, args...) \ 843#include "trace_event_types.h"
879do { \
880 __trace_printk_check_format(fmt, ##args); \
881 tracing_record_cmdline(current); \
882 if (__builtin_constant_p(fmt)) { \
883 static const char *trace_printk_fmt \
884 __attribute__((section("__trace_printk_fmt"))) = \
885 __builtin_constant_p(fmt) ? fmt : NULL; \
886 \
887 __trace_bprintk(ip, trace_printk_fmt, ##args); \
888 } else \
889 __trace_printk(ip, fmt, ##args); \
890} while (0)
891 844
892#endif /* _LINUX_KERNEL_TRACE_H */ 845#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 7a30fc4c3642..a29ef23ffb47 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -9,6 +9,7 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <linux/kallsyms.h> 11#include <linux/kallsyms.h>
12#include <linux/time.h>
12 13
13#include "trace.h" 14#include "trace.h"
14#include "trace_output.h" 15#include "trace_output.h"
@@ -67,7 +68,7 @@ initcall_call_print_line(struct trace_iterator *iter)
67 trace_assign_type(field, entry); 68 trace_assign_type(field, entry);
68 call = &field->boot_call; 69 call = &field->boot_call;
69 ts = iter->ts; 70 ts = iter->ts;
70 nsec_rem = do_div(ts, 1000000000); 71 nsec_rem = do_div(ts, NSEC_PER_SEC);
71 72
72 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", 73 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
73 (unsigned long)ts, nsec_rem, call->func, call->caller); 74 (unsigned long)ts, nsec_rem, call->func, call->caller);
@@ -92,7 +93,7 @@ initcall_ret_print_line(struct trace_iterator *iter)
92 trace_assign_type(field, entry); 93 trace_assign_type(field, entry);
93 init_ret = &field->boot_ret; 94 init_ret = &field->boot_ret;
94 ts = iter->ts; 95 ts = iter->ts;
95 nsec_rem = do_div(ts, 1000000000); 96 nsec_rem = do_div(ts, NSEC_PER_SEC);
96 97
97 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " 98 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
98 "returned %d after %llu msecs\n", 99 "returned %d after %llu msecs\n",
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 8333715e4066..7a7a9fd249a9 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -30,6 +30,7 @@ static struct trace_array *branch_tracer;
30static void 30static void
31probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) 31probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
32{ 32{
33 struct ftrace_event_call *call = &event_branch;
33 struct trace_array *tr = branch_tracer; 34 struct trace_array *tr = branch_tracer;
34 struct ring_buffer_event *event; 35 struct ring_buffer_event *event;
35 struct trace_branch *entry; 36 struct trace_branch *entry;
@@ -73,7 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
73 entry->line = f->line; 74 entry->line = f->line;
74 entry->correct = val == expect; 75 entry->correct = val == expect;
75 76
76 ring_buffer_unlock_commit(tr->buffer, event); 77 if (!filter_check_discard(call, entry, tr->buffer, event))
78 ring_buffer_unlock_commit(tr->buffer, event);
77 79
78 out: 80 out:
79 atomic_dec(&tr->data[cpu]->disabled); 81 atomic_dec(&tr->data[cpu]->disabled);
@@ -271,7 +273,7 @@ static int branch_stat_show(struct seq_file *m, void *v)
271 return 0; 273 return 0;
272} 274}
273 275
274static void *annotated_branch_stat_start(void) 276static void *annotated_branch_stat_start(struct tracer_stat *trace)
275{ 277{
276 return __start_annotated_branch_profile; 278 return __start_annotated_branch_profile;
277} 279}
@@ -346,7 +348,7 @@ static int all_branch_stat_headers(struct seq_file *m)
346 return 0; 348 return 0;
347} 349}
348 350
349static void *all_branch_stat_start(void) 351static void *all_branch_stat_start(struct tracer_stat *trace)
350{ 352{
351 return __start_branch_profile; 353 return __start_branch_profile;
352} 354}
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 22cba9970776..5b5895afecfe 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -10,22 +10,30 @@
10int ftrace_profile_enable(int event_id) 10int ftrace_profile_enable(int event_id)
11{ 11{
12 struct ftrace_event_call *event; 12 struct ftrace_event_call *event;
13 int ret = -EINVAL;
13 14
14 for_each_event(event) { 15 mutex_lock(&event_mutex);
15 if (event->id == event_id) 16 list_for_each_entry(event, &ftrace_events, list) {
16 return event->profile_enable(event); 17 if (event->id == event_id) {
18 ret = event->profile_enable(event);
19 break;
20 }
17 } 21 }
22 mutex_unlock(&event_mutex);
18 23
19 return -EINVAL; 24 return ret;
20} 25}
21 26
22void ftrace_profile_disable(int event_id) 27void ftrace_profile_disable(int event_id)
23{ 28{
24 struct ftrace_event_call *event; 29 struct ftrace_event_call *event;
25 30
26 for_each_event(event) { 31 mutex_lock(&event_mutex);
27 if (event->id == event_id) 32 list_for_each_entry(event, &ftrace_events, list) {
28 return event->profile_disable(event); 33 if (event->id == event_id) {
34 event->profile_disable(event);
35 break;
36 }
29 } 37 }
38 mutex_unlock(&event_mutex);
30} 39}
31
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index fd78bee71dd7..5e32e375134d 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -57,7 +57,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
57 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") 57 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
58); 58);
59 59
60TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, 60TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
61 TRACE_STRUCT( 61 TRACE_STRUCT(
62 TRACE_FIELD(unsigned long, arg1, arg1) 62 TRACE_FIELD(unsigned long, arg1, arg1)
63 TRACE_FIELD(unsigned long, arg2, arg2) 63 TRACE_FIELD(unsigned long, arg2, arg2)
@@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
122TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, 122TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
123 TRACE_STRUCT( 123 TRACE_STRUCT(
124 TRACE_FIELD(unsigned int, line, line) 124 TRACE_FIELD(unsigned int, line, line)
125 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func) 125 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
126 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) 126 TRACE_FUNC_SIZE+1, func)
127 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
128 TRACE_FUNC_SIZE+1, file)
127 TRACE_FIELD(char, correct, correct) 129 TRACE_FIELD(char, correct, correct)
128 ), 130 ),
129 TP_RAW_FMT("%u:%s:%s (%u)") 131 TP_RAW_FMT("%u:%s:%s (%u)")
@@ -139,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
139 141
140TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, 142TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
141 TRACE_STRUCT( 143 TRACE_STRUCT(
142 TRACE_FIELD(ktime_t, state_data.stamp, stamp) 144 TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
143 TRACE_FIELD(ktime_t, state_data.end, end) 145 TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
144 TRACE_FIELD(int, state_data.type, type) 146 TRACE_FIELD(int, state_data.type, type)
145 TRACE_FIELD(int, state_data.state, state) 147 TRACE_FIELD(int, state_data.state, state)
146 ), 148 ),
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 576f4fa2af0d..aa08be69a1b6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -8,19 +8,25 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/workqueue.h>
12#include <linux/spinlock.h>
13#include <linux/kthread.h>
11#include <linux/debugfs.h> 14#include <linux/debugfs.h>
12#include <linux/uaccess.h> 15#include <linux/uaccess.h>
13#include <linux/module.h> 16#include <linux/module.h>
14#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/delay.h>
15 19
16#include "trace_output.h" 20#include "trace_output.h"
17 21
18#define TRACE_SYSTEM "TRACE_SYSTEM" 22#define TRACE_SYSTEM "TRACE_SYSTEM"
19 23
20static DEFINE_MUTEX(event_mutex); 24DEFINE_MUTEX(event_mutex);
25
26LIST_HEAD(ftrace_events);
21 27
22int trace_define_field(struct ftrace_event_call *call, char *type, 28int trace_define_field(struct ftrace_event_call *call, char *type,
23 char *name, int offset, int size) 29 char *name, int offset, int size, int is_signed)
24{ 30{
25 struct ftrace_event_field *field; 31 struct ftrace_event_field *field;
26 32
@@ -38,6 +44,7 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
38 44
39 field->offset = offset; 45 field->offset = offset;
40 field->size = size; 46 field->size = size;
47 field->is_signed = is_signed;
41 list_add(&field->link, &call->fields); 48 list_add(&field->link, &call->fields);
42 49
43 return 0; 50 return 0;
@@ -51,47 +58,94 @@ err:
51 58
52 return -ENOMEM; 59 return -ENOMEM;
53} 60}
61EXPORT_SYMBOL_GPL(trace_define_field);
54 62
55static void ftrace_clear_events(void) 63#ifdef CONFIG_MODULES
56{
57 struct ftrace_event_call *call = (void *)__start_ftrace_events;
58
59 64
60 while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { 65static void trace_destroy_fields(struct ftrace_event_call *call)
66{
67 struct ftrace_event_field *field, *next;
61 68
62 if (call->enabled) { 69 list_for_each_entry_safe(field, next, &call->fields, link) {
63 call->enabled = 0; 70 list_del(&field->link);
64 call->unregfunc(); 71 kfree(field->type);
65 } 72 kfree(field->name);
66 call++; 73 kfree(field);
67 } 74 }
68} 75}
69 76
77#endif /* CONFIG_MODULES */
78
70static void ftrace_event_enable_disable(struct ftrace_event_call *call, 79static void ftrace_event_enable_disable(struct ftrace_event_call *call,
71 int enable) 80 int enable)
72{ 81{
73
74 switch (enable) { 82 switch (enable) {
75 case 0: 83 case 0:
76 if (call->enabled) { 84 if (call->enabled) {
77 call->enabled = 0; 85 call->enabled = 0;
86 tracing_stop_cmdline_record();
78 call->unregfunc(); 87 call->unregfunc();
79 } 88 }
80 break; 89 break;
81 case 1: 90 case 1:
82 if (!call->enabled) { 91 if (!call->enabled) {
83 call->enabled = 1; 92 call->enabled = 1;
93 tracing_start_cmdline_record();
84 call->regfunc(); 94 call->regfunc();
85 } 95 }
86 break; 96 break;
87 } 97 }
88} 98}
89 99
100static void ftrace_clear_events(void)
101{
102 struct ftrace_event_call *call;
103
104 mutex_lock(&event_mutex);
105 list_for_each_entry(call, &ftrace_events, list) {
106 ftrace_event_enable_disable(call, 0);
107 }
108 mutex_unlock(&event_mutex);
109}
110
111/*
112 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
113 */
114static int __ftrace_set_clr_event(const char *match, const char *sub,
115 const char *event, int set)
116{
117 struct ftrace_event_call *call;
118 int ret = -EINVAL;
119
120 mutex_lock(&event_mutex);
121 list_for_each_entry(call, &ftrace_events, list) {
122
123 if (!call->name || !call->regfunc)
124 continue;
125
126 if (match &&
127 strcmp(match, call->name) != 0 &&
128 strcmp(match, call->system) != 0)
129 continue;
130
131 if (sub && strcmp(sub, call->system) != 0)
132 continue;
133
134 if (event && strcmp(event, call->name) != 0)
135 continue;
136
137 ftrace_event_enable_disable(call, set);
138
139 ret = 0;
140 }
141 mutex_unlock(&event_mutex);
142
143 return ret;
144}
145
90static int ftrace_set_clr_event(char *buf, int set) 146static int ftrace_set_clr_event(char *buf, int set)
91{ 147{
92 struct ftrace_event_call *call = __start_ftrace_events;
93 char *event = NULL, *sub = NULL, *match; 148 char *event = NULL, *sub = NULL, *match;
94 int ret = -EINVAL;
95 149
96 /* 150 /*
97 * The buf format can be <subsystem>:<event-name> 151 * The buf format can be <subsystem>:<event-name>
@@ -117,30 +171,24 @@ static int ftrace_set_clr_event(char *buf, int set)
117 event = NULL; 171 event = NULL;
118 } 172 }
119 173
120 mutex_lock(&event_mutex); 174 return __ftrace_set_clr_event(match, sub, event, set);
121 for_each_event(call) { 175}
122
123 if (!call->name || !call->regfunc)
124 continue;
125
126 if (match &&
127 strcmp(match, call->name) != 0 &&
128 strcmp(match, call->system) != 0)
129 continue;
130
131 if (sub && strcmp(sub, call->system) != 0)
132 continue;
133
134 if (event && strcmp(event, call->name) != 0)
135 continue;
136
137 ftrace_event_enable_disable(call, set);
138
139 ret = 0;
140 }
141 mutex_unlock(&event_mutex);
142 176
143 return ret; 177/**
178 * trace_set_clr_event - enable or disable an event
179 * @system: system name to match (NULL for any system)
180 * @event: event name to match (NULL for all events, within system)
181 * @set: 1 to enable, 0 to disable
182 *
183 * This is a way for other parts of the kernel to enable or disable
184 * event recording.
185 *
186 * Returns 0 on success, -EINVAL if the parameters do not match any
187 * registered events.
188 */
189int trace_set_clr_event(const char *system, const char *event, int set)
190{
191 return __ftrace_set_clr_event(NULL, system, event, set);
144} 192}
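trace_set_clr_event() gives the rest of the kernel a programmatic equivalent of writing to the set_event file. A hypothetical in-kernel caller (the event and subsystem names here are illustrative) might do:

	/* enable a single event */
	if (trace_set_clr_event("sched", "sched_switch", 1))
		pr_warning("sched_switch event not available\n");

	/* enable every event in a subsystem */
	trace_set_clr_event("irq", NULL, 1);

	/* ... and disable them again later */
	trace_set_clr_event("sched", "sched_switch", 0);
	trace_set_clr_event("irq", NULL, 0);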
145 193
146/* 128 should be much more than enough */ 194/* 128 should be much more than enough */
@@ -224,15 +272,17 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
224static void * 272static void *
225t_next(struct seq_file *m, void *v, loff_t *pos) 273t_next(struct seq_file *m, void *v, loff_t *pos)
226{ 274{
227 struct ftrace_event_call *call = m->private; 275 struct list_head *list = m->private;
228 struct ftrace_event_call *next = call; 276 struct ftrace_event_call *call;
229 277
230 (*pos)++; 278 (*pos)++;
231 279
232 for (;;) { 280 for (;;) {
233 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) 281 if (list == &ftrace_events)
234 return NULL; 282 return NULL;
235 283
284 call = list_entry(list, struct ftrace_event_call, list);
285
236 /* 286 /*
237 * The ftrace subsystem is for showing formats only. 287 * The ftrace subsystem is for showing formats only.
238 * They cannot be enabled or disabled via the event files. 288
@@ -240,45 +290,51 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
240 if (call->regfunc) 290 if (call->regfunc)
241 break; 291 break;
242 292
243 call++; 293 list = list->next;
244 next = call;
245 } 294 }
246 295
247 m->private = ++next; 296 m->private = list->next;
248 297
249 return call; 298 return call;
250} 299}
251 300
252static void *t_start(struct seq_file *m, loff_t *pos) 301static void *t_start(struct seq_file *m, loff_t *pos)
253{ 302{
303 mutex_lock(&event_mutex);
304 if (*pos == 0)
305 m->private = ftrace_events.next;
254 return t_next(m, NULL, pos); 306 return t_next(m, NULL, pos);
255} 307}
256 308
257static void * 309static void *
258s_next(struct seq_file *m, void *v, loff_t *pos) 310s_next(struct seq_file *m, void *v, loff_t *pos)
259{ 311{
260 struct ftrace_event_call *call = m->private; 312 struct list_head *list = m->private;
261 struct ftrace_event_call *next; 313 struct ftrace_event_call *call;
262 314
263 (*pos)++; 315 (*pos)++;
264 316
265 retry: 317 retry:
266 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) 318 if (list == &ftrace_events)
267 return NULL; 319 return NULL;
268 320
321 call = list_entry(list, struct ftrace_event_call, list);
322
269 if (!call->enabled) { 323 if (!call->enabled) {
270 call++; 324 list = list->next;
271 goto retry; 325 goto retry;
272 } 326 }
273 327
274 next = call; 328 m->private = list->next;
275 m->private = ++next;
276 329
277 return call; 330 return call;
278} 331}
279 332
280static void *s_start(struct seq_file *m, loff_t *pos) 333static void *s_start(struct seq_file *m, loff_t *pos)
281{ 334{
335 mutex_lock(&event_mutex);
336 if (*pos == 0)
337 m->private = ftrace_events.next;
282 return s_next(m, NULL, pos); 338 return s_next(m, NULL, pos);
283} 339}
284 340
@@ -295,12 +351,12 @@ static int t_show(struct seq_file *m, void *v)
295 351
296static void t_stop(struct seq_file *m, void *p) 352static void t_stop(struct seq_file *m, void *p)
297{ 353{
354 mutex_unlock(&event_mutex);
298} 355}
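Note the locking scheme the hunks above establish: t_start()/s_start() take event_mutex and t_stop() drops it, so ftrace_events cannot change while a seq_file read is walking it. This is safe because the seq_file core guarantees stop() runs after every start(), even on error, so the mutex is never leaked; the cursor is handed between calls via m->private. Consolidated from the hunks above, the shape of the pattern is:

	/* seq_read() drives: start() -> next()* -> stop(), repeatedly */
	static void *t_start(struct seq_file *m, loff_t *pos)
	{
		mutex_lock(&event_mutex);	/* held across the walk */
		if (*pos == 0)
			m->private = ftrace_events.next;
		return t_next(m, NULL, pos);
	}

	static void t_stop(struct seq_file *m, void *p)
	{
		mutex_unlock(&event_mutex);	/* paired with t_start() */
	}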
299 356
300static int 357static int
301ftrace_event_seq_open(struct inode *inode, struct file *file) 358ftrace_event_seq_open(struct inode *inode, struct file *file)
302{ 359{
303 int ret;
304 const struct seq_operations *seq_ops; 360 const struct seq_operations *seq_ops;
305 361
306 if ((file->f_mode & FMODE_WRITE) && 362 if ((file->f_mode & FMODE_WRITE) &&
@@ -308,13 +364,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
308 ftrace_clear_events(); 364 ftrace_clear_events();
309 365
310 seq_ops = inode->i_private; 366 seq_ops = inode->i_private;
311 ret = seq_open(file, seq_ops); 367 return seq_open(file, seq_ops);
312 if (!ret) {
313 struct seq_file *m = file->private_data;
314
315 m->private = __start_ftrace_events;
316 }
317 return ret;
318} 368}
319 369
320static ssize_t 370static ssize_t
@@ -374,8 +424,93 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
374 return cnt; 424 return cnt;
375} 425}
376 426
427static ssize_t
428system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
429 loff_t *ppos)
430{
431 const char set_to_char[4] = { '?', '0', '1', 'X' };
432 const char *system = filp->private_data;
433 struct ftrace_event_call *call;
434 char buf[2];
435 int set = 0;
436 int ret;
437
438 mutex_lock(&event_mutex);
439 list_for_each_entry(call, &ftrace_events, list) {
440 if (!call->name || !call->regfunc)
441 continue;
442
443 if (system && strcmp(call->system, system) != 0)
444 continue;
445
446 /*
447 * We need to find out if all the events are set
448 * or if all events are cleared, or if we have
449 * a mixture.
450 */
451 set |= (1 << !!call->enabled);
452
453 /*
454 * If we have a mixture, no need to look further.
455 */
456 if (set == 3)
457 break;
458 }
459 mutex_unlock(&event_mutex);
460
461 buf[0] = set_to_char[set];
462 buf[1] = '\n';
463
464 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
465
466 return ret;
467}
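The set |= (1 << !!call->enabled) trick above encodes three outcomes in two bits: seeing a disabled event sets bit 0, seeing an enabled one sets bit 1, and the final value indexes set_to_char directly. Traced by hand:

	const char set_to_char[4] = { '?', '0', '1', 'X' };
	int set = 0;

	set |= 1 << !!0;	/* a disabled event: set == 0x1 -> '0' */
	set |= 1 << !!1;	/* an enabled event:  set == 0x3 -> 'X' */

	/* set == 0 means no matching events at all -> '?' */

A subsystem whose events are partly on and partly off therefore reads back as 'X', and the loop can stop early once set reaches 3.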
468
469static ssize_t
470system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
471 loff_t *ppos)
472{
473 const char *system = filp->private_data;
474 unsigned long val;
475 char buf[64];
476 ssize_t ret;
477
478 if (cnt >= sizeof(buf))
479 return -EINVAL;
480
481 if (copy_from_user(&buf, ubuf, cnt))
482 return -EFAULT;
483
484 buf[cnt] = 0;
485
486 ret = strict_strtoul(buf, 10, &val);
487 if (ret < 0)
488 return ret;
489
490 ret = tracing_update_buffers();
491 if (ret < 0)
492 return ret;
493
494 if (val != 0 && val != 1)
495 return -EINVAL;
496
497 ret = __ftrace_set_clr_event(NULL, system, NULL, val);
498 if (ret)
499 goto out;
500
501 ret = cnt;
502
503out:
504 *ppos += cnt;
505
506 return ret;
507}
508
509extern char *__bad_type_size(void);
510
377#undef FIELD 511#undef FIELD
378#define FIELD(type, name) \ 512#define FIELD(type, name) \
513 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
379 #type, "common_" #name, offsetof(typeof(field), name), \ 514 #type, "common_" #name, offsetof(typeof(field), name), \
380 sizeof(field.name) 515 sizeof(field.name)
381 516
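The reworked FIELD() macro buys a link-time size check almost for free: __bad_type_size() is declared above but deliberately never defined. When sizeof(type) matches the field, the ternary folds to the string literal and no reference to the symbol is emitted; on a mismatch the call survives and the build breaks at link time. The trick in isolation (a sketch; it relies on the compiler folding the constant condition, which -O2 kernel builds do):

	extern char *__bad_type_size(void);	/* intentionally undefined */

	struct sample { int pid; };

	#define TYPE_OR_FAIL(type, expr) \
		(sizeof(type) != sizeof(expr) ? __bad_type_size() : #type)

	/* folds to "int"; a size mismatch would leave an unresolved
	 * reference to __bad_type_size() and fail the link */
	static const char *pid_type(void)
	{
		return TYPE_OR_FAIL(int, ((struct sample *)0)->pid);
	}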
@@ -391,7 +526,7 @@ static int trace_write_header(struct trace_seq *s)
391 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 526 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
392 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 527 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
393 "\n", 528 "\n",
394 FIELD(unsigned char, type), 529 FIELD(unsigned short, type),
395 FIELD(unsigned char, flags), 530 FIELD(unsigned char, flags),
396 FIELD(unsigned char, preempt_count), 531 FIELD(unsigned char, preempt_count),
397 FIELD(int, pid), 532 FIELD(int, pid),
@@ -481,7 +616,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
481 616
482 trace_seq_init(s); 617 trace_seq_init(s);
483 618
484 filter_print_preds(call->preds, s); 619 print_event_filter(call, s);
485 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); 620 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
486 621
487 kfree(s); 622 kfree(s);
@@ -494,38 +629,26 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
494 loff_t *ppos) 629 loff_t *ppos)
495{ 630{
496 struct ftrace_event_call *call = filp->private_data; 631 struct ftrace_event_call *call = filp->private_data;
497 char buf[64], *pbuf = buf; 632 char *buf;
498 struct filter_pred *pred;
499 int err; 633 int err;
500 634
501 if (cnt >= sizeof(buf)) 635 if (cnt >= PAGE_SIZE)
502 return -EINVAL; 636 return -EINVAL;
503 637
504 if (copy_from_user(&buf, ubuf, cnt)) 638 buf = (char *)__get_free_page(GFP_TEMPORARY);
505 return -EFAULT; 639 if (!buf)
506 buf[cnt] = '\0';
507
508 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
509 if (!pred)
510 return -ENOMEM; 640 return -ENOMEM;
511 641
512 err = filter_parse(&pbuf, pred); 642 if (copy_from_user(buf, ubuf, cnt)) {
513 if (err < 0) { 643 free_page((unsigned long) buf);
514 filter_free_pred(pred); 644 return -EFAULT;
515 return err;
516 }
517
518 if (pred->clear) {
519 filter_free_preds(call);
520 filter_free_pred(pred);
521 return cnt;
522 } 645 }
646 buf[cnt] = '\0';
523 647
524 err = filter_add_pred(call, pred); 648 err = apply_event_filter(call, buf);
525 if (err < 0) { 649 free_page((unsigned long) buf);
526 filter_free_pred(pred); 650 if (err < 0)
527 return err; 651 return err;
528 }
529 652
530 *ppos += cnt; 653 *ppos += cnt;
531 654
@@ -549,7 +672,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
549 672
550 trace_seq_init(s); 673 trace_seq_init(s);
551 674
552 filter_print_preds(system->preds, s); 675 print_subsystem_event_filter(system, s);
553 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); 676 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
554 677
555 kfree(s); 678 kfree(s);
@@ -562,45 +685,56 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
562 loff_t *ppos) 685 loff_t *ppos)
563{ 686{
564 struct event_subsystem *system = filp->private_data; 687 struct event_subsystem *system = filp->private_data;
565 char buf[64], *pbuf = buf; 688 char *buf;
566 struct filter_pred *pred;
567 int err; 689 int err;
568 690
569 if (cnt >= sizeof(buf)) 691 if (cnt >= PAGE_SIZE)
570 return -EINVAL; 692 return -EINVAL;
571 693
572 if (copy_from_user(&buf, ubuf, cnt)) 694 buf = (char *)__get_free_page(GFP_TEMPORARY);
573 return -EFAULT; 695 if (!buf)
574 buf[cnt] = '\0';
575
576 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
577 if (!pred)
578 return -ENOMEM; 696 return -ENOMEM;
579 697
580 err = filter_parse(&pbuf, pred); 698 if (copy_from_user(buf, ubuf, cnt)) {
581 if (err < 0) { 699 free_page((unsigned long) buf);
582 filter_free_pred(pred); 700 return -EFAULT;
583 return err;
584 }
585
586 if (pred->clear) {
587 filter_free_subsystem_preds(system);
588 filter_free_pred(pred);
589 return cnt;
590 } 701 }
702 buf[cnt] = '\0';
591 703
592 err = filter_add_subsystem_pred(system, pred); 704 err = apply_subsystem_event_filter(system, buf);
593 if (err < 0) { 705 free_page((unsigned long) buf);
594 filter_free_subsystem_preds(system); 706 if (err < 0)
595 filter_free_pred(pred);
596 return err; 707 return err;
597 }
598 708
599 *ppos += cnt; 709 *ppos += cnt;
600 710
601 return cnt; 711 return cnt;
602} 712}
603 713
714static ssize_t
715show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
716{
717 int (*func)(struct trace_seq *s) = filp->private_data;
718 struct trace_seq *s;
719 int r;
720
721 if (*ppos)
722 return 0;
723
724 s = kmalloc(sizeof(*s), GFP_KERNEL);
725 if (!s)
726 return -ENOMEM;
727
728 trace_seq_init(s);
729
730 func(s);
731 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
732
733 kfree(s);
734
735 return r;
736}
737
604static const struct seq_operations show_event_seq_ops = { 738static const struct seq_operations show_event_seq_ops = {
605 .start = t_start, 739 .start = t_start,
606 .next = t_next, 740 .next = t_next,
@@ -658,6 +792,17 @@ static const struct file_operations ftrace_subsystem_filter_fops = {
658 .write = subsystem_filter_write, 792 .write = subsystem_filter_write,
659}; 793};
660 794
795static const struct file_operations ftrace_system_enable_fops = {
796 .open = tracing_open_generic,
797 .read = system_enable_read,
798 .write = system_enable_write,
799};
800
801static const struct file_operations ftrace_show_header_fops = {
802 .open = tracing_open_generic,
803 .read = show_header,
804};
805
661static struct dentry *event_trace_events_dir(void) 806static struct dentry *event_trace_events_dir(void)
662{ 807{
663 static struct dentry *d_tracer; 808 static struct dentry *d_tracer;
@@ -684,6 +829,7 @@ static struct dentry *
684event_subsystem_dir(const char *name, struct dentry *d_events) 829event_subsystem_dir(const char *name, struct dentry *d_events)
685{ 830{
686 struct event_subsystem *system; 831 struct event_subsystem *system;
832 struct dentry *entry;
687 833
688 /* First see if we did not already create this dir */ 834 /* First see if we did not already create this dir */
689 list_for_each_entry(system, &event_subsystems, list) { 835 list_for_each_entry(system, &event_subsystems, list) {
@@ -707,16 +853,46 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
707 return d_events; 853 return d_events;
708 } 854 }
709 855
710 system->name = name; 856 system->name = kstrdup(name, GFP_KERNEL);
857 if (!system->name) {
858 debugfs_remove(system->entry);
859 kfree(system);
860 return d_events;
861 }
862
711 list_add(&system->list, &event_subsystems); 863 list_add(&system->list, &event_subsystems);
712 864
713 system->preds = NULL; 865 system->filter = NULL;
866
867 system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
868 if (!system->filter) {
869 pr_warning("Could not allocate filter for subsystem "
870 "'%s'\n", name);
871 return system->entry;
872 }
873
874 entry = debugfs_create_file("filter", 0644, system->entry, system,
875 &ftrace_subsystem_filter_fops);
876 if (!entry) {
877 kfree(system->filter);
878 system->filter = NULL;
879 pr_warning("Could not create debugfs "
880 "'%s/filter' entry\n", name);
881 }
882
883 entry = trace_create_file("enable", 0644, system->entry,
884 (void *)system->name,
885 &ftrace_system_enable_fops);
714 886
715 return system->entry; 887 return system->entry;
716} 888}
717 889
718static int 890static int
719event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) 891event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
892 const struct file_operations *id,
893 const struct file_operations *enable,
894 const struct file_operations *filter,
895 const struct file_operations *format)
720{ 896{
721 struct dentry *entry; 897 struct dentry *entry;
722 int ret; 898 int ret;
@@ -725,7 +901,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
725 * If the trace point header did not define TRACE_SYSTEM 901 * If the trace point header did not define TRACE_SYSTEM
726 * then the system would be called "TRACE_SYSTEM". 902 * then the system would be called "TRACE_SYSTEM".
727 */ 903 */
728 if (strcmp(call->system, "TRACE_SYSTEM") != 0) 904 if (strcmp(call->system, TRACE_SYSTEM) != 0)
729 d_events = event_subsystem_dir(call->system, d_events); 905 d_events = event_subsystem_dir(call->system, d_events);
730 906
731 if (call->raw_init) { 907 if (call->raw_init) {
@@ -744,21 +920,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
744 return -1; 920 return -1;
745 } 921 }
746 922
747 if (call->regfunc) { 923 if (call->regfunc)
748 entry = debugfs_create_file("enable", 0644, call->dir, call, 924 entry = trace_create_file("enable", 0644, call->dir, call,
749 &ftrace_enable_fops); 925 enable);
750 if (!entry)
751 pr_warning("Could not create debugfs "
752 "'%s/enable' entry\n", call->name);
753 }
754 926
755 if (call->id) { 927 if (call->id)
756 entry = debugfs_create_file("id", 0444, call->dir, call, 928 entry = trace_create_file("id", 0444, call->dir, call,
757 &ftrace_event_id_fops); 929 id);
758 if (!entry)
759 pr_warning("Could not create debugfs '%s/id' entry\n",
760 call->name);
761 }
762 930
763 if (call->define_fields) { 931 if (call->define_fields) {
764 ret = call->define_fields(); 932 ret = call->define_fields();
@@ -767,32 +935,195 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
767 " events/%s\n", call->name); 935 " events/%s\n", call->name);
768 return ret; 936 return ret;
769 } 937 }
770 entry = debugfs_create_file("filter", 0644, call->dir, call, 938 entry = trace_create_file("filter", 0644, call->dir, call,
771 &ftrace_event_filter_fops); 939 filter);
772 if (!entry)
773 pr_warning("Could not create debugfs "
774 "'%s/filter' entry\n", call->name);
775 } 940 }
776 941
777 /* A trace may not want to export its format */ 942 /* A trace may not want to export its format */
778 if (!call->show_format) 943 if (!call->show_format)
779 return 0; 944 return 0;
780 945
781 entry = debugfs_create_file("format", 0444, call->dir, call, 946 entry = trace_create_file("format", 0444, call->dir, call,
782 &ftrace_event_format_fops); 947 format);
783 if (!entry) 948
784 pr_warning("Could not create debugfs " 949 return 0;
785 "'%s/format' entry\n", call->name); 950}
951
952#define for_each_event(event, start, end) \
953 for (event = start; \
954 (unsigned long)event < (unsigned long)end; \
955 event++)
956
957#ifdef CONFIG_MODULES
958
959static LIST_HEAD(ftrace_module_file_list);
960
961/*
962 * Modules must own their file_operations to keep up with
963 * reference counting.
964 */
965struct ftrace_module_file_ops {
966 struct list_head list;
967 struct module *mod;
968 struct file_operations id;
969 struct file_operations enable;
970 struct file_operations format;
971 struct file_operations filter;
972};
973
974static struct ftrace_module_file_ops *
975trace_create_file_ops(struct module *mod)
976{
977 struct ftrace_module_file_ops *file_ops;
978
979 /*
980 * This is a bit of a PITA. To allow for correct reference
981 * counting, modules must "own" their file_operations.
982 * To do this, we allocate the file operations that will be
983 * used in the event directory.
984 */
985
986 file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
987 if (!file_ops)
988 return NULL;
989
990 file_ops->mod = mod;
991
992 file_ops->id = ftrace_event_id_fops;
993 file_ops->id.owner = mod;
994
995 file_ops->enable = ftrace_enable_fops;
996 file_ops->enable.owner = mod;
997
998 file_ops->filter = ftrace_event_filter_fops;
999 file_ops->filter.owner = mod;
1000
1001 file_ops->format = ftrace_event_format_fops;
1002 file_ops->format.owner = mod;
1003
1004 list_add(&file_ops->list, &ftrace_module_file_list);
1005
1006 return file_ops;
1007}
1008
1009static void trace_module_add_events(struct module *mod)
1010{
1011 struct ftrace_module_file_ops *file_ops = NULL;
1012 struct ftrace_event_call *call, *start, *end;
1013 struct dentry *d_events;
1014
1015 start = mod->trace_events;
1016 end = mod->trace_events + mod->num_trace_events;
1017
1018 if (start == end)
1019 return;
1020
1021 d_events = event_trace_events_dir();
1022 if (!d_events)
1023 return;
1024
1025 for_each_event(call, start, end) {
1026 /* The linker may leave blanks */
1027 if (!call->name)
1028 continue;
1029
1030 /*
1031 * This module has events; create file ops for this module
1032 * if not already done.
1033 */
1034 if (!file_ops) {
1035 file_ops = trace_create_file_ops(mod);
1036 if (!file_ops)
1037 return;
1038 }
1039 call->mod = mod;
1040 list_add(&call->list, &ftrace_events);
1041 event_create_dir(call, d_events,
1042 &file_ops->id, &file_ops->enable,
1043 &file_ops->filter, &file_ops->format);
1044 }
1045}
1046
1047static void trace_module_remove_events(struct module *mod)
1048{
1049 struct ftrace_module_file_ops *file_ops;
1050 struct ftrace_event_call *call, *p;
1051 bool found = false;
1052
1053 down_write(&trace_event_mutex);
1054 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1055 if (call->mod == mod) {
1056 found = true;
1057 ftrace_event_enable_disable(call, 0);
1058 if (call->event)
1059 __unregister_ftrace_event(call->event);
1060 debugfs_remove_recursive(call->dir);
1061 list_del(&call->list);
1062 trace_destroy_fields(call);
1063 destroy_preds(call);
1064 }
1065 }
1066
1067 /* Now free the file_operations */
1068 list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1069 if (file_ops->mod == mod)
1070 break;
1071 }
1072 if (&file_ops->list != &ftrace_module_file_list) {
1073 list_del(&file_ops->list);
1074 kfree(file_ops);
1075 }
1076
1077 /*
1078 * It is safest to reset the ring buffer if the module being unloaded
1079 * registered any events.
1080 */
1081 if (found)
1082 tracing_reset_current_online_cpus();
1083 up_write(&trace_event_mutex);
1084}
1085
1086static int trace_module_notify(struct notifier_block *self,
1087 unsigned long val, void *data)
1088{
1089 struct module *mod = data;
1090
1091 mutex_lock(&event_mutex);
1092 switch (val) {
1093 case MODULE_STATE_COMING:
1094 trace_module_add_events(mod);
1095 break;
1096 case MODULE_STATE_GOING:
1097 trace_module_remove_events(mod);
1098 break;
1099 }
1100 mutex_unlock(&event_mutex);
786 1101
787 return 0; 1102 return 0;
788} 1103}
1104#else
1105static int trace_module_notify(struct notifier_block *self,
1106 unsigned long val, void *data)
1107{
1108 return 0;
1109}
1110#endif /* CONFIG_MODULES */
1111
1112struct notifier_block trace_module_nb = {
1113 .notifier_call = trace_module_notify,
1114 .priority = 0,
1115};
1116
1117extern struct ftrace_event_call __start_ftrace_events[];
1118extern struct ftrace_event_call __stop_ftrace_events[];
789 1119
790static __init int event_trace_init(void) 1120static __init int event_trace_init(void)
791{ 1121{
792 struct ftrace_event_call *call = __start_ftrace_events; 1122 struct ftrace_event_call *call;
793 struct dentry *d_tracer; 1123 struct dentry *d_tracer;
794 struct dentry *entry; 1124 struct dentry *entry;
795 struct dentry *d_events; 1125 struct dentry *d_events;
1126 int ret;
796 1127
797 d_tracer = tracing_init_dentry(); 1128 d_tracer = tracing_init_dentry();
798 if (!d_tracer) 1129 if (!d_tracer)
@@ -816,13 +1147,243 @@ static __init int event_trace_init(void)
816 if (!d_events) 1147 if (!d_events)
817 return 0; 1148 return 0;
818 1149
819 for_each_event(call) { 1150 /* ring buffer internal formats */
1151 trace_create_file("header_page", 0444, d_events,
1152 ring_buffer_print_page_header,
1153 &ftrace_show_header_fops);
1154
1155 trace_create_file("header_event", 0444, d_events,
1156 ring_buffer_print_entry_header,
1157 &ftrace_show_header_fops);
1158
1159 trace_create_file("enable", 0644, d_events,
1160 NULL, &ftrace_system_enable_fops);
1161
1162 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
820 /* The linker may leave blanks */ 1163 /* The linker may leave blanks */
821 if (!call->name) 1164 if (!call->name)
822 continue; 1165 continue;
823 event_create_dir(call, d_events); 1166 list_add(&call->list, &ftrace_events);
1167 event_create_dir(call, d_events, &ftrace_event_id_fops,
1168 &ftrace_enable_fops, &ftrace_event_filter_fops,
1169 &ftrace_event_format_fops);
824 } 1170 }
825 1171
1172 ret = register_module_notifier(&trace_module_nb);
1173 if (ret)
1174 pr_warning("Failed to register trace events module notifier\n");
1175
826 return 0; 1176 return 0;
827} 1177}
828fs_initcall(event_trace_init); 1178fs_initcall(event_trace_init);
1179
1180#ifdef CONFIG_FTRACE_STARTUP_TEST
1181
1182static DEFINE_SPINLOCK(test_spinlock);
1183static DEFINE_SPINLOCK(test_spinlock_irq);
1184static DEFINE_MUTEX(test_mutex);
1185
1186static __init void test_work(struct work_struct *dummy)
1187{
1188 spin_lock(&test_spinlock);
1189 spin_lock_irq(&test_spinlock_irq);
1190 udelay(1);
1191 spin_unlock_irq(&test_spinlock_irq);
1192 spin_unlock(&test_spinlock);
1193
1194 mutex_lock(&test_mutex);
1195 msleep(1);
1196 mutex_unlock(&test_mutex);
1197}
1198
1199static __init int event_test_thread(void *unused)
1200{
1201 void *test_malloc;
1202
1203 test_malloc = kmalloc(1234, GFP_KERNEL);
1204 if (!test_malloc)
1205 pr_info("failed to kmalloc\n");
1206
1207 schedule_on_each_cpu(test_work);
1208
1209 kfree(test_malloc);
1210
1211 set_current_state(TASK_INTERRUPTIBLE);
1212 while (!kthread_should_stop())
1213 schedule();
1214
1215 return 0;
1216}
1217
1218/*
1219 * Do various things that may trigger events.
1220 */
1221static __init void event_test_stuff(void)
1222{
1223 struct task_struct *test_thread;
1224
1225 test_thread = kthread_run(event_test_thread, NULL, "test-events");
1226 msleep(1);
1227 kthread_stop(test_thread);
1228}
1229
1230/*
1231 * For every trace event defined, we will test each trace point separately,
1232 * and then by subsystem group, and finally all trace points together.
1233 */
1234static __init void event_trace_self_tests(void)
1235{
1236 struct ftrace_event_call *call;
1237 struct event_subsystem *system;
1238 int ret;
1239
1240 pr_info("Running tests on trace events:\n");
1241
1242 list_for_each_entry(call, &ftrace_events, list) {
1243
1244 /* Only test those that have a regfunc */
1245 if (!call->regfunc)
1246 continue;
1247
1248 pr_info("Testing event %s: ", call->name);
1249
1250 /*
1251 * If an event is already enabled, someone is using
1252 * it and the self test should not be on.
1253 */
1254 if (call->enabled) {
1255 pr_warning("Enabled event during self test!\n");
1256 WARN_ON_ONCE(1);
1257 continue;
1258 }
1259
1260 ftrace_event_enable_disable(call, 1);
1261 event_test_stuff();
1262 ftrace_event_enable_disable(call, 0);
1263
1264 pr_cont("OK\n");
1265 }
1266
1267 /* Now test at the subsystem level */
1268
1269 pr_info("Running tests on trace event systems:\n");
1270
1271 list_for_each_entry(system, &event_subsystems, list) {
1272
1273 /* the ftrace system is special, skip it */
1274 if (strcmp(system->name, "ftrace") == 0)
1275 continue;
1276
1277 pr_info("Testing event system %s: ", system->name);
1278
1279 ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1280 if (WARN_ON_ONCE(ret)) {
1281 pr_warning("error enabling system %s\n",
1282 system->name);
1283 continue;
1284 }
1285
1286 event_test_stuff();
1287
1288 ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1289 if (WARN_ON_ONCE(ret))
1290 pr_warning("error disabling system %s\n",
1291 system->name);
1292
1293 pr_cont("OK\n");
1294 }
1295
1296 /* Test with all events enabled */
1297
1298 pr_info("Running tests on all trace events:\n");
1299 pr_info("Testing all events: ");
1300
1301 ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1302 if (WARN_ON_ONCE(ret)) {
1303 pr_warning("error enabling all events\n");
1304 return;
1305 }
1306
1307 event_test_stuff();
1308
1309 /* reset sysname */
1310 ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1311 if (WARN_ON_ONCE(ret)) {
1312 pr_warning("error disabling all events\n");
1313 return;
1314 }
1315
1316 pr_cont("OK\n");
1317}
1318
1319#ifdef CONFIG_FUNCTION_TRACER
1320
1321static DEFINE_PER_CPU(atomic_t, test_event_disable);
1322
1323static void
1324function_test_events_call(unsigned long ip, unsigned long parent_ip)
1325{
1326 struct ring_buffer_event *event;
1327 struct ftrace_entry *entry;
1328 unsigned long flags;
1329 long disabled;
1330 int resched;
1331 int cpu;
1332 int pc;
1333
1334 pc = preempt_count();
1335 resched = ftrace_preempt_disable();
1336 cpu = raw_smp_processor_id();
1337 disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
1338
1339 if (disabled != 1)
1340 goto out;
1341
1342 local_save_flags(flags);
1343
1344 event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
1345 flags, pc);
1346 if (!event)
1347 goto out;
1348 entry = ring_buffer_event_data(event);
1349 entry->ip = ip;
1350 entry->parent_ip = parent_ip;
1351
1352 trace_nowake_buffer_unlock_commit(event, flags, pc);
1353
1354 out:
1355 atomic_dec(&per_cpu(test_event_disable, cpu));
1356 ftrace_preempt_enable(resched);
1357}
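This callback shows the usual recursion guard for function-tracer handlers. Preemption is disabled for the duration of the body, with ftrace_preempt_disable()/ftrace_preempt_enable() preserving the prior preemption state in resched, and the per-cpu counter ensures that only the outermost invocation (disabled == 1) writes to the ring buffer: every function called from inside the handler is itself traced and re-enters it, and those nested calls see disabled > 1 and take the early exit.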
1358
1359static struct ftrace_ops trace_ops __initdata =
1360{
1361 .func = function_test_events_call,
1362};
1363
1364static __init void event_trace_self_test_with_function(void)
1365{
1366 register_ftrace_function(&trace_ops);
1367 pr_info("Running tests again, along with the function tracer\n");
1368 event_trace_self_tests();
1369 unregister_ftrace_function(&trace_ops);
1370}
1371#else
1372static __init void event_trace_self_test_with_function(void)
1373{
1374}
1375#endif
1376
1377static __init int event_trace_self_tests_init(void)
1378{
1379
1380 event_trace_self_tests();
1381
1382 event_trace_self_test_with_function();
1383
1384 return 0;
1385}
1386
1387late_initcall(event_trace_self_tests_init);
1388
1389#endif
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e03cbf1e38f3..db6e54bdb596 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,119 +22,297 @@
22#include <linux/uaccess.h> 22#include <linux/uaccess.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/ctype.h> 24#include <linux/ctype.h>
25#include <linux/mutex.h>
25 26
26#include "trace.h" 27#include "trace.h"
27#include "trace_output.h" 28#include "trace_output.h"
28 29
29static int filter_pred_64(struct filter_pred *pred, void *event) 30static DEFINE_MUTEX(filter_mutex);
31
32enum filter_op_ids
33{
34 OP_OR,
35 OP_AND,
36 OP_NE,
37 OP_EQ,
38 OP_LT,
39 OP_LE,
40 OP_GT,
41 OP_GE,
42 OP_NONE,
43 OP_OPEN_PAREN,
44};
45
46struct filter_op {
47 int id;
48 char *string;
49 int precedence;
50};
51
52static struct filter_op filter_ops[] = {
53 { OP_OR, "||", 1 },
54 { OP_AND, "&&", 2 },
55 { OP_NE, "!=", 4 },
56 { OP_EQ, "==", 4 },
57 { OP_LT, "<", 5 },
58 { OP_LE, "<=", 5 },
59 { OP_GT, ">", 5 },
60 { OP_GE, ">=", 5 },
61 { OP_NONE, "OP_NONE", 0 },
62 { OP_OPEN_PAREN, "(", 0 },
63};
64
65enum {
66 FILT_ERR_NONE,
67 FILT_ERR_INVALID_OP,
68 FILT_ERR_UNBALANCED_PAREN,
69 FILT_ERR_TOO_MANY_OPERANDS,
70 FILT_ERR_OPERAND_TOO_LONG,
71 FILT_ERR_FIELD_NOT_FOUND,
72 FILT_ERR_ILLEGAL_FIELD_OP,
73 FILT_ERR_ILLEGAL_INTVAL,
74 FILT_ERR_BAD_SUBSYS_FILTER,
75 FILT_ERR_TOO_MANY_PREDS,
76 FILT_ERR_MISSING_FIELD,
77 FILT_ERR_INVALID_FILTER,
78};
79
80static char *err_text[] = {
81 "No error",
82 "Invalid operator",
83 "Unbalanced parens",
84 "Too many operands",
85 "Operand too long",
86 "Field not found",
87 "Illegal operation for field type",
88 "Illegal integer value",
89 "Couldn't find or set field in one of a subsystem's events",
90 "Too many terms in predicate expression",
91 "Missing field name and/or value",
92 "Meaningless filter expression",
93};
94
95struct opstack_op {
96 int op;
97 struct list_head list;
98};
99
100struct postfix_elt {
101 int op;
102 char *operand;
103 struct list_head list;
104};
105
106struct filter_parse_state {
107 struct filter_op *ops;
108 struct list_head opstack;
109 struct list_head postfix;
110 int lasterr;
111 int lasterr_pos;
112
113 struct {
114 char *string;
115 unsigned int cnt;
116 unsigned int tail;
117 } infix;
118
119 struct {
120 char string[MAX_FILTER_STR_VAL];
121 int pos;
122 unsigned int tail;
123 } operand;
124};
125
126DEFINE_COMPARISON_PRED(s64);
127DEFINE_COMPARISON_PRED(u64);
128DEFINE_COMPARISON_PRED(s32);
129DEFINE_COMPARISON_PRED(u32);
130DEFINE_COMPARISON_PRED(s16);
131DEFINE_COMPARISON_PRED(u16);
132DEFINE_COMPARISON_PRED(s8);
133DEFINE_COMPARISON_PRED(u8);
134
135DEFINE_EQUALITY_PRED(64);
136DEFINE_EQUALITY_PRED(32);
137DEFINE_EQUALITY_PRED(16);
138DEFINE_EQUALITY_PRED(8);
139
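The DEFINE_COMPARISON_PRED() and DEFINE_EQUALITY_PRED() macros themselves live outside this hunk (in trace.h). Judging by the filter_pred_s64()/filter_pred_64() names used later in this file, each comparison variant presumably expands to a predicate of roughly this shape; this is a sketch, not the verbatim macro:

    static int filter_pred_s64(struct filter_pred *pred, void *event,
                               int val1, int val2)
    {
            s64 *addr = (s64 *)(event + pred->offset);
            s64 val = (s64)pred->val;
            int match = 0;

            switch (pred->op) {
            case OP_LT: match = (*addr <  val); break;
            case OP_LE: match = (*addr <= val); break;
            case OP_GT: match = (*addr >  val); break;
            case OP_GE: match = (*addr >= val); break;
            }

            return match;
    }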
140static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
141 void *event __attribute((unused)),
142 int val1, int val2)
143{
144 return val1 && val2;
145}
146
147static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
148 void *event __attribute((unused)),
149 int val1, int val2)
150{
151 return val1 || val2;
152}
153
154/* Filter predicate for fixed-size arrays of characters */
155static int filter_pred_string(struct filter_pred *pred, void *event,
156 int val1, int val2)
30{ 157{
31 u64 *addr = (u64 *)(event + pred->offset); 158 char *addr = (char *)(event + pred->offset);
32 u64 val = (u64)pred->val; 159 int cmp, match;
33 int match; 160
161 cmp = strncmp(addr, pred->str_val, pred->str_len);
34 162
35 match = (val == *addr) ^ pred->not; 163 match = (!cmp) ^ pred->not;
36 164
37 return match; 165 return match;
38} 166}
39 167
40static int filter_pred_32(struct filter_pred *pred, void *event) 168/*
169 * Filter predicate for dynamically sized arrays of characters.
170 * These are implemented as a list of strings at the end of the
171 * entry.
172 * Each of these strings has a field in the entry which contains
173 * its offset from the beginning of the entry.
174 * We therefore first read that offset field, add its value to the
175 * address of the entry, and thereby obtain the address of the
176 * string itself.
177 */
178static int filter_pred_strloc(struct filter_pred *pred, void *event,
179 int val1, int val2)
41{ 180{
42 u32 *addr = (u32 *)(event + pred->offset); 181 int str_loc = *(int *)(event + pred->offset);
43 u32 val = (u32)pred->val; 182 char *addr = (char *)(event + str_loc);
44 int match; 183 int cmp, match;
184
185 cmp = strncmp(addr, pred->str_val, pred->str_len);
45 186
46 match = (val == *addr) ^ pred->not; 187 match = (!cmp) ^ pred->not;
47 188
48 return match; 189 return match;
49} 190}
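A sketch of the record layout that filter_pred_strloc() walks; the struct and field names here are hypothetical, invented only to illustrate the offset indirection described in the comment above:

    struct sample_entry {
            struct trace_entry ent;
            int  name_loc;          /* offset of the string from the entry start */
            /* ...other fixed-size fields... */
            /* variable-size data, including the string, follows the struct */
    };

    /* the two steps performed by filter_pred_strloc(), spelled out: */
    int   str_loc = ((struct sample_entry *)event)->name_loc;
    char *str     = (char *)event + str_loc;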
50 191
51static int filter_pred_16(struct filter_pred *pred, void *event) 192static int filter_pred_none(struct filter_pred *pred, void *event,
193 int val1, int val2)
194{
195 return 0;
196}
197
198/* return 1 if event matches, 0 otherwise (discard) */
199int filter_match_preds(struct ftrace_event_call *call, void *rec)
52{ 200{
53 u16 *addr = (u16 *)(event + pred->offset); 201 struct event_filter *filter = call->filter;
54 u16 val = (u16)pred->val; 202 int match, top = 0, val1 = 0, val2 = 0;
55 int match; 203 int stack[MAX_FILTER_PRED];
204 struct filter_pred *pred;
205 int i;
206
207 for (i = 0; i < filter->n_preds; i++) {
208 pred = filter->preds[i];
209 if (!pred->pop_n) {
210 match = pred->fn(pred, rec, val1, val2);
211 stack[top++] = match;
212 continue;
213 }
214 if (pred->pop_n > top) {
215 WARN_ON_ONCE(1);
216 return 0;
217 }
218 val1 = stack[--top];
219 val2 = stack[--top];
220 match = pred->fn(pred, rec, val1, val2);
221 stack[top++] = match;
222 }
56 223
57 match = (val == *addr) ^ pred->not; 224 return stack[--top];
225}
226EXPORT_SYMBOL_GPL(filter_match_preds);
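The loop above is a straightforward postfix (RPN) evaluation over a small stack: leaf predicates (pop_n == 0) push their result, while logical predicates pop two values and push the combined one. A self-contained userspace toy that follows the same stack discipline for the filter "a == 1 && b == 2" (field names and values are made up):

    #include <stdio.h>

    int main(void)
    {
            int a = 1, b = 3;
            int stack[4], top = 0;
            int val1, val2;

            stack[top++] = (a == 1);        /* leaf predicate: pushes 1 */
            stack[top++] = (b == 2);        /* leaf predicate: pushes 0 */
            val1 = stack[--top];            /* the && predicate pops both... */
            val2 = stack[--top];
            stack[top++] = val1 && val2;    /* ...and pushes the combination */

            printf("match = %d\n", stack[--top]);   /* prints: match = 0 */
            return 0;
    }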
58 227
59 return match; 228static void parse_error(struct filter_parse_state *ps, int err, int pos)
229{
230 ps->lasterr = err;
231 ps->lasterr_pos = pos;
60} 232}
61 233
62static int filter_pred_8(struct filter_pred *pred, void *event) 234static void remove_filter_string(struct event_filter *filter)
63{ 235{
64 u8 *addr = (u8 *)(event + pred->offset); 236 kfree(filter->filter_string);
65 u8 val = (u8)pred->val; 237 filter->filter_string = NULL;
66 int match; 238}
67 239
68 match = (val == *addr) ^ pred->not; 240static int replace_filter_string(struct event_filter *filter,
241 char *filter_string)
242{
243 kfree(filter->filter_string);
244 filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
245 if (!filter->filter_string)
246 return -ENOMEM;
69 247
70 return match; 248 return 0;
71} 249}
72 250
73static int filter_pred_string(struct filter_pred *pred, void *event) 251static int append_filter_string(struct event_filter *filter,
252 char *string)
74{ 253{
75 char *addr = (char *)(event + pred->offset); 254 int newlen;
76 int cmp, match; 255 char *new_filter_string;
77 256
78 cmp = strncmp(addr, pred->str_val, pred->str_len); 257 BUG_ON(!filter->filter_string);
258 newlen = strlen(filter->filter_string) + strlen(string) + 1;
259 new_filter_string = kmalloc(newlen, GFP_KERNEL);
260 if (!new_filter_string)
261 return -ENOMEM;
79 262
80 match = (!cmp) ^ pred->not; 263 strcpy(new_filter_string, filter->filter_string);
264 strcat(new_filter_string, string);
265 kfree(filter->filter_string);
266 filter->filter_string = new_filter_string;
81 267
82 return match; 268 return 0;
83} 269}
84 270
85/* return 1 if event matches, 0 otherwise (discard) */ 271static void append_filter_err(struct filter_parse_state *ps,
86int filter_match_preds(struct ftrace_event_call *call, void *rec) 272 struct event_filter *filter)
87{ 273{
88 int i, matched, and_failed = 0; 274 int pos = ps->lasterr_pos;
89 struct filter_pred *pred; 275 char *buf, *pbuf;
90 276
91 for (i = 0; i < MAX_FILTER_PRED; i++) { 277 buf = (char *)__get_free_page(GFP_TEMPORARY);
92 if (call->preds[i]) { 278 if (!buf)
93 pred = call->preds[i]; 279 return;
94 if (and_failed && !pred->or)
95 continue;
96 matched = pred->fn(pred, rec);
97 if (!matched && !pred->or) {
98 and_failed = 1;
99 continue;
100 } else if (matched && pred->or)
101 return 1;
102 } else
103 break;
104 }
105 280
106 if (and_failed) 281 append_filter_string(filter, "\n");
107 return 0; 282 memset(buf, ' ', PAGE_SIZE);
283 if (pos > PAGE_SIZE - 128)
284 pos = 0;
285 buf[pos] = '^';
286 pbuf = &buf[pos] + 1;
108 287
109 return 1; 288 sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
289 append_filter_string(filter, buf);
290 free_page((unsigned long) buf);
110} 291}
111 292
112void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) 293void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
113{ 294{
114 char *field_name; 295 struct event_filter *filter = call->filter;
115 struct filter_pred *pred;
116 int i;
117 296
118 if (!preds) { 297 mutex_lock(&filter_mutex);
298 if (filter->filter_string)
299 trace_seq_printf(s, "%s\n", filter->filter_string);
300 else
119 trace_seq_printf(s, "none\n"); 301 trace_seq_printf(s, "none\n");
120 return; 302 mutex_unlock(&filter_mutex);
121 } 303}
122 304
123 for (i = 0; i < MAX_FILTER_PRED; i++) { 305void print_subsystem_event_filter(struct event_subsystem *system,
124 if (preds[i]) { 306 struct trace_seq *s)
125 pred = preds[i]; 307{
126 field_name = pred->field_name; 308 struct event_filter *filter = system->filter;
127 if (i) 309
128 trace_seq_printf(s, pred->or ? "|| " : "&& "); 310 mutex_lock(&filter_mutex);
129 trace_seq_printf(s, "%s ", field_name); 311 if (filter->filter_string)
130 trace_seq_printf(s, pred->not ? "!= " : "== "); 312 trace_seq_printf(s, "%s\n", filter->filter_string);
131 if (pred->str_val) 313 else
132 trace_seq_printf(s, "%s\n", pred->str_val); 314 trace_seq_printf(s, "none\n");
133 else 315 mutex_unlock(&filter_mutex);
134 trace_seq_printf(s, "%llu\n", pred->val);
135 } else
136 break;
137 }
138} 316}
139 317
140static struct ftrace_event_field * 318static struct ftrace_event_field *
@@ -150,284 +328,828 @@ find_event_field(struct ftrace_event_call *call, char *name)
150 return NULL; 328 return NULL;
151} 329}
152 330
153void filter_free_pred(struct filter_pred *pred) 331static void filter_free_pred(struct filter_pred *pred)
154{ 332{
155 if (!pred) 333 if (!pred)
156 return; 334 return;
157 335
158 kfree(pred->field_name); 336 kfree(pred->field_name);
159 kfree(pred->str_val);
160 kfree(pred); 337 kfree(pred);
161} 338}
162 339
163void filter_free_preds(struct ftrace_event_call *call) 340static void filter_clear_pred(struct filter_pred *pred)
164{ 341{
165 int i; 342 kfree(pred->field_name);
343 pred->field_name = NULL;
344 pred->str_len = 0;
345}
166 346
167 if (call->preds) { 347static int filter_set_pred(struct filter_pred *dest,
168 for (i = 0; i < MAX_FILTER_PRED; i++) 348 struct filter_pred *src,
169 filter_free_pred(call->preds[i]); 349 filter_pred_fn_t fn)
170 kfree(call->preds); 350{
171 call->preds = NULL; 351 *dest = *src;
352 if (src->field_name) {
353 dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
354 if (!dest->field_name)
355 return -ENOMEM;
172 } 356 }
357 dest->fn = fn;
358
359 return 0;
173} 360}
174 361
175void filter_free_subsystem_preds(struct event_subsystem *system) 362static void filter_disable_preds(struct ftrace_event_call *call)
176{ 363{
177 struct ftrace_event_call *call = __start_ftrace_events; 364 struct event_filter *filter = call->filter;
178 int i; 365 int i;
179 366
180 if (system->preds) { 367 call->filter_active = 0;
181 for (i = 0; i < MAX_FILTER_PRED; i++) 368 filter->n_preds = 0;
182 filter_free_pred(system->preds[i]);
183 kfree(system->preds);
184 system->preds = NULL;
185 }
186 369
187 events_for_each(call) { 370 for (i = 0; i < MAX_FILTER_PRED; i++)
188 if (!call->name || !call->regfunc) 371 filter->preds[i]->fn = filter_pred_none;
189 continue; 372}
373
374void destroy_preds(struct ftrace_event_call *call)
375{
376 struct event_filter *filter = call->filter;
377 int i;
190 378
191 if (!strcmp(call->system, system->name)) 379 for (i = 0; i < MAX_FILTER_PRED; i++) {
192 filter_free_preds(call); 380 if (filter->preds[i])
381 filter_free_pred(filter->preds[i]);
193 } 382 }
383 kfree(filter->preds);
384 kfree(filter);
385 call->filter = NULL;
194} 386}
195 387
196static int __filter_add_pred(struct ftrace_event_call *call, 388int init_preds(struct ftrace_event_call *call)
197 struct filter_pred *pred)
198{ 389{
390 struct event_filter *filter;
391 struct filter_pred *pred;
199 int i; 392 int i;
200 393
201 if (call->preds && !pred->compound) 394 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
202 filter_free_preds(call); 395 if (!call->filter)
396 return -ENOMEM;
203 397
204 if (!call->preds) { 398 call->filter_active = 0;
205 call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), 399 filter->n_preds = 0;
206 GFP_KERNEL); 400
207 if (!call->preds) 401 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
208 return -ENOMEM; 402 if (!filter->preds)
209 } 403 goto oom;
210 404
211 for (i = 0; i < MAX_FILTER_PRED; i++) { 405 for (i = 0; i < MAX_FILTER_PRED; i++) {
212 if (!call->preds[i]) { 406 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
213 call->preds[i] = pred; 407 if (!pred)
214 return 0; 408 goto oom;
409 pred->fn = filter_pred_none;
410 filter->preds[i] = pred;
411 }
412
413 return 0;
414
415oom:
416 destroy_preds(call);
417
418 return -ENOMEM;
419}
420EXPORT_SYMBOL_GPL(init_preds);
421
422static void filter_free_subsystem_preds(struct event_subsystem *system)
423{
424 struct event_filter *filter = system->filter;
425 struct ftrace_event_call *call;
426 int i;
427
428 if (filter->n_preds) {
429 for (i = 0; i < filter->n_preds; i++)
430 filter_free_pred(filter->preds[i]);
431 kfree(filter->preds);
432 filter->preds = NULL;
433 filter->n_preds = 0;
434 }
435
436 mutex_lock(&event_mutex);
437 list_for_each_entry(call, &ftrace_events, list) {
438 if (!call->define_fields)
439 continue;
440
441 if (!strcmp(call->system, system->name)) {
442 filter_disable_preds(call);
443 remove_filter_string(call->filter);
215 } 444 }
216 } 445 }
446 mutex_unlock(&event_mutex);
447}
448
449static int filter_add_pred_fn(struct filter_parse_state *ps,
450 struct ftrace_event_call *call,
451 struct filter_pred *pred,
452 filter_pred_fn_t fn)
453{
454 struct event_filter *filter = call->filter;
455 int idx, err;
456
457 if (filter->n_preds == MAX_FILTER_PRED) {
458 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
459 return -ENOSPC;
460 }
461
462 idx = filter->n_preds;
463 filter_clear_pred(filter->preds[idx]);
464 err = filter_set_pred(filter->preds[idx], pred, fn);
465 if (err)
466 return err;
217 467
218 return -ENOSPC; 468 filter->n_preds++;
469 call->filter_active = 1;
470
471 return 0;
219} 472}
220 473
474enum {
475 FILTER_STATIC_STRING = 1,
476 FILTER_DYN_STRING
477};
478
221static int is_string_field(const char *type) 479static int is_string_field(const char *type)
222{ 480{
481 if (strstr(type, "__data_loc") && strstr(type, "char"))
482 return FILTER_DYN_STRING;
483
223 if (strchr(type, '[') && strstr(type, "char")) 484 if (strchr(type, '[') && strstr(type, "char"))
224 return 1; 485 return FILTER_STATIC_STRING;
225 486
226 return 0; 487 return 0;
227} 488}
228 489
229int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) 490static int is_legal_op(struct ftrace_event_field *field, int op)
230{ 491{
231 struct ftrace_event_field *field; 492 if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE))
232 493 return 0;
233 field = find_event_field(call, pred->field_name);
234 if (!field)
235 return -EINVAL;
236 494
237 pred->offset = field->offset; 495 return 1;
496}
238 497
239 if (is_string_field(field->type)) { 498static filter_pred_fn_t select_comparison_fn(int op, int field_size,
240 if (!pred->str_val) 499 int field_is_signed)
241 return -EINVAL; 500{
242 pred->fn = filter_pred_string; 501 filter_pred_fn_t fn = NULL;
243 pred->str_len = field->size;
244 return __filter_add_pred(call, pred);
245 } else {
246 if (pred->str_val)
247 return -EINVAL;
248 }
249 502
250 switch (field->size) { 503 switch (field_size) {
251 case 8: 504 case 8:
252 pred->fn = filter_pred_64; 505 if (op == OP_EQ || op == OP_NE)
506 fn = filter_pred_64;
507 else if (field_is_signed)
508 fn = filter_pred_s64;
509 else
510 fn = filter_pred_u64;
253 break; 511 break;
254 case 4: 512 case 4:
255 pred->fn = filter_pred_32; 513 if (op == OP_EQ || op == OP_NE)
514 fn = filter_pred_32;
515 else if (field_is_signed)
516 fn = filter_pred_s32;
517 else
518 fn = filter_pred_u32;
256 break; 519 break;
257 case 2: 520 case 2:
258 pred->fn = filter_pred_16; 521 if (op == OP_EQ || op == OP_NE)
522 fn = filter_pred_16;
523 else if (field_is_signed)
524 fn = filter_pred_s16;
525 else
526 fn = filter_pred_u16;
259 break; 527 break;
260 case 1: 528 case 1:
261 pred->fn = filter_pred_8; 529 if (op == OP_EQ || op == OP_NE)
530 fn = filter_pred_8;
531 else if (field_is_signed)
532 fn = filter_pred_s8;
533 else
534 fn = filter_pred_u8;
262 break; 535 break;
263 default:
264 return -EINVAL;
265 } 536 }
266 537
267 return __filter_add_pred(call, pred); 538 return fn;
268} 539}
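Note the asymmetry in the switch above: OP_EQ and OP_NE compare raw bit patterns, which is independent of signedness, so they reuse the generic filter_pred_<size> equality helpers, while the ordered comparisons must honour the field's signedness, hence the separate signed/unsigned variants generated by DEFINE_COMPARISON_PRED.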
269 540
270static struct filter_pred *copy_pred(struct filter_pred *pred) 541static int filter_add_pred(struct filter_parse_state *ps,
542 struct ftrace_event_call *call,
543 struct filter_pred *pred)
271{ 544{
272 struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); 545 struct ftrace_event_field *field;
273 if (!new_pred) 546 filter_pred_fn_t fn;
274 return NULL; 547 unsigned long long val;
548 int string_type;
549
550 pred->fn = filter_pred_none;
551
552 if (pred->op == OP_AND) {
553 pred->pop_n = 2;
554 return filter_add_pred_fn(ps, call, pred, filter_pred_and);
555 } else if (pred->op == OP_OR) {
556 pred->pop_n = 2;
557 return filter_add_pred_fn(ps, call, pred, filter_pred_or);
558 }
559
560 field = find_event_field(call, pred->field_name);
561 if (!field) {
562 parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
563 return -EINVAL;
564 }
275 565
276 memcpy(new_pred, pred, sizeof(*pred)); 566 pred->offset = field->offset;
277 567
278 if (pred->field_name) { 568 if (!is_legal_op(field, pred->op)) {
279 new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); 569 parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
280 if (!new_pred->field_name) { 570 return -EINVAL;
281 kfree(new_pred);
282 return NULL;
283 }
284 } 571 }
285 572
286 if (pred->str_val) { 573 string_type = is_string_field(field->type);
287 new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); 574 if (string_type) {
288 if (!new_pred->str_val) { 575 if (string_type == FILTER_STATIC_STRING)
289 filter_free_pred(new_pred); 576 fn = filter_pred_string;
290 return NULL; 577 else
578 fn = filter_pred_strloc;
579 pred->str_len = field->size;
580 if (pred->op == OP_NE)
581 pred->not = 1;
582 return filter_add_pred_fn(ps, call, pred, fn);
583 } else {
584 if (strict_strtoull(pred->str_val, 0, &val)) {
585 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
586 return -EINVAL;
291 } 587 }
588 pred->val = val;
589 }
590
591 fn = select_comparison_fn(pred->op, field->size, field->is_signed);
592 if (!fn) {
593 parse_error(ps, FILT_ERR_INVALID_OP, 0);
594 return -EINVAL;
292 } 595 }
293 596
294 return new_pred; 597 if (pred->op == OP_NE)
598 pred->not = 1;
599
600 return filter_add_pred_fn(ps, call, pred, fn);
295} 601}
296 602
297int filter_add_subsystem_pred(struct event_subsystem *system, 603static int filter_add_subsystem_pred(struct filter_parse_state *ps,
298 struct filter_pred *pred) 604 struct event_subsystem *system,
605 struct filter_pred *pred,
606 char *filter_string)
299{ 607{
300 struct ftrace_event_call *call = __start_ftrace_events; 608 struct event_filter *filter = system->filter;
301 struct filter_pred *event_pred; 609 struct ftrace_event_call *call;
302 int i; 610 int err = 0;
303
304 if (system->preds && !pred->compound)
305 filter_free_subsystem_preds(system);
306 611
307 if (!system->preds) { 612 if (!filter->preds) {
308 system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), 613 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
309 GFP_KERNEL); 614 GFP_KERNEL);
310 if (!system->preds) 615
616 if (!filter->preds)
311 return -ENOMEM; 617 return -ENOMEM;
312 } 618 }
313 619
314 for (i = 0; i < MAX_FILTER_PRED; i++) { 620 if (filter->n_preds == MAX_FILTER_PRED) {
315 if (!system->preds[i]) { 621 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
316 system->preds[i] = pred; 622 return -ENOSPC;
317 break;
318 }
319 } 623 }
320 624
321 if (i == MAX_FILTER_PRED) 625 filter->preds[filter->n_preds] = pred;
322 return -ENOSPC; 626 filter->n_preds++;
323 627
324 events_for_each(call) { 628 mutex_lock(&event_mutex);
325 int err; 629 list_for_each_entry(call, &ftrace_events, list) {
326 630
327 if (!call->name || !call->regfunc) 631 if (!call->define_fields)
328 continue; 632 continue;
329 633
330 if (strcmp(call->system, system->name)) 634 if (strcmp(call->system, system->name))
331 continue; 635 continue;
332 636
333 if (!find_event_field(call, pred->field_name)) 637 err = filter_add_pred(ps, call, pred);
334 continue; 638 if (err) {
639 mutex_unlock(&event_mutex);
640 filter_free_subsystem_preds(system);
641 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
642 goto out;
643 }
644 replace_filter_string(call->filter, filter_string);
645 }
646 mutex_unlock(&event_mutex);
647out:
648 return err;
649}
335 650
336 event_pred = copy_pred(pred); 651static void parse_init(struct filter_parse_state *ps,
337 if (!event_pred) 652 struct filter_op *ops,
338 goto oom; 653 char *infix_string)
654{
655 memset(ps, '\0', sizeof(*ps));
339 656
340 err = filter_add_pred(call, event_pred); 657 ps->infix.string = infix_string;
341 if (err) 658 ps->infix.cnt = strlen(infix_string);
342 filter_free_pred(event_pred); 659 ps->ops = ops;
343 if (err == -ENOMEM) 660
344 goto oom; 661 INIT_LIST_HEAD(&ps->opstack);
662 INIT_LIST_HEAD(&ps->postfix);
663}
664
665static char infix_next(struct filter_parse_state *ps)
666{
667 ps->infix.cnt--;
668
669 return ps->infix.string[ps->infix.tail++];
670}
671
672static char infix_peek(struct filter_parse_state *ps)
673{
674 if (ps->infix.tail == strlen(ps->infix.string))
675 return 0;
676
677 return ps->infix.string[ps->infix.tail];
678}
679
680static void infix_advance(struct filter_parse_state *ps)
681{
682 ps->infix.cnt--;
683 ps->infix.tail++;
684}
685
686static inline int is_precedence_lower(struct filter_parse_state *ps,
687 int a, int b)
688{
689 return ps->ops[a].precedence < ps->ops[b].precedence;
690}
691
692static inline int is_op_char(struct filter_parse_state *ps, char c)
693{
694 int i;
695
696 for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
697 if (ps->ops[i].string[0] == c)
698 return 1;
345 } 699 }
346 700
347 return 0; 701 return 0;
702}
348 703
349oom: 704static int infix_get_op(struct filter_parse_state *ps, char firstc)
350 system->preds[i] = NULL; 705{
351 return -ENOMEM; 706 char nextc = infix_peek(ps);
707 char opstr[3];
708 int i;
709
710 opstr[0] = firstc;
711 opstr[1] = nextc;
712 opstr[2] = '\0';
713
714 for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
715 if (!strcmp(opstr, ps->ops[i].string)) {
716 infix_advance(ps);
717 return ps->ops[i].id;
718 }
719 }
720
721 opstr[1] = '\0';
722
723 for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
724 if (!strcmp(opstr, ps->ops[i].string))
725 return ps->ops[i].id;
726 }
727
728 return OP_NONE;
352} 729}
353 730
354int filter_parse(char **pbuf, struct filter_pred *pred) 731static inline void clear_operand_string(struct filter_parse_state *ps)
355{ 732{
356 char *tmp, *tok, *val_str = NULL; 733 memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
357 int tok_n = 0; 734 ps->operand.tail = 0;
735}
358 736
359 /* field ==/!= number, or/and field ==/!= number, number */ 737static inline int append_operand_char(struct filter_parse_state *ps, char c)
360 while ((tok = strsep(pbuf, " \n"))) { 738{
361 if (tok_n == 0) { 739 if (ps->operand.tail == MAX_FILTER_STR_VAL - 1)
362 if (!strcmp(tok, "0")) { 740 return -EINVAL;
363 pred->clear = 1; 741
364 return 0; 742 ps->operand.string[ps->operand.tail++] = c;
365 } else if (!strcmp(tok, "&&")) { 743
366 pred->or = 0; 744 return 0;
367 pred->compound = 1; 745}
368 } else if (!strcmp(tok, "||")) { 746
369 pred->or = 1; 747static int filter_opstack_push(struct filter_parse_state *ps, int op)
370 pred->compound = 1; 748{
371 } else 749 struct opstack_op *opstack_op;
372 pred->field_name = tok; 750
373 tok_n = 1; 751 opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
752 if (!opstack_op)
753 return -ENOMEM;
754
755 opstack_op->op = op;
756 list_add(&opstack_op->list, &ps->opstack);
757
758 return 0;
759}
760
761static int filter_opstack_empty(struct filter_parse_state *ps)
762{
763 return list_empty(&ps->opstack);
764}
765
766static int filter_opstack_top(struct filter_parse_state *ps)
767{
768 struct opstack_op *opstack_op;
769
770 if (filter_opstack_empty(ps))
771 return OP_NONE;
772
773 opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
774
775 return opstack_op->op;
776}
777
778static int filter_opstack_pop(struct filter_parse_state *ps)
779{
780 struct opstack_op *opstack_op;
781 int op;
782
783 if (filter_opstack_empty(ps))
784 return OP_NONE;
785
786 opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
787 op = opstack_op->op;
788 list_del(&opstack_op->list);
789
790 kfree(opstack_op);
791
792 return op;
793}
794
795static void filter_opstack_clear(struct filter_parse_state *ps)
796{
797 while (!filter_opstack_empty(ps))
798 filter_opstack_pop(ps);
799}
800
801static char *curr_operand(struct filter_parse_state *ps)
802{
803 return ps->operand.string;
804}
805
806static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
807{
808 struct postfix_elt *elt;
809
810 elt = kmalloc(sizeof(*elt), GFP_KERNEL);
811 if (!elt)
812 return -ENOMEM;
813
814 elt->op = OP_NONE;
815 elt->operand = kstrdup(operand, GFP_KERNEL);
816 if (!elt->operand) {
817 kfree(elt);
818 return -ENOMEM;
819 }
820
821 list_add_tail(&elt->list, &ps->postfix);
822
823 return 0;
824}
825
826static int postfix_append_op(struct filter_parse_state *ps, int op)
827{
828 struct postfix_elt *elt;
829
830 elt = kmalloc(sizeof(*elt), GFP_KERNEL);
831 if (!elt)
832 return -ENOMEM;
833
834 elt->op = op;
835 elt->operand = NULL;
836
837 list_add_tail(&elt->list, &ps->postfix);
838
839 return 0;
840}
841
842static void postfix_clear(struct filter_parse_state *ps)
843{
844 struct postfix_elt *elt;
845
846 while (!list_empty(&ps->postfix)) {
847 elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
848 kfree(elt->operand);
849 list_del(&elt->list);
850 }
851}
852
853static int filter_parse(struct filter_parse_state *ps)
854{
855 int in_string = 0;
856 int op, top_op;
857 char ch;
858
859 while ((ch = infix_next(ps))) {
860 if (ch == '"') {
861 in_string ^= 1;
374 continue; 862 continue;
375 } 863 }
376 if (tok_n == 1) { 864
377 if (!pred->field_name) 865 if (in_string)
378 pred->field_name = tok; 866 goto parse_operand;
379 else if (!strcmp(tok, "!=")) 867
380 pred->not = 1; 868 if (isspace(ch))
381 else if (!strcmp(tok, "==")) 869 continue;
382 pred->not = 0; 870
383 else { 871 if (is_op_char(ps, ch)) {
384 pred->field_name = NULL; 872 op = infix_get_op(ps, ch);
873 if (op == OP_NONE) {
874 parse_error(ps, FILT_ERR_INVALID_OP, 0);
385 return -EINVAL; 875 return -EINVAL;
386 } 876 }
387 tok_n = 2; 877
878 if (strlen(curr_operand(ps))) {
879 postfix_append_operand(ps, curr_operand(ps));
880 clear_operand_string(ps);
881 }
882
883 while (!filter_opstack_empty(ps)) {
884 top_op = filter_opstack_top(ps);
885 if (!is_precedence_lower(ps, top_op, op)) {
886 top_op = filter_opstack_pop(ps);
887 postfix_append_op(ps, top_op);
888 continue;
889 }
890 break;
891 }
892
893 filter_opstack_push(ps, op);
388 continue; 894 continue;
389 } 895 }
390 if (tok_n == 2) { 896
391 if (pred->compound) { 897 if (ch == '(') {
392 if (!strcmp(tok, "!=")) 898 filter_opstack_push(ps, OP_OPEN_PAREN);
393 pred->not = 1; 899 continue;
394 else if (!strcmp(tok, "==")) 900 }
395 pred->not = 0; 901
396 else { 902 if (ch == ')') {
397 pred->field_name = NULL; 903 if (strlen(curr_operand(ps))) {
398 return -EINVAL; 904 postfix_append_operand(ps, curr_operand(ps));
399 } 905 clear_operand_string(ps);
400 } else { 906 }
401 val_str = tok; 907
402 break; /* done */ 908 top_op = filter_opstack_pop(ps);
909 while (top_op != OP_NONE) {
910 if (top_op == OP_OPEN_PAREN)
911 break;
912 postfix_append_op(ps, top_op);
913 top_op = filter_opstack_pop(ps);
914 }
915 if (top_op == OP_NONE) {
916 parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
917 return -EINVAL;
403 } 918 }
404 tok_n = 3;
405 continue; 919 continue;
406 } 920 }
407 if (tok_n == 3) { 921parse_operand:
408 val_str = tok; 922 if (append_operand_char(ps, ch)) {
409 break; /* done */ 923 parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
924 return -EINVAL;
410 } 925 }
411 } 926 }
412 927
413 if (!val_str) { 928 if (strlen(curr_operand(ps)))
414 pred->field_name = NULL; 929 postfix_append_operand(ps, curr_operand(ps));
415 return -EINVAL; 930
931 while (!filter_opstack_empty(ps)) {
932 top_op = filter_opstack_pop(ps);
933 if (top_op == OP_NONE)
934 break;
935 if (top_op == OP_OPEN_PAREN) {
936 parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
937 return -EINVAL;
938 }
939 postfix_append_op(ps, top_op);
416 } 940 }
417 941
418 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); 942 return 0;
419 if (!pred->field_name) 943}
420 return -ENOMEM;
421 944
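filter_parse() is a conventional shunting-yard conversion from infix to postfix: operands accumulate until an operator character, a ')' or the end of input flushes them, and operators are popped to the output while the stack top has equal or higher precedence. A worked example against the precedence table defined earlier (larger number binds tighter), using a made-up filter string:

    input:  prev_pid == 0 || (next_pid > 100 && next_prio < 50)

    token   op stack (top at right)   postfix output so far
    -----   -----------------------   ----------------------------------------
    ==      ==                        prev_pid
    ||      ||                        prev_pid 0 ==          (==: prec 4 >= 1)
    (       || (                      prev_pid 0 ==
    >       || ( >                    ... next_pid
    &&      || ( &&                   ... next_pid 100 >     (>: prec 5 >= 2)
    <       || ( && <                 ... next_prio
    )       ||                        ... next_prio 50 < &&  (popped to '(')
    end     (empty)                   ... ||                 (stack drained)

    result: prev_pid 0 == next_pid 100 > next_prio 50 < && ||

The low precedence of && and || is what pushes the comparisons out first, giving the expected C-style grouping (prev_pid == 0) || ((next_pid > 100) && (next_prio < 50)).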
422 pred->val = simple_strtoull(val_str, &tmp, 0); 945static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
423 if (tmp == val_str) { 946{
424 pred->str_val = kstrdup(val_str, GFP_KERNEL); 947 struct filter_pred *pred;
425 if (!pred->str_val) 948
426 return -ENOMEM; 949 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
427 } else if (*tmp != '\0') 950 if (!pred)
951 return NULL;
952
953 pred->field_name = kstrdup(operand1, GFP_KERNEL);
954 if (!pred->field_name) {
955 kfree(pred);
956 return NULL;
957 }
958
959 strcpy(pred->str_val, operand2);
960 pred->str_len = strlen(operand2);
961
962 pred->op = op;
963
964 return pred;
965}
966
967static struct filter_pred *create_logical_pred(int op)
968{
969 struct filter_pred *pred;
970
971 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
972 if (!pred)
973 return NULL;
974
975 pred->op = op;
976
977 return pred;
978}
979
980static int check_preds(struct filter_parse_state *ps)
981{
982 int n_normal_preds = 0, n_logical_preds = 0;
983 struct postfix_elt *elt;
984
985 list_for_each_entry(elt, &ps->postfix, list) {
986 if (elt->op == OP_NONE)
987 continue;
988
989 if (elt->op == OP_AND || elt->op == OP_OR) {
990 n_logical_preds++;
991 continue;
992 }
993 n_normal_preds++;
994 }
995
996 if (!n_normal_preds || n_logical_preds >= n_normal_preds) {
997 parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
428 return -EINVAL; 998 return -EINVAL;
999 }
429 1000
430 return 0; 1001 return 0;
431} 1002}
432 1003
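check_preds() encodes a simple structural invariant: the postfix list describes a binary expression tree whose leaves are the field comparisons and whose internal nodes are && and ||, and a binary tree with N leaves has exactly N - 1 internal nodes. A filter with no leaf predicates, or with at least as many logical operators as leaves, therefore cannot be well formed. For instance, "a == 1 && b == 2" yields two normal predicates and one logical predicate and passes the check.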
1004static int replace_preds(struct event_subsystem *system,
1005 struct ftrace_event_call *call,
1006 struct filter_parse_state *ps,
1007 char *filter_string)
1008{
1009 char *operand1 = NULL, *operand2 = NULL;
1010 struct filter_pred *pred;
1011 struct postfix_elt *elt;
1012 int err;
1013
1014 err = check_preds(ps);
1015 if (err)
1016 return err;
1017
1018 list_for_each_entry(elt, &ps->postfix, list) {
1019 if (elt->op == OP_NONE) {
1020 if (!operand1)
1021 operand1 = elt->operand;
1022 else if (!operand2)
1023 operand2 = elt->operand;
1024 else {
1025 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
1026 return -EINVAL;
1027 }
1028 continue;
1029 }
1030
1031 if (elt->op == OP_AND || elt->op == OP_OR) {
1032 pred = create_logical_pred(elt->op);
1033 if (call) {
1034 err = filter_add_pred(ps, call, pred);
1035 filter_free_pred(pred);
1036 } else
1037 err = filter_add_subsystem_pred(ps, system,
1038 pred, filter_string);
1039 if (err)
1040 return err;
1041
1042 operand1 = operand2 = NULL;
1043 continue;
1044 }
1045
1046 if (!operand1 || !operand2) {
1047 parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
1048 return -EINVAL;
1049 }
1050
1051 pred = create_pred(elt->op, operand1, operand2);
1052 if (call) {
1053 err = filter_add_pred(ps, call, pred);
1054 filter_free_pred(pred);
1055 } else
1056 err = filter_add_subsystem_pred(ps, system, pred,
1057 filter_string);
1058 if (err)
1059 return err;
1060
1061 operand1 = operand2 = NULL;
1062 }
1063
1064 return 0;
1065}
1066
1067int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1068{
1069 int err;
1070
1071 struct filter_parse_state *ps;
1072
1073 mutex_lock(&filter_mutex);
1074
1075 if (!strcmp(strstrip(filter_string), "0")) {
1076 filter_disable_preds(call);
1077 remove_filter_string(call->filter);
1078 mutex_unlock(&filter_mutex);
1079 return 0;
1080 }
1081
1082 err = -ENOMEM;
1083 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1084 if (!ps)
1085 goto out_unlock;
1086
1087 filter_disable_preds(call);
1088 replace_filter_string(call->filter, filter_string);
1089
1090 parse_init(ps, filter_ops, filter_string);
1091 err = filter_parse(ps);
1092 if (err) {
1093 append_filter_err(ps, call->filter);
1094 goto out;
1095 }
1096
1097 err = replace_preds(NULL, call, ps, filter_string);
1098 if (err)
1099 append_filter_err(ps, call->filter);
1100
1101out:
1102 filter_opstack_clear(ps);
1103 postfix_clear(ps);
1104 kfree(ps);
1105out_unlock:
1106 mutex_unlock(&filter_mutex);
1107
1108 return err;
1109}
1110
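For orientation, this is the function reached when userspace writes a per-event filter file, along the lines documented in Documentation/trace/events.txt:

    # echo 'prev_pid == 0' > \
            /sys/kernel/debug/tracing/events/sched/sched_switch/filter
    # echo 0 > /sys/kernel/debug/tracing/events/sched/sched_switch/filter

The first write is parsed and applied here; the second takes the strstrip()/"0" early exit above, disabling the predicates and clearing the stored filter string.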
1111int apply_subsystem_event_filter(struct event_subsystem *system,
1112 char *filter_string)
1113{
1114 int err;
1115
1116 struct filter_parse_state *ps;
1117
1118 mutex_lock(&filter_mutex);
1119
1120 if (!strcmp(strstrip(filter_string), "0")) {
1121 filter_free_subsystem_preds(system);
1122 remove_filter_string(system->filter);
1123 mutex_unlock(&filter_mutex);
1124 return 0;
1125 }
1126
1127 err = -ENOMEM;
1128 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1129 if (!ps)
1130 goto out_unlock;
1131
1132 filter_free_subsystem_preds(system);
1133 replace_filter_string(system->filter, filter_string);
1134
1135 parse_init(ps, filter_ops, filter_string);
1136 err = filter_parse(ps);
1137 if (err) {
1138 append_filter_err(ps, system->filter);
1139 goto out;
1140 }
1141
1142 err = replace_preds(system, NULL, ps, filter_string);
1143 if (err)
1144 append_filter_err(ps, system->filter);
1145
1146out:
1147 filter_opstack_clear(ps);
1148 postfix_clear(ps);
1149 kfree(ps);
1150out_unlock:
1151 mutex_unlock(&filter_mutex);
1152
1153 return err;
1154}
433 1155
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
deleted file mode 100644
index 38985f9b379c..000000000000
--- a/kernel/trace/trace_events_stage_1.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * Stage 1 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * struct ftrace_raw_<call> {
7 * struct trace_entry ent;
8 * <type> <item>;
9 * <type2> <item2>[<len>];
10 * [...]
11 * };
12 *
13 * The <type> <item> is created by the __field(type, item) macro or
14 * the __array(type2, item2, len) macro.
15 * We simply do "type item;", and that will create the fields
16 * in the structure.
17 */
18
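A concrete expansion may help; the event below is hypothetical, invented for illustration. Given a definition along the lines of

    TRACE_EVENT(foo_bar,
            TP_PROTO(int cpu, const char *name),
            ...,
            TP_STRUCT__entry(
                    __field(int,  cpu)
                    __array(char, name, 16)
            ),
            ..., ...);

stage 1 expands it to

    struct ftrace_raw_foo_bar {
            struct trace_entry ent;
            int  cpu;
            char name[16];
    };
    static struct ftrace_event_call event_foo_bar;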
19#undef TRACE_FORMAT
20#define TRACE_FORMAT(call, proto, args, fmt)
21
22#undef __array
23#define __array(type, item, len) type item[len];
24
25#undef __field
26#define __field(type, item) type item;
27
28#undef TP_STRUCT__entry
29#define TP_STRUCT__entry(args...) args
30
31#undef TRACE_EVENT
32#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
33 struct ftrace_raw_##name { \
34 struct trace_entry ent; \
35 tstruct \
36 }; \
37 static struct ftrace_event_call event_##name
38
39#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
deleted file mode 100644
index d363c6672c6c..000000000000
--- a/kernel/trace/trace_events_stage_2.h
+++ /dev/null
@@ -1,176 +0,0 @@
1/*
2 * Stage 2 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * enum print_line_t
7 * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
8 * {
9 * struct trace_seq *s = &iter->seq;
10 * struct ftrace_raw_<call> *field; <-- defined in stage 1
11 * struct trace_entry *entry;
12 * int ret;
13 *
14 * entry = iter->ent;
15 *
16 * if (entry->type != event_<call>.id) {
17 * WARN_ON_ONCE(1);
18 * return TRACE_TYPE_UNHANDLED;
19 * }
20 *
21 * field = (typeof(field))entry;
22 *
23 * ret = trace_seq_printf(s, <TP_printk> "\n");
24 * if (!ret)
25 * return TRACE_TYPE_PARTIAL_LINE;
26 *
27 * return TRACE_TYPE_HANDLED;
28 * }
29 *
30 * This is the method used to print the raw event in the trace
31 * output format. Note that this is not needed if the data is
32 * read in binary form.
33 */
34
35#undef __entry
36#define __entry field
37
38#undef TP_printk
39#define TP_printk(fmt, args...) fmt "\n", args
40
41#undef TRACE_EVENT
42#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
43enum print_line_t \
44ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
45{ \
46 struct trace_seq *s = &iter->seq; \
47 struct ftrace_raw_##call *field; \
48 struct trace_entry *entry; \
49 int ret; \
50 \
51 entry = iter->ent; \
52 \
53 if (entry->type != event_##call.id) { \
54 WARN_ON_ONCE(1); \
55 return TRACE_TYPE_UNHANDLED; \
56 } \
57 \
58 field = (typeof(field))entry; \
59 \
60 ret = trace_seq_printf(s, #call ": " print); \
61 if (!ret) \
62 return TRACE_TYPE_PARTIAL_LINE; \
63 \
64 return TRACE_TYPE_HANDLED; \
65}
66
67#include <trace/trace_event_types.h>
68
69/*
70 * Set up the format used to display the trace point.
71 *
72 * int
73 * ftrace_format_##call(struct trace_seq *s)
74 * {
75 * struct ftrace_raw_##call field;
76 * int ret;
77 *
78 * ret = trace_seq_printf(s, #type " " #item ";"
79 * " offset:%u; size:%u;\n",
80 * offsetof(struct ftrace_raw_##call, item),
81 * sizeof(field.item));
82 *
83 * }
84 */
85
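For the hypothetical foo_bar event sketched alongside stage 1, the generated ftrace_format_foo_bar() would emit one line per __field()/__array() entry, roughly as follows (the offsets depend on the size of struct trace_entry and are invented here):

    field:int cpu;      offset:12;      size:4;
    field:char name[16];        offset:16;      size:16;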
86#undef TP_STRUCT__entry
87#define TP_STRUCT__entry(args...) args
88
89#undef __field
90#define __field(type, item) \
91 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
92 "offset:%u;\tsize:%u;\n", \
93 (unsigned int)offsetof(typeof(field), item), \
94 (unsigned int)sizeof(field.item)); \
95 if (!ret) \
96 return 0;
97
98#undef __array
99#define __array(type, item, len) \
100 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
101 "offset:%u;\tsize:%u;\n", \
102 (unsigned int)offsetof(typeof(field), item), \
103 (unsigned int)sizeof(field.item)); \
104 if (!ret) \
105 return 0;
106
107#undef __entry
108#define __entry REC
109
110#undef TP_printk
111#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
112
113#undef TP_fast_assign
114#define TP_fast_assign(args...) args
115
116#undef TRACE_EVENT
117#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
118static int \
119ftrace_format_##call(struct trace_seq *s) \
120{ \
121 struct ftrace_raw_##call field; \
122 int ret; \
123 \
124 tstruct; \
125 \
126 trace_seq_printf(s, "\nprint fmt: " print); \
127 \
128 return ret; \
129}
130
131#include <trace/trace_event_types.h>
132
133#undef __field
134#define __field(type, item) \
135 ret = trace_define_field(event_call, #type, #item, \
136 offsetof(typeof(field), item), \
137 sizeof(field.item)); \
138 if (ret) \
139 return ret;
140
141#undef __array
142#define __array(type, item, len) \
143 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
144 offsetof(typeof(field), item), \
145 sizeof(field.item)); \
146 if (ret) \
147 return ret;
148
149#define __common_field(type, item) \
150 ret = trace_define_field(event_call, #type, "common_" #item, \
151 offsetof(typeof(field.ent), item), \
152 sizeof(field.ent.item)); \
153 if (ret) \
154 return ret;
155
156#undef TRACE_EVENT
157#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
158int \
159ftrace_define_fields_##call(void) \
160{ \
161 struct ftrace_raw_##call field; \
162 struct ftrace_event_call *event_call = &event_##call; \
163 int ret; \
164 \
165 __common_field(unsigned char, type); \
166 __common_field(unsigned char, flags); \
167 __common_field(unsigned char, preempt_count); \
168 __common_field(int, pid); \
169 __common_field(int, tgid); \
170 \
171 tstruct; \
172 \
173 return ret; \
174}
175
176#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
deleted file mode 100644
index 9d2fa78cecca..000000000000
--- a/kernel/trace/trace_events_stage_3.h
+++ /dev/null
@@ -1,281 +0,0 @@
1/*
2 * Stage 3 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * static void ftrace_event_<call>(proto)
7 * {
8 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
9 * }
10 *
11 * static int ftrace_reg_event_<call>(void)
12 * {
13 * int ret;
14 *
15 * ret = register_trace_<call>(ftrace_event_<call>);
16 * if (ret)
17 * pr_info("event trace: Could not activate trace point "
18 * "probe to <call>");
19 * return ret;
20 * }
21 *
22 * static void ftrace_unreg_event_<call>(void)
23 * {
24 * unregister_trace_<call>(ftrace_event_<call>);
25 * }
26 *
27 * For those macros defined with TRACE_FORMAT:
28 *
29 * static struct ftrace_event_call __used
30 * __attribute__((__aligned__(4)))
31 * __attribute__((section("_ftrace_events"))) event_<call> = {
32 * .name = "<call>",
33 * .regfunc = ftrace_reg_event_<call>,
34 * .unregfunc = ftrace_unreg_event_<call>,
35 * }
36 *
37 *
38 * For those macros defined with TRACE_EVENT:
39 *
40 * static struct ftrace_event_call event_<call>;
41 *
42 * static void ftrace_raw_event_<call>(proto)
43 * {
44 * struct ring_buffer_event *event;
45 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
46 * unsigned long irq_flags;
47 * int pc;
48 *
49 * local_save_flags(irq_flags);
50 * pc = preempt_count();
51 *
52 * event = trace_current_buffer_lock_reserve(event_<call>.id,
53 * sizeof(struct ftrace_raw_<call>),
54 * irq_flags, pc);
55 * if (!event)
56 * return;
57 * entry = ring_buffer_event_data(event);
58 *
59 * <assign>; <-- Here the entry fields declared by __field and
60 * __array are filled in (via TP_fast_assign).
61 *
62 * trace_current_buffer_unlock_commit(event, irq_flags, pc);
63 * }
64 *
65 * static int ftrace_raw_reg_event_<call>(void)
66 * {
67 * int ret;
68 *
69 * ret = register_trace_<call>(ftrace_raw_event_<call>);
70 * if (ret)
71 * pr_info("event trace: Could not activate trace point "
72 * "probe to <call>");
73 * return ret;
74 * }
75 *
76 * static void ftrace_unreg_event_<call>(void)
77 * {
78 * unregister_trace_<call>(ftrace_raw_event_<call>);
79 * }
80 *
81 * static struct trace_event ftrace_event_type_<call> = {
82 * .trace = ftrace_raw_output_<call>, <-- stage 2
83 * };
84 *
85 * static int ftrace_raw_init_event_<call>(void)
86 * {
87 * int id;
88 *
89 * id = register_ftrace_event(&ftrace_event_type_<call>);
90 * if (!id)
91 * return -ENODEV;
92 * event_<call>.id = id;
93 * return 0;
94 * }
95 *
96 * static struct ftrace_event_call __used
97 * __attribute__((__aligned__(4)))
98 * __attribute__((section("_ftrace_events"))) event_<call> = {
99 * .name = "<call>",
100 * .system = "<system>",
101 * .raw_init = ftrace_raw_init_event_<call>,
102 * .regfunc = ftrace_reg_event_<call>,
103 * .unregfunc = ftrace_unreg_event_<call>,
104 * .show_format = ftrace_format_<call>,
105 * }
106 *
107 */
108
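All of these variants place their event_<call> structure into the _ftrace_events linker section. A simplified sketch of how such a section is consumed; the pre-patch filter code visible earlier in this diff started from __start_ftrace_events in just this way, while the patched code iterates the ftrace_events list instead:

    extern struct ftrace_event_call __start_ftrace_events[];
    extern struct ftrace_event_call __stop_ftrace_events[];

    struct ftrace_event_call *call;

    for (call = __start_ftrace_events; call < __stop_ftrace_events; call++) {
            if (!call->name)        /* skip holes and padding */
                    continue;
            call->raw_init();       /* e.g. assign the event id */
    }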
109#undef TP_FMT
110#define TP_FMT(fmt, args...) fmt "\n", ##args
111
112#ifdef CONFIG_EVENT_PROFILE
113#define _TRACE_PROFILE(call, proto, args) \
114static void ftrace_profile_##call(proto) \
115{ \
116 extern void perf_tpcounter_event(int); \
117 perf_tpcounter_event(event_##call.id); \
118} \
119 \
120static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
121{ \
122 int ret = 0; \
123 \
124 if (!atomic_inc_return(&call->profile_count)) \
125 ret = register_trace_##call(ftrace_profile_##call); \
126 \
127 return ret; \
128} \
129 \
130static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
131{ \
132 if (atomic_add_negative(-1, &call->profile_count)) \
133 unregister_trace_##call(ftrace_profile_##call); \
134}
135
136#define _TRACE_PROFILE_INIT(call) \
137 .profile_count = ATOMIC_INIT(-1), \
138 .profile_enable = ftrace_profile_enable_##call, \
139 .profile_disable = ftrace_profile_disable_##call,
140
141#else
142#define _TRACE_PROFILE(call, proto, args)
143#define _TRACE_PROFILE_INIT(call)
144#endif
145
146#define _TRACE_FORMAT(call, proto, args, fmt) \
147static void ftrace_event_##call(proto) \
148{ \
149 event_trace_printk(_RET_IP_, #call ": " fmt); \
150} \
151 \
152static int ftrace_reg_event_##call(void) \
153{ \
154 int ret; \
155 \
156 ret = register_trace_##call(ftrace_event_##call); \
157 if (ret) \
158 pr_info("event trace: Could not activate trace point " \
159 "probe to " #call "\n"); \
160 return ret; \
161} \
162 \
163static void ftrace_unreg_event_##call(void) \
164{ \
165 unregister_trace_##call(ftrace_event_##call); \
166} \
167 \
168static struct ftrace_event_call event_##call; \
169 \
170static int ftrace_init_event_##call(void) \
171{ \
172 int id; \
173 \
174 id = register_ftrace_event(NULL); \
175 if (!id) \
176 return -ENODEV; \
177 event_##call.id = id; \
178 return 0; \
179}
180
181#undef TRACE_FORMAT
182#define TRACE_FORMAT(call, proto, args, fmt) \
183_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
184_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
185static struct ftrace_event_call __used \
186__attribute__((__aligned__(4))) \
187__attribute__((section("_ftrace_events"))) event_##call = { \
188 .name = #call, \
189 .system = __stringify(TRACE_SYSTEM), \
190 .raw_init = ftrace_init_event_##call, \
191 .regfunc = ftrace_reg_event_##call, \
192 .unregfunc = ftrace_unreg_event_##call, \
193 _TRACE_PROFILE_INIT(call) \
194}
195
196#undef __entry
197#define __entry entry
198
199#undef TRACE_EVENT
200#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
201_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
202 \
203static struct ftrace_event_call event_##call; \
204 \
205static void ftrace_raw_event_##call(proto) \
206{ \
207 struct ftrace_event_call *call = &event_##call; \
208 struct ring_buffer_event *event; \
209 struct ftrace_raw_##call *entry; \
210 unsigned long irq_flags; \
211 int pc; \
212 \
213 local_save_flags(irq_flags); \
214 pc = preempt_count(); \
215 \
216 event = trace_current_buffer_lock_reserve(event_##call.id, \
217 sizeof(struct ftrace_raw_##call), \
218 irq_flags, pc); \
219 if (!event) \
220 return; \
221 entry = ring_buffer_event_data(event); \
222 \
223 assign; \
224 \
225 if (call->preds && !filter_match_preds(call, entry)) \
226 ring_buffer_event_discard(event); \
227 \
228 trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
229 \
230} \
231 \
232static int ftrace_raw_reg_event_##call(void) \
233{ \
234 int ret; \
235 \
236 ret = register_trace_##call(ftrace_raw_event_##call); \
237 if (ret) \
238 pr_info("event trace: Could not activate trace point " \
239 "probe to " #call "\n"); \
240 return ret; \
241} \
242 \
243static void ftrace_raw_unreg_event_##call(void) \
244{ \
245 unregister_trace_##call(ftrace_raw_event_##call); \
246} \
247 \
248static struct trace_event ftrace_event_type_##call = { \
249 .trace = ftrace_raw_output_##call, \
250}; \
251 \
252static int ftrace_raw_init_event_##call(void) \
253{ \
254 int id; \
255 \
256 id = register_ftrace_event(&ftrace_event_type_##call); \
257 if (!id) \
258 return -ENODEV; \
259 event_##call.id = id; \
260 INIT_LIST_HEAD(&event_##call.fields); \
261 return 0; \
262} \
263 \
264static struct ftrace_event_call __used \
265__attribute__((__aligned__(4))) \
266__attribute__((section("_ftrace_events"))) event_##call = { \
267 .name = #call, \
268 .system = __stringify(TRACE_SYSTEM), \
269 .raw_init = ftrace_raw_init_event_##call, \
270 .regfunc = ftrace_raw_reg_event_##call, \
271 .unregfunc = ftrace_raw_unreg_event_##call, \
272 .show_format = ftrace_format_##call, \
273 .define_fields = ftrace_define_fields_##call, \
274 _TRACE_PROFILE_INIT(call) \
275}
276
277#include <trace/trace_event_types.h>
278
279#undef _TRACE_PROFILE
280#undef _TRACE_PROFILE_INIT
281
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 07a22c33ebf3..d06cf898dc86 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -19,8 +19,12 @@
19#undef TRACE_STRUCT 19#undef TRACE_STRUCT
20#define TRACE_STRUCT(args...) args 20#define TRACE_STRUCT(args...) args
21 21
22extern void __bad_type_size(void);
23
22#undef TRACE_FIELD 24#undef TRACE_FIELD
23#define TRACE_FIELD(type, item, assign) \ 25#define TRACE_FIELD(type, item, assign) \
26 if (sizeof(type) != sizeof(field.item)) \
27 __bad_type_size(); \
24 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 28 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
25 "offset:%u;\tsize:%u;\n", \ 29 "offset:%u;\tsize:%u;\n", \
26 (unsigned int)offsetof(typeof(field), item), \ 30 (unsigned int)offsetof(typeof(field), item), \
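__bad_type_size() is the classic link-time assertion idiom: it is declared but intentionally never defined. When sizeof(type) matches sizeof(field.item), the compiler folds the branch away and no call is emitted; a mismatched TRACE_FIELD() leaves a reference to an undefined symbol behind, so the type error surfaces as a link failure rather than as silently wrong format data. The same idiom in isolation:

    extern void size_mismatch(void);        /* deliberately never defined */

    #define CHECK_SAME_SIZE(a, b) \
            do { if (sizeof(a) != sizeof(b)) size_mismatch(); } while (0)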
@@ -30,7 +34,7 @@
30 34
31 35
32#undef TRACE_FIELD_SPECIAL 36#undef TRACE_FIELD_SPECIAL
33#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ 37#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
34 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ 38 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \
35 "offset:%u;\tsize:%u;\n", \ 39 "offset:%u;\tsize:%u;\n", \
36 (unsigned int)offsetof(typeof(field), item), \ 40 (unsigned int)offsetof(typeof(field), item), \
@@ -46,6 +50,9 @@
46 if (!ret) \ 50 if (!ret) \
47 return 0; 51 return 0;
48 52
53#undef TRACE_FIELD_SIGN
54#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
55 TRACE_FIELD(type, item, assign)
49 56
50#undef TP_RAW_FMT 57#undef TP_RAW_FMT
51#define TP_RAW_FMT(args...) args 58#define TP_RAW_FMT(args...) args
@@ -65,6 +72,22 @@ ftrace_format_##call(struct trace_seq *s) \
65 return ret; \ 72 return ret; \
66} 73}
67 74
75#undef TRACE_EVENT_FORMAT_NOFILTER
76#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
77 tpfmt) \
78static int \
79ftrace_format_##call(struct trace_seq *s) \
80{ \
81 struct args field; \
82 int ret; \
83 \
84 tstruct; \
85 \
86 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
87 \
88 return ret; \
89}
90
68#include "trace_event_types.h" 91#include "trace_event_types.h"
69 92
70#undef TRACE_ZERO_CHAR 93#undef TRACE_ZERO_CHAR
@@ -78,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s) \
78#define TRACE_FIELD(type, item, assign)\ 101#define TRACE_FIELD(type, item, assign)\
79 entry->item = assign; 102 entry->item = assign;
80 103
104#undef TRACE_FIELD_SIGN
105#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
106 TRACE_FIELD(type, item, assign)
107
81#undef TP_CMD 108#undef TP_CMD
82#define TP_CMD(cmd...) cmd 109#define TP_CMD(cmd...) cmd
83 110
@@ -85,18 +112,95 @@ ftrace_format_##call(struct trace_seq *s) \
85#define TRACE_ENTRY entry 112#define TRACE_ENTRY entry
86 113
87#undef TRACE_FIELD_SPECIAL 114#undef TRACE_FIELD_SPECIAL
88#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ 115#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
89 cmd; 116 cmd;
90 117
91#undef TRACE_EVENT_FORMAT 118#undef TRACE_EVENT_FORMAT
92#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 119#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
120int ftrace_define_fields_##call(void); \
121static int ftrace_raw_init_event_##call(void); \
122 \
123struct ftrace_event_call __used \
124__attribute__((__aligned__(4))) \
125__attribute__((section("_ftrace_events"))) event_##call = { \
126 .name = #call, \
127 .id = proto, \
128 .system = __stringify(TRACE_SYSTEM), \
129 .raw_init = ftrace_raw_init_event_##call, \
130 .show_format = ftrace_format_##call, \
131 .define_fields = ftrace_define_fields_##call, \
132}; \
133static int ftrace_raw_init_event_##call(void) \
134{ \
135 INIT_LIST_HEAD(&event_##call.fields); \
136 init_preds(&event_##call); \
137 return 0; \
138} \
139
140#undef TRACE_EVENT_FORMAT_NOFILTER
141#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
142 tpfmt) \
93 \ 143 \
94static struct ftrace_event_call __used \ 144struct ftrace_event_call __used \
95__attribute__((__aligned__(4))) \ 145__attribute__((__aligned__(4))) \
96__attribute__((section("_ftrace_events"))) event_##call = { \ 146__attribute__((section("_ftrace_events"))) event_##call = { \
97 .name = #call, \ 147 .name = #call, \
98 .id = proto, \ 148 .id = proto, \
99 .system = __stringify(TRACE_SYSTEM), \ 149 .system = __stringify(TRACE_SYSTEM), \
100 .show_format = ftrace_format_##call, \ 150 .show_format = ftrace_format_##call, \
151};
152
153#include "trace_event_types.h"
154
155#undef TRACE_FIELD
156#define TRACE_FIELD(type, item, assign) \
157 ret = trace_define_field(event_call, #type, #item, \
158 offsetof(typeof(field), item), \
159 sizeof(field.item), is_signed_type(type)); \
160 if (ret) \
161 return ret;
162
163#undef TRACE_FIELD_SPECIAL
164#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \
165 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
166 offsetof(typeof(field), item), \
167 sizeof(field.item), 0); \
168 if (ret) \
169 return ret;
170
171#undef TRACE_FIELD_SIGN
172#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
173 ret = trace_define_field(event_call, #type, #item, \
174 offsetof(typeof(field), item), \
175 sizeof(field.item), is_signed); \
176 if (ret) \
177 return ret;
178
179#undef TRACE_FIELD_ZERO_CHAR
180#define TRACE_FIELD_ZERO_CHAR(item)
181
182#undef TRACE_EVENT_FORMAT
183#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
184int \
185ftrace_define_fields_##call(void) \
186{ \
187 struct ftrace_event_call *event_call = &event_##call; \
188 struct args field; \
189 int ret; \
190 \
191 __common_field(unsigned char, type, 0); \
192 __common_field(unsigned char, flags, 0); \
193 __common_field(unsigned char, preempt_count, 0); \
194 __common_field(int, pid, 1); \
195 __common_field(int, tgid, 1); \
196 \
197 tstruct; \
198 \
199 return ret; \
101} 200}
201
202#undef TRACE_EVENT_FORMAT_NOFILTER
203#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
204 tpfmt)
205
102#include "trace_event_types.h" 206#include "trace_event_types.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d28687e7b3a7..8b592418d8b2 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
65 if (!current->ret_stack) 65 if (!current->ret_stack)
66 return -EBUSY; 66 return -EBUSY;
67 67
68 /*
69 * We must make sure the ret_stack is tested before we read
70 * anything else.
71 */
72 smp_rmb();
73
68 /* The return trace stack is full */ 74 /* The return trace stack is full */
69 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { 75 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
70 atomic_inc(&current->trace_overrun); 76 atomic_inc(&current->trace_overrun);
@@ -78,13 +84,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
78 current->ret_stack[index].ret = ret; 84 current->ret_stack[index].ret = ret;
79 current->ret_stack[index].func = func; 85 current->ret_stack[index].func = func;
80 current->ret_stack[index].calltime = calltime; 86 current->ret_stack[index].calltime = calltime;
87 current->ret_stack[index].subtime = 0;
81 *depth = index; 88 *depth = index;
82 89
83 return 0; 90 return 0;
84} 91}
85 92
86/* Retrieve a function return address to the trace stack on thread info.*/ 93/* Retrieve a function return address to the trace stack on thread info.*/
87void 94static void
88ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) 95ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
89{ 96{
90 int index; 97 int index;
@@ -104,9 +111,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
104 trace->calltime = current->ret_stack[index].calltime; 111 trace->calltime = current->ret_stack[index].calltime;
105 trace->overrun = atomic_read(&current->trace_overrun); 112 trace->overrun = atomic_read(&current->trace_overrun);
106 trace->depth = index; 113 trace->depth = index;
107 barrier();
108 current->curr_ret_stack--;
109
110} 114}
111 115
112/* 116/*
@@ -121,6 +125,8 @@ unsigned long ftrace_return_to_handler(void)
121 ftrace_pop_return_trace(&trace, &ret); 125 ftrace_pop_return_trace(&trace, &ret);
122 trace.rettime = trace_clock_local(); 126 trace.rettime = trace_clock_local();
123 ftrace_graph_return(&trace); 127 ftrace_graph_return(&trace);
128 barrier();
129 current->curr_ret_stack--;
124 130
125 if (unlikely(!ret)) { 131 if (unlikely(!ret)) {
126 ftrace_graph_stop(); 132 ftrace_graph_stop();
@@ -426,8 +432,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
426 return TRACE_TYPE_HANDLED; 432 return TRACE_TYPE_HANDLED;
427} 433}
428 434
429static enum print_line_t 435enum print_line_t
430print_graph_duration(unsigned long long duration, struct trace_seq *s) 436trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
431{ 437{
432 unsigned long nsecs_rem = do_div(duration, 1000); 438 unsigned long nsecs_rem = do_div(duration, 1000);
433 /* log10(ULONG_MAX) + '\0' */ 439 /* log10(ULONG_MAX) + '\0' */
@@ -464,12 +470,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
464 if (!ret) 470 if (!ret)
465 return TRACE_TYPE_PARTIAL_LINE; 471 return TRACE_TYPE_PARTIAL_LINE;
466 } 472 }
473 return TRACE_TYPE_HANDLED;
474}
475
476static enum print_line_t
477print_graph_duration(unsigned long long duration, struct trace_seq *s)
478{
479 int ret;
480
481 ret = trace_print_graph_duration(duration, s);
482 if (ret != TRACE_TYPE_HANDLED)
483 return ret;
467 484
468 ret = trace_seq_printf(s, "| "); 485 ret = trace_seq_printf(s, "| ");
469 if (!ret) 486 if (!ret)
470 return TRACE_TYPE_PARTIAL_LINE; 487 return TRACE_TYPE_PARTIAL_LINE;
471 return TRACE_TYPE_HANDLED;
472 488
489 return TRACE_TYPE_HANDLED;
473} 490}
474 491
475/* Case of a leaf function on its call entry */ 492/* Case of a leaf function on its call entry */
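
The two ordering fixes above pair an smp_rmb() after the ret_stack test with a barrier() ahead of the curr_ret_stack decrement, so a popped slot is fully consumed before its index can be reused. A compressed userspace analogue of that discipline; the stub types and the barrier stand-ins are assumptions, not the kernel's primitives:

#define barrier()	__asm__ __volatile__("" ::: "memory")
#define smp_rmb()	__sync_synchronize()	/* full fence as a stand-in */

struct ret_entry { unsigned long ret; };

struct task_stub {
	struct ret_entry *ret_stack;	/* set once, tested by push() */
	int curr;
};

static int push(struct task_stub *t, unsigned long ret)
{
	if (!t->ret_stack)
		return -1;
	smp_rmb();	/* test ret_stack before reading anything else */
	t->ret_stack[++t->curr].ret = ret;
	return 0;
}

static unsigned long pop(struct task_stub *t)
{
	unsigned long ret = t->ret_stack[t->curr].ret;

	barrier();	/* finish reading the slot ...                 */
	t->curr--;	/* ... before the index is freed for reuse     */
	return ret;
}
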
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 7bfdf4c2347f..ca7d7c4d0c2a 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,10 +1,9 @@
1/* 1/*
2 * h/w branch tracer for x86 based on bts 2 * h/w branch tracer for x86 based on BTS
3 * 3 *
4 * Copyright (C) 2008-2009 Intel Corporation. 4 * Copyright (C) 2008-2009 Intel Corporation.
5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */ 6 */
7#include <linux/spinlock.h>
8#include <linux/kallsyms.h> 7#include <linux/kallsyms.h>
9#include <linux/debugfs.h> 8#include <linux/debugfs.h>
10#include <linux/ftrace.h> 9#include <linux/ftrace.h>
@@ -15,110 +14,119 @@
15 14
16#include <asm/ds.h> 15#include <asm/ds.h>
17 16
18#include "trace.h"
19#include "trace_output.h" 17#include "trace_output.h"
18#include "trace.h"
20 19
21 20
22#define SIZEOF_BTS (1 << 13) 21#define BTS_BUFFER_SIZE (1 << 13)
23 22
24/*
25 * The tracer lock protects the below per-cpu tracer array.
26 * It needs to be held to:
27 * - start tracing on all cpus
28 * - stop tracing on all cpus
29 * - start tracing on a single hotplug cpu
30 * - stop tracing on a single hotplug cpu
31 * - read the trace from all cpus
32 * - read the trace from a single cpu
33 */
34static DEFINE_SPINLOCK(bts_tracer_lock);
35static DEFINE_PER_CPU(struct bts_tracer *, tracer); 23static DEFINE_PER_CPU(struct bts_tracer *, tracer);
36static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); 24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
37 25
38#define this_tracer per_cpu(tracer, smp_processor_id()) 26#define this_tracer per_cpu(tracer, smp_processor_id())
39#define this_buffer per_cpu(buffer, smp_processor_id())
40 27
41static int __read_mostly trace_hw_branches_enabled; 28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly;
42static struct trace_array *hw_branch_trace __read_mostly; 30static struct trace_array *hw_branch_trace __read_mostly;
43 31
44 32
45/* 33static void bts_trace_init_cpu(int cpu)
46 * Start tracing on the current cpu.
47 * The argument is ignored.
48 *
49 * pre: bts_tracer_lock must be locked.
50 */
51static void bts_trace_start_cpu(void *arg)
52{ 34{
53 if (this_tracer) 35 per_cpu(tracer, cpu) =
54 ds_release_bts(this_tracer); 36 ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
55 37 NULL, (size_t)-1, BTS_KERNEL);
56 this_tracer = 38
57 ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS, 39 if (IS_ERR(per_cpu(tracer, cpu)))
58 /* ovfl = */ NULL, /* th = */ (size_t)-1, 40 per_cpu(tracer, cpu) = NULL;
59 BTS_KERNEL);
60 if (IS_ERR(this_tracer)) {
61 this_tracer = NULL;
62 return;
63 }
64} 41}
65 42
66static void bts_trace_start(struct trace_array *tr) 43static int bts_trace_init(struct trace_array *tr)
67{ 44{
68 spin_lock(&bts_tracer_lock); 45 int cpu;
46
47 hw_branch_trace = tr;
48 trace_hw_branches_enabled = 0;
69 49
70 on_each_cpu(bts_trace_start_cpu, NULL, 1); 50 get_online_cpus();
71 trace_hw_branches_enabled = 1; 51 for_each_online_cpu(cpu) {
52 bts_trace_init_cpu(cpu);
72 53
73 spin_unlock(&bts_tracer_lock); 54 if (likely(per_cpu(tracer, cpu)))
55 trace_hw_branches_enabled = 1;
56 }
57 trace_hw_branches_suspended = 0;
58 put_online_cpus();
59
60 /* If we could not enable tracing on a single cpu, we fail. */
61 return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
74} 62}
75 63
76/* 64static void bts_trace_reset(struct trace_array *tr)
77 * Stop tracing on the current cpu.
78 * The argument is ignored.
79 *
80 * pre: bts_tracer_lock must be locked.
81 */
82static void bts_trace_stop_cpu(void *arg)
83{ 65{
84 if (this_tracer) { 66 int cpu;
85 ds_release_bts(this_tracer); 67
86 this_tracer = NULL; 68 get_online_cpus();
69 for_each_online_cpu(cpu) {
70 if (likely(per_cpu(tracer, cpu))) {
71 ds_release_bts(per_cpu(tracer, cpu));
72 per_cpu(tracer, cpu) = NULL;
73 }
87 } 74 }
75 trace_hw_branches_enabled = 0;
76 trace_hw_branches_suspended = 0;
77 put_online_cpus();
88} 78}
89 79
90static void bts_trace_stop(struct trace_array *tr) 80static void bts_trace_start(struct trace_array *tr)
91{ 81{
92 spin_lock(&bts_tracer_lock); 82 int cpu;
93 83
94 trace_hw_branches_enabled = 0; 84 get_online_cpus();
95 on_each_cpu(bts_trace_stop_cpu, NULL, 1); 85 for_each_online_cpu(cpu)
86 if (likely(per_cpu(tracer, cpu)))
87 ds_resume_bts(per_cpu(tracer, cpu));
88 trace_hw_branches_suspended = 0;
89 put_online_cpus();
90}
96 91
97 spin_unlock(&bts_tracer_lock); 92static void bts_trace_stop(struct trace_array *tr)
93{
94 int cpu;
95
96 get_online_cpus();
97 for_each_online_cpu(cpu)
98 if (likely(per_cpu(tracer, cpu)))
99 ds_suspend_bts(per_cpu(tracer, cpu));
100 trace_hw_branches_suspended = 1;
101 put_online_cpus();
98} 102}
99 103
100static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, 104static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
101 unsigned long action, void *hcpu) 105 unsigned long action, void *hcpu)
102{ 106{
103 unsigned int cpu = (unsigned long)hcpu; 107 int cpu = (long)hcpu;
104
105 spin_lock(&bts_tracer_lock);
106
107 if (!trace_hw_branches_enabled)
108 goto out;
109 108
110 switch (action) { 109 switch (action) {
111 case CPU_ONLINE: 110 case CPU_ONLINE:
112 case CPU_DOWN_FAILED: 111 case CPU_DOWN_FAILED:
113 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); 112 /* The notification is sent with interrupts enabled. */
113 if (trace_hw_branches_enabled) {
114 bts_trace_init_cpu(cpu);
115
116 if (trace_hw_branches_suspended &&
117 likely(per_cpu(tracer, cpu)))
118 ds_suspend_bts(per_cpu(tracer, cpu));
119 }
114 break; 120 break;
121
115 case CPU_DOWN_PREPARE: 122 case CPU_DOWN_PREPARE:
116 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); 123 /* The notification is sent with interrupts enabled. */
117 break; 124 if (likely(per_cpu(tracer, cpu))) {
125 ds_release_bts(per_cpu(tracer, cpu));
126 per_cpu(tracer, cpu) = NULL;
127 }
118 } 128 }
119 129
120 out:
121 spin_unlock(&bts_tracer_lock);
122 return NOTIFY_DONE; 130 return NOTIFY_DONE;
123} 131}
124 132
@@ -126,20 +134,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
126 .notifier_call = bts_hotcpu_handler 134 .notifier_call = bts_hotcpu_handler
127}; 135};
128 136
129static int bts_trace_init(struct trace_array *tr)
130{
131 hw_branch_trace = tr;
132
133 bts_trace_start(tr);
134
135 return 0;
136}
137
138static void bts_trace_reset(struct trace_array *tr)
139{
140 bts_trace_stop(tr);
141}
142
143static void bts_trace_print_header(struct seq_file *m) 137static void bts_trace_print_header(struct seq_file *m)
144{ 138{
145 seq_puts(m, "# CPU# TO <- FROM\n"); 139 seq_puts(m, "# CPU# TO <- FROM\n");
@@ -147,10 +141,10 @@ static void bts_trace_print_header(struct seq_file *m)
147 141
148static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) 142static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
149{ 143{
144 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
150 struct trace_entry *entry = iter->ent; 145 struct trace_entry *entry = iter->ent;
151 struct trace_seq *seq = &iter->seq; 146 struct trace_seq *seq = &iter->seq;
152 struct hw_branch_entry *it; 147 struct hw_branch_entry *it;
153 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
154 148
155 trace_assign_type(it, entry); 149 trace_assign_type(it, entry);
156 150
@@ -168,6 +162,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
168 162
169void trace_hw_branch(u64 from, u64 to) 163void trace_hw_branch(u64 from, u64 to)
170{ 164{
165 struct ftrace_event_call *call = &event_hw_branch;
171 struct trace_array *tr = hw_branch_trace; 166 struct trace_array *tr = hw_branch_trace;
172 struct ring_buffer_event *event; 167 struct ring_buffer_event *event;
173 struct hw_branch_entry *entry; 168 struct hw_branch_entry *entry;
@@ -194,7 +189,8 @@ void trace_hw_branch(u64 from, u64 to)
194 entry->ent.type = TRACE_HW_BRANCHES; 189 entry->ent.type = TRACE_HW_BRANCHES;
195 entry->from = from; 190 entry->from = from;
196 entry->to = to; 191 entry->to = to;
197 trace_buffer_unlock_commit(tr, event, 0, 0); 192 if (!filter_check_discard(call, entry, tr->buffer, event))
193 trace_buffer_unlock_commit(tr, event, 0, 0);
198 194
199 out: 195 out:
200 atomic_dec(&tr->data[cpu]->disabled); 196 atomic_dec(&tr->data[cpu]->disabled);
@@ -224,11 +220,11 @@ static void trace_bts_at(const struct bts_trace *trace, void *at)
224/* 220/*
225 * Collect the trace on the current cpu and write it into the ftrace buffer. 221 * Collect the trace on the current cpu and write it into the ftrace buffer.
226 * 222 *
227 * pre: bts_tracer_lock must be locked 223 * pre: tracing must be suspended on the current cpu
228 */ 224 */
229static void trace_bts_cpu(void *arg) 225static void trace_bts_cpu(void *arg)
230{ 226{
231 struct trace_array *tr = (struct trace_array *) arg; 227 struct trace_array *tr = (struct trace_array *)arg;
232 const struct bts_trace *trace; 228 const struct bts_trace *trace;
233 unsigned char *at; 229 unsigned char *at;
234 230
@@ -241,10 +237,9 @@ static void trace_bts_cpu(void *arg)
241 if (unlikely(!this_tracer)) 237 if (unlikely(!this_tracer))
242 return; 238 return;
243 239
244 ds_suspend_bts(this_tracer);
245 trace = ds_read_bts(this_tracer); 240 trace = ds_read_bts(this_tracer);
246 if (!trace) 241 if (!trace)
247 goto out; 242 return;
248 243
249 for (at = trace->ds.top; (void *)at < trace->ds.end; 244 for (at = trace->ds.top; (void *)at < trace->ds.end;
250 at += trace->ds.size) 245 at += trace->ds.size)
@@ -253,18 +248,27 @@ static void trace_bts_cpu(void *arg)
253 for (at = trace->ds.begin; (void *)at < trace->ds.top; 248 for (at = trace->ds.begin; (void *)at < trace->ds.top;
254 at += trace->ds.size) 249 at += trace->ds.size)
255 trace_bts_at(trace, at); 250 trace_bts_at(trace, at);
256
257out:
258 ds_resume_bts(this_tracer);
259} 251}
260 252
261static void trace_bts_prepare(struct trace_iterator *iter) 253static void trace_bts_prepare(struct trace_iterator *iter)
262{ 254{
263 spin_lock(&bts_tracer_lock); 255 int cpu;
264 256
257 get_online_cpus();
258 for_each_online_cpu(cpu)
259 if (likely(per_cpu(tracer, cpu)))
260 ds_suspend_bts(per_cpu(tracer, cpu));
261 /*
262 * We need to collect the trace on the respective cpu since ftrace
263 * implicitly adds the record for the current cpu.
264 * Once that is more flexible, we could collect the data from any cpu.
265 */
265 on_each_cpu(trace_bts_cpu, iter->tr, 1); 266 on_each_cpu(trace_bts_cpu, iter->tr, 1);
266 267
267 spin_unlock(&bts_tracer_lock); 268 for_each_online_cpu(cpu)
269 if (likely(per_cpu(tracer, cpu)))
270 ds_resume_bts(per_cpu(tracer, cpu));
271 put_online_cpus();
268} 272}
269 273
270static void trace_bts_close(struct trace_iterator *iter) 274static void trace_bts_close(struct trace_iterator *iter)
@@ -274,11 +278,11 @@ static void trace_bts_close(struct trace_iterator *iter)
274 278
275void trace_hw_branch_oops(void) 279void trace_hw_branch_oops(void)
276{ 280{
277 spin_lock(&bts_tracer_lock); 281 if (this_tracer) {
278 282 ds_suspend_bts_noirq(this_tracer);
279 trace_bts_cpu(hw_branch_trace); 283 trace_bts_cpu(hw_branch_trace);
280 284 ds_resume_bts_noirq(this_tracer);
281 spin_unlock(&bts_tracer_lock); 285 }
282} 286}
283 287
284struct tracer bts_tracer __read_mostly = 288struct tracer bts_tracer __read_mostly =
@@ -291,7 +295,10 @@ struct tracer bts_tracer __read_mostly =
291 .start = bts_trace_start, 295 .start = bts_trace_start,
292 .stop = bts_trace_stop, 296 .stop = bts_trace_stop,
293 .open = trace_bts_prepare, 297 .open = trace_bts_prepare,
294 .close = trace_bts_close 298 .close = trace_bts_close,
299#ifdef CONFIG_FTRACE_SELFTEST
300 .selftest = trace_selftest_startup_hw_branches,
301#endif /* CONFIG_FTRACE_SELFTEST */
295}; 302};
296 303
297__init static int init_bts_trace(void) 304__init static int init_bts_trace(void)
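
The rework above drops bts_tracer_lock and the cross-cpu IPIs in favour of get_online_cpus()/for_each_online_cpu(), which pins the set of online cpus while per-cpu state is touched. A fragment of that pattern as a sketch; my_state and my_start_all() are hypothetical, and this is kernel code, not a standalone program:

#include <linux/cpu.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(int, my_state);

static void my_start_all(void)
{
	int cpu;

	get_online_cpus();		/* hold off cpu hotplug          */
	for_each_online_cpu(cpu)	/* the online mask is now stable */
		per_cpu(my_state, cpu) = 1;
	put_online_cpus();		/* allow hotplug again           */
}
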
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 8e37fcddd8b4..d53b45ed0806 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,8 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/time.h>
13
12#include <asm/atomic.h> 14#include <asm/atomic.h>
13 15
14#include "trace.h" 16#include "trace.h"
@@ -174,7 +176,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
174 struct mmiotrace_rw *rw; 176 struct mmiotrace_rw *rw;
175 struct trace_seq *s = &iter->seq; 177 struct trace_seq *s = &iter->seq;
176 unsigned long long t = ns2usecs(iter->ts); 178 unsigned long long t = ns2usecs(iter->ts);
177 unsigned long usec_rem = do_div(t, 1000000ULL); 179 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
178 unsigned secs = (unsigned long)t; 180 unsigned secs = (unsigned long)t;
179 int ret = 1; 181 int ret = 1;
180 182
@@ -221,7 +223,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
221 struct mmiotrace_map *m; 223 struct mmiotrace_map *m;
222 struct trace_seq *s = &iter->seq; 224 struct trace_seq *s = &iter->seq;
223 unsigned long long t = ns2usecs(iter->ts); 225 unsigned long long t = ns2usecs(iter->ts);
224 unsigned long usec_rem = do_div(t, 1000000ULL); 226 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
225 unsigned secs = (unsigned long)t; 227 unsigned secs = (unsigned long)t;
226 int ret; 228 int ret;
227 229
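
Both hunks swap the literal 1000000ULL for USEC_PER_SEC in the do_div() call that splits a microsecond count into seconds plus remainder. A standalone demo of that split, using a plain-C stand-in for the kernel's do_div():

#include <stdio.h>

/* Userspace stand-in for do_div(): divides in place and returns the
 * remainder, like the kernel macro. */
static unsigned long do_div_demo(unsigned long long *n, unsigned int base)
{
	unsigned long rem = (unsigned long)(*n % base);

	*n /= base;
	return rem;
}

int main(void)
{
	unsigned long long t = 1234567890ULL;	/* microseconds */
	unsigned long usec_rem = do_div_demo(&t, 1000000);

	printf("%llu.%06lu s\n", t, usec_rem);	/* prints 1234.567890 s */
	return 0;
}
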
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 64b54a59c55b..7938f3ae93e3 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -14,11 +14,25 @@
14/* must be a power of 2 */ 14/* must be a power of 2 */
15#define EVENT_HASHSIZE 128 15#define EVENT_HASHSIZE 128
16 16
17static DEFINE_MUTEX(trace_event_mutex); 17DECLARE_RWSEM(trace_event_mutex);
18
19DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
20EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
21
18static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; 22static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
19 23
20static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
21 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29
30 s->buffer[len] = 0;
31 seq_puts(m, s->buffer);
32
33 trace_seq_init(s);
34}
35
22enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 36enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
23{ 37{
24 struct trace_seq *s = &iter->seq; 38 struct trace_seq *s = &iter->seq;
@@ -84,6 +98,39 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
84 98
85 return len; 99 return len;
86} 100}
101EXPORT_SYMBOL_GPL(trace_seq_printf);
102
103/**
104 * trace_seq_vprintf - sequence printing of trace information
105 * @s: trace sequence descriptor
106 * @fmt: printf format string
107 *
108 * The tracer may use either sequence operations or its own
109 * copy-to-user routines. To simplify formatting of a trace,
110 * trace_seq_printf() is used to store strings into a special
111 * buffer (@s). Then the output may be either used by
112 * the sequencer or pulled into another buffer.
113 */
114int
115trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
116{
117 int len = (PAGE_SIZE - 1) - s->len;
118 int ret;
119
120 if (!len)
121 return 0;
122
123 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
124
125 /* If we can't write it all, don't bother writing anything */
126 if (ret >= len)
127 return 0;
128
129 s->len += ret;
130
131 return len;
132}
133EXPORT_SYMBOL_GPL(trace_seq_vprintf);
87 134
88int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) 135int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
89{ 136{
@@ -201,6 +248,67 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
201 return 0; 248 return 0;
202} 249}
203 250
251const char *
252ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
253 unsigned long flags,
254 const struct trace_print_flags *flag_array)
255{
256 unsigned long mask;
257 const char *str;
258 const char *ret = p->buffer + p->len;
259 int i;
260
261 for (i = 0; flag_array[i].name && flags; i++) {
262
263 mask = flag_array[i].mask;
264 if ((flags & mask) != mask)
265 continue;
266
267 str = flag_array[i].name;
268 flags &= ~mask;
269 if (p->len && delim)
270 trace_seq_puts(p, delim);
271 trace_seq_puts(p, str);
272 }
273
274 /* check for left over flags */
275 if (flags) {
276 if (p->len && delim)
277 trace_seq_puts(p, delim);
278 trace_seq_printf(p, "0x%lx", flags);
279 }
280
281 trace_seq_putc(p, 0);
282
283 return ret;
284}
285EXPORT_SYMBOL(ftrace_print_flags_seq);
286
287const char *
288ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
289 const struct trace_print_flags *symbol_array)
290{
291 int i;
292 const char *ret = p->buffer + p->len;
293
294 for (i = 0; symbol_array[i].name; i++) {
295
296 if (val != symbol_array[i].mask)
297 continue;
298
299 trace_seq_puts(p, symbol_array[i].name);
300 break;
301 }
302
303 if (!p->len)
304 trace_seq_printf(p, "0x%lx", val);
305
306 trace_seq_putc(p, 0);
307
308 return ret;
309}
310EXPORT_SYMBOL(ftrace_print_symbols_seq);
311
204#ifdef CONFIG_KRETPROBES 312#ifdef CONFIG_KRETPROBES
205static inline const char *kretprobed(const char *name) 313static inline const char *kretprobed(const char *name)
206{ 314{
@@ -311,17 +419,20 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
311 419
312 if (ip == ULONG_MAX || !ret) 420 if (ip == ULONG_MAX || !ret)
313 break; 421 break;
314 if (i && ret) 422 if (ret)
315 ret = trace_seq_puts(s, " <- "); 423 ret = trace_seq_puts(s, " => ");
316 if (!ip) { 424 if (!ip) {
317 if (ret) 425 if (ret)
318 ret = trace_seq_puts(s, "??"); 426 ret = trace_seq_puts(s, "??");
427 if (ret)
428 ret = trace_seq_puts(s, "\n");
319 continue; 429 continue;
320 } 430 }
321 if (!ret) 431 if (!ret)
322 break; 432 break;
323 if (ret) 433 if (ret)
324 ret = seq_print_user_ip(s, mm, ip, sym_flags); 434 ret = seq_print_user_ip(s, mm, ip, sym_flags);
435 ret = trace_seq_puts(s, "\n");
325 } 436 }
326 437
327 if (mm) 438 if (mm)
@@ -455,6 +566,7 @@ static int task_state_char(unsigned long state)
455 * @type: the type of event to look for 566 * @type: the type of event to look for
456 * 567 *
457 * Returns an event of type @type otherwise NULL 568 * Returns an event of type @type otherwise NULL
569 * Called with trace_event_read_lock() held.
458 */ 570 */
459struct trace_event *ftrace_find_event(int type) 571struct trace_event *ftrace_find_event(int type)
460{ 572{
@@ -464,7 +576,7 @@ struct trace_event *ftrace_find_event(int type)
464 576
465 key = type & (EVENT_HASHSIZE - 1); 577 key = type & (EVENT_HASHSIZE - 1);
466 578
467 hlist_for_each_entry_rcu(event, n, &event_hash[key], node) { 579 hlist_for_each_entry(event, n, &event_hash[key], node) {
468 if (event->type == type) 580 if (event->type == type)
469 return event; 581 return event;
470 } 582 }
@@ -472,6 +584,46 @@ struct trace_event *ftrace_find_event(int type)
472 return NULL; 584 return NULL;
473} 585}
474 586
587static LIST_HEAD(ftrace_event_list);
588
589static int trace_search_list(struct list_head **list)
590{
591 struct trace_event *e;
592 int last = __TRACE_LAST_TYPE;
593
594 if (list_empty(&ftrace_event_list)) {
595 *list = &ftrace_event_list;
596 return last + 1;
597 }
598
599 /*
600 * We have used up all the possible max events;
601 * let's see if somebody freed one.
602 */
603 list_for_each_entry(e, &ftrace_event_list, list) {
604 if (e->type != last + 1)
605 break;
606 last++;
607 }
608
609 /* Did we use up all 65 thousand events? */
610 if ((last + 1) > FTRACE_MAX_EVENT)
611 return 0;
612
613 *list = &e->list;
614 return last + 1;
615}
616
617void trace_event_read_lock(void)
618{
619 down_read(&trace_event_mutex);
620}
621
622void trace_event_read_unlock(void)
623{
624 up_read(&trace_event_mutex);
625}
626
475/** 627/**
476 * register_ftrace_event - register output for an event type 628 * register_ftrace_event - register output for an event type
477 * @event: the event type to register 629 * @event: the event type to register
@@ -492,22 +644,42 @@ int register_ftrace_event(struct trace_event *event)
492 unsigned key; 644 unsigned key;
493 int ret = 0; 645 int ret = 0;
494 646
495 mutex_lock(&trace_event_mutex); 647 down_write(&trace_event_mutex);
496 648
497 if (!event) { 649 if (WARN_ON(!event))
498 ret = next_event_type++;
499 goto out; 650 goto out;
500 }
501 651
502 if (!event->type) 652 INIT_LIST_HEAD(&event->list);
503 event->type = next_event_type++; 653
504 else if (event->type > __TRACE_LAST_TYPE) { 654 if (!event->type) {
655 struct list_head *list = NULL;
656
657 if (next_event_type > FTRACE_MAX_EVENT) {
658
659 event->type = trace_search_list(&list);
660 if (!event->type)
661 goto out;
662
663 } else {
664
665 event->type = next_event_type++;
666 list = &ftrace_event_list;
667 }
668
669 if (WARN_ON(ftrace_find_event(event->type)))
670 goto out;
671
672 list_add_tail(&event->list, list);
673
674 } else if (event->type > __TRACE_LAST_TYPE) {
505 printk(KERN_WARNING "Need to add type to trace.h\n"); 675 printk(KERN_WARNING "Need to add type to trace.h\n");
506 WARN_ON(1); 676 WARN_ON(1);
507 }
508
509 if (ftrace_find_event(event->type))
510 goto out; 677 goto out;
678 } else {
679 /* Is this event already used */
680 if (ftrace_find_event(event->type))
681 goto out;
682 }
511 683
512 if (event->trace == NULL) 684 if (event->trace == NULL)
513 event->trace = trace_nop_print; 685 event->trace = trace_nop_print;
@@ -520,14 +692,25 @@ int register_ftrace_event(struct trace_event *event)
520 692
521 key = event->type & (EVENT_HASHSIZE - 1); 693 key = event->type & (EVENT_HASHSIZE - 1);
522 694
523 hlist_add_head_rcu(&event->node, &event_hash[key]); 695 hlist_add_head(&event->node, &event_hash[key]);
524 696
525 ret = event->type; 697 ret = event->type;
526 out: 698 out:
527 mutex_unlock(&trace_event_mutex); 699 up_write(&trace_event_mutex);
528 700
529 return ret; 701 return ret;
530} 702}
703EXPORT_SYMBOL_GPL(register_ftrace_event);
704
705/*
706 * Used by module code with the trace_event_mutex held for write.
707 */
708int __unregister_ftrace_event(struct trace_event *event)
709{
710 hlist_del(&event->node);
711 list_del(&event->list);
712 return 0;
713}
531 714
532/** 715/**
533 * unregister_ftrace_event - remove a no longer used event 716 * unregister_ftrace_event - remove a no longer used event
@@ -535,12 +718,13 @@ int register_ftrace_event(struct trace_event *event)
535 */ 718 */
536int unregister_ftrace_event(struct trace_event *event) 719int unregister_ftrace_event(struct trace_event *event)
537{ 720{
538 mutex_lock(&trace_event_mutex); 721 down_write(&trace_event_mutex);
539 hlist_del(&event->node); 722 __unregister_ftrace_event(event);
540 mutex_unlock(&trace_event_mutex); 723 up_write(&trace_event_mutex);
541 724
542 return 0; 725 return 0;
543} 726}
727EXPORT_SYMBOL_GPL(unregister_ftrace_event);
544 728
545/* 729/*
546 * Standard events 730 * Standard events
@@ -833,14 +1017,16 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
833 1017
834 trace_assign_type(field, iter->ent); 1018 trace_assign_type(field, iter->ent);
835 1019
1020 if (!trace_seq_puts(s, "<stack trace>\n"))
1021 goto partial;
836 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1022 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
837 if (i) { 1023 if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
838 if (!trace_seq_puts(s, " <= ")) 1024 break;
839 goto partial; 1025 if (!trace_seq_puts(s, " => "))
1026 goto partial;
840 1027
841 if (!seq_print_ip_sym(s, field->caller[i], flags)) 1028 if (!seq_print_ip_sym(s, field->caller[i], flags))
842 goto partial; 1029 goto partial;
843 }
844 if (!trace_seq_puts(s, "\n")) 1030 if (!trace_seq_puts(s, "\n"))
845 goto partial; 1031 goto partial;
846 } 1032 }
@@ -868,10 +1054,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
868 1054
869 trace_assign_type(field, iter->ent); 1055 trace_assign_type(field, iter->ent);
870 1056
871 if (!seq_print_userip_objs(field, s, flags)) 1057 if (!trace_seq_puts(s, "<user stack trace>\n"))
872 goto partial; 1058 goto partial;
873 1059
874 if (!trace_seq_putc(s, '\n')) 1060 if (!seq_print_userip_objs(field, s, flags))
875 goto partial; 1061 goto partial;
876 1062
877 return TRACE_TYPE_HANDLED; 1063 return TRACE_TYPE_HANDLED;
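
ftrace_print_flags_seq(), added above, walks a NULL-terminated trace_print_flags table, emits a name for every fully-set mask, and prints any leftover bits as hex. A usage sketch; the table and its values are invented for illustration:

static const struct trace_print_flags irq_flags[] = {
	{ 0x01,	"LEVEL"  },
	{ 0x02,	"SHARED" },
	{ 0x04,	"WAKEUP" },
	{ 0,	NULL     }
};

/* With flags == 0x07 this appends "LEVEL|SHARED|WAKEUP" to @p;
 * a bit with no table entry falls back to an "0x..." literal. */
static const char *show_irq_flags(struct trace_seq *p, unsigned long flags)
{
	return ftrace_print_flags_seq(p, "|", flags, irq_flags);
}
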
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index e0bde39c2dd9..d38bec4a9c30 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -1,41 +1,17 @@
1#ifndef __TRACE_EVENTS_H 1#ifndef __TRACE_EVENTS_H
2#define __TRACE_EVENTS_H 2#define __TRACE_EVENTS_H
3 3
4#include <linux/trace_seq.h>
4#include "trace.h" 5#include "trace.h"
5 6
6typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
7 int flags);
8
9struct trace_event {
10 struct hlist_node node;
11 int type;
12 trace_print_func trace;
13 trace_print_func raw;
14 trace_print_func hex;
15 trace_print_func binary;
16};
17
18extern enum print_line_t 7extern enum print_line_t
19trace_print_bprintk_msg_only(struct trace_iterator *iter); 8trace_print_bprintk_msg_only(struct trace_iterator *iter);
20extern enum print_line_t 9extern enum print_line_t
21trace_print_printk_msg_only(struct trace_iterator *iter); 10trace_print_printk_msg_only(struct trace_iterator *iter);
22 11
23extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
24 __attribute__ ((format (printf, 2, 3)));
25extern int
26trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
27extern int 12extern int
28seq_print_ip_sym(struct trace_seq *s, unsigned long ip, 13seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
29 unsigned long sym_flags); 14 unsigned long sym_flags);
30extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
31 size_t cnt);
32extern int trace_seq_puts(struct trace_seq *s, const char *str);
33extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
34extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
35extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
36 size_t len);
37extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
38extern int trace_seq_path(struct trace_seq *s, struct path *path);
39extern int seq_print_userip_objs(const struct userstack_entry *entry, 15extern int seq_print_userip_objs(const struct userstack_entry *entry,
40 struct trace_seq *s, unsigned long sym_flags); 16 struct trace_seq *s, unsigned long sym_flags);
41extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, 17extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
@@ -44,13 +20,17 @@ extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
44extern int trace_print_context(struct trace_iterator *iter); 20extern int trace_print_context(struct trace_iterator *iter);
45extern int trace_print_lat_context(struct trace_iterator *iter); 21extern int trace_print_lat_context(struct trace_iterator *iter);
46 22
23extern void trace_event_read_lock(void);
24extern void trace_event_read_unlock(void);
47extern struct trace_event *ftrace_find_event(int type); 25extern struct trace_event *ftrace_find_event(int type);
48extern int register_ftrace_event(struct trace_event *event);
49extern int unregister_ftrace_event(struct trace_event *event);
50 26
51extern enum print_line_t trace_nop_print(struct trace_iterator *iter, 27extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
52 int flags); 28 int flags);
53 29
30/* used by module unregistering */
31extern int __unregister_ftrace_event(struct trace_event *event);
32extern struct rw_semaphore trace_event_mutex;
33
54#define MAX_MEMHEX_BYTES 8 34#define MAX_MEMHEX_BYTES 8
55#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) 35#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
56 36
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 118439709fb7..8a30d9874cd4 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type,
36 36
37static void probe_power_end(struct power_trace *it) 37static void probe_power_end(struct power_trace *it)
38{ 38{
39 struct ftrace_event_call *call = &event_power;
39 struct ring_buffer_event *event; 40 struct ring_buffer_event *event;
40 struct trace_power *entry; 41 struct trace_power *entry;
41 struct trace_array_cpu *data; 42 struct trace_array_cpu *data;
@@ -54,7 +55,8 @@ static void probe_power_end(struct power_trace *it)
54 goto out; 55 goto out;
55 entry = ring_buffer_event_data(event); 56 entry = ring_buffer_event_data(event);
56 entry->state_data = *it; 57 entry->state_data = *it;
57 trace_buffer_unlock_commit(tr, event, 0, 0); 58 if (!filter_check_discard(call, entry, tr->buffer, event))
59 trace_buffer_unlock_commit(tr, event, 0, 0);
58 out: 60 out:
59 preempt_enable(); 61 preempt_enable();
60} 62}
@@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it)
62static void probe_power_mark(struct power_trace *it, unsigned int type, 64static void probe_power_mark(struct power_trace *it, unsigned int type,
63 unsigned int level) 65 unsigned int level)
64{ 66{
67 struct ftrace_event_call *call = &event_power;
65 struct ring_buffer_event *event; 68 struct ring_buffer_event *event;
66 struct trace_power *entry; 69 struct trace_power *entry;
67 struct trace_array_cpu *data; 70 struct trace_array_cpu *data;
@@ -84,7 +87,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
84 goto out; 87 goto out;
85 entry = ring_buffer_event_data(event); 88 entry = ring_buffer_event_data(event);
86 entry->state_data = *it; 89 entry->state_data = *it;
87 trace_buffer_unlock_commit(tr, event, 0, 0); 90 if (!filter_check_discard(call, entry, tr->buffer, event))
91 trace_buffer_unlock_commit(tr, event, 0, 0);
88 out: 92 out:
89 preempt_enable(); 93 preempt_enable();
90} 94}
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index eb81556107fe..9bece9687b62 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = {
245static __init int init_trace_printk_function_export(void) 245static __init int init_trace_printk_function_export(void)
246{ 246{
247 struct dentry *d_tracer; 247 struct dentry *d_tracer;
248 struct dentry *entry;
249 248
250 d_tracer = tracing_init_dentry(); 249 d_tracer = tracing_init_dentry();
251 if (!d_tracer) 250 if (!d_tracer)
252 return 0; 251 return 0;
253 252
254 entry = debugfs_create_file("printk_formats", 0444, d_tracer, 253 trace_create_file("printk_formats", 0444, d_tracer,
255 NULL, &ftrace_formats_fops); 254 NULL, &ftrace_formats_fops);
256 if (!entry)
257 pr_warning("Could not create debugfs "
258 "'printk_formats' entry\n");
259 255
260 return 0; 256 return 0;
261} 257}
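
The per-call "Could not create debugfs ..." warnings can go away here because trace_create_file() centralises the failure report. Presumably the wrapper looks roughly like this sketch; the in-tree version may differ:

struct dentry *trace_create_file(const char *name, mode_t mode,
				 struct dentry *parent, void *data,
				 const struct file_operations *fops)
{
	struct dentry *ret;

	ret = debugfs_create_file(name, mode, parent, data, fops);
	if (!ret)
		pr_warning("Could not create debugfs '%s' entry\n", name);

	return ret;
}
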
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 9117cea6f1ae..a98106dd979c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -10,7 +10,7 @@
10#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
11#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/ftrace.h> 12#include <linux/ftrace.h>
13#include <trace/sched.h> 13#include <trace/events/sched.h>
14 14
15#include "trace.h" 15#include "trace.h"
16 16
@@ -29,13 +29,13 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
29 int cpu; 29 int cpu;
30 int pc; 30 int pc;
31 31
32 if (!sched_ref || sched_stopped) 32 if (unlikely(!sched_ref))
33 return; 33 return;
34 34
35 tracing_record_cmdline(prev); 35 tracing_record_cmdline(prev);
36 tracing_record_cmdline(next); 36 tracing_record_cmdline(next);
37 37
38 if (!tracer_enabled) 38 if (!tracer_enabled || sched_stopped)
39 return; 39 return;
40 40
41 pc = preempt_count(); 41 pc = preempt_count();
@@ -56,15 +56,15 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
56 unsigned long flags; 56 unsigned long flags;
57 int cpu, pc; 57 int cpu, pc;
58 58
59 if (!likely(tracer_enabled)) 59 if (unlikely(!sched_ref))
60 return; 60 return;
61 61
62 pc = preempt_count();
63 tracing_record_cmdline(current); 62 tracing_record_cmdline(current);
64 63
65 if (sched_stopped) 64 if (!tracer_enabled || sched_stopped)
66 return; 65 return;
67 66
67 pc = preempt_count();
68 local_irq_save(flags); 68 local_irq_save(flags);
69 cpu = raw_smp_processor_id(); 69 cpu = raw_smp_processor_id();
70 data = ctx_trace->data[cpu]; 70 data = ctx_trace->data[cpu];
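
Both reshuffled probes now follow one check ordering: test sched_ref first (is anyone using the tracepoint at all), record cmdlines so the pid-to-comm cache stays warm even while output is off, and only then bail out on !tracer_enabled || sched_stopped. Annotated as a sketch of that shape:

	if (unlikely(!sched_ref))	/* cheapest test: no consumers    */
		return;

	tracing_record_cmdline(prev);	/* keep pid -> comm mapping fresh */
	tracing_record_cmdline(next);	/* even while output is disabled  */

	if (!tracer_enabled || sched_stopped)
		return;			/* skip only the buffer write     */
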
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5bc00e8f153e..eacb27225173 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <trace/sched.h> 18#include <trace/events/sched.h>
19 19
20#include "trace.h" 20#include "trace.h"
21 21
@@ -138,9 +138,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
138 138
139 pc = preempt_count(); 139 pc = preempt_count();
140 140
141 /* The task we are waiting for is waking up */
142 data = wakeup_trace->data[wakeup_cpu];
143
144 /* disable local data, not wakeup_cpu data */ 141 /* disable local data, not wakeup_cpu data */
145 cpu = raw_smp_processor_id(); 142 cpu = raw_smp_processor_id();
146 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); 143 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
@@ -154,6 +151,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
154 if (unlikely(!tracer_enabled || next != wakeup_task)) 151 if (unlikely(!tracer_enabled || next != wakeup_task))
155 goto out_unlock; 152 goto out_unlock;
156 153
154 /* The task we are waiting for is waking up */
155 data = wakeup_trace->data[wakeup_cpu];
156
157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
159 159
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 08f4eb2763d1..00dd6485bdd7 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
16 case TRACE_BRANCH: 16 case TRACE_BRANCH:
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES:
19 return 1; 20 return 1;
20 } 21 }
21 return 0; 22 return 0;
@@ -188,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
188#else 189#else
189# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) 190# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
190#endif /* CONFIG_DYNAMIC_FTRACE */ 191#endif /* CONFIG_DYNAMIC_FTRACE */
192
191/* 193/*
192 * Simple verification test of ftrace function tracer. 194 * Simple verification test of ftrace function tracer.
193 * Enable ftrace, sleep 1/10 second, and then read the trace 195 * Enable ftrace, sleep 1/10 second, and then read the trace
@@ -749,3 +751,59 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
749 return ret; 751 return ret;
750} 752}
751#endif /* CONFIG_BRANCH_TRACER */ 753#endif /* CONFIG_BRANCH_TRACER */
754
755#ifdef CONFIG_HW_BRANCH_TRACER
756int
757trace_selftest_startup_hw_branches(struct tracer *trace,
758 struct trace_array *tr)
759{
760 struct trace_iterator *iter;
761 struct tracer tracer;
762 unsigned long count;
763 int ret;
764
765 if (!trace->open) {
766 printk(KERN_CONT "missing open function...");
767 return -1;
768 }
769
770 ret = tracer_init(trace, tr);
771 if (ret) {
772 warn_failed_init_tracer(trace, ret);
773 return ret;
774 }
775
776 /*
777 * The hw-branch tracer needs to collect the trace from the various
778 * cpu trace buffers - before tracing is stopped.
779 */
780 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
781 if (!iter)
782 return -ENOMEM;
783
784 memcpy(&tracer, trace, sizeof(tracer));
785
786 iter->trace = &tracer;
787 iter->tr = tr;
788 iter->pos = -1;
789 mutex_init(&iter->mutex);
790
791 trace->open(iter);
792
793 mutex_destroy(&iter->mutex);
794 kfree(iter);
795
796 tracing_stop();
797
798 ret = trace_test_buffer(tr, &count);
799 trace->reset(tr);
800 tracing_start();
801
802 if (!ret && !count) {
803 printk(KERN_CONT "no entries found..");
804 ret = -1;
805 }
806
807 return ret;
808}
809#endif /* CONFIG_HW_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index c750f65f9661..2d7aebd71dbd 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -265,7 +265,7 @@ static int t_show(struct seq_file *m, void *v)
265 seq_printf(m, " Depth Size Location" 265 seq_printf(m, " Depth Size Location"
266 " (%d entries)\n" 266 " (%d entries)\n"
267 " ----- ---- --------\n", 267 " ----- ---- --------\n",
268 max_stack_trace.nr_entries); 268 max_stack_trace.nr_entries - 1);
269 269
270 if (!stack_tracer_enabled && !max_stack_size) 270 if (!stack_tracer_enabled && !max_stack_size)
271 print_disabled(m); 271 print_disabled(m);
@@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace);
352static __init int stack_trace_init(void) 352static __init int stack_trace_init(void)
353{ 353{
354 struct dentry *d_tracer; 354 struct dentry *d_tracer;
355 struct dentry *entry;
356 355
357 d_tracer = tracing_init_dentry(); 356 d_tracer = tracing_init_dentry();
358 357
359 entry = debugfs_create_file("stack_max_size", 0644, d_tracer, 358 trace_create_file("stack_max_size", 0644, d_tracer,
360 &max_stack_size, &stack_max_size_fops); 359 &max_stack_size, &stack_max_size_fops);
361 if (!entry)
362 pr_warning("Could not create debugfs 'stack_max_size' entry\n");
363 360
364 entry = debugfs_create_file("stack_trace", 0444, d_tracer, 361 trace_create_file("stack_trace", 0444, d_tracer,
365 NULL, &stack_trace_fops); 362 NULL, &stack_trace_fops);
366 if (!entry)
367 pr_warning("Could not create debugfs 'stack_trace' entry\n");
368 363
369 if (stack_tracer_enabled) 364 if (stack_tracer_enabled)
370 register_ftrace_function(&trace_ops); 365 register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index acdebd771a93..c00643733f4c 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Infrastructure for statistic tracing (histogram output). 2 * Infrastructure for statistic tracing (histogram output).
3 * 3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> 4 * Copyright (C) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 * 5 *
6 * Based on the code from trace_branch.c which is 6 * Based on the code from trace_branch.c which is
7 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> 7 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
@@ -10,22 +10,27 @@
10 10
11 11
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/rbtree.h>
13#include <linux/debugfs.h> 14#include <linux/debugfs.h>
14#include "trace_stat.h" 15#include "trace_stat.h"
15#include "trace.h" 16#include "trace.h"
16 17
17 18
18/* List of stat entries from a tracer */ 19/*
19struct trace_stat_list { 20 * List of stat red-black nodes from a tracer
20 struct list_head list; 21 * We use such a tree to quickly sort the stat
22 * entries from the tracer.
23 */
24struct stat_node {
25 struct rb_node node;
21 void *stat; 26 void *stat;
22}; 27};
23 28
24/* A stat session is the stats output in one file */ 29/* A stat session is the stats output in one file */
25struct tracer_stat_session { 30struct stat_session {
26 struct list_head session_list; 31 struct list_head session_list;
27 struct tracer_stat *ts; 32 struct tracer_stat *ts;
28 struct list_head stat_list; 33 struct rb_root stat_root;
29 struct mutex stat_mutex; 34 struct mutex stat_mutex;
30 struct dentry *file; 35 struct dentry *file;
31}; 36};
@@ -37,18 +42,48 @@ static DEFINE_MUTEX(all_stat_sessions_mutex);
37/* The root directory for all stat files */ 42/* The root directory for all stat files */
38static struct dentry *stat_dir; 43static struct dentry *stat_dir;
39 44
45/*
46 * Iterate through the rbtree using a post-order traversal path
47 * to release the next node.
48 * It won't necessarily release one at each iteration,
49 * but it will at least advance closer to the next one
50 * to be released.
51 */
52static struct rb_node *release_next(struct rb_node *node)
53{
54 struct stat_node *snode;
55 struct rb_node *parent = rb_parent(node);
56
57 if (node->rb_left)
58 return node->rb_left;
59 else if (node->rb_right)
60 return node->rb_right;
61 else {
62 if (!parent)
63 ;
64 else if (parent->rb_left == node)
65 parent->rb_left = NULL;
66 else
67 parent->rb_right = NULL;
68
69 snode = container_of(node, struct stat_node, node);
70 kfree(snode);
71
72 return parent;
73 }
74}
40 75
41static void reset_stat_session(struct tracer_stat_session *session) 76static void reset_stat_session(struct stat_session *session)
42{ 77{
43 struct trace_stat_list *node, *next; 78 struct rb_node *node = session->stat_root.rb_node;
44 79
45 list_for_each_entry_safe(node, next, &session->stat_list, list) 80 while (node)
46 kfree(node); 81 node = release_next(node);
47 82
48 INIT_LIST_HEAD(&session->stat_list); 83 session->stat_root = RB_ROOT;
49} 84}
50 85
51static void destroy_session(struct tracer_stat_session *session) 86static void destroy_session(struct stat_session *session)
52{ 87{
53 debugfs_remove(session->file); 88 debugfs_remove(session->file);
54 reset_stat_session(session); 89 reset_stat_session(session);
@@ -56,25 +91,60 @@ static void destroy_session(struct tracer_stat_session *session)
56 kfree(session); 91 kfree(session);
57} 92}
58 93
94typedef int (*cmp_stat_t)(void *, void *);
95
96static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
97{
98 struct rb_node **new = &(root->rb_node), *parent = NULL;
99 struct stat_node *data;
100
101 data = kzalloc(sizeof(*data), GFP_KERNEL);
102 if (!data)
103 return -ENOMEM;
104 data->stat = stat;
105
106 /*
107 * Figure out where to put the new node.
108 * This is a descending sort.
109 */
110 while (*new) {
111 struct stat_node *this;
112 int result;
113
114 this = container_of(*new, struct stat_node, node);
115 result = cmp(data->stat, this->stat);
116
117 parent = *new;
118 if (result >= 0)
119 new = &((*new)->rb_left);
120 else
121 new = &((*new)->rb_right);
122 }
123
124 rb_link_node(&data->node, parent, new);
125 rb_insert_color(&data->node, root);
126 return 0;
127}
128
59/* 129/*
60 * For tracers that don't provide a stat_cmp callback. 130 * For tracers that don't provide a stat_cmp callback.
61 * This one will force an immediate insertion on tail of 131 * This one will force an insertion as the right-most node
62 * the list. 132 * in the rbtree.
63 */ 133 */
64static int dummy_cmp(void *p1, void *p2) 134static int dummy_cmp(void *p1, void *p2)
65{ 135{
66 return 1; 136 return -1;
67} 137}
68 138
69/* 139/*
70 * Initialize the stat list at each trace_stat file opening. 140 * Initialize the stat rbtree at each trace_stat file opening.
71 * All of this copying and sorting is required on every opening 141 * All of this copying and sorting is required on every opening
72 * since the stats could have changed between two file sessions. 142 * since the stats could have changed between two file sessions.
73 */ 143 */
74static int stat_seq_init(struct tracer_stat_session *session) 144static int stat_seq_init(struct stat_session *session)
75{ 145{
76 struct trace_stat_list *iter_entry, *new_entry;
77 struct tracer_stat *ts = session->ts; 146 struct tracer_stat *ts = session->ts;
147 struct rb_root *root = &session->stat_root;
78 void *stat; 148 void *stat;
79 int ret = 0; 149 int ret = 0;
80 int i; 150 int i;
@@ -85,29 +155,16 @@ static int stat_seq_init(struct tracer_stat_session *session)
85 if (!ts->stat_cmp) 155 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp; 156 ts->stat_cmp = dummy_cmp;
87 157
88 stat = ts->stat_start(); 158 stat = ts->stat_start(ts);
89 if (!stat) 159 if (!stat)
90 goto exit; 160 goto exit;
91 161
92 /* 162 ret = insert_stat(root, stat, ts->stat_cmp);
93 * The first entry. Actually this is the second, but the first 163 if (ret)
94 * one (the stat_list head) is pointless.
95 */
96 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
97 if (!new_entry) {
98 ret = -ENOMEM;
99 goto exit; 164 goto exit;
100 }
101
102 INIT_LIST_HEAD(&new_entry->list);
103
104 list_add(&new_entry->list, &session->stat_list);
105
106 new_entry->stat = stat;
107 165
108 /* 166 /*
109 * Iterate over the tracer stat entries and store them in a sorted 167 * Iterate over the tracer stat entries and store them in an rbtree.
110 * list.
111 */ 168 */
112 for (i = 1; ; i++) { 169 for (i = 1; ; i++) {
113 stat = ts->stat_next(stat, i); 170 stat = ts->stat_next(stat, i);
@@ -116,36 +173,16 @@ static int stat_seq_init(struct tracer_stat_session *session)
116 if (!stat) 173 if (!stat)
117 break; 174 break;
118 175
119 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); 176 ret = insert_stat(root, stat, ts->stat_cmp);
120 if (!new_entry) { 177 if (ret)
121 ret = -ENOMEM; 178 goto exit_free_rbtree;
122 goto exit_free_list;
123 }
124
125 INIT_LIST_HEAD(&new_entry->list);
126 new_entry->stat = stat;
127
128 list_for_each_entry_reverse(iter_entry, &session->stat_list,
129 list) {
130
131 /* Insertion with a descendent sorting */
132 if (ts->stat_cmp(iter_entry->stat,
133 new_entry->stat) >= 0) {
134
135 list_add(&new_entry->list, &iter_entry->list);
136 break;
137 }
138 }
139
140 /* The current larger value */
141 if (list_empty(&new_entry->list))
142 list_add(&new_entry->list, &session->stat_list);
143 } 179 }
180
144exit: 181exit:
145 mutex_unlock(&session->stat_mutex); 182 mutex_unlock(&session->stat_mutex);
146 return ret; 183 return ret;
147 184
148exit_free_list: 185exit_free_rbtree:
149 reset_stat_session(session); 186 reset_stat_session(session);
150 mutex_unlock(&session->stat_mutex); 187 mutex_unlock(&session->stat_mutex);
151 return ret; 188 return ret;
@@ -154,38 +191,51 @@ exit_free_list:
154 191
155static void *stat_seq_start(struct seq_file *s, loff_t *pos) 192static void *stat_seq_start(struct seq_file *s, loff_t *pos)
156{ 193{
157 struct tracer_stat_session *session = s->private; 194 struct stat_session *session = s->private;
195 struct rb_node *node;
196 int i;
158 197
159 /* Prevent from tracer switch or stat_list modification */ 198 /* Prevent a tracer switch or rbtree modification */
160 mutex_lock(&session->stat_mutex); 199 mutex_lock(&session->stat_mutex);
161 200
162 /* If we are in the beginning of the file, print the headers */ 201 /* If we are in the beginning of the file, print the headers */
163 if (!*pos && session->ts->stat_headers) 202 if (!*pos && session->ts->stat_headers) {
203 (*pos)++;
164 return SEQ_START_TOKEN; 204 return SEQ_START_TOKEN;
205 }
165 206
166 return seq_list_start(&session->stat_list, *pos); 207 node = rb_first(&session->stat_root);
208 for (i = 0; node && i < *pos; i++)
209 node = rb_next(node);
210
211 (*pos)++;
212
213 return node;
167} 214}
168 215
169static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) 216static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
170{ 217{
171 struct tracer_stat_session *session = s->private; 218 struct stat_session *session = s->private;
219 struct rb_node *node = p;
220
221 (*pos)++;
172 222
173 if (p == SEQ_START_TOKEN) 223 if (p == SEQ_START_TOKEN)
174 return seq_list_start(&session->stat_list, *pos); 224 return rb_first(&session->stat_root);
175 225
176 return seq_list_next(p, &session->stat_list, pos); 226 return rb_next(node);
177} 227}
178 228
179static void stat_seq_stop(struct seq_file *s, void *p) 229static void stat_seq_stop(struct seq_file *s, void *p)
180{ 230{
181 struct tracer_stat_session *session = s->private; 231 struct stat_session *session = s->private;
182 mutex_unlock(&session->stat_mutex); 232 mutex_unlock(&session->stat_mutex);
183} 233}
184 234
185static int stat_seq_show(struct seq_file *s, void *v) 235static int stat_seq_show(struct seq_file *s, void *v)
186{ 236{
187 struct tracer_stat_session *session = s->private; 237 struct stat_session *session = s->private;
188 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); 238 struct stat_node *l = container_of(v, struct stat_node, node);
189 239
190 if (v == SEQ_START_TOKEN) 240 if (v == SEQ_START_TOKEN)
191 return session->ts->stat_headers(s); 241 return session->ts->stat_headers(s);
@@ -205,7 +255,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
205{ 255{
206 int ret; 256 int ret;
207 257
208 struct tracer_stat_session *session = inode->i_private; 258 struct stat_session *session = inode->i_private;
209 259
210 ret = seq_open(file, &trace_stat_seq_ops); 260 ret = seq_open(file, &trace_stat_seq_ops);
211 if (!ret) { 261 if (!ret) {
@@ -218,11 +268,11 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
218} 268}
219 269
220/* 270/*
221 * Avoid consuming memory with our now useless list. 271 * Avoid consuming memory with our now useless rbtree.
222 */ 272 */
223static int tracing_stat_release(struct inode *i, struct file *f) 273static int tracing_stat_release(struct inode *i, struct file *f)
224{ 274{
225 struct tracer_stat_session *session = i->i_private; 275 struct stat_session *session = i->i_private;
226 276
227 mutex_lock(&session->stat_mutex); 277 mutex_lock(&session->stat_mutex);
228 reset_stat_session(session); 278 reset_stat_session(session);
@@ -251,7 +301,7 @@ static int tracing_stat_init(void)
251 return 0; 301 return 0;
252} 302}
253 303
254static int init_stat_file(struct tracer_stat_session *session) 304static int init_stat_file(struct stat_session *session)
255{ 305{
256 if (!stat_dir && tracing_stat_init()) 306 if (!stat_dir && tracing_stat_init())
257 return -ENODEV; 307 return -ENODEV;
@@ -266,7 +316,7 @@ static int init_stat_file(struct tracer_stat_session *session)
266 316
267int register_stat_tracer(struct tracer_stat *trace) 317int register_stat_tracer(struct tracer_stat *trace)
268{ 318{
269 struct tracer_stat_session *session, *node, *tmp; 319 struct stat_session *session, *node;
270 int ret; 320 int ret;
271 321
272 if (!trace) 322 if (!trace)
@@ -277,7 +327,7 @@ int register_stat_tracer(struct tracer_stat *trace)
277 327
278 /* Already registered? */ 328 /* Already registered? */
279 mutex_lock(&all_stat_sessions_mutex); 329 mutex_lock(&all_stat_sessions_mutex);
280 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { 330 list_for_each_entry(node, &all_stat_sessions, session_list) {
281 if (node->ts == trace) { 331 if (node->ts == trace) {
282 mutex_unlock(&all_stat_sessions_mutex); 332 mutex_unlock(&all_stat_sessions_mutex);
283 return -EINVAL; 333 return -EINVAL;
@@ -286,15 +336,13 @@ int register_stat_tracer(struct tracer_stat *trace)
286 mutex_unlock(&all_stat_sessions_mutex); 336 mutex_unlock(&all_stat_sessions_mutex);
287 337
288 /* Init the session */ 338 /* Init the session */
289 session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL); 339 session = kzalloc(sizeof(*session), GFP_KERNEL);
290 if (!session) 340 if (!session)
291 return -ENOMEM; 341 return -ENOMEM;
292 342
293 session->ts = trace; 343 session->ts = trace;
294 INIT_LIST_HEAD(&session->session_list); 344 INIT_LIST_HEAD(&session->session_list);
295 INIT_LIST_HEAD(&session->stat_list);
296 mutex_init(&session->stat_mutex); 345 mutex_init(&session->stat_mutex);
297 session->file = NULL;
298 346
299 ret = init_stat_file(session); 347 ret = init_stat_file(session);
300 if (ret) { 348 if (ret) {
@@ -312,7 +360,7 @@ int register_stat_tracer(struct tracer_stat *trace)
312 360
313void unregister_stat_tracer(struct tracer_stat *trace) 361void unregister_stat_tracer(struct tracer_stat *trace)
314{ 362{
315 struct tracer_stat_session *node, *tmp; 363 struct stat_session *node, *tmp;
316 364
317 mutex_lock(&all_stat_sessions_mutex); 365 mutex_lock(&all_stat_sessions_mutex);
318 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { 366 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
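
insert_stat() above descends left whenever cmp() returns >= 0, so a positive return means "sort the new entry earlier"; dummy_cmp()'s constant -1 therefore degrades to right-most, insertion-order placement. A hypothetical stat_cmp callback that yields a descending sort by hit count:

struct my_stat {
	unsigned long hits;
};

/* Called as cmp(new, existing) by insert_stat(). */
static int my_stat_cmp(void *p1, void *p2)
{
	struct my_stat *a = p1, *b = p2;

	if (a->hits > b->hits)
		return 1;	/* new entry sorts before the existing one */
	if (a->hits < b->hits)
		return -1;
	return 0;
}
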
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index 202274cf7f3d..f3546a2cd826 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -12,7 +12,7 @@ struct tracer_stat {
12 /* The name of your stat file */ 12 /* The name of your stat file */
13 const char *name; 13 const char *name;
14 /* Iteration over statistic entries */ 14 /* Iteration over statistic entries */
15 void *(*stat_start)(void); 15 void *(*stat_start)(struct tracer_stat *trace);
16 void *(*stat_next)(void *prev, int idx); 16 void *(*stat_next)(void *prev, int idx);
17 /* Compare two entries for stats sorting */ 17 /* Compare two entries for stats sorting */
18 int (*stat_cmp)(void *p1, void *p2); 18 int (*stat_cmp)(void *p1, void *p2);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 91fd19c2149f..e04b76cc238a 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = {
321 321
322void init_tracer_sysprof_debugfs(struct dentry *d_tracer) 322void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
323{ 323{
324 struct dentry *entry;
325 324
326 entry = debugfs_create_file("sysprof_sample_period", 0644, 325 trace_create_file("sysprof_sample_period", 0644,
327 d_tracer, NULL, &sysprof_sample_fops); 326 d_tracer, NULL, &sysprof_sample_fops);
328 if (entry)
329 return;
330 pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
331} 327}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 797201e4a137..97fcea4acce1 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -6,7 +6,7 @@
6 */ 6 */
7 7
8 8
9#include <trace/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include "trace_stat.h" 12#include "trace_stat.h"
@@ -16,8 +16,6 @@
16/* A cpu workqueue thread */ 16/* A cpu workqueue thread */
17struct cpu_workqueue_stats { 17struct cpu_workqueue_stats {
18 struct list_head list; 18 struct list_head list;
19/* Useful to know if we print the cpu headers */
20 bool first_entry;
21 int cpu; 19 int cpu;
22 pid_t pid; 20 pid_t pid;
23/* Can be inserted from interrupt or user context, need to be atomic */ 21/* Can be inserted from interrupt or user context, need to be atomic */
@@ -47,12 +45,11 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
47 struct work_struct *work) 45 struct work_struct *work)
48{ 46{
49 int cpu = cpumask_first(&wq_thread->cpus_allowed); 47 int cpu = cpumask_first(&wq_thread->cpus_allowed);
50 struct cpu_workqueue_stats *node, *next; 48 struct cpu_workqueue_stats *node;
51 unsigned long flags; 49 unsigned long flags;
52 50
53 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 51 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
54 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, 52 list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
55 list) {
56 if (node->pid == wq_thread->pid) { 53 if (node->pid == wq_thread->pid) {
57 atomic_inc(&node->inserted); 54 atomic_inc(&node->inserted);
58 goto found; 55 goto found;
@@ -69,12 +66,11 @@ probe_workqueue_execution(struct task_struct *wq_thread,
69 struct work_struct *work) 66 struct work_struct *work)
70{ 67{
71 int cpu = cpumask_first(&wq_thread->cpus_allowed); 68 int cpu = cpumask_first(&wq_thread->cpus_allowed);
72 struct cpu_workqueue_stats *node, *next; 69 struct cpu_workqueue_stats *node;
73 unsigned long flags; 70 unsigned long flags;
74 71
75 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 72 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
76 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, 73 list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
77 list) {
78 if (node->pid == wq_thread->pid) { 74 if (node->pid == wq_thread->pid) {
79 node->executed++; 75 node->executed++;
80 goto found; 76 goto found;
@@ -105,8 +101,6 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
105 cws->pid = wq_thread->pid; 101 cws->pid = wq_thread->pid;
106 102
107 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 103 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
108 if (list_empty(&workqueue_cpu_stat(cpu)->list))
109 cws->first_entry = true;
110 list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list); 104 list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
111 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 105 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
112} 106}
@@ -152,7 +146,7 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
152 return ret; 146 return ret;
153} 147}
154 148
155static void *workqueue_stat_start(void) 149static void *workqueue_stat_start(struct tracer_stat *trace)
156{ 150{
157 int cpu; 151 int cpu;
158 void *ret = NULL; 152 void *ret = NULL;
@@ -191,16 +185,9 @@ static void *workqueue_stat_next(void *prev, int idx)
191static int workqueue_stat_show(struct seq_file *s, void *p) 185static int workqueue_stat_show(struct seq_file *s, void *p)
192{ 186{
193 struct cpu_workqueue_stats *cws = p; 187 struct cpu_workqueue_stats *cws = p;
194 unsigned long flags;
195 int cpu = cws->cpu;
196 struct pid *pid; 188 struct pid *pid;
197 struct task_struct *tsk; 189 struct task_struct *tsk;
198 190
199 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
200 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
201 seq_printf(s, "\n");
202 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
203
204 pid = find_get_pid(cws->pid); 191 pid = find_get_pid(cws->pid);
205 if (pid) { 192 if (pid) {
206 tsk = get_pid_task(pid, PIDTYPE_PID); 193 tsk = get_pid_task(pid, PIDTYPE_PID);
diff --git a/kernel/wait.c b/kernel/wait.c
index 42a2dbc181c8..ea7c3b4275cf 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
154 if (!list_empty(&wait->task_list)) 154 if (!list_empty(&wait->task_list))
155 list_del_init(&wait->task_list); 155 list_del_init(&wait->task_list);
156 else if (waitqueue_active(q)) 156 else if (waitqueue_active(q))
157 __wake_up_common(q, mode, 1, 0, key); 157 __wake_up_locked_key(q, mode, key);
158 spin_unlock_irqrestore(&q->lock, flags); 158 spin_unlock_irqrestore(&q->lock, flags);
159} 159}
160EXPORT_SYMBOL(abort_exclusive_wait); 160EXPORT_SYMBOL(abort_exclusive_wait);
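
The fix above matters because abort_exclusive_wait() runs with q->lock already held, so the _locked_ wakeup variant must be used there; __wake_up_common() is an internal helper. A condensed restatement of the locking rule (not new code from the patch):

	spin_lock_irqsave(&q->lock, flags);
	/* lock held: only the _locked wakeup variants are legal here */
	__wake_up_locked_key(q, mode, key);
	spin_unlock_irqrestore(&q->lock, flags);
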
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f71fb2a08950..0668795d8818 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,7 +33,8 @@
33#include <linux/kallsyms.h> 33#include <linux/kallsyms.h>
34#include <linux/debug_locks.h> 34#include <linux/debug_locks.h>
35#include <linux/lockdep.h> 35#include <linux/lockdep.h>
36#include <trace/workqueue.h> 36#define CREATE_TRACE_POINTS
37#include <trace/events/workqueue.h>
37 38
38/* 39/*
39 * The per-CPU workqueue (if single thread, we always use the first 40 * The per-CPU workqueue (if single thread, we always use the first
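
This is the standard conversion to the unified trace/events/ headers: exactly one translation unit defines CREATE_TRACE_POINTS before the include to instantiate the tracepoints, which is why the scattered DEFINE_TRACE() lines are removed further down. The pattern, in outline (file roles illustrative):

	/* In exactly one .c file -- instantiates the tracepoints: */
	#define CREATE_TRACE_POINTS
	#include <trace/events/workqueue.h>

	/* In every other user -- declarations only: */
	#include <trace/events/workqueue.h>
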
@@ -124,8 +125,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
124 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); 125 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
125} 126}
126 127
127DEFINE_TRACE(workqueue_insertion);
128
129static void insert_work(struct cpu_workqueue_struct *cwq, 128static void insert_work(struct cpu_workqueue_struct *cwq,
130 struct work_struct *work, struct list_head *head) 129 struct work_struct *work, struct list_head *head)
131{ 130{
@@ -262,8 +261,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
262} 261}
263EXPORT_SYMBOL_GPL(queue_delayed_work_on); 262EXPORT_SYMBOL_GPL(queue_delayed_work_on);
264 263
265DEFINE_TRACE(workqueue_execution);
266
267static void run_workqueue(struct cpu_workqueue_struct *cwq) 264static void run_workqueue(struct cpu_workqueue_struct *cwq)
268{ 265{
269 spin_lock_irq(&cwq->lock); 266 spin_lock_irq(&cwq->lock);
@@ -753,8 +750,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
753 return cwq; 750 return cwq;
754} 751}
755 752
756DEFINE_TRACE(workqueue_creation);
757
758static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) 753static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
759{ 754{
760 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 755 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -860,8 +855,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
860} 855}
861EXPORT_SYMBOL_GPL(__create_workqueue_key); 856EXPORT_SYMBOL_GPL(__create_workqueue_key);
862 857
863DEFINE_TRACE(workqueue_destruction);
864
865static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) 858static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
866{ 859{
867 /* 860 /*
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 1f71b97de0f9..eb23aaa0c7b8 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -119,6 +119,12 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
119} 119}
120EXPORT_SYMBOL(alloc_cpumask_var_node); 120EXPORT_SYMBOL(alloc_cpumask_var_node);
121 121
122bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
123{
124 return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node);
125}
126EXPORT_SYMBOL(zalloc_cpumask_var_node);
127
122/** 128/**
123 * alloc_cpumask_var - allocate a struct cpumask 129 * alloc_cpumask_var - allocate a struct cpumask
124 * @mask: pointer to cpumask_var_t where the cpumask is returned 130 * @mask: pointer to cpumask_var_t where the cpumask is returned
@@ -135,6 +141,12 @@ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
135} 141}
136EXPORT_SYMBOL(alloc_cpumask_var); 142EXPORT_SYMBOL(alloc_cpumask_var);
137 143
144bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
145{
146 return alloc_cpumask_var(mask, flags | __GFP_ZERO);
147}
148EXPORT_SYMBOL(zalloc_cpumask_var);
149
138/** 150/**
139 * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena. 151 * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena.
140 * @mask: pointer to cpumask_var_t where the cpumask is returned 152 * @mask: pointer to cpumask_var_t where the cpumask is returned
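
The two zalloc wrappers simply OR __GFP_ZERO into the allocation flags, so callers get a cleared mask without a separate cpumask_clear(). A hedged usage sketch (function name is illustrative):

	static int example_pick_cpu(void)	/* illustrative only */
	{
		cpumask_var_t mask;

		if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
			return -ENOMEM;

		/* mask starts out all-clear; no cpumask_clear() needed */
		cpumask_set_cpu(0, mask);

		free_cpumask_var(mask);
		return 0;
	}
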
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 69da09a085a1..ad65fc0317d9 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -23,9 +23,11 @@
23#include <linux/dma-debug.h> 23#include <linux/dma-debug.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/uaccess.h>
26#include <linux/device.h> 27#include <linux/device.h>
27#include <linux/types.h> 28#include <linux/types.h>
28#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/ctype.h>
29#include <linux/list.h> 31#include <linux/list.h>
30#include <linux/slab.h> 32#include <linux/slab.h>
31 33
@@ -85,6 +87,7 @@ static u32 show_num_errors = 1;
85 87
86static u32 num_free_entries; 88static u32 num_free_entries;
87static u32 min_free_entries; 89static u32 min_free_entries;
90static u32 nr_total_entries;
88 91
89/* number of preallocated entries requested by kernel cmdline */ 92/* number of preallocated entries requested by kernel cmdline */
90static u32 req_entries; 93static u32 req_entries;
@@ -97,6 +100,16 @@ static struct dentry *show_all_errors_dent __read_mostly;
97static struct dentry *show_num_errors_dent __read_mostly; 100static struct dentry *show_num_errors_dent __read_mostly;
98static struct dentry *num_free_entries_dent __read_mostly; 101static struct dentry *num_free_entries_dent __read_mostly;
99static struct dentry *min_free_entries_dent __read_mostly; 102static struct dentry *min_free_entries_dent __read_mostly;
103static struct dentry *filter_dent __read_mostly;
104
105/* per-driver filter related state */
106
107#define NAME_MAX_LEN 64
108
109static char current_driver_name[NAME_MAX_LEN] __read_mostly;
110static struct device_driver *current_driver __read_mostly;
111
112static DEFINE_RWLOCK(driver_name_lock);
100 113
101static const char *type2name[4] = { "single", "page", 114static const char *type2name[4] = { "single", "page",
102		"scatter-gather", "coherent" };		115		"scatter-gather", "coherent" };
@@ -104,6 +117,11 @@ static const char *type2name[4] = { "single", "page",
104static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", 117static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
105 "DMA_FROM_DEVICE", "DMA_NONE" }; 118 "DMA_FROM_DEVICE", "DMA_NONE" };
106 119
120/* little merge helper - remove it after the merge window */
121#ifndef BUS_NOTIFY_UNBOUND_DRIVER
122#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
123#endif
124
107/* 125/*
108 * The access to some variables in this macro is racy. We can't use atomic_t 126 * The access to some variables in this macro is racy. We can't use atomic_t
109 * here because all these variables are exported to debugfs. Some of them even 127 * here because all these variables are exported to debugfs. Some of them even
@@ -121,15 +139,54 @@ static inline void dump_entry_trace(struct dma_debug_entry *entry)
121{ 139{
122#ifdef CONFIG_STACKTRACE 140#ifdef CONFIG_STACKTRACE
123 if (entry) { 141 if (entry) {
124 printk(KERN_WARNING "Mapped at:\n"); 142 pr_warning("Mapped at:\n");
125 print_stack_trace(&entry->stacktrace, 0); 143 print_stack_trace(&entry->stacktrace, 0);
126 } 144 }
127#endif 145#endif
128} 146}
129 147
148static bool driver_filter(struct device *dev)
149{
150 struct device_driver *drv;
151 unsigned long flags;
152 bool ret;
153
154 /* driver filter off */
155 if (likely(!current_driver_name[0]))
156 return true;
157
158 /* driver filter on and initialized */
159 if (current_driver && dev->driver == current_driver)
160 return true;
161
162 if (current_driver || !current_driver_name[0])
163 return false;
164
165 /* driver filter on but not yet initialized */
166 drv = get_driver(dev->driver);
167 if (!drv)
168 return false;
169
170 /* lock to protect against change of current_driver_name */
171 read_lock_irqsave(&driver_name_lock, flags);
172
173 ret = false;
174 if (drv->name &&
175 strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
176 current_driver = drv;
177 ret = true;
178 }
179
180 read_unlock_irqrestore(&driver_name_lock, flags);
181 put_driver(drv);
182
183 return ret;
184}
185
130#define err_printk(dev, entry, format, arg...) do { \ 186#define err_printk(dev, entry, format, arg...) do { \
131 error_count += 1; \ 187 error_count += 1; \
132 if (show_all_errors || show_num_errors > 0) { \ 188 if (driver_filter(dev) && \
189 (show_all_errors || show_num_errors > 0)) { \
133 WARN(1, "%s %s: " format, \ 190 WARN(1, "%s %s: " format, \
134 dev_driver_string(dev), \ 191 dev_driver_string(dev), \
135 dev_name(dev) , ## arg); \ 192 dev_name(dev) , ## arg); \
@@ -185,15 +242,50 @@ static void put_hash_bucket(struct hash_bucket *bucket,
185static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, 242static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
186 struct dma_debug_entry *ref) 243 struct dma_debug_entry *ref)
187{ 244{
188 struct dma_debug_entry *entry; 245 struct dma_debug_entry *entry, *ret = NULL;
246 int matches = 0, match_lvl, last_lvl = 0;
189 247
190 list_for_each_entry(entry, &bucket->list, list) { 248 list_for_each_entry(entry, &bucket->list, list) {
191 if ((entry->dev_addr == ref->dev_addr) && 249 if ((entry->dev_addr != ref->dev_addr) ||
192 (entry->dev == ref->dev)) 250 (entry->dev != ref->dev))
251 continue;
252
253 /*
254 * Some drivers map the same physical address multiple
255 * times. Without a hardware IOMMU this results in the
256 * same device addresses being put into the dma-debug
257 * hash multiple times too. This can result in false
258	 * positives being reported. Therefore we implement a
259 * best-fit algorithm here which returns the entry from
260	 * the hash which best fits the reference value
261 * instead of the first-fit.
262 */
263 matches += 1;
264 match_lvl = 0;
265 entry->size == ref->size ? ++match_lvl : match_lvl;
266 entry->type == ref->type ? ++match_lvl : match_lvl;
267 entry->direction == ref->direction ? ++match_lvl : match_lvl;
268
269 if (match_lvl == 3) {
270 /* perfect-fit - return the result */
193 return entry; 271 return entry;
272 } else if (match_lvl > last_lvl) {
273 /*
274	 * We found an entry that fits better than the
275 * previous one
276 */
277 last_lvl = match_lvl;
278 ret = entry;
279 }
194 } 280 }
195 281
196 return NULL; 282 /*
283 * If we have multiple matches but no perfect-fit, just return
284 * NULL.
285 */
286 ret = (matches == 1) ? ret : NULL;
287
288 return ret;
197} 289}
198 290
199/* 291/*
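
The loop above scores each candidate with the same dev/dev_addr on how many of size, type and direction match: a score of 3 returns immediately, the best partial score is remembered, and an ambiguous outcome (several candidates, none perfect) yields NULL rather than a guess. The scoring rule, restated as a helper for clarity (not part of the patch):

	static int match_level(struct dma_debug_entry *e,
			       struct dma_debug_entry *ref)
	{
		int lvl = 0;

		if (e->size == ref->size)
			lvl += 1;
		if (e->type == ref->type)
			lvl += 1;
		if (e->direction == ref->direction)
			lvl += 1;

		return lvl;	/* 3 == perfect fit */
	}
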
@@ -257,6 +349,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
257 put_hash_bucket(bucket, &flags); 349 put_hash_bucket(bucket, &flags);
258} 350}
259 351
352static struct dma_debug_entry *__dma_entry_alloc(void)
353{
354 struct dma_debug_entry *entry;
355
356 entry = list_entry(free_entries.next, struct dma_debug_entry, list);
357 list_del(&entry->list);
358 memset(entry, 0, sizeof(*entry));
359
360 num_free_entries -= 1;
361 if (num_free_entries < min_free_entries)
362 min_free_entries = num_free_entries;
363
364 return entry;
365}
366
260/* struct dma_entry allocator 367/* struct dma_entry allocator
261 * 368 *
262 * The next two functions implement the allocator for 369 * The next two functions implement the allocator for
@@ -270,15 +377,12 @@ static struct dma_debug_entry *dma_entry_alloc(void)
270 spin_lock_irqsave(&free_entries_lock, flags); 377 spin_lock_irqsave(&free_entries_lock, flags);
271 378
272 if (list_empty(&free_entries)) { 379 if (list_empty(&free_entries)) {
273 printk(KERN_ERR "DMA-API: debugging out of memory " 380 pr_err("DMA-API: debugging out of memory - disabling\n");
274 "- disabling\n");
275 global_disable = true; 381 global_disable = true;
276 goto out; 382 goto out;
277 } 383 }
278 384
279 entry = list_entry(free_entries.next, struct dma_debug_entry, list); 385 entry = __dma_entry_alloc();
280 list_del(&entry->list);
281 memset(entry, 0, sizeof(*entry));
282 386
283#ifdef CONFIG_STACKTRACE 387#ifdef CONFIG_STACKTRACE
284 entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; 388 entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +390,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
286 entry->stacktrace.skip = 2; 390 entry->stacktrace.skip = 2;
287 save_stack_trace(&entry->stacktrace); 391 save_stack_trace(&entry->stacktrace);
288#endif 392#endif
289 num_free_entries -= 1;
290 if (num_free_entries < min_free_entries)
291 min_free_entries = num_free_entries;
292 393
293out: 394out:
294 spin_unlock_irqrestore(&free_entries_lock, flags); 395 spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +411,53 @@ static void dma_entry_free(struct dma_debug_entry *entry)
310 spin_unlock_irqrestore(&free_entries_lock, flags); 411 spin_unlock_irqrestore(&free_entries_lock, flags);
311} 412}
312 413
414int dma_debug_resize_entries(u32 num_entries)
415{
416 int i, delta, ret = 0;
417 unsigned long flags;
418 struct dma_debug_entry *entry;
419 LIST_HEAD(tmp);
420
421 spin_lock_irqsave(&free_entries_lock, flags);
422
423 if (nr_total_entries < num_entries) {
424 delta = num_entries - nr_total_entries;
425
426 spin_unlock_irqrestore(&free_entries_lock, flags);
427
428 for (i = 0; i < delta; i++) {
429 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
430 if (!entry)
431 break;
432
433 list_add_tail(&entry->list, &tmp);
434 }
435
436 spin_lock_irqsave(&free_entries_lock, flags);
437
438 list_splice(&tmp, &free_entries);
439 nr_total_entries += i;
440 num_free_entries += i;
441 } else {
442 delta = nr_total_entries - num_entries;
443
444 for (i = 0; i < delta && !list_empty(&free_entries); i++) {
445 entry = __dma_entry_alloc();
446 kfree(entry);
447 }
448
449 nr_total_entries -= i;
450 }
451
452 if (nr_total_entries != num_entries)
453 ret = 1;
454
455 spin_unlock_irqrestore(&free_entries_lock, flags);
456
457 return ret;
458}
459EXPORT_SYMBOL(dma_debug_resize_entries);
460
313/* 461/*
314 * DMA-API debugging init code 462 * DMA-API debugging init code
315 * 463 *
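
dma_debug_resize_entries() grows the pool by allocating the delta with the lock dropped and splicing it in, or shrinks it by freeing entries off the free list; a nonzero return means the target count could not be met. A hedged caller sketch (the entry count and message are the caller's choice, not from the patch):

	/* Illustrative: an IOMMU driver requesting a larger pool. */
	u32 wanted = 65536;

	if (dma_debug_resize_entries(wanted))
		pr_warning("could not resize dma-debug entry pool\n");
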
@@ -334,8 +482,7 @@ static int prealloc_memory(u32 num_entries)
334 num_free_entries = num_entries; 482 num_free_entries = num_entries;
335 min_free_entries = num_entries; 483 min_free_entries = num_entries;
336 484
337 printk(KERN_INFO "DMA-API: preallocated %d debug entries\n", 485 pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
338 num_entries);
339 486
340 return 0; 487 return 0;
341 488
@@ -349,11 +496,102 @@ out_err:
349 return -ENOMEM; 496 return -ENOMEM;
350} 497}
351 498
499static ssize_t filter_read(struct file *file, char __user *user_buf,
500 size_t count, loff_t *ppos)
501{
502 char buf[NAME_MAX_LEN + 1];
503 unsigned long flags;
504 int len;
505
506 if (!current_driver_name[0])
507 return 0;
508
509 /*
510 * We can't copy to userspace directly because current_driver_name can
511 * only be read under the driver_name_lock with irqs disabled. So
512 * create a temporary copy first.
513 */
514 read_lock_irqsave(&driver_name_lock, flags);
515 len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
516 read_unlock_irqrestore(&driver_name_lock, flags);
517
518 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
519}
520
521static ssize_t filter_write(struct file *file, const char __user *userbuf,
522 size_t count, loff_t *ppos)
523{
524 char buf[NAME_MAX_LEN];
525 unsigned long flags;
526 size_t len;
527 int i;
528
529 /*
530 * We can't copy from userspace directly. Access to
531 * current_driver_name is protected with a write_lock with irqs
532	 * disabled. Since copy_from_user can fault and may sleep, we
533	 * need to copy to a temporary buffer first.
534 */
535 len = min(count, (size_t)(NAME_MAX_LEN - 1));
536 if (copy_from_user(buf, userbuf, len))
537 return -EFAULT;
538
539 buf[len] = 0;
540
541 write_lock_irqsave(&driver_name_lock, flags);
542
543 /*
544 * Now handle the string we got from userspace very carefully.
545 * The rules are:
546 * - only use the first token we got
547 * - token delimiter is everything looking like a space
548 * character (' ', '\n', '\t' ...)
549 *
550 */
551 if (!isalnum(buf[0])) {
552 /*
553 * If the first character userspace gave us is not
554 * alphanumerical then assume the filter should be
555 * switched off.
556 */
557 if (current_driver_name[0])
558 pr_info("DMA-API: switching off dma-debug driver filter\n");
559 current_driver_name[0] = 0;
560 current_driver = NULL;
561 goto out_unlock;
562 }
563
564 /*
565 * Now parse out the first token and use it as the name for the
566 * driver to filter for.
567 */
568 for (i = 0; i < NAME_MAX_LEN; ++i) {
569 current_driver_name[i] = buf[i];
570 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
571 break;
572 }
573 current_driver_name[i] = 0;
574 current_driver = NULL;
575
576 pr_info("DMA-API: enable driver filter for driver [%s]\n",
577 current_driver_name);
578
579out_unlock:
580 write_unlock_irqrestore(&driver_name_lock, flags);
581
582 return count;
583}
584
585const struct file_operations filter_fops = {
586 .read = filter_read,
587 .write = filter_write,
588};
589
352static int dma_debug_fs_init(void) 590static int dma_debug_fs_init(void)
353{ 591{
354 dma_debug_dent = debugfs_create_dir("dma-api", NULL); 592 dma_debug_dent = debugfs_create_dir("dma-api", NULL);
355 if (!dma_debug_dent) { 593 if (!dma_debug_dent) {
356 printk(KERN_ERR "DMA-API: can not create debugfs directory\n"); 594 pr_err("DMA-API: can not create debugfs directory\n");
357 return -ENOMEM; 595 return -ENOMEM;
358 } 596 }
359 597
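
filter_write() keeps only the first whitespace-delimited token and disables the filter when the first byte is not alphanumeric, so from userspace the knob is driven with a plain write. A sketch of such a write (the path assumes debugfs mounted at /sys/kernel/debug; the helper name is made up):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	static int set_dma_debug_filter(const char *drv)
	{
		int fd = open("/sys/kernel/debug/dma-api/driver_filter",
			      O_WRONLY);

		if (fd < 0)
			return -1;
		/* a non-alphanumeric first byte, e.g. "\n", disables it */
		if (write(fd, drv, strlen(drv)) < 0) {
			close(fd);
			return -1;
		}
		close(fd);
		return 0;
	}
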
@@ -392,6 +630,11 @@ static int dma_debug_fs_init(void)
392 if (!min_free_entries_dent) 630 if (!min_free_entries_dent)
393 goto out_err; 631 goto out_err;
394 632
633 filter_dent = debugfs_create_file("driver_filter", 0644,
634 dma_debug_dent, NULL, &filter_fops);
635 if (!filter_dent)
636 goto out_err;
637
395 return 0; 638 return 0;
396 639
397out_err: 640out_err:
@@ -400,9 +643,64 @@ out_err:
400 return -ENOMEM; 643 return -ENOMEM;
401} 644}
402 645
646static int device_dma_allocations(struct device *dev)
647{
648 struct dma_debug_entry *entry;
649 unsigned long flags;
650 int count = 0, i;
651
652 local_irq_save(flags);
653
654 for (i = 0; i < HASH_SIZE; ++i) {
655 spin_lock(&dma_entry_hash[i].lock);
656 list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
657 if (entry->dev == dev)
658 count += 1;
659 }
660 spin_unlock(&dma_entry_hash[i].lock);
661 }
662
663 local_irq_restore(flags);
664
665 return count;
666}
667
668static int dma_debug_device_change(struct notifier_block *nb,
669 unsigned long action, void *data)
670{
671 struct device *dev = data;
672 int count;
673
674
675 switch (action) {
676 case BUS_NOTIFY_UNBOUND_DRIVER:
677 count = device_dma_allocations(dev);
678 if (count == 0)
679 break;
680 err_printk(dev, NULL, "DMA-API: device driver has pending "
681 "DMA allocations while released from device "
682 "[count=%d]\n", count);
683 break;
684 default:
685 break;
686 }
687
688 return 0;
689}
690
403void dma_debug_add_bus(struct bus_type *bus) 691void dma_debug_add_bus(struct bus_type *bus)
404{ 692{
405 /* FIXME: register notifier */ 693 struct notifier_block *nb;
694
695 nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
696 if (nb == NULL) {
697 pr_err("dma_debug_add_bus: out of memory\n");
698 return;
699 }
700
701 nb->notifier_call = dma_debug_device_change;
702
703 bus_register_notifier(bus, nb);
406} 704}
407 705
408/* 706/*
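
With the FIXME resolved, dma_debug_add_bus() registers a notifier whose BUS_NOTIFY_UNBOUND_DRIVER case counts the device's outstanding entries across the hash and reports a leak. Architectures opt a bus in at init time; a hedged sketch of such a hook-up (the preallocation constant mirrors the existing x86 caller, but is an assumption here):

	static int __init arch_dma_debug_init(void)	/* illustrative */
	{
		dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
		/* report leaked mappings when a PCI driver unbinds */
		dma_debug_add_bus(&pci_bus_type);
		return 0;
	}
	fs_initcall(arch_dma_debug_init);
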
@@ -421,8 +719,7 @@ void dma_debug_init(u32 num_entries)
421 } 719 }
422 720
423 if (dma_debug_fs_init() != 0) { 721 if (dma_debug_fs_init() != 0) {
424 printk(KERN_ERR "DMA-API: error creating debugfs entries " 722 pr_err("DMA-API: error creating debugfs entries - disabling\n");
425 "- disabling\n");
426 global_disable = true; 723 global_disable = true;
427 724
428 return; 725 return;
@@ -432,14 +729,15 @@ void dma_debug_init(u32 num_entries)
432 num_entries = req_entries; 729 num_entries = req_entries;
433 730
434 if (prealloc_memory(num_entries) != 0) { 731 if (prealloc_memory(num_entries) != 0) {
435 printk(KERN_ERR "DMA-API: debugging out of memory error " 732 pr_err("DMA-API: debugging out of memory error - disabled\n");
436 "- disabled\n");
437 global_disable = true; 733 global_disable = true;
438 734
439 return; 735 return;
440 } 736 }
441 737
442 printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); 738 nr_total_entries = num_free_entries;
739
740 pr_info("DMA-API: debugging enabled by kernel config\n");
443} 741}
444 742
445static __init int dma_debug_cmdline(char *str) 743static __init int dma_debug_cmdline(char *str)
@@ -448,8 +746,7 @@ static __init int dma_debug_cmdline(char *str)
448 return -EINVAL; 746 return -EINVAL;
449 747
450 if (strncmp(str, "off", 3) == 0) { 748 if (strncmp(str, "off", 3) == 0) {
451 printk(KERN_INFO "DMA-API: debugging disabled on kernel " 749 pr_info("DMA-API: debugging disabled on kernel command line\n");
452 "command line\n");
453 global_disable = true; 750 global_disable = true;
454 } 751 }
455 752
@@ -723,15 +1020,15 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
723 entry->type = dma_debug_sg; 1020 entry->type = dma_debug_sg;
724 entry->dev = dev; 1021 entry->dev = dev;
725 entry->paddr = sg_phys(s); 1022 entry->paddr = sg_phys(s);
726 entry->size = s->length; 1023 entry->size = sg_dma_len(s);
727 entry->dev_addr = s->dma_address; 1024 entry->dev_addr = sg_dma_address(s);
728 entry->direction = direction; 1025 entry->direction = direction;
729 entry->sg_call_ents = nents; 1026 entry->sg_call_ents = nents;
730 entry->sg_mapped_ents = mapped_ents; 1027 entry->sg_mapped_ents = mapped_ents;
731 1028
732 if (!PageHighMem(sg_page(s))) { 1029 if (!PageHighMem(sg_page(s))) {
733 check_for_stack(dev, sg_virt(s)); 1030 check_for_stack(dev, sg_virt(s));
734 check_for_illegal_area(dev, sg_virt(s), s->length); 1031 check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
735 } 1032 }
736 1033
737 add_dma_entry(entry); 1034 add_dma_entry(entry);
@@ -739,13 +1036,33 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
739} 1036}
740EXPORT_SYMBOL(debug_dma_map_sg); 1037EXPORT_SYMBOL(debug_dma_map_sg);
741 1038
1039static int get_nr_mapped_entries(struct device *dev, struct scatterlist *s)
1040{
1041 struct dma_debug_entry *entry, ref;
1042 struct hash_bucket *bucket;
1043 unsigned long flags;
1044 int mapped_ents;
1045
1046 ref.dev = dev;
1047 ref.dev_addr = sg_dma_address(s);
1048	 ref.size = sg_dma_len(s);
1049
1050 bucket = get_hash_bucket(&ref, &flags);
1051 entry = hash_bucket_find(bucket, &ref);
1052 mapped_ents = 0;
1053
1054 if (entry)
1055 mapped_ents = entry->sg_mapped_ents;
1056 put_hash_bucket(bucket, &flags);
1057
1058 return mapped_ents;
1059}
1060
742void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, 1061void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
743 int nelems, int dir) 1062 int nelems, int dir)
744{ 1063{
745 struct dma_debug_entry *entry;
746 struct scatterlist *s; 1064 struct scatterlist *s;
747 int mapped_ents = 0, i; 1065 int mapped_ents = 0, i;
748 unsigned long flags;
749 1066
750 if (unlikely(global_disable)) 1067 if (unlikely(global_disable))
751 return; 1068 return;
@@ -756,8 +1073,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
756 .type = dma_debug_sg, 1073 .type = dma_debug_sg,
757 .dev = dev, 1074 .dev = dev,
758 .paddr = sg_phys(s), 1075 .paddr = sg_phys(s),
759 .dev_addr = s->dma_address, 1076 .dev_addr = sg_dma_address(s),
760 .size = s->length, 1077 .size = sg_dma_len(s),
761 .direction = dir, 1078 .direction = dir,
762 .sg_call_ents = 0, 1079 .sg_call_ents = 0,
763 }; 1080 };
@@ -765,14 +1082,9 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
765 if (mapped_ents && i >= mapped_ents) 1082 if (mapped_ents && i >= mapped_ents)
766 break; 1083 break;
767 1084
768 if (mapped_ents == 0) { 1085 if (!i) {
769 struct hash_bucket *bucket;
770 ref.sg_call_ents = nelems; 1086 ref.sg_call_ents = nelems;
771 bucket = get_hash_bucket(&ref, &flags); 1087 mapped_ents = get_nr_mapped_entries(dev, s);
772 entry = hash_bucket_find(bucket, &ref);
773 if (entry)
774 mapped_ents = entry->sg_mapped_ents;
775 put_hash_bucket(bucket, &flags);
776 } 1088 }
777 1089
778 check_unmap(&ref); 1090 check_unmap(&ref);
@@ -874,14 +1186,20 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
874 int nelems, int direction) 1186 int nelems, int direction)
875{ 1187{
876 struct scatterlist *s; 1188 struct scatterlist *s;
877 int i; 1189 int mapped_ents = 0, i;
878 1190
879 if (unlikely(global_disable)) 1191 if (unlikely(global_disable))
880 return; 1192 return;
881 1193
882 for_each_sg(sg, s, nelems, i) { 1194 for_each_sg(sg, s, nelems, i) {
883 check_sync(dev, s->dma_address, s->dma_length, 0, 1195 if (!i)
884 direction, true); 1196 mapped_ents = get_nr_mapped_entries(dev, s);
1197
1198 if (i >= mapped_ents)
1199 break;
1200
1201 check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
1202 direction, true);
885 } 1203 }
886} 1204}
887EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); 1205EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
@@ -890,15 +1208,39 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
890 int nelems, int direction) 1208 int nelems, int direction)
891{ 1209{
892 struct scatterlist *s; 1210 struct scatterlist *s;
893 int i; 1211 int mapped_ents = 0, i;
894 1212
895 if (unlikely(global_disable)) 1213 if (unlikely(global_disable))
896 return; 1214 return;
897 1215
898 for_each_sg(sg, s, nelems, i) { 1216 for_each_sg(sg, s, nelems, i) {
899 check_sync(dev, s->dma_address, s->dma_length, 0, 1217 if (!i)
900 direction, false); 1218 mapped_ents = get_nr_mapped_entries(dev, s);
1219
1220 if (i >= mapped_ents)
1221 break;
1222
1223 check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
1224 direction, false);
901 } 1225 }
902} 1226}
903EXPORT_SYMBOL(debug_dma_sync_sg_for_device); 1227EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
904 1228
1229static int __init dma_debug_driver_setup(char *str)
1230{
1231 int i;
1232
1233 for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
1234 current_driver_name[i] = *str;
1235 if (*str == 0)
1236 break;
1237 }
1238
1239 if (current_driver_name[0])
1240 pr_info("DMA-API: enable driver filter for driver [%s]\n",
1241 current_driver_name);
1242
1243
1244 return 1;
1245}
1246__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 2b0b5a7d2ced..bffe6d7ef9d9 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -60,8 +60,8 @@ enum dma_sync_target {
60int swiotlb_force; 60int swiotlb_force;
61 61
62/* 62/*
63 * Used to do a quick range check in swiotlb_unmap_single and 63 * Used to do a quick range check in unmap_single and
64 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this 64 * sync_single_*, to see if the memory was in fact allocated by this
65 * API. 65 * API.
66 */ 66 */
67static char *io_tlb_start, *io_tlb_end; 67static char *io_tlb_start, *io_tlb_end;
@@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
129 return paddr; 129 return paddr;
130} 130}
131 131
132phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr) 132phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
133{ 133{
134 return baddr; 134 return baddr;
135} 135}
@@ -140,9 +140,15 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
140 return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); 140 return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
141} 141}
142 142
143static void *swiotlb_bus_to_virt(dma_addr_t address) 143void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
144{ 144{
145 return phys_to_virt(swiotlb_bus_to_phys(address)); 145 return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
146}
147
148int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
149 dma_addr_t addr, size_t size)
150{
151 return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
146} 152}
147 153
148int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) 154int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -309,10 +315,10 @@ cleanup1:
309 return -ENOMEM; 315 return -ENOMEM;
310} 316}
311 317
312static int 318static inline int
313address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) 319address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
314{ 320{
315 return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); 321 return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
316} 322}
317 323
318static inline int range_needs_mapping(phys_addr_t paddr, size_t size) 324static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -341,7 +347,7 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
341 unsigned long flags; 347 unsigned long flags;
342 348
343 while (size) { 349 while (size) {
344 sz = min(PAGE_SIZE - offset, size); 350 sz = min_t(size_t, PAGE_SIZE - offset, size);
345 351
346 local_irq_save(flags); 352 local_irq_save(flags);
347 buffer = kmap_atomic(pfn_to_page(pfn), 353 buffer = kmap_atomic(pfn_to_page(pfn),
@@ -476,7 +482,7 @@ found:
476 * dma_addr is the kernel virtual address of the bounce buffer to unmap. 482 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
477 */ 483 */
478static void 484static void
479unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) 485do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
480{ 486{
481 unsigned long flags; 487 unsigned long flags;
482 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 488 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -560,7 +566,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
560 size)) { 566 size)) {
561 /* 567 /*
562 * The allocated memory isn't reachable by the device. 568 * The allocated memory isn't reachable by the device.
563 * Fall back on swiotlb_map_single().
564 */ 569 */
565 free_pages((unsigned long) ret, order); 570 free_pages((unsigned long) ret, order);
566 ret = NULL; 571 ret = NULL;
@@ -568,9 +573,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
568 if (!ret) { 573 if (!ret) {
569 /* 574 /*
570 * We are either out of memory or the device can't DMA 575 * We are either out of memory or the device can't DMA
571 * to GFP_DMA memory; fall back on 576 * to GFP_DMA memory; fall back on map_single(), which
572 * swiotlb_map_single(), which will grab memory from 577 * will grab memory from the lowest available address range.
573 * the lowest available address range.
574 */ 578 */
575 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); 579 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
576 if (!ret) 580 if (!ret)
@@ -587,7 +591,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
587 (unsigned long long)dev_addr); 591 (unsigned long long)dev_addr);
588 592
589 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 593 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
590 unmap_single(hwdev, ret, size, DMA_TO_DEVICE); 594 do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
591 return NULL; 595 return NULL;
592 } 596 }
593 *dma_handle = dev_addr; 597 *dma_handle = dev_addr;
@@ -604,7 +608,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
604 free_pages((unsigned long) vaddr, get_order(size)); 608 free_pages((unsigned long) vaddr, get_order(size));
605 else 609 else
606 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 610 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
607 unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); 611 do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
608} 612}
609EXPORT_SYMBOL(swiotlb_free_coherent); 613EXPORT_SYMBOL(swiotlb_free_coherent);
610 614
@@ -634,7 +638,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
634 * physical address to use is returned. 638 * physical address to use is returned.
635 * 639 *
636 * Once the device is given the dma address, the device owns this memory until 640 * Once the device is given the dma address, the device owns this memory until
637 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. 641 * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
638 */ 642 */
639dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, 643dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
640 unsigned long offset, size_t size, 644 unsigned long offset, size_t size,
@@ -642,18 +646,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
642 struct dma_attrs *attrs) 646 struct dma_attrs *attrs)
643{ 647{
644 phys_addr_t phys = page_to_phys(page) + offset; 648 phys_addr_t phys = page_to_phys(page) + offset;
645 void *ptr = page_address(page) + offset;
646 dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); 649 dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
647 void *map; 650 void *map;
648 651
649 BUG_ON(dir == DMA_NONE); 652 BUG_ON(dir == DMA_NONE);
650 /* 653 /*
651 * If the pointer passed in happens to be in the device's DMA window, 654 * If the address happens to be in the device's DMA window,
652 * we can safely return the device addr and not worry about bounce 655 * we can safely return the device addr and not worry about bounce
653 * buffering it. 656 * buffering it.
654 */ 657 */
655 if (!address_needs_mapping(dev, dev_addr, size) && 658 if (!address_needs_mapping(dev, dev_addr, size) &&
656 !range_needs_mapping(virt_to_phys(ptr), size)) 659 !range_needs_mapping(phys, size))
657 return dev_addr; 660 return dev_addr;
658 661
659 /* 662 /*
@@ -679,23 +682,35 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
679 682
680/* 683/*
681 * Unmap a single streaming mode DMA translation. The dma_addr and size must 684 * Unmap a single streaming mode DMA translation. The dma_addr and size must
682 * match what was provided for in a previous swiotlb_map_single call. All 685 * match what was provided for in a previous swiotlb_map_page call. All
683 * other usages are undefined. 686 * other usages are undefined.
684 * 687 *
685 * After this call, reads by the cpu to the buffer are guaranteed to see 688 * After this call, reads by the cpu to the buffer are guaranteed to see
686 * whatever the device wrote there. 689 * whatever the device wrote there.
687 */ 690 */
691static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
692 size_t size, int dir)
693{
694 char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
695
696 BUG_ON(dir == DMA_NONE);
697
698 if (is_swiotlb_buffer(dma_addr)) {
699 do_unmap_single(hwdev, dma_addr, size, dir);
700 return;
701 }
702
703 if (dir != DMA_FROM_DEVICE)
704 return;
705
706 dma_mark_clean(dma_addr, size);
707}
708
688void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 709void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
689 size_t size, enum dma_data_direction dir, 710 size_t size, enum dma_data_direction dir,
690 struct dma_attrs *attrs) 711 struct dma_attrs *attrs)
691{ 712{
692 char *dma_addr = swiotlb_bus_to_virt(dev_addr); 713 unmap_single(hwdev, dev_addr, size, dir);
693
694 BUG_ON(dir == DMA_NONE);
695 if (is_swiotlb_buffer(dma_addr))
696 unmap_single(hwdev, dma_addr, size, dir);
697 else if (dir == DMA_FROM_DEVICE)
698 dma_mark_clean(dma_addr, size);
699} 714}
700EXPORT_SYMBOL_GPL(swiotlb_unmap_page); 715EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
701 716
@@ -703,7 +718,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
703 * Make physical memory consistent for a single streaming mode DMA translation 718 * Make physical memory consistent for a single streaming mode DMA translation
704 * after a transfer. 719 * after a transfer.
705 * 720 *
706 * If you perform a swiotlb_map_single() but wish to interrogate the buffer 721 * If you perform a swiotlb_map_page() but wish to interrogate the buffer
707 * using the cpu, yet do not wish to teardown the dma mapping, you must 722 * using the cpu, yet do not wish to teardown the dma mapping, you must
708 * call this function before doing so. At the next point you give the dma 723 * call this function before doing so. At the next point you give the dma
709 * address back to the card, you must first perform a 724 * address back to the card, you must first perform a
@@ -713,13 +728,19 @@ static void
713swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, 728swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
714 size_t size, int dir, int target) 729 size_t size, int dir, int target)
715{ 730{
716 char *dma_addr = swiotlb_bus_to_virt(dev_addr); 731 char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
717 732
718 BUG_ON(dir == DMA_NONE); 733 BUG_ON(dir == DMA_NONE);
719 if (is_swiotlb_buffer(dma_addr)) 734
735 if (is_swiotlb_buffer(dma_addr)) {
720 sync_single(hwdev, dma_addr, size, dir, target); 736 sync_single(hwdev, dma_addr, size, dir, target);
721 else if (dir == DMA_FROM_DEVICE) 737 return;
722 dma_mark_clean(dma_addr, size); 738 }
739
740 if (dir != DMA_FROM_DEVICE)
741 return;
742
743 dma_mark_clean(dma_addr, size);
723} 744}
724 745
725void 746void
@@ -746,13 +767,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
746 unsigned long offset, size_t size, 767 unsigned long offset, size_t size,
747 int dir, int target) 768 int dir, int target)
748{ 769{
749 char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset; 770 swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
750
751 BUG_ON(dir == DMA_NONE);
752 if (is_swiotlb_buffer(dma_addr))
753 sync_single(hwdev, dma_addr, size, dir, target);
754 else if (dir == DMA_FROM_DEVICE)
755 dma_mark_clean(dma_addr, size);
756} 771}
757 772
758void 773void
@@ -777,7 +792,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
777 792
778/* 793/*
779 * Map a set of buffers described by scatterlist in streaming mode for DMA. 794 * Map a set of buffers described by scatterlist in streaming mode for DMA.
780 * This is the scatter-gather version of the above swiotlb_map_single 795 * This is the scatter-gather version of the above swiotlb_map_page
781 * interface. Here the scatter gather list elements are each tagged with the 796 * interface. Here the scatter gather list elements are each tagged with the
782 * appropriate dma address and length. They are obtained via 797 * appropriate dma address and length. They are obtained via
783 * sg_dma_{address,length}(SG). 798 * sg_dma_{address,length}(SG).
@@ -788,7 +803,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
788 * The routine returns the number of addr/length pairs actually 803 * The routine returns the number of addr/length pairs actually
789 * used, at most nents. 804 * used, at most nents.
790 * 805 *
791 * Device ownership issues as mentioned above for swiotlb_map_single are the 806 * Device ownership issues as mentioned above for swiotlb_map_page are the
792 * same here. 807 * same here.
793 */ 808 */
794int 809int
@@ -836,7 +851,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
836 851
837/* 852/*
838 * Unmap a set of streaming mode DMA translations. Again, cpu read rules 853 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
839 * concerning calls here are the same as for swiotlb_unmap_single() above. 854 * concerning calls here are the same as for swiotlb_unmap_page() above.
840 */ 855 */
841void 856void
842swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, 857swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
@@ -847,13 +862,9 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
847 862
848 BUG_ON(dir == DMA_NONE); 863 BUG_ON(dir == DMA_NONE);
849 864
850 for_each_sg(sgl, sg, nelems, i) { 865 for_each_sg(sgl, sg, nelems, i)
851 if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) 866 unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
852 unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), 867
853 sg->dma_length, dir);
854 else if (dir == DMA_FROM_DEVICE)
855 dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
856 }
857} 868}
858EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); 869EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
859 870
@@ -879,15 +890,9 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
879 struct scatterlist *sg; 890 struct scatterlist *sg;
880 int i; 891 int i;
881 892
882 BUG_ON(dir == DMA_NONE); 893 for_each_sg(sgl, sg, nelems, i)
883 894 swiotlb_sync_single(hwdev, sg->dma_address,
884 for_each_sg(sgl, sg, nelems, i) {
885 if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
886 sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
887 sg->dma_length, dir, target); 895 sg->dma_length, dir, target);
888 else if (dir == DMA_FROM_DEVICE)
889 dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
890 }
891} 896}
892 897
893void 898void
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 7536acea135b..756ccafa9cec 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -408,6 +408,8 @@ enum format_type {
408 FORMAT_TYPE_LONG_LONG, 408 FORMAT_TYPE_LONG_LONG,
409 FORMAT_TYPE_ULONG, 409 FORMAT_TYPE_ULONG,
410 FORMAT_TYPE_LONG, 410 FORMAT_TYPE_LONG,
411 FORMAT_TYPE_UBYTE,
412 FORMAT_TYPE_BYTE,
411 FORMAT_TYPE_USHORT, 413 FORMAT_TYPE_USHORT,
412 FORMAT_TYPE_SHORT, 414 FORMAT_TYPE_SHORT,
413 FORMAT_TYPE_UINT, 415 FORMAT_TYPE_UINT,
@@ -573,12 +575,15 @@ static char *string(char *buf, char *end, char *s, struct printf_spec spec)
573} 575}
574 576
575static char *symbol_string(char *buf, char *end, void *ptr, 577static char *symbol_string(char *buf, char *end, void *ptr,
576 struct printf_spec spec) 578 struct printf_spec spec, char ext)
577{ 579{
578 unsigned long value = (unsigned long) ptr; 580 unsigned long value = (unsigned long) ptr;
579#ifdef CONFIG_KALLSYMS 581#ifdef CONFIG_KALLSYMS
580 char sym[KSYM_SYMBOL_LEN]; 582 char sym[KSYM_SYMBOL_LEN];
581 sprint_symbol(sym, value); 583 if (ext != 'f')
584 sprint_symbol(sym, value);
585 else
586 kallsyms_lookup(value, NULL, NULL, NULL, sym);
582 return string(buf, end, sym, spec); 587 return string(buf, end, sym, spec);
583#else 588#else
584 spec.field_width = 2*sizeof(void *); 589 spec.field_width = 2*sizeof(void *);
@@ -690,7 +695,8 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr,
690 * 695 *
691 * Right now we handle: 696 * Right now we handle:
692 * 697 *
693 * - 'F' For symbolic function descriptor pointers 698 * - 'F' For symbolic function descriptor pointers with offset
699 * - 'f' For simple symbolic function names without offset
694 * - 'S' For symbolic direct pointers 700 * - 'S' For symbolic direct pointers
695 * - 'R' For a struct resource pointer, it prints the range of 701 * - 'R' For a struct resource pointer, it prints the range of
696 * addresses (not the name nor the flags) 702 * addresses (not the name nor the flags)
@@ -713,10 +719,11 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
713 719
714 switch (*fmt) { 720 switch (*fmt) {
715 case 'F': 721 case 'F':
722 case 'f':
716 ptr = dereference_function_descriptor(ptr); 723 ptr = dereference_function_descriptor(ptr);
717 /* Fallthrough */ 724 /* Fallthrough */
718 case 'S': 725 case 'S':
719 return symbol_string(buf, end, ptr, spec); 726 return symbol_string(buf, end, ptr, spec, *fmt);
720 case 'R': 727 case 'R':
721 return resource_string(buf, end, ptr, spec); 728 return resource_string(buf, end, ptr, spec);
722 case 'm': 729 case 'm':
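
For %pF the pointer is first put through dereference_function_descriptor() (a no-op except on function-descriptor ABIs such as ia64, parisc and ppc64) and printed as symbol+offset; the new %pf prints the bare symbol name. Usage, roughly (output shown is indicative):

	/* given some function pointer fn: */
	printk(KERN_INFO "handler: %pF\n", fn);	/* e.g. "foo+0x0/0x20" */
	printk(KERN_INFO "handler: %pf\n", fn);	/* e.g. "foo" */
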
@@ -853,11 +860,15 @@ qualifier:
853 spec->qualifier = -1; 860 spec->qualifier = -1;
854 if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || 861 if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
855 *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { 862 *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
856 spec->qualifier = *fmt; 863 spec->qualifier = *fmt++;
857 ++fmt; 864 if (unlikely(spec->qualifier == *fmt)) {
858 if (spec->qualifier == 'l' && *fmt == 'l') { 865 if (spec->qualifier == 'l') {
859 spec->qualifier = 'L'; 866 spec->qualifier = 'L';
860 ++fmt; 867 ++fmt;
868 } else if (spec->qualifier == 'h') {
869 spec->qualifier = 'H';
870 ++fmt;
871 }
861 } 872 }
862 } 873 }
863 874
@@ -919,6 +930,11 @@ qualifier:
919 spec->type = FORMAT_TYPE_SIZE_T; 930 spec->type = FORMAT_TYPE_SIZE_T;
920 } else if (spec->qualifier == 't') { 931 } else if (spec->qualifier == 't') {
921 spec->type = FORMAT_TYPE_PTRDIFF; 932 spec->type = FORMAT_TYPE_PTRDIFF;
933 } else if (spec->qualifier == 'H') {
934 if (spec->flags & SIGN)
935 spec->type = FORMAT_TYPE_BYTE;
936 else
937 spec->type = FORMAT_TYPE_UBYTE;
922 } else if (spec->qualifier == 'h') { 938 } else if (spec->qualifier == 'h') {
923 if (spec->flags & SIGN) 939 if (spec->flags & SIGN)
924 spec->type = FORMAT_TYPE_SHORT; 940 spec->type = FORMAT_TYPE_SHORT;
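
Taken together, these hunks teach the printf core the C99 'hh' length modifier: the parser folds a doubled 'h' into an internal 'H' qualifier, which selects FORMAT_TYPE_(U)BYTE, and the va_arg paths narrow the int-promoted argument back to (un)signed char. For example:

	signed char sc = -5;
	unsigned char uc = 200;

	printk(KERN_INFO "%hhd %hhu\n", sc, uc);	/* prints "-5 200" */
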
@@ -943,7 +959,8 @@ qualifier:
943 * 959 *
944 * This function follows C99 vsnprintf, but has some extensions: 960 * This function follows C99 vsnprintf, but has some extensions:
945 * %pS output the name of a text symbol 961 * %pS output the name of a text symbol
946 * %pF output the name of a function pointer 962 * %pF output the name of a function pointer with its offset
963 * %pf output the name of a function pointer without its offset
947 * %pR output the address range in a struct resource 964 * %pR output the address range in a struct resource
948 * 965 *
949 * The return value is the number of characters which would 966 * The return value is the number of characters which would
@@ -1087,6 +1104,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1087 case FORMAT_TYPE_PTRDIFF: 1104 case FORMAT_TYPE_PTRDIFF:
1088 num = va_arg(args, ptrdiff_t); 1105 num = va_arg(args, ptrdiff_t);
1089 break; 1106 break;
1107 case FORMAT_TYPE_UBYTE:
1108 num = (unsigned char) va_arg(args, int);
1109 break;
1110 case FORMAT_TYPE_BYTE:
1111 num = (signed char) va_arg(args, int);
1112 break;
1090 case FORMAT_TYPE_USHORT: 1113 case FORMAT_TYPE_USHORT:
1091 num = (unsigned short) va_arg(args, int); 1114 num = (unsigned short) va_arg(args, int);
1092 break; 1115 break;
@@ -1363,6 +1386,10 @@ do { \
1363 case FORMAT_TYPE_PTRDIFF: 1386 case FORMAT_TYPE_PTRDIFF:
1364 save_arg(ptrdiff_t); 1387 save_arg(ptrdiff_t);
1365 break; 1388 break;
1389 case FORMAT_TYPE_UBYTE:
1390 case FORMAT_TYPE_BYTE:
1391 save_arg(char);
1392 break;
1366 case FORMAT_TYPE_USHORT: 1393 case FORMAT_TYPE_USHORT:
1367 case FORMAT_TYPE_SHORT: 1394 case FORMAT_TYPE_SHORT:
1368 save_arg(short); 1395 save_arg(short);
@@ -1391,7 +1418,8 @@ EXPORT_SYMBOL_GPL(vbin_printf);
1391 * 1418 *
1392 * The format follows C99 vsnprintf, but has some extensions: 1419 * The format follows C99 vsnprintf, but has some extensions:
1393 * %pS output the name of a text symbol 1420 * %pS output the name of a text symbol
1394 * %pF output the name of a function pointer 1421 * %pF output the name of a function pointer with its offset
1422 * %pf output the name of a function pointer without its offset
1395 * %pR output the address range in a struct resource 1423 * %pR output the address range in a struct resource
1396 * %n is ignored 1424 * %n is ignored
1397 * 1425 *
@@ -1538,6 +1566,12 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1538 case FORMAT_TYPE_PTRDIFF: 1566 case FORMAT_TYPE_PTRDIFF:
1539 num = get_arg(ptrdiff_t); 1567 num = get_arg(ptrdiff_t);
1540 break; 1568 break;
1569 case FORMAT_TYPE_UBYTE:
1570 num = get_arg(unsigned char);
1571 break;
1572 case FORMAT_TYPE_BYTE:
1573 num = get_arg(signed char);
1574 break;
1541 case FORMAT_TYPE_USHORT: 1575 case FORMAT_TYPE_USHORT:
1542 num = get_arg(unsigned short); 1576 num = get_arg(unsigned short);
1543 break; 1577 break;
diff --git a/mm/bounce.c b/mm/bounce.c
index e590272fe7a8..65f5e17e411a 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,16 +14,15 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/highmem.h> 15#include <linux/highmem.h>
16#include <linux/blktrace_api.h> 16#include <linux/blktrace_api.h>
17#include <trace/block.h>
18#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
19 18
19#include <trace/events/block.h>
20
20#define POOL_SIZE 64 21#define POOL_SIZE 64
21#define ISA_POOL_SIZE 16 22#define ISA_POOL_SIZE 16
22 23
23static mempool_t *page_pool, *isa_page_pool; 24static mempool_t *page_pool, *isa_page_pool;
24 25
25DEFINE_TRACE(block_bio_bounce);
26
27#ifdef CONFIG_HIGHMEM 26#ifdef CONFIG_HIGHMEM
28static __init int init_emergency_pool(void) 27static __init int init_emergency_pool(void)
29{ 28{
diff --git a/mm/filemap.c b/mm/filemap.c
index 379ff0bcbf6e..1b60f30cebfa 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page)
121 mapping->nrpages--; 121 mapping->nrpages--;
122 __dec_zone_page_state(page, NR_FILE_PAGES); 122 __dec_zone_page_state(page, NR_FILE_PAGES);
123 BUG_ON(page_mapped(page)); 123 BUG_ON(page_mapped(page));
124 mem_cgroup_uncharge_cache_page(page);
125 124
126 /* 125 /*
127 * Some filesystems seem to re-dirty the page even after 126 * Some filesystems seem to re-dirty the page even after
@@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page)
145 spin_lock_irq(&mapping->tree_lock); 144 spin_lock_irq(&mapping->tree_lock);
146 __remove_from_page_cache(page); 145 __remove_from_page_cache(page);
147 spin_unlock_irq(&mapping->tree_lock); 146 spin_unlock_irq(&mapping->tree_lock);
147 mem_cgroup_uncharge_cache_page(page);
148} 148}
149 149
150static int sync_page(void *word) 150static int sync_page(void *word)
@@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
476 if (likely(!error)) { 476 if (likely(!error)) {
477 mapping->nrpages++; 477 mapping->nrpages++;
478 __inc_zone_page_state(page, NR_FILE_PAGES); 478 __inc_zone_page_state(page, NR_FILE_PAGES);
479 spin_unlock_irq(&mapping->tree_lock);
479 } else { 480 } else {
480 page->mapping = NULL; 481 page->mapping = NULL;
482 spin_unlock_irq(&mapping->tree_lock);
481 mem_cgroup_uncharge_cache_page(page); 483 mem_cgroup_uncharge_cache_page(page);
482 page_cache_release(page); 484 page_cache_release(page);
483 } 485 }
484
485 spin_unlock_irq(&mapping->tree_lock);
486 radix_tree_preload_end(); 486 radix_tree_preload_end();
487 } else 487 } else
488 mem_cgroup_uncharge_cache_page(page); 488 mem_cgroup_uncharge_cache_page(page);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 28c655ba9353..e83ad2c9228c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -316,7 +316,7 @@ static void resv_map_release(struct kref *ref)
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	if (!(vma->vm_flags & VM_SHARED))
+	if (!(vma->vm_flags & VM_MAYSHARE))
 		return (struct resv_map *)(get_vma_private_data(vma) &
 							~HPAGE_RESV_MASK);
 	return NULL;
@@ -325,7 +325,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	VM_BUG_ON(vma->vm_flags & VM_SHARED);
+	VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
 
 	set_vma_private_data(vma, (get_vma_private_data(vma) &
 				HPAGE_RESV_MASK) | (unsigned long)map);
@@ -334,7 +334,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	VM_BUG_ON(vma->vm_flags & VM_SHARED);
+	VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
 
 	set_vma_private_data(vma, get_vma_private_data(vma) | flags);
 }
@@ -353,7 +353,7 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
 	if (vma->vm_flags & VM_NORESERVE)
 		return;
 
-	if (vma->vm_flags & VM_SHARED) {
+	if (vma->vm_flags & VM_MAYSHARE) {
 		/* Shared mappings always use reserves */
 		h->resv_huge_pages--;
 	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
@@ -369,14 +369,14 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	if (!(vma->vm_flags & VM_SHARED))
+	if (!(vma->vm_flags & VM_MAYSHARE))
 		vma->vm_private_data = (void *)0;
 }
 
 /* Returns true if the VMA has associated reserve pages */
 static int vma_has_reserves(struct vm_area_struct *vma)
 {
-	if (vma->vm_flags & VM_SHARED)
+	if (vma->vm_flags & VM_MAYSHARE)
 		return 1;
 	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
 		return 1;
@@ -924,7 +924,7 @@ static long vma_needs_reservation(struct hstate *h,
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
 
-	if (vma->vm_flags & VM_SHARED) {
+	if (vma->vm_flags & VM_MAYSHARE) {
 		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		return region_chg(&inode->i_mapping->private_list,
 							idx, idx + 1);
@@ -949,7 +949,7 @@ static void vma_commit_reservation(struct hstate *h,
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
 
-	if (vma->vm_flags & VM_SHARED) {
+	if (vma->vm_flags & VM_MAYSHARE) {
 		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		region_add(&inode->i_mapping->private_list, idx, idx + 1);
 
@@ -1893,7 +1893,7 @@ retry_avoidcopy:
 	 * at the time of fork() could consume its reserves on COW instead
 	 * of the full address range.
 	 */
-	if (!(vma->vm_flags & VM_SHARED) &&
+	if (!(vma->vm_flags & VM_MAYSHARE) &&
 			is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
 			old_page != pagecache_page)
 		outside_reserve = 1;
@@ -2000,7 +2000,7 @@ retry:
 		clear_huge_page(page, address, huge_page_size(h));
 		__SetPageUptodate(page);
 
-		if (vma->vm_flags & VM_SHARED) {
+		if (vma->vm_flags & VM_MAYSHARE) {
 			int err;
 			struct inode *inode = mapping->host;
 
@@ -2104,7 +2104,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			goto out_mutex;
 		}
 
-		if (!(vma->vm_flags & VM_SHARED))
+		if (!(vma->vm_flags & VM_MAYSHARE))
 			pagecache_page = hugetlbfs_pagecache_page(h,
 								vma, address);
 	}
@@ -2289,7 +2289,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 * to reserve the full area even if read-only as mprotect() may be
 	 * called to make the mapping read-write. Assume !vma is a shm mapping
 	 */
-	if (!vma || vma->vm_flags & VM_SHARED)
+	if (!vma || vma->vm_flags & VM_MAYSHARE)
 		chg = region_chg(&inode->i_mapping->private_list, from, to);
 	else {
 		struct resv_map *resv_map = resv_map_alloc();
@@ -2330,7 +2330,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 * consumed reservations are stored in the map. Hence, nothing
 	 * else has to be done for private mappings here
 	 */
-	if (!vma || vma->vm_flags & VM_SHARED)
+	if (!vma || vma->vm_flags & VM_MAYSHARE)
 		region_add(&inode->i_mapping->private_list, from, to);
 	return 0;
 }
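Every check in mm/hugetlb.c flips from VM_SHARED to VM_MAYSHARE. The distinction matters because VM_SHARED is cleared on a MAP_SHARED mapping of a file not opened for writing, while VM_MAYSHARE stays set for anything mapped MAP_SHARED; huge page reservations follow the MAP_SHARED property, not current writability. A small sketch (flag values copied from the kernel's mm.h, the scenario itself illustrative):

    #include <stdio.h>

    /* Flag values as in the kernel's mm.h; the rest is a toy model. */
    #define VM_SHARED   0x00000008UL
    #define VM_MAYSHARE 0x00000080UL

    /* A MAP_SHARED, PROT_READ mapping of a file opened O_RDONLY:
     * the kernel keeps VM_MAYSHARE but drops VM_SHARED. */
    int main(void)
    {
        unsigned long flags = VM_MAYSHARE;  /* VM_SHARED cleared */

        /* Old test: wrongly treats this mapping as private. */
        printf("VM_SHARED test:   %s\n",
               (flags & VM_SHARED) ? "shared" : "private");
        /* New test: correctly treats it as shared for reservations. */
        printf("VM_MAYSHARE test: %s\n",
               (flags & VM_MAYSHARE) ? "shared" : "private");
        return 0;
    }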
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 01c2d8f14685..78eb8552818b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -314,14 +314,6 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return mem;
 }
 
-static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
-{
-	if (!mem)
-		return true;
-	return css_is_removed(&mem->css);
-}
-
-
 /*
  * Call callback function against all cgroup under hierarchy tree.
  */
@@ -932,7 +924,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	if (unlikely(!mem))
 		return 0;
 
-	VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem));
+	VM_BUG_ON(css_is_removed(&mem->css));
 
 	while (1) {
 		int ret;
@@ -1488,8 +1480,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
+#ifdef CONFIG_SWAP
 /*
- * called from __delete_from_swap_cache() and drop "page" account.
+ * called after __delete_from_swap_cache() and drop "page" account.
  * memcg information is recorded to swap_cgroup of "ent"
  */
 void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
@@ -1506,6 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 	if (memcg)
 		css_put(&memcg->css);
 }
+#endif
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /*
diff --git a/mm/mlock.c b/mm/mlock.c
index cbe9e0581b75..ac130433c7d3 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -629,52 +629,43 @@ void user_shm_unlock(size_t size, struct user_struct *user)
 	free_uid(user);
 }
 
-void *alloc_locked_buffer(size_t size)
+int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+			size_t size)
 {
-	unsigned long rlim, vm, pgsz;
-	void *buffer = NULL;
+	unsigned long lim, vm, pgsz;
+	int error = -ENOMEM;
 
 	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
-
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->total_vm + pgsz;
-	if (rlim < vm)
-		goto out;
+	down_write(&mm->mmap_sem);
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->locked_vm + pgsz;
-	if (rlim < vm)
+	lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm = mm->total_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
+	lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm = mm->locked_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	current->mm->total_vm += pgsz;
-	current->mm->locked_vm += pgsz;
+	mm->total_vm += pgsz;
+	mm->locked_vm += pgsz;
 
+	error = 0;
 out:
-	up_write(&current->mm->mmap_sem);
-	return buffer;
+	up_write(&mm->mmap_sem);
+	return error;
 }
 
-void release_locked_buffer(void *buffer, size_t size)
+void refund_locked_memory(struct mm_struct *mm, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
-
-	current->mm->total_vm -= pgsz;
-	current->mm->locked_vm -= pgsz;
-
-	up_write(&current->mm->mmap_sem);
-}
+	down_write(&mm->mmap_sem);
 
-void free_locked_buffer(void *buffer, size_t size)
-{
-	release_locked_buffer(buffer, size);
+	mm->total_vm -= pgsz;
+	mm->locked_vm -= pgsz;
 
-	kfree(buffer);
+	up_write(&mm->mmap_sem);
 }
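account_locked_memory() reduces the old buffer allocator to pure accounting: page-align the request, check it against RLIMIT_AS and RLIMIT_MEMLOCK, and only then commit the counters. A userspace model of the same arithmetic (the toy_mm/toy_rlim structs exist only for this sketch):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Toy stand-ins for mm_struct and the rlimit array. */
    struct toy_mm { unsigned long total_vm, locked_vm; };
    struct toy_rlim { unsigned long as_cur, memlock_cur; };

    static int account_locked(struct toy_mm *mm, const struct toy_rlim *rlim,
                              size_t size)
    {
        unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;

        if (rlim->as_cur >> PAGE_SHIFT < mm->total_vm + pgsz)
            return -1;  /* would exceed RLIMIT_AS */
        if (rlim->memlock_cur >> PAGE_SHIFT < mm->locked_vm + pgsz)
            return -1;  /* would exceed RLIMIT_MEMLOCK */
        mm->total_vm += pgsz;
        mm->locked_vm += pgsz;
        return 0;
    }

    int main(void)
    {
        struct toy_mm mm = { 100, 0 };
        struct toy_rlim rl = { 1UL << 20, 64UL << 10 };

        printf("64K request:  %d\n", account_locked(&mm, &rl, 64 << 10));
        printf("128K request: %d\n", account_locked(&mm, &rl, 128 << 10));
        return 0;
    }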
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 92bcf1db16b2..a7b2460e922b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -284,22 +284,28 @@ static void dump_tasks(const struct mem_cgroup *mem)
 	printk(KERN_INFO "[ pid ]   uid  tgid total_vm      rss cpu oom_adj "
 	       "name\n");
 	do_each_thread(g, p) {
-		/*
-		 * total_vm and rss sizes do not exist for tasks with a
-		 * detached mm so there's no need to report them.
-		 */
-		if (!p->mm)
-			continue;
+		struct mm_struct *mm;
+
 		if (mem && !task_in_mem_cgroup(p, mem))
 			continue;
 		if (!thread_group_leader(p))
 			continue;
 
 		task_lock(p);
+		mm = p->mm;
+		if (!mm) {
+			/*
+			 * total_vm and rss sizes do not exist for tasks with no
+			 * mm so there's no need to report them; they can't be
+			 * oom killed anyway.
+			 */
+			task_unlock(p);
+			continue;
+		}
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d     %3d %s\n",
-		       p->pid, __task_cred(p)->uid, p->tgid,
-		       p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p),
-		       p->oomkilladj, p->comm);
+		       p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
+		       get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
+		       p->comm);
 		task_unlock(p);
 	} while_each_thread(g, p);
 }
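The dump_tasks() change snapshots p->mm once under task_lock() and uses only that snapshot, instead of testing p->mm early and dereferencing it later; a task can drop its mm in between. A condensed userspace model of the snapshot-under-lock pattern (a mutex stands in for task_lock, all names local to the sketch):

    #include <stdio.h>
    #include <pthread.h>

    struct mmval { unsigned long total_vm; };
    struct task { pthread_mutex_t lock; struct mmval *mm; };

    /* Read p->mm once under the lock and use only the snapshot. */
    static void report(struct task *p)
    {
        struct mmval *mm;

        pthread_mutex_lock(&p->lock);
        mm = p->mm;
        if (!mm) {
            pthread_mutex_unlock(&p->lock);
            return;
        }
        printf("total_vm=%lu\n", mm->total_vm);
        pthread_mutex_unlock(&p->lock);
    }

    int main(void)
    {
        struct mmval m = { 1234 };
        struct task t = { PTHREAD_MUTEX_INITIALIZER, &m };

        report(&t);
        t.mm = NULL;
        report(&t);    /* silently skipped, as in dump_tasks() */
        return 0;
    }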
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe753ecf2aa5..474c7e9dd51a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve;
 static int __meminitdata nr_nodemap_entries;
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 static unsigned long __initdata required_kernelcore;
 static unsigned long __initdata required_movablecore;
 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3103,64 +3099,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
 }
 
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
 * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
 * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void)
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */
diff --git a/mm/percpu.c b/mm/percpu.c
index 1aa5d8fbca12..c0b2c1a76e81 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -23,7 +23,7 @@
 * Allocation is done in offset-size areas of single unit space. Ie,
 * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
 * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring
- * percpu base registers UNIT_SIZE apart.
+ * percpu base registers pcpu_unit_size apart.
 *
 * There are usually many small percpu allocations many of them as
 * small as 4 bytes. The allocator organizes chunks into lists
@@ -38,8 +38,8 @@
 * region and negative allocated. Allocation inside a chunk is done
 * by scanning this map sequentially and serving the first matching
 * entry. This is mostly copied from the percpu_modalloc() allocator.
- * Chunks are also linked into a rb tree to ease address to chunk
- * mapping during free.
+ * Chunks can be determined from the address using the index field
+ * in the page struct. The index field contains a pointer to the chunk.
 *
 * To use this allocator, arch code should do the followings.
 *
@@ -61,7 +61,6 @@
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/pfn.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
@@ -88,7 +87,6 @@
 
 struct pcpu_chunk {
 	struct list_head list;		/* linked to pcpu_slot lists */
-	struct rb_node rb_node;		/* key is chunk->vm->addr */
 	int free_size;			/* free bytes in the chunk */
 	int contig_hint;		/* max contiguous size hint */
 	struct vm_struct *vm;		/* mapped vmalloc region */
@@ -110,9 +108,21 @@ static size_t pcpu_chunk_struct_size __read_mostly;
 void *pcpu_base_addr __read_mostly;
 EXPORT_SYMBOL_GPL(pcpu_base_addr);
 
-/* optional reserved chunk, only accessible for reserved allocations */
+/*
+ * The first chunk which always exists. Note that unlike other
+ * chunks, this one can be allocated and mapped in several different
+ * ways and thus often doesn't live in the vmalloc area.
+ */
+static struct pcpu_chunk *pcpu_first_chunk;
+
+/*
+ * Optional reserved chunk. This chunk reserves part of the first
+ * chunk and serves it for reserved allocations. The amount of
+ * reserved offset is in pcpu_reserved_chunk_limit. When reserved
+ * area doesn't exist, the following variables contain NULL and 0
+ * respectively.
+ */
 static struct pcpu_chunk *pcpu_reserved_chunk;
-/* offset limit of the reserved chunk */
 static int pcpu_reserved_chunk_limit;
 
 /*
@@ -121,7 +131,7 @@ static int pcpu_reserved_chunk_limit;
 * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former
 * protects allocation/reclaim paths, chunks and chunk->page arrays.
 * The latter is a spinlock and protects the index data structures -
- * chunk slots, rbtree, chunks and area maps in chunks.
+ * chunk slots, chunks and area maps in chunks.
 *
 * During allocation, pcpu_alloc_mutex is kept locked all the time and
 * pcpu_lock is grabbed and released as necessary. All actual memory
@@ -140,7 +150,6 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */
static DEFINE_SPINLOCK(pcpu_lock);	/* protects index data structures */
 
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
-static struct rb_root pcpu_addr_root = RB_ROOT;	/* chunks by address */
 
 /* reclaim work to release fully free chunks, scheduled from free path */
 static void pcpu_reclaim(struct work_struct *work);
@@ -191,6 +200,18 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk,
 	return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL;
 }
 
+/* set the pointer to a chunk in a page struct */
+static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
+{
+	page->index = (unsigned long)pcpu;
+}
+
+/* obtain pointer to a chunk from a page struct */
+static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
+{
+	return (struct pcpu_chunk *)page->index;
+}
+
 /**
 * pcpu_mem_alloc - allocate memory
 * @size: bytes to allocate
@@ -257,93 +278,26 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
 	}
 }
 
-static struct rb_node **pcpu_chunk_rb_search(void *addr,
-					struct rb_node **parentp)
-{
-	struct rb_node **p = &pcpu_addr_root.rb_node;
-	struct rb_node *parent = NULL;
-	struct pcpu_chunk *chunk;
-
-	while (*p) {
-		parent = *p;
-		chunk = rb_entry(parent, struct pcpu_chunk, rb_node);
-
-		if (addr < chunk->vm->addr)
-			p = &(*p)->rb_left;
-		else if (addr > chunk->vm->addr)
-			p = &(*p)->rb_right;
-		else
-			break;
-	}
-
-	if (parentp)
-		*parentp = parent;
-	return p;
-}
-
 /**
- * pcpu_chunk_addr_search - search for chunk containing specified address
- * @addr: address to search for
- *
- * Look for chunk which might contain @addr. More specifically, it
- * searchs for the chunk with the highest start address which isn't
- * beyond @addr.
- *
- * CONTEXT:
- * pcpu_lock.
+ * pcpu_chunk_addr_search - determine chunk containing specified address
+ * @addr: address for which the chunk needs to be determined.
 *
 * RETURNS:
 * The address of the found chunk.
 */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
-	struct rb_node *n, *parent;
-	struct pcpu_chunk *chunk;
+	void *first_start = pcpu_first_chunk->vm->addr;
 
-	/* is it in the reserved chunk? */
-	if (pcpu_reserved_chunk) {
-		void *start = pcpu_reserved_chunk->vm->addr;
-
-		if (addr >= start && addr < start + pcpu_reserved_chunk_limit)
+	/* is it in the first chunk? */
+	if (addr >= first_start && addr < first_start + pcpu_chunk_size) {
+		/* is it in the reserved area? */
+		if (addr < first_start + pcpu_reserved_chunk_limit)
 			return pcpu_reserved_chunk;
+		return pcpu_first_chunk;
 	}
 
-	/* nah... search the regular ones */
-	n = *pcpu_chunk_rb_search(addr, &parent);
-	if (!n) {
-		/* no exactly matching chunk, the parent is the closest */
-		n = parent;
-		BUG_ON(!n);
-	}
-	chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-
-	if (addr < chunk->vm->addr) {
-		/* the parent was the next one, look for the previous one */
-		n = rb_prev(n);
-		BUG_ON(!n);
-		chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-	}
-
-	return chunk;
-}
-
-/**
- * pcpu_chunk_addr_insert - insert chunk into address rb tree
- * @new: chunk to insert
- *
- * Insert @new into address rb tree.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_chunk_addr_insert(struct pcpu_chunk *new)
-{
-	struct rb_node **p, *parent;
-
-	p = pcpu_chunk_rb_search(new->vm->addr, &parent);
-	BUG_ON(*p);
-	rb_link_node(&new->rb_node, parent, p);
-	rb_insert_color(&new->rb_node, &pcpu_addr_root);
+	return pcpu_get_page_chunk(vmalloc_to_page(addr));
 }
 
 /**
@@ -755,6 +709,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
							alloc_mask, 0);
 			if (!*pagep)
 				goto err;
+			pcpu_set_page_chunk(*pagep, chunk);
 		}
 	}
 
@@ -879,7 +834,6 @@ restart:
 
 	spin_lock_irq(&pcpu_lock);
 	pcpu_chunk_relocate(chunk, -1);
-	pcpu_chunk_addr_insert(chunk);
 	goto restart;
 
area_found:
@@ -968,7 +922,6 @@ static void pcpu_reclaim(struct work_struct *work)
 		if (chunk == list_first_entry(head, struct pcpu_chunk, list))
 			continue;
 
-		rb_erase(&chunk->rb_node, &pcpu_addr_root);
 		list_move(&chunk->list, &todo);
 	}
 
@@ -1147,7 +1100,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 
 	if (reserved_size) {
 		schunk->free_size = reserved_size;
-		pcpu_reserved_chunk = schunk;	/* not for dynamic alloc */
+		pcpu_reserved_chunk = schunk;
+		pcpu_reserved_chunk_limit = static_size + reserved_size;
 	} else {
 		schunk->free_size = dyn_size;
 		dyn_size = 0;			/* dynamic area covered */
@@ -1158,8 +1112,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	if (schunk->free_size)
 		schunk->map[schunk->map_used++] = schunk->free_size;
 
-	pcpu_reserved_chunk_limit = static_size + schunk->free_size;
-
 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
 		dchunk = alloc_bootmem(sizeof(struct pcpu_chunk));
@@ -1226,13 +1178,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	}
 
 	/* link the first chunk in */
-	if (!dchunk) {
-		pcpu_chunk_relocate(schunk, -1);
-		pcpu_chunk_addr_insert(schunk);
-	} else {
-		pcpu_chunk_relocate(dchunk, -1);
-		pcpu_chunk_addr_insert(dchunk);
-	}
+	pcpu_first_chunk = dchunk ?: schunk;
+	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
 	/* we're done */
 	pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
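The mm/percpu.c changes delete the address rb-tree entirely: each populated page now carries a back pointer to its owning chunk in the otherwise unused page->index field, so pcpu_chunk_addr_search() becomes a bounds check on the first chunk plus one vmalloc_to_page() lookup. A userspace model of that O(1) mapping (a flat page array stands in for vmalloc_to_page()):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define NPAGES 16

    struct chunk { const char *name; };

    /* Toy page struct: only the index field matters for this model. */
    struct page { unsigned long index; };

    static struct page pages[NPAGES];  /* stand-in for vmalloc_to_page() */

    static void set_page_chunk(struct page *pg, struct chunk *c)
    {
        pg->index = (unsigned long)c;
    }

    static struct chunk *get_page_chunk(struct page *pg)
    {
        return (struct chunk *)pg->index;
    }

    int main(void)
    {
        struct chunk c1 = { "chunk1" }, c2 = { "chunk2" };
        unsigned long addr;
        int i;

        /* "Populate": pages 0-7 belong to c1, 8-15 to c2. */
        for (i = 0; i < 8; i++)
            set_page_chunk(&pages[i], &c1);
        for (i = 8; i < NPAGES; i++)
            set_page_chunk(&pages[i], &c2);

        /* Free path: derive the chunk from the address alone. */
        addr = 9UL << PAGE_SHIFT;
        printf("addr %#lx -> %s\n", addr,
               get_page_chunk(&pages[addr >> PAGE_SHIFT])->name);
        return 0;
    }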
diff --git a/mm/rmap.c b/mm/rmap.c
index 16521664010d..23122af32611 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -14,7 +14,7 @@
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
- * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004
+ * Contributions by Hugh Dickins 2003, 2004
 */
 
/*
diff --git a/mm/slab.c b/mm/slab.c
index 9a90b00d2f91..f85831da9080 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,7 @@
 #include <linux/cpu.h>
 #include <linux/sysctl.h>
 #include <linux/module.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/rcupdate.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index f92e66d558bd..9b1737b0787b 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,7 +66,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <asm/atomic.h>
 
/*
diff --git a/mm/slub.c b/mm/slub.c
index 65ffda5934b0..5e805a6fe36c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3ecea98ecb45..1416e7e9e02d 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 */
 void __delete_from_swap_cache(struct page *page)
 {
-	swp_entry_t ent = {.val = page_private(page)};
-
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(!PageSwapCache(page));
 	VM_BUG_ON(PageWriteback(page));
@@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page)
 	total_swapcache_pages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	INC_CACHE_INFO(del_total);
-	mem_cgroup_uncharge_swapcache(page, ent);
 }
 
 /**
@@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page)
 	__delete_from_swap_cache(page);
 	spin_unlock_irq(&swapper_space.tree_lock);
 
+	mem_cgroup_uncharge_swapcache(page, entry);
 	swap_free(entry);
 	page_cache_release(page);
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index 55206fab7b99..12e1579f9165 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	BUG_ON(page_has_private(page));
 	__remove_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
+	mem_cgroup_uncharge_cache_page(page);
 	page_cache_release(page);	/* pagecache ref */
 	return 1;
failed:
diff --git a/mm/util.c b/mm/util.c
index 55bef160b9f1..abc65aa7cdfc 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,9 +4,11 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
-#include <linux/tracepoint.h>
 #include <asm/uaccess.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 /**
 * kstrdup - allocate space for and copy an existing string
 * @s: the string to duplicate
@@ -255,13 +257,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
 /* Tracepoints definitions. */
-DEFINE_TRACE(kmalloc);
-DEFINE_TRACE(kmem_cache_alloc);
-DEFINE_TRACE(kmalloc_node);
-DEFINE_TRACE(kmem_cache_alloc_node);
-DEFINE_TRACE(kfree);
-DEFINE_TRACE(kmem_cache_free);
-
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
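mm/util.c now relies on the trace/events scheme: the event header declares the tracepoints for every includer, and the single file that defines CREATE_TRACE_POINTS before including it also instantiates them, which is why the per-event DEFINE_TRACE() lines disappear. A loose single-file analogy for that one-definition pattern (the real mechanism is macro-generated by the tracing headers; everything below is invented for the sketch):

    #include <stdio.h>

    /* What the shared header provides to every includer: a declaration. */
    void trace_kmalloc(const void *ptr, size_t size);

    /* What the one CREATE_TRACE_POINTS includer additionally emits:
     * the single real definition. */
    #define CREATE_TRACE_POINTS 1
    #if CREATE_TRACE_POINTS
    void trace_kmalloc(const void *ptr, size_t size)
    {
        printf("kmalloc: ptr=%p bytes=%zu\n", ptr, size);
    }
    #endif

    int main(void)
    {
        char buf[32];

        trace_kmalloc(buf, sizeof(buf));  /* any caller just calls it */
        return 0;
    }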
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5fa3eda1f03f..d254306562cd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
+		mem_cgroup_uncharge_swapcache(page, swap);
 		swap_free(swap);
 	} else {
 		__remove_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
+		mem_cgroup_uncharge_cache_page(page);
 	}
 
 	return 1;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 4cc3624bd22d..95f7a7a544b4 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -90,9 +90,6 @@ static void add_conn(struct work_struct *work)
 	struct hci_conn *conn = container_of(work, struct hci_conn, work_add);
 	struct hci_dev *hdev = conn->hdev;
 
-	/* ensure previous del is complete */
-	flush_work(&conn->work_del);
-
 	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
 
 	if (device_add(&conn->dev) < 0) {
@@ -118,9 +115,6 @@ static void del_conn(struct work_struct *work)
 	struct hci_conn *conn = container_of(work, struct hci_conn, work_del);
 	struct hci_dev *hdev = conn->hdev;
 
-	/* ensure previous add is complete */
-	flush_work(&conn->work_add);
-
 	if (!device_is_registered(&conn->dev))
 		return;
 
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9fd0dc3cca99..b75b6cea49da 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -23,7 +23,7 @@
 #include <linux/bitops.h>
 #include <net/genetlink.h>
 
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include <asm/unaligned.h>
 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c8fb45665e4f..499a67eaf3ae 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,11 +19,11 @@
 #include <linux/workqueue.h>
 #include <linux/netlink.h>
 #include <linux/net_dropmon.h>
-#include <trace/skb.h>
 
 #include <asm/unaligned.h>
 #include <asm/bitops.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/skb.h>
 
-DEFINE_TRACE(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3779c1438c11..0666a827bc62 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->cflows) {
 		/* let go of the SAs if we have them */
 		int i = 0;
-		for (;  i < pkt_dev->nflows; i++){
+		for (;  i < pkt_dev->cflows; i++) {
 			struct xfrm_state *x = pkt_dev->flows[i].x;
 			if (x) {
 				xfrm_state_put(x);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e505b5392e1e..c2e4fb8f3546 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -65,7 +65,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include "kmap_skb.h"
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ec0ae490f0b6..33c7c85dfe40 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key)
 static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
 {
 	int wasfull;
-	t_key cindex, key = tn->key;
+	t_key cindex, key;
 	struct tnode *tp;
 
+	preempt_disable();
+	key = tn->key;
+
 	while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
@@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
 	if (IS_TNODE(tn))
 		tn = (struct tnode *)resize(t, (struct tnode *)tn);
 
+	preempt_enable();
 	return (struct node *)tn;
 }
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c4c60e9f068a..28205e5bfa9b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -784,8 +784,8 @@ static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
-	struct rtable *rth, **rthp;
-	unsigned long length = 0, samples = 0;
+	struct rtable *rth, *aux, **rthp;
+	unsigned long samples = 0;
 	unsigned long sum = 0, sum2 = 0;
 	u64 mult;
 
@@ -795,9 +795,9 @@ static void rt_check_expire(void)
 	goal = (unsigned int)mult;
 	if (goal > rt_hash_mask)
 		goal = rt_hash_mask + 1;
-	length = 0;
 	for (; goal > 0; goal--) {
 		unsigned long tmo = ip_rt_gc_timeout;
+		unsigned long length;
 
 		i = (i + 1) & rt_hash_mask;
 		rthp = &rt_hash_table[i].chain;
@@ -809,8 +809,10 @@ static void rt_check_expire(void)
 
 		if (*rthp == NULL)
 			continue;
+		length = 0;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
+			prefetch(rth->u.dst.rt_next);
 			if (rt_is_expired(rth)) {
 				*rthp = rth->u.dst.rt_next;
 				rt_free(rth);
@@ -819,33 +821,30 @@ static void rt_check_expire(void)
 			if (rth->u.dst.expires) {
 				/* Entry is expired even if it is in use */
 				if (time_before_eq(jiffies, rth->u.dst.expires)) {
+nofree:
 					tmo >>= 1;
 					rthp = &rth->u.dst.rt_next;
 					/*
-					 * Only bump our length if the hash
-					 * inputs on entries n and n+1 are not
-					 * the same, we only count entries on
+					 * We only count entries on
 					 * a chain with equal hash inputs once
 					 * so that entries for different QOS
 					 * levels, and other non-hash input
 					 * attributes don't unfairly skew
 					 * the length computation
 					 */
-					if ((*rthp == NULL) ||
-					    !compare_hash_inputs(&(*rthp)->fl,
-								 &rth->fl))
-						length += ONE;
+					for (aux = rt_hash_table[i].chain;;) {
+						if (aux == rth) {
+							length += ONE;
+							break;
+						}
+						if (compare_hash_inputs(&aux->fl, &rth->fl))
+							break;
+						aux = aux->u.dst.rt_next;
+					}
 					continue;
 				}
-			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
-				tmo >>= 1;
-				rthp = &rth->u.dst.rt_next;
-				if ((*rthp == NULL) ||
-				    !compare_hash_inputs(&(*rthp)->fl,
-							 &rth->fl))
-					length += ONE;
-				continue;
-			}
+			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
+				goto nofree;
 
 			/* Cleanup aged off entries. */
 			*rthp = rth->u.dst.rt_next;
@@ -1068,7 +1067,6 @@ out: return 0;
static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
 {
 	struct rtable *rth, **rthp;
-	struct rtable *rthi;
 	unsigned long now;
 	struct rtable *cand, **candp;
 	u32 min_score;
@@ -1088,7 +1086,6 @@ restart:
 	}
 
 	rthp = &rt_hash_table[hash].chain;
-	rthi = NULL;
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
@@ -1134,17 +1131,6 @@ restart:
 		chain_length++;
 
 		rthp = &rth->u.dst.rt_next;
-
-		/*
-		 * check to see if the next entry in the chain
-		 * contains the same hash input values as rt.  If it does
-		 * This is where we will insert into the list, instead of
-		 * at the head.  This groups entries that differ by aspects not
-		 * relvant to the hash function together, which we use to adjust
-		 * our chain length
-		 */
-		if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl))
-			rthi = rth;
 	}
 
 	if (cand) {
@@ -1205,10 +1191,7 @@ restart:
 		}
 	}
 
-	if (rthi)
-		rt->u.dst.rt_next = rthi->u.dst.rt_next;
-	else
-		rt->u.dst.rt_next = rt_hash_table[hash].chain;
+	rt->u.dst.rt_next = rt_hash_table[hash].chain;
 
 #if RT_CACHE_DEBUG >= 2
 	if (rt->u.dst.rt_next) {
@@ -1224,10 +1207,7 @@ restart:
 	 * previous writes to rt are comitted to memory
 	 * before making rt visible to other CPUS.
 	 */
-	if (rthi)
-		rcu_assign_pointer(rthi->u.dst.rt_next, rt);
-	else
-		rcu_assign_pointer(rt_hash_table[hash].chain, rt);
+	rcu_assign_pointer(rt_hash_table[hash].chain, rt);
 
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 	*rp = rt;
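In rt_check_expire(), the chain-length heuristic no longer compares an entry only with its successor; it scans from the chain head and counts an entry just once per group of identical hash inputs, which the old neighbour test got wrong once the grouped-insert code (rthi) was removed. A standalone model of the new counting loop (list and key types invented for the sketch):

    #include <stdio.h>
    #include <string.h>

    struct rt { const char *fl; struct rt *next; };  /* fl = flow key */

    static int same_inputs(const struct rt *a, const struct rt *b)
    {
        return strcmp(a->fl, b->fl) == 0;
    }

    /* Count only the first entry of each equal-input group, as the
     * new rt_check_expire() loop does. */
    static unsigned long chain_length(struct rt *head)
    {
        unsigned long length = 0;
        struct rt *rth, *aux;

        for (rth = head; rth; rth = rth->next) {
            for (aux = head;; aux = aux->next) {
                if (aux == rth) {      /* first of its group */
                    length++;
                    break;
                }
                if (same_inputs(aux, rth))
                    break;             /* already counted earlier */
            }
        }
        return length;
    }

    int main(void)
    {
        struct rt c = { "B", NULL }, b = { "A", &c }, a = { "A", &b };

        /* Two entries share hash inputs "A": effective length is 2. */
        printf("length = %lu\n", chain_length(&a));
        return 0;
    }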
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a453aac91bd3..c6743eec9b7d 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 }
 EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
 
+static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
+{
+	return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+}
+
 static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 		 */
 		diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
 
-		if (diff > gamma && tp->snd_ssthresh > 2 ) {
+		if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
 			/* Going too fast. Time to slow down
 			 * and switch to congestion avoidance.
 			 */
-			tp->snd_ssthresh = 2;
 
 			/* Set cwnd to match the actual rate
 			 * exactly:
@@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 			 * utilization.
 			 */
 			tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
+			tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
 
 		} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
 			/* Slow start. */
@@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 				 * we slow down.
 				 */
 				tp->snd_cwnd--;
+				tp->snd_ssthresh
+					= tcp_vegas_ssthresh(tp);
 			} else if (diff < alpha) {
 				/* We don't have enough extra packets
 				 * in the network, so speed up.
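Rather than forcing snd_ssthresh to 2 when Vegas backs off, the new tcp_vegas_ssthresh() helper clamps it to just under the current window, so the threshold survives with a sane value. Its behaviour, mirrored in a standalone function:

    #include <stdio.h>

    typedef unsigned int u32;

    static u32 min_u32(u32 a, u32 b) { return a < b ? a : b; }

    /* Mirror of tcp_vegas_ssthresh(): never above snd_cwnd - 1. */
    static u32 vegas_ssthresh(u32 snd_ssthresh, u32 snd_cwnd)
    {
        return min_u32(snd_ssthresh, snd_cwnd - 1);
    }

    int main(void)
    {
        /* Leaving slow start at cwnd=10 with a huge initial ssthresh. */
        printf("ssthresh -> %u\n", vegas_ssthresh(0x7fffffff, 10)); /* 9 */
        /* An already-low ssthresh is preserved. */
        printf("ssthresh -> %u\n", vegas_ssthresh(5, 10));          /* 5 */
        return 0;
    }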
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1394ddb6e35c..032a5ec391c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -137,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = {
 		}
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_protocol	= RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
@@ -159,6 +160,7 @@ static struct rt6_info ip6_prohibit_entry_template = {
 		}
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_protocol	= RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
@@ -176,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 		}
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_protocol	= RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 8e757dd53396..aee0d6bea309 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -22,6 +22,7 @@
 #include <linux/netfilter/nfnetlink_conntrack.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_log.h>
 
static DEFINE_RWLOCK(dccp_lock);
@@ -553,6 +554,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	ct->proto.dccp.state = new_state;
 	write_unlock_bh(&dccp_lock);
 
+	if (new_state != old_state)
+		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+
 	dn = dccp_pernet(net);
 	nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]);
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b5ccf2b4b2e7..97a6e93d742e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -634,6 +634,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			sender->td_end = end;
 		sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 	}
+	if (tcph->ack) {
+		if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
+			sender->td_maxack = ack;
+			sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
+		} else if (after(ack, sender->td_maxack))
+			sender->td_maxack = ack;
+	}
+
 	/*
 	 * Update receiver data.
 	 */
@@ -919,6 +927,16 @@ static int tcp_packet(struct nf_conn *ct,
 		return -NF_ACCEPT;
 	case TCP_CONNTRACK_CLOSE:
 		if (index == TCP_RST_SET
+		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
+		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
+			/* Invalid RST */
+			write_unlock_bh(&tcp_lock);
+			if (LOG_INVALID(net, IPPROTO_TCP))
+				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+					  "nf_ct_tcp: invalid RST ");
+			return -NF_ACCEPT;
+		}
+		if (index == TCP_RST_SET
 		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
 			 && ct->proto.tcp.last_index == TCP_SYN_SET)
 			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
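tcp_in_window() now records the highest ACK seen in each direction (td_maxack), and tcp_packet() drops a RST whose sequence number lies before the peer's recorded maximum ACK, since a legitimate RST cannot refer to data older than what was already acknowledged. The comparison is the kernel's wrap-safe before(); a standalone demonstration:

    #include <stdio.h>
    #include <stdint.h>

    /* Wrap-safe sequence comparison, as in the kernel's before(). */
    static int before(uint32_t seq1, uint32_t seq2)
    {
        return (int32_t)(seq1 - seq2) < 0;
    }

    int main(void)
    {
        uint32_t td_maxack = 1000;  /* highest ack seen from the peer */

        /* RST with an old sequence number: rejected as invalid. */
        printf("rst seq 900:  %s\n",
               before(900, td_maxack) ? "invalid" : "valid");
        /* RST at or beyond the acked point: accepted. */
        printf("rst seq 1000: %s\n",
               before(1000, td_maxack) ? "invalid" : "valid");
        return 0;
    }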
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fd326ac27ec8..66a6dd5c519a 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -581,6 +581,12 @@ nfulnl_log_packet(u_int8_t pf,
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
 
+	if (in && skb_mac_header_was_set(skb)) {
+		size += nla_total_size(skb->dev->hard_header_len)
+			+ nla_total_size(sizeof(u_int16_t))	/* hwtype */
+			+ nla_total_size(sizeof(u_int16_t));	/* hwlen */
+	}
+
 	spin_lock_bh(&inst->lock);
 
 	if (inst->flags & NFULNL_CFG_F_SEQ)
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index a5b5369c30f9..219dcdbe388c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -926,7 +926,7 @@ static int dl_seq_show(struct seq_file *s, void *v)
 	if (!hlist_empty(&htable->hash[*bucket])) {
 		hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node)
 			if (dl_seq_real_show(ent, htable->family, s))
-				return 1;
+				return -1;
 	}
 	return 0;
 }
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 0f1218b8d289..67e38a056240 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -343,9 +343,9 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
 	/* not yet present - create a candidate for a new connection
 	 * and then redo the check */
 	conn = rxrpc_alloc_connection(gfp);
-	if (IS_ERR(conn)) {
-		_leave(" = %ld", PTR_ERR(conn));
-		return PTR_ERR(conn);
+	if (!conn) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
 	}
 
 	conn->trans = trans;
@@ -508,9 +508,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 	/* not yet present - create a candidate for a new connection and then
 	 * redo the check */
 	candidate = rxrpc_alloc_connection(gfp);
-	if (IS_ERR(candidate)) {
-		_leave(" = %ld", PTR_ERR(candidate));
-		return PTR_ERR(candidate);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
 	}
 
 	candidate->trans = trans;
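rxrpc_alloc_connection() returns NULL on allocation failure, not an ERR_PTR, so the old IS_ERR() tests could never fire and a NULL could escape to the callers. The difference between the two conventions, in userspace (the allocator here is a stand-in; the ERR_PTR helpers are copied from the kernel's err.h):

    #include <stdio.h>
    #include <stdlib.h>

    /* Userspace copies of the kernel's ERR_PTR helpers. */
    #define MAX_ERRNO 4095
    #define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO)
    static int IS_ERR(const void *ptr)
    {
        return IS_ERR_VALUE((unsigned long)ptr);
    }

    /* An allocator in the kzalloc style: NULL on failure. */
    static void *alloc_connection(int fail)
    {
        return fail ? NULL : malloc(16);
    }

    int main(void)
    {
        void *conn = alloc_connection(1);

        /* The buggy test: NULL is not in the ERR_PTR range. */
        printf("IS_ERR(conn) = %d\n", IS_ERR(conn));    /* 0: missed! */
        /* The fixed test catches it. */
        printf("conn == NULL = %d\n", conn == NULL);    /* 1 */
        free(conn);
        return 0;
    }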
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0759f32e9dca..09cdcdfe7e91 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -135,6 +135,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
135 unsigned long cl; 135 unsigned long cl;
136 unsigned long fh; 136 unsigned long fh;
137 int err; 137 int err;
138 int tp_created = 0;
138 139
139 if (net != &init_net) 140 if (net != &init_net)
140 return -EINVAL; 141 return -EINVAL;
@@ -266,10 +267,7 @@ replay:
266 goto errout; 267 goto errout;
267 } 268 }
268 269
269 spin_lock_bh(root_lock); 270 tp_created = 1;
270 tp->next = *back;
271 *back = tp;
272 spin_unlock_bh(root_lock);
273 271
274 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) 272 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
275 goto errout; 273 goto errout;
@@ -296,8 +294,11 @@ replay:
296 switch (n->nlmsg_type) { 294 switch (n->nlmsg_type) {
297 case RTM_NEWTFILTER: 295 case RTM_NEWTFILTER:
298 err = -EEXIST; 296 err = -EEXIST;
299 if (n->nlmsg_flags & NLM_F_EXCL) 297 if (n->nlmsg_flags & NLM_F_EXCL) {
298 if (tp_created)
299 tcf_destroy(tp);
300 goto errout; 300 goto errout;
301 }
301 break; 302 break;
302 case RTM_DELTFILTER: 303 case RTM_DELTFILTER:
303 err = tp->ops->delete(tp, fh); 304 err = tp->ops->delete(tp, fh);
@@ -314,8 +315,18 @@ replay:
314 } 315 }
315 316
316 err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); 317 err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
317 if (err == 0) 318 if (err == 0) {
319 if (tp_created) {
320 spin_lock_bh(root_lock);
321 tp->next = *back;
322 *back = tp;
323 spin_unlock_bh(root_lock);
324 }
318 tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); 325 tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
326 } else {
327 if (tp_created)
328 tcf_destroy(tp);
329 }
319 330
320errout: 331errout:
321 if (cl) 332 if (cl)
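
The net effect of the cls_api hunks: a newly created tp is no longer linked into the chain at allocation time. Instead, tp_created records the fact, the proto is published under root_lock only once ->change() has succeeded, and every failure path destroys the unpublished proto instead of leaking it. Condensed into one function (a sketch of the flow, not a verbatim extract):

static int publish_or_destroy(struct tcf_proto *tp, struct tcf_proto **back,
			      spinlock_t *root_lock, int tp_created, int err)
{
	if (err == 0) {
		if (tp_created) {
			spin_lock_bh(root_lock);
			tp->next = *back;	/* publish only on success */
			*back = tp;
			spin_unlock_bh(root_lock);
		}
	} else if (tp_created) {
		tcf_destroy(tp);		/* never leave a half-built tp linked */
	}
	return err;
}
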
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 91a3db4a76f8..e5becb92b3e7 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -104,8 +104,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
104 struct tcf_result *res) 104 struct tcf_result *res)
105{ 105{
106 struct cls_cgroup_head *head = tp->root; 106 struct cls_cgroup_head *head = tp->root;
107 struct cgroup_cls_state *cs; 107 u32 classid;
108 int ret = 0;
109 108
110 /* 109 /*
111 * Due to the nature of the classifier it is required to ignore all 110 * Due to the nature of the classifier it is required to ignore all
@@ -121,17 +120,18 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
121 return -1; 120 return -1;
122 121
123 rcu_read_lock(); 122 rcu_read_lock();
124 cs = task_cls_state(current); 123 classid = task_cls_state(current)->classid;
125 if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) {
126 res->classid = cs->classid;
127 res->class = 0;
128 ret = tcf_exts_exec(skb, &head->exts, res);
129 } else
130 ret = -1;
131
132 rcu_read_unlock(); 124 rcu_read_unlock();
133 125
134 return ret; 126 if (!classid)
127 return -1;
128
129 if (!tcf_em_tree_match(skb, &head->ematches, NULL))
130 return -1;
131
132 res->classid = classid;
133 res->class = 0;
134 return tcf_exts_exec(skb, &head->exts, res);
135} 135}
136 136
137static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) 137static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
@@ -167,6 +167,9 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
167 struct tcf_exts e; 167 struct tcf_exts e;
168 int err; 168 int err;
169 169
170 if (!tca[TCA_OPTIONS])
171 return -EINVAL;
172
170 if (head == NULL) { 173 if (head == NULL) {
171 if (!handle) 174 if (!handle)
172 return -EINVAL; 175 return -EINVAL;
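
The classify rewrite shrinks the RCU critical section to a single scalar copy: classid is snapshotted while the task's cls_state is protected, the read lock is dropped, and only then do the ematch and action steps run. The pattern, as a sketch (the surrounding types and helpers are the real ones; the function name is not):

static int classify_sketch(struct sk_buff *skb, struct tcf_proto *tp,
			   struct tcf_result *res)
{
	struct cls_cgroup_head *head = tp->root;
	u32 classid;

	rcu_read_lock();
	classid = task_cls_state(current)->classid;	/* copy, don't keep the pointer */
	rcu_read_unlock();

	if (!classid || !tcf_em_tree_match(skb, &head->ematches, NULL))
		return -1;

	res->classid = classid;
	res->class = 0;
	return tcf_exts_exec(skb, &head->exts, res);
}
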
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index af3198814c15..9d504234af4a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -345,6 +345,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
345 lock_sock(sock->sk); 345 lock_sock(sock->sk);
346 sock->sk->sk_sndbuf = snd * 2; 346 sock->sk->sk_sndbuf = snd * 2;
347 sock->sk->sk_rcvbuf = rcv * 2; 347 sock->sk->sk_rcvbuf = rcv * 2;
348 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
348 release_sock(sock->sk); 349 release_sock(sock->sk);
349#endif 350#endif
350} 351}
@@ -796,6 +797,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
796 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), 797 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
797 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); 798 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
798 799
800 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
801 /* sndbuf needs to have room for one request
802 * per thread, otherwise we can stall even when the
803 * network isn't a bottleneck.
804 *
805 * We count all threads rather than threads in a
806 * particular pool, which provides an upper bound
807 * on the number of threads which will access the socket.
808 *
809 * rcvbuf just needs to be able to hold a few requests.
810 * Normally they will be removed from the queue
 811 * as soon as a complete request arrives.
812 */
813 svc_sock_setbufsize(svsk->sk_sock,
814 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
815 3 * serv->sv_max_mesg);
816
799 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 817 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
800 818
801 /* Receive data. If we haven't got the record length yet, get 819 /* Receive data. If we haven't got the record length yet, get
@@ -1043,6 +1061,15 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1043 1061
1044 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1062 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1045 1063
 1064 /* the initial setting must have enough space to
 1065 * receive and respond to one request.
 1066 * svc_tcp_recvfrom will re-adjust if necessary
1067 */
1068 svc_sock_setbufsize(svsk->sk_sock,
1069 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1070 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1071
1072 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1046 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1073 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1047 if (sk->sk_state != TCP_ESTABLISHED) 1074 if (sk->sk_state != TCP_ESTABLISHED)
1048 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1075 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1112,14 +1139,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1112 /* Initialize the socket */ 1139 /* Initialize the socket */
1113 if (sock->type == SOCK_DGRAM) 1140 if (sock->type == SOCK_DGRAM)
1114 svc_udp_init(svsk, serv); 1141 svc_udp_init(svsk, serv);
1115 else { 1142 else
1116 /* initialise setting must have enough space to
1117 * receive and respond to one request.
1118 */
1119 svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
1120 4 * serv->sv_max_mesg);
1121 svc_tcp_init(svsk, serv); 1143 svc_tcp_init(svsk, serv);
1122 }
1123 1144
1124 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1145 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1125 svsk, svsk->sk_sk); 1146 svsk, svsk->sk_sk);
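
The sizing rule these svcsock hunks introduce is plain arithmetic: sndbuf must hold one maximum-sized reply per thread (plus a little slack), while rcvbuf only needs room for a few in-flight requests. A runnable back-of-the-envelope with assumed numbers:

#include <stdio.h>

int main(void)
{
	unsigned int sv_nrthreads = 8;		/* assumed total server threads */
	unsigned int sv_max_mesg = 1048576;	/* assumed 1 MB max message */

	unsigned int sndbuf = (sv_nrthreads + 3) * sv_max_mesg;
	unsigned int rcvbuf = 3 * sv_max_mesg;

	printf("sndbuf=%u rcvbuf=%u\n", sndbuf, rcvbuf);
	return 0;
}

The SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK bits set in svc_sock_setbufsize() keep the kernel's auto-tuning from overriding these computed values.
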
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 8b510c5e8777..f11be72a1a80 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -128,7 +128,8 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
128 page_bytes -= sge_bytes; 128 page_bytes -= sge_bytes;
129 129
130 frmr->page_list->page_list[page_no] = 130 frmr->page_list->page_list[page_no] =
131 ib_dma_map_page(xprt->sc_cm_id->device, page, 0, 131 ib_dma_map_single(xprt->sc_cm_id->device,
132 page_address(page),
132 PAGE_SIZE, DMA_TO_DEVICE); 133 PAGE_SIZE, DMA_TO_DEVICE);
133 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 134 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
134 frmr->page_list->page_list[page_no])) 135 frmr->page_list->page_list[page_no]))
@@ -532,18 +533,17 @@ static int send_reply(struct svcxprt_rdma *rdma,
532 clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); 533 clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
533 534
534 /* Prepare the SGE for the RPCRDMA Header */ 535 /* Prepare the SGE for the RPCRDMA Header */
536 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
537 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
535 ctxt->sge[0].addr = 538 ctxt->sge[0].addr =
536 ib_dma_map_page(rdma->sc_cm_id->device, 539 ib_dma_map_single(rdma->sc_cm_id->device, page_address(page),
537 page, 0, PAGE_SIZE, DMA_TO_DEVICE); 540 ctxt->sge[0].length, DMA_TO_DEVICE);
538 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 541 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
539 goto err; 542 goto err;
540 atomic_inc(&rdma->sc_dma_used); 543 atomic_inc(&rdma->sc_dma_used);
541 544
542 ctxt->direction = DMA_TO_DEVICE; 545 ctxt->direction = DMA_TO_DEVICE;
543 546
544 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
545 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
546
547 /* Determine how many of our SGE are to be transmitted */ 547 /* Determine how many of our SGE are to be transmitted */
548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4b0c2fa15e0b..5151f9f6c573 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -500,8 +500,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
500 BUG_ON(sge_no >= xprt->sc_max_sge); 500 BUG_ON(sge_no >= xprt->sc_max_sge);
501 page = svc_rdma_get_page(); 501 page = svc_rdma_get_page();
502 ctxt->pages[sge_no] = page; 502 ctxt->pages[sge_no] = page;
503 pa = ib_dma_map_page(xprt->sc_cm_id->device, 503 pa = ib_dma_map_single(xprt->sc_cm_id->device,
504 page, 0, PAGE_SIZE, 504 page_address(page), PAGE_SIZE,
505 DMA_FROM_DEVICE); 505 DMA_FROM_DEVICE);
506 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 506 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
507 goto err_put_ctxt; 507 goto err_put_ctxt;
@@ -1315,8 +1315,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1315 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1315 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1316 1316
1317 /* Prepare SGE for local address */ 1317 /* Prepare SGE for local address */
1318 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, 1318 sge.addr = ib_dma_map_single(xprt->sc_cm_id->device,
1319 p, 0, PAGE_SIZE, DMA_FROM_DEVICE); 1319 page_address(p), PAGE_SIZE, DMA_FROM_DEVICE);
1320 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { 1320 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
1321 put_page(p); 1321 put_page(p);
1322 return; 1322 return;
@@ -1343,7 +1343,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1343 if (ret) { 1343 if (ret) {
1344 dprintk("svcrdma: Error %d posting send for protocol error\n", 1344 dprintk("svcrdma: Error %d posting send for protocol error\n",
1345 ret); 1345 ret);
1346 ib_dma_unmap_page(xprt->sc_cm_id->device, 1346 ib_dma_unmap_single(xprt->sc_cm_id->device,
1347 sge.addr, PAGE_SIZE, 1347 sge.addr, PAGE_SIZE,
1348 DMA_FROM_DEVICE); 1348 DMA_FROM_DEVICE);
1349 svc_rdma_put_context(ctxt, 1); 1349 svc_rdma_put_context(ctxt, 1);
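
All of the svcrdma hunks above make the same substitution: ib_dma_map_page() becomes ib_dma_map_single() on the page's kernel virtual address via page_address(). What stays constant is the map/check/unmap discipline, sketched below (the function name and -EIO choice are illustrative):

#include <rdma/ib_verbs.h>

static int map_one_page(struct ib_device *dev, struct page *page, u64 *dma)
{
	u64 addr = ib_dma_map_single(dev, page_address(page),
				     PAGE_SIZE, DMA_FROM_DEVICE);

	if (ib_dma_mapping_error(dev, addr))
		return -EIO;		/* never post an unchecked DMA handle */

	*dma = addr;
	return 0;			/* undo later with ib_dma_unmap_single() */
}
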
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3b21e0cc5e69..465aafc2007f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1495,7 +1495,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1495 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 1495 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1496 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; 1496 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1497 frmr_wr.wr.fast_reg.access_flags = (writing ? 1497 frmr_wr.wr.fast_reg.access_flags = (writing ?
1498 IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); 1498 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1499 IB_ACCESS_REMOTE_READ);
1499 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1500 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1500 DECR_CQCOUNT(&r_xprt->rx_ep); 1501 DECR_CQCOUNT(&r_xprt->rx_ep);
1501 1502
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 08265ca15785..487cb627ddba 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1551,6 +1551,13 @@ static int regulatory_hint_core(const char *alpha2)
1551 1551
1552 queue_regulatory_request(request); 1552 queue_regulatory_request(request);
1553 1553
1554 /*
1555 * This ensures last_request is populated once modules
 1556 * come swinging in and call regulatory hints and
1557 * wiphy_apply_custom_regulatory().
1558 */
1559 flush_scheduled_work();
1560
1554 return 0; 1561 return 0;
1555} 1562}
1556 1563
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index cb6a5bb85d80..0e59f9ae9b81 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -786,6 +786,13 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
786 err = -EFAULT; 786 err = -EFAULT;
787 goto out; 787 goto out;
788 } 788 }
789
790 if (cmd == SIOCSIWENCODEEXT) {
791 struct iw_encode_ext *ee = (void *) extra;
792
793 if (iwp->length < sizeof(*ee) + ee->key_len)
794 return -EFAULT;
795 }
789 } 796 }
790 797
791 err = handler(dev, info, (union iwreq_data *) iwp, extra); 798 err = handler(dev, info, (union iwreq_data *) iwp, extra);
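
The wext hunk adds a sanity check before the handler runs: the user-supplied iwp->length must cover the fixed struct iw_encode_ext header plus the key_len the user claims, otherwise the handler could read key bytes past the buffer that was actually copied in. A userspace model of the check; struct blob stands in for struct iw_encode_ext:

#include <stddef.h>
#include <stdint.h>

struct blob {
	uint16_t key_len;	/* user-controlled payload length */
	uint8_t  key[];		/* flexible array, like ee->key */
};

static int blob_ok(const struct blob *b, size_t total_len)
{
	if (total_len < sizeof(*b))
		return 0;				/* header truncated */
	return total_len >= sizeof(*b) + b->key_len;	/* claimed payload fits */
}
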
diff --git a/samples/Kconfig b/samples/Kconfig
index 4b02f5a0e656..b75d28cba3f7 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -19,6 +19,12 @@ config SAMPLE_TRACEPOINTS
19 help 19 help
 20 This builds the tracepoints example modules. 20 This builds the tracepoints example modules.
21 21
22config SAMPLE_TRACE_EVENTS
23 tristate "Build trace_events examples -- loadable modules only"
24 depends on EVENT_TRACING && m
25 help
 26 This builds the trace_events example modules.
27
22config SAMPLE_KOBJECT 28config SAMPLE_KOBJECT
23 tristate "Build kobject examples" 29 tristate "Build kobject examples"
24 help 30 help
diff --git a/samples/Makefile b/samples/Makefile
index 10eaca89fe17..13e4b470b539 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
1# Makefile for Linux samples code 1# Makefile for Linux samples code
2 2
3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ 3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
new file mode 100644
index 000000000000..0d428dc67283
--- /dev/null
+++ b/samples/trace_events/Makefile
@@ -0,0 +1,6 @@
1# builds the trace events example kernel modules;
 2# then, to use one (as root): insmod <module_name.ko>
3
4CFLAGS_trace-events-sample.o := -I$(src)
5
6obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
new file mode 100644
index 000000000000..aabc4e970911
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.c
@@ -0,0 +1,52 @@
1#include <linux/module.h>
2#include <linux/kthread.h>
3
4/*
 5 * Any file that uses tracepoints must include the header.
 6 * But only one file must include the header with
 7 * CREATE_TRACE_POINTS defined first. This generates the C code
 8 * that creates the handles for the tracepoints.
9 */
10#define CREATE_TRACE_POINTS
11#include "trace-events-sample.h"
12
13
14static void simple_thread_func(int cnt)
15{
16 set_current_state(TASK_INTERRUPTIBLE);
17 schedule_timeout(HZ);
18 trace_foo_bar("hello", cnt);
19}
20
21static int simple_thread(void *arg)
22{
23 int cnt = 0;
24
25 while (!kthread_should_stop())
26 simple_thread_func(cnt++);
27
28 return 0;
29}
30
31static struct task_struct *simple_tsk;
32
33static int __init trace_event_init(void)
34{
35 simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
36 if (IS_ERR(simple_tsk))
37 return -1;
38
39 return 0;
40}
41
42static void __exit trace_event_exit(void)
43{
44 kthread_stop(simple_tsk);
45}
46
47module_init(trace_event_init);
48module_exit(trace_event_exit);
49
50MODULE_AUTHOR("Steven Rostedt");
51MODULE_DESCRIPTION("trace-events-sample");
52MODULE_LICENSE("GPL");
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
new file mode 100644
index 000000000000..128a897687c5
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.h
@@ -0,0 +1,129 @@
1/*
2 * Notice that this file is not protected like a normal header.
3 * We also must allow for rereading of this file. The
4 *
5 * || defined(TRACE_HEADER_MULTI_READ)
6 *
7 * serves this purpose.
8 */
9#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
10#define _TRACE_EVENT_SAMPLE_H
11
12/*
13 * All trace headers should include tracepoint.h, until we finally
14 * make it into a standard header.
15 */
16#include <linux/tracepoint.h>
17
18/*
19 * If TRACE_SYSTEM is defined, that will be the directory created
20 * in the ftrace directory under /debugfs/tracing/events/<system>
21 *
 22 * The define_trace.h below will also look for a file named
 23 * TRACE_SYSTEM.h, where TRACE_SYSTEM is what is defined here.
 24 *
 25 * If you want the system name to differ from the file name, you
 26 * can override the header name by defining TRACE_INCLUDE_FILE.
 27 *
 28 * If this file were called goofy.h, then we would define:
29 *
30 * #define TRACE_INCLUDE_FILE goofy
31 *
32 */
33#undef TRACE_SYSTEM
34#define TRACE_SYSTEM sample
35
36/*
37 * The TRACE_EVENT macro is broken up into 5 parts.
38 *
39 * name: name of the trace point. This is also how to enable the tracepoint.
40 * A function called trace_foo_bar() will be created.
41 *
42 * proto: the prototype of the function trace_foo_bar()
43 * Here it is trace_foo_bar(char *foo, int bar).
44 *
45 * args: must match the arguments in the prototype.
46 * Here it is simply "foo, bar".
47 *
 48 * struct: This defines how the data will be stored in the ring buffer.
 49 * There are currently two types of elements: __field and __array.
 50 * A __field is broken up into (type, name), where type can be any
 51 * type except an array.
 52 * For an array there are three fields: (type, name, size), giving
 53 * the type of the elements in the array, the name of the field and
 54 * the size of the array.
 55 *
 56 * __array( char, foo, 10) is the same as saying char foo[10].
 57 *
 58 * fast_assign: This is a C-like function that is used to store the items
 59 * into the ring buffer.
 60 *
 61 * printk: This is a way to print out the data in pretty print. It is
 62 * useful if the system crashes and you are logging via a serial line;
 63 * the data can then be printed to the console using this "printk" method.
 64 *
 65 * Note that for both the assign and the printk, __entry is the handle
 66 * to the data structure in the ring buffer, and is defined by
 67 * TP_STRUCT__entry.
68 */
69TRACE_EVENT(foo_bar,
70
71 TP_PROTO(char *foo, int bar),
72
73 TP_ARGS(foo, bar),
74
75 TP_STRUCT__entry(
76 __array( char, foo, 10 )
77 __field( int, bar )
78 ),
79
80 TP_fast_assign(
81 strncpy(__entry->foo, foo, 10);
82 __entry->bar = bar;
83 ),
84
85 TP_printk("foo %s %d", __entry->foo, __entry->bar)
86);
87#endif
88
89/***** NOTICE! The #if protection ends here. *****/
90
91
92/*
93 * There are several ways I could have done this. If I left out the
94 * TRACE_INCLUDE_PATH, then it would default to the kernel source
95 * include/trace/events directory.
96 *
97 * I could specify a path from the define_trace.h file back to this
98 * file.
99 *
100 * #define TRACE_INCLUDE_PATH ../../samples/trace_events
101 *
102 * But I chose to simply make it use the current directory and then in
103 * the Makefile I added:
104 *
105 * CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
106 *
107 * This will make sure the current path is part of the include
108 * structure for our file so that we can find it.
109 *
110 * I could have made only the top level directory the include:
111 *
112 * CFLAGS_trace-events-sample.o := -I$(PWD)
113 *
114 * And then let the path to this directory be the TRACE_INCLUDE_PATH:
115 *
116 * #define TRACE_INCLUDE_PATH samples/trace_events
117 *
 118 * But then if something defines "samples" or "trace_events", we
 119 * would risk that being converted too, giving us an unexpected
 120 * result.
121 */
122#undef TRACE_INCLUDE_PATH
123#undef TRACE_INCLUDE_FILE
124#define TRACE_INCLUDE_PATH .
125/*
126 * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
127 */
128#define TRACE_INCLUDE_FILE trace-events-sample
129#include <trace/define_trace.h>
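
To complete the picture the comments in this header paint: any additional compilation unit that wants to fire the event includes the header without defining CREATE_TRACE_POINTS, so it only picks up the inline trace_foo_bar() stub and not a second copy of the handles. A sketch of such a file (not part of the patch):

#include "trace-events-sample.h"	/* no CREATE_TRACE_POINTS here */

static void some_other_path(void)
{
	trace_foo_bar("world", 42);	/* a no-op unless the event is enabled */
}
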
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index cba61ca403ca..2b706617c89a 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -188,20 +188,34 @@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@
188# --------------------------------------------------------------------------- 188# ---------------------------------------------------------------------------
189 189
190quiet_cmd_gzip = GZIP $@ 190quiet_cmd_gzip = GZIP $@
191cmd_gzip = gzip -f -9 < $< > $@ 191cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -f -9 > $@) || \
192 (rm -f $@ ; false)
192 193
193 194
194# Bzip2 195# Bzip2
195# --------------------------------------------------------------------------- 196# ---------------------------------------------------------------------------
196 197
197# Bzip2 does not include size in file... so we have to fake that 198# Bzip2 and LZMA do not include size in file... so we have to fake that;
198size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size 199# append the size as a 32-bit littleendian number as gzip does.
199 200size_append = echo -ne $(shell \
200quiet_cmd_bzip2 = BZIP2 $@ 201dec_size=0; \
201cmd_bzip2 = (bzip2 -9 < $< && $(size_append) $<) > $@ || (rm -f $@ ; false) 202for F in $1; do \
203 fsize=$$(stat -c "%s" $$F); \
204 dec_size=$$(expr $$dec_size + $$fsize); \
205done; \
206printf "%08x" $$dec_size | \
207 sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g' \
208)
209
210quiet_cmd_bzip2 = BZIP2 $@
211cmd_bzip2 = (cat $(filter-out FORCE,$^) | \
212 bzip2 -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
213 (rm -f $@ ; false)
202 214
203# Lzma 215# Lzma
204# --------------------------------------------------------------------------- 216# ---------------------------------------------------------------------------
205 217
206quiet_cmd_lzma = LZMA $@ 218quiet_cmd_lzma = LZMA $@
207cmd_lzma = (lzma -9 -c $< && $(size_append) $<) >$@ || (rm -f $@ ; false) 219cmd_lzma = (cat $(filter-out FORCE,$^) | \
220 lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
221 (rm -f $@ ; false)
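
The new size_append fragment sums the input file sizes with stat/expr and prints the total as four little-endian bytes, mimicking the 32-bit ISIZE trailer gzip appends so the decompressor knows how much memory the payload expands to. The same computation in plain C, for clarity:

#include <stdint.h>
#include <stdio.h>

static void emit_size_trailer(FILE *out, uint32_t total)
{
	uint8_t le[4] = {
		total & 0xff, (total >> 8) & 0xff,
		(total >> 16) & 0xff, (total >> 24) & 0xff,
	};
	fwrite(le, 1, sizeof(le), out);		/* 32-bit little endian */
}

int main(void)
{
	emit_size_trailer(stdout, 0x00123456);	/* writes 56 34 12 00 */
	return 0;
}
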
diff --git a/scripts/bin_size b/scripts/bin_size
deleted file mode 100644
index 43e1b360cee6..000000000000
--- a/scripts/bin_size
+++ /dev/null
@@ -1,10 +0,0 @@
1#!/bin/sh
2
3if [ $# = 0 ] ; then
4 echo Usage: $0 file
5fi
6
7size_dec=`stat -c "%s" $1`
8size_hex_echo_string=`printf "%08x" $size_dec |
9 sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'`
10/bin/echo -ne $size_hex_echo_string
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 3208a3a7e7fe..acd8c4a8e3e0 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1828,6 +1828,25 @@ sub reset_state {
1828 $state = 0; 1828 $state = 0;
1829} 1829}
1830 1830
1831sub tracepoint_munge($) {
1832 my $file = shift;
1833 my $tracepointname = 0;
1834 my $tracepointargs = 0;
1835
1836 if($prototype =~ m/TRACE_EVENT\((.*?),/) {
1837 $tracepointname = $1;
1838 }
1839 if($prototype =~ m/TP_PROTO\((.*?)\)/) {
1840 $tracepointargs = $1;
1841 }
1842 if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
1843 print STDERR "Warning(${file}:$.): Unrecognized tracepoint format: \n".
1844 "$prototype\n";
1845 } else {
1846 $prototype = "static inline void trace_$tracepointname($tracepointargs)";
1847 }
1848}
1849
1831sub syscall_munge() { 1850sub syscall_munge() {
1832 my $void = 0; 1851 my $void = 0;
1833 1852
@@ -1882,6 +1901,9 @@ sub process_state3_function($$) {
1882 if ($prototype =~ /SYSCALL_DEFINE/) { 1901 if ($prototype =~ /SYSCALL_DEFINE/) {
1883 syscall_munge(); 1902 syscall_munge();
1884 } 1903 }
1904 if ($prototype =~ /TRACE_EVENT/) {
1905 tracepoint_munge($file);
1906 }
1885 dump_function($prototype, $file); 1907 dump_function($prototype, $file);
1886 reset_state(); 1908 reset_state();
1887 } 1909 }
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 409596eca124..0fae7da0529c 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -26,7 +26,7 @@
26# which will also be the location of that section after final link. 26# which will also be the location of that section after final link.
27# e.g. 27# e.g.
28# 28#
29# .section ".text.sched" 29# .section ".sched.text", "ax"
30# .globl my_func 30# .globl my_func
31# my_func: 31# my_func:
32# [...] 32# [...]
@@ -39,7 +39,7 @@
39# [...] 39# [...]
40# 40#
41# Both relocation offsets for the mcounts in the above example will be 41# Both relocation offsets for the mcounts in the above example will be
42# offset from .text.sched. If we make another file called tmp.s with: 42# offset from .sched.text. If we make another file called tmp.s with:
43# 43#
44# .section __mcount_loc 44# .section __mcount_loc
45# .quad my_func + 0x5 45# .quad my_func + 0x5
@@ -51,7 +51,7 @@
51# But this gets hard if my_func is not globl (a static function). 51# But this gets hard if my_func is not globl (a static function).
52# In such a case we have: 52# In such a case we have:
53# 53#
54# .section ".text.sched" 54# .section ".sched.text", "ax"
55# my_func: 55# my_func:
56# [...] 56# [...]
57# call mcount (offset: 0x5) 57# call mcount (offset: 0x5)
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index ffbe259700b1..510186f0b72e 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -84,8 +84,8 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
84 * against concurrent list-extension 84 * against concurrent list-extension
85 */ 85 */
86 rcu_read_lock(); 86 rcu_read_lock();
87 qe = list_entry(rcu_dereference(qe->later.next), 87 qe = list_entry_rcu(qe->later.next,
88 struct ima_queue_entry, later); 88 struct ima_queue_entry, later);
89 rcu_read_unlock(); 89 rcu_read_unlock();
90 (*pos)++; 90 (*pos)++;
91 91
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index e03a7e19c73b..11d2cb19d7a6 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -734,8 +734,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new)
734 return; 734 return;
735 } 735 }
736 736
737 m = list_entry(rcu_dereference(smk_netlbladdr_list.next), 737 m = list_entry_rcu(smk_netlbladdr_list.next,
738 struct smk_netlbladdr, list); 738 struct smk_netlbladdr, list);
739 739
740 /* the comparison '>' is a bit hacky, but works */ 740 /* the comparison '>' is a bit hacky, but works */
741 if (new->smk_mask.s_addr > m->smk_mask.s_addr) { 741 if (new->smk_mask.s_addr > m->smk_mask.s_addr) {
@@ -748,8 +748,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new)
748 list_add_rcu(&new->list, &m->list); 748 list_add_rcu(&new->list, &m->list);
749 return; 749 return;
750 } 750 }
751 m_next = list_entry(rcu_dereference(m->list.next), 751 m_next = list_entry_rcu(m->list.next,
752 struct smk_netlbladdr, list); 752 struct smk_netlbladdr, list);
753 if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) { 753 if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) {
754 list_add_rcu(&new->list, &m->list); 754 list_add_rcu(&new->list, &m->list);
755 return; 755 return;
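
These smackfs hunks (and the ima_fs one above) replace open-coded list_entry(rcu_dereference(...)) with list_entry_rcu(), which folds the annotated pointer fetch into the container_of step. A minimal usage sketch; the node type is illustrative:

#include <linux/rculist.h>

struct node {
	int val;
	struct list_head list;
};

static int first_val(struct list_head *head)
{
	struct node *n;
	int val;

	rcu_read_lock();
	n = list_entry_rcu(head->next, struct node, list);
	val = n->val;			/* read while still in the read section */
	rcu_read_unlock();
	return val;
}
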
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 5b481912752a..e42be5c4f055 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -27,6 +27,12 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
27 27
28static int tomoyo_bprm_set_creds(struct linux_binprm *bprm) 28static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
29{ 29{
30 int rc;
31
32 rc = cap_bprm_set_creds(bprm);
33 if (rc)
34 return rc;
35
30 /* 36 /*
31 * Do only if this function is called for the first time of an execve 37 * Do only if this function is called for the first time of an execve
32 * operation. 38 * operation.
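
The tomoyo hunk makes the hook delegate to the capability default first and propagate its error, rather than silently bypassing the commoncap logic. The general shape of such a chained LSM hook, with the module-specific body reduced to a comment:

static int example_bprm_set_creds(struct linux_binprm *bprm)
{
	int rc = cap_bprm_set_creds(bprm);	/* capability defaults first */

	if (rc)
		return rc;

	/* module-specific credential checks would follow here */
	return 0;
}
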
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index a2a792c18c40..d659995ac3ac 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -249,6 +249,11 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream)
249 new_hw_ptr = hw_base + pos; 249 new_hw_ptr = hw_base + pos;
250 } 250 }
251 } 251 }
252
253 /* Do jiffies check only in xrun_debug mode */
254 if (!xrun_debug(substream))
255 goto no_jiffies_check;
256
252 /* Skip the jiffies check for hardwares with BATCH flag. 257 /* Skip the jiffies check for hardwares with BATCH flag.
253 * Such hardware usually just increases the position at each IRQ, 258 * Such hardware usually just increases the position at each IRQ,
254 * thus it can't give any strange position. 259 * thus it can't give any strange position.
@@ -336,7 +341,9 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream)
336 hw_base = 0; 341 hw_base = 0;
337 new_hw_ptr = hw_base + pos; 342 new_hw_ptr = hw_base + pos;
338 } 343 }
339 if (((delta * HZ) / runtime->rate) > jdelta + HZ/100) { 344 /* Do jiffies check only in xrun_debug mode */
345 if (xrun_debug(substream) &&
346 ((delta * HZ) / runtime->rate) > jdelta + HZ/100) {
340 hw_ptr_error(substream, 347 hw_ptr_error(substream,
341 "hw_ptr skipping! " 348 "hw_ptr skipping! "
342 "(pos=%ld, delta=%ld, period=%ld, jdelta=%lu/%lu)\n", 349 "(pos=%ld, delta=%ld, period=%ld, jdelta=%lu/%lu)\n",
@@ -1478,7 +1485,6 @@ static int snd_pcm_lib_ioctl_reset(struct snd_pcm_substream *substream,
1478 runtime->status->hw_ptr %= runtime->buffer_size; 1485 runtime->status->hw_ptr %= runtime->buffer_size;
1479 else 1486 else
1480 runtime->status->hw_ptr = 0; 1487 runtime->status->hw_ptr = 0;
1481 runtime->hw_ptr_jiffies = jiffies;
1482 snd_pcm_stream_unlock_irqrestore(substream, flags); 1488 snd_pcm_stream_unlock_irqrestore(substream, flags);
1483 return 0; 1489 return 0;
1484} 1490}
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index fc6f98e257df..b5da656d1ece 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -848,6 +848,7 @@ static void snd_pcm_post_start(struct snd_pcm_substream *substream, int state)
848{ 848{
849 struct snd_pcm_runtime *runtime = substream->runtime; 849 struct snd_pcm_runtime *runtime = substream->runtime;
850 snd_pcm_trigger_tstamp(substream); 850 snd_pcm_trigger_tstamp(substream);
851 runtime->hw_ptr_jiffies = jiffies;
851 runtime->status->state = state; 852 runtime->status->state = state;
852 if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && 853 if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
853 runtime->silence_size > 0) 854 runtime->silence_size > 0)
@@ -961,6 +962,11 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push)
961{ 962{
962 if (substream->runtime->trigger_master != substream) 963 if (substream->runtime->trigger_master != substream)
963 return 0; 964 return 0;
 965 /* The jiffies check in snd_pcm_update_hw_ptr*() is done on the
 966 * delta between the current jiffies and hw_ptr_jiffies; backdating
 967 * hw_ptr_jiffies makes that delta large enough to skip the check once.
968 */
969 substream->runtime->hw_ptr_jiffies = jiffies - HZ * 1000;
964 return substream->ops->trigger(substream, 970 return substream->ops->trigger(substream,
965 push ? SNDRV_PCM_TRIGGER_PAUSE_PUSH : 971 push ? SNDRV_PCM_TRIGGER_PAUSE_PUSH :
966 SNDRV_PCM_TRIGGER_PAUSE_RELEASE); 972 SNDRV_PCM_TRIGGER_PAUSE_RELEASE);
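
Backdating hw_ptr_jiffies by HZ * 1000 works because snd_pcm_update_hw_ptr*() compares (delta * HZ) / rate against jdelta + HZ/100: a timestamp a thousand seconds in the past makes jdelta so large that no plausible position delta can exceed it, so the test stays false exactly once after the pause. A toy computation with assumed numbers:

#include <stdio.h>

#define HZ 1000

int main(void)
{
	unsigned long rate = 48000, delta = 48000;	/* one second of frames */
	unsigned long jdelta = HZ * 1000UL;		/* backdated timestamp */

	/* prints 0: the skip-detection test cannot fire */
	printf("check fires: %d\n",
	       (int)((delta * HZ) / rate > jdelta + HZ / 100));
	return 0;
}
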
diff --git a/sound/drivers/pcsp/pcsp_mixer.c b/sound/drivers/pcsp/pcsp_mixer.c
index 771955a9be71..199b03377142 100644
--- a/sound/drivers/pcsp/pcsp_mixer.c
+++ b/sound/drivers/pcsp/pcsp_mixer.c
@@ -51,7 +51,7 @@ static int pcsp_treble_info(struct snd_kcontrol *kcontrol,
51 if (uinfo->value.enumerated.item > chip->max_treble) 51 if (uinfo->value.enumerated.item > chip->max_treble)
52 uinfo->value.enumerated.item = chip->max_treble; 52 uinfo->value.enumerated.item = chip->max_treble;
53 sprintf(uinfo->value.enumerated.name, "%lu", 53 sprintf(uinfo->value.enumerated.name, "%lu",
54 PCSP_CALC_RATE(uinfo->value.enumerated.item)); 54 (unsigned long)PCSP_CALC_RATE(uinfo->value.enumerated.item));
55 return 0; 55 return 0;
56} 56}
57 57
diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 81bc93e5f1e3..7337abdbe4e3 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -958,10 +958,13 @@ static int patch_sigmatel_stac9708_3d(struct snd_ac97 * ac97)
958} 958}
959 959
960static const struct snd_kcontrol_new snd_ac97_sigmatel_4speaker = 960static const struct snd_kcontrol_new snd_ac97_sigmatel_4speaker =
961AC97_SINGLE("Sigmatel 4-Speaker Stereo Playback Switch", AC97_SIGMATEL_DAC2INVERT, 2, 1, 0); 961AC97_SINGLE("Sigmatel 4-Speaker Stereo Playback Switch",
962 AC97_SIGMATEL_DAC2INVERT, 2, 1, 0);
962 963
964/* "Sigmatel " removed due to excessive name length: */
963static const struct snd_kcontrol_new snd_ac97_sigmatel_phaseinvert = 965static const struct snd_kcontrol_new snd_ac97_sigmatel_phaseinvert =
964AC97_SINGLE("Sigmatel Surround Phase Inversion Playback Switch", AC97_SIGMATEL_DAC2INVERT, 3, 1, 0); 966AC97_SINGLE("Surround Phase Inversion Playback Switch",
967 AC97_SIGMATEL_DAC2INVERT, 3, 1, 0);
965 968
966static const struct snd_kcontrol_new snd_ac97_sigmatel_controls[] = { 969static const struct snd_kcontrol_new snd_ac97_sigmatel_controls[] = {
967AC97_SINGLE("Sigmatel DAC 6dB Attenuate", AC97_SIGMATEL_ANALOG, 1, 1, 0), 970AC97_SINGLE("Sigmatel DAC 6dB Attenuate", AC97_SIGMATEL_ANALOG, 1, 1, 0),
diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index ad2888705d2a..c111efe61c3c 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -800,7 +800,7 @@ int __devinit snd_ca0106_mixer(struct snd_ca0106 *emu)
800 "Capture Volume", 800 "Capture Volume",
801 "External Amplifier", 801 "External Amplifier",
802 "Sigmatel 4-Speaker Stereo Playback Switch", 802 "Sigmatel 4-Speaker Stereo Playback Switch",
803 "Sigmatel Surround Phase Inversion Playback ", 803 "Surround Phase Inversion Playback Switch",
804 NULL 804 NULL
805 }; 805 };
806 static char *ca0106_rename_ctls[] = { 806 static char *ca0106_rename_ctls[] = {
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 21e99cfa8c49..3128e1a6bc65 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2141,6 +2141,7 @@ static struct snd_pci_quirk probe_mask_list[] __devinitdata = {
2141 /* including bogus ALC268 in slot#2 that conflicts with ALC888 */ 2141 /* including bogus ALC268 in slot#2 that conflicts with ALC888 */
2142 SND_PCI_QUIRK(0x17c0, 0x4085, "Medion MD96630", 0x01), 2142 SND_PCI_QUIRK(0x17c0, 0x4085, "Medion MD96630", 0x01),
2143 /* forced codec slots */ 2143 /* forced codec slots */
2144 SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103),
2144 SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103), 2145 SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103),
2145 {} 2146 {}
2146}; 2147};
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 56ce19e68cb5..4fcbe21829ab 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1848,6 +1848,7 @@ static const char *cxt5051_models[CXT5051_MODELS] = {
1848 1848
1849static struct snd_pci_quirk cxt5051_cfg_tbl[] = { 1849static struct snd_pci_quirk cxt5051_cfg_tbl[] = {
1850 SND_PCI_QUIRK(0x103c, 0x30cf, "HP DV6736", CXT5051_HP_DV6736), 1850 SND_PCI_QUIRK(0x103c, 0x30cf, "HP DV6736", CXT5051_HP_DV6736),
1851 SND_PCI_QUIRK(0x103c, 0x360b, "Compaq Presario CQ60", CXT5051_HP),
1851 SND_PCI_QUIRK(0x14f1, 0x0101, "Conexant Reference board", 1852 SND_PCI_QUIRK(0x14f1, 0x0101, "Conexant Reference board",
1852 CXT5051_LAPTOP), 1853 CXT5051_LAPTOP),
1853 SND_PCI_QUIRK(0x14f1, 0x5051, "HP Spartan 1.1", CXT5051_HP), 1854 SND_PCI_QUIRK(0x14f1, 0x5051, "HP Spartan 1.1", CXT5051_HP),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b8a0d3e79272..0fd258eba3a5 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -776,6 +776,12 @@ static void alc_set_input_pin(struct hda_codec *codec, hda_nid_t nid,
776 pincap = (pincap & AC_PINCAP_VREF) >> AC_PINCAP_VREF_SHIFT; 776 pincap = (pincap & AC_PINCAP_VREF) >> AC_PINCAP_VREF_SHIFT;
777 if (pincap & AC_PINCAP_VREF_80) 777 if (pincap & AC_PINCAP_VREF_80)
778 val = PIN_VREF80; 778 val = PIN_VREF80;
779 else if (pincap & AC_PINCAP_VREF_50)
780 val = PIN_VREF50;
781 else if (pincap & AC_PINCAP_VREF_100)
782 val = PIN_VREF100;
783 else if (pincap & AC_PINCAP_VREF_GRD)
784 val = PIN_VREFGRD;
779 } 785 }
780 snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, val); 786 snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, val);
781} 787}
@@ -12058,6 +12064,7 @@ static struct snd_pci_quirk alc268_cfg_tbl[] = {
12058 SND_PCI_QUIRK(0x1028, 0x0253, "Dell OEM", ALC268_DELL), 12064 SND_PCI_QUIRK(0x1028, 0x0253, "Dell OEM", ALC268_DELL),
12059 SND_PCI_QUIRK(0x1028, 0x02b0, "Dell Inspiron Mini9", ALC268_DELL), 12065 SND_PCI_QUIRK(0x1028, 0x02b0, "Dell Inspiron Mini9", ALC268_DELL),
12060 SND_PCI_QUIRK(0x103c, 0x30cc, "TOSHIBA", ALC268_TOSHIBA), 12066 SND_PCI_QUIRK(0x103c, 0x30cc, "TOSHIBA", ALC268_TOSHIBA),
12067 SND_PCI_QUIRK(0x103c, 0x30f1, "HP TX25xx series", ALC268_TOSHIBA),
12061 SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST), 12068 SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST),
12062 SND_PCI_QUIRK(0x1179, 0xff10, "TOSHIBA A205", ALC268_TOSHIBA), 12069 SND_PCI_QUIRK(0x1179, 0xff10, "TOSHIBA A205", ALC268_TOSHIBA),
12063 SND_PCI_QUIRK(0x1179, 0xff50, "TOSHIBA A305", ALC268_TOSHIBA), 12070 SND_PCI_QUIRK(0x1179, 0xff50, "TOSHIBA A305", ALC268_TOSHIBA),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 03b3646018a1..d2fd8ef6aef8 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -150,6 +150,7 @@ enum {
150 STAC_D965_REF, 150 STAC_D965_REF,
151 STAC_D965_3ST, 151 STAC_D965_3ST,
152 STAC_D965_5ST, 152 STAC_D965_5ST,
153 STAC_D965_5ST_NO_FP,
153 STAC_DELL_3ST, 154 STAC_DELL_3ST,
154 STAC_DELL_BIOS, 155 STAC_DELL_BIOS,
155 STAC_927X_MODELS 156 STAC_927X_MODELS
@@ -2154,6 +2155,13 @@ static unsigned int d965_5st_pin_configs[14] = {
2154 0x40000100, 0x40000100 2155 0x40000100, 0x40000100
2155}; 2156};
2156 2157
2158static unsigned int d965_5st_no_fp_pin_configs[14] = {
2159 0x40000100, 0x40000100, 0x0181304e, 0x01014010,
2160 0x01a19040, 0x01011012, 0x01016011, 0x40000100,
2161 0x40000100, 0x40000100, 0x40000100, 0x01442070,
2162 0x40000100, 0x40000100
2163};
2164
2157static unsigned int dell_3st_pin_configs[14] = { 2165static unsigned int dell_3st_pin_configs[14] = {
2158 0x02211230, 0x02a11220, 0x01a19040, 0x01114210, 2166 0x02211230, 0x02a11220, 0x01a19040, 0x01114210,
2159 0x01111212, 0x01116211, 0x01813050, 0x01112214, 2167 0x01111212, 0x01116211, 0x01813050, 0x01112214,
@@ -2166,6 +2174,7 @@ static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = {
2166 [STAC_D965_REF] = ref927x_pin_configs, 2174 [STAC_D965_REF] = ref927x_pin_configs,
2167 [STAC_D965_3ST] = d965_3st_pin_configs, 2175 [STAC_D965_3ST] = d965_3st_pin_configs,
2168 [STAC_D965_5ST] = d965_5st_pin_configs, 2176 [STAC_D965_5ST] = d965_5st_pin_configs,
2177 [STAC_D965_5ST_NO_FP] = d965_5st_no_fp_pin_configs,
2169 [STAC_DELL_3ST] = dell_3st_pin_configs, 2178 [STAC_DELL_3ST] = dell_3st_pin_configs,
2170 [STAC_DELL_BIOS] = NULL, 2179 [STAC_DELL_BIOS] = NULL,
2171}; 2180};
@@ -2176,6 +2185,7 @@ static const char *stac927x_models[STAC_927X_MODELS] = {
2176 [STAC_D965_REF] = "ref", 2185 [STAC_D965_REF] = "ref",
2177 [STAC_D965_3ST] = "3stack", 2186 [STAC_D965_3ST] = "3stack",
2178 [STAC_D965_5ST] = "5stack", 2187 [STAC_D965_5ST] = "5stack",
2188 [STAC_D965_5ST_NO_FP] = "5stack-no-fp",
2179 [STAC_DELL_3ST] = "dell-3stack", 2189 [STAC_DELL_3ST] = "dell-3stack",
2180 [STAC_DELL_BIOS] = "dell-bios", 2190 [STAC_DELL_BIOS] = "dell-bios",
2181}; 2191};
diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 823296d7d578..a6b88482637b 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -3347,7 +3347,7 @@ static int snd_usb_create_quirk(struct snd_usb_audio *chip,
3347 [QUIRK_MIDI_YAMAHA] = snd_usb_create_midi_interface, 3347 [QUIRK_MIDI_YAMAHA] = snd_usb_create_midi_interface,
3348 [QUIRK_MIDI_MIDIMAN] = snd_usb_create_midi_interface, 3348 [QUIRK_MIDI_MIDIMAN] = snd_usb_create_midi_interface,
3349 [QUIRK_MIDI_NOVATION] = snd_usb_create_midi_interface, 3349 [QUIRK_MIDI_NOVATION] = snd_usb_create_midi_interface,
3350 [QUIRK_MIDI_RAW] = snd_usb_create_midi_interface, 3350 [QUIRK_MIDI_FASTLANE] = snd_usb_create_midi_interface,
3351 [QUIRK_MIDI_EMAGIC] = snd_usb_create_midi_interface, 3351 [QUIRK_MIDI_EMAGIC] = snd_usb_create_midi_interface,
3352 [QUIRK_MIDI_CME] = snd_usb_create_midi_interface, 3352 [QUIRK_MIDI_CME] = snd_usb_create_midi_interface,
3353 [QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk, 3353 [QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk,
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 36e4f7a29adc..8e7f78941ba6 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -153,7 +153,7 @@ enum quirk_type {
153 QUIRK_MIDI_YAMAHA, 153 QUIRK_MIDI_YAMAHA,
154 QUIRK_MIDI_MIDIMAN, 154 QUIRK_MIDI_MIDIMAN,
155 QUIRK_MIDI_NOVATION, 155 QUIRK_MIDI_NOVATION,
156 QUIRK_MIDI_RAW, 156 QUIRK_MIDI_FASTLANE,
157 QUIRK_MIDI_EMAGIC, 157 QUIRK_MIDI_EMAGIC,
158 QUIRK_MIDI_CME, 158 QUIRK_MIDI_CME,
159 QUIRK_MIDI_US122L, 159 QUIRK_MIDI_US122L,
diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c
index 26bad373fe65..2fb35cc22a30 100644
--- a/sound/usb/usbmidi.c
+++ b/sound/usb/usbmidi.c
@@ -1778,8 +1778,18 @@ int snd_usb_create_midi_interface(struct snd_usb_audio* chip,
1778 umidi->usb_protocol_ops = &snd_usbmidi_novation_ops; 1778 umidi->usb_protocol_ops = &snd_usbmidi_novation_ops;
1779 err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); 1779 err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
1780 break; 1780 break;
1781 case QUIRK_MIDI_RAW: 1781 case QUIRK_MIDI_FASTLANE:
1782 umidi->usb_protocol_ops = &snd_usbmidi_raw_ops; 1782 umidi->usb_protocol_ops = &snd_usbmidi_raw_ops;
1783 /*
1784 * Interface 1 contains isochronous endpoints, but with the same
1785 * numbers as in interface 0. Since it is interface 1 that the
1786 * USB core has most recently seen, these descriptors are now
1787 * associated with the endpoint numbers. This will foul up our
1788 * attempts to submit bulk/interrupt URBs to the endpoints in
1789 * interface 0, so we have to make sure that the USB core looks
1790 * again at interface 0 by calling usb_set_interface() on it.
1791 */
1792 usb_set_interface(umidi->chip->dev, 0, 0);
1783 err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); 1793 err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
1784 break; 1794 break;
1785 case QUIRK_MIDI_EMAGIC: 1795 case QUIRK_MIDI_EMAGIC:
diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index 647ef5029651..5d955aaad85f 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -1868,7 +1868,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
1868 .data = & (const struct snd_usb_audio_quirk[]) { 1868 .data = & (const struct snd_usb_audio_quirk[]) {
1869 { 1869 {
1870 .ifnum = 0, 1870 .ifnum = 0,
1871 .type = QUIRK_MIDI_RAW 1871 .type = QUIRK_MIDI_FASTLANE
1872 }, 1872 },
1873 { 1873 {
1874 .ifnum = 1, 1874 .ifnum = 1,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1ecbe2391c8b..4d0dd390aa50 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2301,10 +2301,11 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
2301 2301
2302 bad_pfn = page_to_pfn(bad_page); 2302 bad_pfn = page_to_pfn(bad_page);
2303 2303
2304 if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { 2304 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
2305 r = -ENOMEM; 2305 r = -ENOMEM;
2306 goto out_free_0; 2306 goto out_free_0;
2307 } 2307 }
2308 cpumask_clear(cpus_hardware_enabled);
2308 2309
2309 r = kvm_arch_hardware_setup(); 2310 r = kvm_arch_hardware_setup();
2310 if (r < 0) 2311 if (r < 0)
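
The kvm hunk folds alloc_cpumask_var() plus an explicit cpumask_clear() into zalloc_cpumask_var(), which returns an already-zeroed mask. Side by side, as a sketch:

#include <linux/cpumask.h>
#include <linux/slab.h>

/* Old form: allocate, then clear explicitly. */
static int setup_mask_old(cpumask_var_t *mask)
{
	if (!alloc_cpumask_var(mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_clear(*mask);
	return 0;
}

/* New form: the allocation hands back a zeroed mask. */
static int setup_mask_new(cpumask_var_t *mask)
{
	if (!zalloc_cpumask_var(mask, GFP_KERNEL))
		return -ENOMEM;
	return 0;
}
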