author		Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2012-04-18 15:52:50 -0400
committer	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2012-04-18 15:52:50 -0400
commit		681e4a5e13c1c8315694eb4f44e0cdd84c9082d2 (patch)
tree		699f14527c118859026e8ce0214e689d0b9c88cb /arch/x86
parent		b960d6c43a63ebd2d8518b328da3816b833ee8cc (diff)
parent		c104f1fa1ecf4ee0fc06e31b1f77630b2551be81 (diff)
Merge commit 'c104f1fa1ecf4ee0fc06e31b1f77630b2551be81' into stable/for-linus-3.4
* commit 'c104f1fa1ecf4ee0fc06e31b1f77630b2551be81': (14566 commits)
  cpufreq: OMAP: fix build errors: depends on ARCH_OMAP2PLUS
  sparc64: Eliminate obsolete __handle_softirq() function
  sparc64: Fix bootup crash on sun4v.
  kconfig: delete last traces of __enabled_ from autoconf.h
  Revert "kconfig: fix __enabled_ macros definition for invisible and un-selected symbols"
  kconfig: fix IS_ENABLED to not require all options to be defined
  irq_domain: fix type mismatch in debugfs output format
  staging: android: fix mem leaks in __persistent_ram_init()
  staging: vt6656: Don't leak memory in drivers/staging/vt6656/ioctl.c::private_ioctl()
  staging: iio: hmc5843: Fix crash in probe function.
  panic: fix stack dump print on direct call to panic()
  drivers/rtc/rtc-pl031.c: enable clock on all ST variants
  Revert "mm: vmscan: fix misused nr_reclaimed in shrink_mem_cgroup_zone()"
  hugetlb: fix race condition in hugetlb_fault()
  drivers/rtc/rtc-twl.c: use static register while reading time
  drivers/rtc/rtc-s3c.c: add placeholder for driver private data
  drivers/rtc/rtc-s3c.c: fix compilation error
  MAINTAINERS: add PCDP console maintainer
  memcg: do not open code accesses to res_counter members
  drivers/rtc/rtc-efi.c: fix section mismatch warning
  ...
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/.gitignore | 1
-rw-r--r--  arch/x86/Kconfig | 98
-rw-r--r--  arch/x86/Kconfig.cpu | 11
-rw-r--r--  arch/x86/Kconfig.debug | 25
-rw-r--r--  arch/x86/Makefile | 23
-rw-r--r--  arch/x86/Makefile.um | 7
-rw-r--r--  arch/x86/boot/Makefile | 3
-rw-r--r--  arch/x86/boot/boot.h | 2
-rw-r--r--  arch/x86/boot/compressed/Makefile | 11
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 1022
-rw-r--r--  arch/x86/boot/compressed/eboot.h | 61
-rw-r--r--  arch/x86/boot/compressed/efi_stub_32.S | 86
-rw-r--r--  arch/x86/boot/compressed/efi_stub_64.S | 1
-rw-r--r--  arch/x86/boot/compressed/head_32.S | 22
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 20
-rw-r--r--  arch/x86/boot/compressed/misc.c | 2
-rw-r--r--  arch/x86/boot/compressed/mkpiggy.c | 11
-rw-r--r--  arch/x86/boot/compressed/relocs.c | 6
-rw-r--r--  arch/x86/boot/compressed/string.c | 9
-rw-r--r--  arch/x86/boot/header.S | 158
-rw-r--r--  arch/x86/boot/string.c | 35
-rw-r--r--  arch/x86/boot/tools/build.c | 61
-rw-r--r--  arch/x86/configs/i386_defconfig | 64
-rw-r--r--  arch/x86/configs/x86_64_defconfig | 67
-rw-r--r--  arch/x86/crypto/Makefile | 2
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 36
-rw-r--r--  arch/x86/crypto/blowfish_glue.c | 191
-rw-r--r--  arch/x86/crypto/camellia-x86_64-asm_64.S | 520
-rw-r--r--  arch/x86/crypto/camellia_glue.c | 1952
-rw-r--r--  arch/x86/crypto/crc32c-intel.c | 11
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_glue.c | 12
-rw-r--r--  arch/x86/crypto/serpent-sse2-i586-asm_32.S | 29
-rw-r--r--  arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 29
-rw-r--r--  arch/x86/crypto/serpent_sse2_glue.c | 394
-rw-r--r--  arch/x86/crypto/twofish_glue.c | 2
-rw-r--r--  arch/x86/crypto/twofish_glue_3way.c | 269
-rw-r--r--  arch/x86/ia32/Makefile | 1
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 19
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 25
-rw-r--r--  arch/x86/ia32/ia32entry.S | 373
-rw-r--r--  arch/x86/ia32/nosyscall.c | 7
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 40
-rw-r--r--  arch/x86/ia32/syscall_ia32.c | 25
-rw-r--r--  arch/x86/include/asm/Kbuild | 7
-rw-r--r--  arch/x86/include/asm/alternative.h | 6
-rw-r--r--  arch/x86/include/asm/apic.h | 7
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 148
-rw-r--r--  arch/x86/include/asm/auxvec.h | 7
-rw-r--r--  arch/x86/include/asm/barrier.h (renamed from arch/x86/um/asm/system.h) | 31
-rw-r--r--  arch/x86/include/asm/bootparam.h | 2
-rw-r--r--  arch/x86/include/asm/bug.h | 4
-rw-r--r--  arch/x86/include/asm/cacheflush.h | 1
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 10
-rw-r--r--  arch/x86/include/asm/compat.h | 40
-rw-r--r--  arch/x86/include/asm/cpu_device_id.h | 13
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 5
-rw-r--r--  arch/x86/include/asm/debugreg.h | 89
-rw-r--r--  arch/x86/include/asm/desc.h | 12
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 26
-rw-r--r--  arch/x86/include/asm/efi.h | 6
-rw-r--r--  arch/x86/include/asm/elf.h | 32
-rw-r--r--  arch/x86/include/asm/exec.h | 1
-rw-r--r--  arch/x86/include/asm/fixmap.h | 2
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 520
-rw-r--r--  arch/x86/include/asm/futex.h | 1
-rw-r--r--  arch/x86/include/asm/hardirq.h | 1
-rw-r--r--  arch/x86/include/asm/highmem.h | 2
-rw-r--r--  arch/x86/include/asm/i387.h | 410
-rw-r--r--  arch/x86/include/asm/ia32.h | 18
-rw-r--r--  arch/x86/include/asm/ia32_unistd.h | 13
-rw-r--r--  arch/x86/include/asm/idle.h | 1
-rw-r--r--  arch/x86/include/asm/inat.h | 5
-rw-r--r--  arch/x86/include/asm/init.h | 2
-rw-r--r--  arch/x86/include/asm/insn.h | 18
-rw-r--r--  arch/x86/include/asm/io_apic.h | 9
-rw-r--r--  arch/x86/include/asm/irq_controller.h | 12
-rw-r--r--  arch/x86/include/asm/jump_label.h | 6
-rw-r--r--  arch/x86/include/asm/kgdb.h | 10
-rw-r--r--  arch/x86/include/asm/kvm.h | 4
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h | 19
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 63
-rw-r--r--  arch/x86/include/asm/local.h | 1
-rw-r--r--  arch/x86/include/asm/mc146818rtc.h | 1
-rw-r--r--  arch/x86/include/asm/mce.h | 2
-rw-r--r--  arch/x86/include/asm/mrst.h | 4
-rw-r--r--  arch/x86/include/asm/msr-index.h | 7
-rw-r--r--  arch/x86/include/asm/mtrr.h | 28
-rw-r--r--  arch/x86/include/asm/page_types.h | 1
-rw-r--r--  arch/x86/include/asm/paravirt.h | 7
-rw-r--r--  arch/x86/include/asm/perf_event.h | 11
-rw-r--r--  arch/x86/include/asm/posix_types.h | 4
-rw-r--r--  arch/x86/include/asm/posix_types_32.h | 75
-rw-r--r--  arch/x86/include/asm/posix_types_64.h | 106
-rw-r--r--  arch/x86/include/asm/posix_types_x32.h | 19
-rw-r--r--  arch/x86/include/asm/processor.h | 99
-rw-r--r--  arch/x86/include/asm/prom.h | 10
-rw-r--r--  arch/x86/include/asm/ptrace.h | 1
-rw-r--r--  arch/x86/include/asm/segment.h | 58
-rw-r--r--  arch/x86/include/asm/setup.h | 2
-rw-r--r--  arch/x86/include/asm/sigcontext.h | 57
-rw-r--r--  arch/x86/include/asm/sigframe.h | 13
-rw-r--r--  arch/x86/include/asm/sighandling.h | 24
-rw-r--r--  arch/x86/include/asm/smp.h | 6
-rw-r--r--  arch/x86/include/asm/special_insns.h | 199
-rw-r--r--  arch/x86/include/asm/spinlock.h | 4
-rw-r--r--  arch/x86/include/asm/spinlock_types.h | 1
-rw-r--r--  arch/x86/include/asm/stackprotector.h | 1
-rw-r--r--  arch/x86/include/asm/switch_to.h | 129
-rw-r--r--  arch/x86/include/asm/sys_ia32.h | 7
-rw-r--r--  arch/x86/include/asm/syscall.h | 6
-rw-r--r--  arch/x86/include/asm/system.h | 523
-rw-r--r--  arch/x86/include/asm/thread_info.h | 24
-rw-r--r--  arch/x86/include/asm/timer.h | 8
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 2
-rw-r--r--  arch/x86/include/asm/traps.h | 25
-rw-r--r--  arch/x86/include/asm/tsc.h | 4
-rw-r--r--  arch/x86/include/asm/uaccess.h | 2
-rw-r--r--  arch/x86/include/asm/uaccess_32.h | 5
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 4
-rw-r--r--  arch/x86/include/asm/unistd.h | 68
-rw-r--r--  arch/x86/include/asm/unistd_32.h | 401
-rw-r--r--  arch/x86/include/asm/unistd_64.h | 732
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h | 107
-rw-r--r--  arch/x86/include/asm/uv/uv_hub.h | 4
-rw-r--r--  arch/x86/include/asm/vgtod.h | 17
-rw-r--r--  arch/x86/include/asm/virtext.h | 1
-rw-r--r--  arch/x86/include/asm/word-at-a-time.h | 46
-rw-r--r--  arch/x86/include/asm/x2apic.h | 5
-rw-r--r--  arch/x86/include/asm/x86_init.h | 6
-rw-r--r--  arch/x86/kernel/Makefile | 5
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 7
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 1
-rw-r--r--  arch/x86/kernel/amd_gart_64.c | 11
-rw-r--r--  arch/x86/kernel/apic/apic.c | 13
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 1
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 7
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 199
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/summit_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 13
-rw-r--r--  arch/x86/kernel/apm_32.c | 28
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 8
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 25
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 1
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 3
-rw-r--r--  arch/x86/kernel/cpu/common.c | 43
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 44
-rw-r--r--  arch/x86/kernel/cpu/match.c | 91
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c | 26
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 205
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 21
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/if.c | 10
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 190
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 59
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 58
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 194
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 23
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 528
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 13
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c | 19
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 1
-rw-r--r--  arch/x86/kernel/cpuid.c | 1
-rw-r--r--  arch/x86/kernel/crash_dump_32.c | 6
-rw-r--r--  arch/x86/kernel/devicetree.c | 101
-rw-r--r--  arch/x86/kernel/dumpstack.c | 12
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 8
-rw-r--r--  arch/x86/kernel/e820.c | 63
-rw-r--r--  arch/x86/kernel/early_printk.c | 2
-rw-r--r--  arch/x86/kernel/entry_32.S | 64
-rw-r--r--  arch/x86/kernel/entry_64.S | 298
-rw-r--r--  arch/x86/kernel/head_64.S | 4
-rw-r--r--  arch/x86/kernel/i387.c | 83
-rw-r--r--  arch/x86/kernel/i8259.c | 1
-rw-r--r--  arch/x86/kernel/irq.c | 7
-rw-r--r--  arch/x86/kernel/irq_32.c | 16
-rw-r--r--  arch/x86/kernel/irq_64.c | 35
-rw-r--r--  arch/x86/kernel/irqinit.c | 9
-rw-r--r--  arch/x86/kernel/kdebugfs.c | 9
-rw-r--r--  arch/x86/kernel/kgdb.c | 67
-rw-r--r--  arch/x86/kernel/kprobes-common.h | 102
-rw-r--r--  arch/x86/kernel/kprobes-opt.c | 512
-rw-r--r--  arch/x86/kernel/kprobes.c | 664
-rw-r--r--  arch/x86/kernel/kvm.c | 8
-rw-r--r--  arch/x86/kernel/kvmclock.c | 15
-rw-r--r--  arch/x86/kernel/ldt.c | 1
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 1
-rw-r--r--  arch/x86/kernel/mca_32.c | 1
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 25
-rw-r--r--  arch/x86/kernel/microcode_core.c | 15
-rw-r--r--  arch/x86/kernel/module.c | 1
-rw-r--r--  arch/x86/kernel/msr.c | 1
-rw-r--r--  arch/x86/kernel/nmi.c | 102
-rw-r--r--  arch/x86/kernel/nmi_selftest.c | 181
-rw-r--r--  arch/x86/kernel/paravirt.c | 6
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 10
-rw-r--r--  arch/x86/kernel/pci-dma.c | 8
-rw-r--r--  arch/x86/kernel/pci-nommu.c | 6
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c | 17
-rw-r--r--  arch/x86/kernel/probe_roms.c | 1
-rw-r--r--  arch/x86/kernel/process.c | 150
-rw-r--r--  arch/x86/kernel/process_32.c | 89
-rw-r--r--  arch/x86/kernel/process_64.c | 170
-rw-r--r--  arch/x86/kernel/ptrace.c | 129
-rw-r--r--  arch/x86/kernel/reboot.c | 36
-rw-r--r--  arch/x86/kernel/setup.c | 27
-rw-r--r--  arch/x86/kernel/signal.c | 141
-rw-r--r--  arch/x86/kernel/smp.c | 72
-rw-r--r--  arch/x86/kernel/smpboot.c | 49
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 40
-rw-r--r--  arch/x86/kernel/syscall_32.c | 25
-rw-r--r--  arch/x86/kernel/syscall_64.c | 26
-rw-r--r--  arch/x86/kernel/syscall_table_32.S | 350
-rw-r--r--  arch/x86/kernel/tboot.c | 9
-rw-r--r--  arch/x86/kernel/tce_64.c | 1
-rw-r--r--  arch/x86/kernel/time.c | 3
-rw-r--r--  arch/x86/kernel/tls.c | 5
-rw-r--r--  arch/x86/kernel/traps.c | 194
-rw-r--r--  arch/x86/kernel/tsc.c | 51
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 29
-rw-r--r--  arch/x86/kernel/vm86_32.c | 10
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 33
-rw-r--r--  arch/x86/kernel/x86_init.c | 5
-rw-r--r--  arch/x86/kernel/xsave.c | 13
-rw-r--r--  arch/x86/kvm/cpuid.c | 2
-rw-r--r--  arch/x86/kvm/cpuid.h | 8
-rw-r--r--  arch/x86/kvm/emulate.c | 163
-rw-r--r--  arch/x86/kvm/i8259.c | 1
-rw-r--r--  arch/x86/kvm/lapic.c | 12
-rw-r--r--  arch/x86/kvm/mmu.c | 87
-rw-r--r--  arch/x86/kvm/mmu_audit.c | 12
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 4
-rw-r--r--  arch/x86/kvm/pmu.c | 12
-rw-r--r--  arch/x86/kvm/svm.c | 124
-rw-r--r--  arch/x86/kvm/vmx.c | 73
-rw-r--r--  arch/x86/kvm/x86.c | 461
-rw-r--r--  arch/x86/lguest/boot.c | 21
-rw-r--r--  arch/x86/lib/atomic64_32.c | 59
-rw-r--r--  arch/x86/lib/atomic64_386_32.S | 6
-rw-r--r--  arch/x86/lib/atomic64_cx8_32.S | 29
-rw-r--r--  arch/x86/lib/copy_page_64.S | 12
-rw-r--r--  arch/x86/lib/delay.c | 4
-rw-r--r--  arch/x86/lib/inat.c | 36
-rw-r--r--  arch/x86/lib/insn.c | 13
-rw-r--r--  arch/x86/lib/memcpy_64.S | 44
-rw-r--r--  arch/x86/lib/memset_64.S | 33
-rw-r--r--  arch/x86/lib/usercopy.c | 103
-rw-r--r--  arch/x86/lib/usercopy_32.c | 91
-rw-r--r--  arch/x86/lib/usercopy_64.c | 49
-rw-r--r--  arch/x86/lib/x86-opcode-map.txt | 8
-rw-r--r--  arch/x86/math-emu/fpu_entry.c | 5
-rw-r--r--  arch/x86/mm/fault.c | 14
-rw-r--r--  arch/x86/mm/highmem_32.c | 4
-rw-r--r--  arch/x86/mm/hugetlbpage.c | 30
-rw-r--r--  arch/x86/mm/init.c | 24
-rw-r--r--  arch/x86/mm/init_32.c | 30
-rw-r--r--  arch/x86/mm/init_64.c | 12
-rw-r--r--  arch/x86/mm/kmemcheck/selftest.c | 1
-rw-r--r--  arch/x86/mm/mmap.c | 4
-rw-r--r--  arch/x86/mm/mmio-mod.c | 4
-rw-r--r--  arch/x86/mm/numa.c | 12
-rw-r--r--  arch/x86/mm/numa_emulation.c | 4
-rw-r--r--  arch/x86/mm/pageattr.c | 6
-rw-r--r--  arch/x86/mm/pgtable_32.c | 1
-rw-r--r--  arch/x86/mm/srat.c | 6
-rw-r--r--  arch/x86/net/bpf_jit.S | 122
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 77
-rw-r--r--  arch/x86/oprofile/backtrace.c | 2
-rw-r--r--  arch/x86/pci/Makefile | 5
-rw-r--r--  arch/x86/pci/acpi.c | 29
-rw-r--r--  arch/x86/pci/fixup.c | 12
-rw-r--r--  arch/x86/pci/i386.c | 85
-rw-r--r--  arch/x86/pci/mrst.c | 40
-rw-r--r--  arch/x86/pci/xen.c | 2
-rw-r--r--  arch/x86/platform/ce4100/falconfalls.dts | 7
-rw-r--r--  arch/x86/platform/efi/efi.c | 377
-rw-r--r--  arch/x86/platform/geode/Makefile | 2
-rw-r--r--  arch/x86/platform/geode/alix.c | 78
-rw-r--r--  arch/x86/platform/geode/geos.c | 128
-rw-r--r--  arch/x86/platform/geode/net5501.c | 154
-rw-r--r--  arch/x86/platform/iris/iris.c | 2
-rw-r--r--  arch/x86/platform/mrst/Makefile | 7
-rw-r--r--  arch/x86/platform/mrst/mrst.c | 90
-rw-r--r--  arch/x86/platform/mrst/pmu.c | 817
-rw-r--r--  arch/x86/platform/mrst/pmu.h | 234
-rw-r--r--  arch/x86/platform/olpc/olpc-xo15-sci.c | 72
-rw-r--r--  arch/x86/platform/olpc/olpc.c | 97
-rw-r--r--  arch/x86/platform/scx200/scx200_32.c | 24
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 390
-rw-r--r--  arch/x86/platform/uv/uv_irq.c | 2
-rw-r--r--  arch/x86/platform/uv/uv_time.c | 6
-rw-r--r--  arch/x86/power/cpu.c | 5
-rw-r--r--  arch/x86/power/hibernate_32.c | 1
-rw-r--r--  arch/x86/syscalls/Makefile | 55
-rw-r--r--  arch/x86/syscalls/syscall_32.tbl | 357
-rw-r--r--  arch/x86/syscalls/syscall_64.tbl | 353
-rw-r--r--  arch/x86/syscalls/syscallhdr.sh | 27
-rw-r--r--  arch/x86/syscalls/syscalltbl.sh | 15
-rw-r--r--  arch/x86/um/Kconfig | 12
-rw-r--r--  arch/x86/um/Makefile | 3
-rw-r--r--  arch/x86/um/asm/barrier.h | 75
-rw-r--r--  arch/x86/um/asm/processor.h | 10
-rw-r--r--  arch/x86/um/asm/processor_32.h | 10
-rw-r--r--  arch/x86/um/asm/processor_64.h | 10
-rw-r--r--  arch/x86/um/bugs_32.c | 4
-rw-r--r--  arch/x86/um/mem_32.c | 8
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace.h | 10
-rw-r--r--  arch/x86/um/sys_call_table_32.S | 26
-rw-r--r--  arch/x86/um/sys_call_table_32.c | 55
-rw-r--r--  arch/x86/um/sys_call_table_64.c | 36
-rw-r--r--  arch/x86/um/user-offsets.c | 17
-rw-r--r--  arch/x86/um/vdso/vma.c | 3
-rw-r--r--  arch/x86/vdso/.gitignore | 2
-rw-r--r--  arch/x86/vdso/Makefile | 46
-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 135
-rw-r--r--  arch/x86/vdso/vdso32-setup.c | 22
-rw-r--r--  arch/x86/vdso/vdsox32.S | 22
-rw-r--r--  arch/x86/vdso/vdsox32.lds.S | 28
-rw-r--r--  arch/x86/vdso/vma.c | 81
-rw-r--r--  arch/x86/xen/enlighten.c | 8
-rw-r--r--  arch/x86/xen/mmu.c | 8
-rw-r--r--  arch/x86/xen/pci-swiotlb-xen.c | 4
-rw-r--r--  arch/x86/xen/smp.c | 7
-rw-r--r--  arch/x86/xen/spinlock.c | 27
335 files changed, 14199 insertions, 7926 deletions
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index 028079065af6..7cab8c08e6d1 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,3 +1,4 @@
 boot/compressed/vmlinux
 tools/test_get_len
+tools/insn_sanity
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a31254ceb83..1d14cc6b79ad 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,13 +60,15 @@ config X86
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
 	select ANON_INODES
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
+	select HAVE_CMPXCHG_LOCAL if !M386
+	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
-	select HAVE_SPARSE_IRQ
 	select SPARSE_IRQ
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IRQ_PROBE
@@ -79,6 +81,7 @@ config X86
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
+	select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -122,16 +125,6 @@ config HAVE_LATENCYTOP_SUPPORT
 config MMU
 	def_bool y
 
-config ZONE_DMA
-	bool "DMA memory allocation support" if EXPERT
-	default y
-	help
-	  DMA memory allocation support allows devices with less than 32-bit
-	  addressing to allocate within the first 16MB of address space.
-	  Disable if no such devices will be used.
-
-	  If unsure, say Y.
-
 config SBUS
 	bool
 
@@ -186,6 +179,9 @@ config ARCH_HAS_DEFAULT_IDLE
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
+config ARCH_HAS_CPU_AUTOPROBE
+	def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
@@ -252,6 +248,16 @@ source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
+config ZONE_DMA
+	bool "DMA memory allocation support" if EXPERT
+	default y
+	help
+	  DMA memory allocation support allows devices with less than 32-bit
+	  addressing to allocate within the first 16MB of address space.
+	  Disable if no such devices will be used.
+
+	  If unsure, say Y.
+
 source "kernel/time/Kconfig"
 
 config SMP
@@ -357,7 +363,6 @@ config X86_NUMACHIP
 	depends on NUMA
 	depends on SMP
 	depends on X86_X2APIC
-	depends on !EDAC_AMD64
 	---help---
 	  Adds support for Numascale NumaChip large-SMP systems. Needed to
 	  enable more than ~168 cores.
@@ -396,6 +401,7 @@ config X86_INTEL_CE
 	select X86_REBOOTFIXUPS
 	select OF
 	select OF_EARLY_FLATTREE
+	select IRQ_DOMAIN
 	---help---
 	  Select for the Intel CE media processor (CE4100) SOC.
 	  This option compiles in support for the CE4100 SOC for settop
@@ -415,23 +421,25 @@ if X86_WANT_INTEL_MID
 config X86_INTEL_MID
 	bool
 
-config X86_MRST
-	bool "Moorestown MID platform"
+config X86_MDFLD
+	bool "Medfield MID platform"
 	depends on PCI
 	depends on PCI_GOANY
 	depends on X86_IO_APIC
+	select X86_INTEL_MID
+	select SFI
+	select DW_APB_TIMER
 	select APB_TIMER
 	select I2C
 	select SPI
 	select INTEL_SCU_IPC
 	select X86_PLATFORM_DEVICES
-	select X86_INTEL_MID
+	select MFD_INTEL_MSIC
 	---help---
-	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
-	  Internet Device(MID) platform. Moorestown consists of two chips:
-	  Lincroft (CPU core, graphics, and memory controller) and Langwell IOH.
-	  Unlike standard x86 PCs, Moorestown does not have many legacy devices
-	  nor standard legacy replacement devices/features. e.g. Moorestown does
+	  Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
+	  Internet Device(MID) platform.
+	  Unlike standard x86 PCs, Medfield does not have many legacy devices
+	  nor standard legacy replacement devices/features. e.g. Medfield does
 	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
 
 endif
@@ -631,7 +639,7 @@ config X86_SUMMIT_NUMA
 
 config X86_CYCLONE_TIMER
 	def_bool y
-	depends on X86_32_NON_STANDARD
+	depends on X86_SUMMIT
 
 source "arch/x86/Kconfig.cpu"
 
@@ -659,9 +667,10 @@ config HPET_EMULATE_RTC
 	depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
 
 config APB_TIMER
-	def_bool y if MRST
-	prompt "Langwell APB Timer Support" if X86_MRST
+	def_bool y if X86_INTEL_MID
+	prompt "Intel MID APB Timer Support" if X86_INTEL_MID
 	select DW_APB_TIMER
+	depends on X86_INTEL_MID && SFI
 	help
 	  APB timer is the replacement for 8254, HPET on X86 MID platforms.
 	  The APBT provides a stable time base on SMP
@@ -1489,6 +1498,13 @@ config EFI
 	  resultant kernel should continue to boot on existing non-EFI
 	  platforms.
 
+config EFI_STUB
+	bool "EFI stub support"
+	depends on EFI
+	---help---
+	  This kernel feature allows a bzImage to be loaded directly
+	  by EFI firmware without the use of a bootloader.
+
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
@@ -2044,6 +2060,7 @@ config OLPC
 	select GPIOLIB
 	select OF
 	select OF_PROMTREE
+	select IRQ_DOMAIN
 	---help---
 	  Add support for detecting the unique features of the OLPC
 	  XO hardware.
@@ -2101,6 +2118,19 @@ config ALIX
 
 	  Note: You have to set alix.force=1 for boards with Award BIOS.
 
+config NET5501
+	bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)"
+	select GPIOLIB
+	---help---
+	  This option enables system support for the Soekris Engineering net5501.
+
+config GEOS
+	bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)"
+	select GPIOLIB
+	depends on DMI
+	---help---
+	  This option enables system support for the Traverse Technologies GEOS.
+
 endif # X86_32
 
 config AMD_NB
@@ -2133,9 +2163,9 @@ config IA32_EMULATION
 	depends on X86_64
 	select COMPAT_BINFMT_ELF
 	---help---
-	  Include code to run 32-bit programs under a 64-bit kernel. You should
-	  likely turn this on, unless you're 100% sure that you don't have any
-	  32-bit programs left.
+	  Include code to run legacy 32-bit programs under a
+	  64-bit kernel. You should likely turn this on, unless you're
+	  100% sure that you don't have any 32-bit programs left.
 
 config IA32_AOUT
 	tristate "IA32 a.out support"
@@ -2143,9 +2173,23 @@ config IA32_AOUT
 	---help---
 	  Support old a.out binaries in the 32bit emulation.
 
+config X86_X32
+	bool "x32 ABI for 64-bit mode (EXPERIMENTAL)"
+	depends on X86_64 && IA32_EMULATION && EXPERIMENTAL
+	---help---
+	  Include code to run binaries for the x32 native 32-bit ABI
+	  for 64-bit processors. An x32 process gets access to the
+	  full 64-bit register file and wide data path while leaving
+	  pointers at 32 bits for smaller memory footprint.
+
+	  You will need a recent binutils (2.22 or later) with
+	  elf32_x86_64 support enabled to compile a kernel with this
+	  option set.
+
 config COMPAT
 	def_bool y
-	depends on IA32_EMULATION
+	depends on IA32_EMULATION || X86_X32
+	select ARCH_WANT_OLD_COMPAT_IPC
 
 config COMPAT_FOR_U64_ALIGNMENT
 	def_bool COMPAT
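The X86_X32 help text above is the whole story in two sentences: ILP32 data sizes, full 64-bit registers. A minimal user-space sketch (not part of the patch; it assumes a toolchain where gcc accepts -mx32, i.e. binutils 2.22+ and a matching C library) makes the split visible:

	/* Hypothetical demo: build with `gcc -mx32 -o x32demo x32demo.c` */
	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t wide = 0x0123456789abcdefULL;

		/* ILP32 data model: pointers and longs are 4 bytes... */
		printf("sizeof(void *) = %zu\n", sizeof(void *));	/* prints 4 */
		printf("sizeof(long)   = %zu\n", sizeof(long));		/* prints 4 */

		/* ...but 64-bit arithmetic runs in full-width registers,
		 * with no helper-call lowering as on plain i386. */
		printf("wide * 3 = %llx\n", (unsigned long long)(wide * 3));
		return 0;
	}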
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e3ca7e0d858c..706e12e9984b 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -303,18 +303,11 @@ config X86_GENERIC
 config X86_INTERNODE_CACHE_SHIFT
 	int
 	default "12" if X86_VSMP
-	default "7" if NUMA
 	default X86_L1_CACHE_SHIFT
 
 config X86_CMPXCHG
 	def_bool X86_64 || (X86_32 && !M386)
 
-config CMPXCHG_LOCAL
-	def_bool X86_64 || (X86_32 && !M386)
-
-config CMPXCHG_DOUBLE
-	def_bool y
-
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
@@ -447,7 +440,7 @@ config CPU_SUP_INTEL
 config CPU_SUP_CYRIX_32
 	default y
 	bool "Support Cyrix processors" if PROCESSOR_SELECT
-	depends on !64BIT
+	depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
 	---help---
 	  This enables detection, tunings and quirks for Cyrix processors
 
@@ -501,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
 config CPU_SUP_UMC_32
 	default y
 	bool "Support UMC processors" if PROCESSOR_SELECT
-	depends on !64BIT
+	depends on M386 || M486 || (EXPERT && !64BIT)
 	---help---
 	  This enables detection, tunings and quirks for UMC processors
 
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bf56e1793272..e46c2147397f 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,9 +43,9 @@ config EARLY_PRINTK
 	  with klogd/syslogd or the X server. You should normally N here,
 	  unless you want to debug such a crash.
 
-config EARLY_PRINTK_MRST
-	bool "Early printk for MRST platform support"
-	depends on EARLY_PRINTK && X86_MRST
+config EARLY_PRINTK_INTEL_MID
+	bool "Early printk for Intel MID platform support"
+	depends on EARLY_PRINTK && X86_INTEL_MID
 
 config EARLY_PRINTK_DBGP
 	bool "Early printk via EHCI debug port"
@@ -63,8 +63,11 @@ config DEBUG_STACKOVERFLOW
 	bool "Check for stack overflows"
 	depends on DEBUG_KERNEL
 	---help---
-	  This option will cause messages to be printed if free stack space
-	  drops below a certain limit.
+	  Say Y here if you want to check the overflows of kernel, IRQ
+	  and exception stacks. This option will cause messages of the
+	  stacks in detail when free stack space drops below a certain
+	  limit.
+	  If in doubt, say "N".
 
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
@@ -284,4 +287,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS
 
 	  If unsure, or if you run an older (pre 4.4) gcc, say N.
 
+config DEBUG_NMI_SELFTEST
+	bool "NMI Selftest"
+	depends on DEBUG_KERNEL && X86_LOCAL_APIC
+	---help---
+	  Enabling this option turns on a quick NMI selftest to verify
+	  that the NMI behaves correctly.
+
+	  This might help diagnose strange hangs that rely on NMI to
+	  function properly.
+
+	  If unsure, say N.
+
 endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b02e509072a7..41a7237606a3 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -82,6 +82,22 @@ ifdef CONFIG_CC_STACKPROTECTOR
         endif
 endif
 
+ifdef CONFIG_X86_X32
+	x32_ld_ok := $(call try-run,\
+			/bin/echo -e '1: .quad 1b' | \
+			$(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \
+			$(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \
+			$(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n)
+        ifeq ($(x32_ld_ok),y)
+                CONFIG_X86_X32_ABI := y
+                KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI
+                KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI
+        else
+                $(warning CONFIG_X86_X32 enabled but no binutils support)
+        endif
+endif
+export CONFIG_X86_X32_ABI
+
 # Don't unroll struct assignments with kmemcheck enabled
 ifeq ($(CONFIG_KMEMCHECK),y)
 	KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
@@ -113,11 +129,18 @@ KBUILD_CFLAGS += -Wno-sign-compare
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 # prevent gcc from generating any FP code by mistake
 KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
 
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
 ###
+# Syscall table generation
+
+archheaders:
+	$(Q)$(MAKE) $(build)=arch/x86/syscalls all
+
+###
 # Kernel objects
 
 head-y := arch/x86/kernel/head_$(BITS).o
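The new archheaders target drives arch/x86/syscalls/Makefile, which generates the syscall headers and dispatch tables from the syscall_*.tbl files visible in the diffstat. As a sketch of what those tables hold (row quoted from memory of the kernel tree, so treat the exact line as illustrative), each entry is <number> <abi> <name> <native entry> [<compat entry>], e.g. one syscall_32.tbl row:

	5	i386	open	sys_open	compat_sys_open

syscallhdr.sh renders such rows into __NR_* definitions and syscalltbl.sh into the tables used by the entry code, replacing the hand-maintained unistd_32.h, unistd_64.h, and syscall_table_32.S that this merge removes.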
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 36ddec6a41c9..36b62bc52638 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -8,15 +8,14 @@ ELF_ARCH := i386
 ELF_FORMAT := elf32-i386
 CHECKFLAGS += -D__i386__
 
-ifeq ("$(origin SUBARCH)", "command line")
-ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)")
 KBUILD_CFLAGS += $(call cc-option,-m32)
 KBUILD_AFLAGS += $(call cc-option,-m32)
 LINK-y += $(call cc-option,-m32)
 
 export LDFLAGS
-endif
-endif
+
+LDS_EXTRA := -Ui386
+export LDS_EXTRA
 
 # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
 include $(srctree)/arch/x86/Makefile_32.cpu
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 95365a82b6a0..5a747dd884db 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,8 @@ setup-y += video-bios.o
 targets += $(setup-y)
 hostprogs-y	:= mkcpustr tools/build
 
-HOST_EXTRACFLAGS += $(LINUXINCLUDE)
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \
+		    -D__EXPORTED_HEADERS__
 
 $(obj)/cpu.o: $(obj)/cpustr.h
 
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index c7093bd9f2d3..18997e5a1053 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -67,7 +67,7 @@ static inline void outl(u32 v, u16 port)
 {
 	asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
 }
-static inline u32 inl(u32 port)
+static inline u32 inl(u16 port)
 {
 	u32 v;
 	asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
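The type fix matters because x86 port numbers are 16 bits wide: the "dN" constraint places the port in DX (or an 8-bit immediate), so a u32 parameter merely invited silent truncation and was inconsistent with the inb/inw helpers. A hypothetical usage sketch (not in the patch) that exercises inl() with a 16-bit port, reading a PCI config dword through the classic 0xCF8/0xCFC mechanism:

	static u32 pci_conf1_read(u8 bus, u8 dev, u8 fn, u8 reg)
	{
		/* Enable bit plus bus/device/function/register address */
		u32 addr = 0x80000000u | (bus << 16) | (dev << 11) |
			   (fn << 8) | (reg & 0xfc);

		outl(addr, 0xcf8);	/* CONFIG_ADDRESS port */
		return inl(0xcfc);	/* CONFIG_DATA port */
	}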
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 09664efb9cee..fd55a2ff3ad8 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,8 +22,17 @@ LDFLAGS := -m elf_$(UTS_MACHINE)
 LDFLAGS_vmlinux := -T
 
 hostprogs-y	:= mkpiggy
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
 
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o $(obj)/piggy.o FORCE
+VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
+	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
+	$(obj)/piggy.o
+
+ifeq ($(CONFIG_EFI_STUB), y)
+	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
+endif
+
+$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
 	$(call if_changed,ld)
 	@:
 
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
new file mode 100644
index 000000000000..0cdfc0d2315e
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.c
@@ -0,0 +1,1022 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+
+#include "eboot.h"
+
+static efi_system_table_t *sys_table;
+
+static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
+			      unsigned long *desc_size)
+{
+	efi_memory_desc_t *m = NULL;
+	efi_status_t status;
+	unsigned long key;
+	u32 desc_version;
+
+	*map_size = sizeof(*m) * 32;
+again:
+	/*
+	 * Add an additional efi_memory_desc_t because we're doing an
+	 * allocation which may be in a new descriptor region.
+	 */
+	*map_size += sizeof(*m);
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, *map_size, (void **)&m);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	status = efi_call_phys5(sys_table->boottime->get_memory_map, map_size,
+				m, &key, desc_size, &desc_version);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		efi_call_phys1(sys_table->boottime->free_pool, m);
+		goto again;
+	}
+
+	if (status != EFI_SUCCESS)
+		efi_call_phys1(sys_table->boottime->free_pool, m);
+
+fail:
+	*map = m;
+	return status;
+}
+
+/*
+ * Allocate at the highest possible address that is not above 'max'.
+ */
+static efi_status_t high_alloc(unsigned long size, unsigned long align,
+			       unsigned long *addr, unsigned long max)
+{
+	unsigned long map_size, desc_size;
+	efi_memory_desc_t *map;
+	efi_status_t status;
+	unsigned long nr_pages;
+	u64 max_addr = 0;
+	int i;
+
+	status = __get_map(&map, &map_size, &desc_size);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+again:
+	for (i = 0; i < map_size / desc_size; i++) {
+		efi_memory_desc_t *desc;
+		unsigned long m = (unsigned long)map;
+		u64 start, end;
+
+		desc = (efi_memory_desc_t *)(m + (i * desc_size));
+		if (desc->type != EFI_CONVENTIONAL_MEMORY)
+			continue;
+
+		if (desc->num_pages < nr_pages)
+			continue;
+
+		start = desc->phys_addr;
+		end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+		if ((start + size) > end || (start + size) > max)
+			continue;
+
+		if (end - size > max)
+			end = max;
+
+		if (round_down(end - size, align) < start)
+			continue;
+
+		start = round_down(end - size, align);
+
+		/*
+		 * Don't allocate at 0x0. It will confuse code that
+		 * checks pointers against NULL.
+		 */
+		if (start == 0x0)
+			continue;
+
+		if (start > max_addr)
+			max_addr = start;
+	}
+
+	if (!max_addr)
+		status = EFI_NOT_FOUND;
+	else {
+		status = efi_call_phys4(sys_table->boottime->allocate_pages,
+					EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+					nr_pages, &max_addr);
+		if (status != EFI_SUCCESS) {
+			max = max_addr;
+			max_addr = 0;
+			goto again;
+		}
+
+		*addr = max_addr;
+	}
+
+free_pool:
+	efi_call_phys1(sys_table->boottime->free_pool, map);
+
+fail:
+	return status;
+}
+
+/*
+ * Allocate at the lowest possible address.
+ */
+static efi_status_t low_alloc(unsigned long size, unsigned long align,
+			      unsigned long *addr)
+{
+	unsigned long map_size, desc_size;
+	efi_memory_desc_t *map;
+	efi_status_t status;
+	unsigned long nr_pages;
+	int i;
+
+	status = __get_map(&map, &map_size, &desc_size);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+	for (i = 0; i < map_size / desc_size; i++) {
+		efi_memory_desc_t *desc;
+		unsigned long m = (unsigned long)map;
+		u64 start, end;
+
+		desc = (efi_memory_desc_t *)(m + (i * desc_size));
+
+		if (desc->type != EFI_CONVENTIONAL_MEMORY)
+			continue;
+
+		if (desc->num_pages < nr_pages)
+			continue;
+
+		start = desc->phys_addr;
+		end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+		/*
+		 * Don't allocate at 0x0. It will confuse code that
+		 * checks pointers against NULL. Skip the first 8
+		 * bytes so we start at a nice even number.
+		 */
+		if (start == 0x0)
+			start += 8;
+
+		start = round_up(start, align);
+		if ((start + size) > end)
+			continue;
+
+		status = efi_call_phys4(sys_table->boottime->allocate_pages,
+					EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+					nr_pages, &start);
+		if (status == EFI_SUCCESS) {
+			*addr = start;
+			break;
+		}
+	}
+
+	if (i == map_size / desc_size)
+		status = EFI_NOT_FOUND;
+
+free_pool:
+	efi_call_phys1(sys_table->boottime->free_pool, map);
+fail:
+	return status;
+}
+
+static void low_free(unsigned long size, unsigned long addr)
+{
+	unsigned long nr_pages;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+	efi_call_phys2(sys_table->boottime->free_pages, addr, size);
+}
+
+static void find_bits(unsigned long mask, u8 *pos, u8 *size)
+{
+	u8 first, len;
+
+	first = 0;
+	len = 0;
+
+	if (mask) {
+		while (!(mask & 0x1)) {
+			mask = mask >> 1;
+			first++;
+		}
+
+		while (mask & 0x1) {
+			mask = mask >> 1;
+			len++;
+		}
+	}
+
+	*pos = first;
+	*size = len;
+}
+
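+/*
+ * (Illustrative aside added in editing; not part of the original
+ * patch.) Worked example of find_bits() above: a PIXEL_BIT_MASK
+ * red_mask of 0x00ff0000 yields *pos = 16 (sixteen trailing zero
+ * bits) and *size = 8 (eight contiguous set bits), i.e. an 8-bit red
+ * channel starting at bit 16, which is exactly what setup_gop()
+ * below stores into screen_info as red_pos/red_size.
+ */
+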
+/*
+ * See if we have Graphics Output Protocol
+ */
+static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
+			      unsigned long size)
+{
+	struct efi_graphics_output_protocol *gop, *first_gop;
+	struct efi_pixel_bitmask pixel_info;
+	unsigned long nr_gops;
+	efi_status_t status;
+	void **gop_handle;
+	u16 width, height;
+	u32 fb_base, fb_size;
+	u32 pixels_per_scan_line;
+	int pixel_format;
+	int i;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, size, &gop_handle);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, proto,
+				NULL, &size, gop_handle);
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	first_gop = NULL;
+
+	nr_gops = size / sizeof(void *);
+	for (i = 0; i < nr_gops; i++) {
+		struct efi_graphics_output_mode_info *info;
+		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+		void *pciio;
+		void *h = gop_handle[i];
+
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					h, proto, &gop);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		efi_call_phys3(sys_table->boottime->handle_protocol,
+			       h, &pciio_proto, &pciio);
+
+		status = efi_call_phys4(gop->query_mode, gop,
+					gop->mode->mode, &size, &info);
+		if (status == EFI_SUCCESS && (!first_gop || pciio)) {
+			/*
+			 * Apple provide GOPs that are not backed by
+			 * real hardware (they're used to handle
+			 * multiple displays). The workaround is to
+			 * search for a GOP implementing the PCIIO
+			 * protocol, and if one isn't found, to just
+			 * fallback to the first GOP.
+			 */
+			width = info->horizontal_resolution;
+			height = info->vertical_resolution;
+			fb_base = gop->mode->frame_buffer_base;
+			fb_size = gop->mode->frame_buffer_size;
+			pixel_format = info->pixel_format;
+			pixel_info = info->pixel_information;
+			pixels_per_scan_line = info->pixels_per_scan_line;
+
+			/*
+			 * Once we've found a GOP supporting PCIIO,
+			 * don't bother looking any further.
+			 */
+			if (pciio)
+				break;
+
+			first_gop = gop;
+		}
+	}
+
+	/* Did we find any GOPs? */
+	if (!first_gop)
+		goto free_handle;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_width = width;
+	si->lfb_height = height;
+	si->lfb_base = fb_base;
+	si->lfb_size = fb_size;
+	si->pages = 1;
+
+	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 0;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 16;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 16;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 0;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BIT_MASK) {
+		find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
+		find_bits(pixel_info.green_mask, &si->green_pos,
+			  &si->green_size);
+		find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
+		find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
+			  &si->rsvd_size);
+		si->lfb_depth = si->red_size + si->green_size +
+			si->blue_size + si->rsvd_size;
+		si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
+	} else {
+		si->lfb_depth = 4;
+		si->lfb_linelength = si->lfb_width / 2;
+		si->red_size = 0;
+		si->red_pos = 0;
+		si->green_size = 0;
+		si->green_pos = 0;
+		si->blue_size = 0;
+		si->blue_pos = 0;
+		si->rsvd_size = 0;
+		si->rsvd_pos = 0;
+	}
+
+free_handle:
+	efi_call_phys1(sys_table->boottime->free_pool, gop_handle);
+	return status;
+}
+
+/*
+ * See if we have Universal Graphics Adapter (UGA) protocol
+ */
+static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto,
+			      unsigned long size)
+{
+	struct efi_uga_draw_protocol *uga, *first_uga;
+	unsigned long nr_ugas;
+	efi_status_t status;
+	u32 width, height;
+	void **uga_handle = NULL;
+	int i;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, size, &uga_handle);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, uga_proto,
+				NULL, &size, uga_handle);
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	first_uga = NULL;
+
+	nr_ugas = size / sizeof(void *);
+	for (i = 0; i < nr_ugas; i++) {
+		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+		void *handle = uga_handle[i];
+		u32 w, h, depth, refresh;
+		void *pciio;
+
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					handle, uga_proto, &uga);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		efi_call_phys3(sys_table->boottime->handle_protocol,
+			       handle, &pciio_proto, &pciio);
+
+		status = efi_call_phys5(uga->get_mode, uga, &w, &h,
+					&depth, &refresh);
+		if (status == EFI_SUCCESS && (!first_uga || pciio)) {
+			width = w;
+			height = h;
+
+			/*
+			 * Once we've found a UGA supporting PCIIO,
+			 * don't bother looking any further.
+			 */
+			if (pciio)
+				break;
+
+			first_uga = uga;
+		}
+	}
+
+	if (!first_uga)
+		goto free_handle;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_depth = 32;
+	si->lfb_width = width;
+	si->lfb_height = height;
+
+	si->red_size = 8;
+	si->red_pos = 16;
+	si->green_size = 8;
+	si->green_pos = 8;
+	si->blue_size = 8;
+	si->blue_pos = 0;
+	si->rsvd_size = 8;
+	si->rsvd_pos = 24;
+
+
+free_handle:
+	efi_call_phys1(sys_table->boottime->free_pool, uga_handle);
+	return status;
+}
+
+void setup_graphics(struct boot_params *boot_params)
+{
+	efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+	struct screen_info *si;
+	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
+	efi_status_t status;
+	unsigned long size;
+	void **gop_handle = NULL;
+	void **uga_handle = NULL;
+
+	si = &boot_params->screen_info;
+	memset(si, 0, sizeof(*si));
+
+	size = 0;
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, &graphics_proto,
+				NULL, &size, gop_handle);
+	if (status == EFI_BUFFER_TOO_SMALL)
+		status = setup_gop(si, &graphics_proto, size);
+
+	if (status != EFI_SUCCESS) {
+		size = 0;
+		status = efi_call_phys5(sys_table->boottime->locate_handle,
+					EFI_LOCATE_BY_PROTOCOL, &uga_proto,
+					NULL, &size, uga_handle);
+		if (status == EFI_BUFFER_TOO_SMALL)
+			setup_uga(si, &uga_proto, size);
+	}
+}
+
+struct initrd {
+	efi_file_handle_t *handle;
+	u64 size;
+};
+
+/*
+ * Check the cmdline for a LILO-style initrd= arguments.
+ *
+ * We only support loading an initrd from the same filesystem as the
+ * kernel image.
+ */
+static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
+				    struct setup_header *hdr)
+{
+	struct initrd *initrds;
+	unsigned long initrd_addr;
+	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
+	u64 initrd_total;
+	efi_file_io_interface_t *io;
+	efi_file_handle_t *fh;
+	efi_status_t status;
+	int nr_initrds;
+	char *str;
+	int i, j, k;
+
+	initrd_addr = 0;
+	initrd_total = 0;
+
+	str = (char *)(unsigned long)hdr->cmd_line_ptr;
+
+	j = 0;			/* See close_handles */
+
+	if (!str || !*str)
+		return EFI_SUCCESS;
+
+	for (nr_initrds = 0; *str; nr_initrds++) {
+		str = strstr(str, "initrd=");
+		if (!str)
+			break;
+
+		str += 7;
+
+		/* Skip any leading slashes */
+		while (*str == '/' || *str == '\\')
+			str++;
+
+		while (*str && *str != ' ' && *str != '\n')
+			str++;
+	}
+
+	if (!nr_initrds)
+		return EFI_SUCCESS;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA,
+				nr_initrds * sizeof(*initrds),
+				&initrds);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	str = (char *)(unsigned long)hdr->cmd_line_ptr;
+	for (i = 0; i < nr_initrds; i++) {
+		struct initrd *initrd;
+		efi_file_handle_t *h;
+		efi_file_info_t *info;
+		efi_char16_t filename_16[256];
+		unsigned long info_sz;
+		efi_guid_t info_guid = EFI_FILE_INFO_ID;
+		efi_char16_t *p;
+		u64 file_sz;
+
+		str = strstr(str, "initrd=");
+		if (!str)
+			break;
+
+		str += 7;
+
+		initrd = &initrds[i];
+		p = filename_16;
+
+		/* Skip any leading slashes */
+		while (*str == '/' || *str == '\\')
+			str++;
+
+		while (*str && *str != ' ' && *str != '\n') {
+			if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16))
+				break;
+
+			*p++ = *str++;
+		}
+
+		*p = '\0';
+
+		/* Only open the volume once. */
+		if (!i) {
+			efi_boot_services_t *boottime;
+
+			boottime = sys_table->boottime;
+
+			status = efi_call_phys3(boottime->handle_protocol,
+					image->device_handle, &fs_proto, &io);
+			if (status != EFI_SUCCESS)
+				goto free_initrds;
+
+			status = efi_call_phys2(io->open_volume, io, &fh);
+			if (status != EFI_SUCCESS)
+				goto free_initrds;
+		}
+
+		status = efi_call_phys5(fh->open, fh, &h, filename_16,
+					EFI_FILE_MODE_READ, (u64)0);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		initrd->handle = h;
+
+		info_sz = 0;
+		status = efi_call_phys4(h->get_info, h, &info_guid,
+					&info_sz, NULL);
+		if (status != EFI_BUFFER_TOO_SMALL)
+			goto close_handles;
+
+grow:
+		status = efi_call_phys3(sys_table->boottime->allocate_pool,
+					EFI_LOADER_DATA, info_sz, &info);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		status = efi_call_phys4(h->get_info, h, &info_guid,
+					&info_sz, info);
+		if (status == EFI_BUFFER_TOO_SMALL) {
+			efi_call_phys1(sys_table->boottime->free_pool, info);
+			goto grow;
+		}
+
+		file_sz = info->file_size;
+		efi_call_phys1(sys_table->boottime->free_pool, info);
+
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		initrd->size = file_sz;
+		initrd_total += file_sz;
+	}
+
+	if (initrd_total) {
+		unsigned long addr;
+
+		/*
+		 * Multiple initrd's need to be at consecutive
+		 * addresses in memory, so allocate enough memory for
+		 * all the initrd's.
+		 */
+		status = high_alloc(initrd_total, 0x1000,
+				    &initrd_addr, hdr->initrd_addr_max);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		/* We've run out of free low memory. */
+		if (initrd_addr > hdr->initrd_addr_max) {
+			status = EFI_INVALID_PARAMETER;
+			goto free_initrd_total;
+		}
+
+		addr = initrd_addr;
+		for (j = 0; j < nr_initrds; j++) {
+			u64 size;
+
+			size = initrds[j].size;
+			while (size) {
+				u64 chunksize;
+				if (size > EFI_READ_CHUNK_SIZE)
+					chunksize = EFI_READ_CHUNK_SIZE;
+				else
+					chunksize = size;
+				status = efi_call_phys3(fh->read,
+							initrds[j].handle,
+							&chunksize, addr);
+				if (status != EFI_SUCCESS)
+					goto free_initrd_total;
+				addr += chunksize;
+				size -= chunksize;
+			}
+
+			efi_call_phys1(fh->close, initrds[j].handle);
+		}
+
+	}
+
+	efi_call_phys1(sys_table->boottime->free_pool, initrds);
+
+	hdr->ramdisk_image = initrd_addr;
+	hdr->ramdisk_size = initrd_total;
+
+	return status;
+
+free_initrd_total:
+	low_free(initrd_total, initrd_addr);
+
+close_handles:
+	for (k = j; k < nr_initrds; k++)
+		efi_call_phys1(fh->close, initrds[k].handle);
+free_initrds:
+	efi_call_phys1(sys_table->boottime->free_pool, initrds);
+fail:
+	hdr->ramdisk_image = 0;
+	hdr->ramdisk_size = 0;
+
+	return status;
+}
+
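+/*
+ * (Illustrative aside added in editing; not part of the original
+ * patch.) With CONFIG_EFI_STUB the bzImage can be started straight
+ * from the EFI shell, and the parser above picks the initrd=
+ * arguments out of the image's load options, e.g.:
+ *
+ *	fs0:\> bzImage initrd=initrd.img root=/dev/sda2 ro
+ *
+ * As the comment above handle_ramdisks() notes, the initrd must live
+ * on the same filesystem as the kernel image.
+ */
+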
+/*
+ * Because the x86 boot code expects to be passed a boot_params we
+ * need to create one ourselves (usually the bootloader would create
+ * one for us).
+ */
+static efi_status_t make_boot_params(struct boot_params *boot_params,
+				     efi_loaded_image_t *image,
+				     void *handle)
+{
+	struct efi_info *efi = &boot_params->efi_info;
+	struct apm_bios_info *bi = &boot_params->apm_bios_info;
+	struct sys_desc_table *sdt = &boot_params->sys_desc_table;
+	struct e820entry *e820_map = &boot_params->e820_map[0];
+	struct e820entry *prev = NULL;
+	struct setup_header *hdr = &boot_params->hdr;
+	unsigned long size, key, desc_size, _size;
+	efi_memory_desc_t *mem_map;
+	void *options = image->load_options;
+	u32 load_options_size = image->load_options_size / 2; /* ASCII */
+	int options_size = 0;
+	efi_status_t status;
+	__u32 desc_version;
+	unsigned long cmdline;
+	u8 nr_entries;
+	u16 *s2;
+	u8 *s1;
+	int i;
+
+	hdr->type_of_loader = 0x21;
+
+	/* Convert unicode cmdline to ascii */
+	cmdline = 0;
+	s2 = (u16 *)options;
+
+	if (s2) {
+		while (*s2 && *s2 != '\n' && options_size < load_options_size) {
+			s2++;
+			options_size++;
+		}
+
+		if (options_size) {
+			if (options_size > hdr->cmdline_size)
+				options_size = hdr->cmdline_size;
+
+			options_size++;	/* NUL termination */
+
+			status = low_alloc(options_size, 1, &cmdline);
+			if (status != EFI_SUCCESS)
+				goto fail;
+
+			s1 = (u8 *)(unsigned long)cmdline;
+			s2 = (u16 *)options;
+
+			for (i = 0; i < options_size - 1; i++)
+				*s1++ = *s2++;
+
+			*s1 = '\0';
+		}
+	}
+
+	hdr->cmd_line_ptr = cmdline;
+
+	hdr->ramdisk_image = 0;
+	hdr->ramdisk_size = 0;
+
+	status = handle_ramdisks(image, hdr);
+	if (status != EFI_SUCCESS)
+		goto free_cmdline;
+
+	setup_graphics(boot_params);
+
+	/* Clear APM BIOS info */
+	memset(bi, 0, sizeof(*bi));
+
+	memset(sdt, 0, sizeof(*sdt));
+
+	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+
+	size = sizeof(*mem_map) * 32;
+
+again:
+	size += sizeof(*mem_map);
+	_size = size;
+	status = low_alloc(size, 1, (unsigned long *)&mem_map);
+	if (status != EFI_SUCCESS)
+		goto free_cmdline;
+
+	status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
+				mem_map, &key, &desc_size, &desc_version);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		low_free(_size, (unsigned long)mem_map);
+		goto again;
+	}
+
+	if (status != EFI_SUCCESS)
+		goto free_mem_map;
+
+	efi->efi_systab = (unsigned long)sys_table;
+	efi->efi_memdesc_size = desc_size;
+	efi->efi_memdesc_version = desc_version;
+	efi->efi_memmap = (unsigned long)mem_map;
+	efi->efi_memmap_size = size;
+
+#ifdef CONFIG_X86_64
+	efi->efi_systab_hi = (unsigned long)sys_table >> 32;
+	efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
+#endif
+
+	/* Might as well exit boot services now */
+	status = efi_call_phys2(sys_table->boottime->exit_boot_services,
+				handle, key);
+	if (status != EFI_SUCCESS)
+		goto free_mem_map;
+
+	/* Historic? */
+	boot_params->alt_mem_k = 32 * 1024;
+
+	/*
+	 * Convert the EFI memory map to E820.
+	 */
+	nr_entries = 0;
+	for (i = 0; i < size / desc_size; i++) {
+		efi_memory_desc_t *d;
+		unsigned int e820_type = 0;
+		unsigned long m = (unsigned long)mem_map;
+
+		d = (efi_memory_desc_t *)(m + (i * desc_size));
+		switch (d->type) {
+		case EFI_RESERVED_TYPE:
+		case EFI_RUNTIME_SERVICES_CODE:
+		case EFI_RUNTIME_SERVICES_DATA:
+		case EFI_MEMORY_MAPPED_IO:
+		case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+		case EFI_PAL_CODE:
+			e820_type = E820_RESERVED;
+			break;
+
+		case EFI_UNUSABLE_MEMORY:
+			e820_type = E820_UNUSABLE;
+			break;
+
+		case EFI_ACPI_RECLAIM_MEMORY:
+			e820_type = E820_ACPI;
+			break;
+
+		case EFI_LOADER_CODE:
+		case EFI_LOADER_DATA:
+		case EFI_BOOT_SERVICES_CODE:
+		case EFI_BOOT_SERVICES_DATA:
+		case EFI_CONVENTIONAL_MEMORY:
+			e820_type = E820_RAM;
+			break;
+
+		case EFI_ACPI_MEMORY_NVS:
+			e820_type = E820_NVS;
+			break;
+
+		default:
+			continue;
+		}
+
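+		/*
+		 * (Illustrative aside added in editing; not part of
+		 * the original patch.) EFI pages are fixed at 4 KiB,
+		 * so "num_pages << 12" below converts a descriptor's
+		 * page count to a byte length: 0x100 pages ->
+		 * 0x100000 bytes (1 MiB).
+		 */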
849 /* Merge adjacent mappings */
850 if (prev && prev->type == e820_type &&
851 (prev->addr + prev->size) == d->phys_addr)
852 prev->size += d->num_pages << 12;
853 else {
854 e820_map->addr = d->phys_addr;
855 e820_map->size = d->num_pages << 12;
856 e820_map->type = e820_type;
857 prev = e820_map++;
858 nr_entries++;
859 }
860 }
861
862 boot_params->e820_entries = nr_entries;
863
864 return EFI_SUCCESS;
865
866free_mem_map:
867 low_free(_size, (unsigned long)mem_map);
868free_cmdline:
869 if (options_size)
870 low_free(options_size, hdr->cmd_line_ptr);
871fail:
872 return status;
873}
874
875/*
876 * On success we return a pointer to a boot_params structure, and NULL
877 * on failure.
878 */
879struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
880{
881 struct boot_params *boot_params;
882 unsigned long start, nr_pages;
883 struct desc_ptr *gdt, *idt;
884 efi_loaded_image_t *image;
885 struct setup_header *hdr;
886 efi_status_t status;
887 efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
888 struct desc_struct *desc;
889
890 sys_table = _table;
891
892 /* Check if we were booted by the EFI firmware */
893 if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
894 goto fail;
895
896 status = efi_call_phys3(sys_table->boottime->handle_protocol,
897 handle, &proto, (void *)&image);
898 if (status != EFI_SUCCESS)
899 goto fail;
900
901 status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
902 if (status != EFI_SUCCESS)
903 goto fail;
904
905 memset(boot_params, 0x0, 0x4000);
906
907 /* Copy first two sectors to boot_params */
908 memcpy(boot_params, image->image_base, 1024);
909
910 hdr = &boot_params->hdr;
911
912 /*
913 * The EFI firmware loader could have placed the kernel image
914 * anywhere in memory, but the kernel has various restrictions
915 * on the max physical address it can run at. Attempt to move
916 * the kernel to boot_params.pref_address, or as low as
917 * possible.
918 */
919 start = hdr->pref_address;
920 nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
921
922 status = efi_call_phys4(sys_table->boottime->allocate_pages,
923 EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
924 nr_pages, &start);
925 if (status != EFI_SUCCESS) {
926 status = low_alloc(hdr->init_size, hdr->kernel_alignment,
927 &start);
928 if (status != EFI_SUCCESS)
929 goto fail;
930 }
931
932 hdr->code32_start = (__u32)start;
933 hdr->pref_address = (__u64)(unsigned long)image->image_base;
934
935 memcpy((void *)start, image->image_base, image->image_size);
936
937 status = efi_call_phys3(sys_table->boottime->allocate_pool,
938 EFI_LOADER_DATA, sizeof(*gdt),
939 (void **)&gdt);
940 if (status != EFI_SUCCESS)
941 goto fail;
942
943 gdt->size = 0x800;
944 status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
945 if (status != EFI_SUCCESS)
946 goto fail;
947
948 status = efi_call_phys3(sys_table->boottime->allocate_pool,
949 EFI_LOADER_DATA, sizeof(*idt),
950 (void **)&idt);
951 if (status != EFI_SUCCESS)
952 goto fail;
953
954 idt->size = 0;
955 idt->address = 0;
956
957 status = make_boot_params(boot_params, image, handle);
958 if (status != EFI_SUCCESS)
959 goto fail;
960
961 memset((char *)gdt->address, 0x0, gdt->size);
962 desc = (struct desc_struct *)gdt->address;
963
 964	/* The first GDT entry is a dummy and the second is unused. */
965 desc += 2;
966
967 desc->limit0 = 0xffff;
968 desc->base0 = 0x0000;
969 desc->base1 = 0x0000;
970 desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
971 desc->s = DESC_TYPE_CODE_DATA;
972 desc->dpl = 0;
973 desc->p = 1;
974 desc->limit = 0xf;
975 desc->avl = 0;
976 desc->l = 0;
977 desc->d = SEG_OP_SIZE_32BIT;
978 desc->g = SEG_GRANULARITY_4KB;
979 desc->base2 = 0x00;
980
981 desc++;
982 desc->limit0 = 0xffff;
983 desc->base0 = 0x0000;
984 desc->base1 = 0x0000;
985 desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
986 desc->s = DESC_TYPE_CODE_DATA;
987 desc->dpl = 0;
988 desc->p = 1;
989 desc->limit = 0xf;
990 desc->avl = 0;
991 desc->l = 0;
992 desc->d = SEG_OP_SIZE_32BIT;
993 desc->g = SEG_GRANULARITY_4KB;
994 desc->base2 = 0x00;
995
996#ifdef CONFIG_X86_64
997 /* Task segment value */
998 desc++;
999 desc->limit0 = 0x0000;
1000 desc->base0 = 0x0000;
1001 desc->base1 = 0x0000;
1002 desc->type = SEG_TYPE_TSS;
1003 desc->s = 0;
1004 desc->dpl = 0;
1005 desc->p = 1;
1006 desc->limit = 0x0;
1007 desc->avl = 0;
1008 desc->l = 0;
1009 desc->d = 0;
1010 desc->g = SEG_GRANULARITY_4KB;
1011 desc->base2 = 0x00;
1012#endif /* CONFIG_X86_64 */
1013
1014 asm volatile ("lidt %0" : : "m" (*idt));
1015 asm volatile ("lgdt %0" : : "m" (*gdt));
1016
1017 asm volatile("cli");
1018
1019 return boot_params;
1020fail:
1021 return NULL;
1022}
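
The two descriptors built above are flat 4 GiB segments: base 0 and a 20-bit limit of 0xfffff (limit0 = 0xffff plus the high nibble limit = 0xf) with 4 KiB granularity, spanning the whole 32-bit address space. A hedged sketch that packs the same code-segment fields into a raw 8-byte descriptor and checks the arithmetic; the bit layout follows the standard x86 segment-descriptor format rather than any kernel header:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Pack base/limit/flags into the x86 segment-descriptor bit layout. */
static uint64_t pack_desc(uint32_t base, uint32_t limit, uint8_t type,
			  int s, int dpl, int p, int avl, int l, int d, int g)
{
	uint64_t desc = 0;

	desc |= (uint64_t)(limit & 0xffff);		/* limit 15:0  */
	desc |= (uint64_t)(base & 0xffffff) << 16;	/* base 23:0   */
	desc |= (uint64_t)(type & 0xf) << 40;
	desc |= (uint64_t)(s & 1) << 44;
	desc |= (uint64_t)(dpl & 3) << 45;
	desc |= (uint64_t)(p & 1) << 47;
	desc |= (uint64_t)((limit >> 16) & 0xf) << 48;	/* limit 19:16 */
	desc |= (uint64_t)(avl & 1) << 52;
	desc |= (uint64_t)(l & 1) << 53;
	desc |= (uint64_t)(d & 1) << 54;
	desc |= (uint64_t)(g & 1) << 55;
	desc |= (uint64_t)((base >> 24) & 0xff) << 56;	/* base 31:24  */
	return desc;
}

int main(void)
{
	/* Same flat code segment as above: type 0xa = code | exec/read. */
	uint64_t code = pack_desc(0x0, 0xfffff, 0xa, 1, 0, 1, 0, 0, 1, 1);

	/* With g=1 the effective span is (limit + 1) * 4 KiB = 4 GiB. */
	uint64_t span = ((uint64_t)0xfffff + 1) * 4096;

	assert(span == (1ULL << 32));
	assert(code == 0x00cf9a000000ffffULL);	/* classic flat code descriptor */
	printf("descriptor: %#018llx, span: %llu bytes\n",
	       (unsigned long long)code, (unsigned long long)span);
	return 0;
}
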
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
new file mode 100644
index 000000000000..39251663e65b
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.h
@@ -0,0 +1,61 @@
1#ifndef BOOT_COMPRESSED_EBOOT_H
2#define BOOT_COMPRESSED_EBOOT_H
3
4#define SEG_TYPE_DATA (0 << 3)
5#define SEG_TYPE_READ_WRITE (1 << 1)
6#define SEG_TYPE_CODE (1 << 3)
7#define SEG_TYPE_EXEC_READ (1 << 1)
8#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
9#define SEG_OP_SIZE_32BIT (1 << 0)
10#define SEG_GRANULARITY_4KB (1 << 0)
11
12#define DESC_TYPE_CODE_DATA (1 << 0)
13
14#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
15#define EFI_READ_CHUNK_SIZE (1024 * 1024)
16
17#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
18#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
19#define PIXEL_BIT_MASK 2
20#define PIXEL_BLT_ONLY 3
21#define PIXEL_FORMAT_MAX 4
22
23struct efi_pixel_bitmask {
24 u32 red_mask;
25 u32 green_mask;
26 u32 blue_mask;
27 u32 reserved_mask;
28};
29
30struct efi_graphics_output_mode_info {
31 u32 version;
32 u32 horizontal_resolution;
33 u32 vertical_resolution;
34 int pixel_format;
35 struct efi_pixel_bitmask pixel_information;
36 u32 pixels_per_scan_line;
37} __packed;
38
39struct efi_graphics_output_protocol_mode {
40 u32 max_mode;
41 u32 mode;
42 unsigned long info;
43 unsigned long size_of_info;
44 u64 frame_buffer_base;
45 unsigned long frame_buffer_size;
46} __packed;
47
48struct efi_graphics_output_protocol {
49 void *query_mode;
50 unsigned long set_mode;
51 unsigned long blt;
52 struct efi_graphics_output_protocol_mode *mode;
53};
54
55struct efi_uga_draw_protocol {
56 void *get_mode;
57 void *set_mode;
58 void *blt;
59};
60
61#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
new file mode 100644
index 000000000000..a53440e81d52
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_32.S
@@ -0,0 +1,86 @@
1/*
2 * EFI call stub for IA32.
3 *
4 * This stub allows us to make EFI calls in physical mode with interrupts
5 * turned off. Note that this implementation is different from the one in
6 * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
7 * mode at this point.
8 */
9
10#include <linux/linkage.h>
11#include <asm/page_types.h>
12
13/*
14 * efi_call_phys(void *, ...) is a function with variable parameters.
15 * All callers of this function ensure that all parameters are 4 bytes wide.
16 */
17
18/*
19 * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
20 * So we save all of them at the beginning of this function and restore them
21 * at the end, no matter how many we use, because we cannot assume that EFI
22 * runtime service functions comply with the gcc calling convention as well.
23 */
24
25.text
26ENTRY(efi_call_phys)
27 /*
28 * 0. The function can only be called in Linux kernel. So CS has been
29 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
30 * the values of these registers are the same. And, the corresponding
31 * GDT entries are identical. So I will do nothing about segment reg
32 * and GDT, but change GDT base register in prelog and epilog.
33 */
34
35 /*
36 * 1. Because we haven't been relocated by this point we need to
37 * use relative addressing.
38 */
39 call 1f
401: popl %edx
41 subl $1b, %edx
42
43 /*
44 * 2. The top of the stack now holds the return
45 * address of the caller of efi_call_phys(), followed by parameter 1,
46 * parameter 2, ..., param n. To make things easy, we save the return
47 * address of efi_call_phys in a global variable.
48 */
49 popl %ecx
50 movl %ecx, saved_return_addr(%edx)
51 /* get the function pointer into ECX */
52 popl %ecx
53 movl %ecx, efi_rt_function_ptr(%edx)
54
55 /*
56 * 3. Call the physical function.
57 */
58 call *%ecx
59
60 /*
61 * 4. Balance the stack. Because EAX contains the return value,
62 * we must not clobber it. We need to calculate our address
63 * again because %ecx and %edx are not preserved across EFI function
64 * calls.
65 */
66 call 1f
671: popl %edx
68 subl $1b, %edx
69
70 movl efi_rt_function_ptr(%edx), %ecx
71 pushl %ecx
72
73 /*
74 * 5. Push the saved return address onto the stack and return.
75 */
76 movl saved_return_addr(%edx), %ecx
77 pushl %ecx
78 ret
79ENDPROC(efi_call_phys)
80.previous
81
82.data
83saved_return_addr:
84 .long 0
85efi_rt_function_ptr:
86 .long 0
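
eboot.c above reaches this stub through the efi_call_physN() helpers (efi_call_phys2() and friends), which in this configuration pass the service pointer and its 4-byte arguments straight through to the variadic stub. A hedged, host-runnable sketch of that call shape: efi_call_phys() here is a plain C stand-in with a fixed two-argument signature, and fake_exit_boot_services() is invented purely for the demo:

#include <stdio.h>

typedef unsigned long efi_status_t;
#define EFI_SUCCESS 0

/*
 * The assembly stub takes the service pointer first, then the arguments.
 * A plain C function stands in for it here so the sketch runs anywhere.
 */
static efi_status_t efi_call_phys(void *fp, unsigned long a1,
				  unsigned long a2)
{
	efi_status_t (*fn)(unsigned long, unsigned long) =
		(efi_status_t (*)(unsigned long, unsigned long))fp;
	return fn(a1, a2);
}

/* Convenience wrapper in the style of the boot code's efi_call_physN(). */
#define efi_call_phys2(f, a1, a2) \
	efi_call_phys((void *)(f), (unsigned long)(a1), (unsigned long)(a2))

/* A stand-in "boot service" for demonstration purposes only. */
static efi_status_t fake_exit_boot_services(unsigned long handle,
					    unsigned long key)
{
	printf("exit_boot_services(handle=%#lx, key=%#lx)\n", handle, key);
	return EFI_SUCCESS;
}

int main(void)
{
	return efi_call_phys2(fake_exit_boot_services, 0x1234, 0x5678)
		== EFI_SUCCESS ? 0 : 1;
}
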
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
new file mode 100644
index 000000000000..cedc60de86eb
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -0,0 +1 @@
#include "../../platform/efi/efi_stub_64.S"
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 67a655a39ce4..a0559930a180 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -32,6 +32,28 @@
32 32
33 __HEAD 33 __HEAD
34ENTRY(startup_32) 34ENTRY(startup_32)
35#ifdef CONFIG_EFI_STUB
36 /*
37 * We don't need the return address; adjust the stack so
38 * efi_main() can find its arguments.
39 */
40 add $0x4, %esp
41
42 call efi_main
43 cmpl $0, %eax
44 je preferred_addr
45 movl %eax, %esi
46 call 1f
471:
48 popl %eax
49 subl $1b, %eax
50 subl BP_pref_address(%esi), %eax
51 add BP_code32_start(%esi), %eax
52 leal preferred_addr(%eax), %eax
53 jmp *%eax
54
55preferred_addr:
56#endif
35 cld 57 cld
36 /* 58 /*
37 * Test KEEP_SEGMENTS flag to see if the bootloader is asking 59 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
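
The position-independent tail above computes where preferred_addr now lives: the call/pop pair yields the runtime address of label 1, subtracting $1b leaves the load bias, and swapping pref_address (the original image base recorded by efi_main()) for code32_start rebases that bias onto the copy efi_main() made. A worked version of the same arithmetic, with all addresses invented for illustration:

#include <stdio.h>

int main(void)
{
	/* Illustrative addresses only. */
	unsigned long link_1b   = 0x00100040; /* label "1" at link time        */
	unsigned long link_pref = 0x00100100; /* preferred_addr at link time   */
	unsigned long image     = 0x08100000; /* base the firmware loaded us at;
						 efi_main() stored this in
						 hdr->pref_address             */
	unsigned long run_1b    = image + (link_1b - 0x00100000);
	unsigned long code32    = 0x05000000; /* hdr->code32_start: where
						 efi_main() copied the kernel  */

	unsigned long eax = run_1b - link_1b; /* load bias                     */
	eax -= image;                         /* subl BP_pref_address(...)     */
	eax += code32;                        /* add  BP_code32_start(...)     */
	eax += link_pref;                     /* leal preferred_addr(%eax)     */

	/* preferred_addr's offset (0x100) within the copy at code32_start. */
	printf("jump target: %#lx\n", eax);   /* 0x5000100 */
	return 0;
}
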
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 35af09d13dc1..558d76ce23bc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -199,6 +199,26 @@ ENTRY(startup_64)
199 * an identity mapped page table being provided that maps our 199 * an identity mapped page table being provided that maps our
200 * entire text+data+bss and hopefully all of memory. 200 * entire text+data+bss and hopefully all of memory.
201 */ 201 */
202#ifdef CONFIG_EFI_STUB
203 pushq %rsi
204 mov %rcx, %rdi
205 mov %rdx, %rsi
206 call efi_main
207 popq %rsi
208 cmpq $0,%rax
209 je preferred_addr
210 movq %rax,%rsi
211 call 1f
2121:
213 popq %rax
214 subq $1b, %rax
215 subq BP_pref_address(%rsi), %rax
216 addl BP_code32_start(%rsi), %eax
217 leaq preferred_addr(%rax), %rax
218 jmp *%rax
219
220preferred_addr:
221#endif
202 222
203 /* Setup data segments. */ 223 /* Setup data segments. */
204 xorl %eax, %eax 224 xorl %eax, %eax
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 3a19d04cebeb..7116dcba0c9e 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -321,6 +321,8 @@ static void parse_elf(void *output)
321 default: /* Ignore other PT_* */ break; 321 default: /* Ignore other PT_* */ break;
322 } 322 }
323 } 323 }
324
325 free(phdrs);
324} 326}
325 327
326asmlinkage void decompress_kernel(void *rmode, memptr heap, 328asmlinkage void decompress_kernel(void *rmode, memptr heap,
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 46a823882437..958a641483dd 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -29,14 +29,7 @@
29#include <stdio.h> 29#include <stdio.h>
30#include <string.h> 30#include <string.h>
31#include <inttypes.h> 31#include <inttypes.h>
32 32#include <tools/le_byteshift.h>
33static uint32_t getle32(const void *p)
34{
35 const uint8_t *cp = p;
36
37 return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) +
38 ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24);
39}
40 33
41int main(int argc, char *argv[]) 34int main(int argc, char *argv[])
42{ 35{
@@ -69,7 +62,7 @@ int main(int argc, char *argv[])
69 } 62 }
70 63
71 ilen = ftell(f); 64 ilen = ftell(f);
72 olen = getle32(&olen); 65 olen = get_unaligned_le32(&olen);
73 fclose(f); 66 fclose(f);
74 67
75 /* 68 /*
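
Several of these host tools now share tools/le_byteshift.h instead of open-coding little-endian accessors like the removed getle32(). A hedged reimplementation of the two 32-bit helpers used in these diffs, equivalent in behaviour to the removed code; the real header also provides 16- and 64-bit variants:

#include <stdint.h>
#include <stdio.h>

/* Read a little-endian 32-bit value from a possibly unaligned pointer. */
static uint32_t get_unaligned_le32(const void *p)
{
	const uint8_t *cp = p;

	return (uint32_t)cp[0] | ((uint32_t)cp[1] << 8) |
	       ((uint32_t)cp[2] << 16) | ((uint32_t)cp[3] << 24);
}

/* Store a 32-bit value little-endian, byte by byte. */
static void put_unaligned_le32(uint32_t val, void *p)
{
	uint8_t *cp = p;

	cp[0] = val & 0xff;
	cp[1] = (val >> 8) & 0xff;
	cp[2] = (val >> 16) & 0xff;
	cp[3] = (val >> 24) & 0xff;
}

int main(void)
{
	uint8_t buf[5] = { 0 };

	/* Unaligned on purpose: the access starts at buf + 1. */
	put_unaligned_le32(0x12345678, buf + 1);
	printf("%#x\n", get_unaligned_le32(buf + 1)); /* 0x12345678 */
	return 0;
}
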
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 89bbf4e4d05d..d3c0b0277666 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -10,6 +10,7 @@
10#define USE_BSD 10#define USE_BSD
11#include <endian.h> 11#include <endian.h>
12#include <regex.h> 12#include <regex.h>
13#include <tools/le_byteshift.h>
13 14
14static void die(char *fmt, ...); 15static void die(char *fmt, ...);
15 16
@@ -605,10 +606,7 @@ static void emit_relocs(int as_text)
605 fwrite("\0\0\0\0", 4, 1, stdout); 606 fwrite("\0\0\0\0", 4, 1, stdout);
606 /* Now print each relocation */ 607 /* Now print each relocation */
607 for (i = 0; i < reloc_count; i++) { 608 for (i = 0; i < reloc_count; i++) {
608 buf[0] = (relocs[i] >> 0) & 0xff; 609 put_unaligned_le32(relocs[i], buf);
609 buf[1] = (relocs[i] >> 8) & 0xff;
610 buf[2] = (relocs[i] >> 16) & 0xff;
611 buf[3] = (relocs[i] >> 24) & 0xff;
612 fwrite(buf, 4, 1, stdout); 610 fwrite(buf, 4, 1, stdout);
613 } 611 }
614 } 612 }
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 19b3e693cd72..ffb9c5c9d748 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -1,2 +1,11 @@
1#include "misc.h" 1#include "misc.h"
2
3int memcmp(const void *s1, const void *s2, size_t len)
4{
5 u8 diff;
6 asm("repe; cmpsb; setnz %0"
7 : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
8 return diff;
9}
10
2#include "../string.c" 11#include "../string.c"
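
Note that this memcmp() only distinguishes equal from unequal: setnz yields 0 or 1, not the signed ordering ISO C memcmp() may return. That is sufficient for the boot code's equality tests, such as the strstr() added to arch/x86/boot/string.c below. A portable C sketch with the same zero/non-zero contract, renamed so it does not collide with libc:

#include <stddef.h>
#include <stdio.h>

/*
 * Same contract as the inline-assembly version above: returns 0 when the
 * buffers match and a non-zero value when they differ. It does NOT report
 * the sign of the first difference like ISO C memcmp().
 */
static int boot_memcmp(const void *s1, const void *s2, size_t len)
{
	const unsigned char *a = s1, *b = s2;
	size_t i;

	for (i = 0; i < len; i++)
		if (a[i] != b[i])
			return 1;
	return 0;
}

int main(void)
{
	printf("%d %d\n", boot_memcmp("abc", "abc", 3),
	       boot_memcmp("abc", "abd", 3)); /* 0 1 */
	return 0;
}
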
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index bdb4d458ec8c..f1bbeeb09148 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -45,6 +45,11 @@ SYSSEG = 0x1000 /* historical load address >> 4 */
45 45
46 .global bootsect_start 46 .global bootsect_start
47bootsect_start: 47bootsect_start:
48#ifdef CONFIG_EFI_STUB
49 # "MZ", MS-DOS header
50 .byte 0x4d
51 .byte 0x5a
52#endif
48 53
49 # Normalize the start address 54 # Normalize the start address
50 ljmp $BOOTSEG, $start2 55 ljmp $BOOTSEG, $start2
@@ -79,6 +84,14 @@ bs_die:
79 # invoke the BIOS reset code... 84 # invoke the BIOS reset code...
80 ljmp $0xf000,$0xfff0 85 ljmp $0xf000,$0xfff0
81 86
87#ifdef CONFIG_EFI_STUB
88 .org 0x3c
89 #
90 # Offset to the PE header.
91 #
92 .long pe_header
93#endif /* CONFIG_EFI_STUB */
94
82 .section ".bsdata", "a" 95 .section ".bsdata", "a"
83bugger_off_msg: 96bugger_off_msg:
84 .ascii "Direct booting from floppy is no longer supported.\r\n" 97 .ascii "Direct booting from floppy is no longer supported.\r\n"
@@ -87,6 +100,141 @@ bugger_off_msg:
87 .ascii "Remove disk and press any key to reboot . . .\r\n" 100 .ascii "Remove disk and press any key to reboot . . .\r\n"
88 .byte 0 101 .byte 0
89 102
103#ifdef CONFIG_EFI_STUB
104pe_header:
105 .ascii "PE"
106 .word 0
107
108coff_header:
109#ifdef CONFIG_X86_32
110 .word 0x14c # i386
111#else
112 .word 0x8664 # x86-64
113#endif
114 .word 2 # nr_sections
115 .long 0 # TimeDateStamp
116 .long 0 # PointerToSymbolTable
117 .long 1 # NumberOfSymbols
118 .word section_table - optional_header # SizeOfOptionalHeader
119#ifdef CONFIG_X86_32
120 .word 0x306 # Characteristics.
121 # IMAGE_FILE_32BIT_MACHINE |
122 # IMAGE_FILE_DEBUG_STRIPPED |
123 # IMAGE_FILE_EXECUTABLE_IMAGE |
124 # IMAGE_FILE_LINE_NUMS_STRIPPED
125#else
126 .word 0x206 # Characteristics
127 # IMAGE_FILE_DEBUG_STRIPPED |
128 # IMAGE_FILE_EXECUTABLE_IMAGE |
129 # IMAGE_FILE_LINE_NUMS_STRIPPED
130#endif
131
132optional_header:
133#ifdef CONFIG_X86_32
134 .word 0x10b # PE32 format
135#else
136 .word 0x20b # PE32+ format
137#endif
138 .byte 0x02 # MajorLinkerVersion
139 .byte 0x14 # MinorLinkerVersion
140
141 # Filled in by build.c
142 .long 0 # SizeOfCode
143
144 .long 0 # SizeOfInitializedData
145 .long 0 # SizeOfUninitializedData
146
147 # Filled in by build.c
148 .long 0x0000 # AddressOfEntryPoint
149
150 .long 0x0000 # BaseOfCode
151#ifdef CONFIG_X86_32
152 .long 0 # data
153#endif
154
155extra_header_fields:
156#ifdef CONFIG_X86_32
157 .long 0 # ImageBase
158#else
159 .quad 0 # ImageBase
160#endif
161 .long 0x1000 # SectionAlignment
162 .long 0x200 # FileAlignment
163 .word 0 # MajorOperatingSystemVersion
164 .word 0 # MinorOperatingSystemVersion
165 .word 0 # MajorImageVersion
166 .word 0 # MinorImageVersion
167 .word 0 # MajorSubsystemVersion
168 .word 0 # MinorSubsystemVersion
169 .long 0 # Win32VersionValue
170
171 #
172 # The size of the bzImage is written in tools/build.c
173 #
174 .long 0 # SizeOfImage
175
176 .long 0x200 # SizeOfHeaders
177 .long 0 # CheckSum
178 .word 0xa # Subsystem (EFI application)
179 .word 0 # DllCharacteristics
180#ifdef CONFIG_X86_32
181 .long 0 # SizeOfStackReserve
182 .long 0 # SizeOfStackCommit
183 .long 0 # SizeOfHeapReserve
184 .long 0 # SizeOfHeapCommit
185#else
186 .quad 0 # SizeOfStackReserve
187 .quad 0 # SizeOfStackCommit
188 .quad 0 # SizeOfHeapReserve
189 .quad 0 # SizeOfHeapCommit
190#endif
191 .long 0 # LoaderFlags
192 .long 0x1 # NumberOfRvaAndSizes
193
194 .quad 0 # ExportTable
195 .quad 0 # ImportTable
196 .quad 0 # ResourceTable
197 .quad 0 # ExceptionTable
198 .quad 0 # CertificateTable
199 .quad 0 # BaseRelocationTable
200
201 # Section table
202section_table:
203 .ascii ".text"
204 .byte 0
205 .byte 0
206 .byte 0
207 .long 0
208 .long 0x0 # startup_{32,64}
209 .long 0 # Size of initialized data
210 # on disk
211 .long 0x0 # startup_{32,64}
212 .long 0 # PointerToRelocations
213 .long 0 # PointerToLineNumbers
214 .word 0 # NumberOfRelocations
215 .word 0 # NumberOfLineNumbers
216 .long 0x60500020 # Characteristics (section flags)
217
218 #
219 # The EFI application loader requires a relocation section
220 # because EFI applications are relocatable and not having
221 # this section seems to confuse it. But since we don't need
222 # the loader to fix up any relocs for us, just fill it with a
223 # single dummy reloc.
224 #
225 .ascii ".reloc"
226 .byte 0
227 .byte 0
228 .long reloc_end - reloc_start
229 .long reloc_start
230 .long reloc_end - reloc_start # SizeOfRawData
231 .long reloc_start # PointerToRawData
232 .long 0 # PointerToRelocations
233 .long 0 # PointerToLineNumbers
234 .word 0 # NumberOfRelocations
235 .word 0 # NumberOfLineNumbers
236 .long 0x42100040 # Characteristics (section flags)
237#endif /* CONFIG_EFI_STUB */
90 238
91 # Kernel attributes; used by setup. This is part 1 of the 239 # Kernel attributes; used by setup. This is part 1 of the
92 # header, from the old boot sector. 240 # header, from the old boot sector.
@@ -318,3 +466,13 @@ die:
318setup_corrupt: 466setup_corrupt:
319 .byte 7 467 .byte 7
320 .string "No setup signature found...\n" 468 .string "No setup signature found...\n"
469
470 .data
471dummy: .long 0
472
473 .section .reloc
474reloc_start:
475 .long dummy - reloc_start
476 .long 10
477 .word 0
478reloc_end:
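
The dummy relocation data above is one PE base-relocation block: a 4-byte page RVA, a 4-byte block size of 10, and a single 2-byte entry of type 0 (IMAGE_REL_BASED_ABSOLUTE), which PE loaders are required to skip. A hedged sketch of that 10-byte layout; the struct mirrors the PE format's IMAGE_BASE_RELOCATION, not any header in this tree:

#include <stdint.h>
#include <stdio.h>

/* Layout of a PE base-relocation block (IMAGE_BASE_RELOCATION). */
struct pe_reloc_block {
	uint32_t page_rva;	/* page this block's entries apply to        */
	uint32_t block_size;	/* size of the whole block, header included  */
	uint16_t entry[1];	/* 4-bit type in the high bits, 12-bit offset */
};

int main(void)
{
	struct pe_reloc_block blk = {
		.page_rva   = 0,
		.block_size = 10,	/* 8-byte header + one 2-byte entry   */
		.entry      = { 0 },	/* type 0: absolute, loader skips it  */
	};

	printf("block size: %u (header 8 + %u entries * 2)\n",
	       blk.block_size, (blk.block_size - 8) / 2);
	return 0;
}
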
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 3cbc4058dd26..574dedfe2890 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -111,3 +111,38 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas
111 111
112 return result; 112 return result;
113} 113}
114
115/**
116 * strlen - Find the length of a string
117 * @s: The string to be sized
118 */
119size_t strlen(const char *s)
120{
121 const char *sc;
122
123 for (sc = s; *sc != '\0'; ++sc)
124 /* nothing */;
125 return sc - s;
126}
127
128/**
129 * strstr - Find the first substring in a %NUL terminated string
130 * @s1: The string to be searched
131 * @s2: The string to search for
132 */
133char *strstr(const char *s1, const char *s2)
134{
135 size_t l1, l2;
136
137 l2 = strlen(s2);
138 if (!l2)
139 return (char *)s1;
140 l1 = strlen(s1);
141 while (l1 >= l2) {
142 l1--;
143 if (!memcmp(s1, s2, l2))
144 return (char *)s1;
145 s1++;
146 }
147 return NULL;
148}
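
These helpers run in the early boot environment and only rely on memcmp()'s zero/non-zero result. A quick host-side check of the same strlen()/strstr() logic, renamed to avoid colliding with libc:

#include <stdio.h>
#include <string.h>

static size_t boot_strlen(const char *s)
{
	const char *sc;

	for (sc = s; *sc != '\0'; ++sc)
		/* nothing */;
	return sc - s;
}

/* Same sliding-window search as the boot strstr() above. */
static char *boot_strstr(const char *s1, const char *s2)
{
	size_t l1, l2;

	l2 = boot_strlen(s2);
	if (!l2)
		return (char *)s1;
	l1 = boot_strlen(s1);
	while (l1 >= l2) {
		l1--;
		if (!memcmp(s1, s2, l2))
			return (char *)s1;
		s1++;
	}
	return NULL;
}

int main(void)
{
	const char *cmdline = "console=ttyS0 earlyprintk=serial";

	printf("%s\n", boot_strstr(cmdline, "earlyprintk")); /* earlyprintk=serial */
	return 0;
}
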
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index fdc60a0b3c20..ed549767a231 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -29,18 +29,18 @@
29#include <stdarg.h> 29#include <stdarg.h>
30#include <sys/types.h> 30#include <sys/types.h>
31#include <sys/stat.h> 31#include <sys/stat.h>
32#include <sys/sysmacros.h>
33#include <unistd.h> 32#include <unistd.h>
34#include <fcntl.h> 33#include <fcntl.h>
35#include <sys/mman.h> 34#include <sys/mman.h>
36#include <asm/boot.h> 35#include <tools/le_byteshift.h>
37 36
38typedef unsigned char u8; 37typedef unsigned char u8;
39typedef unsigned short u16; 38typedef unsigned short u16;
40typedef unsigned long u32; 39typedef unsigned int u32;
41 40
42#define DEFAULT_MAJOR_ROOT 0 41#define DEFAULT_MAJOR_ROOT 0
43#define DEFAULT_MINOR_ROOT 0 42#define DEFAULT_MINOR_ROOT 0
43#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
44 44
45/* Minimal number of setup sectors */ 45/* Minimal number of setup sectors */
46#define SETUP_SECT_MIN 5 46#define SETUP_SECT_MIN 5
@@ -135,6 +135,9 @@ static void usage(void)
135 135
136int main(int argc, char ** argv) 136int main(int argc, char ** argv)
137{ 137{
138#ifdef CONFIG_EFI_STUB
139 unsigned int file_sz, pe_header;
140#endif
138 unsigned int i, sz, setup_sectors; 141 unsigned int i, sz, setup_sectors;
139 int c; 142 int c;
140 u32 sys_size; 143 u32 sys_size;
@@ -156,7 +159,7 @@ int main(int argc, char ** argv)
156 die("read-error on `setup'"); 159 die("read-error on `setup'");
157 if (c < 1024) 160 if (c < 1024)
158 die("The setup must be at least 1024 bytes"); 161 die("The setup must be at least 1024 bytes");
159 if (buf[510] != 0x55 || buf[511] != 0xaa) 162 if (get_unaligned_le16(&buf[510]) != 0xAA55)
160 die("Boot block hasn't got boot flag (0xAA55)"); 163 die("Boot block hasn't got boot flag (0xAA55)");
161 fclose(file); 164 fclose(file);
162 165
@@ -168,8 +171,7 @@ int main(int argc, char ** argv)
168 memset(buf+c, 0, i-c); 171 memset(buf+c, 0, i-c);
169 172
170 /* Set the default root device */ 173 /* Set the default root device */
171 buf[508] = DEFAULT_MINOR_ROOT; 174 put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
172 buf[509] = DEFAULT_MAJOR_ROOT;
173 175
174 fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i); 176 fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i);
175 177
@@ -189,10 +191,44 @@ int main(int argc, char ** argv)
189 191
190 /* Patch the setup code with the appropriate size parameters */ 192 /* Patch the setup code with the appropriate size parameters */
191 buf[0x1f1] = setup_sectors-1; 193 buf[0x1f1] = setup_sectors-1;
192 buf[0x1f4] = sys_size; 194 put_unaligned_le32(sys_size, &buf[0x1f4]);
193 buf[0x1f5] = sys_size >> 8; 195
194 buf[0x1f6] = sys_size >> 16; 196#ifdef CONFIG_EFI_STUB
195 buf[0x1f7] = sys_size >> 24; 197 file_sz = sz + i + ((sys_size * 16) - sz);
198
199 pe_header = get_unaligned_le32(&buf[0x3c]);
200
201 /* Size of code */
202 put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
203
204 /* Size of image */
205 put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
206
207#ifdef CONFIG_X86_32
208 /* Address of entry point */
209 put_unaligned_le32(i, &buf[pe_header + 0x28]);
210
211 /* .text size */
212 put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
213
214 /* .text size of initialised data */
215 put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
216#else
217 /*
218 * Address of entry point. startup_32 is at the beginning and
219 * the 64-bit entry point (startup_64) is always 512 bytes
220 * after.
221 */
222 put_unaligned_le32(i + 512, &buf[pe_header + 0x28]);
223
224 /* .text size */
225 put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
226
227 /* .text size of initialised data */
228 put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
229
230#endif /* CONFIG_X86_32 */
231#endif /* CONFIG_EFI_STUB */
196 232
197 crc = partial_crc32(buf, i, crc); 233 crc = partial_crc32(buf, i, crc);
198 if (fwrite(buf, 1, i, stdout) != i) 234 if (fwrite(buf, 1, i, stdout) != i)
@@ -211,8 +247,9 @@ int main(int argc, char ** argv)
211 } 247 }
212 248
213 /* Write the CRC */ 249 /* Write the CRC */
214 fprintf(stderr, "CRC %lx\n", crc); 250 fprintf(stderr, "CRC %x\n", crc);
215 if (fwrite(&crc, 1, 4, stdout) != 4) 251 put_unaligned_le32(crc, buf);
252 if (fwrite(buf, 1, 4, stdout) != 4)
216 die("Writing CRC failed"); 253 die("Writing CRC failed");
217 254
218 close(fd); 255 close(fd);
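
One detail worth noting in the hunk above: in file_sz = sz + i + ((sys_size * 16) - sz) the sz terms cancel, so the value patched into SizeOfCode and SizeOfImage is simply the padded setup size (i) plus sys_size 16-byte paragraphs. A quick check with invented sizes; the sys_size derivation below is an assumption about how build.c rounds the payload, consistent with its use as a paragraph count here:

#include <stdio.h>

int main(void)
{
	/* Invented sizes, in bytes. */
	unsigned int i = 4096;		/* setup, padded to whole sectors      */
	unsigned int sz = 5000000;	/* compressed kernel payload           */
	unsigned int sys_size = (sz + 15 + 4) / 16; /* assumption: payload in
						       16-byte paragraphs,
						       rounded up, CRC included */

	unsigned int file_sz = sz + i + ((sys_size * 16) - sz);

	/* The sz terms cancel: */
	printf("%u == %u\n", file_sz, i + sys_size * 16);
	return 0;
}
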
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 2bf18059fbea..119db67dcb03 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -15,23 +15,28 @@ CONFIG_CPUSETS=y
15CONFIG_CGROUP_CPUACCT=y 15CONFIG_CGROUP_CPUACCT=y
16CONFIG_RESOURCE_COUNTERS=y 16CONFIG_RESOURCE_COUNTERS=y
17CONFIG_CGROUP_SCHED=y 17CONFIG_CGROUP_SCHED=y
18CONFIG_UTS_NS=y
19CONFIG_IPC_NS=y
20CONFIG_USER_NS=y
21CONFIG_PID_NS=y
22CONFIG_NET_NS=y
23CONFIG_BLK_DEV_INITRD=y 18CONFIG_BLK_DEV_INITRD=y
24CONFIG_KALLSYMS_EXTRA_PASS=y
25# CONFIG_COMPAT_BRK is not set 19# CONFIG_COMPAT_BRK is not set
26CONFIG_PROFILING=y 20CONFIG_PROFILING=y
27CONFIG_KPROBES=y 21CONFIG_KPROBES=y
28CONFIG_MODULES=y 22CONFIG_MODULES=y
29CONFIG_MODULE_UNLOAD=y 23CONFIG_MODULE_UNLOAD=y
30CONFIG_MODULE_FORCE_UNLOAD=y 24CONFIG_MODULE_FORCE_UNLOAD=y
25CONFIG_PARTITION_ADVANCED=y
26CONFIG_OSF_PARTITION=y
27CONFIG_AMIGA_PARTITION=y
28CONFIG_MAC_PARTITION=y
29CONFIG_BSD_DISKLABEL=y
30CONFIG_MINIX_SUBPARTITION=y
31CONFIG_SOLARIS_X86_PARTITION=y
32CONFIG_UNIXWARE_DISKLABEL=y
33CONFIG_SGI_PARTITION=y
34CONFIG_SUN_PARTITION=y
35CONFIG_KARMA_PARTITION=y
36CONFIG_EFI_PARTITION=y
31CONFIG_NO_HZ=y 37CONFIG_NO_HZ=y
32CONFIG_HIGH_RES_TIMERS=y 38CONFIG_HIGH_RES_TIMERS=y
33CONFIG_SMP=y 39CONFIG_SMP=y
34CONFIG_SPARSE_IRQ=y
35CONFIG_X86_GENERIC=y 40CONFIG_X86_GENERIC=y
36CONFIG_HPET_TIMER=y 41CONFIG_HPET_TIMER=y
37CONFIG_SCHED_SMT=y 42CONFIG_SCHED_SMT=y
@@ -51,14 +56,12 @@ CONFIG_HZ_1000=y
51CONFIG_KEXEC=y 56CONFIG_KEXEC=y
52CONFIG_CRASH_DUMP=y 57CONFIG_CRASH_DUMP=y
53# CONFIG_COMPAT_VDSO is not set 58# CONFIG_COMPAT_VDSO is not set
54CONFIG_PM=y 59CONFIG_HIBERNATION=y
55CONFIG_PM_DEBUG=y 60CONFIG_PM_DEBUG=y
56CONFIG_PM_TRACE_RTC=y 61CONFIG_PM_TRACE_RTC=y
57CONFIG_HIBERNATION=y
58CONFIG_ACPI_PROCFS=y 62CONFIG_ACPI_PROCFS=y
59CONFIG_ACPI_DOCK=y 63CONFIG_ACPI_DOCK=y
60CONFIG_CPU_FREQ=y 64CONFIG_CPU_FREQ=y
61CONFIG_CPU_FREQ_DEBUG=y
62# CONFIG_CPU_FREQ_STAT is not set 65# CONFIG_CPU_FREQ_STAT is not set
63CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y 66CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
64CONFIG_CPU_FREQ_GOV_PERFORMANCE=y 67CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
@@ -69,7 +72,6 @@ CONFIG_PCI_MSI=y
69CONFIG_PCCARD=y 72CONFIG_PCCARD=y
70CONFIG_YENTA=y 73CONFIG_YENTA=y
71CONFIG_HOTPLUG_PCI=y 74CONFIG_HOTPLUG_PCI=y
72CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
73CONFIG_BINFMT_MISC=y 75CONFIG_BINFMT_MISC=y
74CONFIG_NET=y 76CONFIG_NET=y
75CONFIG_PACKET=y 77CONFIG_PACKET=y
@@ -120,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
120CONFIG_IP_NF_IPTABLES=y 122CONFIG_IP_NF_IPTABLES=y
121CONFIG_IP_NF_FILTER=y 123CONFIG_IP_NF_FILTER=y
122CONFIG_IP_NF_TARGET_REJECT=y 124CONFIG_IP_NF_TARGET_REJECT=y
123CONFIG_IP_NF_TARGET_LOG=y
124CONFIG_IP_NF_TARGET_ULOG=y 125CONFIG_IP_NF_TARGET_ULOG=y
125CONFIG_NF_NAT=y 126CONFIG_NF_NAT=y
126CONFIG_IP_NF_TARGET_MASQUERADE=y 127CONFIG_IP_NF_TARGET_MASQUERADE=y
@@ -128,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y
128CONFIG_NF_CONNTRACK_IPV6=y 129CONFIG_NF_CONNTRACK_IPV6=y
129CONFIG_IP6_NF_IPTABLES=y 130CONFIG_IP6_NF_IPTABLES=y
130CONFIG_IP6_NF_MATCH_IPV6HEADER=y 131CONFIG_IP6_NF_MATCH_IPV6HEADER=y
131CONFIG_IP6_NF_TARGET_LOG=y
132CONFIG_IP6_NF_FILTER=y 132CONFIG_IP6_NF_FILTER=y
133CONFIG_IP6_NF_TARGET_REJECT=y 133CONFIG_IP6_NF_TARGET_REJECT=y
134CONFIG_IP6_NF_MANGLE=y 134CONFIG_IP6_NF_MANGLE=y
@@ -169,25 +169,20 @@ CONFIG_DM_ZERO=y
169CONFIG_MACINTOSH_DRIVERS=y 169CONFIG_MACINTOSH_DRIVERS=y
170CONFIG_MAC_EMUMOUSEBTN=y 170CONFIG_MAC_EMUMOUSEBTN=y
171CONFIG_NETDEVICES=y 171CONFIG_NETDEVICES=y
172CONFIG_NET_ETHERNET=y 172CONFIG_NETCONSOLE=y
173CONFIG_NET_VENDOR_3COM=y 173CONFIG_BNX2=y
174CONFIG_TIGON3=y
174CONFIG_NET_TULIP=y 175CONFIG_NET_TULIP=y
175CONFIG_NET_PCI=y
176CONFIG_FORCEDETH=y
177CONFIG_E100=y 176CONFIG_E100=y
177CONFIG_E1000=y
178CONFIG_E1000E=y
179CONFIG_SKY2=y
178CONFIG_NE2K_PCI=y 180CONFIG_NE2K_PCI=y
181CONFIG_FORCEDETH=y
179CONFIG_8139TOO=y 182CONFIG_8139TOO=y
180# CONFIG_8139TOO_PIO is not set 183# CONFIG_8139TOO_PIO is not set
181CONFIG_E1000=y
182CONFIG_E1000E=y
183CONFIG_R8169=y 184CONFIG_R8169=y
184CONFIG_SKY2=y
185CONFIG_TIGON3=y
186CONFIG_BNX2=y
187CONFIG_TR=y
188CONFIG_NET_PCMCIA=y
189CONFIG_FDDI=y 185CONFIG_FDDI=y
190CONFIG_NETCONSOLE=y
191CONFIG_INPUT_POLLDEV=y 186CONFIG_INPUT_POLLDEV=y
192# CONFIG_INPUT_MOUSEDEV_PSAUX is not set 187# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
193CONFIG_INPUT_EVDEV=y 188CONFIG_INPUT_EVDEV=y
@@ -196,6 +191,7 @@ CONFIG_INPUT_TABLET=y
196CONFIG_INPUT_TOUCHSCREEN=y 191CONFIG_INPUT_TOUCHSCREEN=y
197CONFIG_INPUT_MISC=y 192CONFIG_INPUT_MISC=y
198CONFIG_VT_HW_CONSOLE_BINDING=y 193CONFIG_VT_HW_CONSOLE_BINDING=y
194# CONFIG_LEGACY_PTYS is not set
199CONFIG_SERIAL_NONSTANDARD=y 195CONFIG_SERIAL_NONSTANDARD=y
200CONFIG_SERIAL_8250=y 196CONFIG_SERIAL_8250=y
201CONFIG_SERIAL_8250_CONSOLE=y 197CONFIG_SERIAL_8250_CONSOLE=y
@@ -205,7 +201,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
205CONFIG_SERIAL_8250_SHARE_IRQ=y 201CONFIG_SERIAL_8250_SHARE_IRQ=y
206CONFIG_SERIAL_8250_DETECT_IRQ=y 202CONFIG_SERIAL_8250_DETECT_IRQ=y
207CONFIG_SERIAL_8250_RSA=y 203CONFIG_SERIAL_8250_RSA=y
208# CONFIG_LEGACY_PTYS is not set
209CONFIG_HW_RANDOM=y 204CONFIG_HW_RANDOM=y
210CONFIG_NVRAM=y 205CONFIG_NVRAM=y
211CONFIG_HPET=y 206CONFIG_HPET=y
@@ -220,7 +215,6 @@ CONFIG_DRM_I915=y
220CONFIG_FB_MODE_HELPERS=y 215CONFIG_FB_MODE_HELPERS=y
221CONFIG_FB_TILEBLITTING=y 216CONFIG_FB_TILEBLITTING=y
222CONFIG_FB_EFI=y 217CONFIG_FB_EFI=y
223CONFIG_BACKLIGHT_LCD_SUPPORT=y
224# CONFIG_LCD_CLASS_DEVICE is not set 218# CONFIG_LCD_CLASS_DEVICE is not set
225CONFIG_VGACON_SOFT_SCROLLBACK=y 219CONFIG_VGACON_SOFT_SCROLLBACK=y
226CONFIG_LOGO=y 220CONFIG_LOGO=y
@@ -283,7 +277,6 @@ CONFIG_ZISOFS=y
283CONFIG_MSDOS_FS=y 277CONFIG_MSDOS_FS=y
284CONFIG_VFAT_FS=y 278CONFIG_VFAT_FS=y
285CONFIG_PROC_KCORE=y 279CONFIG_PROC_KCORE=y
286CONFIG_TMPFS=y
287CONFIG_TMPFS_POSIX_ACL=y 280CONFIG_TMPFS_POSIX_ACL=y
288CONFIG_HUGETLBFS=y 281CONFIG_HUGETLBFS=y
289CONFIG_NFS_FS=y 282CONFIG_NFS_FS=y
@@ -291,18 +284,6 @@ CONFIG_NFS_V3=y
291CONFIG_NFS_V3_ACL=y 284CONFIG_NFS_V3_ACL=y
292CONFIG_NFS_V4=y 285CONFIG_NFS_V4=y
293CONFIG_ROOT_NFS=y 286CONFIG_ROOT_NFS=y
294CONFIG_PARTITION_ADVANCED=y
295CONFIG_OSF_PARTITION=y
296CONFIG_AMIGA_PARTITION=y
297CONFIG_MAC_PARTITION=y
298CONFIG_BSD_DISKLABEL=y
299CONFIG_MINIX_SUBPARTITION=y
300CONFIG_SOLARIS_X86_PARTITION=y
301CONFIG_UNIXWARE_DISKLABEL=y
302CONFIG_SGI_PARTITION=y
303CONFIG_SUN_PARTITION=y
304CONFIG_KARMA_PARTITION=y
305CONFIG_EFI_PARTITION=y
306CONFIG_NLS_DEFAULT="utf8" 287CONFIG_NLS_DEFAULT="utf8"
307CONFIG_NLS_CODEPAGE_437=y 288CONFIG_NLS_CODEPAGE_437=y
308CONFIG_NLS_ASCII=y 289CONFIG_NLS_ASCII=y
@@ -317,13 +298,12 @@ CONFIG_DEBUG_KERNEL=y
317# CONFIG_SCHED_DEBUG is not set 298# CONFIG_SCHED_DEBUG is not set
318CONFIG_SCHEDSTATS=y 299CONFIG_SCHEDSTATS=y
319CONFIG_TIMER_STATS=y 300CONFIG_TIMER_STATS=y
320# CONFIG_RCU_CPU_STALL_DETECTOR is not set 301CONFIG_DEBUG_STACK_USAGE=y
321CONFIG_SYSCTL_SYSCALL_CHECK=y 302CONFIG_SYSCTL_SYSCALL_CHECK=y
322CONFIG_BLK_DEV_IO_TRACE=y 303CONFIG_BLK_DEV_IO_TRACE=y
323CONFIG_PROVIDE_OHCI1394_DMA_INIT=y 304CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
324CONFIG_EARLY_PRINTK_DBGP=y 305CONFIG_EARLY_PRINTK_DBGP=y
325CONFIG_DEBUG_STACKOVERFLOW=y 306CONFIG_DEBUG_STACKOVERFLOW=y
326CONFIG_DEBUG_STACK_USAGE=y
327# CONFIG_DEBUG_RODATA_TEST is not set 307# CONFIG_DEBUG_RODATA_TEST is not set
328CONFIG_DEBUG_NX_TEST=m 308CONFIG_DEBUG_NX_TEST=m
329CONFIG_DEBUG_BOOT_PARAMS=y 309CONFIG_DEBUG_BOOT_PARAMS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 058a35b8286c..76eb2903809f 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,4 +1,3 @@
1CONFIG_64BIT=y
2CONFIG_EXPERIMENTAL=y 1CONFIG_EXPERIMENTAL=y
3# CONFIG_LOCALVERSION_AUTO is not set 2# CONFIG_LOCALVERSION_AUTO is not set
4CONFIG_SYSVIPC=y 3CONFIG_SYSVIPC=y
@@ -16,26 +15,29 @@ CONFIG_CPUSETS=y
16CONFIG_CGROUP_CPUACCT=y 15CONFIG_CGROUP_CPUACCT=y
17CONFIG_RESOURCE_COUNTERS=y 16CONFIG_RESOURCE_COUNTERS=y
18CONFIG_CGROUP_SCHED=y 17CONFIG_CGROUP_SCHED=y
19CONFIG_UTS_NS=y
20CONFIG_IPC_NS=y
21CONFIG_USER_NS=y
22CONFIG_PID_NS=y
23CONFIG_NET_NS=y
24CONFIG_BLK_DEV_INITRD=y 18CONFIG_BLK_DEV_INITRD=y
25CONFIG_KALLSYMS_EXTRA_PASS=y
26# CONFIG_COMPAT_BRK is not set 19# CONFIG_COMPAT_BRK is not set
27CONFIG_PROFILING=y 20CONFIG_PROFILING=y
28CONFIG_KPROBES=y 21CONFIG_KPROBES=y
29CONFIG_MODULES=y 22CONFIG_MODULES=y
30CONFIG_MODULE_UNLOAD=y 23CONFIG_MODULE_UNLOAD=y
31CONFIG_MODULE_FORCE_UNLOAD=y 24CONFIG_MODULE_FORCE_UNLOAD=y
25CONFIG_PARTITION_ADVANCED=y
26CONFIG_OSF_PARTITION=y
27CONFIG_AMIGA_PARTITION=y
28CONFIG_MAC_PARTITION=y
29CONFIG_BSD_DISKLABEL=y
30CONFIG_MINIX_SUBPARTITION=y
31CONFIG_SOLARIS_X86_PARTITION=y
32CONFIG_UNIXWARE_DISKLABEL=y
33CONFIG_SGI_PARTITION=y
34CONFIG_SUN_PARTITION=y
35CONFIG_KARMA_PARTITION=y
36CONFIG_EFI_PARTITION=y
32CONFIG_NO_HZ=y 37CONFIG_NO_HZ=y
33CONFIG_HIGH_RES_TIMERS=y 38CONFIG_HIGH_RES_TIMERS=y
34CONFIG_SMP=y 39CONFIG_SMP=y
35CONFIG_SPARSE_IRQ=y
36CONFIG_CALGARY_IOMMU=y 40CONFIG_CALGARY_IOMMU=y
37CONFIG_AMD_IOMMU=y
38CONFIG_AMD_IOMMU_STATS=y
39CONFIG_NR_CPUS=64 41CONFIG_NR_CPUS=64
40CONFIG_SCHED_SMT=y 42CONFIG_SCHED_SMT=y
41CONFIG_PREEMPT_VOLUNTARY=y 43CONFIG_PREEMPT_VOLUNTARY=y
@@ -53,27 +55,22 @@ CONFIG_HZ_1000=y
53CONFIG_KEXEC=y 55CONFIG_KEXEC=y
54CONFIG_CRASH_DUMP=y 56CONFIG_CRASH_DUMP=y
55# CONFIG_COMPAT_VDSO is not set 57# CONFIG_COMPAT_VDSO is not set
56CONFIG_PM=y 58CONFIG_HIBERNATION=y
57CONFIG_PM_DEBUG=y 59CONFIG_PM_DEBUG=y
58CONFIG_PM_TRACE_RTC=y 60CONFIG_PM_TRACE_RTC=y
59CONFIG_HIBERNATION=y
60CONFIG_ACPI_PROCFS=y 61CONFIG_ACPI_PROCFS=y
61CONFIG_ACPI_DOCK=y 62CONFIG_ACPI_DOCK=y
62CONFIG_CPU_FREQ=y 63CONFIG_CPU_FREQ=y
63CONFIG_CPU_FREQ_DEBUG=y
64# CONFIG_CPU_FREQ_STAT is not set 64# CONFIG_CPU_FREQ_STAT is not set
65CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y 65CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
66CONFIG_CPU_FREQ_GOV_PERFORMANCE=y 66CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
67CONFIG_CPU_FREQ_GOV_ONDEMAND=y 67CONFIG_CPU_FREQ_GOV_ONDEMAND=y
68CONFIG_X86_ACPI_CPUFREQ=y 68CONFIG_X86_ACPI_CPUFREQ=y
69CONFIG_PCI_MMCONFIG=y 69CONFIG_PCI_MMCONFIG=y
70CONFIG_INTEL_IOMMU=y
71# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
72CONFIG_PCIEPORTBUS=y 70CONFIG_PCIEPORTBUS=y
73CONFIG_PCCARD=y 71CONFIG_PCCARD=y
74CONFIG_YENTA=y 72CONFIG_YENTA=y
75CONFIG_HOTPLUG_PCI=y 73CONFIG_HOTPLUG_PCI=y
76CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
77CONFIG_BINFMT_MISC=y 74CONFIG_BINFMT_MISC=y
78CONFIG_IA32_EMULATION=y 75CONFIG_IA32_EMULATION=y
79CONFIG_NET=y 76CONFIG_NET=y
@@ -125,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
125CONFIG_IP_NF_IPTABLES=y 122CONFIG_IP_NF_IPTABLES=y
126CONFIG_IP_NF_FILTER=y 123CONFIG_IP_NF_FILTER=y
127CONFIG_IP_NF_TARGET_REJECT=y 124CONFIG_IP_NF_TARGET_REJECT=y
128CONFIG_IP_NF_TARGET_LOG=y
129CONFIG_IP_NF_TARGET_ULOG=y 125CONFIG_IP_NF_TARGET_ULOG=y
130CONFIG_NF_NAT=y 126CONFIG_NF_NAT=y
131CONFIG_IP_NF_TARGET_MASQUERADE=y 127CONFIG_IP_NF_TARGET_MASQUERADE=y
@@ -133,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y
133CONFIG_NF_CONNTRACK_IPV6=y 129CONFIG_NF_CONNTRACK_IPV6=y
134CONFIG_IP6_NF_IPTABLES=y 130CONFIG_IP6_NF_IPTABLES=y
135CONFIG_IP6_NF_MATCH_IPV6HEADER=y 131CONFIG_IP6_NF_MATCH_IPV6HEADER=y
136CONFIG_IP6_NF_TARGET_LOG=y
137CONFIG_IP6_NF_FILTER=y 132CONFIG_IP6_NF_FILTER=y
138CONFIG_IP6_NF_TARGET_REJECT=y 133CONFIG_IP6_NF_TARGET_REJECT=y
139CONFIG_IP6_NF_MANGLE=y 134CONFIG_IP6_NF_MANGLE=y
@@ -172,20 +167,15 @@ CONFIG_DM_ZERO=y
172CONFIG_MACINTOSH_DRIVERS=y 167CONFIG_MACINTOSH_DRIVERS=y
173CONFIG_MAC_EMUMOUSEBTN=y 168CONFIG_MAC_EMUMOUSEBTN=y
174CONFIG_NETDEVICES=y 169CONFIG_NETDEVICES=y
175CONFIG_NET_ETHERNET=y 170CONFIG_NETCONSOLE=y
176CONFIG_NET_VENDOR_3COM=y 171CONFIG_TIGON3=y
177CONFIG_NET_TULIP=y 172CONFIG_NET_TULIP=y
178CONFIG_NET_PCI=y
179CONFIG_FORCEDETH=y
180CONFIG_E100=y 173CONFIG_E100=y
181CONFIG_8139TOO=y
182CONFIG_E1000=y 174CONFIG_E1000=y
183CONFIG_SKY2=y 175CONFIG_SKY2=y
184CONFIG_TIGON3=y 176CONFIG_FORCEDETH=y
185CONFIG_TR=y 177CONFIG_8139TOO=y
186CONFIG_NET_PCMCIA=y
187CONFIG_FDDI=y 178CONFIG_FDDI=y
188CONFIG_NETCONSOLE=y
189CONFIG_INPUT_POLLDEV=y 179CONFIG_INPUT_POLLDEV=y
190# CONFIG_INPUT_MOUSEDEV_PSAUX is not set 180# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
191CONFIG_INPUT_EVDEV=y 181CONFIG_INPUT_EVDEV=y
@@ -194,6 +184,7 @@ CONFIG_INPUT_TABLET=y
194CONFIG_INPUT_TOUCHSCREEN=y 184CONFIG_INPUT_TOUCHSCREEN=y
195CONFIG_INPUT_MISC=y 185CONFIG_INPUT_MISC=y
196CONFIG_VT_HW_CONSOLE_BINDING=y 186CONFIG_VT_HW_CONSOLE_BINDING=y
187# CONFIG_LEGACY_PTYS is not set
197CONFIG_SERIAL_NONSTANDARD=y 188CONFIG_SERIAL_NONSTANDARD=y
198CONFIG_SERIAL_8250=y 189CONFIG_SERIAL_8250=y
199CONFIG_SERIAL_8250_CONSOLE=y 190CONFIG_SERIAL_8250_CONSOLE=y
@@ -203,7 +194,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
203CONFIG_SERIAL_8250_SHARE_IRQ=y 194CONFIG_SERIAL_8250_SHARE_IRQ=y
204CONFIG_SERIAL_8250_DETECT_IRQ=y 195CONFIG_SERIAL_8250_DETECT_IRQ=y
205CONFIG_SERIAL_8250_RSA=y 196CONFIG_SERIAL_8250_RSA=y
206# CONFIG_LEGACY_PTYS is not set
207CONFIG_HW_RANDOM=y 197CONFIG_HW_RANDOM=y
208# CONFIG_HW_RANDOM_INTEL is not set 198# CONFIG_HW_RANDOM_INTEL is not set
209# CONFIG_HW_RANDOM_AMD is not set 199# CONFIG_HW_RANDOM_AMD is not set
@@ -221,7 +211,6 @@ CONFIG_DRM_I915_KMS=y
221CONFIG_FB_MODE_HELPERS=y 211CONFIG_FB_MODE_HELPERS=y
222CONFIG_FB_TILEBLITTING=y 212CONFIG_FB_TILEBLITTING=y
223CONFIG_FB_EFI=y 213CONFIG_FB_EFI=y
224CONFIG_BACKLIGHT_LCD_SUPPORT=y
225# CONFIG_LCD_CLASS_DEVICE is not set 214# CONFIG_LCD_CLASS_DEVICE is not set
226CONFIG_VGACON_SOFT_SCROLLBACK=y 215CONFIG_VGACON_SOFT_SCROLLBACK=y
227CONFIG_LOGO=y 216CONFIG_LOGO=y
@@ -268,6 +257,10 @@ CONFIG_RTC_CLASS=y
268# CONFIG_RTC_HCTOSYS is not set 257# CONFIG_RTC_HCTOSYS is not set
269CONFIG_DMADEVICES=y 258CONFIG_DMADEVICES=y
270CONFIG_EEEPC_LAPTOP=y 259CONFIG_EEEPC_LAPTOP=y
260CONFIG_AMD_IOMMU=y
261CONFIG_AMD_IOMMU_STATS=y
262CONFIG_INTEL_IOMMU=y
263# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
271CONFIG_EFI_VARS=y 264CONFIG_EFI_VARS=y
272CONFIG_EXT3_FS=y 265CONFIG_EXT3_FS=y
273# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set 266# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
@@ -284,7 +277,6 @@ CONFIG_ZISOFS=y
284CONFIG_MSDOS_FS=y 277CONFIG_MSDOS_FS=y
285CONFIG_VFAT_FS=y 278CONFIG_VFAT_FS=y
286CONFIG_PROC_KCORE=y 279CONFIG_PROC_KCORE=y
287CONFIG_TMPFS=y
288CONFIG_TMPFS_POSIX_ACL=y 280CONFIG_TMPFS_POSIX_ACL=y
289CONFIG_HUGETLBFS=y 281CONFIG_HUGETLBFS=y
290CONFIG_NFS_FS=y 282CONFIG_NFS_FS=y
@@ -292,18 +284,6 @@ CONFIG_NFS_V3=y
292CONFIG_NFS_V3_ACL=y 284CONFIG_NFS_V3_ACL=y
293CONFIG_NFS_V4=y 285CONFIG_NFS_V4=y
294CONFIG_ROOT_NFS=y 286CONFIG_ROOT_NFS=y
295CONFIG_PARTITION_ADVANCED=y
296CONFIG_OSF_PARTITION=y
297CONFIG_AMIGA_PARTITION=y
298CONFIG_MAC_PARTITION=y
299CONFIG_BSD_DISKLABEL=y
300CONFIG_MINIX_SUBPARTITION=y
301CONFIG_SOLARIS_X86_PARTITION=y
302CONFIG_UNIXWARE_DISKLABEL=y
303CONFIG_SGI_PARTITION=y
304CONFIG_SUN_PARTITION=y
305CONFIG_KARMA_PARTITION=y
306CONFIG_EFI_PARTITION=y
307CONFIG_NLS_DEFAULT="utf8" 287CONFIG_NLS_DEFAULT="utf8"
308CONFIG_NLS_CODEPAGE_437=y 288CONFIG_NLS_CODEPAGE_437=y
309CONFIG_NLS_ASCII=y 289CONFIG_NLS_ASCII=y
@@ -317,13 +297,12 @@ CONFIG_DEBUG_KERNEL=y
317# CONFIG_SCHED_DEBUG is not set 297# CONFIG_SCHED_DEBUG is not set
318CONFIG_SCHEDSTATS=y 298CONFIG_SCHEDSTATS=y
319CONFIG_TIMER_STATS=y 299CONFIG_TIMER_STATS=y
320# CONFIG_RCU_CPU_STALL_DETECTOR is not set 300CONFIG_DEBUG_STACK_USAGE=y
321CONFIG_SYSCTL_SYSCALL_CHECK=y 301CONFIG_SYSCTL_SYSCALL_CHECK=y
322CONFIG_BLK_DEV_IO_TRACE=y 302CONFIG_BLK_DEV_IO_TRACE=y
323CONFIG_PROVIDE_OHCI1394_DMA_INIT=y 303CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
324CONFIG_EARLY_PRINTK_DBGP=y 304CONFIG_EARLY_PRINTK_DBGP=y
325CONFIG_DEBUG_STACKOVERFLOW=y 305CONFIG_DEBUG_STACKOVERFLOW=y
326CONFIG_DEBUG_STACK_USAGE=y
327# CONFIG_DEBUG_RODATA_TEST is not set 306# CONFIG_DEBUG_RODATA_TEST is not set
328CONFIG_DEBUG_NX_TEST=m 307CONFIG_DEBUG_NX_TEST=m
329CONFIG_DEBUG_BOOT_PARAMS=y 308CONFIG_DEBUG_BOOT_PARAMS=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 2b0b9631474b..e191ac048b59 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
8obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o 8obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
9 9
10obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o 10obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
11obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
11obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o 12obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
12obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 13obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
13obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o 14obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -25,6 +26,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
25serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o 26serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
26 27
27aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o 28aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
29camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
28blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o 30blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
29twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o 31twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
30twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o 32twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 545d0ce59818..c799352e24fc 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -28,6 +28,7 @@
28#include <crypto/aes.h> 28#include <crypto/aes.h>
29#include <crypto/cryptd.h> 29#include <crypto/cryptd.h>
30#include <crypto/ctr.h> 30#include <crypto/ctr.h>
31#include <asm/cpu_device_id.h>
31#include <asm/i387.h> 32#include <asm/i387.h>
32#include <asm/aes.h> 33#include <asm/aes.h>
33#include <crypto/scatterwalk.h> 34#include <crypto/scatterwalk.h>
@@ -1107,12 +1108,12 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
1107 one_entry_in_sg = 1; 1108 one_entry_in_sg = 1;
1108 scatterwalk_start(&src_sg_walk, req->src); 1109 scatterwalk_start(&src_sg_walk, req->src);
1109 scatterwalk_start(&assoc_sg_walk, req->assoc); 1110 scatterwalk_start(&assoc_sg_walk, req->assoc);
1110 src = scatterwalk_map(&src_sg_walk, 0); 1111 src = scatterwalk_map(&src_sg_walk);
1111 assoc = scatterwalk_map(&assoc_sg_walk, 0); 1112 assoc = scatterwalk_map(&assoc_sg_walk);
1112 dst = src; 1113 dst = src;
1113 if (unlikely(req->src != req->dst)) { 1114 if (unlikely(req->src != req->dst)) {
1114 scatterwalk_start(&dst_sg_walk, req->dst); 1115 scatterwalk_start(&dst_sg_walk, req->dst);
1115 dst = scatterwalk_map(&dst_sg_walk, 0); 1116 dst = scatterwalk_map(&dst_sg_walk);
1116 } 1117 }
1117 1118
1118 } else { 1119 } else {
@@ -1136,11 +1137,11 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
1136 * back to the packet. */ 1137 * back to the packet. */
1137 if (one_entry_in_sg) { 1138 if (one_entry_in_sg) {
1138 if (unlikely(req->src != req->dst)) { 1139 if (unlikely(req->src != req->dst)) {
1139 scatterwalk_unmap(dst, 0); 1140 scatterwalk_unmap(dst);
1140 scatterwalk_done(&dst_sg_walk, 0, 0); 1141 scatterwalk_done(&dst_sg_walk, 0, 0);
1141 } 1142 }
1142 scatterwalk_unmap(src, 0); 1143 scatterwalk_unmap(src);
1143 scatterwalk_unmap(assoc, 0); 1144 scatterwalk_unmap(assoc);
1144 scatterwalk_done(&src_sg_walk, 0, 0); 1145 scatterwalk_done(&src_sg_walk, 0, 0);
1145 scatterwalk_done(&assoc_sg_walk, 0, 0); 1146 scatterwalk_done(&assoc_sg_walk, 0, 0);
1146 } else { 1147 } else {
@@ -1189,12 +1190,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
1189 one_entry_in_sg = 1; 1190 one_entry_in_sg = 1;
1190 scatterwalk_start(&src_sg_walk, req->src); 1191 scatterwalk_start(&src_sg_walk, req->src);
1191 scatterwalk_start(&assoc_sg_walk, req->assoc); 1192 scatterwalk_start(&assoc_sg_walk, req->assoc);
1192 src = scatterwalk_map(&src_sg_walk, 0); 1193 src = scatterwalk_map(&src_sg_walk);
1193 assoc = scatterwalk_map(&assoc_sg_walk, 0); 1194 assoc = scatterwalk_map(&assoc_sg_walk);
1194 dst = src; 1195 dst = src;
1195 if (unlikely(req->src != req->dst)) { 1196 if (unlikely(req->src != req->dst)) {
1196 scatterwalk_start(&dst_sg_walk, req->dst); 1197 scatterwalk_start(&dst_sg_walk, req->dst);
1197 dst = scatterwalk_map(&dst_sg_walk, 0); 1198 dst = scatterwalk_map(&dst_sg_walk);
1198 } 1199 }
1199 1200
1200 } else { 1201 } else {
@@ -1219,11 +1220,11 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
1219 1220
1220 if (one_entry_in_sg) { 1221 if (one_entry_in_sg) {
1221 if (unlikely(req->src != req->dst)) { 1222 if (unlikely(req->src != req->dst)) {
1222 scatterwalk_unmap(dst, 0); 1223 scatterwalk_unmap(dst);
1223 scatterwalk_done(&dst_sg_walk, 0, 0); 1224 scatterwalk_done(&dst_sg_walk, 0, 0);
1224 } 1225 }
1225 scatterwalk_unmap(src, 0); 1226 scatterwalk_unmap(src);
1226 scatterwalk_unmap(assoc, 0); 1227 scatterwalk_unmap(assoc);
1227 scatterwalk_done(&src_sg_walk, 0, 0); 1228 scatterwalk_done(&src_sg_walk, 0, 0);
1228 scatterwalk_done(&assoc_sg_walk, 0, 0); 1229 scatterwalk_done(&assoc_sg_walk, 0, 0);
1229 } else { 1230 } else {
@@ -1253,14 +1254,19 @@ static struct crypto_alg __rfc4106_alg = {
1253}; 1254};
1254#endif 1255#endif
1255 1256
1257
1258static const struct x86_cpu_id aesni_cpu_id[] = {
1259 X86_FEATURE_MATCH(X86_FEATURE_AES),
1260 {}
1261};
1262MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
1263
1256static int __init aesni_init(void) 1264static int __init aesni_init(void)
1257{ 1265{
1258 int err; 1266 int err;
1259 1267
1260 if (!cpu_has_aes) { 1268 if (!x86_match_cpu(aesni_cpu_id))
1261 printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
1262 return -ENODEV; 1269 return -ENODEV;
1263 }
1264 1270
1265 if ((err = crypto_fpu_init())) 1271 if ((err = crypto_fpu_init()))
1266 goto fpu_err; 1272 goto fpu_err;
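
Switching from an explicit cpu_has_aes check to an x86_cpu_id table does more than tidy the probe: exporting the table with MODULE_DEVICE_TABLE(x86cpu, ...) lets udev autoload the module on CPUs that advertise the feature bit. A hedged sketch of the same pattern for a hypothetical feature-gated module (demo_cpu_id and demo_init are invented names; the x86_match_cpu() API is as used above):

#include <linux/module.h>
#include <asm/cpu_device_id.h>

/* Match any CPU that advertises the AES-NI feature bit. */
static const struct x86_cpu_id demo_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_AES),
	{}
};
/* Exporting the table lets udev autoload the module on matching CPUs. */
MODULE_DEVICE_TABLE(x86cpu, demo_cpu_id);

static int __init demo_init(void)
{
	if (!x86_match_cpu(demo_cpu_id))
		return -ENODEV;
	pr_info("demo: AES-NI present\n");
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
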
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index b05aa163d55a..7967474de8f7 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -25,6 +25,7 @@
25 * 25 *
26 */ 26 */
27 27
28#include <asm/processor.h>
28#include <crypto/blowfish.h> 29#include <crypto/blowfish.h>
29#include <linux/crypto.h> 30#include <linux/crypto.h>
30#include <linux/init.h> 31#include <linux/init.h>
@@ -76,27 +77,6 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
76 blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); 77 blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
77} 78}
78 79
79static struct crypto_alg bf_alg = {
80 .cra_name = "blowfish",
81 .cra_driver_name = "blowfish-asm",
82 .cra_priority = 200,
83 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
84 .cra_blocksize = BF_BLOCK_SIZE,
85 .cra_ctxsize = sizeof(struct bf_ctx),
86 .cra_alignmask = 3,
87 .cra_module = THIS_MODULE,
88 .cra_list = LIST_HEAD_INIT(bf_alg.cra_list),
89 .cra_u = {
90 .cipher = {
91 .cia_min_keysize = BF_MIN_KEY_SIZE,
92 .cia_max_keysize = BF_MAX_KEY_SIZE,
93 .cia_setkey = blowfish_setkey,
94 .cia_encrypt = blowfish_encrypt,
95 .cia_decrypt = blowfish_decrypt,
96 }
97 }
98};
99
100static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, 80static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
101 void (*fn)(struct bf_ctx *, u8 *, const u8 *), 81 void (*fn)(struct bf_ctx *, u8 *, const u8 *),
102 void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) 82 void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
@@ -160,28 +140,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
160 return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); 140 return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
161} 141}
162 142
163static struct crypto_alg blk_ecb_alg = {
164 .cra_name = "ecb(blowfish)",
165 .cra_driver_name = "ecb-blowfish-asm",
166 .cra_priority = 300,
167 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
168 .cra_blocksize = BF_BLOCK_SIZE,
169 .cra_ctxsize = sizeof(struct bf_ctx),
170 .cra_alignmask = 0,
171 .cra_type = &crypto_blkcipher_type,
172 .cra_module = THIS_MODULE,
173 .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
174 .cra_u = {
175 .blkcipher = {
176 .min_keysize = BF_MIN_KEY_SIZE,
177 .max_keysize = BF_MAX_KEY_SIZE,
178 .setkey = blowfish_setkey,
179 .encrypt = ecb_encrypt,
180 .decrypt = ecb_decrypt,
181 },
182 },
183};
184
185static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, 143static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
186 struct blkcipher_walk *walk) 144 struct blkcipher_walk *walk)
187{ 145{
@@ -307,29 +265,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
307 return err; 265 return err;
308} 266}
309 267
310static struct crypto_alg blk_cbc_alg = {
311 .cra_name = "cbc(blowfish)",
312 .cra_driver_name = "cbc-blowfish-asm",
313 .cra_priority = 300,
314 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
315 .cra_blocksize = BF_BLOCK_SIZE,
316 .cra_ctxsize = sizeof(struct bf_ctx),
317 .cra_alignmask = 0,
318 .cra_type = &crypto_blkcipher_type,
319 .cra_module = THIS_MODULE,
320 .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
321 .cra_u = {
322 .blkcipher = {
323 .min_keysize = BF_MIN_KEY_SIZE,
324 .max_keysize = BF_MAX_KEY_SIZE,
325 .ivsize = BF_BLOCK_SIZE,
326 .setkey = blowfish_setkey,
327 .encrypt = cbc_encrypt,
328 .decrypt = cbc_decrypt,
329 },
330 },
331};
332
333static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) 268static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
334{ 269{
335 u8 *ctrblk = walk->iv; 270 u8 *ctrblk = walk->iv;
@@ -423,7 +358,67 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
423 return err; 358 return err;
424} 359}
425 360
426static struct crypto_alg blk_ctr_alg = { 361static struct crypto_alg bf_algs[4] = { {
362 .cra_name = "blowfish",
363 .cra_driver_name = "blowfish-asm",
364 .cra_priority = 200,
365 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
366 .cra_blocksize = BF_BLOCK_SIZE,
367 .cra_ctxsize = sizeof(struct bf_ctx),
368 .cra_alignmask = 0,
369 .cra_module = THIS_MODULE,
370 .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list),
371 .cra_u = {
372 .cipher = {
373 .cia_min_keysize = BF_MIN_KEY_SIZE,
374 .cia_max_keysize = BF_MAX_KEY_SIZE,
375 .cia_setkey = blowfish_setkey,
376 .cia_encrypt = blowfish_encrypt,
377 .cia_decrypt = blowfish_decrypt,
378 }
379 }
380}, {
381 .cra_name = "ecb(blowfish)",
382 .cra_driver_name = "ecb-blowfish-asm",
383 .cra_priority = 300,
384 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
385 .cra_blocksize = BF_BLOCK_SIZE,
386 .cra_ctxsize = sizeof(struct bf_ctx),
387 .cra_alignmask = 0,
388 .cra_type = &crypto_blkcipher_type,
389 .cra_module = THIS_MODULE,
390 .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list),
391 .cra_u = {
392 .blkcipher = {
393 .min_keysize = BF_MIN_KEY_SIZE,
394 .max_keysize = BF_MAX_KEY_SIZE,
395 .setkey = blowfish_setkey,
396 .encrypt = ecb_encrypt,
397 .decrypt = ecb_decrypt,
398 },
399 },
400}, {
401 .cra_name = "cbc(blowfish)",
402 .cra_driver_name = "cbc-blowfish-asm",
403 .cra_priority = 300,
404 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
405 .cra_blocksize = BF_BLOCK_SIZE,
406 .cra_ctxsize = sizeof(struct bf_ctx),
407 .cra_alignmask = 0,
408 .cra_type = &crypto_blkcipher_type,
409 .cra_module = THIS_MODULE,
410 .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list),
411 .cra_u = {
412 .blkcipher = {
413 .min_keysize = BF_MIN_KEY_SIZE,
414 .max_keysize = BF_MAX_KEY_SIZE,
415 .ivsize = BF_BLOCK_SIZE,
416 .setkey = blowfish_setkey,
417 .encrypt = cbc_encrypt,
418 .decrypt = cbc_decrypt,
419 },
420 },
421}, {
427 .cra_name = "ctr(blowfish)", 422 .cra_name = "ctr(blowfish)",
428 .cra_driver_name = "ctr-blowfish-asm", 423 .cra_driver_name = "ctr-blowfish-asm",
429 .cra_priority = 300, 424 .cra_priority = 300,
@@ -433,7 +428,7 @@ static struct crypto_alg blk_ctr_alg = {
433 .cra_alignmask = 0, 428 .cra_alignmask = 0,
434 .cra_type = &crypto_blkcipher_type, 429 .cra_type = &crypto_blkcipher_type,
435 .cra_module = THIS_MODULE, 430 .cra_module = THIS_MODULE,
436 .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), 431 .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list),
437 .cra_u = { 432 .cra_u = {
438 .blkcipher = { 433 .blkcipher = {
439 .min_keysize = BF_MIN_KEY_SIZE, 434 .min_keysize = BF_MIN_KEY_SIZE,
@@ -444,43 +439,45 @@ static struct crypto_alg blk_ctr_alg = {
444 .decrypt = ctr_crypt, 439 .decrypt = ctr_crypt,
445 }, 440 },
446 }, 441 },
447}; 442} };
443
444static bool is_blacklisted_cpu(void)
445{
446 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
447 return false;
448
449 if (boot_cpu_data.x86 == 0x0f) {
450 /*
451 * On Pentium 4, blowfish-x86_64 is slower than the generic C
452 * implementation because it uses 64-bit rotates, which are really
453 * slow on P4. Therefore blacklist P4s.
454 */
455 return true;
456 }
457
458 return false;
459}
460
461static int force;
462module_param(force, int, 0);
463MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
448 464
449static int __init init(void) 465static int __init init(void)
450{ 466{
451 int err; 467 if (!force && is_blacklisted_cpu()) {
468 printk(KERN_INFO
469 "blowfish-x86_64: performance on this CPU "
470 "would be suboptimal: disabling "
471 "blowfish-x86_64.\n");
472 return -ENODEV;
473 }
452 474
453 err = crypto_register_alg(&bf_alg); 475 return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
454 if (err)
455 goto bf_err;
456 err = crypto_register_alg(&blk_ecb_alg);
457 if (err)
458 goto ecb_err;
459 err = crypto_register_alg(&blk_cbc_alg);
460 if (err)
461 goto cbc_err;
462 err = crypto_register_alg(&blk_ctr_alg);
463 if (err)
464 goto ctr_err;
465
466 return 0;
467
468ctr_err:
469 crypto_unregister_alg(&blk_cbc_alg);
470cbc_err:
471 crypto_unregister_alg(&blk_ecb_alg);
472ecb_err:
473 crypto_unregister_alg(&bf_alg);
474bf_err:
475 return err;
476} 476}
477 477
478static void __exit fini(void) 478static void __exit fini(void)
479{ 479{
480 crypto_unregister_alg(&blk_ctr_alg); 480 crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
481 crypto_unregister_alg(&blk_cbc_alg);
482 crypto_unregister_alg(&blk_ecb_alg);
483 crypto_unregister_alg(&bf_alg);
484} 481}
485 482
486module_init(init); 483module_init(init);
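
Collapsing four crypto_alg registrations into one array works because crypto_register_algs() registers the entries in order and, on failure, unregisters the ones it already added, making the hand-rolled ctr_err/cbc_err/ecb_err unwind above redundant. A hedged sketch of that all-or-nothing rollback idiom in plain C, with register_one()/unregister_one() standing in for the crypto API:

#include <stdio.h>

struct alg { const char *name; int fail; };

static int register_one(struct alg *a)
{
	if (a->fail)
		return -1;
	printf("registered %s\n", a->name);
	return 0;
}

static void unregister_one(struct alg *a)
{
	printf("unregistered %s\n", a->name);
}

/* Register all-or-nothing: on failure, unwind what was already done. */
static int register_algs(struct alg *algs, int count)
{
	int i, err;

	for (i = 0; i < count; i++) {
		err = register_one(&algs[i]);
		if (err)
			goto unwind;
	}
	return 0;

unwind:
	while (--i >= 0)
		unregister_one(&algs[i]);
	return err;
}

int main(void)
{
	struct alg algs[] = {
		{ "blowfish", 0 }, { "ecb(blowfish)", 0 },
		{ "cbc(blowfish)", 1 }, /* simulate a failure here */
		{ "ctr(blowfish)", 0 },
	};

	return register_algs(algs, 4) ? 1 : 0;
}
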
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
new file mode 100644
index 000000000000..0b3374335fdc
--- /dev/null
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -0,0 +1,520 @@
1/*
2 * Camellia Cipher Algorithm (x86_64)
3 *
4 * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
19 * USA
20 *
21 */
22
23.file "camellia-x86_64-asm_64.S"
24.text
25
26.extern camellia_sp10011110;
27.extern camellia_sp22000222;
28.extern camellia_sp03303033;
29.extern camellia_sp00444404;
30.extern camellia_sp02220222;
31.extern camellia_sp30333033;
32.extern camellia_sp44044404;
33.extern camellia_sp11101110;
34
35#define sp10011110 camellia_sp10011110
36#define sp22000222 camellia_sp22000222
37#define sp03303033 camellia_sp03303033
38#define sp00444404 camellia_sp00444404
39#define sp02220222 camellia_sp02220222
40#define sp30333033 camellia_sp30333033
41#define sp44044404 camellia_sp44044404
42#define sp11101110 camellia_sp11101110
43
44#define CAMELLIA_TABLE_BYTE_LEN 272
45
46/* struct camellia_ctx: */
47#define key_table 0
48#define key_length CAMELLIA_TABLE_BYTE_LEN
49
50/* register macros */
51#define CTX %rdi
52#define RIO %rsi
53#define RIOd %esi
54
55#define RAB0 %rax
56#define RCD0 %rcx
57#define RAB1 %rbx
58#define RCD1 %rdx
59
60#define RAB0d %eax
61#define RCD0d %ecx
62#define RAB1d %ebx
63#define RCD1d %edx
64
65#define RAB0bl %al
66#define RCD0bl %cl
67#define RAB1bl %bl
68#define RCD1bl %dl
69
70#define RAB0bh %ah
71#define RCD0bh %ch
72#define RAB1bh %bh
73#define RCD1bh %dh
74
75#define RT0 %rsi
76#define RT1 %rbp
77#define RT2 %r8
78
79#define RT0d %esi
80#define RT1d %ebp
81#define RT2d %r8d
82
83#define RT2bl %r8b
84
85#define RXOR %r9
86#define RRBP %r10
87#define RDST %r11
88
89#define RXORd %r9d
90#define RXORbl %r9b
91
92#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
93 movzbl ab ## bl, tmp2 ## d; \
94 movzbl ab ## bh, tmp1 ## d; \
95 rorq $16, ab; \
96 xorq T0(, tmp2, 8), dst; \
97 xorq T1(, tmp1, 8), dst;
98
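A C model of xor2ror16 helps when reading the round macros below (a paraphrase of the asm, not part of the patch): two table lookups driven by the low byte pair, then a 16-bit rotate that queues up the next pair.

#include <linux/types.h>

/* xor2ror16(T0, T1, ..., ab, dst) in C: fold the low two bytes of *ab
 * through two 64-bit lookup tables, then rotate *ab right by 16. */
static inline u64 xor2ror16_model(const u64 t0[256], const u64 t1[256],
				  u64 *ab, u64 dst)
{
	dst ^= t0[(u8)*ab];              /* movzbl ab.bl; xorq T0(,idx,8) */
	dst ^= t1[(u8)(*ab >> 8)];       /* movzbl ab.bh; xorq T1(,idx,8) */
	*ab = (*ab >> 16) | (*ab << 48); /* rorq $16, ab */
	return dst;
}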
99/**********************************************************************
100 1-way camellia
101 **********************************************************************/
102#define roundsm(ab, subkey, cd) \
103 movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
104 \
105 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
106 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
107 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
108 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
109 \
110 xorq RT2, cd ## 0;
111
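Building on that model, roundsm is one Camellia round, cd ^= F(ab, subkey); the eight lookups are split between cd and a temporary so the two xor chains can issue in parallel (again a paraphrase; the camellia_sp* tables are defined in camellia_glue.c below):

extern const u64 camellia_sp10011110[256], camellia_sp22000222[256],
		 camellia_sp03303033[256], camellia_sp00444404[256],
		 camellia_sp02220222[256], camellia_sp30333033[256],
		 camellia_sp44044404[256], camellia_sp11101110[256];

/* After four xor2ror16 steps ab has rotated a full 64 bits and is back
 * to its original value, which is why the asm never restores it. */
static u64 roundsm_model(u64 ab, u64 subkey, u64 cd)
{
	u64 rt2 = subkey;

	cd  = xor2ror16_model(camellia_sp00444404, camellia_sp03303033, &ab, cd);
	rt2 = xor2ror16_model(camellia_sp22000222, camellia_sp10011110, &ab, rt2);
	cd  = xor2ror16_model(camellia_sp11101110, camellia_sp44044404, &ab, cd);
	rt2 = xor2ror16_model(camellia_sp30333033, camellia_sp02220222, &ab, rt2);

	return cd ^ rt2;
}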
112#define fls(l, r, kl, kr) \
113 movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
114 andl l ## 0d, RT0d; \
115 roll $1, RT0d; \
116 shlq $32, RT0; \
117 xorq RT0, l ## 0; \
118 movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
119 orq r ## 0, RT1; \
120 shrq $32, RT1; \
121 xorq RT1, r ## 0; \
122 \
123 movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
124 orq l ## 0, RT2; \
125 shrq $32, RT2; \
126 xorq RT2, l ## 0; \
127 movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
128 andl r ## 0d, RT0d; \
129 roll $1, RT0d; \
130 shlq $32, RT0; \
131 xorq RT0, r ## 0;
132
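fls applies Camellia's FL layer to the left half and FL^-1 to the right half, working on the packed register layout. Over plain 32-bit words the textbook definitions (RFC 3713) look like this, as a sketch (helper names assumed):

#include <linux/bitops.h>	/* rol32 */
#include <linux/types.h>

/* FL: xr ^= rol32(xl & kl, 1), then xl ^= (xr | kr). */
static u64 camellia_fl(u64 x, u32 kl, u32 kr)
{
	u32 xl = (u32)(x >> 32), xr = (u32)x;

	xr ^= rol32(xl & kl, 1);
	xl ^= (xr | kr);
	return ((u64)xl << 32) | xr;
}

/* FL^-1 undoes FL by running the same two steps in reverse order. */
static u64 camellia_flinv(u64 y, u32 kl, u32 kr)
{
	u32 yl = (u32)(y >> 32), yr = (u32)y;

	yl ^= (yr | kr);
	yr ^= rol32(yl & kl, 1);
	return ((u64)yl << 32) | yr;
}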
133#define enc_rounds(i) \
134 roundsm(RAB, i + 2, RCD); \
135 roundsm(RCD, i + 3, RAB); \
136 roundsm(RAB, i + 4, RCD); \
137 roundsm(RCD, i + 5, RAB); \
138 roundsm(RAB, i + 6, RCD); \
139 roundsm(RCD, i + 7, RAB);
140
141#define enc_fls(i) \
142 fls(RAB, RCD, i + 0, i + 1);
143
144#define enc_inpack() \
145 movq (RIO), RAB0; \
146 bswapq RAB0; \
147 rolq $32, RAB0; \
148 movq 4*2(RIO), RCD0; \
149 bswapq RCD0; \
150 rorq $32, RCD0; \
151 xorq key_table(CTX), RAB0;
152
153#define enc_outunpack(op, max) \
154 xorq key_table(CTX, max, 8), RCD0; \
155 rorq $32, RCD0; \
156 bswapq RCD0; \
157 op ## q RCD0, (RIO); \
158 rolq $32, RAB0; \
159 bswapq RAB0; \
160 op ## q RAB0, 4*2(RIO);
161
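enc_inpack and enc_outunpack bridge byte orders: Camellia is specified big-endian, so each 64-bit half is bswapped on load and store, and the 32-bit rotate puts the words into the layout the round macros expect (rol and ror by 32 coincide on 64-bit values). The load side, roughly, in C (helper name assumed; struct camellia_ctx is defined in the glue code below):

static void enc_inpack_model(const struct camellia_ctx *ctx,
			     const u8 *src, u64 *rab, u64 *rcd)
{
	*rab = rol64(get_unaligned_be64(src + 0), 32);
	*rcd = rol64(get_unaligned_be64(src + 8), 32);
	/* kw1 pre-whitening; kw2 is folded into the round subkeys at
	 * key-setup time (see "absorb kw2" in camellia_setup_tail). */
	*rab ^= ctx->key_table[0];
}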
162#define dec_rounds(i) \
163 roundsm(RAB, i + 7, RCD); \
164 roundsm(RCD, i + 6, RAB); \
165 roundsm(RAB, i + 5, RCD); \
166 roundsm(RCD, i + 4, RAB); \
167 roundsm(RAB, i + 3, RCD); \
168 roundsm(RCD, i + 2, RAB);
169
170#define dec_fls(i) \
171 fls(RAB, RCD, i + 1, i + 0);
172
173#define dec_inpack(max) \
174 movq (RIO), RAB0; \
175 bswapq RAB0; \
176 rolq $32, RAB0; \
177 movq 4*2(RIO), RCD0; \
178 bswapq RCD0; \
179 rorq $32, RCD0; \
180 xorq key_table(CTX, max, 8), RAB0;
181
182#define dec_outunpack() \
183 xorq key_table(CTX), RCD0; \
184 rorq $32, RCD0; \
185 bswapq RCD0; \
186 movq RCD0, (RIO); \
187 rolq $32, RAB0; \
188 bswapq RAB0; \
189 movq RAB0, 4*2(RIO);
190
191.global __camellia_enc_blk;
192.type __camellia_enc_blk,@function;
193
194__camellia_enc_blk:
195 /* input:
196 * %rdi: ctx, CTX
197 * %rsi: dst
198 * %rdx: src
199 * %rcx: bool xor
200 */
201 movq %rbp, RRBP;
202
203 movq %rcx, RXOR;
204 movq %rsi, RDST;
205 movq %rdx, RIO;
206
207 enc_inpack();
208
209 enc_rounds(0);
210 enc_fls(8);
211 enc_rounds(8);
212 enc_fls(16);
213 enc_rounds(16);
214 movl $24, RT1d; /* max */
215
216 cmpb $16, key_length(CTX);
217 je __enc_done;
218
219 enc_fls(24);
220 enc_rounds(24);
221 movl $32, RT1d; /* max */
222
223__enc_done:
224 testb RXORbl, RXORbl;
225 movq RDST, RIO;
226
227 jnz __enc_xor;
228
229 enc_outunpack(mov, RT1);
230
231 movq RRBP, %rbp;
232 ret;
233
234__enc_xor:
235 enc_outunpack(xor, RT1);
236
237 movq RRBP, %rbp;
238 ret;
239
240.global camellia_dec_blk;
241.type camellia_dec_blk,@function;
242
243camellia_dec_blk:
244 /* input:
245 * %rdi: ctx, CTX
246 * %rsi: dst
247 * %rdx: src
248 */
249 cmpl $16, key_length(CTX);
250 movl $32, RT2d;
251 movl $24, RXORd;
252 cmovel RXORd, RT2d; /* max */
253
254 movq %rbp, RRBP;
255 movq %rsi, RDST;
256 movq %rdx, RIO;
257
258 dec_inpack(RT2);
259
260 cmpb $24, RT2bl;
261 je __dec_rounds16;
262
263 dec_rounds(24);
264 dec_fls(24);
265
266__dec_rounds16:
267 dec_rounds(16);
268 dec_fls(16);
269 dec_rounds(8);
270 dec_fls(8);
271 dec_rounds(0);
272
273 movq RDST, RIO;
274
275 dec_outunpack();
276
277 movq RRBP, %rbp;
278 ret;
279
280/**********************************************************************
281 2-way camellia
282 **********************************************************************/
283#define roundsm2(ab, subkey, cd) \
284 movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
285 xorq RT2, cd ## 1; \
286 \
287 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
288 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
289 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
290 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
291 \
292 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
293 xorq RT2, cd ## 0; \
294 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
295 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
296 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
297
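roundsm2 is the same s-box walk applied to two independent blocks with interleaved register use: while one block's xor chain waits on a table load, the other block's lookups can issue, so an out-of-order core hides much of the memory latency. That is what makes this 2-way path worthwhile for modes that expose parallel blocks (ECB, CBC decryption, CTR).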
298#define fls2(l, r, kl, kr) \
299 movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
300 andl l ## 0d, RT0d; \
301 roll $1, RT0d; \
302 shlq $32, RT0; \
303 xorq RT0, l ## 0; \
304 movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
305 orq r ## 0, RT1; \
306 shrq $32, RT1; \
307 xorq RT1, r ## 0; \
308 \
309 movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
310 andl l ## 1d, RT2d; \
311 roll $1, RT2d; \
312 shlq $32, RT2; \
313 xorq RT2, l ## 1; \
314 movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
315 orq r ## 1, RT0; \
316 shrq $32, RT0; \
317 xorq RT0, r ## 1; \
318 \
319 movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
320 orq l ## 0, RT1; \
321 shrq $32, RT1; \
322 xorq RT1, l ## 0; \
323 movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
324 andl r ## 0d, RT2d; \
325 roll $1, RT2d; \
326 shlq $32, RT2; \
327 xorq RT2, r ## 0; \
328 \
329 movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
330 orq l ## 1, RT0; \
331 shrq $32, RT0; \
332 xorq RT0, l ## 1; \
333 movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
334 andl r ## 1d, RT1d; \
335 roll $1, RT1d; \
336 shlq $32, RT1; \
337 xorq RT1, r ## 1;
338
339#define enc_rounds2(i) \
340 roundsm2(RAB, i + 2, RCD); \
341 roundsm2(RCD, i + 3, RAB); \
342 roundsm2(RAB, i + 4, RCD); \
343 roundsm2(RCD, i + 5, RAB); \
344 roundsm2(RAB, i + 6, RCD); \
345 roundsm2(RCD, i + 7, RAB);
346
347#define enc_fls2(i) \
348 fls2(RAB, RCD, i + 0, i + 1);
349
350#define enc_inpack2() \
351 movq (RIO), RAB0; \
352 bswapq RAB0; \
353 rorq $32, RAB0; \
354 movq 4*2(RIO), RCD0; \
355 bswapq RCD0; \
356 rolq $32, RCD0; \
357 xorq key_table(CTX), RAB0; \
358 \
359 movq 8*2(RIO), RAB1; \
360 bswapq RAB1; \
361 rorq $32, RAB1; \
362 movq 12*2(RIO), RCD1; \
363 bswapq RCD1; \
364 rolq $32, RCD1; \
365 xorq key_table(CTX), RAB1;
366
367#define enc_outunpack2(op, max) \
368 xorq key_table(CTX, max, 8), RCD0; \
369 rolq $32, RCD0; \
370 bswapq RCD0; \
371 op ## q RCD0, (RIO); \
372 rorq $32, RAB0; \
373 bswapq RAB0; \
374 op ## q RAB0, 4*2(RIO); \
375 \
376 xorq key_table(CTX, max, 8), RCD1; \
377 rolq $32, RCD1; \
378 bswapq RCD1; \
379 op ## q RCD1, 8*2(RIO); \
380 rorq $32, RAB1; \
381 bswapq RAB1; \
382 op ## q RAB1, 12*2(RIO);
383
384#define dec_rounds2(i) \
385 roundsm2(RAB, i + 7, RCD); \
386 roundsm2(RCD, i + 6, RAB); \
387 roundsm2(RAB, i + 5, RCD); \
388 roundsm2(RCD, i + 4, RAB); \
389 roundsm2(RAB, i + 3, RCD); \
390 roundsm2(RCD, i + 2, RAB);
391
392#define dec_fls2(i) \
393 fls2(RAB, RCD, i + 1, i + 0);
394
395#define dec_inpack2(max) \
396 movq (RIO), RAB0; \
397 bswapq RAB0; \
398 rorq $32, RAB0; \
399 movq 4*2(RIO), RCD0; \
400 bswapq RCD0; \
401 rolq $32, RCD0; \
402 xorq key_table(CTX, max, 8), RAB0; \
403 \
404 movq 8*2(RIO), RAB1; \
405 bswapq RAB1; \
406 rorq $32, RAB1; \
407 movq 12*2(RIO), RCD1; \
408 bswapq RCD1; \
409 rolq $32, RCD1; \
410 xorq key_table(CTX, max, 8), RAB1;
411
412#define dec_outunpack2() \
413 xorq key_table(CTX), RCD0; \
414 rolq $32, RCD0; \
415 bswapq RCD0; \
416 movq RCD0, (RIO); \
417 rorq $32, RAB0; \
418 bswapq RAB0; \
419 movq RAB0, 4*2(RIO); \
420 \
421 xorq key_table(CTX), RCD1; \
422 rolq $32, RCD1; \
423 bswapq RCD1; \
424 movq RCD1, 8*2(RIO); \
425 rorq $32, RAB1; \
426 bswapq RAB1; \
427 movq RAB1, 12*2(RIO);
428
429.global __camellia_enc_blk_2way;
430.type __camellia_enc_blk_2way,@function;
431
432__camellia_enc_blk_2way:
433 /* input:
434 * %rdi: ctx, CTX
435 * %rsi: dst
436 * %rdx: src
437 * %rcx: bool xor
438 */
439 pushq %rbx;
440
441 movq %rbp, RRBP;
442 movq %rcx, RXOR;
443 movq %rsi, RDST;
444 movq %rdx, RIO;
445
446 enc_inpack2();
447
448 enc_rounds2(0);
449 enc_fls2(8);
450 enc_rounds2(8);
451 enc_fls2(16);
452 enc_rounds2(16);
453 movl $24, RT2d; /* max */
454
455 cmpb $16, key_length(CTX);
456 je __enc2_done;
457
458 enc_fls2(24);
459 enc_rounds2(24);
460 movl $32, RT2d; /* max */
461
462__enc2_done:
 463	testb RXORbl, RXORbl;
464 movq RDST, RIO;
465 jnz __enc2_xor;
466
467 enc_outunpack2(mov, RT2);
468
469 movq RRBP, %rbp;
470 popq %rbx;
471 ret;
472
473__enc2_xor:
474 enc_outunpack2(xor, RT2);
475
476 movq RRBP, %rbp;
477 popq %rbx;
478 ret;
479
480.global camellia_dec_blk_2way;
481.type camellia_dec_blk_2way,@function;
482
483camellia_dec_blk_2way:
484 /* input:
485 * %rdi: ctx, CTX
486 * %rsi: dst
487 * %rdx: src
488 */
489 cmpl $16, key_length(CTX);
490 movl $32, RT2d;
491 movl $24, RXORd;
492 cmovel RXORd, RT2d; /* max */
493
494 movq %rbx, RXOR;
495 movq %rbp, RRBP;
496 movq %rsi, RDST;
497 movq %rdx, RIO;
498
499 dec_inpack2(RT2);
500
501 cmpb $24, RT2bl;
502 je __dec2_rounds16;
503
504 dec_rounds2(24);
505 dec_fls2(24);
506
507__dec2_rounds16:
508 dec_rounds2(16);
509 dec_fls2(16);
510 dec_rounds2(8);
511 dec_fls2(8);
512 dec_rounds2(0);
513
514 movq RDST, RIO;
515
516 dec_outunpack2();
517
518 movq RRBP, %rbp;
519 movq RXOR, %rbx;
520 ret;
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
new file mode 100644
index 000000000000..3306dc0b139e
--- /dev/null
+++ b/arch/x86/crypto/camellia_glue.c
@@ -0,0 +1,1952 @@
1/*
2 * Glue Code for assembler optimized version of Camellia
3 *
4 * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * Camellia parts based on code by:
7 * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
8 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
9 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
10 * CTR part based on code (crypto/ctr.c) by:
11 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
26 * USA
27 *
28 */
29
30#include <asm/processor.h>
31#include <asm/unaligned.h>
32#include <linux/crypto.h>
33#include <linux/init.h>
34#include <linux/module.h>
35#include <linux/types.h>
36#include <crypto/algapi.h>
37#include <crypto/b128ops.h>
38#include <crypto/lrw.h>
39#include <crypto/xts.h>
40
41#define CAMELLIA_MIN_KEY_SIZE 16
42#define CAMELLIA_MAX_KEY_SIZE 32
43#define CAMELLIA_BLOCK_SIZE 16
44#define CAMELLIA_TABLE_BYTE_LEN 272
45
46struct camellia_ctx {
47 u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
48 u32 key_length;
49};
50
51/* regular block cipher functions */
52asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
53 const u8 *src, bool xor);
54asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
55 const u8 *src);
56
57/* 2-way parallel cipher functions */
58asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
59 const u8 *src, bool xor);
60asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
61 const u8 *src);
62
63static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
64 const u8 *src)
65{
66 __camellia_enc_blk(ctx, dst, src, false);
67}
68
69static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
70 const u8 *src)
71{
72 __camellia_enc_blk(ctx, dst, src, true);
73}
74
75static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
76 const u8 *src)
77{
78 __camellia_enc_blk_2way(ctx, dst, src, false);
79}
80
81static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
82 const u8 *src)
83{
84 __camellia_enc_blk_2way(ctx, dst, src, true);
85}
86
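The _xor variants serve modes that combine cipher output with existing data in one pass. A hedged sketch of a CTR-style step (illustrative helper, not code from this patch; crypto_inc() is the kernel's big-endian counter increment):

#include <crypto/algapi.h>	/* crypto_inc */
#include <linux/string.h>

static void ctr_block_sketch(struct camellia_ctx *ctx, u8 *dst,
			     const u8 *src, u8 *ctrblk)
{
	if (dst != src)
		memcpy(dst, src, CAMELLIA_BLOCK_SIZE); /* plaintext into dst */
	camellia_enc_blk_xor(ctx, dst, ctrblk);        /* dst ^= E_k(ctr) */
	crypto_inc(ctrblk, CAMELLIA_BLOCK_SIZE);       /* bump the counter */
}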
87static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
88{
89 camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src);
90}
91
92static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
93{
94 camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src);
95}
96
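A note on the table names that follow (a reading verified against the first entries, not stated in the patch): the eight digits in camellia_spNNNNNNNN describe the 64-bit entry byte by byte, most significant first, giving the s-box variant (1-4, with 0 for a zero byte) replicated at that position. Each table thus folds one s-box through its share of Camellia's P byte permutation, so the eight lookups of a round perform the S and P steps together. For example, s1(0x00) = 0x70 and camellia_sp10011110[0x00] = 0x7000007070707000: the 0x70 bytes sit exactly at the '1' positions of the name.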
97/* camellia sboxes */
98const u64 camellia_sp10011110[256] = {
99 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
100 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
101 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
102 0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
103 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
104 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
105 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
106 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
107 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
108 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
109 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
110 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
111 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
112 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
113 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
114 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
115 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
116 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
117 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
118 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
119 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
120 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
121 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
122 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
123 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
124 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100,
125 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00,
126 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200,
127 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600,
128 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700,
129 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100,
130 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700,
131 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00,
132 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00,
133 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200,
134 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600,
135 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200,
136 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400,
137 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300,
138 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100,
139 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800,
140 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00,
141 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00,
142 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00,
143 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200,
144 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00,
145 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000,
146 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200,
147 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000,
148 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700,
149 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00,
150 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00,
151 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00,
152 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00,
153 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00,
154 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300,
155 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600,
156 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00,
157 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200,
158 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600,
159 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600,
160 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600,
161 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300,
162 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00,
163 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700,
164 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00,
165 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900,
166 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600,
167 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400,
168 0x5900005959595900, 0x7800007878787800, 0x9800009898989800,
169 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700,
170 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00,
171 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00,
172 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200,
173 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200,
174 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500,
175 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00,
176 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900,
177 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00,
178 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400,
179 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300,
180 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900,
181 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500,
182 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400,
183 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000,
184 0x9e00009e9e9e9e00,
185};
186
187const u64 camellia_sp22000222[256] = {
188 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858,
189 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e,
190 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9,
191 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a,
192 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d,
193 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf,
194 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a,
195 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242,
196 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e,
197 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca,
198 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d,
199 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f,
200 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e,
201 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060,
202 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc,
203 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434,
204 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272,
205 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e,
206 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3,
207 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad,
208 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8,
209 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a,
210 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7,
211 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a,
212 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656,
213 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363,
214 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf,
215 0x9898000000989898, 0x9797000000979797, 0x8585000000858585,
216 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec,
217 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f,
218 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3,
219 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf,
220 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474,
221 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636,
222 0x2222000000222222, 0x3838000000383838, 0x6464000000646464,
223 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c,
224 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5,
225 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888,
226 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787,
227 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323,
228 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1,
229 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9,
230 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555,
231 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa,
232 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4,
233 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6,
234 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1,
235 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5,
236 0x2020000000202020, 0x8989000000898989, 0x0000000000000000,
237 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef,
238 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515,
239 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5,
240 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb,
241 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8,
242 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b,
243 0x9494000000949494, 0x2121000000212121, 0x6666000000666666,
244 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed,
245 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe,
246 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4,
247 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c,
248 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d,
249 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d,
250 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626,
251 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c,
252 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f,
253 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc,
254 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252,
255 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d,
256 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969,
257 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131,
258 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf,
259 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575,
260 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757,
261 0x8484000000848484, 0x1111000000111111, 0x4545000000454545,
262 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4,
263 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa,
264 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959,
265 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292,
266 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878,
267 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949,
268 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7,
269 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393,
270 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a,
271 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9,
272 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101,
273 0x3d3d0000003d3d3d,
274};
275
276const u64 camellia_sp03303033[256] = {
277 0x0038380038003838, 0x0041410041004141, 0x0016160016001616,
278 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393,
279 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272,
280 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a,
281 0x0075750075007575, 0x0006060006000606, 0x0057570057005757,
282 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7,
283 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2,
284 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090,
285 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7,
286 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2,
287 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343,
288 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7,
289 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f,
290 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818,
291 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f,
292 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d,
293 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c,
294 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3,
295 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec,
296 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b,
297 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636,
298 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686,
299 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd,
300 0x0066660066006666, 0x0058580058005858, 0x0096960096009696,
301 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595,
302 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8,
303 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef,
304 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161,
305 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b,
306 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb,
307 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8,
308 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb,
309 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d,
310 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d,
311 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919,
312 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b,
313 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979,
314 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222,
315 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1,
316 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8,
317 0x0012120012001212, 0x0004040004000404, 0x0074740074007474,
318 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e,
319 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555,
320 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe,
321 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131,
322 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad,
323 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070,
324 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969,
325 0x0008080008000808, 0x0062620062006262, 0x0000000000000000,
326 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb,
327 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545,
328 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d,
329 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee,
330 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e,
331 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6,
332 0x0025250025002525, 0x0048480048004848, 0x0099990099009999,
333 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b,
334 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf,
335 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929,
336 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313,
337 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363,
338 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b,
339 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989,
340 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717,
341 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3,
342 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737,
343 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494,
344 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b,
345 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a,
346 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c,
347 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3,
348 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d,
349 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5,
350 0x0021210021002121, 0x0044440044004444, 0x0051510051005151,
351 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939,
352 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa,
353 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656,
354 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4,
355 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e,
356 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252,
357 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9,
358 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4,
359 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a,
360 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a,
361 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040,
362 0x004f4f004f004f4f,
363};
364
365const u64 camellia_sp00444404[256] = {
366 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3,
367 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057,
368 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023,
369 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5,
370 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d,
371 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af,
372 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e,
373 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b,
374 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5,
375 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a,
376 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b,
377 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0,
378 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0,
379 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb,
380 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d,
381 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004,
382 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de,
383 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c,
384 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe,
385 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a,
386 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060,
387 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0,
388 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054,
389 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064,
390 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3,
391 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6,
392 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087,
393 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090,
394 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d,
395 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8,
396 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081,
397 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063,
398 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f,
399 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9,
400 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078,
401 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071,
402 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088,
403 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9,
404 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036,
405 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1,
406 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb,
407 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad,
408 0x0000777777770077, 0x0000808080800080, 0x0000828282820082,
409 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5,
410 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c,
411 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093,
412 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e,
413 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd,
414 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb,
415 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f,
416 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1,
417 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d,
418 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056,
419 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066,
420 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012,
421 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099,
422 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e,
423 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031,
424 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058,
425 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c,
426 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018,
427 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2,
428 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008,
429 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050,
430 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089,
431 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095,
432 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4,
433 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db,
434 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f,
435 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002,
436 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067,
437 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2,
438 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037,
439 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b,
440 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079,
441 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e,
442 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd,
443 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a,
444 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025,
445 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa,
446 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee,
447 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068,
448 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028,
449 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1,
450 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7,
451 0x00009e9e9e9e009e,
452};
453
454const u64 camellia_sp02220222[256] = {
455 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858,
456 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e,
457 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9,
458 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a,
459 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d,
460 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf,
461 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a,
462 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242,
463 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e,
464 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca,
465 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d,
466 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f,
467 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e,
468 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060,
469 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc,
470 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434,
471 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272,
472 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e,
473 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3,
474 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad,
475 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8,
476 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a,
477 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7,
478 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a,
479 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656,
480 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363,
481 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf,
482 0x0098989800989898, 0x0097979700979797, 0x0085858500858585,
483 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec,
484 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f,
485 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3,
486 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf,
487 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474,
488 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636,
489 0x0022222200222222, 0x0038383800383838, 0x0064646400646464,
490 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c,
491 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5,
492 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888,
493 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787,
494 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323,
495 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1,
496 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9,
497 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555,
498 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa,
499 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4,
500 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6,
501 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1,
502 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5,
503 0x0020202000202020, 0x0089898900898989, 0x0000000000000000,
504 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef,
505 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515,
506 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5,
507 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb,
508 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8,
509 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b,
510 0x0094949400949494, 0x0021212100212121, 0x0066666600666666,
511 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed,
512 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe,
513 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4,
514 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c,
515 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d,
516 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d,
517 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626,
518 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c,
519 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f,
520 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc,
521 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252,
522 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d,
523 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969,
524 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131,
525 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf,
526 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575,
527 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757,
528 0x0084848400848484, 0x0011111100111111, 0x0045454500454545,
529 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4,
530 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa,
531 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959,
532 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292,
533 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878,
534 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949,
535 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7,
536 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393,
537 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a,
538 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9,
539 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101,
540 0x003d3d3d003d3d3d,
541};
542
543const u64 camellia_sp30333033[256] = {
544 0x3800383838003838, 0x4100414141004141, 0x1600161616001616,
545 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393,
546 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272,
547 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a,
548 0x7500757575007575, 0x0600060606000606, 0x5700575757005757,
549 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7,
550 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2,
551 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090,
552 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7,
553 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2,
554 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343,
555 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7,
556 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f,
557 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818,
558 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f,
559 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d,
560 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c,
561 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3,
562 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec,
563 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b,
564 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636,
565 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686,
566 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd,
567 0x6600666666006666, 0x5800585858005858, 0x9600969696009696,
568 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595,
569 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8,
570 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef,
571 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161,
572 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b,
573 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb,
574 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8,
575 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb,
576 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d,
577 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d,
578 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919,
579 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b,
580 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979,
581 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222,
582 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1,
583 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8,
584 0x1200121212001212, 0x0400040404000404, 0x7400747474007474,
585 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e,
586 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555,
587 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe,
588 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131,
589 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad,
590 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070,
591 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969,
592 0x0800080808000808, 0x6200626262006262, 0x0000000000000000,
593 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb,
594 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545,
595 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d,
596 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee,
597 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e,
598 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6,
599 0x2500252525002525, 0x4800484848004848, 0x9900999999009999,
600 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b,
601 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf,
602 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929,
603 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313,
604 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363,
605 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b,
606 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989,
607 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717,
608 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3,
609 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737,
610 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494,
611 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b,
612 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a,
613 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c,
614 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3,
615 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d,
616 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5,
617 0x2100212121002121, 0x4400444444004444, 0x5100515151005151,
618 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939,
619 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa,
620 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656,
621 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4,
622 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e,
623 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252,
624 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9,
625 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4,
626 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a,
627 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a,
628 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040,
629 0x4f004f4f4f004f4f,
630};
631
632const u64 camellia_sp44044404[256] = {
633 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3,
634 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057,
635 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023,
636 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5,
637 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d,
638 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af,
639 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e,
640 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b,
641 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5,
642 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a,
643 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b,
644 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0,
645 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0,
646 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb,
647 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d,
648 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004,
649 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de,
650 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c,
651 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe,
652 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a,
653 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060,
654 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0,
655 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054,
656 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064,
657 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3,
658 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6,
659 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087,
660 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090,
661 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d,
662 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8,
663 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081,
664 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063,
665 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f,
666 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9,
667 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078,
668 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071,
669 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088,
670 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9,
671 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036,
672 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1,
673 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb,
674 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad,
675 0x7777007777770077, 0x8080008080800080, 0x8282008282820082,
676 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5,
677 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c,
678 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093,
679 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e,
680 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd,
681 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb,
682 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f,
683 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1,
684 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d,
685 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056,
686 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066,
687 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012,
688 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099,
689 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e,
690 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031,
691 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058,
692 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c,
693 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018,
694 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2,
695 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008,
696 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050,
697 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089,
698 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095,
699 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4,
700 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db,
701 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f,
702 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002,
703 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067,
704 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2,
705 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037,
706 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b,
707 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079,
708 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e,
709 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd,
710 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a,
711 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025,
712 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa,
713 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee,
714 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068,
715 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028,
716 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1,
717 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7,
718 0x9e9e009e9e9e009e,
719};
720
721const u64 camellia_sp11101110[256] = {
722 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00,
723 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700,
724 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400,
725 0x8585850085858500, 0x5757570057575700, 0x3535350035353500,
726 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00,
727 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00,
728 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500,
729 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100,
730 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00,
731 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500,
732 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600,
733 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00,
734 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00,
735 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000,
736 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00,
737 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00,
738 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900,
739 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700,
740 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900,
741 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600,
742 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00,
743 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00,
744 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00,
745 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00,
746 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00,
	0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100,
	0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00,
	0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200,
	0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600,
	0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700,
	0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100,
	0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700,
	0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00,
	0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00,
	0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200,
	0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600,
	0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200,
	0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400,
	0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300,
	0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100,
	0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800,
	0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00,
	0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00,
	0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00,
	0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200,
	0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00,
	0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000,
	0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200,
	0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000,
	0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700,
	0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00,
	0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00,
	0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00,
	0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00,
	0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00,
	0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300,
	0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600,
	0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00,
	0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200,
	0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600,
	0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600,
	0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600,
	0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300,
	0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00,
	0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700,
	0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00,
	0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900,
	0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600,
	0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400,
	0x5959590059595900, 0x7878780078787800, 0x9898980098989800,
	0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700,
	0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00,
	0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00,
	0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200,
	0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200,
	0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500,
	0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00,
	0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900,
	0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00,
	0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400,
	0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300,
	0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900,
	0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500,
	0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400,
	0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000,
	0x9e9e9e009e9e9e00,
};

/* key constants */
#define CAMELLIA_SIGMA1L (0xA09E667FL)
#define CAMELLIA_SIGMA1R (0x3BCC908BL)
#define CAMELLIA_SIGMA2L (0xB67AE858L)
#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
#define CAMELLIA_SIGMA3L (0xC6EF372FL)
#define CAMELLIA_SIGMA3R (0xE94F82BEL)
#define CAMELLIA_SIGMA4L (0x54FF53A5L)
#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
#define CAMELLIA_SIGMA5L (0x10E527FAL)
#define CAMELLIA_SIGMA5R (0xDE682D1DL)
#define CAMELLIA_SIGMA6L (0xB05688C2L)
#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)

/* macros */
#define ROLDQ(l, r, bits) ({ \
	u64 t = l;					\
	l = (l << bits) | (r >> (64 - bits));		\
	r = (r << bits) | (t >> (64 - bits));		\
})
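
/*
 * Illustrative example (not part of the original source): ROLDQ treats
 * (l || r) as a single 128-bit value and rotates it left by `bits`.
 * With l = 0x0123456789abcdef and r = 0xfedcba9876543210,
 * ROLDQ(l, r, 8) yields l = 0x23456789abcdeffe and
 * r = 0xdcba987654321001; the top byte of each half carries into the
 * low byte of the other.
 */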

#define CAMELLIA_F(x, kl, kr, y) ({ \
	u64 ii = x ^ (((u64)kl << 32) | kr);			\
	y = camellia_sp11101110[(uint8_t)ii];			\
	y ^= camellia_sp44044404[(uint8_t)(ii >> 8)];		\
	ii >>= 16;						\
	y ^= camellia_sp30333033[(uint8_t)ii];			\
	y ^= camellia_sp02220222[(uint8_t)(ii >> 8)];		\
	ii >>= 16;						\
	y ^= camellia_sp00444404[(uint8_t)ii];			\
	y ^= camellia_sp03303033[(uint8_t)(ii >> 8)];		\
	ii >>= 16;						\
	y ^= camellia_sp22000222[(uint8_t)ii];			\
	y ^= camellia_sp10011110[(uint8_t)(ii >> 8)];		\
	y = ror64(y, 32);					\
})
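
/*
 * Illustrative note (not part of the original source): CAMELLIA_F is
 * the Camellia F-function on a 64-bit half-block. The camellia_sp*
 * tables fuse the S-function (s-box substitution) with the P-function
 * (byte diffusion): each entry holds an s-box output already
 * replicated into the byte positions the P-layer would spread it to,
 * so one SP round costs eight table lookups XORed together plus the
 * final swap of the 32-bit halves (ror64 by 32).
 */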

#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32))

static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
{
	u64 kw4, tt;
	u32 dw, tl, tr;

	/* absorb kw2 to other subkeys */
	/* round 2 */
	subRL[3] ^= subRL[1];
	/* round 4 */
	subRL[5] ^= subRL[1];
	/* round 6 */
	subRL[7] ^= subRL[1];

	subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
	/* modified for FLinv(kl2) */
	dw = (subRL[1] & subRL[9]) >> 32;
	subRL[1] ^= rol32(dw, 1);

	/* round 8 */
	subRL[11] ^= subRL[1];
	/* round 10 */
	subRL[13] ^= subRL[1];
	/* round 12 */
	subRL[15] ^= subRL[1];

	subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
	/* modified for FLinv(kl4) */
	dw = (subRL[1] & subRL[17]) >> 32;
	subRL[1] ^= rol32(dw, 1);

	/* round 14 */
	subRL[19] ^= subRL[1];
	/* round 16 */
	subRL[21] ^= subRL[1];
	/* round 18 */
	subRL[23] ^= subRL[1];

	if (max == 24) {
		/* kw3 */
		subRL[24] ^= subRL[1];

		/* absorb kw4 to other subkeys */
		kw4 = subRL[25];
	} else {
		subRL[1] ^= (subRL[1] & ~subRL[25]) << 32;
		/* modified for FLinv(kl6) */
		dw = (subRL[1] & subRL[25]) >> 32;
		subRL[1] ^= rol32(dw, 1);

		/* round 20 */
		subRL[27] ^= subRL[1];
		/* round 22 */
		subRL[29] ^= subRL[1];
		/* round 24 */
		subRL[31] ^= subRL[1];
		/* kw3 */
		subRL[32] ^= subRL[1];

		/* absorb kw4 to other subkeys */
		kw4 = subRL[33];
		/* round 23 */
		subRL[30] ^= kw4;
		/* round 21 */
		subRL[28] ^= kw4;
		/* round 19 */
		subRL[26] ^= kw4;

		kw4 ^= (kw4 & ~subRL[24]) << 32;
		/* modified for FL(kl5) */
		dw = (kw4 & subRL[24]) >> 32;
		kw4 ^= rol32(dw, 1);
	}

	/* round 17 */
	subRL[22] ^= kw4;
	/* round 15 */
	subRL[20] ^= kw4;
	/* round 13 */
	subRL[18] ^= kw4;

	kw4 ^= (kw4 & ~subRL[16]) << 32;
	/* modified for FL(kl3) */
	dw = (kw4 & subRL[16]) >> 32;
	kw4 ^= rol32(dw, 1);

	/* round 11 */
	subRL[14] ^= kw4;
	/* round 9 */
	subRL[12] ^= kw4;
	/* round 7 */
	subRL[10] ^= kw4;

	kw4 ^= (kw4 & ~subRL[8]) << 32;
	/* modified for FL(kl1) */
	dw = (kw4 & subRL[8]) >> 32;
	kw4 ^= rol32(dw, 1);

	/* round 5 */
	subRL[6] ^= kw4;
	/* round 3 */
	subRL[4] ^= kw4;
	/* round 1 */
	subRL[2] ^= kw4;
	/* kw1 */
	subRL[0] ^= kw4;

	/* key XOR is end of F-function */
	SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]);		/* kw1 */
	SET_SUBKEY_LR(2, subRL[3]);			/* round 1 */
	SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]);		/* round 2 */
	SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]);		/* round 3 */
	SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]);		/* round 4 */
	SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]);		/* round 5 */

	tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
	dw = tl & (subRL[8] >> 32);			/* FL(kl1) */
	tr = subRL[10] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(7, subRL[6] ^ tt);		/* round 6 */
	SET_SUBKEY_LR(8, subRL[8]);			/* FL(kl1) */
	SET_SUBKEY_LR(9, subRL[9]);			/* FLinv(kl2) */

	tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
	dw = tl & (subRL[9] >> 32);			/* FLinv(kl2) */
	tr = subRL[7] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(10, subRL[11] ^ tt);		/* round 7 */
	SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]);	/* round 8 */
	SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]);	/* round 9 */
	SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]);	/* round 10 */
	SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]);	/* round 11 */

	tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
	dw = tl & (subRL[16] >> 32);			/* FL(kl3) */
	tr = subRL[18] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(15, subRL[14] ^ tt);		/* round 12 */
	SET_SUBKEY_LR(16, subRL[16]);			/* FL(kl3) */
	SET_SUBKEY_LR(17, subRL[17]);			/* FLinv(kl4) */

	tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
	dw = tl & (subRL[17] >> 32);			/* FLinv(kl4) */
	tr = subRL[15] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(18, subRL[19] ^ tt);		/* round 13 */
	SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]);	/* round 14 */
	SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]);	/* round 15 */
	SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]);	/* round 16 */
	SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]);	/* round 17 */

	if (max == 24) {
		SET_SUBKEY_LR(23, subRL[22]);		/* round 18 */
		SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */
	} else {
		tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]);
		dw = tl & (subRL[24] >> 32);		/* FL(kl5) */
		tr = subRL[26] ^ rol32(dw, 1);
		tt = (tr | ((u64)tl << 32));

		SET_SUBKEY_LR(23, subRL[22] ^ tt);	/* round 18 */
		SET_SUBKEY_LR(24, subRL[24]);		/* FL(kl5) */
		SET_SUBKEY_LR(25, subRL[25]);		/* FLinv(kl6) */

		tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
		dw = tl & (subRL[25] >> 32);		/* FLinv(kl6) */
		tr = subRL[23] ^ rol32(dw, 1);
		tt = (tr | ((u64)tl << 32));

		SET_SUBKEY_LR(26, subRL[27] ^ tt);	/* round 19 */
		SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */
		SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */
		SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */
		SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */
		SET_SUBKEY_LR(31, subRL[30]);		/* round 24 */
		SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */
	}
}
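
/*
 * Illustrative note (not part of the original source):
 * camellia_setup_tail() above first absorbs the whitening keys kw2 and
 * kw4 into the surrounding round subkeys (an equivalent-key
 * transformation, so those whitening XORs need not be applied
 * separately at run time), then folds adjacent subRL[] pairs into the
 * final subkey[] layout consumed by the assembler routines.
 */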

static void camellia_setup128(const unsigned char *key, u64 *subkey)
{
	u64 kl, kr, ww;
	u64 subRL[26];

	/**
	 *  k == kl || kr (|| is concatenation)
	 */
	kl = get_unaligned_be64(key);
	kr = get_unaligned_be64(key + 8);

	/* generate KL dependent subkeys */
	/* kw1 */
	subRL[0] = kl;
	/* kw2 */
	subRL[1] = kr;

	/* rotation left shift 15bit */
	ROLDQ(kl, kr, 15);

	/* k3 */
	subRL[4] = kl;
	/* k4 */
	subRL[5] = kr;

	/* rotation left shift 15+30bit */
	ROLDQ(kl, kr, 30);

	/* k7 */
	subRL[10] = kl;
	/* k8 */
	subRL[11] = kr;

	/* rotation left shift 15+30+15bit */
	ROLDQ(kl, kr, 15);

	/* k10 */
	subRL[13] = kr;
	/* rotation left shift 15+30+15+17 bit */
	ROLDQ(kl, kr, 17);

	/* kl3 */
	subRL[16] = kl;
	/* kl4 */
	subRL[17] = kr;

	/* rotation left shift 15+30+15+17+17 bit */
	ROLDQ(kl, kr, 17);

	/* k13 */
	subRL[18] = kl;
	/* k14 */
	subRL[19] = kr;

	/* rotation left shift 15+30+15+17+17+17 bit */
	ROLDQ(kl, kr, 17);

	/* k17 */
	subRL[22] = kl;
	/* k18 */
	subRL[23] = kr;

	/* generate KA */
	kl = subRL[0];
	kr = subRL[1];
	CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);

	/* current status == (kll, klr, w0, w1) */
	CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
	kl ^= ww;

	/* generate KA dependent subkeys */
	/* k1, k2 */
	subRL[2] = kl;
	subRL[3] = kr;
	ROLDQ(kl, kr, 15);
	/* k5,k6 */
	subRL[6] = kl;
	subRL[7] = kr;
	ROLDQ(kl, kr, 15);
	/* kl1, kl2 */
	subRL[8] = kl;
	subRL[9] = kr;
	ROLDQ(kl, kr, 15);
	/* k9 */
	subRL[12] = kl;
	ROLDQ(kl, kr, 15);
	/* k11, k12 */
	subRL[14] = kl;
	subRL[15] = kr;
	ROLDQ(kl, kr, 34);
	/* k15, k16 */
	subRL[20] = kl;
	subRL[21] = kr;
	ROLDQ(kl, kr, 17);
	/* kw3, kw4 */
	subRL[24] = kl;
	subRL[25] = kr;

	camellia_setup_tail(subkey, subRL, 24);
}

static void camellia_setup256(const unsigned char *key, u64 *subkey)
{
	u64 kl, kr;		/* left half of key */
	u64 krl, krr;		/* right half of key */
	u64 ww;			/* temporary variables */
	u64 subRL[34];

	/**
	 *  key = (kl || kr || krl || krr) (|| is concatenation)
	 */
	kl = get_unaligned_be64(key);
	kr = get_unaligned_be64(key + 8);
	krl = get_unaligned_be64(key + 16);
	krr = get_unaligned_be64(key + 24);

	/* generate KL dependent subkeys */
	/* kw1 */
	subRL[0] = kl;
	/* kw2 */
	subRL[1] = kr;
	ROLDQ(kl, kr, 45);
	/* k9 */
	subRL[12] = kl;
	/* k10 */
	subRL[13] = kr;
	ROLDQ(kl, kr, 15);
	/* kl3 */
	subRL[16] = kl;
	/* kl4 */
	subRL[17] = kr;
	ROLDQ(kl, kr, 17);
	/* k17 */
	subRL[22] = kl;
	/* k18 */
	subRL[23] = kr;
	ROLDQ(kl, kr, 34);
	/* k23 */
	subRL[30] = kl;
	/* k24 */
	subRL[31] = kr;

	/* generate KR dependent subkeys */
	ROLDQ(krl, krr, 15);
	/* k3 */
	subRL[4] = krl;
	/* k4 */
	subRL[5] = krr;
	ROLDQ(krl, krr, 15);
	/* kl1 */
	subRL[8] = krl;
	/* kl2 */
	subRL[9] = krr;
	ROLDQ(krl, krr, 30);
	/* k13 */
	subRL[18] = krl;
	/* k14 */
	subRL[19] = krr;
	ROLDQ(krl, krr, 34);
	/* k19 */
	subRL[26] = krl;
	/* k20 */
	subRL[27] = krr;
	ROLDQ(krl, krr, 34);

	/* generate KA */
	kl = subRL[0] ^ krl;
	kr = subRL[1] ^ krr;

	CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
	kl ^= krl;
	CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
	kr ^= ww ^ krr;
	CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
	kl ^= ww;

	/* generate KB */
	krl ^= kl;
	krr ^= kr;
	CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
	krr ^= ww;
	CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
	krl ^= ww;

	/* generate KA dependent subkeys */
	ROLDQ(kl, kr, 15);
	/* k5 */
	subRL[6] = kl;
	/* k6 */
	subRL[7] = kr;
	ROLDQ(kl, kr, 30);
	/* k11 */
	subRL[14] = kl;
	/* k12 */
	subRL[15] = kr;
	/* rotation left shift 32bit */
	ROLDQ(kl, kr, 32);
	/* kl5 */
	subRL[24] = kl;
	/* kl6 */
	subRL[25] = kr;
	/* rotation left shift 17 from k11,k12 -> k21,k22 */
	ROLDQ(kl, kr, 17);
	/* k21 */
	subRL[28] = kl;
	/* k22 */
	subRL[29] = kr;

	/* generate KB dependent subkeys */
	/* k1 */
	subRL[2] = krl;
	/* k2 */
	subRL[3] = krr;
	ROLDQ(krl, krr, 30);
	/* k7 */
	subRL[10] = krl;
	/* k8 */
	subRL[11] = krr;
	ROLDQ(krl, krr, 30);
	/* k15 */
	subRL[20] = krl;
	/* k16 */
	subRL[21] = krr;
	ROLDQ(krl, krr, 51);
	/* kw3 */
	subRL[32] = krl;
	/* kw4 */
	subRL[33] = krr;

	camellia_setup_tail(subkey, subRL, 32);
}

static void camellia_setup192(const unsigned char *key, u64 *subkey)
{
	unsigned char kk[32];
	u64 krl, krr;

	memcpy(kk, key, 24);
	memcpy((unsigned char *)&krl, key + 16, 8);
	krr = ~krl;
	memcpy(kk + 24, (unsigned char *)&krr, 8);
	camellia_setup256(kk, subkey);
}

static int __camellia_setkey(struct camellia_ctx *cctx,
			     const unsigned char *key,
			     unsigned int key_len, u32 *flags)
{
	if (key_len != 16 && key_len != 24 && key_len != 32) {
		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	cctx->key_length = key_len;

	switch (key_len) {
	case 16:
		camellia_setup128(key, cctx->key_table);
		break;
	case 24:
		camellia_setup192(key, cctx->key_table);
		break;
	case 32:
		camellia_setup256(key, cctx->key_table);
		break;
	}

	return 0;
}

static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
			   unsigned int key_len)
{
	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
				 &tfm->crt_flags);
}

static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
		     void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
{
	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
	unsigned int nbytes;
	int err;

	err = blkcipher_walk_virt(desc, walk);

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		/* Process two block batch */
		if (nbytes >= bsize * 2) {
			do {
				fn_2way(ctx, wdst, wsrc);

				wsrc += bsize * 2;
				wdst += bsize * 2;
				nbytes -= bsize * 2;
			} while (nbytes >= bsize * 2);

			if (nbytes < bsize)
				goto done;
		}

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
}

static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 *iv = (u128 *)walk->iv;

	do {
		u128_xor(dst, src, iv);
		camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 ivs[2 - 1];
	u128 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process two block batch */
	if (nbytes >= bsize * 2) {
		do {
			nbytes -= bsize * (2 - 1);
			src -= 2 - 1;
			dst -= 2 - 1;

			ivs[0] = src[0];

			camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);

			u128_xor(dst + 1, dst + 1, ivs + 0);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			u128_xor(dst, dst, src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * 2);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		u128_xor(dst, dst, src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	u128_xor(dst, dst, (u128 *)walk->iv);
	*(u128 *)walk->iv = last_iv;

	return nbytes;
}
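
/*
 * Illustrative note (not part of the original source): __cbc_decrypt()
 * above walks the data from the last block back to the first so that
 * decryption works in place: each plaintext is P[i] = D(C[i]) ^ C[i-1],
 * and processing backwards keeps C[i-1] intact in the source buffer
 * until it is needed; the saved last_iv becomes the chaining value for
 * the next chunk of the walk.
 */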

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

static inline void u128_to_be128(be128 *dst, const u128 *src)
{
	dst->a = cpu_to_be64(src->a);
	dst->b = cpu_to_be64(src->b);
}

static inline void be128_to_u128(u128 *dst, const be128 *src)
{
	dst->a = be64_to_cpu(src->a);
	dst->b = be64_to_cpu(src->b);
}

static inline void u128_inc(u128 *i)
{
	i->b++;
	if (!i->b)
		i->a++;
}
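
/*
 * Illustrative example (not part of the original source): u128_inc()
 * increments a 128-bit counter held as two u64 halves, propagating the
 * carry; e.g. { .a = 0, .b = 0xffffffffffffffff } becomes
 * { .a = 1, .b = 0 }.
 */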

static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 keystream[CAMELLIA_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;
	u128 ctrblk;

	memcpy(keystream, src, nbytes);
	camellia_enc_blk_xor(ctx, keystream, walk->iv);
	memcpy(dst, keystream, nbytes);

	be128_to_u128(&ctrblk, (be128 *)walk->iv);
	u128_inc(&ctrblk);
	u128_to_be128((be128 *)walk->iv, &ctrblk);
}
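
/*
 * Illustrative note (not part of the original source): ctr_crypt_final()
 * handles a trailing partial block. The remaining nbytes are staged in
 * a block-sized buffer; camellia_enc_blk_xor(), as used here, encrypts
 * the counter block in walk->iv and XORs the result into that buffer,
 * and only nbytes are copied back out, so no keystream padding reaches
 * the destination.
 */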

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 ctrblk;
	be128 ctrblocks[2];

	be128_to_u128(&ctrblk, (be128 *)walk->iv);

	/* Process two block batch */
	if (nbytes >= bsize * 2) {
		do {
			if (dst != src) {
				dst[0] = src[0];
				dst[1] = src[1];
			}

			/* create ctrblks for parallel encrypt */
			u128_to_be128(&ctrblocks[0], &ctrblk);
			u128_inc(&ctrblk);
			u128_to_be128(&ctrblocks[1], &ctrblk);
			u128_inc(&ctrblk);

			camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
						  (u8 *)ctrblocks);

			src += 2;
			dst += 2;
			nbytes -= bsize * 2;
		} while (nbytes >= bsize * 2);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		if (dst != src)
			*dst = *src;

		u128_to_be128(&ctrblocks[0], &ctrblk);
		u128_inc(&ctrblk);

		camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	u128_to_be128((be128 *)walk->iv, &ctrblk);
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);

	while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
	struct camellia_ctx *ctx = priv;
	int i;

	while (nbytes >= 2 * bsize) {
		camellia_enc_blk_2way(ctx, srcdst, srcdst);
		srcdst += bsize * 2;
		nbytes -= bsize * 2;
	}

	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
		camellia_enc_blk(ctx, srcdst, srcdst);
}

static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
	struct camellia_ctx *ctx = priv;
	int i;

	while (nbytes >= 2 * bsize) {
		camellia_dec_blk_2way(ctx, srcdst, srcdst);
		srcdst += bsize * 2;
		nbytes -= bsize * 2;
	}

	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
		camellia_dec_blk(ctx, srcdst, srcdst);
}

struct camellia_lrw_ctx {
	struct lrw_table_ctx lrw_table;
	struct camellia_ctx camellia_ctx;
};

static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
			       unsigned int keylen)
{
	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
	int err;

	err = __camellia_setkey(&ctx->camellia_ctx, key,
				keylen - CAMELLIA_BLOCK_SIZE,
				&tfm->crt_flags);
	if (err)
		return err;

	return lrw_init_table(&ctx->lrw_table,
			      key + keylen - CAMELLIA_BLOCK_SIZE);
}

static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[2 * 4];
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &ctx->camellia_ctx,
		.crypt_fn = encrypt_callback,
	};

	return lrw_crypt(desc, dst, src, nbytes, &req);
}

static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[2 * 4];
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &ctx->camellia_ctx,
		.crypt_fn = decrypt_callback,
	};

	return lrw_crypt(desc, dst, src, nbytes, &req);
}

static void lrw_exit_tfm(struct crypto_tfm *tfm)
{
	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);

	lrw_free_table(&ctx->lrw_table);
}

struct camellia_xts_ctx {
	struct camellia_ctx tweak_ctx;
	struct camellia_ctx crypt_ctx;
};

static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
			       unsigned int keylen)
{
	struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
	u32 *flags = &tfm->crt_flags;
	int err;

	/* key consists of keys of equal size concatenated, therefore
	 * the length must be even
	 */
	if (keylen % 2) {
		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	/* first half of xts-key is for crypt */
	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
	if (err)
		return err;

	/* second half of xts-key is for tweak */
	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
				 flags);
}

static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[2 * 4];
	struct xts_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.tweak_ctx = &ctx->tweak_ctx,
		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
		.crypt_ctx = &ctx->crypt_ctx,
		.crypt_fn = encrypt_callback,
	};

	return xts_crypt(desc, dst, src, nbytes, &req);
}

static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[2 * 4];
	struct xts_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.tweak_ctx = &ctx->tweak_ctx,
		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
		.crypt_ctx = &ctx->crypt_ctx,
		.crypt_fn = decrypt_callback,
	};

	return xts_crypt(desc, dst, src, nbytes, &req);
}

static struct crypto_alg camellia_algs[6] = { {
	.cra_name		= "camellia",
	.cra_driver_name	= "camellia-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_ctx),
	.cra_alignmask		= 0,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(camellia_algs[0].cra_list),
	.cra_u = {
		.cipher = {
			.cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
			.cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
			.cia_setkey	 = camellia_setkey,
			.cia_encrypt	 = camellia_encrypt,
			.cia_decrypt	 = camellia_decrypt
		}
	}
}, {
	.cra_name		= "ecb(camellia)",
	.cra_driver_name	= "ecb-camellia-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(camellia_algs[1].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.setkey		= camellia_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(camellia)",
	.cra_driver_name	= "cbc-camellia-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(camellia_algs[2].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.ivsize		= CAMELLIA_BLOCK_SIZE,
			.setkey		= camellia_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(camellia)",
	.cra_driver_name	= "ctr-camellia-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct camellia_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(camellia_algs[3].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.ivsize		= CAMELLIA_BLOCK_SIZE,
			.setkey		= camellia_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "lrw(camellia)",
	.cra_driver_name	= "lrw-camellia-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(camellia_algs[4].cra_list),
	.cra_exit		= lrw_exit_tfm,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
					  CAMELLIA_BLOCK_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
					  CAMELLIA_BLOCK_SIZE,
			.ivsize		= CAMELLIA_BLOCK_SIZE,
			.setkey		= lrw_camellia_setkey,
			.encrypt	= lrw_encrypt,
			.decrypt	= lrw_decrypt,
		},
	},
}, {
	.cra_name		= "xts(camellia)",
	.cra_driver_name	= "xts-camellia-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(camellia_algs[5].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
			.ivsize		= CAMELLIA_BLOCK_SIZE,
			.setkey		= xts_camellia_setkey,
			.encrypt	= xts_encrypt,
			.decrypt	= xts_decrypt,
		},
	},
} };

static bool is_blacklisted_cpu(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return false;

	if (boot_cpu_data.x86 == 0x0f) {
		/*
		 * On Pentium 4, camellia-asm is slower than the generic
		 * implementation because of the excessive use of 64-bit
		 * rotates and left shifts (which are really slow on P4)
		 * needed to store and handle the 128-bit block in two
		 * 64-bit registers.
		 */
		return true;
	}

	return false;
}

static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");

static int __init init(void)
{
	if (!force && is_blacklisted_cpu()) {
		printk(KERN_INFO
		       "camellia-x86_64: performance on this CPU "
		       "would be suboptimal: disabling "
		       "camellia-x86_64.\n");
		return -ENODEV;
	}

	return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
}

static void __exit fini(void)
{
	crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
}

module_init(init);
module_exit(fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
MODULE_ALIAS("camellia");
MODULE_ALIAS("camellia-asm");
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c
index b9d00261703c..493f959261f7 100644
--- a/arch/x86/crypto/crc32c-intel.c
+++ b/arch/x86/crypto/crc32c-intel.c
@@ -31,6 +31,7 @@
 #include <crypto/internal/hash.h>
 
 #include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
 
 #define CHKSUM_BLOCK_SIZE	1
 #define CHKSUM_DIGEST_SIZE	4
@@ -173,13 +174,17 @@ static struct shash_alg alg = {
 	}
 };
 
+static const struct x86_cpu_id crc32c_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
 
 static int __init crc32c_intel_mod_init(void)
 {
-	if (cpu_has_xmm4_2)
-		return crypto_register_shash(&alg);
-	else
-		return -ENODEV;
+	if (!x86_match_cpu(crc32c_cpu_id))
+		return -ENODEV;
+	return crypto_register_shash(&alg);
 }
 
 static void __exit crc32c_intel_mod_fini(void)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 976aa64d9a20..b4bf0a63b520 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -20,6 +20,7 @@
 #include <crypto/gf128mul.h>
 #include <crypto/internal/hash.h>
 #include <asm/i387.h>
+#include <asm/cpu_device_id.h>
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
@@ -294,15 +295,18 @@ static struct ahash_alg ghash_async_alg = {
 	},
 };
 
+static const struct x86_cpu_id pcmul_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
+
 static int __init ghash_pclmulqdqni_mod_init(void)
 {
 	int err;
 
-	if (!cpu_has_pclmulqdq) {
-		printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not"
-			" detected.\n");
+	if (!x86_match_cpu(pcmul_cpu_id))
 		return -ENODEV;
-	}
 
 	err = crypto_register_shash(&ghash_alg);
 	if (err)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index 4e37677ca851..c00053d42f99 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -463,23 +463,20 @@
 	pand	x0, x4; \
 	pxor	x2, x4;
 
-#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
-	movdqa x2, t3; \
-	movdqa x0, t1; \
-	unpcklps x3, t3; \
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	movdqa x0, t2; \
-	unpcklps x1, t1; \
-	unpckhps x1, t2; \
-	movdqa t3, x1; \
-	unpckhps x3, x2; \
-	movdqa t1, x0; \
-	movhlps t1, x1; \
-	movdqa t2, t1; \
-	movlhps t3, x0; \
-	movlhps x2, t1; \
-	movhlps t2, x2; \
-	movdqa x2, x3; \
-	movdqa t1, x2;
+	punpckldq x1, x0; \
+	punpckhdq x1, t2; \
+	movdqa x2, t1; \
+	punpckhdq x3, x2; \
+	punpckldq x3, t1; \
+	movdqa x0, x1; \
+	punpcklqdq t1, x0; \
+	punpckhqdq t1, x1; \
+	movdqa t2, x3; \
+	punpcklqdq x2, t2; \
+	punpckhqdq x2, x3; \
+	movdqa t2, x2;
 
 #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
 	movdqu (0*4*4)(in), x0; \
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 7f24a1540821..3ee1ff04d3e9 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -585,23 +585,20 @@
 	get_key(i, 1, RK1); \
 	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
 
-#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
-	movdqa x2, t3; \
-	movdqa x0, t1; \
-	unpcklps x3, t3; \
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	movdqa x0, t2; \
-	unpcklps x1, t1; \
-	unpckhps x1, t2; \
-	movdqa t3, x1; \
-	unpckhps x3, x2; \
-	movdqa t1, x0; \
-	movhlps t1, x1; \
-	movdqa t2, t1; \
-	movlhps t3, x0; \
-	movlhps x2, t1; \
-	movhlps t2, x2; \
-	movdqa x2, x3; \
-	movdqa t1, x2;
+	punpckldq x1, x0; \
+	punpckhdq x1, t2; \
+	movdqa x2, t1; \
+	punpckhdq x3, x2; \
+	punpckldq x3, t1; \
+	movdqa x0, x1; \
+	punpcklqdq t1, x0; \
+	punpckhqdq t1, x1; \
+	movdqa t2, x3; \
+	punpcklqdq x2, t2; \
+	punpckhqdq x2, x3; \
+	movdqa t2, x2;
 
 #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
 	movdqu (0*4*4)(in), x0; \
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 7955a9b76b91..4b21be85e0a1 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -145,28 +145,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return ecb_crypt(desc, &walk, false);
 }
 
-static struct crypto_alg blk_ecb_alg = {
-	.cra_name		= "__ecb-serpent-sse2",
-	.cra_driver_name	= "__driver-ecb-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-};
-
 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 				  struct blkcipher_walk *walk)
 {
@@ -295,28 +273,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_cbc_alg = {
-	.cra_name		= "__cbc-serpent-sse2",
-	.cra_driver_name	= "__driver-cbc-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-};
-
 static inline void u128_to_be128(be128 *dst, const u128 *src)
 {
 	dst->a = cpu_to_be64(src->a);
@@ -439,29 +395,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_ctr_alg = {
-	.cra_name		= "__ctr-serpent-sse2",
-	.cra_driver_name	= "__driver-ctr-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-};
-
 struct crypt_priv {
 	struct serpent_ctx *ctx;
 	bool fpu_enabled;
@@ -580,32 +513,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
 	lrw_free_table(&ctx->lrw_table);
 }
 
-static struct crypto_alg blk_lrw_alg = {
-	.cra_name		= "__lrw-serpent-sse2",
-	.cra_driver_name	= "__driver-lrw-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_lrw_alg.cra_list),
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-};
-
 struct serpent_xts_ctx {
 	struct serpent_ctx tweak_ctx;
 	struct serpent_ctx crypt_ctx;
@@ -689,29 +596,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return ret;
 }
 
-static struct crypto_alg blk_xts_alg = {
-	.cra_name		= "__xts-serpent-sse2",
-	.cra_driver_name	= "__driver-xts-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_xts_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-};
-
 static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
 			unsigned int key_len)
 {
@@ -792,28 +676,133 @@ static void ablk_exit(struct crypto_tfm *tfm)
 	cryptd_free_ablkcipher(ctx->cryptd_tfm);
 }
 
-static void ablk_init_common(struct crypto_tfm *tfm,
-			     struct cryptd_ablkcipher *cryptd_tfm)
+static int ablk_init(struct crypto_tfm *tfm)
 {
 	struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_ablkcipher *cryptd_tfm;
+	char drv_name[CRYPTO_MAX_ALG_NAME];
+
+	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+		 crypto_tfm_alg_driver_name(tfm));
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
 
 	ctx->cryptd_tfm = cryptd_tfm;
 	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
 		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-}
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
 
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
 	return 0;
 }
 
-static struct crypto_alg ablk_ecb_alg = {
+static struct crypto_alg serpent_algs[10] = { {
+	.cra_name		= "__ecb-serpent-sse2",
+	.cra_driver_name	= "__driver-ecb-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-serpent-sse2",
+	.cra_driver_name	= "__driver-cbc-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-serpent-sse2",
+	.cra_driver_name	= "__driver-ctr-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "__lrw-serpent-sse2",
+	.cra_driver_name	= "__driver-lrw-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
+	.cra_exit		= lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE +
+					  SERPENT_BLOCK_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE +
+					  SERPENT_BLOCK_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= lrw_serpent_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-serpent-sse2",
+	.cra_driver_name	= "__driver-xts-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= xts_serpent_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
 	.cra_name		= "ecb(serpent)",
 	.cra_driver_name	= "ecb-serpent-sse2",
 	.cra_priority		= 400,
@@ -823,8 +812,8 @@ static struct crypto_alg ablk_ecb_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
-	.cra_init		= ablk_ecb_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -835,20 +824,7 @@ static struct crypto_alg ablk_ecb_alg = {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-};
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
-	return 0;
-}
-
-static struct crypto_alg ablk_cbc_alg = {
+}, {
 	.cra_name		= "cbc(serpent)",
 	.cra_driver_name	= "cbc-serpent-sse2",
 	.cra_priority		= 400,
@@ -858,8 +834,8 @@ static struct crypto_alg ablk_cbc_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
-	.cra_init		= ablk_cbc_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -871,20 +847,7 @@ static struct crypto_alg ablk_cbc_alg = {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-};
-
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
-	return 0;
-}
-
-static struct crypto_alg ablk_ctr_alg = {
+}, {
 	.cra_name		= "ctr(serpent)",
 	.cra_driver_name	= "ctr-serpent-sse2",
 	.cra_priority		= 400,
@@ -894,8 +857,8 @@ static struct crypto_alg ablk_ctr_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
-	.cra_init		= ablk_ctr_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -908,20 +871,7 @@ static struct crypto_alg ablk_ctr_alg = {
 			.geniv		= "chainiv",
 		},
 	},
-};
-
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
-	return 0;
-}
-
-static struct crypto_alg ablk_lrw_alg = {
+}, {
 	.cra_name		= "lrw(serpent)",
 	.cra_driver_name	= "lrw-serpent-sse2",
 	.cra_priority		= 400,
@@ -931,8 +881,8 @@ static struct crypto_alg ablk_lrw_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
-	.cra_init		= ablk_lrw_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -946,20 +896,7 @@ static struct crypto_alg ablk_lrw_alg = {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-};
-
-static int ablk_xts_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
-	return 0;
-}
-
-static struct crypto_alg ablk_xts_alg = {
+}, {
 	.cra_name		= "xts(serpent)",
 	.cra_driver_name	= "xts-serpent-sse2",
 	.cra_priority		= 400,
@@ -969,8 +906,8 @@ static struct crypto_alg ablk_xts_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_xts_alg.cra_list),
-	.cra_init		= ablk_xts_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -982,84 +919,21 @@ static struct crypto_alg ablk_xts_alg = {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-};
+} };
 
 static int __init serpent_sse2_init(void)
 {
-	int err;
-
 	if (!cpu_has_xmm2) {
 		printk(KERN_INFO "SSE2 instructions are not detected.\n");
 		return -ENODEV;
 	}
 
-	err = crypto_register_alg(&blk_ecb_alg);
-	if (err)
-		goto blk_ecb_err;
-	err = crypto_register_alg(&blk_cbc_alg);
-	if (err)
-		goto blk_cbc_err;
-	err = crypto_register_alg(&blk_ctr_alg);
-	if (err)
-		goto blk_ctr_err;
-	err = crypto_register_alg(&ablk_ecb_alg);
-	if (err)
-		goto ablk_ecb_err;
-	err = crypto_register_alg(&ablk_cbc_alg);
-	if (err)
-		goto ablk_cbc_err;
-	err = crypto_register_alg(&ablk_ctr_alg);
-	if (err)
-		goto ablk_ctr_err;
-	err = crypto_register_alg(&blk_lrw_alg);
-	if (err)
-		goto blk_lrw_err;
-	err = crypto_register_alg(&ablk_lrw_alg);
-	if (err)
-		goto ablk_lrw_err;
-	err = crypto_register_alg(&blk_xts_alg);
-	if (err)
-		goto blk_xts_err;
-	err = crypto_register_alg(&ablk_xts_alg);
-	if (err)
-		goto ablk_xts_err;
-	return err;
-
-	crypto_unregister_alg(&ablk_xts_alg);
-ablk_xts_err:
-	crypto_unregister_alg(&blk_xts_alg);
-blk_xts_err:
-	crypto_unregister_alg(&ablk_lrw_alg);
-ablk_lrw_err:
-	crypto_unregister_alg(&blk_lrw_alg);
-blk_lrw_err:
-	crypto_unregister_alg(&ablk_ctr_alg);
-ablk_ctr_err:
-	crypto_unregister_alg(&ablk_cbc_alg);
-ablk_cbc_err:
-	crypto_unregister_alg(&ablk_ecb_alg);
-ablk_ecb_err:
-	crypto_unregister_alg(&blk_ctr_alg);
-blk_ctr_err:
-	crypto_unregister_alg(&blk_cbc_alg);
-blk_cbc_err:
-	crypto_unregister_alg(&blk_ecb_alg);
-blk_ecb_err:
-	return err;
+	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
 }
 
 static void __exit serpent_sse2_exit(void)
 {
-	crypto_unregister_alg(&ablk_xts_alg);
-	crypto_unregister_alg(&blk_xts_alg);
-	crypto_unregister_alg(&ablk_lrw_alg);
-	crypto_unregister_alg(&blk_lrw_alg);
-	crypto_unregister_alg(&ablk_ctr_alg);
-	crypto_unregister_alg(&ablk_cbc_alg);
-	crypto_unregister_alg(&ablk_ecb_alg);
-	crypto_unregister_alg(&blk_ctr_alg);
-	crypto_unregister_alg(&blk_cbc_alg);
-	crypto_unregister_alg(&blk_ecb_alg);
+	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
 }
 
 module_init(serpent_sse2_init);
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index dc6b3fb817fc..359ae084275c 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -68,7 +68,7 @@ static struct crypto_alg alg = {
 	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		= TF_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 3,
+	.cra_alignmask		= 0,
 	.cra_module		= THIS_MODULE,
 	.cra_list		= LIST_HEAD_INIT(alg.cra_list),
 	.cra_u			= {
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 7fee8c152f93..922ab24cce31 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -25,6 +25,7 @@
  *
  */
 
+#include <asm/processor.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -122,28 +123,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
 }
 
-static struct crypto_alg blk_ecb_alg = {
-	.cra_name		= "ecb(twofish)",
-	.cra_driver_name	= "ecb-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-};
-
 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 				  struct blkcipher_walk *walk)
 {
@@ -267,29 +246,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_cbc_alg = {
-	.cra_name		= "cbc(twofish)",
-	.cra_driver_name	= "cbc-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-};
-
 static inline void u128_to_be128(be128 *dst, const u128 *src)
 {
 	dst->a = cpu_to_be64(src->a);
@@ -411,29 +367,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_ctr_alg = {
-	.cra_name		= "ctr(twofish)",
-	.cra_driver_name	= "ctr-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-};
-
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = TF_BLOCK_SIZE;
@@ -524,30 +457,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
 	lrw_free_table(&ctx->lrw_table);
 }
 
-static struct crypto_alg blk_lrw_alg = {
-	.cra_name		= "lrw(twofish)",
-	.cra_driver_name	= "lrw-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_lrw_alg.cra_list),
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= lrw_twofish_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-};
-
 struct twofish_xts_ctx {
552 struct twofish_ctx tweak_ctx; 461 struct twofish_ctx tweak_ctx;
553 struct twofish_ctx crypt_ctx; 462 struct twofish_ctx crypt_ctx;
@@ -614,7 +523,91 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
614 return xts_crypt(desc, dst, src, nbytes, &req); 523 return xts_crypt(desc, dst, src, nbytes, &req);
615} 524}
616 525
617static struct crypto_alg blk_xts_alg = { 526static struct crypto_alg tf_algs[5] = { {
527 .cra_name = "ecb(twofish)",
528 .cra_driver_name = "ecb-twofish-3way",
529 .cra_priority = 300,
530 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
531 .cra_blocksize = TF_BLOCK_SIZE,
532 .cra_ctxsize = sizeof(struct twofish_ctx),
533 .cra_alignmask = 0,
534 .cra_type = &crypto_blkcipher_type,
535 .cra_module = THIS_MODULE,
536 .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list),
537 .cra_u = {
538 .blkcipher = {
539 .min_keysize = TF_MIN_KEY_SIZE,
540 .max_keysize = TF_MAX_KEY_SIZE,
541 .setkey = twofish_setkey,
542 .encrypt = ecb_encrypt,
543 .decrypt = ecb_decrypt,
544 },
545 },
546}, {
547 .cra_name = "cbc(twofish)",
548 .cra_driver_name = "cbc-twofish-3way",
549 .cra_priority = 300,
550 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
551 .cra_blocksize = TF_BLOCK_SIZE,
552 .cra_ctxsize = sizeof(struct twofish_ctx),
553 .cra_alignmask = 0,
554 .cra_type = &crypto_blkcipher_type,
555 .cra_module = THIS_MODULE,
556 .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list),
557 .cra_u = {
558 .blkcipher = {
559 .min_keysize = TF_MIN_KEY_SIZE,
560 .max_keysize = TF_MAX_KEY_SIZE,
561 .ivsize = TF_BLOCK_SIZE,
562 .setkey = twofish_setkey,
563 .encrypt = cbc_encrypt,
564 .decrypt = cbc_decrypt,
565 },
566 },
567}, {
568 .cra_name = "ctr(twofish)",
569 .cra_driver_name = "ctr-twofish-3way",
570 .cra_priority = 300,
571 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
572 .cra_blocksize = 1,
573 .cra_ctxsize = sizeof(struct twofish_ctx),
574 .cra_alignmask = 0,
575 .cra_type = &crypto_blkcipher_type,
576 .cra_module = THIS_MODULE,
577 .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list),
578 .cra_u = {
579 .blkcipher = {
580 .min_keysize = TF_MIN_KEY_SIZE,
581 .max_keysize = TF_MAX_KEY_SIZE,
582 .ivsize = TF_BLOCK_SIZE,
583 .setkey = twofish_setkey,
584 .encrypt = ctr_crypt,
585 .decrypt = ctr_crypt,
586 },
587 },
588}, {
589 .cra_name = "lrw(twofish)",
590 .cra_driver_name = "lrw-twofish-3way",
591 .cra_priority = 300,
592 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
593 .cra_blocksize = TF_BLOCK_SIZE,
594 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
595 .cra_alignmask = 0,
596 .cra_type = &crypto_blkcipher_type,
597 .cra_module = THIS_MODULE,
598 .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
599 .cra_exit = lrw_exit_tfm,
600 .cra_u = {
601 .blkcipher = {
602 .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
603 .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
604 .ivsize = TF_BLOCK_SIZE,
605 .setkey = lrw_twofish_setkey,
606 .encrypt = lrw_encrypt,
607 .decrypt = lrw_decrypt,
608 },
609 },
610}, {
618 .cra_name = "xts(twofish)", 611 .cra_name = "xts(twofish)",
619 .cra_driver_name = "xts-twofish-3way", 612 .cra_driver_name = "xts-twofish-3way",
620 .cra_priority = 300, 613 .cra_priority = 300,
@@ -624,7 +617,7 @@ static struct crypto_alg blk_xts_alg = {
624 .cra_alignmask = 0, 617 .cra_alignmask = 0,
625 .cra_type = &crypto_blkcipher_type, 618 .cra_type = &crypto_blkcipher_type,
626 .cra_module = THIS_MODULE, 619 .cra_module = THIS_MODULE,
627 .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list), 620 .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list),
628 .cra_u = { 621 .cra_u = {
629 .blkcipher = { 622 .blkcipher = {
630 .min_keysize = TF_MIN_KEY_SIZE * 2, 623 .min_keysize = TF_MIN_KEY_SIZE * 2,
@@ -635,50 +628,62 @@ static struct crypto_alg blk_xts_alg = {
635 .decrypt = xts_decrypt, 628 .decrypt = xts_decrypt,
636 }, 629 },
637 }, 630 },
638}; 631} };
639 632
640int __init init(void) 633static bool is_blacklisted_cpu(void)
641{ 634{
642 int err; 635 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
636 return false;
637
638 if (boot_cpu_data.x86 == 0x06 &&
639 (boot_cpu_data.x86_model == 0x1c ||
640 boot_cpu_data.x86_model == 0x26 ||
641 boot_cpu_data.x86_model == 0x36)) {
642 /*
 643 * On Atom, twofish-3way is slower than the original assembler
 644 * implementation. Twofish-3way trades off some performance by
 645 * storing blocks in 64bit registers to allow three blocks to
 646 * be processed in parallel. Parallel operation then gains back
 647 * more performance than was traded off, on out-of-order CPUs.
 648 * However, Atom does not benefit from this parallelism and
649 * should be blacklisted.
650 */
651 return true;
652 }
643 653
644 err = crypto_register_alg(&blk_ecb_alg); 654 if (boot_cpu_data.x86 == 0x0f) {
645 if (err) 655 /*
 646 		goto ecb_err; 656 * On Pentium 4, twofish-3way is slower than the original
 647 	err = crypto_register_alg(&blk_cbc_alg); 657 * assembler implementation because of the excessive use of 64bit
 648 	if (err) 658 * rotates and left-shifts (which are really slow on P4) needed to
 649 		goto cbc_err; 659 * store and handle a 128bit block in two 64bit registers.
650 err = crypto_register_alg(&blk_ctr_alg); 660 */
651 if (err) 661 return true;
652 goto ctr_err; 662 }
653 err = crypto_register_alg(&blk_lrw_alg); 663
654 if (err) 664 return false;
655 goto blk_lrw_err; 665}
656 err = crypto_register_alg(&blk_xts_alg); 666
657 if (err) 667static int force;
658 goto blk_xts_err; 668module_param(force, int, 0);
659 669MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
660 return 0; 670
661 671static int __init init(void)
662 crypto_unregister_alg(&blk_xts_alg); 672{
663blk_xts_err: 673 if (!force && is_blacklisted_cpu()) {
664 crypto_unregister_alg(&blk_lrw_alg); 674 printk(KERN_INFO
665blk_lrw_err: 675 "twofish-x86_64-3way: performance on this CPU "
666 crypto_unregister_alg(&blk_ctr_alg); 676 "would be suboptimal: disabling "
667ctr_err: 677 "twofish-x86_64-3way.\n");
668 crypto_unregister_alg(&blk_cbc_alg); 678 return -ENODEV;
669cbc_err: 679 }
670 crypto_unregister_alg(&blk_ecb_alg); 680
671ecb_err: 681 return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
672 return err;
673} 682}
674 683
675void __exit fini(void) 684static void __exit fini(void)
676{ 685{
677 crypto_unregister_alg(&blk_xts_alg); 686 crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
678 crypto_unregister_alg(&blk_lrw_alg);
679 crypto_unregister_alg(&blk_ctr_alg);
680 crypto_unregister_alg(&blk_cbc_alg);
681 crypto_unregister_alg(&blk_ecb_alg);
682} 687}
683 688
684module_init(init); 689module_init(init);
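
With the blacklist in place, init() refuses to load twofish-3way on the listed Atom and Pentium 4 models unless the new force parameter overrides it. Assuming the module keeps its usual name, usage would look like:

	# modprobe twofish-x86_64-3way            (returns -ENODEV on a blacklisted CPU)
	# modprobe twofish-x86_64-3way force=1    (loads regardless of the blacklist)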
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index 52d0ccfcf6ea..455646e0e532 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,6 +3,7 @@
3# 3#
4 4
5obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o 5obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
6obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
6 7
7sysv-$(CONFIG_SYSVIPC) := ipc32.o 8sysv-$(CONFIG_SYSVIPC) := ipc32.o
8obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) 9obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index fd843877e841..d511d951a052 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -26,7 +26,6 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28 28
29#include <asm/system.h>
30#include <asm/uaccess.h> 29#include <asm/uaccess.h>
31#include <asm/pgalloc.h> 30#include <asm/pgalloc.h>
32#include <asm/cacheflush.h> 31#include <asm/cacheflush.h>
@@ -315,8 +314,14 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
315 current->mm->free_area_cache = TASK_UNMAPPED_BASE; 314 current->mm->free_area_cache = TASK_UNMAPPED_BASE;
316 current->mm->cached_hole_size = 0; 315 current->mm->cached_hole_size = 0;
317 316
317 retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
318 if (retval < 0) {
319 /* Someone check-me: is this error path enough? */
320 send_sig(SIGKILL, current, 0);
321 return retval;
322 }
323
318 install_exec_creds(bprm); 324 install_exec_creds(bprm);
319 current->flags &= ~PF_FORKNOEXEC;
320 325
321 if (N_MAGIC(ex) == OMAGIC) { 326 if (N_MAGIC(ex) == OMAGIC) {
322 unsigned long text_addr, map_size; 327 unsigned long text_addr, map_size;
@@ -410,13 +415,6 @@ beyond_if:
410 415
411 set_brk(current->mm->start_brk, current->mm->brk); 416 set_brk(current->mm->start_brk, current->mm->brk);
412 417
413 retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
414 if (retval < 0) {
415 /* Someone check-me: is this error path enough? */
416 send_sig(SIGKILL, current, 0);
417 return retval;
418 }
419
420 current->mm->start_stack = 418 current->mm->start_stack =
421 (unsigned long)create_aout_tables((char __user *)bprm->p, bprm); 419 (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
422 /* start thread */ 420 /* start thread */
@@ -519,7 +517,8 @@ out:
519 517
520static int __init init_aout_binfmt(void) 518static int __init init_aout_binfmt(void)
521{ 519{
522 return register_binfmt(&aout_format); 520 register_binfmt(&aout_format);
521 return 0;
523} 522}
524 523
525static void __exit exit_aout_binfmt(void) 524static void __exit exit_aout_binfmt(void)
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 65577698cab2..a69245ba27e3 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -12,10 +12,8 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/smp.h> 13#include <linux/smp.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/signal.h>
16#include <linux/errno.h> 15#include <linux/errno.h>
17#include <linux/wait.h> 16#include <linux/wait.h>
18#include <linux/ptrace.h>
19#include <linux/unistd.h> 17#include <linux/unistd.h>
20#include <linux/stddef.h> 18#include <linux/stddef.h>
21#include <linux/personality.h> 19#include <linux/personality.h>
@@ -24,6 +22,7 @@
24#include <asm/ucontext.h> 22#include <asm/ucontext.h>
25#include <asm/uaccess.h> 23#include <asm/uaccess.h>
26#include <asm/i387.h> 24#include <asm/i387.h>
25#include <asm/fpu-internal.h>
27#include <asm/ptrace.h> 26#include <asm/ptrace.h>
28#include <asm/ia32_unistd.h> 27#include <asm/ia32_unistd.h>
29#include <asm/user32.h> 28#include <asm/user32.h>
@@ -31,20 +30,15 @@
31#include <asm/proto.h> 30#include <asm/proto.h>
32#include <asm/vdso.h> 31#include <asm/vdso.h>
33#include <asm/sigframe.h> 32#include <asm/sigframe.h>
33#include <asm/sighandling.h>
34#include <asm/sys_ia32.h> 34#include <asm/sys_ia32.h>
35 35
36#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 36#define FIX_EFLAGS __FIX_EFLAGS
37
38#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
39 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
40 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
41 X86_EFLAGS_CF)
42
43void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
44 37
45int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) 38int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
46{ 39{
47 int err = 0; 40 int err = 0;
41 bool ia32 = is_ia32_task();
48 42
49 if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) 43 if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
50 return -EFAULT; 44 return -EFAULT;
@@ -74,8 +68,13 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
74 case __SI_FAULT >> 16: 68 case __SI_FAULT >> 16:
75 break; 69 break;
76 case __SI_CHLD >> 16: 70 case __SI_CHLD >> 16:
77 put_user_ex(from->si_utime, &to->si_utime); 71 if (ia32) {
78 put_user_ex(from->si_stime, &to->si_stime); 72 put_user_ex(from->si_utime, &to->si_utime);
73 put_user_ex(from->si_stime, &to->si_stime);
74 } else {
75 put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime);
76 put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime);
77 }
79 put_user_ex(from->si_status, &to->si_status); 78 put_user_ex(from->si_status, &to->si_status);
80 /* FALL THROUGH */ 79 /* FALL THROUGH */
81 default: 80 default:
@@ -347,7 +346,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
347 put_user_ex(regs->dx, &sc->dx); 346 put_user_ex(regs->dx, &sc->dx);
348 put_user_ex(regs->cx, &sc->cx); 347 put_user_ex(regs->cx, &sc->cx);
349 put_user_ex(regs->ax, &sc->ax); 348 put_user_ex(regs->ax, &sc->ax);
350 put_user_ex(current->thread.trap_no, &sc->trapno); 349 put_user_ex(current->thread.trap_nr, &sc->trapno);
351 put_user_ex(current->thread.error_code, &sc->err); 350 put_user_ex(current->thread.error_code, &sc->err);
352 put_user_ex(regs->ip, &sc->ip); 351 put_user_ex(regs->ip, &sc->ip);
353 put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); 352 put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
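
copy_siginfo_to_user32() above leans on the x86 put_user_try/put_user_ex/put_user_catch helpers from <asm/uaccess.h>, which let a whole run of user stores share one fault handler instead of checking every store. A minimal hedged sketch of the pattern (the two fields copied are just examples):

#include <linux/compat.h>
#include <linux/signal.h>
#include <asm/uaccess.h>

static int copy_two_fields(compat_siginfo_t __user *to, const siginfo_t *from)
{
	int err = 0;

	if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
		return -EFAULT;

	put_user_try {
		put_user_ex(from->si_signo, &to->si_signo);
		put_user_ex(from->si_errno, &to->si_errno);
	} put_user_catch(err);	/* one fault handler for all stores */

	return err;
}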
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3e274564f6bf..e3e734005e19 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -14,6 +14,7 @@
14#include <asm/segment.h> 14#include <asm/segment.h>
15#include <asm/irqflags.h> 15#include <asm/irqflags.h>
16#include <linux/linkage.h> 16#include <linux/linkage.h>
17#include <linux/err.h>
17 18
18/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 19/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
19#include <linux/elf-em.h> 20#include <linux/elf-em.h>
@@ -27,8 +28,6 @@
27 28
28 .section .entry.text, "ax" 29 .section .entry.text, "ax"
29 30
30#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
31
32 .macro IA32_ARG_FIXUP noebp=0 31 .macro IA32_ARG_FIXUP noebp=0
33 movl %edi,%r8d 32 movl %edi,%r8d
34 .if \noebp 33 .if \noebp
@@ -191,7 +190,7 @@ sysexit_from_sys_call:
191 movl %ebx,%edx /* 3rd arg: 1st syscall arg */ 190 movl %ebx,%edx /* 3rd arg: 1st syscall arg */
192 movl %eax,%esi /* 2nd arg: syscall number */ 191 movl %eax,%esi /* 2nd arg: syscall number */
193 movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ 192 movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
194 call audit_syscall_entry 193 call __audit_syscall_entry
195 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ 194 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
196 cmpq $(IA32_NR_syscalls-1),%rax 195 cmpq $(IA32_NR_syscalls-1),%rax
197 ja ia32_badsys 196 ja ia32_badsys
@@ -208,12 +207,13 @@ sysexit_from_sys_call:
208 TRACE_IRQS_ON 207 TRACE_IRQS_ON
209 sti 208 sti
210 movl %eax,%esi /* second arg, syscall return value */ 209 movl %eax,%esi /* second arg, syscall return value */
211 cmpl $0,%eax /* is it < 0? */ 210 cmpl $-MAX_ERRNO,%eax /* is it an error ? */
212 setl %al /* 1 if so, 0 if not */ 211 jbe 1f
212 movslq %eax, %rsi /* if error sign extend to 64 bits */
2131: setbe %al /* 1 if error, 0 if not */
213 movzbl %al,%edi /* zero-extend that into %edi */ 214 movzbl %al,%edi /* zero-extend that into %edi */
214 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ 215 call __audit_syscall_exit
215 call audit_syscall_exit 216 movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
216 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
217 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 217 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
218 cli 218 cli
219 TRACE_IRQS_OFF 219 TRACE_IRQS_OFF
@@ -447,9 +447,6 @@ ia32_badsys:
447 movq $-ENOSYS,%rax 447 movq $-ENOSYS,%rax
448 jmp ia32_sysret 448 jmp ia32_sysret
449 449
450quiet_ni_syscall:
451 movq $-ENOSYS,%rax
452 ret
453 CFI_ENDPROC 450 CFI_ENDPROC
454 451
455 .macro PTREGSCALL label, func, arg 452 .macro PTREGSCALL label, func, arg
@@ -494,357 +491,3 @@ ia32_ptregs_common:
494 jmp ia32_sysret /* misbalances the return cache */ 491 jmp ia32_sysret /* misbalances the return cache */
495 CFI_ENDPROC 492 CFI_ENDPROC
496END(ia32_ptregs_common) 493END(ia32_ptregs_common)
497
498 .section .rodata,"a"
499 .align 8
500ia32_sys_call_table:
501 .quad sys_restart_syscall
502 .quad sys_exit
503 .quad stub32_fork
504 .quad sys_read
505 .quad sys_write
506 .quad compat_sys_open /* 5 */
507 .quad sys_close
508 .quad sys32_waitpid
509 .quad sys_creat
510 .quad sys_link
511 .quad sys_unlink /* 10 */
512 .quad stub32_execve
513 .quad sys_chdir
514 .quad compat_sys_time
515 .quad sys_mknod
516 .quad sys_chmod /* 15 */
517 .quad sys_lchown16
518 .quad quiet_ni_syscall /* old break syscall holder */
519 .quad sys_stat
520 .quad sys32_lseek
521 .quad sys_getpid /* 20 */
522 .quad compat_sys_mount /* mount */
523 .quad sys_oldumount /* old_umount */
524 .quad sys_setuid16
525 .quad sys_getuid16
526 .quad compat_sys_stime /* stime */ /* 25 */
527 .quad compat_sys_ptrace /* ptrace */
528 .quad sys_alarm
529 .quad sys_fstat /* (old)fstat */
530 .quad sys_pause
531 .quad compat_sys_utime /* 30 */
532 .quad quiet_ni_syscall /* old stty syscall holder */
533 .quad quiet_ni_syscall /* old gtty syscall holder */
534 .quad sys_access
535 .quad sys_nice
536 .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */
537 .quad sys_sync
538 .quad sys32_kill
539 .quad sys_rename
540 .quad sys_mkdir
541 .quad sys_rmdir /* 40 */
542 .quad sys_dup
543 .quad sys_pipe
544 .quad compat_sys_times
545 .quad quiet_ni_syscall /* old prof syscall holder */
546 .quad sys_brk /* 45 */
547 .quad sys_setgid16
548 .quad sys_getgid16
549 .quad sys_signal
550 .quad sys_geteuid16
551 .quad sys_getegid16 /* 50 */
552 .quad sys_acct
553 .quad sys_umount /* new_umount */
554 .quad quiet_ni_syscall /* old lock syscall holder */
555 .quad compat_sys_ioctl
556 .quad compat_sys_fcntl64 /* 55 */
557 .quad quiet_ni_syscall /* old mpx syscall holder */
558 .quad sys_setpgid
559 .quad quiet_ni_syscall /* old ulimit syscall holder */
560 .quad sys_olduname
561 .quad sys_umask /* 60 */
562 .quad sys_chroot
563 .quad compat_sys_ustat
564 .quad sys_dup2
565 .quad sys_getppid
566 .quad sys_getpgrp /* 65 */
567 .quad sys_setsid
568 .quad sys32_sigaction
569 .quad sys_sgetmask
570 .quad sys_ssetmask
571 .quad sys_setreuid16 /* 70 */
572 .quad sys_setregid16
573 .quad sys32_sigsuspend
574 .quad compat_sys_sigpending
575 .quad sys_sethostname
576 .quad compat_sys_setrlimit /* 75 */
577 .quad compat_sys_old_getrlimit /* old_getrlimit */
578 .quad compat_sys_getrusage
579 .quad compat_sys_gettimeofday
580 .quad compat_sys_settimeofday
581 .quad sys_getgroups16 /* 80 */
582 .quad sys_setgroups16
583 .quad compat_sys_old_select
584 .quad sys_symlink
585 .quad sys_lstat
586 .quad sys_readlink /* 85 */
587 .quad sys_uselib
588 .quad sys_swapon
589 .quad sys_reboot
590 .quad compat_sys_old_readdir
591 .quad sys32_mmap /* 90 */
592 .quad sys_munmap
593 .quad sys_truncate
594 .quad sys_ftruncate
595 .quad sys_fchmod
596 .quad sys_fchown16 /* 95 */
597 .quad sys_getpriority
598 .quad sys_setpriority
599 .quad quiet_ni_syscall /* old profil syscall holder */
600 .quad compat_sys_statfs
601 .quad compat_sys_fstatfs /* 100 */
602 .quad sys_ioperm
603 .quad compat_sys_socketcall
604 .quad sys_syslog
605 .quad compat_sys_setitimer
606 .quad compat_sys_getitimer /* 105 */
607 .quad compat_sys_newstat
608 .quad compat_sys_newlstat
609 .quad compat_sys_newfstat
610 .quad sys_uname
611 .quad stub32_iopl /* 110 */
612 .quad sys_vhangup
613 .quad quiet_ni_syscall /* old "idle" system call */
614 .quad sys32_vm86_warning /* vm86old */
615 .quad compat_sys_wait4
616 .quad sys_swapoff /* 115 */
617 .quad compat_sys_sysinfo
618 .quad sys32_ipc
619 .quad sys_fsync
620 .quad stub32_sigreturn
621 .quad stub32_clone /* 120 */
622 .quad sys_setdomainname
623 .quad sys_newuname
624 .quad sys_modify_ldt
625 .quad compat_sys_adjtimex
626 .quad sys32_mprotect /* 125 */
627 .quad compat_sys_sigprocmask
628 .quad quiet_ni_syscall /* create_module */
629 .quad sys_init_module
630 .quad sys_delete_module
631 .quad quiet_ni_syscall /* 130 get_kernel_syms */
632 .quad sys32_quotactl
633 .quad sys_getpgid
634 .quad sys_fchdir
635 .quad quiet_ni_syscall /* bdflush */
636 .quad sys_sysfs /* 135 */
637 .quad sys_personality
638 .quad quiet_ni_syscall /* for afs_syscall */
639 .quad sys_setfsuid16
640 .quad sys_setfsgid16
641 .quad sys_llseek /* 140 */
642 .quad compat_sys_getdents
643 .quad compat_sys_select
644 .quad sys_flock
645 .quad sys_msync
646 .quad compat_sys_readv /* 145 */
647 .quad compat_sys_writev
648 .quad sys_getsid
649 .quad sys_fdatasync
650 .quad compat_sys_sysctl /* sysctl */
651 .quad sys_mlock /* 150 */
652 .quad sys_munlock
653 .quad sys_mlockall
654 .quad sys_munlockall
655 .quad sys_sched_setparam
656 .quad sys_sched_getparam /* 155 */
657 .quad sys_sched_setscheduler
658 .quad sys_sched_getscheduler
659 .quad sys_sched_yield
660 .quad sys_sched_get_priority_max
661 .quad sys_sched_get_priority_min /* 160 */
662 .quad sys32_sched_rr_get_interval
663 .quad compat_sys_nanosleep
664 .quad sys_mremap
665 .quad sys_setresuid16
666 .quad sys_getresuid16 /* 165 */
667 .quad sys32_vm86_warning /* vm86 */
668 .quad quiet_ni_syscall /* query_module */
669 .quad sys_poll
670 .quad quiet_ni_syscall /* old nfsservctl */
671 .quad sys_setresgid16 /* 170 */
672 .quad sys_getresgid16
673 .quad sys_prctl
674 .quad stub32_rt_sigreturn
675 .quad sys32_rt_sigaction
676 .quad sys32_rt_sigprocmask /* 175 */
677 .quad sys32_rt_sigpending
678 .quad compat_sys_rt_sigtimedwait
679 .quad sys32_rt_sigqueueinfo
680 .quad sys_rt_sigsuspend
681 .quad sys32_pread /* 180 */
682 .quad sys32_pwrite
683 .quad sys_chown16
684 .quad sys_getcwd
685 .quad sys_capget
686 .quad sys_capset
687 .quad stub32_sigaltstack
688 .quad sys32_sendfile
689 .quad quiet_ni_syscall /* streams1 */
690 .quad quiet_ni_syscall /* streams2 */
691 .quad stub32_vfork /* 190 */
692 .quad compat_sys_getrlimit
693 .quad sys_mmap_pgoff
694 .quad sys32_truncate64
695 .quad sys32_ftruncate64
696 .quad sys32_stat64 /* 195 */
697 .quad sys32_lstat64
698 .quad sys32_fstat64
699 .quad sys_lchown
700 .quad sys_getuid
701 .quad sys_getgid /* 200 */
702 .quad sys_geteuid
703 .quad sys_getegid
704 .quad sys_setreuid
705 .quad sys_setregid
706 .quad sys_getgroups /* 205 */
707 .quad sys_setgroups
708 .quad sys_fchown
709 .quad sys_setresuid
710 .quad sys_getresuid
711 .quad sys_setresgid /* 210 */
712 .quad sys_getresgid
713 .quad sys_chown
714 .quad sys_setuid
715 .quad sys_setgid
716 .quad sys_setfsuid /* 215 */
717 .quad sys_setfsgid
718 .quad sys_pivot_root
719 .quad sys_mincore
720 .quad sys_madvise
721 .quad compat_sys_getdents64 /* 220 getdents64 */
722 .quad compat_sys_fcntl64
723 .quad quiet_ni_syscall /* tux */
724 .quad quiet_ni_syscall /* security */
725 .quad sys_gettid
726 .quad sys32_readahead /* 225 */
727 .quad sys_setxattr
728 .quad sys_lsetxattr
729 .quad sys_fsetxattr
730 .quad sys_getxattr
731 .quad sys_lgetxattr /* 230 */
732 .quad sys_fgetxattr
733 .quad sys_listxattr
734 .quad sys_llistxattr
735 .quad sys_flistxattr
736 .quad sys_removexattr /* 235 */
737 .quad sys_lremovexattr
738 .quad sys_fremovexattr
739 .quad sys_tkill
740 .quad sys_sendfile64
741 .quad compat_sys_futex /* 240 */
742 .quad compat_sys_sched_setaffinity
743 .quad compat_sys_sched_getaffinity
744 .quad sys_set_thread_area
745 .quad sys_get_thread_area
746 .quad compat_sys_io_setup /* 245 */
747 .quad sys_io_destroy
748 .quad compat_sys_io_getevents
749 .quad compat_sys_io_submit
750 .quad sys_io_cancel
751 .quad sys32_fadvise64 /* 250 */
752 .quad quiet_ni_syscall /* free_huge_pages */
753 .quad sys_exit_group
754 .quad sys32_lookup_dcookie
755 .quad sys_epoll_create
756 .quad sys_epoll_ctl /* 255 */
757 .quad sys_epoll_wait
758 .quad sys_remap_file_pages
759 .quad sys_set_tid_address
760 .quad compat_sys_timer_create
761 .quad compat_sys_timer_settime /* 260 */
762 .quad compat_sys_timer_gettime
763 .quad sys_timer_getoverrun
764 .quad sys_timer_delete
765 .quad compat_sys_clock_settime
766 .quad compat_sys_clock_gettime /* 265 */
767 .quad compat_sys_clock_getres
768 .quad compat_sys_clock_nanosleep
769 .quad compat_sys_statfs64
770 .quad compat_sys_fstatfs64
771 .quad sys_tgkill /* 270 */
772 .quad compat_sys_utimes
773 .quad sys32_fadvise64_64
774 .quad quiet_ni_syscall /* sys_vserver */
775 .quad sys_mbind
776 .quad compat_sys_get_mempolicy /* 275 */
777 .quad sys_set_mempolicy
778 .quad compat_sys_mq_open
779 .quad sys_mq_unlink
780 .quad compat_sys_mq_timedsend
781 .quad compat_sys_mq_timedreceive /* 280 */
782 .quad compat_sys_mq_notify
783 .quad compat_sys_mq_getsetattr
784 .quad compat_sys_kexec_load /* reserved for kexec */
785 .quad compat_sys_waitid
786 .quad quiet_ni_syscall /* 285: sys_altroot */
787 .quad sys_add_key
788 .quad sys_request_key
789 .quad sys_keyctl
790 .quad sys_ioprio_set
791 .quad sys_ioprio_get /* 290 */
792 .quad sys_inotify_init
793 .quad sys_inotify_add_watch
794 .quad sys_inotify_rm_watch
795 .quad sys_migrate_pages
796 .quad compat_sys_openat /* 295 */
797 .quad sys_mkdirat
798 .quad sys_mknodat
799 .quad sys_fchownat
800 .quad compat_sys_futimesat
801 .quad sys32_fstatat /* 300 */
802 .quad sys_unlinkat
803 .quad sys_renameat
804 .quad sys_linkat
805 .quad sys_symlinkat
806 .quad sys_readlinkat /* 305 */
807 .quad sys_fchmodat
808 .quad sys_faccessat
809 .quad compat_sys_pselect6
810 .quad compat_sys_ppoll
811 .quad sys_unshare /* 310 */
812 .quad compat_sys_set_robust_list
813 .quad compat_sys_get_robust_list
814 .quad sys_splice
815 .quad sys32_sync_file_range
816 .quad sys_tee /* 315 */
817 .quad compat_sys_vmsplice
818 .quad compat_sys_move_pages
819 .quad sys_getcpu
820 .quad sys_epoll_pwait
821 .quad compat_sys_utimensat /* 320 */
822 .quad compat_sys_signalfd
823 .quad sys_timerfd_create
824 .quad sys_eventfd
825 .quad sys32_fallocate
826 .quad compat_sys_timerfd_settime /* 325 */
827 .quad compat_sys_timerfd_gettime
828 .quad compat_sys_signalfd4
829 .quad sys_eventfd2
830 .quad sys_epoll_create1
831 .quad sys_dup3 /* 330 */
832 .quad sys_pipe2
833 .quad sys_inotify_init1
834 .quad compat_sys_preadv
835 .quad compat_sys_pwritev
836 .quad compat_sys_rt_tgsigqueueinfo /* 335 */
837 .quad sys_perf_event_open
838 .quad compat_sys_recvmmsg
839 .quad sys_fanotify_init
840 .quad sys32_fanotify_mark
841 .quad sys_prlimit64 /* 340 */
842 .quad sys_name_to_handle_at
843 .quad compat_sys_open_by_handle_at
844 .quad compat_sys_clock_adjtime
845 .quad sys_syncfs
846 .quad compat_sys_sendmmsg /* 345 */
847 .quad sys_setns
848 .quad compat_sys_process_vm_readv
849 .quad compat_sys_process_vm_writev
850ia32_syscall_end:
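
The audit hunks switch the entry code to the new __audit_syscall_entry()/__audit_syscall_exit() interfaces and make the assembly classify the return value itself. A hedged C rendering of that classification (the helper below is made up; IS_ERR_VALUE and __audit_syscall_exit are the real kernel interfaces):

#include <linux/audit.h>
#include <linux/err.h>

static void audit_exit_sketch(unsigned int eax)
{
	/* Sign-extend first: a 32-bit value in [-MAX_ERRNO, -1] is a
	 * failed syscall; anything else (including large positive
	 * mmap-style results) counts as success. */
	long signed_ax = (long)(int)eax;
	int success = !IS_ERR_VALUE((unsigned long)signed_ax);

	__audit_syscall_exit(success, success ? (long)eax : signed_ax);
}

The removal of the hand-maintained ia32_sys_call_table at the end of this file is the flip side of the generated table added in syscall_ia32.c below.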
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c
new file mode 100644
index 000000000000..51ecd5b4e787
--- /dev/null
+++ b/arch/x86/ia32/nosyscall.c
@@ -0,0 +1,7 @@
1#include <linux/kernel.h>
2#include <linux/errno.h>
3
4long compat_ni_syscall(void)
5{
6 return -ENOSYS;
7}
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index f6f5c53dc903..aec2202a596c 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -287,46 +287,6 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
287 return ret; 287 return ret;
288} 288}
289 289
290asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
291 compat_sigset_t __user *oset,
292 unsigned int sigsetsize)
293{
294 sigset_t s;
295 compat_sigset_t s32;
296 int ret;
297 mm_segment_t old_fs = get_fs();
298
299 if (set) {
300 if (copy_from_user(&s32, set, sizeof(compat_sigset_t)))
301 return -EFAULT;
302 switch (_NSIG_WORDS) {
303 case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
304 case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
305 case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
306 case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
307 }
308 }
309 set_fs(KERNEL_DS);
310 ret = sys_rt_sigprocmask(how,
311 set ? (sigset_t __user *)&s : NULL,
312 oset ? (sigset_t __user *)&s : NULL,
313 sigsetsize);
314 set_fs(old_fs);
315 if (ret)
316 return ret;
317 if (oset) {
318 switch (_NSIG_WORDS) {
319 case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
320 case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
321 case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
322 case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
323 }
324 if (copy_to_user(oset, &s32, sizeof(compat_sigset_t)))
325 return -EFAULT;
326 }
327 return 0;
328}
329
330asmlinkage long sys32_alarm(unsigned int seconds) 290asmlinkage long sys32_alarm(unsigned int seconds)
331{ 291{
332 return alarm_setitimer(seconds); 292 return alarm_setitimer(seconds);
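
The deleted sys32_rt_sigprocmask() repacked the 32-bit sigset by hand under set_fs(KERNEL_DS); on little-endian x86 the 32-bit and 64-bit sigset representations are byte-for-byte identical, so the syscall table can dispatch to the native routine with no conversion at all. For reference, a hedged sketch of the repacking the wrapper used to do (cf. sigset_from_compat() in kernel/compat.c):

#include <linux/compat.h>
#include <linux/signal.h>

static void sigset_from_compat_sketch(sigset_t *set, const compat_sigset_t *c)
{
	switch (_NSIG_WORDS) {
	case 4: set->sig[3] = c->sig[6] | ((long)c->sig[7] << 32);
		/* fall through */
	case 3: set->sig[2] = c->sig[4] | ((long)c->sig[5] << 32);
		/* fall through */
	case 2: set->sig[1] = c->sig[2] | ((long)c->sig[3] << 32);
		/* fall through */
	case 1: set->sig[0] = c->sig[0] | ((long)c->sig[1] << 32);
	}
}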
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c
new file mode 100644
index 000000000000..4754ba0f5d9f
--- /dev/null
+++ b/arch/x86/ia32/syscall_ia32.c
@@ -0,0 +1,25 @@
1/* System call table for ia32 emulation. */
2
3#include <linux/linkage.h>
4#include <linux/sys.h>
5#include <linux/cache.h>
6#include <asm/asm-offsets.h>
7
8#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
9#include <asm/syscalls_32.h>
10#undef __SYSCALL_I386
11
12#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
13
14typedef void (*sys_call_ptr_t)(void);
15
16extern void compat_ni_syscall(void);
17
18const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
19 /*
20 * Smells like a compiler bug -- it doesn't work
21 * when the & below is removed.
22 */
23 [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
24#include <asm/syscalls_32.h>
25};
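
The new table is built with the X-macro technique: the generated <asm/syscalls_32.h> is one master list of __SYSCALL_I386(nr, sym, compat) entries, expanded once to declare every handler and once to emit the table initializers, with a GNU C range initializer supplying the default. A self-contained miniature with made-up entries:

#define SYSCALL_LIST(X)		\
	X(0, my_restart)	\
	X(1, my_exit)

#define DECL(nr, sym) extern void sym(void);
SYSCALL_LIST(DECL)
#undef DECL

typedef void (*sys_call_ptr_t)(void);
extern void my_ni_syscall(void);

#define ENTRY(nr, sym) [nr] = sym,
static const sys_call_ptr_t table[2] = {
	[0 ... 1] = my_ni_syscall,	/* default; later entries override */
	SYSCALL_LIST(ENTRY)
};
#undef ENTRY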
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 6fa90a845e4c..f9c0d3ba9e84 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -14,12 +14,15 @@ header-y += msr.h
14header-y += mtrr.h 14header-y += mtrr.h
15header-y += posix_types_32.h 15header-y += posix_types_32.h
16header-y += posix_types_64.h 16header-y += posix_types_64.h
17header-y += posix_types_x32.h
17header-y += prctl.h 18header-y += prctl.h
18header-y += processor-flags.h 19header-y += processor-flags.h
19header-y += ptrace-abi.h 20header-y += ptrace-abi.h
20header-y += sigcontext32.h 21header-y += sigcontext32.h
21header-y += ucontext.h 22header-y += ucontext.h
22header-y += unistd_32.h
23header-y += unistd_64.h
24header-y += vm86.h 23header-y += vm86.h
25header-y += vsyscall.h 24header-y += vsyscall.h
25
26genhdr-y += unistd_32.h
27genhdr-y += unistd_64.h
28genhdr-y += unistd_x32.h
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 37ad100a2210..49331bedc158 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -145,6 +145,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
145 */ 145 */
146#define ASM_OUTPUT2(a...) a 146#define ASM_OUTPUT2(a...) a
147 147
148/*
149 * use this macro if you need clobbers but no inputs in
150 * alternative_{input,io,call}()
151 */
152#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
153
148struct paravirt_patch_site; 154struct paravirt_patch_site;
149#ifdef CONFIG_PARAVIRT 155#ifdef CONFIG_PARAVIRT
150void apply_paravirt(struct paravirt_patch_site *start, 156void apply_paravirt(struct paravirt_patch_site *start,
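
ASM_NO_INPUT_CLOBBER() exists because alternative_input() and alternative_call() have no separate clobber parameter: the macro expands to a dummy "i" (0) input followed textually by ": <clobbers>", completing the asm statement. A hedged usage sketch, mirroring how the atomic64 rework later in this series uses it:

#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/nops.h>

static inline void mfence_if_needed(void)
{
	/* No real inputs, but the "memory" clobber still has to be
	 * smuggled past alternative_input()'s argument list. */
	alternative_input(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC,
			  ASM_NO_INPUT_CLOBBER("memory"));
}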
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3ab9bdd87e79..d85410171260 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -11,7 +11,6 @@
11#include <linux/atomic.h> 11#include <linux/atomic.h>
12#include <asm/fixmap.h> 12#include <asm/fixmap.h>
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/system.h>
15#include <asm/msr.h> 14#include <asm/msr.h>
16 15
17#define ARCH_APICTIMER_STOPS_ON_C3 1 16#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -288,6 +287,7 @@ struct apic {
288 287
289 int (*probe)(void); 288 int (*probe)(void);
290 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); 289 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
290 int (*apic_id_valid)(int apicid);
291 int (*apic_id_registered)(void); 291 int (*apic_id_registered)(void);
292 292
293 u32 irq_delivery_mode; 293 u32 irq_delivery_mode;
@@ -532,6 +532,11 @@ static inline unsigned int read_apic_id(void)
532 return apic->get_apic_id(reg); 532 return apic->get_apic_id(reg);
533} 533}
534 534
535static inline int default_apic_id_valid(int apicid)
536{
537 return (apicid < 255);
538}
539
535extern void default_setup_apic_routing(void); 540extern void default_setup_apic_routing(void);
536 541
537extern struct apic apic_noop; 542extern struct apic apic_noop;
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 24098aafce0d..198119910da5 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,13 +14,52 @@ typedef struct {
14 14
15#define ATOMIC64_INIT(val) { (val) } 15#define ATOMIC64_INIT(val) { (val) }
16 16
17#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
18#ifndef ATOMIC64_EXPORT
19#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
20#else
21#define ATOMIC64_DECL_ONE(sym) __ATOMIC64_DECL(sym); \
22 ATOMIC64_EXPORT(atomic64_##sym)
23#endif
24
17#ifdef CONFIG_X86_CMPXCHG64 25#ifdef CONFIG_X86_CMPXCHG64
18#define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" 26#define __alternative_atomic64(f, g, out, in...) \
27 asm volatile("call %P[func]" \
28 : out : [func] "i" (atomic64_##g##_cx8), ## in)
29
30#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
19#else 31#else
20#define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) 32#define __alternative_atomic64(f, g, out, in...) \
33 alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
34 X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
35
36#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
37 ATOMIC64_DECL_ONE(sym##_386)
38
39ATOMIC64_DECL_ONE(add_386);
40ATOMIC64_DECL_ONE(sub_386);
41ATOMIC64_DECL_ONE(inc_386);
42ATOMIC64_DECL_ONE(dec_386);
21#endif 43#endif
22 44
23#define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) 45#define alternative_atomic64(f, out, in...) \
46 __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
47
48ATOMIC64_DECL(read);
49ATOMIC64_DECL(set);
50ATOMIC64_DECL(xchg);
51ATOMIC64_DECL(add_return);
52ATOMIC64_DECL(sub_return);
53ATOMIC64_DECL(inc_return);
54ATOMIC64_DECL(dec_return);
55ATOMIC64_DECL(dec_if_positive);
56ATOMIC64_DECL(inc_not_zero);
57ATOMIC64_DECL(add_unless);
58
59#undef ATOMIC64_DECL
60#undef ATOMIC64_DECL_ONE
61#undef __ATOMIC64_DECL
62#undef ATOMIC64_EXPORT
24 63
25/** 64/**
26 * atomic64_cmpxchg - cmpxchg atomic64 variable 65 * atomic64_cmpxchg - cmpxchg atomic64 variable
@@ -50,11 +89,9 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
50 long long o; 89 long long o;
51 unsigned high = (unsigned)(n >> 32); 90 unsigned high = (unsigned)(n >> 32);
52 unsigned low = (unsigned)n; 91 unsigned low = (unsigned)n;
53 asm volatile(ATOMIC64_ALTERNATIVE(xchg) 92 alternative_atomic64(xchg, "=&A" (o),
54 : "=A" (o), "+b" (low), "+c" (high) 93 "S" (v), "b" (low), "c" (high)
55 : "S" (v) 94 : "memory");
56 : "memory"
57 );
58 return o; 95 return o;
59} 96}
60 97
@@ -69,11 +106,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
69{ 106{
70 unsigned high = (unsigned)(i >> 32); 107 unsigned high = (unsigned)(i >> 32);
71 unsigned low = (unsigned)i; 108 unsigned low = (unsigned)i;
72 asm volatile(ATOMIC64_ALTERNATIVE(set) 109 alternative_atomic64(set, /* no output */,
73 : "+b" (low), "+c" (high) 110 "S" (v), "b" (low), "c" (high)
74 : "S" (v) 111 : "eax", "edx", "memory");
75 : "eax", "edx", "memory"
76 );
77} 112}
78 113
79/** 114/**
@@ -82,13 +117,10 @@ static inline void atomic64_set(atomic64_t *v, long long i)
82 * 117 *
83 * Atomically reads the value of @v and returns it. 118 * Atomically reads the value of @v and returns it.
84 */ 119 */
85static inline long long atomic64_read(atomic64_t *v) 120static inline long long atomic64_read(const atomic64_t *v)
86{ 121{
87 long long r; 122 long long r;
88 asm volatile(ATOMIC64_ALTERNATIVE(read) 123 alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
89 : "=A" (r), "+c" (v)
90 : : "memory"
91 );
92 return r; 124 return r;
93 } 125 }
94 126
@@ -101,10 +133,9 @@ static inline long long atomic64_read(atomic64_t *v)
101 */ 133 */
102static inline long long atomic64_add_return(long long i, atomic64_t *v) 134static inline long long atomic64_add_return(long long i, atomic64_t *v)
103{ 135{
104 asm volatile(ATOMIC64_ALTERNATIVE(add_return) 136 alternative_atomic64(add_return,
105 : "+A" (i), "+c" (v) 137 ASM_OUTPUT2("+A" (i), "+c" (v)),
106 : : "memory" 138 ASM_NO_INPUT_CLOBBER("memory"));
107 );
108 return i; 139 return i;
109} 140}
110 141
@@ -113,32 +144,25 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
113 */ 144 */
114static inline long long atomic64_sub_return(long long i, atomic64_t *v) 145static inline long long atomic64_sub_return(long long i, atomic64_t *v)
115{ 146{
116 asm volatile(ATOMIC64_ALTERNATIVE(sub_return) 147 alternative_atomic64(sub_return,
117 : "+A" (i), "+c" (v) 148 ASM_OUTPUT2("+A" (i), "+c" (v)),
118 : : "memory" 149 ASM_NO_INPUT_CLOBBER("memory"));
119 );
120 return i; 150 return i;
121} 151}
122 152
123static inline long long atomic64_inc_return(atomic64_t *v) 153static inline long long atomic64_inc_return(atomic64_t *v)
124{ 154{
125 long long a; 155 long long a;
126 asm volatile(ATOMIC64_ALTERNATIVE(inc_return) 156 alternative_atomic64(inc_return, "=&A" (a),
127 : "=A" (a) 157 "S" (v) : "memory", "ecx");
128 : "S" (v)
129 : "memory", "ecx"
130 );
131 return a; 158 return a;
132} 159}
133 160
134static inline long long atomic64_dec_return(atomic64_t *v) 161static inline long long atomic64_dec_return(atomic64_t *v)
135{ 162{
136 long long a; 163 long long a;
137 asm volatile(ATOMIC64_ALTERNATIVE(dec_return) 164 alternative_atomic64(dec_return, "=&A" (a),
138 : "=A" (a) 165 "S" (v) : "memory", "ecx");
139 : "S" (v)
140 : "memory", "ecx"
141 );
142 return a; 166 return a;
143} 167}
144 168
@@ -151,10 +175,9 @@ static inline long long atomic64_dec_return(atomic64_t *v)
151 */ 175 */
152static inline long long atomic64_add(long long i, atomic64_t *v) 176static inline long long atomic64_add(long long i, atomic64_t *v)
153{ 177{
154 asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) 178 __alternative_atomic64(add, add_return,
155 : "+A" (i), "+c" (v) 179 ASM_OUTPUT2("+A" (i), "+c" (v)),
156 : : "memory" 180 ASM_NO_INPUT_CLOBBER("memory"));
157 );
158 return i; 181 return i;
159} 182}
160 183
@@ -167,10 +190,9 @@ static inline long long atomic64_add(long long i, atomic64_t *v)
167 */ 190 */
168static inline long long atomic64_sub(long long i, atomic64_t *v) 191static inline long long atomic64_sub(long long i, atomic64_t *v)
169{ 192{
170 asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) 193 __alternative_atomic64(sub, sub_return,
171 : "+A" (i), "+c" (v) 194 ASM_OUTPUT2("+A" (i), "+c" (v)),
172 : : "memory" 195 ASM_NO_INPUT_CLOBBER("memory"));
173 );
174 return i; 196 return i;
175} 197}
176 198
@@ -196,10 +218,8 @@ static inline int atomic64_sub_and_test(long long i, atomic64_t *v)
196 */ 218 */
197static inline void atomic64_inc(atomic64_t *v) 219static inline void atomic64_inc(atomic64_t *v)
198{ 220{
199 asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) 221 __alternative_atomic64(inc, inc_return, /* no output */,
200 : : "S" (v) 222 "S" (v) : "memory", "eax", "ecx", "edx");
201 : "memory", "eax", "ecx", "edx"
202 );
203} 223}
204 224
205/** 225/**
@@ -210,10 +230,8 @@ static inline void atomic64_inc(atomic64_t *v)
210 */ 230 */
211static inline void atomic64_dec(atomic64_t *v) 231static inline void atomic64_dec(atomic64_t *v)
212{ 232{
213 asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) 233 __alternative_atomic64(dec, dec_return, /* no output */,
214 : : "S" (v) 234 "S" (v) : "memory", "eax", "ecx", "edx");
215 : "memory", "eax", "ecx", "edx"
216 );
217} 235}
218 236
219/** 237/**
@@ -263,15 +281,15 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v)
263 * @u: ...unless v is equal to u. 281 * @u: ...unless v is equal to u.
264 * 282 *
265 * Atomically adds @a to @v, so long as it was not @u. 283 * Atomically adds @a to @v, so long as it was not @u.
266 * Returns the old value of @v. 284 * Returns non-zero if the add was done, zero otherwise.
267 */ 285 */
268static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) 286static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
269{ 287{
270 unsigned low = (unsigned)u; 288 unsigned low = (unsigned)u;
271 unsigned high = (unsigned)(u >> 32); 289 unsigned high = (unsigned)(u >> 32);
272 asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" 290 alternative_atomic64(add_unless,
273 : "+A" (a), "+c" (v), "+S" (low), "+D" (high) 291 ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
274 : : "memory"); 292 "S" (v) : "memory");
275 return (int)a; 293 return (int)a;
276} 294}
277 295
@@ -279,26 +297,20 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
279static inline int atomic64_inc_not_zero(atomic64_t *v) 297static inline int atomic64_inc_not_zero(atomic64_t *v)
280{ 298{
281 int r; 299 int r;
282 asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) 300 alternative_atomic64(inc_not_zero, "=&a" (r),
283 : "=a" (r) 301 "S" (v) : "ecx", "edx", "memory");
284 : "S" (v)
285 : "ecx", "edx", "memory"
286 );
287 return r; 302 return r;
288} 303}
289 304
290static inline long long atomic64_dec_if_positive(atomic64_t *v) 305static inline long long atomic64_dec_if_positive(atomic64_t *v)
291{ 306{
292 long long r; 307 long long r;
293 asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) 308 alternative_atomic64(dec_if_positive, "=&A" (r),
294 : "=A" (r) 309 "S" (v) : "ecx", "memory");
295 : "S" (v)
296 : "ecx", "memory"
297 );
298 return r; 310 return r;
299} 311}
300 312
301#undef ATOMIC64_ALTERNATIVE 313#undef alternative_atomic64
302#undef ATOMIC64_ALTERNATIVE_ 314#undef __alternative_atomic64
303 315
304#endif /* _ASM_X86_ATOMIC64_32_H */ 316#endif /* _ASM_X86_ATOMIC64_32_H */
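
The net effect of the atomic64_32.h rewrite: the string-pasting ATOMIC64_ALTERNATIVE macros become alternative_atomic64()/__alternative_atomic64(), which go through alternative_call() so each call site is patched at boot to the cmpxchg8b (cx8) routines, or to the generic _386 fallbacks on CPUs without cmpxchg8b. Callers are unaffected; a hedged usage sketch with made-up names:

#include <linux/atomic.h>

static atomic64_t bytes_seen = ATOMIC64_INIT(0);

static long long account(unsigned int len)
{
	/* Which backing implementation runs here is decided once at
	 * boot by the alternatives machinery, not per call. */
	atomic64_add(len, &bytes_seen);
	return atomic64_read(&bytes_seen);
}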
diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/asm/auxvec.h
index 1316b4c35425..77203ac352de 100644
--- a/arch/x86/include/asm/auxvec.h
+++ b/arch/x86/include/asm/auxvec.h
@@ -9,4 +9,11 @@
9#endif 9#endif
10#define AT_SYSINFO_EHDR 33 10#define AT_SYSINFO_EHDR 33
11 11
12/* entries in ARCH_DLINFO: */
13#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
14# define AT_VECTOR_SIZE_ARCH 2
15#else /* else it's non-compat x86-64 */
16# define AT_VECTOR_SIZE_ARCH 1
17#endif
18
12#endif /* _ASM_X86_AUXVEC_H */ 19#endif /* _ASM_X86_AUXVEC_H */
diff --git a/arch/x86/um/asm/system.h b/arch/x86/include/asm/barrier.h
index a459fd9b7598..c6cd358a1eec 100644
--- a/arch/x86/um/asm/system.h
+++ b/arch/x86/include/asm/barrier.h
@@ -1,31 +1,15 @@
1#ifndef _ASM_X86_SYSTEM_H_ 1#ifndef _ASM_X86_BARRIER_H
2#define _ASM_X86_SYSTEM_H_ 2#define _ASM_X86_BARRIER_H
3 3
4#include <asm/asm.h> 4#include <asm/alternative.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h> 5#include <asm/nops.h>
9 6
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/* entries in ARCH_DLINFO: */
14#ifdef CONFIG_IA32_EMULATION
15# define AT_VECTOR_SIZE_ARCH 2
16#else
17# define AT_VECTOR_SIZE_ARCH 1
18#endif
19
20extern unsigned long arch_align_stack(unsigned long sp);
21
22void default_idle(void);
23
24/* 7/*
25 * Force strict CPU ordering. 8 * Force strict CPU ordering.
26 * And yes, this is required on UP too when we're talking 9 * And yes, this is required on UP too when we're talking
27 * to devices. 10 * to devices.
28 */ 11 */
12
29#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
30/* 14/*
31 * Some non-Intel clones support out of order store. wmb() ceases to be a 15 * Some non-Intel clones support out of order store. wmb() ceases to be a
@@ -123,13 +107,10 @@ void default_idle(void);
123 * 107 *
124 * (Could use an alternative three way for this if there was one.) 108 * (Could use an alternative three way for this if there was one.)
125 */ 109 */
126static inline void rdtsc_barrier(void) 110static __always_inline void rdtsc_barrier(void)
127{ 111{
128 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); 112 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
129 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); 113 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
130} 114}
131 115
132extern void *_switch_to(void *prev, void *next, void *last); 116#endif /* _ASM_X86_BARRIER_H */
133#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
134
135#endif
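
This hunk retires the UML <asm/system.h> and leaves rdtsc_barrier() in the new <asm/barrier.h>, now __always_inline. A hedged sketch of its intended use, with rdtscll() being the existing <asm/msr.h> helper:

#include <asm/barrier.h>
#include <asm/msr.h>

static __always_inline unsigned long long ordered_tsc(void)
{
	unsigned long long t;

	/* rdtsc can execute ahead of earlier instructions; the
	 * alternatives pick mfence or lfence per CPU to stop that. */
	rdtsc_barrier();
	rdtscll(t);
	return t;
}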
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index e020d88ec02d..2f90c51cc49d 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -64,6 +64,8 @@ struct setup_header {
64 __u32 payload_offset; 64 __u32 payload_offset;
65 __u32 payload_length; 65 __u32 payload_length;
66 __u64 setup_data; 66 __u64 setup_data;
67 __u64 pref_address;
68 __u32 init_size;
67} __attribute__((packed)); 69} __attribute__((packed));
68 70
69struct sys_desc_table { 71struct sys_desc_table {
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index f654d1bb17fb..11e1152222d0 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -36,4 +36,8 @@ do { \
36#endif /* !CONFIG_BUG */ 36#endif /* !CONFIG_BUG */
37 37
38#include <asm-generic/bug.h> 38#include <asm-generic/bug.h>
39
40
41extern void show_regs_common(void);
42
39#endif /* _ASM_X86_BUG_H */ 43#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 4e12668711e5..9863ee3747da 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -3,6 +3,7 @@
3 3
4/* Caches aren't brain-dead on the intel. */ 4/* Caches aren't brain-dead on the intel. */
5#include <asm-generic/cacheflush.h> 5#include <asm-generic/cacheflush.h>
6#include <asm/special_insns.h>
6 7
7#ifdef CONFIG_X86_PAT 8#ifdef CONFIG_X86_PAT
8/* 9/*
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 0c9fa2745f13..99480e55973d 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -43,7 +43,7 @@ extern void __add_wrong_size(void)
43 switch (sizeof(*(ptr))) { \ 43 switch (sizeof(*(ptr))) { \
44 case __X86_CASE_B: \ 44 case __X86_CASE_B: \
45 asm volatile (lock #op "b %b0, %1\n" \ 45 asm volatile (lock #op "b %b0, %1\n" \
46 : "+r" (__ret), "+m" (*(ptr)) \ 46 : "+q" (__ret), "+m" (*(ptr)) \
47 : : "memory", "cc"); \ 47 : : "memory", "cc"); \
48 break; \ 48 break; \
49 case __X86_CASE_W: \ 49 case __X86_CASE_W: \
@@ -145,13 +145,13 @@ extern void __add_wrong_size(void)
145 145
146#ifdef __HAVE_ARCH_CMPXCHG 146#ifdef __HAVE_ARCH_CMPXCHG
147#define cmpxchg(ptr, old, new) \ 147#define cmpxchg(ptr, old, new) \
148 __cmpxchg((ptr), (old), (new), sizeof(*ptr)) 148 __cmpxchg(ptr, old, new, sizeof(*(ptr)))
149 149
150#define sync_cmpxchg(ptr, old, new) \ 150#define sync_cmpxchg(ptr, old, new) \
151 __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) 151 __sync_cmpxchg(ptr, old, new, sizeof(*(ptr)))
152 152
153#define cmpxchg_local(ptr, old, new) \ 153#define cmpxchg_local(ptr, old, new) \
154 __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) 154 __cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
155#endif 155#endif
156 156
157/* 157/*
@@ -173,7 +173,7 @@ extern void __add_wrong_size(void)
173 switch (sizeof(*(ptr))) { \ 173 switch (sizeof(*(ptr))) { \
174 case __X86_CASE_B: \ 174 case __X86_CASE_B: \
175 asm volatile (lock "addb %b1, %0\n" \ 175 asm volatile (lock "addb %b1, %0\n" \
176 : "+m" (*(ptr)) : "ri" (inc) \ 176 : "+m" (*(ptr)) : "qi" (inc) \
177 : "memory", "cc"); \ 177 : "memory", "cc"); \
178 break; \ 178 break; \
179 case __X86_CASE_W: \ 179 case __X86_CASE_W: \
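
The constraint changes in cmpxchg.h matter on 32-bit builds: byte-sized operands need a register with an 8-bit sub-register name (%al/%bl/%cl/%dl), which is what "q" guarantees, whereas "r" may hand the asm %esi or %edi, whose low byte cannot be addressed, and assembly then fails. A hedged standalone illustration:

static inline unsigned char xchg_byte_sketch(unsigned char *p,
					     unsigned char v)
{
	/* "+q" confines v to eax/ebx/ecx/edx so %b0 is valid. */
	asm volatile("xchgb %b0, %1"
		     : "+q" (v), "+m" (*p)
		     : : "memory");
	return v;
}

The added parentheses in sizeof(*(ptr)) are the usual macro-hygiene fix, so an argument like p + 1 is measured as a whole expression.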
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 30d737ef2a42..d6805798d6fc 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -6,7 +6,9 @@
6 */ 6 */
7#include <linux/types.h> 7#include <linux/types.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <asm/processor.h>
9#include <asm/user32.h> 10#include <asm/user32.h>
11#include <asm/unistd.h>
10 12
11#define COMPAT_USER_HZ 100 13#define COMPAT_USER_HZ 100
12#define COMPAT_UTS_MACHINE "i686\0\0" 14#define COMPAT_UTS_MACHINE "i686\0\0"
@@ -186,7 +188,20 @@ struct compat_shmid64_ds {
186/* 188/*
187 * The type of struct elf_prstatus.pr_reg in compatible core dumps. 189 * The type of struct elf_prstatus.pr_reg in compatible core dumps.
188 */ 190 */
191#ifdef CONFIG_X86_X32_ABI
192typedef struct user_regs_struct compat_elf_gregset_t;
193
194#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216)
195#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296)
196#define SET_PR_FPVALID(S,V) \
197 do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \
198 while (0)
199
200#define COMPAT_USE_64BIT_TIME \
201 (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
202#else
189typedef struct user_regs_struct32 compat_elf_gregset_t; 203typedef struct user_regs_struct32 compat_elf_gregset_t;
204#endif
190 205
191/* 206/*
192 * A pointer passed in from user mode. This should not 207 * A pointer passed in from user mode. This should not
@@ -208,13 +223,30 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
208 223
209static inline void __user *arch_compat_alloc_user_space(long len) 224static inline void __user *arch_compat_alloc_user_space(long len)
210{ 225{
211 struct pt_regs *regs = task_pt_regs(current); 226 compat_uptr_t sp;
212 return (void __user *)regs->sp - len; 227
228 if (test_thread_flag(TIF_IA32)) {
229 sp = task_pt_regs(current)->sp;
230 } else {
231 /* -128 for the x32 ABI redzone */
232 sp = percpu_read(old_rsp) - 128;
233 }
234
235 return (void __user *)round_down(sp - len, 16);
236}
237
238static inline bool is_x32_task(void)
239{
240#ifdef CONFIG_X86_X32_ABI
241 if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
242 return true;
243#endif
244 return false;
213} 245}
214 246
215static inline int is_compat_task(void) 247static inline bool is_compat_task(void)
216{ 248{
217 return current_thread_info()->status & TS_COMPAT; 249 return is_ia32_task() || is_x32_task();
218} 250}
219 251
220#endif /* _ASM_X86_COMPAT_H */ 252#endif /* _ASM_X86_COMPAT_H */
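
The compat.h changes introduce the x32 plumbing: an x32 system call is a 64-bit syscall with __X32_SYSCALL_BIT set in the syscall number, so a task's ABI is recoverable from the saved orig_ax, and is_compat_task() becomes ia32-or-x32. A hedged sketch of the bit test (the helper name is made up; __X32_SYSCALL_BIT is the real constant):

#include <linux/types.h>
#include <asm/unistd.h>

static inline bool syscall_nr_is_x32(unsigned long orig_ax)
{
	return (orig_ax & __X32_SYSCALL_BIT) != 0;
}

The reworked arch_compat_alloc_user_space() likewise picks the stack per ABI and keeps clear of the 128-byte x32 redzone.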
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
new file mode 100644
index 000000000000..ff501e511d91
--- /dev/null
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -0,0 +1,13 @@
1#ifndef _CPU_DEVICE_ID
2#define _CPU_DEVICE_ID 1
3
4/*
5 * Declare drivers belonging to specific x86 CPUs
6 * Similar in spirit to pci_device_id and related PCI functions
7 */
8
9#include <linux/mod_devicetable.h>
10
11extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
12
13#endif
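
x86_match_cpu() gives drivers a declarative way to bind to CPU models, by analogy with pci_device_id tables. A hedged usage sketch in which every table entry is made up:

#include <linux/module.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>

static const struct x86_cpu_id demo_ids[] = {
	{ .vendor = X86_VENDOR_INTEL, .family = 6,
	  .model = X86_MODEL_ANY, .feature = X86_FEATURE_ANY },
	{}	/* terminator */
};
MODULE_DEVICE_TABLE(x86cpu, demo_ids);

static int __init demo_init(void)
{
	if (!x86_match_cpu(demo_ids))
		return -ENODEV;	/* not a CPU this driver supports */
	return 0;
}
module_init(demo_init);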
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 17c5d4bdee5e..340ee49961a6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -159,6 +159,7 @@
159#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ 159#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
160#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */ 160#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
161#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */ 161#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
162#define X86_FEATURE_TCE (6*32+17) /* translation cache extension */
162#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ 163#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
163#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ 164#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
164#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ 165#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
@@ -176,6 +177,7 @@
176#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ 177#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
177#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ 178#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
178#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ 179#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
180#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
179 181
180/* Virtualization flags: Linux defined, word 8 */ 182/* Virtualization flags: Linux defined, word 8 */
181#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ 183#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
@@ -198,10 +200,13 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
 #define X86_FEATURE_BMI1	(9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE		(9*32+ 4) /* Hardware Lock Elision */
 #define X86_FEATURE_AVX2	(9*32+ 5) /* AVX2 instructions */
 #define X86_FEATURE_SMEP	(9*32+ 7) /* Supervisor Mode Execution Protection */
 #define X86_FEATURE_BMI2	(9*32+ 8) /* 2nd group bit manipulation extensions */
 #define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID	(9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM		(9*32+11) /* Restricted Transactional Memory */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 078ad0caefc6..2d91580bf228 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -78,8 +78,75 @@
  */
 #ifdef __KERNEL__
 
+#include <linux/bug.h>
+
 DECLARE_PER_CPU(unsigned long, cpu_dr7);
 
+#ifndef CONFIG_PARAVIRT
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register)				\
+	(var) = native_get_debugreg(register)
+#define set_debugreg(value, register)				\
+	native_set_debugreg(register, value)
+#endif
+
+static inline unsigned long native_get_debugreg(int regno)
+{
+	unsigned long val = 0;	/* Damn you, gcc! */
+
+	switch (regno) {
+	case 0:
+		asm("mov %%db0, %0" :"=r" (val));
+		break;
+	case 1:
+		asm("mov %%db1, %0" :"=r" (val));
+		break;
+	case 2:
+		asm("mov %%db2, %0" :"=r" (val));
+		break;
+	case 3:
+		asm("mov %%db3, %0" :"=r" (val));
+		break;
+	case 6:
+		asm("mov %%db6, %0" :"=r" (val));
+		break;
+	case 7:
+		asm("mov %%db7, %0" :"=r" (val));
+		break;
+	default:
+		BUG();
+	}
+	return val;
+}
+
+static inline void native_set_debugreg(int regno, unsigned long value)
+{
+	switch (regno) {
+	case 0:
+		asm("mov %0, %%db0"	::"r" (value));
+		break;
+	case 1:
+		asm("mov %0, %%db1"	::"r" (value));
+		break;
+	case 2:
+		asm("mov %0, %%db2"	::"r" (value));
+		break;
+	case 3:
+		asm("mov %0, %%db3"	::"r" (value));
+		break;
+	case 6:
+		asm("mov %0, %%db6"	::"r" (value));
+		break;
+	case 7:
+		asm("mov %0, %%db7"	::"r" (value));
+		break;
+	default:
+		BUG();
+	}
+}
+
 static inline void hw_breakpoint_disable(void)
 {
 	/* Zero the control register for HW Breakpoint */
@@ -101,6 +168,28 @@ extern void aout_dump_debugregs(struct user *dump);
 
 extern void hw_breakpoint_restore(void);
 
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(int, debug_stack_usage);
+static inline void debug_stack_usage_inc(void)
+{
+	__get_cpu_var(debug_stack_usage)++;
+}
+static inline void debug_stack_usage_dec(void)
+{
+	__get_cpu_var(debug_stack_usage)--;
+}
+int is_debug_stack(unsigned long addr);
+void debug_stack_set_zero(void);
+void debug_stack_reset(void);
+#else /* !X86_64 */
+static inline int is_debug_stack(unsigned long addr) { return 0; }
+static inline void debug_stack_set_zero(void) { }
+static inline void debug_stack_reset(void) { }
+static inline void debug_stack_usage_inc(void) { }
+static inline void debug_stack_usage_dec(void) { }
+#endif /* X86_64 */
+
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_DEBUGREG_H */
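A minimal sketch of the accessors that just moved into this header (the caller is hypothetical; on CONFIG_PARAVIRT kernels get/set_debugreg resolve through paravirt ops instead):

/* Hypothetical: park all hardware breakpoints, returning the old DR7. */
static inline unsigned long my_pause_hw_breakpoints(void)
{
	unsigned long dr7;

	get_debugreg(dr7, 7);	/* expands to native_get_debugreg(7) */
	set_debugreg(0UL, 7);	/* same effect as hw_breakpoint_disable() */
	return dr7;
}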
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 41935fadfdfc..e95822d683f4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
 
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
 	desc->limit = (limit >> 16) & 0xf;
 }
 
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+	gate_desc s;
+
+	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+	write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
+
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
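The expected caller of set_nmi_gate() is 64-bit trap setup; a sketch, assuming the usual asm entry stub named nmi and vector 2 (the NMI vector):

extern void nmi(void);			/* asm entry point (assumed name) */

static void __init my_setup_nmi_idt(void)	/* hypothetical wrapper */
{
	set_nmi_gate(2, &nmi);		/* install NMI in the spare IDT copy */
}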
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index ed3065fd6314..4b4331d71935 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -59,7 +59,8 @@ extern int dma_supported(struct device *hwdev, u64 mask);
 extern int dma_set_mask(struct device *dev, u64 mask);
 
 extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-					dma_addr_t *dma_addr, gfp_t flag);
+					dma_addr_t *dma_addr, gfp_t flag,
+					struct dma_attrs *attrs);
 
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
@@ -111,9 +112,11 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
 	return gfp;
 }
 
+#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
+
 static inline void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp)
+dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, struct dma_attrs *attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 	void *memory;
@@ -129,18 +132,21 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	if (!is_device_dma_capable(dev))
 		return NULL;
 
-	if (!ops->alloc_coherent)
+	if (!ops->alloc)
 		return NULL;
 
-	memory = ops->alloc_coherent(dev, size, dma_handle,
-				     dma_alloc_coherent_gfp_flags(dev, gfp));
+	memory = ops->alloc(dev, size, dma_handle,
+			    dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
 	debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
 
 	return memory;
 }
 
-static inline void dma_free_coherent(struct device *dev, size_t size,
-				     void *vaddr, dma_addr_t bus)
+#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t bus,
+				  struct dma_attrs *attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 
@@ -150,8 +156,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
 		return;
 
 	debug_dma_free_coherent(dev, size, vaddr, bus);
-	if (ops->free_coherent)
-		ops->free_coherent(dev, size, vaddr, bus);
+	if (ops->free)
+		ops->free(dev, size, vaddr, bus, attrs);
 }
 
 #endif
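Existing drivers are unaffected: dma_alloc_coherent()/dma_free_coherent() now expand to the attrs variants with a NULL attrs pointer. A sketch of an attrs-aware caller (device, size and helper names are hypothetical):

/* Hypothetical: allocate and free a DMA ring through the new entry points. */
static void *my_alloc_ring(struct device *dev, dma_addr_t *handle)
{
	return dma_alloc_attrs(dev, 4096, handle, GFP_KERNEL, NULL);
}

static void my_free_ring(struct device *dev, void *ring, dma_addr_t handle)
{
	dma_free_attrs(dev, 4096, ring, handle, NULL);
}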
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 7093e4a6a0bc..c9dcc181d4d1 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,6 +3,8 @@
 
 #ifdef CONFIG_X86_32
 
+#define EFI_LOADER_SIGNATURE	"EL32"
+
 extern unsigned long asmlinkage efi_call_phys(void *, ...);
 
 #define efi_call_phys0(f)		efi_call_phys(f)
@@ -37,6 +39,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 
 #else /* !CONFIG_X86_32 */
 
+#define EFI_LOADER_SIGNATURE	"EL64"
+
 extern u64 efi_call0(void *fp);
 extern u64 efi_call1(void *fp, u64 arg1);
 extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
@@ -91,7 +95,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 
 extern int add_efi_memmap;
 extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern void efi_memblock_x86_reserve_range(void);
+extern int efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 5f962df30d0f..5939f44fe0c0 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -84,7 +84,6 @@ extern unsigned int vdso_enabled;
 	(((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
 
 #include <asm/processor.h>
-#include <asm/system.h>
 
 #ifdef CONFIG_X86_32
 #include <asm/desc.h>
@@ -156,7 +155,12 @@ do { \
 #define elf_check_arch(x)			\
 	((x)->e_machine == EM_X86_64)
 
-#define compat_elf_check_arch(x)	elf_check_arch_ia32(x)
+#define compat_elf_check_arch(x)					\
+	(elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64)
+
+#if __USER32_DS != __USER_DS
+# error "The following code assumes __USER32_DS == __USER_DS"
+#endif
 
 static inline void elf_common_init(struct thread_struct *t,
 				   struct pt_regs *regs, const u16 ds)
@@ -179,8 +183,9 @@ static inline void elf_common_init(struct thread_struct *t,
 void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp);
 #define compat_start_thread start_thread_ia32
 
-void set_personality_ia32(void);
-#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32()
+void set_personality_ia32(bool);
+#define COMPAT_SET_PERSONALITY(ex)			\
+	set_personality_ia32((ex).e_machine == EM_X86_64)
 
 #define COMPAT_ELF_PLATFORM			("i686")
 
@@ -287,7 +292,7 @@ do { \
 #define VDSO_HIGH_BASE		0xffffe000U /* CONFIG_COMPAT_VDSO address */
 
 /* 1GB for 64bit, 8MB for 32bit */
-#define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff)
+#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
 
 #define ARCH_DLINFO							\
 do {									\
@@ -296,9 +301,20 @@ do { \
 		(unsigned long)current->mm->context.vdso);		\
 } while (0)
 
+#define ARCH_DLINFO_X32							\
+do {									\
+	if (vdso_enabled)						\
+		NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
+			    (unsigned long)current->mm->context.vdso);	\
+} while (0)
+
 #define AT_SYSINFO		32
 
-#define COMPAT_ARCH_DLINFO	ARCH_DLINFO_IA32(sysctl_vsyscall32)
+#define COMPAT_ARCH_DLINFO						\
+if (test_thread_flag(TIF_X32))						\
+	ARCH_DLINFO_X32;						\
+else									\
+	ARCH_DLINFO_IA32(sysctl_vsyscall32)
 
 #define COMPAT_ELF_ET_DYN_BASE	(TASK_UNMAPPED_BASE + 0x1000000)
304 320
@@ -314,6 +330,8 @@ struct linux_binprm;
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 				       int uses_interp);
+extern int x32_setup_additional_pages(struct linux_binprm *bprm,
+				      int uses_interp);
 
 extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 #define compat_arch_setup_additional_pages syscall32_setup_pages
@@ -330,7 +348,7 @@ static inline int mmap_is_ia32(void)
 	return 1;
 #endif
 #ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32))
+	if (test_thread_flag(TIF_ADDR32))
 		return 1;
 #endif
 	return 0;
diff --git a/arch/x86/include/asm/exec.h b/arch/x86/include/asm/exec.h
new file mode 100644
index 000000000000..54c2e1db274a
--- /dev/null
+++ b/arch/x86/include/asm/exec.h
@@ -0,0 +1 @@
/* define arch_align_stack() here */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 460c74e4852c..4da3c0c4c974 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -117,7 +117,7 @@ enum fixed_addresses {
 #endif
 	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
 	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
-#ifdef CONFIG_X86_MRST
+#ifdef CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
 	__end_of_permanent_fixed_addresses,
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
new file mode 100644
index 000000000000..4fa88154e4de
--- /dev/null
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -0,0 +1,520 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * x86-64 work by Andi Kleen 2002
8 */
9
10#ifndef _FPU_INTERNAL_H
11#define _FPU_INTERNAL_H
12
13#include <linux/kernel_stat.h>
14#include <linux/regset.h>
15#include <linux/slab.h>
16#include <asm/asm.h>
17#include <asm/cpufeature.h>
18#include <asm/processor.h>
19#include <asm/sigcontext.h>
20#include <asm/user.h>
21#include <asm/uaccess.h>
22#include <asm/xsave.h>
23
24extern unsigned int sig_xstate_size;
25extern void fpu_init(void);
26
27DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
28
29extern user_regset_active_fn fpregs_active, xfpregs_active;
30extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
31 xstateregs_get;
32extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
33 xstateregs_set;
34
35
36/*
37 * xstateregs_active == fpregs_active. Please refer to the comment
38 * at the definition of fpregs_active.
39 */
40#define xstateregs_active fpregs_active
41
42extern struct _fpx_sw_bytes fx_sw_reserved;
43#ifdef CONFIG_IA32_EMULATION
44extern unsigned int sig_xstate_ia32_size;
45extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
46struct _fpstate_ia32;
47struct _xstate_ia32;
48extern int save_i387_xstate_ia32(void __user *buf);
49extern int restore_i387_xstate_ia32(void __user *buf);
50#endif
51
52#ifdef CONFIG_MATH_EMULATION
53extern void finit_soft_fpu(struct i387_soft_struct *soft);
54#else
55static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
56#endif
57
58#define X87_FSW_ES (1 << 7) /* Exception Summary */
59
60static __always_inline __pure bool use_xsaveopt(void)
61{
62 return static_cpu_has(X86_FEATURE_XSAVEOPT);
63}
64
65static __always_inline __pure bool use_xsave(void)
66{
67 return static_cpu_has(X86_FEATURE_XSAVE);
68}
69
70static __always_inline __pure bool use_fxsr(void)
71{
72 return static_cpu_has(X86_FEATURE_FXSR);
73}
74
75extern void __sanitize_i387_state(struct task_struct *);
76
77static inline void sanitize_i387_state(struct task_struct *tsk)
78{
79 if (!use_xsaveopt())
80 return;
81 __sanitize_i387_state(tsk);
82}
83
84#ifdef CONFIG_X86_64
85static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
86{
87 int err;
88
89 /* See comment in fxsave() below. */
90#ifdef CONFIG_AS_FXSAVEQ
91 asm volatile("1: fxrstorq %[fx]\n\t"
92 "2:\n"
93 ".section .fixup,\"ax\"\n"
94 "3: movl $-1,%[err]\n"
95 " jmp 2b\n"
96 ".previous\n"
97 _ASM_EXTABLE(1b, 3b)
98 : [err] "=r" (err)
99 : [fx] "m" (*fx), "0" (0));
100#else
101 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
102 "2:\n"
103 ".section .fixup,\"ax\"\n"
104 "3: movl $-1,%[err]\n"
105 " jmp 2b\n"
106 ".previous\n"
107 _ASM_EXTABLE(1b, 3b)
108 : [err] "=r" (err)
109 : [fx] "R" (fx), "m" (*fx), "0" (0));
110#endif
111 return err;
112}
113
114static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
115{
116 int err;
117
118 /*
119 * Clear the bytes not touched by the fxsave and reserved
120 * for the SW usage.
121 */
122 err = __clear_user(&fx->sw_reserved,
123 sizeof(struct _fpx_sw_bytes));
124 if (unlikely(err))
125 return -EFAULT;
126
127 /* See comment in fxsave() below. */
128#ifdef CONFIG_AS_FXSAVEQ
129 asm volatile("1: fxsaveq %[fx]\n\t"
130 "2:\n"
131 ".section .fixup,\"ax\"\n"
132 "3: movl $-1,%[err]\n"
133 " jmp 2b\n"
134 ".previous\n"
135 _ASM_EXTABLE(1b, 3b)
136 : [err] "=r" (err), [fx] "=m" (*fx)
137 : "0" (0));
138#else
139 asm volatile("1: rex64/fxsave (%[fx])\n\t"
140 "2:\n"
141 ".section .fixup,\"ax\"\n"
142 "3: movl $-1,%[err]\n"
143 " jmp 2b\n"
144 ".previous\n"
145 _ASM_EXTABLE(1b, 3b)
146 : [err] "=r" (err), "=m" (*fx)
147 : [fx] "R" (fx), "0" (0));
148#endif
149 if (unlikely(err) &&
150 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
151 err = -EFAULT;
152 /* No need to clear here because the caller clears USED_MATH */
153 return err;
154}
155
156static inline void fpu_fxsave(struct fpu *fpu)
157{
158 /* Using "rex64; fxsave %0" is broken because, if the memory operand
159 uses any extended registers for addressing, a second REX prefix
160 will be generated (to the assembler, rex64 followed by semicolon
161 is a separate instruction), and hence the 64-bitness is lost. */
162
163#ifdef CONFIG_AS_FXSAVEQ
164 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
165 starting with gas 2.16. */
166 __asm__ __volatile__("fxsaveq %0"
167 : "=m" (fpu->state->fxsave));
168#else
169 /* Using, as a workaround, the properly prefixed form below isn't
170 accepted by any binutils version so far released, complaining that
171 the same type of prefix is used twice if an extended register is
172 needed for addressing (fix submitted to mainline 2005-11-21).
173 asm volatile("rex64/fxsave %0"
174 : "=m" (fpu->state->fxsave));
175 This, however, we can work around by forcing the compiler to select
176 an addressing mode that doesn't require extended registers. */
177 asm volatile("rex64/fxsave (%[fx])"
178 : "=m" (fpu->state->fxsave)
179 : [fx] "R" (&fpu->state->fxsave));
180#endif
181}
182
183#else /* CONFIG_X86_32 */
184
185/* perform fxrstor iff the processor has extended states, otherwise frstor */
186static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
187{
188 /*
189 * The "nop" is needed to make the instructions the same
190 * length.
191 */
192 alternative_input(
193 "nop ; frstor %1",
194 "fxrstor %1",
195 X86_FEATURE_FXSR,
196 "m" (*fx));
197
198 return 0;
199}
200
201static inline void fpu_fxsave(struct fpu *fpu)
202{
203 asm volatile("fxsave %[fx]"
204 : [fx] "=m" (fpu->state->fxsave));
205}
206
207#endif /* CONFIG_X86_64 */
208
209/*
210 * These must be called with preempt disabled. Returns
211 * 'true' if the FPU state is still intact.
212 */
213static inline int fpu_save_init(struct fpu *fpu)
214{
215 if (use_xsave()) {
216 fpu_xsave(fpu);
217
218 /*
219 * xsave header may indicate the init state of the FP.
220 */
221 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
222 return 1;
223 } else if (use_fxsr()) {
224 fpu_fxsave(fpu);
225 } else {
226 asm volatile("fnsave %[fx]; fwait"
227 : [fx] "=m" (fpu->state->fsave));
228 return 0;
229 }
230
231 /*
232 * If exceptions are pending, we need to clear them so
233 * that we don't randomly get exceptions later.
234 *
235 * FIXME! Is this perhaps only true for the old-style
236 * irq13 case? Maybe we could leave the x87 state
237 * intact otherwise?
238 */
239 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
240 asm volatile("fnclex");
241 return 0;
242 }
243 return 1;
244}
245
246static inline int __save_init_fpu(struct task_struct *tsk)
247{
248 return fpu_save_init(&tsk->thread.fpu);
249}
250
251static inline int fpu_fxrstor_checking(struct fpu *fpu)
252{
253 return fxrstor_checking(&fpu->state->fxsave);
254}
255
256static inline int fpu_restore_checking(struct fpu *fpu)
257{
258 if (use_xsave())
259 return fpu_xrstor_checking(fpu);
260 else
261 return fpu_fxrstor_checking(fpu);
262}
263
264static inline int restore_fpu_checking(struct task_struct *tsk)
265{
266 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
267 is pending. Clear the x87 state here by setting it to fixed
268 values. "m" is a random variable that should be in L1 */
269 alternative_input(
270 ASM_NOP8 ASM_NOP2,
271 "emms\n\t" /* clear stack tags */
272 "fildl %P[addr]", /* set F?P to defined value */
273 X86_FEATURE_FXSAVE_LEAK,
274 [addr] "m" (tsk->thread.fpu.has_fpu));
275
276 return fpu_restore_checking(&tsk->thread.fpu);
277}
278
279/*
280 * Software FPU state helpers. Careful: these need to
281 * be preemption protection *and* they need to be
282 * properly paired with the CR0.TS changes!
283 */
284static inline int __thread_has_fpu(struct task_struct *tsk)
285{
286 return tsk->thread.fpu.has_fpu;
287}
288
289/* Must be paired with an 'stts' after! */
290static inline void __thread_clear_has_fpu(struct task_struct *tsk)
291{
292 tsk->thread.fpu.has_fpu = 0;
293 percpu_write(fpu_owner_task, NULL);
294}
295
296/* Must be paired with a 'clts' before! */
297static inline void __thread_set_has_fpu(struct task_struct *tsk)
298{
299 tsk->thread.fpu.has_fpu = 1;
300 percpu_write(fpu_owner_task, tsk);
301}
302
303/*
304 * Encapsulate the CR0.TS handling together with the
305 * software flag.
306 *
307 * These generally need preemption protection to work,
308 * do try to avoid using these on their own.
309 */
310static inline void __thread_fpu_end(struct task_struct *tsk)
311{
312 __thread_clear_has_fpu(tsk);
313 stts();
314}
315
316static inline void __thread_fpu_begin(struct task_struct *tsk)
317{
318 clts();
319 __thread_set_has_fpu(tsk);
320}
321
322/*
323 * FPU state switching for scheduling.
324 *
325 * This is a two-stage process:
326 *
327 * - switch_fpu_prepare() saves the old state and
328 * sets the new state of the CR0.TS bit. This is
329 * done within the context of the old process.
330 *
331 * - switch_fpu_finish() restores the new state as
332 * necessary.
333 */
334typedef struct { int preload; } fpu_switch_t;
335
336/*
337 * FIXME! We could do a totally lazy restore, but we need to
338 * add a per-cpu "this was the task that last touched the FPU
339 * on this CPU" variable, and the task needs to have a "I last
340 * touched the FPU on this CPU" and check them.
341 *
342 * We don't do that yet, so "fpu_lazy_restore()" always returns
343 * false, but some day..
344 */
345static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
346{
347 return new == percpu_read_stable(fpu_owner_task) &&
348 cpu == new->thread.fpu.last_cpu;
349}
350
351static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
352{
353 fpu_switch_t fpu;
354
355 fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
356 if (__thread_has_fpu(old)) {
357 if (!__save_init_fpu(old))
358 cpu = ~0;
359 old->thread.fpu.last_cpu = cpu;
360 old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
361
362 /* Don't change CR0.TS if we just switch! */
363 if (fpu.preload) {
364 new->fpu_counter++;
365 __thread_set_has_fpu(new);
366 prefetch(new->thread.fpu.state);
367 } else
368 stts();
369 } else {
370 old->fpu_counter = 0;
371 old->thread.fpu.last_cpu = ~0;
372 if (fpu.preload) {
373 new->fpu_counter++;
374 if (fpu_lazy_restore(new, cpu))
375 fpu.preload = 0;
376 else
377 prefetch(new->thread.fpu.state);
378 __thread_fpu_begin(new);
379 }
380 }
381 return fpu;
382}
383
384/*
385 * By the time this gets called, we've already cleared CR0.TS and
386 * given the process the FPU if we are going to preload the FPU
387 * state - all we need to do is to conditionally restore the register
388 * state itself.
389 */
390static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
391{
392 if (fpu.preload) {
393 if (unlikely(restore_fpu_checking(new)))
394 __thread_fpu_end(new);
395 }
396}
397
398/*
399 * Signal frame handlers...
400 */
401extern int save_i387_xstate(void __user *buf);
402extern int restore_i387_xstate(void __user *buf);
403
404static inline void __clear_fpu(struct task_struct *tsk)
405{
406 if (__thread_has_fpu(tsk)) {
407 /* Ignore delayed exceptions from user space */
408 asm volatile("1: fwait\n"
409 "2:\n"
410 _ASM_EXTABLE(1b, 2b));
411 __thread_fpu_end(tsk);
412 }
413}
414
415/*
416 * The actual user_fpu_begin/end() functions
417 * need to be preemption-safe.
418 *
419 * NOTE! user_fpu_end() must be used only after you
420 * have saved the FP state, and user_fpu_begin() must
421 * be used only immediately before restoring it.
422 * These functions do not do any save/restore on
423 * their own.
424 */
425static inline void user_fpu_end(void)
426{
427 preempt_disable();
428 __thread_fpu_end(current);
429 preempt_enable();
430}
431
432static inline void user_fpu_begin(void)
433{
434 preempt_disable();
435 if (!user_has_fpu())
436 __thread_fpu_begin(current);
437 preempt_enable();
438}
439
440/*
441 * These disable preemption on their own and are safe
442 */
443static inline void save_init_fpu(struct task_struct *tsk)
444{
445 WARN_ON_ONCE(!__thread_has_fpu(tsk));
446 preempt_disable();
447 __save_init_fpu(tsk);
448 __thread_fpu_end(tsk);
449 preempt_enable();
450}
451
452static inline void clear_fpu(struct task_struct *tsk)
453{
454 preempt_disable();
455 __clear_fpu(tsk);
456 preempt_enable();
457}
458
459/*
460 * i387 state interaction
461 */
462static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
463{
464 if (cpu_has_fxsr) {
465 return tsk->thread.fpu.state->fxsave.cwd;
466 } else {
467 return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
468 }
469}
470
471static inline unsigned short get_fpu_swd(struct task_struct *tsk)
472{
473 if (cpu_has_fxsr) {
474 return tsk->thread.fpu.state->fxsave.swd;
475 } else {
476 return (unsigned short)tsk->thread.fpu.state->fsave.swd;
477 }
478}
479
480static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
481{
482 if (cpu_has_xmm) {
483 return tsk->thread.fpu.state->fxsave.mxcsr;
484 } else {
485 return MXCSR_DEFAULT;
486 }
487}
488
489static bool fpu_allocated(struct fpu *fpu)
490{
491 return fpu->state != NULL;
492}
493
494static inline int fpu_alloc(struct fpu *fpu)
495{
496 if (fpu_allocated(fpu))
497 return 0;
498 fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
499 if (!fpu->state)
500 return -ENOMEM;
501 WARN_ON((unsigned long)fpu->state & 15);
502 return 0;
503}
504
505static inline void fpu_free(struct fpu *fpu)
506{
507 if (fpu->state) {
508 kmem_cache_free(task_xstate_cachep, fpu->state);
509 fpu->state = NULL;
510 }
511}
512
513static inline void fpu_copy(struct fpu *dst, struct fpu *src)
514{
515 memcpy(dst->state, src->state, xstate_size);
516}
517
518extern void fpu_finit(struct fpu *fpu);
519
520#endif
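The two-stage switch API above is meant to bracket the actual task switch; a simplified sketch of the expected call pattern in __switch_to() (the wrapper and surrounding locals are hypothetical):

/* Sketch only: prepare on the outgoing task, finish on the incoming one. */
static void my_context_switch(struct task_struct *prev,
			      struct task_struct *next, unsigned int cpu)
{
	fpu_switch_t fpu;

	fpu = switch_fpu_prepare(prev, next, cpu);	/* may set CR0.TS */
	/* ... switch stack, segment registers and TLS here ... */
	switch_fpu_finish(next, fpu);			/* restore if preloading */
}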
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index d09bb03653f0..71ecbcba1a4e 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -9,7 +9,6 @@
 #include <asm/asm.h>
 #include <asm/errno.h>
 #include <asm/processor.h>
-#include <asm/system.h>
 
 #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg)	\
 	asm volatile("1:\t" insn "\n"				\
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index da0b3ca815b7..382f75d735f3 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -7,7 +7,6 @@
 typedef struct {
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* arch dependent */
-	unsigned int irq0_irqs;
 #ifdef CONFIG_X86_LOCAL_APIC
 	unsigned int apic_timer_irqs;	/* arch dependent */
 	unsigned int irq_spurious_count;
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 3bd04022fd0c..302a323b3f67 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -61,7 +61,7 @@ void *kmap(struct page *page);
 void kunmap(struct page *page);
 
 void *kmap_atomic_prot(struct page *page, pgprot_t prot);
-void *__kmap_atomic(struct page *page);
+void *kmap_atomic(struct page *page);
 void __kunmap_atomic(void *kvaddr);
 void *kmap_atomic_pfn(unsigned long pfn);
 void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 6919e936345b..257d9cca214f 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -13,323 +13,18 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/regset.h>
 #include <linux/hardirq.h>
-#include <linux/slab.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/uaccess.h>
-#include <asm/xsave.h>
 
-extern unsigned int sig_xstate_size;
-extern void fpu_init(void);
-extern void mxcsr_feature_mask_init(void);
+struct pt_regs;
+struct user_i387_struct;
+
 extern int init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
-extern void __math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
+extern void math_state_restore(void);
 
-extern user_regset_active_fn fpregs_active, xfpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
-	xstateregs_get;
+extern bool irq_fpu_usable(void);
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
39extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
40 xstateregs_set;
41
42/*
43 * xstateregs_active == fpregs_active. Please refer to the comment
44 * at the definition of fpregs_active.
45 */
46#define xstateregs_active fpregs_active
47
48extern struct _fpx_sw_bytes fx_sw_reserved;
49#ifdef CONFIG_IA32_EMULATION
50extern unsigned int sig_xstate_ia32_size;
51extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
52struct _fpstate_ia32;
53struct _xstate_ia32;
54extern int save_i387_xstate_ia32(void __user *buf);
55extern int restore_i387_xstate_ia32(void __user *buf);
56#endif
57
58#ifdef CONFIG_MATH_EMULATION
59extern void finit_soft_fpu(struct i387_soft_struct *soft);
60#else
61static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
62#endif
63
64#define X87_FSW_ES (1 << 7) /* Exception Summary */
65
66static __always_inline __pure bool use_xsaveopt(void)
67{
68 return static_cpu_has(X86_FEATURE_XSAVEOPT);
69}
70
71static __always_inline __pure bool use_xsave(void)
72{
73 return static_cpu_has(X86_FEATURE_XSAVE);
74}
75
76static __always_inline __pure bool use_fxsr(void)
77{
78 return static_cpu_has(X86_FEATURE_FXSR);
79}
80
81extern void __sanitize_i387_state(struct task_struct *);
82
83static inline void sanitize_i387_state(struct task_struct *tsk)
84{
85 if (!use_xsaveopt())
86 return;
87 __sanitize_i387_state(tsk);
88}
89
90#ifdef CONFIG_X86_64
91static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
92{
93 int err;
94
95 /* See comment in fxsave() below. */
96#ifdef CONFIG_AS_FXSAVEQ
97 asm volatile("1: fxrstorq %[fx]\n\t"
98 "2:\n"
99 ".section .fixup,\"ax\"\n"
100 "3: movl $-1,%[err]\n"
101 " jmp 2b\n"
102 ".previous\n"
103 _ASM_EXTABLE(1b, 3b)
104 : [err] "=r" (err)
105 : [fx] "m" (*fx), "0" (0));
106#else
107 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
108 "2:\n"
109 ".section .fixup,\"ax\"\n"
110 "3: movl $-1,%[err]\n"
111 " jmp 2b\n"
112 ".previous\n"
113 _ASM_EXTABLE(1b, 3b)
114 : [err] "=r" (err)
115 : [fx] "R" (fx), "m" (*fx), "0" (0));
116#endif
117 return err;
118}
119
120static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
121{
122 int err;
123
124 /*
125 * Clear the bytes not touched by the fxsave and reserved
126 * for the SW usage.
127 */
128 err = __clear_user(&fx->sw_reserved,
129 sizeof(struct _fpx_sw_bytes));
130 if (unlikely(err))
131 return -EFAULT;
132
133 /* See comment in fxsave() below. */
134#ifdef CONFIG_AS_FXSAVEQ
135 asm volatile("1: fxsaveq %[fx]\n\t"
136 "2:\n"
137 ".section .fixup,\"ax\"\n"
138 "3: movl $-1,%[err]\n"
139 " jmp 2b\n"
140 ".previous\n"
141 _ASM_EXTABLE(1b, 3b)
142 : [err] "=r" (err), [fx] "=m" (*fx)
143 : "0" (0));
144#else
145 asm volatile("1: rex64/fxsave (%[fx])\n\t"
146 "2:\n"
147 ".section .fixup,\"ax\"\n"
148 "3: movl $-1,%[err]\n"
149 " jmp 2b\n"
150 ".previous\n"
151 _ASM_EXTABLE(1b, 3b)
152 : [err] "=r" (err), "=m" (*fx)
153 : [fx] "R" (fx), "0" (0));
154#endif
155 if (unlikely(err) &&
156 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
157 err = -EFAULT;
158 /* No need to clear here because the caller clears USED_MATH */
159 return err;
160}
161
162static inline void fpu_fxsave(struct fpu *fpu)
163{
164 /* Using "rex64; fxsave %0" is broken because, if the memory operand
165 uses any extended registers for addressing, a second REX prefix
166 will be generated (to the assembler, rex64 followed by semicolon
167 is a separate instruction), and hence the 64-bitness is lost. */
168
169#ifdef CONFIG_AS_FXSAVEQ
170 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
171 starting with gas 2.16. */
172 __asm__ __volatile__("fxsaveq %0"
173 : "=m" (fpu->state->fxsave));
174#else
175 /* Using, as a workaround, the properly prefixed form below isn't
176 accepted by any binutils version so far released, complaining that
177 the same type of prefix is used twice if an extended register is
178 needed for addressing (fix submitted to mainline 2005-11-21).
179 asm volatile("rex64/fxsave %0"
180 : "=m" (fpu->state->fxsave));
181 This, however, we can work around by forcing the compiler to select
182 an addressing mode that doesn't require extended registers. */
183 asm volatile("rex64/fxsave (%[fx])"
184 : "=m" (fpu->state->fxsave)
185 : [fx] "R" (&fpu->state->fxsave));
186#endif
187}
188
189#else /* CONFIG_X86_32 */
190
191/* perform fxrstor iff the processor has extended states, otherwise frstor */
192static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
193{
194 /*
195 * The "nop" is needed to make the instructions the same
196 * length.
197 */
198 alternative_input(
199 "nop ; frstor %1",
200 "fxrstor %1",
201 X86_FEATURE_FXSR,
202 "m" (*fx));
203
204 return 0;
205}
206
207static inline void fpu_fxsave(struct fpu *fpu)
208{
209 asm volatile("fxsave %[fx]"
210 : [fx] "=m" (fpu->state->fxsave));
211}
212
213#endif /* CONFIG_X86_64 */
214
215/* We need a safe address that is cheap to find and that is already
216 in L1 during context switch. The best choices are unfortunately
217 different for UP and SMP */
218#ifdef CONFIG_SMP
219#define safe_address (__per_cpu_offset[0])
220#else
221#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
222#endif
223
224/*
225 * These must be called with preempt disabled
226 */
227static inline void fpu_save_init(struct fpu *fpu)
228{
229 if (use_xsave()) {
230 fpu_xsave(fpu);
231
232 /*
233 * xsave header may indicate the init state of the FP.
234 */
235 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
236 return;
237 } else if (use_fxsr()) {
238 fpu_fxsave(fpu);
239 } else {
240 asm volatile("fnsave %[fx]; fwait"
241 : [fx] "=m" (fpu->state->fsave));
242 return;
243 }
244
245 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
246 asm volatile("fnclex");
247
248 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
249 is pending. Clear the x87 state here by setting it to fixed
250 values. safe_address is a random variable that should be in L1 */
251 alternative_input(
252 ASM_NOP8 ASM_NOP2,
253 "emms\n\t" /* clear stack tags */
254 "fildl %P[addr]", /* set F?P to defined value */
255 X86_FEATURE_FXSAVE_LEAK,
256 [addr] "m" (safe_address));
257}
258
259static inline void __save_init_fpu(struct task_struct *tsk)
260{
261 fpu_save_init(&tsk->thread.fpu);
262 task_thread_info(tsk)->status &= ~TS_USEDFPU;
263}
264
265static inline int fpu_fxrstor_checking(struct fpu *fpu)
266{
267 return fxrstor_checking(&fpu->state->fxsave);
268}
269
270static inline int fpu_restore_checking(struct fpu *fpu)
271{
272 if (use_xsave())
273 return fpu_xrstor_checking(fpu);
274 else
275 return fpu_fxrstor_checking(fpu);
276}
277
278static inline int restore_fpu_checking(struct task_struct *tsk)
279{
280 return fpu_restore_checking(&tsk->thread.fpu);
281}
282
283/*
284 * Signal frame handlers...
285 */
286extern int save_i387_xstate(void __user *buf);
287extern int restore_i387_xstate(void __user *buf);
288
289static inline void __unlazy_fpu(struct task_struct *tsk)
290{
291 if (task_thread_info(tsk)->status & TS_USEDFPU) {
292 __save_init_fpu(tsk);
293 stts();
294 } else
295 tsk->fpu_counter = 0;
296}
297
298static inline void __clear_fpu(struct task_struct *tsk)
299{
300 if (task_thread_info(tsk)->status & TS_USEDFPU) {
301 /* Ignore delayed exceptions from user space */
302 asm volatile("1: fwait\n"
303 "2:\n"
304 _ASM_EXTABLE(1b, 2b));
305 task_thread_info(tsk)->status &= ~TS_USEDFPU;
306 stts();
307 }
308}
309
310static inline void kernel_fpu_begin(void)
311{
312 struct thread_info *me = current_thread_info();
313 preempt_disable();
314 if (me->status & TS_USEDFPU)
315 __save_init_fpu(me->task);
316 else
317 clts();
318}
319
320static inline void kernel_fpu_end(void)
321{
322 stts();
323 preempt_enable();
324}
325
326static inline bool irq_fpu_usable(void)
327{
328 struct pt_regs *regs;
329
330 return !in_interrupt() || !(regs = get_irq_regs()) || \
331 user_mode(regs) || (read_cr0() & X86_CR0_TS);
332}
 
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
@@ -363,90 +58,21 @@ static inline void irq_ts_restore(int TS_state)
 }
 
 /*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-	preempt_disable();
-	__save_init_fpu(tsk);
-	stts();
-	preempt_enable();
-}
-
-static inline void unlazy_fpu(struct task_struct *tsk)
-{
-	preempt_disable();
-	__unlazy_fpu(tsk);
-	preempt_enable();
-}
-
-static inline void clear_fpu(struct task_struct *tsk)
-{
-	preempt_disable();
-	__clear_fpu(tsk);
-	preempt_enable();
-}
-
-/*
- * i387 state interaction
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
  */
-static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
+static inline int user_has_fpu(void)
394{
395 if (cpu_has_fxsr) {
396 return tsk->thread.fpu.state->fxsave.cwd;
397 } else {
398 return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
399 }
400}
401
402static inline unsigned short get_fpu_swd(struct task_struct *tsk)
403{
404 if (cpu_has_fxsr) {
405 return tsk->thread.fpu.state->fxsave.swd;
406 } else {
407 return (unsigned short)tsk->thread.fpu.state->fsave.swd;
408 }
409}
410
411static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
412{
413 if (cpu_has_xmm) {
414 return tsk->thread.fpu.state->fxsave.mxcsr;
415 } else {
416 return MXCSR_DEFAULT;
417 }
418}
419
420static bool fpu_allocated(struct fpu *fpu)
421{
422 return fpu->state != NULL;
423}
424
425static inline int fpu_alloc(struct fpu *fpu)
426{
427 if (fpu_allocated(fpu))
428 return 0;
429 fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
430 if (!fpu->state)
431 return -ENOMEM;
432 WARN_ON((unsigned long)fpu->state & 15);
433 return 0;
434}
435
436static inline void fpu_free(struct fpu *fpu)
437{
438 if (fpu->state) {
439 kmem_cache_free(task_xstate_cachep, fpu->state);
440 fpu->state = NULL;
441 }
442}
443
444static inline void fpu_copy(struct fpu *dst, struct fpu *src)
 {
-	memcpy(dst->state, src->state, xstate_size);
+	return current->thread.fpu.has_fpu;
 }
 
-extern void fpu_finit(struct fpu *fpu);
+extern void unlazy_fpu(struct task_struct *tsk);
 
 #endif /* __ASSEMBLY__ */
 
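What remains exported from <asm/i387.h> is exactly what kernel SIMD users need; a sketch of the canonical pattern (the SSE and scalar helpers are hypothetical):

void my_sse_loop(const u8 *data, size_t len);		/* hypothetical */
void my_scalar_loop(const u8 *data, size_t len);	/* hypothetical */

static void my_checksum(const u8 *data, size_t len)
{
	if (irq_fpu_usable()) {
		kernel_fpu_begin();		/* may save the user FPU state */
		my_sse_loop(data, len);		/* free to clobber XMM regs */
		kernel_fpu_end();
	} else {
		my_scalar_loop(data, len);	/* FPU not safe in this context */
	}
}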
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 1f7e62517284..ee52760549f0 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -43,6 +43,15 @@ struct ucontext_ia32 {
 	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
 
+struct ucontext_x32 {
+	unsigned int	  uc_flags;
+	unsigned int	  uc_link;
+	stack_ia32_t	  uc_stack;
+	unsigned int	  uc__pad0;	/* needed for alignment */
+	struct sigcontext uc_mcontext;	/* the 64-bit sigcontext type */
+	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
+};
+
 /* This matches struct stat64 in glibc2.2, hence the absolutely
  * insane amounts of padding around dev_t's.
  */
@@ -116,6 +125,15 @@ typedef struct compat_siginfo {
 			compat_clock_t _stime;
 		} _sigchld;
 
+		/* SIGCHLD (x32 version) */
+		struct {
+			unsigned int _pid;	/* which child */
+			unsigned int _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_s64 _utime;
+			compat_s64 _stime;
+		} _sigchld_x32;
+
 		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
 		struct {
 			unsigned int _addr;	/* faulting insn/memory ref. */
diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h
index 976f6ecd2ce6..b0d5716ca1e4 100644
--- a/arch/x86/include/asm/ia32_unistd.h
+++ b/arch/x86/include/asm/ia32_unistd.h
@@ -2,17 +2,10 @@
 #define _ASM_X86_IA32_UNISTD_H
 
 /*
- * This file contains the system call numbers of the ia32 port,
+ * This file contains the system call numbers of the ia32 compat ABI,
  * this is for the kernel only.
- * Only add syscalls here where some part of the kernel needs to know
- * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK
  */
-
-#define __NR_ia32_restart_syscall	0
-#define __NR_ia32_exit			1
-#define __NR_ia32_read			3
-#define __NR_ia32_write			4
-#define __NR_ia32_sigreturn		119
-#define __NR_ia32_rt_sigreturn		173
+#define __SYSCALL_ia32_NR(x) (x)
+#include <asm/unistd_32_ia32.h>
 
 #endif /* _ASM_X86_IA32_UNISTD_H */
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index f49253d75710..c5d1785373ed 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -14,6 +14,7 @@ void exit_idle(void);
 #else /* !CONFIG_X86_64 */
 static inline void enter_idle(void) { }
 static inline void exit_idle(void) { }
+static inline void __exit_idle(void) { }
 #endif /* CONFIG_X86_64 */
 
 void amd_e400_remove_cpu(int cpu);
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 205b063e3e32..74a2e312e8a2 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,11 +97,12 @@
 
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
 extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
-					     insn_byte_t last_pfx,
+					     int lpfx_id,
 					     insn_attr_t esc_attr);
 extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
-					    insn_byte_t last_pfx,
+					    int lpfx_id,
 					    insn_attr_t esc_attr);
 extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
 					  insn_byte_t vex_m,
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 8dbe353e41e1..adcc0ae73d09 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -5,6 +5,8 @@
 extern void __init early_ioremap_page_table_range_init(void);
 #endif
 
+extern void __init zone_sizes_init(void);
+
 extern unsigned long __init
 kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 74df3f1eddfd..48eb30a86062 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -96,12 +96,6 @@ struct insn {
 #define X86_VEX_P(vex)	((vex) & 0x03)		/* VEX3 Byte2, VEX2 Byte1 */
 #define X86_VEX_M_MAX	0x1f			/* VEX3.M Maximum value */
 
-/* The last prefix is needed for two-byte and three-byte opcodes */
-static inline insn_byte_t insn_last_prefix(struct insn *insn)
-{
-	return insn->prefixes.bytes[3];
-}
-
 extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
 extern void insn_get_prefixes(struct insn *insn);
 extern void insn_get_opcode(struct insn *insn);
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
 	return X86_VEX_P(insn->vex_prefix.bytes[2]);
 }
 
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+	if (insn_is_avx(insn))
+		return insn_vex_p_bits(insn);	/* VEX_p is a SIMD prefix id */
+
+	if (insn->prefixes.bytes[3])
+		return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+	return 0;
+}
+
 /* Offset of each field from kaddr */
 static inline int insn_offset_rex_prefix(struct insn *insn)
 {
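A sketch of how a decoder consumer obtains the new prefix id (the prefixes must be parsed first; the wrapper name is hypothetical):

/* Hypothetical: return the SIMD-prefix id for the instruction at kaddr. */
static int my_simd_prefix_id(const void *kaddr, int x86_64)
{
	struct insn insn;

	insn_init(&insn, kaddr, x86_64);
	insn_get_prefixes(&insn);	/* fills prefixes and any VEX bytes */
	return insn_last_prefix_id(&insn);
}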
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 690d1cc9a877..2c4943de5150 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -21,6 +21,15 @@
 #define IO_APIC_REDIR_LEVEL_TRIGGER	(1 << 15)
 #define IO_APIC_REDIR_MASKED		(1 << 16)
 
+struct io_apic_ops {
+	void		(*init)  (void);
+	unsigned int	(*read)  (unsigned int apic, unsigned int reg);
+	void		(*write) (unsigned int apic, unsigned int reg, unsigned int value);
+	void		(*modify)(unsigned int apic, unsigned int reg, unsigned int value);
+};
+
+void __init set_io_apic_ops(const struct io_apic_ops *);
+
 /*
  * The structure of the IO-APIC:
  */
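The indirection exists so a hypervisor layer can interpose on IO-APIC register accesses before setup runs; a sketch with stub callbacks (all names hypothetical, and all four hooks filled in on the assumption that callers do not NULL-check them):

static void my_io_apic_init(void) { }
static unsigned int my_io_apic_read(unsigned int apic, unsigned int reg)
{
	return 0xffffffff;	/* pretend the register reads all-ones */
}
static void my_io_apic_write(unsigned int apic, unsigned int reg,
			     unsigned int value) { }
static void my_io_apic_modify(unsigned int apic, unsigned int reg,
			      unsigned int value) { }

static const struct io_apic_ops my_io_apic_ops = {
	.init	= my_io_apic_init,
	.read	= my_io_apic_read,
	.write	= my_io_apic_write,
	.modify	= my_io_apic_modify,
};

/* From early __init code, before the IO-APIC is set up: */
/*	set_io_apic_ops(&my_io_apic_ops);	*/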
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
deleted file mode 100644
index 423bbbddf36d..000000000000
--- a/arch/x86/include/asm/irq_controller.h
+++ /dev/null
@@ -1,12 +0,0 @@
1#ifndef __IRQ_CONTROLLER__
2#define __IRQ_CONTROLLER__
3
4struct irq_domain {
5 int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize,
6 u32 *out_hwirq, u32 *out_type);
7 void *priv;
8 struct device_node *controller;
9 struct list_head l;
10};
11
12#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18ce6ead..3a16c1483b45 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -9,12 +9,12 @@
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:"
-		JUMP_LABEL_INITIAL_NOP
+		STATIC_KEY_INITIAL_NOP
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
 		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
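The rename tracks the generic static_key API; a sketch of a branch site this asm-goto helper backs (the key and call sites are hypothetical, API names as in <linux/jump_label.h> of this series):

extern void my_do_trace(void);		/* hypothetical slow path */

static struct static_key my_tracing_key = STATIC_KEY_INIT_FALSE;

static inline void my_hook(void)
{
	/* Compiles to the 5-byte nop above until the key is switched on. */
	if (static_key_false(&my_tracing_key))
		my_do_trace();
}

/* Elsewhere: static_key_slow_inc(&my_tracing_key) patches nop -> jmp. */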
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 77e95f54570a..332f98c9111f 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -64,11 +64,15 @@ enum regnames {
 	GDB_PS,			/* 17 */
 	GDB_CS,			/* 18 */
 	GDB_SS,			/* 19 */
+	GDB_DS,			/* 20 */
+	GDB_ES,			/* 21 */
+	GDB_FS,			/* 22 */
+	GDB_GS,			/* 23 */
 };
 #define GDB_ORIG_AX		57
-#define DBG_MAX_REG_NUM		20
-/* 17 64 bit regs and 3 32 bit regs */
-#define NUMREGBYTES		((17 * 8) + (3 * 4))
+#define DBG_MAX_REG_NUM		24
+/* 17 64 bit regs and 5 32 bit regs */
+#define NUMREGBYTES		((17 * 8) + (5 * 4))
 #endif /* ! CONFIG_X86_32 */
 
 static inline void arch_kgdb_breakpoint(void)
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4d8dcbdfc120..e7d1c194d272 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -321,4 +321,8 @@ struct kvm_xcrs {
 	__u64 padding[16];
 };
 
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index ab4092e3214e..c222e1a1b12a 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -176,6 +176,7 @@ struct x86_emulate_ops {
 	void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
 	ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
 	int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
+	void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val);
 	int (*cpl)(struct x86_emulate_ctxt *ctxt);
 	int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
 	int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
@@ -190,6 +191,9 @@ struct x86_emulate_ops {
 	int (*intercept)(struct x86_emulate_ctxt *ctxt,
 			 struct x86_instruction_info *info,
 			 enum x86_intercept_stage stage);
+
+	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
+			  u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
 };
 
 typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -298,6 +302,19 @@ struct x86_emulate_ctxt {
 #define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
 			       X86EMUL_MODE_PROT64)
 
+/* CPUID vendors */
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
+
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273
+
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69
+
 enum x86_intercept_stage {
 	X86_ICTP_NONE = 0,	/* Allow zero-init to not match anything */
 	X86_ICPT_PRE_EXCEPT,
@@ -372,7 +389,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 #define EMULATION_INTERCEPTED 2
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
-			 u16 tss_selector, int reason,
+			 u16 tss_selector, int idt_index, int reason,
 			 bool has_error_code, u32 error_code);
 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
 #endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 52d6640a5ca1..e216ba066e79 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -29,7 +29,7 @@
 #include <asm/msr-index.h>
 
 #define KVM_MAX_VCPUS 254
-#define KVM_SOFT_MAX_VCPUS 64
+#define KVM_SOFT_MAX_VCPUS 160
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
@@ -181,13 +181,6 @@ struct kvm_mmu_memory_cache {
181 void *objects[KVM_NR_MEM_OBJS]; 181 void *objects[KVM_NR_MEM_OBJS];
182}; 182};
183 183
184#define NR_PTE_CHAIN_ENTRIES 5
185
186struct kvm_pte_chain {
187 u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
188 struct hlist_node link;
189};
190
191/* 184/*
192 * kvm_mmu_page_role, below, is defined as: 185 * kvm_mmu_page_role, below, is defined as:
193 * 186 *
@@ -427,12 +420,16 @@ struct kvm_vcpu_arch {
427 420
428 u64 last_guest_tsc; 421 u64 last_guest_tsc;
429 u64 last_kernel_ns; 422 u64 last_kernel_ns;
430 u64 last_tsc_nsec; 423 u64 last_host_tsc;
431 u64 last_tsc_write; 424 u64 tsc_offset_adjustment;
432 u32 virtual_tsc_khz; 425 u64 this_tsc_nsec;
426 u64 this_tsc_write;
427 u8 this_tsc_generation;
433 bool tsc_catchup; 428 bool tsc_catchup;
434 u32 tsc_catchup_mult; 429 bool tsc_always_catchup;
435 s8 tsc_catchup_shift; 430 s8 virtual_tsc_shift;
431 u32 virtual_tsc_mult;
432 u32 virtual_tsc_khz;
436 433
437 atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ 434 atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
438 unsigned nmi_pending; /* NMI queued after currently running handler */ 435 unsigned nmi_pending; /* NMI queued after currently running handler */
@@ -478,6 +475,21 @@ struct kvm_vcpu_arch {
478 u32 id; 475 u32 id;
479 bool send_user_only; 476 bool send_user_only;
480 } apf; 477 } apf;
478
479 /* OSVW MSRs (AMD only) */
480 struct {
481 u64 length;
482 u64 status;
483 } osvw;
484};
485
486struct kvm_lpage_info {
487 unsigned long rmap_pde;
488 int write_count;
489};
490
491struct kvm_arch_memory_slot {
492 struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
481}; 493};
482 494
483struct kvm_arch { 495struct kvm_arch {
@@ -511,8 +523,12 @@ struct kvm_arch {
511 s64 kvmclock_offset; 523 s64 kvmclock_offset;
512 raw_spinlock_t tsc_write_lock; 524 raw_spinlock_t tsc_write_lock;
513 u64 last_tsc_nsec; 525 u64 last_tsc_nsec;
514 u64 last_tsc_offset;
515 u64 last_tsc_write; 526 u64 last_tsc_write;
527 u32 last_tsc_khz;
528 u64 cur_tsc_nsec;
529 u64 cur_tsc_write;
530 u64 cur_tsc_offset;
531 u8 cur_tsc_generation;
516 532
517 struct kvm_xen_hvm_config xen_hvm_config; 533 struct kvm_xen_hvm_config xen_hvm_config;
518 534
@@ -644,7 +660,7 @@ struct kvm_x86_ops {
644 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 660 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
645 int (*get_lpage_level)(void); 661 int (*get_lpage_level)(void);
646 bool (*rdtscp_supported)(void); 662 bool (*rdtscp_supported)(void);
647 void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment); 663 void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
648 664
649 void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); 665 void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
650 666
@@ -652,7 +668,7 @@ struct kvm_x86_ops {
652 668
653 bool (*has_wbinvd_exit)(void); 669 bool (*has_wbinvd_exit)(void);
654 670
655 void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); 671 void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
656 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); 672 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
657 673
658 u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); 674 u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
@@ -674,6 +690,17 @@ struct kvm_arch_async_pf {
674 690
675extern struct kvm_x86_ops *kvm_x86_ops; 691extern struct kvm_x86_ops *kvm_x86_ops;
676 692
693static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
694 s64 adjustment)
695{
696 kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false);
697}
698
699static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
700{
701 kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true);
702}
703
677int kvm_mmu_module_init(void); 704int kvm_mmu_module_init(void);
678void kvm_mmu_module_exit(void); 705void kvm_mmu_module_exit(void);
679 706
@@ -741,8 +768,8 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
741void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 768void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
742int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); 769int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
743 770
744int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 771int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
745 bool has_error_code, u32 error_code); 772 int reason, bool has_error_code, u32 error_code);
746 773
747int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 774int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
748int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); 775int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
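
The adjust_tsc_offset_guest()/adjust_tsc_offset_host() inlines added above pin the new bool argument of the ->adjust_tsc_offset() callback so call sites cannot pass it inconsistently. A hedged standalone sketch of that ops-table-plus-wrapper idiom (struct and function names here are invented for illustration):

    #include <stdio.h>
    #include <stdbool.h>

    struct vcpu { long long tsc_offset; };

    /* Callback table, playing the role of kvm_x86_ops. */
    struct backend_ops {
            void (*adjust_tsc_offset)(struct vcpu *v, long long adj, bool host);
    };

    static void demo_adjust(struct vcpu *v, long long adj, bool host)
    {
            /* A real backend could rescale 'adj' when host == true and TSC
             * scaling is active; this demo just applies it. */
            v->tsc_offset += adj;
            printf("adjusted by %lld (host=%d), offset now %lld\n",
                   adj, (int)host, v->tsc_offset);
    }

    static struct backend_ops ops = { .adjust_tsc_offset = demo_adjust };

    /* The wrappers pin the flag, exactly like the two inlines above. */
    static inline void adjust_tsc_offset_guest(struct vcpu *v, long long adj)
    {
            ops.adjust_tsc_offset(v, adj, false);
    }

    static inline void adjust_tsc_offset_host(struct vcpu *v, long long adj)
    {
            ops.adjust_tsc_offset(v, adj, true);
    }

    int main(void)
    {
            struct vcpu v = { 0 };

            adjust_tsc_offset_guest(&v, 100);
            adjust_tsc_offset_host(&v, -40);
            return 0;
    }
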
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 9cdae5d47e8f..c8bed0da434a 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -3,7 +3,6 @@
3 3
4#include <linux/percpu.h> 4#include <linux/percpu.h>
5 5
6#include <asm/system.h>
7#include <linux/atomic.h> 6#include <linux/atomic.h>
8#include <asm/asm.h> 7#include <asm/asm.h>
9 8
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 0e8e85bb7c51..d354fb781c57 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -5,7 +5,6 @@
5#define _ASM_X86_MC146818RTC_H 5#define _ASM_X86_MC146818RTC_H
6 6
7#include <asm/io.h> 7#include <asm/io.h>
8#include <asm/system.h>
9#include <asm/processor.h> 8#include <asm/processor.h>
10#include <linux/mc146818rtc.h> 9#include <linux/mc146818rtc.h>
11 10
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index f35ce43c1a77..441520e4174f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
151 151
152void mce_setup(struct mce *m); 152void mce_setup(struct mce *m);
153void mce_log(struct mce *m); 153void mce_log(struct mce *m);
154DECLARE_PER_CPU(struct device, mce_device); 154DECLARE_PER_CPU(struct device *, mce_device);
155 155
156/* 156/*
157 * Maximum banks number. 157 * Maximum banks number.
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 0a0a95460434..fc18bf3ce7c8 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -26,8 +26,8 @@ extern struct sfi_rtc_table_entry sfi_mrtc_array[];
26 * identified via MSRs. 26 * identified via MSRs.
27 */ 27 */
28enum mrst_cpu_type { 28enum mrst_cpu_type {
29 MRST_CPU_CHIP_LINCROFT = 1, 29 /* 1 was Moorestown */
30 MRST_CPU_CHIP_PENWELL, 30 MRST_CPU_CHIP_PENWELL = 2,
31}; 31};
32 32
33extern enum mrst_cpu_type __mrst_cpu_chip; 33extern enum mrst_cpu_type __mrst_cpu_chip;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9161a0..ccb805966f68 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
56#define MSR_OFFCORE_RSP_0 0x000001a6 56#define MSR_OFFCORE_RSP_0 0x000001a6
57#define MSR_OFFCORE_RSP_1 0x000001a7 57#define MSR_OFFCORE_RSP_1 0x000001a7
58 58
59#define MSR_LBR_SELECT 0x000001c8
60#define MSR_LBR_TOS 0x000001c9
61#define MSR_LBR_NHM_FROM 0x00000680
62#define MSR_LBR_NHM_TO 0x000006c0
63#define MSR_LBR_CORE_FROM 0x00000040
64#define MSR_LBR_CORE_TO 0x00000060
65
59#define MSR_IA32_PEBS_ENABLE 0x000003f1 66#define MSR_IA32_PEBS_ENABLE 0x000003f1
60#define MSR_IA32_DS_AREA 0x00000600 67#define MSR_IA32_DS_AREA 0x00000600
61#define MSR_IA32_PERF_CAPABILITIES 0x00000345 68#define MSR_IA32_PERF_CAPABILITIES 0x00000345
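
The new MSR_LBR_* constants name the Last Branch Record select, top-of-stack, and from/to stack MSRs. On a live system they can be inspected from user space through the msr character device, where the file offset selects the MSR address. A sketch, assuming the msr module is loaded and the CPU actually implements these model-specific registers (the read fails with EIO otherwise):

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>

    #define MSR_LBR_TOS 0x000001c9          /* from the hunk above */

    int main(void)
    {
            uint64_t val;
            int fd = open("/dev/cpu/0/msr", O_RDONLY);  /* root + msr module */

            if (fd < 0) { perror("open"); return 1; }
            /* The msr device maps the file offset to the MSR address. */
            if (pread(fd, &val, sizeof(val), MSR_LBR_TOS) != sizeof(val)) {
                    perror("pread");        /* EIO if the CPU lacks this MSR */
                    close(fd);
                    return 1;
            }
            printf("MSR_LBR_TOS = %#llx\n", (unsigned long long)val);
            close(fd);
            return 0;
    }
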
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 4365ffdb461f..7e3f17f92c66 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -29,18 +29,18 @@
29 29
30#define MTRR_IOCTL_BASE 'M' 30#define MTRR_IOCTL_BASE 'M'
31 31
32struct mtrr_sentry {
33 unsigned long base; /* Base address */
34 unsigned int size; /* Size of region */
35 unsigned int type; /* Type of region */
36};
37
38/* Warning: this structure has a different order from i386 32/* Warning: this structure has a different order from i386
39 on x86-64. The 32bit emulation code takes care of that. 33 on x86-64. The 32bit emulation code takes care of that.
40 But you need to use this for 64bit, otherwise your X server 34 But you need to use this for 64bit, otherwise your X server
41 will break. */ 35 will break. */
42 36
43#ifdef __i386__ 37#ifdef __i386__
38struct mtrr_sentry {
39 unsigned long base; /* Base address */
40 unsigned int size; /* Size of region */
41 unsigned int type; /* Type of region */
42};
43
44struct mtrr_gentry { 44struct mtrr_gentry {
45 unsigned int regnum; /* Register number */ 45 unsigned int regnum; /* Register number */
46 unsigned long base; /* Base address */ 46 unsigned long base; /* Base address */
@@ -50,12 +50,20 @@ struct mtrr_gentry {
50 50
51#else /* __i386__ */ 51#else /* __i386__ */
52 52
53struct mtrr_sentry {
54 __u64 base; /* Base address */
55 __u32 size; /* Size of region */
56 __u32 type; /* Type of region */
57};
58
53struct mtrr_gentry { 59struct mtrr_gentry {
54 unsigned long base; /* Base address */ 60 __u64 base; /* Base address */
55 unsigned int size; /* Size of region */ 61 __u32 size; /* Size of region */
56 unsigned int regnum; /* Register number */ 62 __u32 regnum; /* Register number */
57 unsigned int type; /* Type of region */ 63 __u32 type; /* Type of region */
64 __u32 _pad; /* Unused */
58}; 65};
66
59#endif /* !__i386__ */ 67#endif /* !__i386__ */
60 68
61struct mtrr_var_range { 69struct mtrr_var_range {
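
The mtrr.h hunk moves struct mtrr_sentry into per-ABI blocks and switches the 64-bit variant to fixed-width __u64/__u32 fields (plus explicit padding in mtrr_gentry), so the ioctl layout no longer depends on the width of unsigned long. User space reaches these structures through the /proc/mtrr ioctls; a hedged sketch (base/size values are placeholders, and the call needs root):

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <asm/mtrr.h>   /* struct mtrr_sentry, MTRRIOC_ADD_ENTRY */

    int main(void)
    {
            /* Placeholder base/size; a real caller uses a device BAR. */
            struct mtrr_sentry sentry = {
                    .base = 0xd0000000,
                    .size = 0x01000000,             /* 16 MiB */
                    .type = MTRR_TYPE_WRCOMB,       /* write-combining */
            };
            int fd = open("/proc/mtrr", O_WRONLY);  /* needs root */

            if (fd < 0) { perror("open"); return 1; }
            if (ioctl(fd, MTRRIOC_ADD_ENTRY, &sentry) < 0)
                    perror("ioctl");
            close(fd);
            return 0;
    }
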
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index bce688d54c12..e21fdd10479f 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -55,7 +55,6 @@ extern unsigned long init_memory_mapping(unsigned long start,
55 unsigned long end); 55 unsigned long end);
56 56
57extern void initmem_init(void); 57extern void initmem_init(void);
58extern void free_initmem(void);
59 58
60#endif /* !__ASSEMBLY__ */ 59#endif /* !__ASSEMBLY__ */
61 60
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9a74fb..aa0f91308367 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -10,6 +10,7 @@
10#include <asm/paravirt_types.h> 10#include <asm/paravirt_types.h>
11 11
12#ifndef __ASSEMBLY__ 12#ifndef __ASSEMBLY__
13#include <linux/bug.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <linux/cpumask.h> 15#include <linux/cpumask.h>
15 16
@@ -230,9 +231,9 @@ static inline unsigned long long paravirt_sched_clock(void)
230 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); 231 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
231} 232}
232 233
233struct jump_label_key; 234struct static_key;
234extern struct jump_label_key paravirt_steal_enabled; 235extern struct static_key paravirt_steal_enabled;
235extern struct jump_label_key paravirt_steal_rq_enabled; 236extern struct static_key paravirt_steal_rq_enabled;
236 237
237static inline u64 paravirt_steal_clock(int cpu) 238static inline u64 paravirt_steal_clock(int cpu)
238{ 239{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 096c975e099f..2291895b1836 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -23,6 +23,7 @@
23#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) 23#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16)
24#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) 24#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
25#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) 25#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18)
26#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19)
26#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) 27#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20)
27#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) 28#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21)
28#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) 29#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
@@ -188,8 +189,6 @@ extern u32 get_ibs_caps(void);
188#ifdef CONFIG_PERF_EVENTS 189#ifdef CONFIG_PERF_EVENTS
189extern void perf_events_lapic_init(void); 190extern void perf_events_lapic_init(void);
190 191
191#define PERF_EVENT_INDEX_OFFSET 0
192
193/* 192/*
194 * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. 193 * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
195 * This flag is otherwise unused and ABI specified to be 0, so nobody should 194 * This flag is otherwise unused and ABI specified to be 0, so nobody should
@@ -242,4 +241,12 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
242static inline void perf_events_lapic_init(void) { } 241static inline void perf_events_lapic_init(void) { }
243#endif 242#endif
244 243
244#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
245 extern void amd_pmu_enable_virt(void);
246 extern void amd_pmu_disable_virt(void);
247#else
248 static inline void amd_pmu_enable_virt(void) { }
249 static inline void amd_pmu_disable_virt(void) { }
250#endif
251
245#endif /* _ASM_X86_PERF_EVENT_H */ 252#endif /* _ASM_X86_PERF_EVENT_H */
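
ARCH_PERFMON_EVENTSEL_PIN_CONTROL, added above, is one more bit field of the IA32_PERFEVTSELx MSRs; a programmed counter is just an OR of these fields. A small sketch composing a value for the architectural UnHalted Core Cycles event (event 0x3c, umask 0x00):

    #include <stdio.h>
    #include <stdint.h>

    /* Bit positions from the hunk above. */
    #define ARCH_PERFMON_EVENTSEL_USR    (1ULL << 16)
    #define ARCH_PERFMON_EVENTSEL_OS     (1ULL << 17)
    #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)

    int main(void)
    {
            /* Architectural "UnHalted Core Cycles": event 0x3c, umask 0x00. */
            uint64_t evtsel = 0x3cULL               /* event select, bits 0-7 */
                    | (0x00ULL << 8)                /* unit mask, bits 8-15 */
                    | ARCH_PERFMON_EVENTSEL_USR     /* count in user mode */
                    | ARCH_PERFMON_EVENTSEL_OS      /* ...and in kernel mode */
                    | ARCH_PERFMON_EVENTSEL_ENABLE; /* arm the counter */

            printf("IA32_PERFEVTSELx = %#llx\n", (unsigned long long)evtsel);
            return 0;
    }
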
diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h
index bb7133dc155d..3427b7798dbc 100644
--- a/arch/x86/include/asm/posix_types.h
+++ b/arch/x86/include/asm/posix_types.h
@@ -7,7 +7,9 @@
7#else 7#else
8# ifdef __i386__ 8# ifdef __i386__
9# include "posix_types_32.h" 9# include "posix_types_32.h"
10# else 10# elif defined(__LP64__)
11# include "posix_types_64.h" 11# include "posix_types_64.h"
12# else
13# include "posix_types_x32.h"
12# endif 14# endif
13#endif 15#endif
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h
index f7d9adf82e53..99f262e04b91 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/asm/posix_types_32.h
@@ -7,79 +7,22 @@
7 * assume GCC is being used. 7 * assume GCC is being used.
8 */ 8 */
9 9
10typedef unsigned long __kernel_ino_t;
11typedef unsigned short __kernel_mode_t; 10typedef unsigned short __kernel_mode_t;
11#define __kernel_mode_t __kernel_mode_t
12
12typedef unsigned short __kernel_nlink_t; 13typedef unsigned short __kernel_nlink_t;
13typedef long __kernel_off_t; 14#define __kernel_nlink_t __kernel_nlink_t
14typedef int __kernel_pid_t; 15
15typedef unsigned short __kernel_ipc_pid_t; 16typedef unsigned short __kernel_ipc_pid_t;
17#define __kernel_ipc_pid_t __kernel_ipc_pid_t
18
16typedef unsigned short __kernel_uid_t; 19typedef unsigned short __kernel_uid_t;
17typedef unsigned short __kernel_gid_t; 20typedef unsigned short __kernel_gid_t;
18typedef unsigned int __kernel_size_t; 21#define __kernel_uid_t __kernel_uid_t
19typedef int __kernel_ssize_t;
20typedef int __kernel_ptrdiff_t;
21typedef long __kernel_time_t;
22typedef long __kernel_suseconds_t;
23typedef long __kernel_clock_t;
24typedef int __kernel_timer_t;
25typedef int __kernel_clockid_t;
26typedef int __kernel_daddr_t;
27typedef char * __kernel_caddr_t;
28typedef unsigned short __kernel_uid16_t;
29typedef unsigned short __kernel_gid16_t;
30typedef unsigned int __kernel_uid32_t;
31typedef unsigned int __kernel_gid32_t;
32 22
33typedef unsigned short __kernel_old_uid_t;
34typedef unsigned short __kernel_old_gid_t;
35typedef unsigned short __kernel_old_dev_t; 23typedef unsigned short __kernel_old_dev_t;
24#define __kernel_old_dev_t __kernel_old_dev_t
36 25
37#ifdef __GNUC__ 26#include <asm-generic/posix_types.h>
38typedef long long __kernel_loff_t;
39#endif
40
41typedef struct {
42 int val[2];
43} __kernel_fsid_t;
44
45#if defined(__KERNEL__)
46
47#undef __FD_SET
48#define __FD_SET(fd,fdsetp) \
49 asm volatile("btsl %1,%0": \
50 "+m" (*(__kernel_fd_set *)(fdsetp)) \
51 : "r" ((int)(fd)))
52
53#undef __FD_CLR
54#define __FD_CLR(fd,fdsetp) \
55 asm volatile("btrl %1,%0": \
56 "+m" (*(__kernel_fd_set *)(fdsetp)) \
57 : "r" ((int) (fd)))
58
59#undef __FD_ISSET
60#define __FD_ISSET(fd,fdsetp) \
61 (__extension__ \
62 ({ \
63 unsigned char __result; \
64 asm volatile("btl %1,%2 ; setb %0" \
65 : "=q" (__result) \
66 : "r" ((int)(fd)), \
67 "m" (*(__kernel_fd_set *)(fdsetp))); \
68 __result; \
69}))
70
71#undef __FD_ZERO
72#define __FD_ZERO(fdsetp) \
73do { \
74 int __d0, __d1; \
75 asm volatile("cld ; rep ; stosl" \
76 : "=m" (*(__kernel_fd_set *)(fdsetp)), \
77 "=&c" (__d0), "=&D" (__d1) \
78 : "a" (0), "1" (__FDSET_LONGS), \
79 "2" ((__kernel_fd_set *)(fdsetp)) \
80 : "memory"); \
81} while (0)
82
83#endif /* defined(__KERNEL__) */
84 27
85#endif /* _ASM_X86_POSIX_TYPES_32_H */ 28#endif /* _ASM_X86_POSIX_TYPES_32_H */
diff --git a/arch/x86/include/asm/posix_types_64.h b/arch/x86/include/asm/posix_types_64.h
index eb8d2d92b63e..cba0c1ead162 100644
--- a/arch/x86/include/asm/posix_types_64.h
+++ b/arch/x86/include/asm/posix_types_64.h
@@ -7,113 +7,13 @@
7 * assume GCC is being used. 7 * assume GCC is being used.
8 */ 8 */
9 9
10typedef unsigned long __kernel_ino_t;
11typedef unsigned int __kernel_mode_t;
12typedef unsigned long __kernel_nlink_t;
13typedef long __kernel_off_t;
14typedef int __kernel_pid_t;
15typedef int __kernel_ipc_pid_t;
16typedef unsigned int __kernel_uid_t;
17typedef unsigned int __kernel_gid_t;
18typedef unsigned long __kernel_size_t;
19typedef long __kernel_ssize_t;
20typedef long __kernel_ptrdiff_t;
21typedef long __kernel_time_t;
22typedef long __kernel_suseconds_t;
23typedef long __kernel_clock_t;
24typedef int __kernel_timer_t;
25typedef int __kernel_clockid_t;
26typedef int __kernel_daddr_t;
27typedef char * __kernel_caddr_t;
28typedef unsigned short __kernel_uid16_t;
29typedef unsigned short __kernel_gid16_t;
30
31#ifdef __GNUC__
32typedef long long __kernel_loff_t;
33#endif
34
35typedef struct {
36 int val[2];
37} __kernel_fsid_t;
38
39typedef unsigned short __kernel_old_uid_t; 10typedef unsigned short __kernel_old_uid_t;
40typedef unsigned short __kernel_old_gid_t; 11typedef unsigned short __kernel_old_gid_t;
41typedef __kernel_uid_t __kernel_uid32_t; 12#define __kernel_old_uid_t __kernel_old_uid_t
42typedef __kernel_gid_t __kernel_gid32_t;
43 13
44typedef unsigned long __kernel_old_dev_t; 14typedef unsigned long __kernel_old_dev_t;
15#define __kernel_old_dev_t __kernel_old_dev_t
45 16
46#ifdef __KERNEL__ 17#include <asm-generic/posix_types.h>
47
48#undef __FD_SET
49static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
50{
51 unsigned long _tmp = fd / __NFDBITS;
52 unsigned long _rem = fd % __NFDBITS;
53 fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
54}
55
56#undef __FD_CLR
57static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
58{
59 unsigned long _tmp = fd / __NFDBITS;
60 unsigned long _rem = fd % __NFDBITS;
61 fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
62}
63
64#undef __FD_ISSET
65static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p)
66{
67 unsigned long _tmp = fd / __NFDBITS;
68 unsigned long _rem = fd % __NFDBITS;
69 return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
70}
71
72/*
73 * This will unroll the loop for the normal constant cases (8 or 32 longs,
74 * for 256 and 1024-bit fd_sets respectively)
75 */
76#undef __FD_ZERO
77static inline void __FD_ZERO(__kernel_fd_set *p)
78{
79 unsigned long *tmp = p->fds_bits;
80 int i;
81
82 if (__builtin_constant_p(__FDSET_LONGS)) {
83 switch (__FDSET_LONGS) {
84 case 32:
85 tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
86 tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
87 tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
88 tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
89 tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0;
90 tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0;
91 tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0;
92 tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0;
93 return;
94 case 16:
95 tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
96 tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
97 tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
98 tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
99 return;
100 case 8:
101 tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
102 tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
103 return;
104 case 4:
105 tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
106 return;
107 }
108 }
109 i = __FDSET_LONGS;
110 while (i) {
111 i--;
112 *tmp = 0;
113 tmp++;
114 }
115}
116
117#endif /* defined(__KERNEL__) */
118 18
119#endif /* _ASM_X86_POSIX_TYPES_64_H */ 19#endif /* _ASM_X86_POSIX_TYPES_64_H */
diff --git a/arch/x86/include/asm/posix_types_x32.h b/arch/x86/include/asm/posix_types_x32.h
new file mode 100644
index 000000000000..85f9bdafa93c
--- /dev/null
+++ b/arch/x86/include/asm/posix_types_x32.h
@@ -0,0 +1,19 @@
1#ifndef _ASM_X86_POSIX_TYPES_X32_H
2#define _ASM_X86_POSIX_TYPES_X32_H
3
4/*
5 * This file is only used by user-level software, so you need to
6 * be a little careful about namespace pollution etc. Also, we cannot
7 * assume GCC is being used.
8 *
 9 * These types should generally match the ones used by the 64-bit kernel.
10 *
11 */
12
13typedef long long __kernel_long_t;
14typedef unsigned long long __kernel_ulong_t;
15#define __kernel_long_t __kernel_long_t
16
17#include <asm/posix_types_64.h>
18
19#endif /* _ASM_X86_POSIX_TYPES_X32_H */
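
All three posix_types_*.h rewrites rely on the same asm-generic convention: the arch header typedefs a __kernel_* type and #defines the identical name, and asm-generic/posix_types.h supplies a default for every name still undefined. A self-contained sketch of that guard pattern, with the two "headers" inlined into one file and illustrative type names:

    /* "Arch header": override one type, then include the generic defaults.
     * Type names are invented; the kernel uses __kernel_mode_t etc. */
    typedef unsigned short my_mode_t;
    #define my_mode_t my_mode_t     /* signals "already provided" */

    /* "Generic header" (normally a separate #include): */
    #ifndef my_mode_t
    typedef unsigned int my_mode_t; /* default: skipped, the override wins */
    #endif
    #ifndef my_off_t
    typedef long my_off_t;          /* no override above, so this is used */
    #endif

    #include <stdio.h>

    int main(void)
    {
            /* 2 and 8 on x86-64: the override took, the default filled the gap. */
            printf("mode=%zu off=%zu\n", sizeof(my_mode_t), sizeof(my_off_t));
            return 0;
    }
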
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index aa9088c26931..4fa7dcceb6c0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -14,13 +14,13 @@ struct mm_struct;
14#include <asm/sigcontext.h> 14#include <asm/sigcontext.h>
15#include <asm/current.h> 15#include <asm/current.h>
16#include <asm/cpufeature.h> 16#include <asm/cpufeature.h>
17#include <asm/system.h>
18#include <asm/page.h> 17#include <asm/page.h>
19#include <asm/pgtable_types.h> 18#include <asm/pgtable_types.h>
20#include <asm/percpu.h> 19#include <asm/percpu.h>
21#include <asm/msr.h> 20#include <asm/msr.h>
22#include <asm/desc_defs.h> 21#include <asm/desc_defs.h>
23#include <asm/nops.h> 22#include <asm/nops.h>
23#include <asm/special_insns.h>
24 24
25#include <linux/personality.h> 25#include <linux/personality.h>
26#include <linux/cpumask.h> 26#include <linux/cpumask.h>
@@ -29,6 +29,15 @@ struct mm_struct;
29#include <linux/math64.h> 29#include <linux/math64.h>
30#include <linux/init.h> 30#include <linux/init.h>
31#include <linux/err.h> 31#include <linux/err.h>
32#include <linux/irqflags.h>
33
34/*
35 * We handle most unaligned accesses in hardware. On the other hand
36 * unaligned DMA can be quite expensive on some Nehalem processors.
37 *
38 * Based on this we disable the IP header alignment in network drivers.
39 */
40#define NET_IP_ALIGN 0
32 41
33#define HBP_NUM 4 42#define HBP_NUM 4
34/* 43/*
@@ -162,6 +171,7 @@ extern void early_cpu_init(void);
162extern void identify_boot_cpu(void); 171extern void identify_boot_cpu(void);
163extern void identify_secondary_cpu(struct cpuinfo_x86 *); 172extern void identify_secondary_cpu(struct cpuinfo_x86 *);
164extern void print_cpu_info(struct cpuinfo_x86 *); 173extern void print_cpu_info(struct cpuinfo_x86 *);
174void print_cpu_msr(struct cpuinfo_x86 *);
165extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); 175extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
166extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 176extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
167extern unsigned short num_cache_leaves; 177extern unsigned short num_cache_leaves;
@@ -374,6 +384,8 @@ union thread_xstate {
374}; 384};
375 385
376struct fpu { 386struct fpu {
387 unsigned int last_cpu;
388 unsigned int has_fpu;
377 union thread_xstate *state; 389 union thread_xstate *state;
378}; 390};
379 391
@@ -451,7 +463,7 @@ struct thread_struct {
451 unsigned long ptrace_dr7; 463 unsigned long ptrace_dr7;
452 /* Fault info: */ 464 /* Fault info: */
453 unsigned long cr2; 465 unsigned long cr2;
454 unsigned long trap_no; 466 unsigned long trap_nr;
455 unsigned long error_code; 467 unsigned long error_code;
456 /* floating point and extended processor state */ 468 /* floating point and extended processor state */
457 struct fpu fpu; 469 struct fpu fpu;
@@ -472,61 +484,6 @@ struct thread_struct {
472 unsigned io_bitmap_max; 484 unsigned io_bitmap_max;
473}; 485};
474 486
475static inline unsigned long native_get_debugreg(int regno)
476{
477 unsigned long val = 0; /* Damn you, gcc! */
478
479 switch (regno) {
480 case 0:
481 asm("mov %%db0, %0" :"=r" (val));
482 break;
483 case 1:
484 asm("mov %%db1, %0" :"=r" (val));
485 break;
486 case 2:
487 asm("mov %%db2, %0" :"=r" (val));
488 break;
489 case 3:
490 asm("mov %%db3, %0" :"=r" (val));
491 break;
492 case 6:
493 asm("mov %%db6, %0" :"=r" (val));
494 break;
495 case 7:
496 asm("mov %%db7, %0" :"=r" (val));
497 break;
498 default:
499 BUG();
500 }
501 return val;
502}
503
504static inline void native_set_debugreg(int regno, unsigned long value)
505{
506 switch (regno) {
507 case 0:
508 asm("mov %0, %%db0" ::"r" (value));
509 break;
510 case 1:
511 asm("mov %0, %%db1" ::"r" (value));
512 break;
513 case 2:
514 asm("mov %0, %%db2" ::"r" (value));
515 break;
516 case 3:
517 asm("mov %0, %%db3" ::"r" (value));
518 break;
519 case 6:
520 asm("mov %0, %%db6" ::"r" (value));
521 break;
522 case 7:
523 asm("mov %0, %%db7" ::"r" (value));
524 break;
525 default:
526 BUG();
527 }
528}
529
530/* 487/*
531 * Set IOPL bits in EFLAGS from given mask 488 * Set IOPL bits in EFLAGS from given mask
532 */ 489 */
@@ -572,14 +529,6 @@ static inline void native_swapgs(void)
572#define __cpuid native_cpuid 529#define __cpuid native_cpuid
573#define paravirt_enabled() 0 530#define paravirt_enabled() 0
574 531
575/*
576 * These special macros can be used to get or set a debugging register
577 */
578#define get_debugreg(var, register) \
579 (var) = native_get_debugreg(register)
580#define set_debugreg(value, register) \
581 native_set_debugreg(register, value)
582
583static inline void load_sp0(struct tss_struct *tss, 532static inline void load_sp0(struct tss_struct *tss,
584 struct thread_struct *thread) 533 struct thread_struct *thread)
585{ 534{
@@ -924,9 +873,9 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
924#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ 873#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
925 0xc0000000 : 0xFFFFe000) 874 0xc0000000 : 0xFFFFe000)
926 875
927#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ 876#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
928 IA32_PAGE_OFFSET : TASK_SIZE_MAX) 877 IA32_PAGE_OFFSET : TASK_SIZE_MAX)
929#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ 878#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
930 IA32_PAGE_OFFSET : TASK_SIZE_MAX) 879 IA32_PAGE_OFFSET : TASK_SIZE_MAX)
931 880
932#define STACK_TOP TASK_SIZE 881#define STACK_TOP TASK_SIZE
@@ -948,6 +897,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
948 897
949#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) 898#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
950extern unsigned long KSTK_ESP(struct task_struct *task); 899extern unsigned long KSTK_ESP(struct task_struct *task);
900
901/*
902 * User space RSP while inside the SYSCALL fast path
903 */
904DECLARE_PER_CPU(unsigned long, old_rsp);
905
951#endif /* CONFIG_X86_64 */ 906#endif /* CONFIG_X86_64 */
952 907
953extern void start_thread(struct pt_regs *regs, unsigned long new_ip, 908extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
@@ -1019,4 +974,14 @@ extern bool cpu_has_amd_erratum(const int *);
1019#define cpu_has_amd_erratum(x) (false) 974#define cpu_has_amd_erratum(x) (false)
1020#endif /* CONFIG_CPU_SUP_AMD */ 975#endif /* CONFIG_CPU_SUP_AMD */
1021 976
977void cpu_idle_wait(void);
978
979extern unsigned long arch_align_stack(unsigned long sp);
980extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
981
982void default_idle(void);
983bool set_pm_idle_to_default(void);
984
985void stop_this_cpu(void *dummy);
986
1022#endif /* _ASM_X86_PROCESSOR_H */ 987#endif /* _ASM_X86_PROCESSOR_H */
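
NET_IP_ALIGN, newly defined to 0 above, is the padding a NIC driver reserves in front of the Ethernet header; the usual value of 2 makes the 14-byte header end on a 4-byte boundary so the IP header is aligned, while x86 skips the padding because its unaligned loads are cheap and, per the comment, unaligned DMA can be expensive on some Nehalem parts. The arithmetic, as a standalone demo:

    #include <stdio.h>

    #define ETH_HLEN 14     /* Ethernet header length */

    int main(void)
    {
            /* Reserving NET_IP_ALIGN bytes shifts where the IP header lands. */
            for (int pad = 0; pad <= 2; pad += 2)
                    printf("NET_IP_ALIGN=%d -> IP header at offset %d (mod 4 = %d)\n",
                           pad, pad + ETH_HLEN, (pad + ETH_HLEN) % 4);
            return 0;
    }
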
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 644dd885f05a..60bef663609a 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -21,7 +21,6 @@
21#include <asm/irq.h> 21#include <asm/irq.h>
22#include <linux/atomic.h> 22#include <linux/atomic.h>
23#include <asm/setup.h> 23#include <asm/setup.h>
24#include <asm/irq_controller.h>
25 24
26#ifdef CONFIG_OF 25#ifdef CONFIG_OF
27extern int of_ioapic; 26extern int of_ioapic;
@@ -43,15 +42,6 @@ extern char cmd_line[COMMAND_LINE_SIZE];
43#define pci_address_to_pio pci_address_to_pio 42#define pci_address_to_pio pci_address_to_pio
44unsigned long pci_address_to_pio(phys_addr_t addr); 43unsigned long pci_address_to_pio(phys_addr_t addr);
45 44
46/**
47 * irq_dispose_mapping - Unmap an interrupt
48 * @virq: linux virq number of the interrupt to unmap
49 *
50 * FIXME: We really should implement proper virq handling like power,
51 * but that's going to be major surgery.
52 */
53static inline void irq_dispose_mapping(unsigned int virq) { }
54
55#define HAVE_ARCH_DEVTREE_FIXUPS 45#define HAVE_ARCH_DEVTREE_FIXUPS
56 46
57#endif /* __ASSEMBLY__ */ 47#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 35664547125b..dcfde52979c3 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -145,7 +145,6 @@ extern unsigned long
145convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); 145convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
146extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, 146extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
147 int error_code, int si_code); 147 int error_code, int si_code);
148void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
149 148
150extern long syscall_trace_enter(struct pt_regs *); 149extern long syscall_trace_enter(struct pt_regs *);
151extern void syscall_trace_leave(struct pt_regs *); 150extern void syscall_trace_leave(struct pt_regs *);
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 5e641715c3fe..165466233ab0 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -212,7 +212,61 @@
212#ifdef __KERNEL__ 212#ifdef __KERNEL__
213#ifndef __ASSEMBLY__ 213#ifndef __ASSEMBLY__
214extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; 214extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
215#endif 215
216#endif 216/*
217 * Load a segment. Fall back on loading the zero
218 * segment if something goes wrong..
219 */
220#define loadsegment(seg, value) \
221do { \
222 unsigned short __val = (value); \
223 \
224 asm volatile(" \n" \
225 "1: movl %k0,%%" #seg " \n" \
226 \
227 ".section .fixup,\"ax\" \n" \
228 "2: xorl %k0,%k0 \n" \
229 " jmp 1b \n" \
230 ".previous \n" \
231 \
232 _ASM_EXTABLE(1b, 2b) \
233 \
234 : "+r" (__val) : : "memory"); \
235} while (0)
236
237/*
238 * Save a segment register away
239 */
240#define savesegment(seg, value) \
241 asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
242
243/*
244 * x86_32 user gs accessors.
245 */
246#ifdef CONFIG_X86_32
247#ifdef CONFIG_X86_32_LAZY_GS
248#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
249#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
250#define task_user_gs(tsk) ((tsk)->thread.gs)
251#define lazy_save_gs(v) savesegment(gs, (v))
252#define lazy_load_gs(v) loadsegment(gs, (v))
253#else /* X86_32_LAZY_GS */
254#define get_user_gs(regs) (u16)((regs)->gs)
255#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
256#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
257#define lazy_save_gs(v) do { } while (0)
258#define lazy_load_gs(v) do { } while (0)
259#endif /* X86_32_LAZY_GS */
260#endif /* X86_32 */
261
262static inline unsigned long get_limit(unsigned long segment)
263{
264 unsigned long __limit;
265 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
266 return __limit + 1;
267}
268
269#endif /* !__ASSEMBLY__ */
270#endif /* __KERNEL__ */
217 271
218#endif /* _ASM_X86_SEGMENT_H */ 272#endif /* _ASM_X86_SEGMENT_H */
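
loadsegment() and savesegment() moved into segment.h from the now-deleted asm/system.h. Their shape is easiest to see in user space, where reading a selector register is harmless (loading one, as loadsegment() does with its fixup section, generally is not). A sketch for x86 gcc/clang:

    #include <stdio.h>

    /* Same shape as the kernel's savesegment(); reading a selector register
     * is unprivileged, so this runs fine in user space. */
    #define savesegment(seg, value) \
            asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")

    int main(void)
    {
            unsigned long cs, ss;

            savesegment(cs, cs);
            savesegment(ss, ss);
            printf("cs=%#lx ss=%#lx\n", cs, ss);
            return 0;
    }
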
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 9756551ec760..d0f19f9fb846 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -47,7 +47,7 @@ extern void reserve_standard_io_resources(void);
47extern void i386_reserve_resources(void); 47extern void i386_reserve_resources(void);
48extern void setup_default_timer_irq(void); 48extern void setup_default_timer_irq(void);
49 49
50#ifdef CONFIG_X86_MRST 50#ifdef CONFIG_X86_INTEL_MID
51extern void x86_mrst_early_setup(void); 51extern void x86_mrst_early_setup(void);
52#else 52#else
53static inline void x86_mrst_early_setup(void) { } 53static inline void x86_mrst_early_setup(void) { }
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 04459d25e66e..4a085383af27 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -230,34 +230,37 @@ struct sigcontext {
230 * User-space might still rely on the old definition: 230 * User-space might still rely on the old definition:
231 */ 231 */
232struct sigcontext { 232struct sigcontext {
233 unsigned long r8; 233 __u64 r8;
234 unsigned long r9; 234 __u64 r9;
235 unsigned long r10; 235 __u64 r10;
236 unsigned long r11; 236 __u64 r11;
237 unsigned long r12; 237 __u64 r12;
238 unsigned long r13; 238 __u64 r13;
239 unsigned long r14; 239 __u64 r14;
240 unsigned long r15; 240 __u64 r15;
241 unsigned long rdi; 241 __u64 rdi;
242 unsigned long rsi; 242 __u64 rsi;
243 unsigned long rbp; 243 __u64 rbp;
244 unsigned long rbx; 244 __u64 rbx;
245 unsigned long rdx; 245 __u64 rdx;
246 unsigned long rax; 246 __u64 rax;
247 unsigned long rcx; 247 __u64 rcx;
248 unsigned long rsp; 248 __u64 rsp;
249 unsigned long rip; 249 __u64 rip;
250 unsigned long eflags; /* RFLAGS */ 250 __u64 eflags; /* RFLAGS */
251 unsigned short cs; 251 __u16 cs;
252 unsigned short gs; 252 __u16 gs;
253 unsigned short fs; 253 __u16 fs;
254 unsigned short __pad0; 254 __u16 __pad0;
255 unsigned long err; 255 __u64 err;
256 unsigned long trapno; 256 __u64 trapno;
257 unsigned long oldmask; 257 __u64 oldmask;
258 unsigned long cr2; 258 __u64 cr2;
259 struct _fpstate __user *fpstate; /* zero when no FPU context */ 259 struct _fpstate __user *fpstate; /* zero when no FPU context */
260 unsigned long reserved1[8]; 260#ifndef __LP64__
261 __u32 __fpstate_pad;
262#endif
263 __u64 reserved1[8];
261}; 264};
262#endif /* !__KERNEL__ */ 265#endif /* !__KERNEL__ */
263 266
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index 4e0fe26d27d3..7c7c27c97daa 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -59,12 +59,25 @@ struct rt_sigframe_ia32 {
59#endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */ 59#endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */
60 60
61#ifdef CONFIG_X86_64 61#ifdef CONFIG_X86_64
62
62struct rt_sigframe { 63struct rt_sigframe {
63 char __user *pretcode; 64 char __user *pretcode;
64 struct ucontext uc; 65 struct ucontext uc;
65 struct siginfo info; 66 struct siginfo info;
66 /* fp state follows here */ 67 /* fp state follows here */
67}; 68};
69
70#ifdef CONFIG_X86_X32_ABI
71
72struct rt_sigframe_x32 {
73 u64 pretcode;
74 struct ucontext_x32 uc;
75 compat_siginfo_t info;
76 /* fp state follows here */
77};
78
79#endif /* CONFIG_X86_X32_ABI */
80
68#endif /* CONFIG_X86_64 */ 81#endif /* CONFIG_X86_64 */
69 82
70#endif /* _ASM_X86_SIGFRAME_H */ 83#endif /* _ASM_X86_SIGFRAME_H */
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
new file mode 100644
index 000000000000..ada93b3b8c66
--- /dev/null
+++ b/arch/x86/include/asm/sighandling.h
@@ -0,0 +1,24 @@
1#ifndef _ASM_X86_SIGHANDLING_H
2#define _ASM_X86_SIGHANDLING_H
3
4#include <linux/compiler.h>
5#include <linux/ptrace.h>
6#include <linux/signal.h>
7
8#include <asm/processor-flags.h>
9
10#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
11
12#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
13 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
14 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
15 X86_EFLAGS_CF)
16
17void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
18
19int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
20 unsigned long *pax);
21int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
22 struct pt_regs *regs, unsigned long mask);
23
24#endif /* _ASM_X86_SIGHANDLING_H */
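
__FIX_EFLAGS in the new header is the whitelist of RFLAGS bits that sigreturn will accept from a signal frame; everything outside the mask keeps its live value. A sketch of that merge, assuming exactly that whitelist semantics and using a reduced bit set:

    #include <stdio.h>

    #define X86_EFLAGS_CF 0x0001UL
    #define X86_EFLAGS_ZF 0x0040UL
    #define X86_EFLAGS_IF 0x0200UL  /* interrupt flag: not user-restorable */

    /* Reduced stand-in for the kernel's __FIX_EFLAGS whitelist. */
    #define FIX_EFLAGS (X86_EFLAGS_CF | X86_EFLAGS_ZF)

    int main(void)
    {
            unsigned long live  = X86_EFLAGS_IF;    /* current flags */
            unsigned long frame = X86_EFLAGS_CF;    /* from the sigcontext */

            /* Take only whitelisted bits from the signal frame. */
            unsigned long merged = (live & ~FIX_EFLAGS) | (frame & FIX_EFLAGS);

            printf("merged=%#lx (IF kept live, CF taken from the frame)\n",
                   merged);                         /* 0x201 */
            return 0;
    }
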
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 73b11bc0ae6f..0434c400287c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -225,5 +225,11 @@ extern int hard_smp_processor_id(void);
225 225
226#endif /* CONFIG_X86_LOCAL_APIC */ 226#endif /* CONFIG_X86_LOCAL_APIC */
227 227
228#ifdef CONFIG_DEBUG_NMI_SELFTEST
229extern void nmi_selftest(void);
230#else
231#define nmi_selftest() do { } while (0)
232#endif
233
228#endif /* __ASSEMBLY__ */ 234#endif /* __ASSEMBLY__ */
229#endif /* _ASM_X86_SMP_H */ 235#endif /* _ASM_X86_SMP_H */
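
The nmi_selftest() fallback above uses the classic do { } while (0) stub so the no-op version still parses as a single statement. A tiny demonstration of why that matters:

    #include <stdio.h>

    #ifdef CONFIG_DEBUG_NMI_SELFTEST
    void nmi_selftest(void);                /* real version lives elsewhere */
    #else
    #define nmi_selftest() do { } while (0) /* compiles away, one statement */
    #endif

    int main(void)
    {
            int debug = 0;

            /* A bare ";" or "{}" stub could break this un-braced if/else or
             * trigger empty-statement warnings; do { } while (0) cannot. */
            if (debug)
                    nmi_selftest();
            else
                    printf("selftest skipped\n");
            return 0;
    }
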
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
new file mode 100644
index 000000000000..41fc93a2e225
--- /dev/null
+++ b/arch/x86/include/asm/special_insns.h
@@ -0,0 +1,199 @@
1#ifndef _ASM_X86_SPECIAL_INSNS_H
2#define _ASM_X86_SPECIAL_INSNS_H
3
4
5#ifdef __KERNEL__
6
7static inline void native_clts(void)
8{
9 asm volatile("clts");
10}
11
12/*
13 * Volatile isn't enough to prevent the compiler from reordering the
14 * read/write functions for the control registers and messing everything up.
15 * A memory clobber would solve the problem, but would prevent reordering of
 16 * all loads/stores around it, which can hurt performance. The solution is to
17 * use a variable and mimic reads and writes to it to enforce serialization
18 */
19static unsigned long __force_order;
20
21static inline unsigned long native_read_cr0(void)
22{
23 unsigned long val;
24 asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
25 return val;
26}
27
28static inline void native_write_cr0(unsigned long val)
29{
30 asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
31}
32
33static inline unsigned long native_read_cr2(void)
34{
35 unsigned long val;
36 asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
37 return val;
38}
39
40static inline void native_write_cr2(unsigned long val)
41{
42 asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
43}
44
45static inline unsigned long native_read_cr3(void)
46{
47 unsigned long val;
48 asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
49 return val;
50}
51
52static inline void native_write_cr3(unsigned long val)
53{
54 asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
55}
56
57static inline unsigned long native_read_cr4(void)
58{
59 unsigned long val;
60 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
61 return val;
62}
63
64static inline unsigned long native_read_cr4_safe(void)
65{
66 unsigned long val;
67 /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
68 * exists, so it will never fail. */
69#ifdef CONFIG_X86_32
70 asm volatile("1: mov %%cr4, %0\n"
71 "2:\n"
72 _ASM_EXTABLE(1b, 2b)
73 : "=r" (val), "=m" (__force_order) : "0" (0));
74#else
75 val = native_read_cr4();
76#endif
77 return val;
78}
79
80static inline void native_write_cr4(unsigned long val)
81{
82 asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
83}
84
85#ifdef CONFIG_X86_64
86static inline unsigned long native_read_cr8(void)
87{
88 unsigned long cr8;
89 asm volatile("movq %%cr8,%0" : "=r" (cr8));
90 return cr8;
91}
92
93static inline void native_write_cr8(unsigned long val)
94{
95 asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
96}
97#endif
98
99static inline void native_wbinvd(void)
100{
101 asm volatile("wbinvd": : :"memory");
102}
103
104extern void native_load_gs_index(unsigned);
105
106#ifdef CONFIG_PARAVIRT
107#include <asm/paravirt.h>
108#else
109
110static inline unsigned long read_cr0(void)
111{
112 return native_read_cr0();
113}
114
115static inline void write_cr0(unsigned long x)
116{
117 native_write_cr0(x);
118}
119
120static inline unsigned long read_cr2(void)
121{
122 return native_read_cr2();
123}
124
125static inline void write_cr2(unsigned long x)
126{
127 native_write_cr2(x);
128}
129
130static inline unsigned long read_cr3(void)
131{
132 return native_read_cr3();
133}
134
135static inline void write_cr3(unsigned long x)
136{
137 native_write_cr3(x);
138}
139
140static inline unsigned long read_cr4(void)
141{
142 return native_read_cr4();
143}
144
145static inline unsigned long read_cr4_safe(void)
146{
147 return native_read_cr4_safe();
148}
149
150static inline void write_cr4(unsigned long x)
151{
152 native_write_cr4(x);
153}
154
155static inline void wbinvd(void)
156{
157 native_wbinvd();
158}
159
160#ifdef CONFIG_X86_64
161
162static inline unsigned long read_cr8(void)
163{
164 return native_read_cr8();
165}
166
167static inline void write_cr8(unsigned long x)
168{
169 native_write_cr8(x);
170}
171
172static inline void load_gs_index(unsigned selector)
173{
174 native_load_gs_index(selector);
175}
176
177#endif
178
179/* Clear the 'TS' bit */
180static inline void clts(void)
181{
182 native_clts();
183}
184
185#endif/* CONFIG_PARAVIRT */
186
187#define stts() write_cr0(read_cr0() | X86_CR0_TS)
188
189static inline void clflush(volatile void *__p)
190{
191 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
192}
193
194#define nop() asm volatile ("nop")
195
196
197#endif /* __KERNEL__ */
198
199#endif /* _ASM_X86_SPECIAL_INSNS_H */
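
The __force_order comment in the new header describes a compiler-ordering trick: every CR write names the variable as an input and every CR read as an output, giving the accessors artificial dependencies that keep their program order without a full memory clobber. A user-space toy with the same constraint shape (empty asm bodies stand in for the privileged mov-to/from-CR instructions):

    #include <stdio.h>

    static unsigned long __force_order;

    /* Mirrors native_write_cr3(): the asm claims to READ __force_order. */
    static inline void fake_write(unsigned long val)
    {
            asm volatile("" : : "r" (val), "m" (__force_order));
    }

    /* Mirrors native_read_cr3(): the asm claims to WRITE __force_order,
     * so the compiler must keep it ordered against any fake_write(). */
    static inline unsigned long fake_read(void)
    {
            unsigned long val;

            asm volatile("mov $42, %0" : "=r" (val), "=m" (__force_order));
            return val;
    }

    int main(void)
    {
            fake_write(1);
            printf("%lu\n", fake_read());   /* prints 42 */
            return 0;
    }
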
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a82c2bf504b6..76bfa2cf301d 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
88{ 88{
89 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); 89 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
90 90
91 return !!(tmp.tail ^ tmp.head); 91 return tmp.tail != tmp.head;
92} 92}
93 93
94static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) 94static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
95{ 95{
96 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); 96 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
97 97
98 return ((tmp.tail - tmp.head) & TICKET_MASK) > 1; 98 return (__ticket_t)(tmp.tail - tmp.head) > 1;
99} 99}
100 100
101#ifndef CONFIG_PARAVIRT_SPINLOCKS 101#ifndef CONFIG_PARAVIRT_SPINLOCKS
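
The contended check above now casts the head/tail difference back to the ticket type instead of masking with the removed TICKET_MASK; unsigned wraparound makes the two equivalent. A standalone demo with an 8-bit ticket:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint8_t ticket_t;       /* small ticket type, as with NR_CPUS < 256 */

    int main(void)
    {
            /* tail has wrapped past zero while head has not. */
            ticket_t head = 0xfe, tail = 0x01;

            /* Plain int subtraction goes negative after the wrap... */
            printf("int diff:    %d\n", tail - head);               /* -253 */
            /* ...but reducing mod 2^8 recovers the true queue depth. */
            printf("ticket diff: %u\n", (ticket_t)(tail - head));   /* 3 */
            return 0;
    }
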
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 8ebd5df7451e..ad0ad07fc006 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
16#endif 16#endif
17 17
18#define TICKET_SHIFT (sizeof(__ticket_t) * 8) 18#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
19#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
20 19
21typedef struct arch_spinlock { 20typedef struct arch_spinlock {
22 union { 21 union {
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 157517763565..b5d9533d2c38 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -38,7 +38,6 @@
38#include <asm/tsc.h> 38#include <asm/tsc.h>
39#include <asm/processor.h> 39#include <asm/processor.h>
40#include <asm/percpu.h> 40#include <asm/percpu.h>
41#include <asm/system.h>
42#include <asm/desc.h> 41#include <asm/desc.h>
43#include <linux/random.h> 42#include <linux/random.h>
44 43
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
new file mode 100644
index 000000000000..4ec45b3abba1
--- /dev/null
+++ b/arch/x86/include/asm/switch_to.h
@@ -0,0 +1,129 @@
1#ifndef _ASM_X86_SWITCH_TO_H
2#define _ASM_X86_SWITCH_TO_H
3
4struct task_struct; /* one of the stranger aspects of C forward declarations */
5struct task_struct *__switch_to(struct task_struct *prev,
6 struct task_struct *next);
7struct tss_struct;
8void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
9 struct tss_struct *tss);
10
11#ifdef CONFIG_X86_32
12
13#ifdef CONFIG_CC_STACKPROTECTOR
14#define __switch_canary \
15 "movl %P[task_canary](%[next]), %%ebx\n\t" \
16 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
17#define __switch_canary_oparam \
18 , [stack_canary] "=m" (stack_canary.canary)
19#define __switch_canary_iparam \
20 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
21#else /* CC_STACKPROTECTOR */
22#define __switch_canary
23#define __switch_canary_oparam
24#define __switch_canary_iparam
25#endif /* CC_STACKPROTECTOR */
26
27/*
28 * Saving eflags is important. It switches not only IOPL between tasks,
29 * it also protects other tasks from NT leaking through sysenter etc.
30 */
31#define switch_to(prev, next, last) \
32do { \
33 /* \
34 * Context-switching clobbers all registers, so we clobber \
35 * them explicitly, via unused output variables. \
36 * (EAX and EBP is not listed because EBP is saved/restored \
37 * explicitly for wchan access and EAX is the return value of \
38 * __switch_to()) \
39 */ \
40 unsigned long ebx, ecx, edx, esi, edi; \
41 \
42 asm volatile("pushfl\n\t" /* save flags */ \
43 "pushl %%ebp\n\t" /* save EBP */ \
44 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
45 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
46 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
47 "pushl %[next_ip]\n\t" /* restore EIP */ \
48 __switch_canary \
49 "jmp __switch_to\n" /* regparm call */ \
50 "1:\t" \
51 "popl %%ebp\n\t" /* restore EBP */ \
52 "popfl\n" /* restore flags */ \
53 \
54 /* output parameters */ \
55 : [prev_sp] "=m" (prev->thread.sp), \
56 [prev_ip] "=m" (prev->thread.ip), \
57 "=a" (last), \
58 \
59 /* clobbered output registers: */ \
60 "=b" (ebx), "=c" (ecx), "=d" (edx), \
61 "=S" (esi), "=D" (edi) \
62 \
63 __switch_canary_oparam \
64 \
65 /* input parameters: */ \
66 : [next_sp] "m" (next->thread.sp), \
67 [next_ip] "m" (next->thread.ip), \
68 \
69 /* regparm parameters for __switch_to(): */ \
70 [prev] "a" (prev), \
71 [next] "d" (next) \
72 \
73 __switch_canary_iparam \
74 \
75 : /* reloaded segment registers */ \
76 "memory"); \
77} while (0)
78
79#else /* CONFIG_X86_32 */
80
81/* frame pointer must be last for get_wchan */
82#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
83#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
84
85#define __EXTRA_CLOBBER \
86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
87 "r12", "r13", "r14", "r15"
88
89#ifdef CONFIG_CC_STACKPROTECTOR
90#define __switch_canary \
91 "movq %P[task_canary](%%rsi),%%r8\n\t" \
92 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
93#define __switch_canary_oparam \
94 , [gs_canary] "=m" (irq_stack_union.stack_canary)
95#define __switch_canary_iparam \
96 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
97#else /* CC_STACKPROTECTOR */
98#define __switch_canary
99#define __switch_canary_oparam
100#define __switch_canary_iparam
101#endif /* CC_STACKPROTECTOR */
102
103/* Save and restore flags to keep NT from leaking across tasks */
104#define switch_to(prev, next, last) \
105 asm volatile(SAVE_CONTEXT \
106 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
107 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
108 "call __switch_to\n\t" \
109 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
110 __switch_canary \
111 "movq %P[thread_info](%%rsi),%%r8\n\t" \
112 "movq %%rax,%%rdi\n\t" \
113 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
114 "jnz ret_from_fork\n\t" \
115 RESTORE_CONTEXT \
116 : "=a" (last) \
117 __switch_canary_oparam \
118 : [next] "S" (next), [prev] "D" (prev), \
119 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
120 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
121 [_tif_fork] "i" (_TIF_FORK), \
122 [thread_info] "i" (offsetof(struct task_struct, stack)), \
123 [current_task] "m" (current_task) \
124 __switch_canary_iparam \
125 : "memory", "cc" __EXTRA_CLOBBER)
126
127#endif /* CONFIG_X86_32 */
128
129#endif /* _ASM_X86_SWITCH_TO_H */
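
switch_to() is necessarily raw asm; as a user-space analogue only (this is not the kernel's mechanism), the POSIX ucontext API shows the same save-the-stack-pointer-and-resume-elsewhere shape:

    #include <stdio.h>
    #include <stdlib.h>
    #include <ucontext.h>

    static ucontext_t main_ctx, worker_ctx;

    static void worker(void)
    {
            printf("worker: running on its own stack\n");
            /* Save worker, resume main -- the prev/next swap in miniature. */
            swapcontext(&worker_ctx, &main_ctx);
    }

    int main(void)
    {
            char *stack = malloc(64 * 1024);

            getcontext(&worker_ctx);
            worker_ctx.uc_stack.ss_sp = stack;
            worker_ctx.uc_stack.ss_size = 64 * 1024;
            worker_ctx.uc_link = &main_ctx;
            makecontext(&worker_ctx, worker, 0);

            printf("main: switching to worker\n");
            swapcontext(&main_ctx, &worker_ctx);    /* save main, run worker */
            printf("main: back\n");
            free(stack);
            return 0;
    }
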
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index cb238526a9f1..3fda9db48819 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -10,6 +10,8 @@
10#ifndef _ASM_X86_SYS_IA32_H 10#ifndef _ASM_X86_SYS_IA32_H
11#define _ASM_X86_SYS_IA32_H 11#define _ASM_X86_SYS_IA32_H
12 12
13#ifdef CONFIG_COMPAT
14
13#include <linux/compiler.h> 15#include <linux/compiler.h>
14#include <linux/linkage.h> 16#include <linux/linkage.h>
15#include <linux/types.h> 17#include <linux/types.h>
@@ -36,8 +38,6 @@ asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
36 struct sigaction32 __user *, unsigned int); 38 struct sigaction32 __user *, unsigned int);
37asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, 39asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
38 struct old_sigaction32 __user *); 40 struct old_sigaction32 __user *);
39asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
40 compat_sigset_t __user *, unsigned int);
41asmlinkage long sys32_alarm(unsigned int); 41asmlinkage long sys32_alarm(unsigned int);
42 42
43asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); 43asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
@@ -83,4 +83,7 @@ asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
83 83
84asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int, 84asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int,
85 const char __user *); 85 const char __user *);
86
87#endif /* CONFIG_COMPAT */
88
86#endif /* _ASM_X86_SYS_IA32_H */ 89#endif /* _ASM_X86_SYS_IA32_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index c4a348f7bd43..386b78686c4d 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -15,6 +15,8 @@
15 15
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/err.h> 17#include <linux/err.h>
18#include <asm/asm-offsets.h> /* For NR_syscalls */
19#include <asm/unistd.h>
18 20
19extern const unsigned long sys_call_table[]; 21extern const unsigned long sys_call_table[];
20 22
@@ -25,13 +27,13 @@ extern const unsigned long sys_call_table[];
25 */ 27 */
26static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) 28static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
27{ 29{
28 return regs->orig_ax; 30 return regs->orig_ax & __SYSCALL_MASK;
29} 31}
30 32
31static inline void syscall_rollback(struct task_struct *task, 33static inline void syscall_rollback(struct task_struct *task,
32 struct pt_regs *regs) 34 struct pt_regs *regs)
33{ 35{
34 regs->ax = regs->orig_ax; 36 regs->ax = regs->orig_ax & __SYSCALL_MASK;
35} 37}
36 38
37static inline long syscall_get_error(struct task_struct *task, 39static inline long syscall_get_error(struct task_struct *task,
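
syscall_get_nr() and syscall_rollback() now mask orig_ax with __SYSCALL_MASK. My understanding is that this strips the x32 marker bit (0x40000000) so tracers see the same syscall number for the x32 and 64-bit flavours of a call; the sketch below assumes that bit layout:

    #include <stdio.h>

    /* Assumed layout: x32 system calls set bit 30 of the syscall number. */
    #define X32_SYSCALL_BIT 0x40000000UL
    #define SYSCALL_MASK    (~X32_SYSCALL_BIT)

    int main(void)
    {
            unsigned long orig_ax = X32_SYSCALL_BIT | 1;    /* x32 flavour */

            /* A tracer sees the plain number either way. */
            printf("raw=%#lx masked=%lu\n", orig_ax, orig_ax & SYSCALL_MASK);
            return 0;
    }
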
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
deleted file mode 100644
index 2d2f01ce6dcb..000000000000
--- a/arch/x86/include/asm/system.h
+++ /dev/null
@@ -1,523 +0,0 @@
1#ifndef _ASM_X86_SYSTEM_H
2#define _ASM_X86_SYSTEM_H
3
4#include <asm/asm.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h>
9
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/* entries in ARCH_DLINFO: */
14#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
15# define AT_VECTOR_SIZE_ARCH 2
16#else /* else it's non-compat x86-64 */
17# define AT_VECTOR_SIZE_ARCH 1
18#endif
19
20struct task_struct; /* one of the stranger aspects of C forward declarations */
21struct task_struct *__switch_to(struct task_struct *prev,
22 struct task_struct *next);
23struct tss_struct;
24void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
25 struct tss_struct *tss);
26extern void show_regs_common(void);
27
28#ifdef CONFIG_X86_32
29
30#ifdef CONFIG_CC_STACKPROTECTOR
31#define __switch_canary \
32 "movl %P[task_canary](%[next]), %%ebx\n\t" \
33 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
34#define __switch_canary_oparam \
35 , [stack_canary] "=m" (stack_canary.canary)
36#define __switch_canary_iparam \
37 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
38#else /* CC_STACKPROTECTOR */
39#define __switch_canary
40#define __switch_canary_oparam
41#define __switch_canary_iparam
42#endif /* CC_STACKPROTECTOR */
43
44/*
45 * Saving eflags is important. It switches not only IOPL between tasks,
46 * it also protects other tasks from NT leaking through sysenter etc.
47 */
48#define switch_to(prev, next, last) \
49do { \
50 /* \
51 * Context-switching clobbers all registers, so we clobber \
52 * them explicitly, via unused output variables. \
53 * (EAX and EBP is not listed because EBP is saved/restored \
54 * explicitly for wchan access and EAX is the return value of \
55 * __switch_to()) \
56 */ \
57 unsigned long ebx, ecx, edx, esi, edi; \
58 \
59 asm volatile("pushfl\n\t" /* save flags */ \
60 "pushl %%ebp\n\t" /* save EBP */ \
61 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
62 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
63 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
64 "pushl %[next_ip]\n\t" /* restore EIP */ \
65 __switch_canary \
66 "jmp __switch_to\n" /* regparm call */ \
67 "1:\t" \
68 "popl %%ebp\n\t" /* restore EBP */ \
69 "popfl\n" /* restore flags */ \
70 \
71 /* output parameters */ \
72 : [prev_sp] "=m" (prev->thread.sp), \
73 [prev_ip] "=m" (prev->thread.ip), \
74 "=a" (last), \
75 \
76 /* clobbered output registers: */ \
77 "=b" (ebx), "=c" (ecx), "=d" (edx), \
78 "=S" (esi), "=D" (edi) \
79 \
80 __switch_canary_oparam \
81 \
82 /* input parameters: */ \
83 : [next_sp] "m" (next->thread.sp), \
84 [next_ip] "m" (next->thread.ip), \
85 \
86 /* regparm parameters for __switch_to(): */ \
87 [prev] "a" (prev), \
88 [next] "d" (next) \
89 \
90 __switch_canary_iparam \
91 \
92 : /* reloaded segment registers */ \
93 "memory"); \
94} while (0)
95
96/*
97 * disable hlt during certain critical i/o operations
98 */
99#define HAVE_DISABLE_HLT
100#else
101
102/* frame pointer must be last for get_wchan */
103#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
104#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
105
106#define __EXTRA_CLOBBER \
107 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
108 "r12", "r13", "r14", "r15"
109
110#ifdef CONFIG_CC_STACKPROTECTOR
111#define __switch_canary \
112 "movq %P[task_canary](%%rsi),%%r8\n\t" \
113 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
114#define __switch_canary_oparam \
115 , [gs_canary] "=m" (irq_stack_union.stack_canary)
116#define __switch_canary_iparam \
117 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
118#else /* CC_STACKPROTECTOR */
119#define __switch_canary
120#define __switch_canary_oparam
121#define __switch_canary_iparam
122#endif /* CC_STACKPROTECTOR */
123
124/* Save and restore flags to keep NT from leaking across tasks */
125#define switch_to(prev, next, last) \
126 asm volatile(SAVE_CONTEXT \
127 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
128 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
129 "call __switch_to\n\t" \
130 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
131 __switch_canary \
132 "movq %P[thread_info](%%rsi),%%r8\n\t" \
133 "movq %%rax,%%rdi\n\t" \
134 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
135 "jnz ret_from_fork\n\t" \
136 RESTORE_CONTEXT \
137 : "=a" (last) \
138 __switch_canary_oparam \
139 : [next] "S" (next), [prev] "D" (prev), \
140 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
141 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
142 [_tif_fork] "i" (_TIF_FORK), \
143 [thread_info] "i" (offsetof(struct task_struct, stack)), \
144 [current_task] "m" (current_task) \
145 __switch_canary_iparam \
146 : "memory", "cc" __EXTRA_CLOBBER)
147#endif
148
149#ifdef __KERNEL__
150
151extern void native_load_gs_index(unsigned);
152
153/*
154 * Load a segment. Fall back on loading the zero
155 * segment if something goes wrong.
156 */
157#define loadsegment(seg, value) \
158do { \
159 unsigned short __val = (value); \
160 \
161 asm volatile(" \n" \
162 "1: movl %k0,%%" #seg " \n" \
163 \
164 ".section .fixup,\"ax\" \n" \
165 "2: xorl %k0,%k0 \n" \
166 " jmp 1b \n" \
167 ".previous \n" \
168 \
169 _ASM_EXTABLE(1b, 2b) \
170 \
171 : "+r" (__val) : : "memory"); \
172} while (0)
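/*
 * Editorial example (sketch): restoring a selector that userspace may
 * have invalidated. If the load faults, the fixup above silently
 * substitutes the null selector instead of killing the task:
 *
 *	loadsegment(fs, next->thread.fsindex);
 *
 * ('next->thread.fsindex' is illustrative, not defined in this file.)
 */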
173
174/*
175 * Save a segment register away
176 */
177#define savesegment(seg, value) \
178 asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
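/*
 * Editorial example (sketch): snapshotting %gs into a local before a
 * context switch:
 *
 *	unsigned int gsindex;
 *	savesegment(gs, gsindex);
 */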
179
180/*
181 * x86_32 user gs accessors.
182 */
183#ifdef CONFIG_X86_32
184#ifdef CONFIG_X86_32_LAZY_GS
185#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
186#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
187#define task_user_gs(tsk) ((tsk)->thread.gs)
188#define lazy_save_gs(v) savesegment(gs, (v))
189#define lazy_load_gs(v) loadsegment(gs, (v))
190#else /* X86_32_LAZY_GS */
191#define get_user_gs(regs) (u16)((regs)->gs)
192#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
193#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
194#define lazy_save_gs(v) do { } while (0)
195#define lazy_load_gs(v) do { } while (0)
196#endif /* X86_32_LAZY_GS */
197#endif /* X86_32 */
198
199static inline unsigned long get_limit(unsigned long segment)
200{
201 unsigned long __limit;
202 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
203 return __limit + 1;
204}
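/*
 * Editorial example (sketch): get_limit() might be used to bound-check
 * a user-supplied code segment, e.g. get_limit(regs->cs), where 'regs'
 * stands in for a struct pt_regs pointer; the names are illustrative.
 */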
205
206static inline void native_clts(void)
207{
208 asm volatile("clts");
209}
210
211/*
212 * Volatile isn't enough to prevent the compiler from reordering the
213 * read/write functions for the control registers and messing everything up.
214 * A memory clobber would solve the problem, but would prevent reordering of
215 * all loads/stores around it, which can hurt performance. The solution is to
216 * use a variable and mimic reads and writes to it to enforce serialization.
217 */
218static unsigned long __force_order;
219
220static inline unsigned long native_read_cr0(void)
221{
222 unsigned long val;
223 asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
224 return val;
225}
226
227static inline void native_write_cr0(unsigned long val)
228{
229 asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
230}
231
232static inline unsigned long native_read_cr2(void)
233{
234 unsigned long val;
235 asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
236 return val;
237}
238
239static inline void native_write_cr2(unsigned long val)
240{
241 asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
242}
243
244static inline unsigned long native_read_cr3(void)
245{
246 unsigned long val;
247 asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
248 return val;
249}
250
251static inline void native_write_cr3(unsigned long val)
252{
253 asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
254}
255
256static inline unsigned long native_read_cr4(void)
257{
258 unsigned long val;
259 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
260 return val;
261}
262
263static inline unsigned long native_read_cr4_safe(void)
264{
265 unsigned long val;
266 /* This could fault if %cr4 does not exist. On x86_64, CR4 always
267 * exists, so the read can never fault. */
268#ifdef CONFIG_X86_32
269 asm volatile("1: mov %%cr4, %0\n"
270 "2:\n"
271 _ASM_EXTABLE(1b, 2b)
272 : "=r" (val), "=m" (__force_order) : "0" (0));
273#else
274 val = native_read_cr4();
275#endif
276 return val;
277}
278
279static inline void native_write_cr4(unsigned long val)
280{
281 asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
282}
283
284#ifdef CONFIG_X86_64
285static inline unsigned long native_read_cr8(void)
286{
287 unsigned long cr8;
288 asm volatile("movq %%cr8,%0" : "=r" (cr8));
289 return cr8;
290}
291
292static inline void native_write_cr8(unsigned long val)
293{
294 asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
295}
296#endif
297
298static inline void native_wbinvd(void)
299{
300 asm volatile("wbinvd": : :"memory");
301}
302
303#ifdef CONFIG_PARAVIRT
304#include <asm/paravirt.h>
305#else
306
307static inline unsigned long read_cr0(void)
308{
309 return native_read_cr0();
310}
311
312static inline void write_cr0(unsigned long x)
313{
314 native_write_cr0(x);
315}
316
317static inline unsigned long read_cr2(void)
318{
319 return native_read_cr2();
320}
321
322static inline void write_cr2(unsigned long x)
323{
324 native_write_cr2(x);
325}
326
327static inline unsigned long read_cr3(void)
328{
329 return native_read_cr3();
330}
331
332static inline void write_cr3(unsigned long x)
333{
334 native_write_cr3(x);
335}
336
337static inline unsigned long read_cr4(void)
338{
339 return native_read_cr4();
340}
341
342static inline unsigned long read_cr4_safe(void)
343{
344 return native_read_cr4_safe();
345}
346
347static inline void write_cr4(unsigned long x)
348{
349 native_write_cr4(x);
350}
351
352static inline void wbinvd(void)
353{
354 native_wbinvd();
355}
356
357#ifdef CONFIG_X86_64
358
359static inline unsigned long read_cr8(void)
360{
361 return native_read_cr8();
362}
363
364static inline void write_cr8(unsigned long x)
365{
366 native_write_cr8(x);
367}
368
369static inline void load_gs_index(unsigned selector)
370{
371 native_load_gs_index(selector);
372}
373
374#endif
375
376/* Clear the 'TS' bit */
377static inline void clts(void)
378{
379 native_clts();
380}
381
382#endif/* CONFIG_PARAVIRT */
383
384#define stts() write_cr0(read_cr0() | X86_CR0_TS)
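/*
 * Editorial example (sketch): clts()/stts() form the classic lazy-FPU
 * pairing: clear TS before touching FPU/SSE state so the access does
 * not trap, then set it again so the next FPU user triggers #NM:
 *
 *	clts();
 *	...use FPU/SSE registers...
 *	stts();
 */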
385
386#endif /* __KERNEL__ */
387
388static inline void clflush(volatile void *__p)
389{
390 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
391}
392
393#define nop() asm volatile ("nop")
394
395void disable_hlt(void);
396void enable_hlt(void);
397
398void cpu_idle_wait(void);
399
400extern unsigned long arch_align_stack(unsigned long sp);
401extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
402
403void default_idle(void);
404bool set_pm_idle_to_default(void);
405
406void stop_this_cpu(void *dummy);
407
408/*
409 * Force strict CPU ordering.
410 * And yes, this is required on UP too when we're talking
411 * to devices.
412 */
413#ifdef CONFIG_X86_32
414/*
415 * Some non-Intel clones support out-of-order stores. wmb() ceases to be a
416 * nop for these.
417 */
418#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
419#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
420#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
421#else
422#define mb() asm volatile("mfence":::"memory")
423#define rmb() asm volatile("lfence":::"memory")
424#define wmb() asm volatile("sfence" ::: "memory")
425#endif
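/*
 * Editorial example (sketch): the canonical wmb() use is ordering a
 * descriptor update before the store that hands it to another observer
 * (another CPU or a device):
 *
 *	desc->addr = dma_addr;
 *	wmb();
 *	writel(1, doorbell);
 *
 * ('desc', 'dma_addr' and 'doorbell' are illustrative names only.)
 */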
426
427/**
428 * read_barrier_depends - Flush all pending reads that subsequent reads
429 * depend on.
430 *
431 * No data-dependent reads from memory-like regions are ever reordered
432 * over this barrier. All reads preceding this primitive are guaranteed
433 * to access memory (but not necessarily other CPUs' caches) before any
434 * reads following this primitive that depend on the data returned by
435 * any of the preceding reads. This primitive is much lighter weight than
436 * rmb() on most CPUs, and is never heavier weight than
437 * rmb().
438 *
439 * These ordering constraints are respected by both the local CPU
440 * and the compiler.
441 *
442 * Ordering is not guaranteed by anything other than these primitives,
443 * not even by data dependencies. See the documentation for
444 * memory_barrier() for examples and URLs to more information.
445 *
446 * For example, the following code would force ordering (the initial
447 * value of "a" is zero, "b" is one, and "p" is "&a"):
448 *
449 * <programlisting>
450 * CPU 0 CPU 1
451 *
452 * b = 2;
453 * memory_barrier();
454 * p = &b; q = p;
455 * read_barrier_depends();
456 * d = *q;
457 * </programlisting>
458 *
459 * because the read of "*q" depends on the read of "p" and these
460 * two reads are separated by a read_barrier_depends(). However,
461 * the following code, with the same initial values for "a" and "b":
462 *
463 * <programlisting>
464 * CPU 0 CPU 1
465 *
466 * a = 2;
467 * memory_barrier();
468 * b = 3; y = b;
469 * read_barrier_depends();
470 * x = a;
471 * </programlisting>
472 *
473 * does not enforce ordering, since there is no data dependency between
474 * the read of "a" and the read of "b". Therefore, on some CPUs, such
475 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
476 * in cases like this where there are no data dependencies.
477 **/
478
479#define read_barrier_depends() do { } while (0)
480
481#ifdef CONFIG_SMP
482#define smp_mb() mb()
483#ifdef CONFIG_X86_PPRO_FENCE
484# define smp_rmb() rmb()
485#else
486# define smp_rmb() barrier()
487#endif
488#ifdef CONFIG_X86_OOSTORE
489# define smp_wmb() wmb()
490#else
491# define smp_wmb() barrier()
492#endif
493#define smp_read_barrier_depends() read_barrier_depends()
494#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
495#else
496#define smp_mb() barrier()
497#define smp_rmb() barrier()
498#define smp_wmb() barrier()
499#define smp_read_barrier_depends() do { } while (0)
500#define set_mb(var, value) do { var = value; barrier(); } while (0)
501#endif
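/*
 * Editorial example (sketch): set_mb() is used where a store must be
 * globally visible before a following test, e.g. in sleep/wakeup paths:
 *
 *	set_mb(current->state, TASK_UNINTERRUPTIBLE);
 *	if (!condition)
 *		schedule();
 *
 * ('condition' stands in for whatever the caller is waiting on.)
 */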
502
503/*
504 * Stop RDTSC speculation. This is needed when you need to use RDTSC
505 * (or get_cycles or vread, which may access the TSC) in a defined
506 * code region.
507 *
508 * (Could use a three-way alternative for this if there were one.)
509 */
510static __always_inline void rdtsc_barrier(void)
511{
512 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
513 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
514}
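/*
 * Editorial example (sketch): bracketing a TSC read so the CPU can
 * neither hoist it above earlier work nor sink it below later work:
 *
 *	rdtsc_barrier();
 *	cycles = get_cycles();
 *	rdtsc_barrier();
 */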
515
516/*
517 * We handle most unaligned accesses in hardware. On the other hand
518 * unaligned DMA can be quite expensive on some Nehalem processors.
519 *
520 * Based on this we disable the IP header alignment in network drivers.
521 */
522#define NET_IP_ALIGN 0
523#endif /* _ASM_X86_SYSTEM_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 74047159d0ab..ad6df8ccd715 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,8 +40,8 @@ struct thread_info {
 	 */
 	__u8			supervisor_stack[0];
 #endif
-	int			sig_on_uaccess_error:1;
-	int			uaccess_err:1;	/* uaccess failed */
+	unsigned int		sig_on_uaccess_error:1;
+	unsigned int		uaccess_err:1;	/* uaccess failed */
 };
 
 #define INIT_THREAD_INFO(tsk) \
@@ -86,7 +86,7 @@ struct thread_info {
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
-#define TIF_IA32		17	/* 32bit process */
+#define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_FORK		18	/* ret_from_fork */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_DEBUG		21	/* uses debug registers */
@@ -95,6 +95,8 @@ struct thread_info {
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
+#define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
+#define TIF_X32			30	/* 32-bit native x86-64 binary */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -116,6 +118,8 @@ struct thread_info {
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_ADDR32		(1 << TIF_ADDR32)
+#define _TIF_X32		(1 << TIF_X32)
 
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
@@ -247,8 +251,6 @@ static inline struct thread_info *current_thread_info(void)
  * ever touches our thread-synchronous status, so we don't
  * have to worry about atomic accesses.
  */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #define TS_POLLING		0x0004	/* idle task polling need_resched,
 					   skip sending interrupt */
@@ -264,6 +266,18 @@ static inline void set_restore_sigmask(void)
 	ti->status |= TS_RESTORE_SIGMASK;
 	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
 }
+
+static inline bool is_ia32_task(void)
+{
+#ifdef CONFIG_X86_32
+	return true;
+#endif
+#ifdef CONFIG_IA32_EMULATION
+	if (current_thread_info()->status & TS_COMPAT)
+		return true;
+#endif
+	return false;
+}
 #endif /* !__ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 431793e5d484..34baa0eb5d0c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
 static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-	unsigned long long quot;
-	unsigned long long rem;
 	int cpu = smp_processor_id();
 	unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-	quot = (cyc >> CYC2NS_SCALE_FACTOR);
-	rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
-	ns += quot * per_cpu(cyc2ns, cpu) +
-	      ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
+	ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+			(1UL << CYC2NS_SCALE_FACTOR));
 	return ns;
 }
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 169be8938b96..c0e108e08079 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,7 +5,7 @@
 #include <linux/sched.h>
 
 #include <asm/processor.h>
-#include <asm/system.h>
+#include <asm/special_insns.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0012d0902c5f..88eae2aec619 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -89,4 +89,29 @@ asmlinkage void smp_thermal_interrupt(void);
 asmlinkage void mce_threshold_interrupt(void);
 #endif
 
+/* Interrupts/Exceptions */
+enum {
+	X86_TRAP_DE = 0,	/*  0, Divide-by-zero */
+	X86_TRAP_DB,		/*  1, Debug */
+	X86_TRAP_NMI,		/*  2, Non-maskable Interrupt */
+	X86_TRAP_BP,		/*  3, Breakpoint */
+	X86_TRAP_OF,		/*  4, Overflow */
+	X86_TRAP_BR,		/*  5, Bound Range Exceeded */
+	X86_TRAP_UD,		/*  6, Invalid Opcode */
+	X86_TRAP_NM,		/*  7, Device Not Available */
+	X86_TRAP_DF,		/*  8, Double Fault */
+	X86_TRAP_OLD_MF,	/*  9, Coprocessor Segment Overrun */
+	X86_TRAP_TS,		/* 10, Invalid TSS */
+	X86_TRAP_NP,		/* 11, Segment Not Present */
+	X86_TRAP_SS,		/* 12, Stack Segment Fault */
+	X86_TRAP_GP,		/* 13, General Protection Fault */
+	X86_TRAP_PF,		/* 14, Page Fault */
+	X86_TRAP_SPURIOUS,	/* 15, Spurious Interrupt */
+	X86_TRAP_MF,		/* 16, x87 Floating-Point Exception */
+	X86_TRAP_AC,		/* 17, Alignment Check */
+	X86_TRAP_MC,		/* 18, Machine Check */
+	X86_TRAP_XF,		/* 19, SIMD Floating-Point Exception */
+	X86_TRAP_IRET = 32,	/* 32, IRET Exception */
+};
+
 #endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 15d99153a96d..c91e8b9d588b 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
 
 extern int notsc_setup(char *);
-extern void save_sched_clock_state(void);
-extern void restore_sched_clock_state(void);
+extern void tsc_save_sched_clock_state(void);
+extern void tsc_restore_sched_clock_state(void);
 
 #endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8be5f54d9360..e0544597cfe7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; };
 
 extern unsigned long
 copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
+extern __must_check long
+strncpy_from_user(char *dst, const char __user *src, long count);
 
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 566e803cc602..8084bc73b18c 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to,
 	return n;
 }
 
-long __must_check strncpy_from_user(char *dst, const char __user *src,
-				    long count);
-long __must_check __strncpy_from_user(char *dst,
-				      const char __user *src, long count);
-
 /**
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 1c66d30971ad..fcd4b6f3ef02 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 	}
 }
 
-__must_check long
-strncpy_from_user(char *dst, const char __user *src, long count);
-__must_check long
-__strncpy_from_user(char *dst, const char __user *src, long count);
 __must_check long strnlen_user(const char __user *str, long n);
 __must_check long __strnlen_user(const char __user *str, long n);
 __must_check long strlen_user(const char __user *str);
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 2a58ed3e51d8..37cdc9d99bb1 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -1,13 +1,73 @@
+#ifndef _ASM_X86_UNISTD_H
+#define _ASM_X86_UNISTD_H 1
+
+/* x32 syscall flag bit */
+#define __X32_SYSCALL_BIT	0x40000000
+
 #ifdef __KERNEL__
+
+# ifdef CONFIG_X86_X32_ABI
+#  define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
+# else
+#  define __SYSCALL_MASK (~0)
+# endif
+
 # ifdef CONFIG_X86_32
-#  include "unistd_32.h"
+
+#  include <asm/unistd_32.h>
+#  define __ARCH_WANT_IPC_PARSE_VERSION
+#  define __ARCH_WANT_STAT64
+#  define __ARCH_WANT_SYS_IPC
+#  define __ARCH_WANT_SYS_OLD_MMAP
+#  define __ARCH_WANT_SYS_OLD_SELECT
+
 # else
-#  include "unistd_64.h"
+
+#  include <asm/unistd_64.h>
+#  include <asm/unistd_64_x32.h>
+#  define __ARCH_WANT_COMPAT_SYS_TIME
+
 # endif
+
+# define __ARCH_WANT_OLD_READDIR
+# define __ARCH_WANT_OLD_STAT
+# define __ARCH_WANT_SYS_ALARM
+# define __ARCH_WANT_SYS_FADVISE64
+# define __ARCH_WANT_SYS_GETHOSTNAME
+# define __ARCH_WANT_SYS_GETPGRP
+# define __ARCH_WANT_SYS_LLSEEK
+# define __ARCH_WANT_SYS_NICE
+# define __ARCH_WANT_SYS_OLDUMOUNT
+# define __ARCH_WANT_SYS_OLD_GETRLIMIT
+# define __ARCH_WANT_SYS_OLD_UNAME
+# define __ARCH_WANT_SYS_PAUSE
+# define __ARCH_WANT_SYS_RT_SIGACTION
+# define __ARCH_WANT_SYS_RT_SIGSUSPEND
+# define __ARCH_WANT_SYS_SGETMASK
+# define __ARCH_WANT_SYS_SIGNAL
+# define __ARCH_WANT_SYS_SIGPENDING
+# define __ARCH_WANT_SYS_SIGPROCMASK
+# define __ARCH_WANT_SYS_SOCKETCALL
+# define __ARCH_WANT_SYS_TIME
+# define __ARCH_WANT_SYS_UTIME
+# define __ARCH_WANT_SYS_WAITPID
+
+/*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+# define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+
 #else
 # ifdef __i386__
-#  include "unistd_32.h"
+#  include <asm/unistd_32.h>
+# elif defined(__LP64__)
+#  include <asm/unistd_64.h>
 # else
-#  include "unistd_64.h"
+#  include <asm/unistd_x32.h>
 # endif
 #endif
+
+#endif /* _ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
deleted file mode 100644
index 599c77d38f33..000000000000
--- a/arch/x86/include/asm/unistd_32.h
+++ /dev/null
@@ -1,401 +0,0 @@
1#ifndef _ASM_X86_UNISTD_32_H
2#define _ASM_X86_UNISTD_32_H
3
4/*
5 * This file contains the system call numbers.
6 */
7
8#define __NR_restart_syscall 0
9#define __NR_exit 1
10#define __NR_fork 2
11#define __NR_read 3
12#define __NR_write 4
13#define __NR_open 5
14#define __NR_close 6
15#define __NR_waitpid 7
16#define __NR_creat 8
17#define __NR_link 9
18#define __NR_unlink 10
19#define __NR_execve 11
20#define __NR_chdir 12
21#define __NR_time 13
22#define __NR_mknod 14
23#define __NR_chmod 15
24#define __NR_lchown 16
25#define __NR_break 17
26#define __NR_oldstat 18
27#define __NR_lseek 19
28#define __NR_getpid 20
29#define __NR_mount 21
30#define __NR_umount 22
31#define __NR_setuid 23
32#define __NR_getuid 24
33#define __NR_stime 25
34#define __NR_ptrace 26
35#define __NR_alarm 27
36#define __NR_oldfstat 28
37#define __NR_pause 29
38#define __NR_utime 30
39#define __NR_stty 31
40#define __NR_gtty 32
41#define __NR_access 33
42#define __NR_nice 34
43#define __NR_ftime 35
44#define __NR_sync 36
45#define __NR_kill 37
46#define __NR_rename 38
47#define __NR_mkdir 39
48#define __NR_rmdir 40
49#define __NR_dup 41
50#define __NR_pipe 42
51#define __NR_times 43
52#define __NR_prof 44
53#define __NR_brk 45
54#define __NR_setgid 46
55#define __NR_getgid 47
56#define __NR_signal 48
57#define __NR_geteuid 49
58#define __NR_getegid 50
59#define __NR_acct 51
60#define __NR_umount2 52
61#define __NR_lock 53
62#define __NR_ioctl 54
63#define __NR_fcntl 55
64#define __NR_mpx 56
65#define __NR_setpgid 57
66#define __NR_ulimit 58
67#define __NR_oldolduname 59
68#define __NR_umask 60
69#define __NR_chroot 61
70#define __NR_ustat 62
71#define __NR_dup2 63
72#define __NR_getppid 64
73#define __NR_getpgrp 65
74#define __NR_setsid 66
75#define __NR_sigaction 67
76#define __NR_sgetmask 68
77#define __NR_ssetmask 69
78#define __NR_setreuid 70
79#define __NR_setregid 71
80#define __NR_sigsuspend 72
81#define __NR_sigpending 73
82#define __NR_sethostname 74
83#define __NR_setrlimit 75
84#define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */
85#define __NR_getrusage 77
86#define __NR_gettimeofday 78
87#define __NR_settimeofday 79
88#define __NR_getgroups 80
89#define __NR_setgroups 81
90#define __NR_select 82
91#define __NR_symlink 83
92#define __NR_oldlstat 84
93#define __NR_readlink 85
94#define __NR_uselib 86
95#define __NR_swapon 87
96#define __NR_reboot 88
97#define __NR_readdir 89
98#define __NR_mmap 90
99#define __NR_munmap 91
100#define __NR_truncate 92
101#define __NR_ftruncate 93
102#define __NR_fchmod 94
103#define __NR_fchown 95
104#define __NR_getpriority 96
105#define __NR_setpriority 97
106#define __NR_profil 98
107#define __NR_statfs 99
108#define __NR_fstatfs 100
109#define __NR_ioperm 101
110#define __NR_socketcall 102
111#define __NR_syslog 103
112#define __NR_setitimer 104
113#define __NR_getitimer 105
114#define __NR_stat 106
115#define __NR_lstat 107
116#define __NR_fstat 108
117#define __NR_olduname 109
118#define __NR_iopl 110
119#define __NR_vhangup 111
120#define __NR_idle 112
121#define __NR_vm86old 113
122#define __NR_wait4 114
123#define __NR_swapoff 115
124#define __NR_sysinfo 116
125#define __NR_ipc 117
126#define __NR_fsync 118
127#define __NR_sigreturn 119
128#define __NR_clone 120
129#define __NR_setdomainname 121
130#define __NR_uname 122
131#define __NR_modify_ldt 123
132#define __NR_adjtimex 124
133#define __NR_mprotect 125
134#define __NR_sigprocmask 126
135#define __NR_create_module 127
136#define __NR_init_module 128
137#define __NR_delete_module 129
138#define __NR_get_kernel_syms 130
139#define __NR_quotactl 131
140#define __NR_getpgid 132
141#define __NR_fchdir 133
142#define __NR_bdflush 134
143#define __NR_sysfs 135
144#define __NR_personality 136
145#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
146#define __NR_setfsuid 138
147#define __NR_setfsgid 139
148#define __NR__llseek 140
149#define __NR_getdents 141
150#define __NR__newselect 142
151#define __NR_flock 143
152#define __NR_msync 144
153#define __NR_readv 145
154#define __NR_writev 146
155#define __NR_getsid 147
156#define __NR_fdatasync 148
157#define __NR__sysctl 149
158#define __NR_mlock 150
159#define __NR_munlock 151
160#define __NR_mlockall 152
161#define __NR_munlockall 153
162#define __NR_sched_setparam 154
163#define __NR_sched_getparam 155
164#define __NR_sched_setscheduler 156
165#define __NR_sched_getscheduler 157
166#define __NR_sched_yield 158
167#define __NR_sched_get_priority_max 159
168#define __NR_sched_get_priority_min 160
169#define __NR_sched_rr_get_interval 161
170#define __NR_nanosleep 162
171#define __NR_mremap 163
172#define __NR_setresuid 164
173#define __NR_getresuid 165
174#define __NR_vm86 166
175#define __NR_query_module 167
176#define __NR_poll 168
177#define __NR_nfsservctl 169
178#define __NR_setresgid 170
179#define __NR_getresgid 171
180#define __NR_prctl 172
181#define __NR_rt_sigreturn 173
182#define __NR_rt_sigaction 174
183#define __NR_rt_sigprocmask 175
184#define __NR_rt_sigpending 176
185#define __NR_rt_sigtimedwait 177
186#define __NR_rt_sigqueueinfo 178
187#define __NR_rt_sigsuspend 179
188#define __NR_pread64 180
189#define __NR_pwrite64 181
190#define __NR_chown 182
191#define __NR_getcwd 183
192#define __NR_capget 184
193#define __NR_capset 185
194#define __NR_sigaltstack 186
195#define __NR_sendfile 187
196#define __NR_getpmsg 188 /* some people actually want streams */
197#define __NR_putpmsg 189 /* some people actually want streams */
198#define __NR_vfork 190
199#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
200#define __NR_mmap2 192
201#define __NR_truncate64 193
202#define __NR_ftruncate64 194
203#define __NR_stat64 195
204#define __NR_lstat64 196
205#define __NR_fstat64 197
206#define __NR_lchown32 198
207#define __NR_getuid32 199
208#define __NR_getgid32 200
209#define __NR_geteuid32 201
210#define __NR_getegid32 202
211#define __NR_setreuid32 203
212#define __NR_setregid32 204
213#define __NR_getgroups32 205
214#define __NR_setgroups32 206
215#define __NR_fchown32 207
216#define __NR_setresuid32 208
217#define __NR_getresuid32 209
218#define __NR_setresgid32 210
219#define __NR_getresgid32 211
220#define __NR_chown32 212
221#define __NR_setuid32 213
222#define __NR_setgid32 214
223#define __NR_setfsuid32 215
224#define __NR_setfsgid32 216
225#define __NR_pivot_root 217
226#define __NR_mincore 218
227#define __NR_madvise 219
228#define __NR_madvise1 219 /* delete when C lib stub is removed */
229#define __NR_getdents64 220
230#define __NR_fcntl64 221
231/* 223 is unused */
232#define __NR_gettid 224
233#define __NR_readahead 225
234#define __NR_setxattr 226
235#define __NR_lsetxattr 227
236#define __NR_fsetxattr 228
237#define __NR_getxattr 229
238#define __NR_lgetxattr 230
239#define __NR_fgetxattr 231
240#define __NR_listxattr 232
241#define __NR_llistxattr 233
242#define __NR_flistxattr 234
243#define __NR_removexattr 235
244#define __NR_lremovexattr 236
245#define __NR_fremovexattr 237
246#define __NR_tkill 238
247#define __NR_sendfile64 239
248#define __NR_futex 240
249#define __NR_sched_setaffinity 241
250#define __NR_sched_getaffinity 242
251#define __NR_set_thread_area 243
252#define __NR_get_thread_area 244
253#define __NR_io_setup 245
254#define __NR_io_destroy 246
255#define __NR_io_getevents 247
256#define __NR_io_submit 248
257#define __NR_io_cancel 249
258#define __NR_fadvise64 250
259/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */
260#define __NR_exit_group 252
261#define __NR_lookup_dcookie 253
262#define __NR_epoll_create 254
263#define __NR_epoll_ctl 255
264#define __NR_epoll_wait 256
265#define __NR_remap_file_pages 257
266#define __NR_set_tid_address 258
267#define __NR_timer_create 259
268#define __NR_timer_settime (__NR_timer_create+1)
269#define __NR_timer_gettime (__NR_timer_create+2)
270#define __NR_timer_getoverrun (__NR_timer_create+3)
271#define __NR_timer_delete (__NR_timer_create+4)
272#define __NR_clock_settime (__NR_timer_create+5)
273#define __NR_clock_gettime (__NR_timer_create+6)
274#define __NR_clock_getres (__NR_timer_create+7)
275#define __NR_clock_nanosleep (__NR_timer_create+8)
276#define __NR_statfs64 268
277#define __NR_fstatfs64 269
278#define __NR_tgkill 270
279#define __NR_utimes 271
280#define __NR_fadvise64_64 272
281#define __NR_vserver 273
282#define __NR_mbind 274
283#define __NR_get_mempolicy 275
284#define __NR_set_mempolicy 276
285#define __NR_mq_open 277
286#define __NR_mq_unlink (__NR_mq_open+1)
287#define __NR_mq_timedsend (__NR_mq_open+2)
288#define __NR_mq_timedreceive (__NR_mq_open+3)
289#define __NR_mq_notify (__NR_mq_open+4)
290#define __NR_mq_getsetattr (__NR_mq_open+5)
291#define __NR_kexec_load 283
292#define __NR_waitid 284
293/* #define __NR_sys_setaltroot 285 */
294#define __NR_add_key 286
295#define __NR_request_key 287
296#define __NR_keyctl 288
297#define __NR_ioprio_set 289
298#define __NR_ioprio_get 290
299#define __NR_inotify_init 291
300#define __NR_inotify_add_watch 292
301#define __NR_inotify_rm_watch 293
302#define __NR_migrate_pages 294
303#define __NR_openat 295
304#define __NR_mkdirat 296
305#define __NR_mknodat 297
306#define __NR_fchownat 298
307#define __NR_futimesat 299
308#define __NR_fstatat64 300
309#define __NR_unlinkat 301
310#define __NR_renameat 302
311#define __NR_linkat 303
312#define __NR_symlinkat 304
313#define __NR_readlinkat 305
314#define __NR_fchmodat 306
315#define __NR_faccessat 307
316#define __NR_pselect6 308
317#define __NR_ppoll 309
318#define __NR_unshare 310
319#define __NR_set_robust_list 311
320#define __NR_get_robust_list 312
321#define __NR_splice 313
322#define __NR_sync_file_range 314
323#define __NR_tee 315
324#define __NR_vmsplice 316
325#define __NR_move_pages 317
326#define __NR_getcpu 318
327#define __NR_epoll_pwait 319
328#define __NR_utimensat 320
329#define __NR_signalfd 321
330#define __NR_timerfd_create 322
331#define __NR_eventfd 323
332#define __NR_fallocate 324
333#define __NR_timerfd_settime 325
334#define __NR_timerfd_gettime 326
335#define __NR_signalfd4 327
336#define __NR_eventfd2 328
337#define __NR_epoll_create1 329
338#define __NR_dup3 330
339#define __NR_pipe2 331
340#define __NR_inotify_init1 332
341#define __NR_preadv 333
342#define __NR_pwritev 334
343#define __NR_rt_tgsigqueueinfo 335
344#define __NR_perf_event_open 336
345#define __NR_recvmmsg 337
346#define __NR_fanotify_init 338
347#define __NR_fanotify_mark 339
348#define __NR_prlimit64 340
349#define __NR_name_to_handle_at 341
350#define __NR_open_by_handle_at 342
351#define __NR_clock_adjtime 343
352#define __NR_syncfs 344
353#define __NR_sendmmsg 345
354#define __NR_setns 346
355#define __NR_process_vm_readv 347
356#define __NR_process_vm_writev 348
357
358#ifdef __KERNEL__
359
360#define NR_syscalls 349
361
362#define __ARCH_WANT_IPC_PARSE_VERSION
363#define __ARCH_WANT_OLD_READDIR
364#define __ARCH_WANT_OLD_STAT
365#define __ARCH_WANT_STAT64
366#define __ARCH_WANT_SYS_ALARM
367#define __ARCH_WANT_SYS_GETHOSTNAME
368#define __ARCH_WANT_SYS_IPC
369#define __ARCH_WANT_SYS_PAUSE
370#define __ARCH_WANT_SYS_SGETMASK
371#define __ARCH_WANT_SYS_SIGNAL
372#define __ARCH_WANT_SYS_TIME
373#define __ARCH_WANT_SYS_UTIME
374#define __ARCH_WANT_SYS_WAITPID
375#define __ARCH_WANT_SYS_SOCKETCALL
376#define __ARCH_WANT_SYS_FADVISE64
377#define __ARCH_WANT_SYS_GETPGRP
378#define __ARCH_WANT_SYS_LLSEEK
379#define __ARCH_WANT_SYS_NICE
380#define __ARCH_WANT_SYS_OLD_GETRLIMIT
381#define __ARCH_WANT_SYS_OLD_UNAME
382#define __ARCH_WANT_SYS_OLD_MMAP
383#define __ARCH_WANT_SYS_OLD_SELECT
384#define __ARCH_WANT_SYS_OLDUMOUNT
385#define __ARCH_WANT_SYS_SIGPENDING
386#define __ARCH_WANT_SYS_SIGPROCMASK
387#define __ARCH_WANT_SYS_RT_SIGACTION
388#define __ARCH_WANT_SYS_RT_SIGSUSPEND
389
390/*
391 * "Conditional" syscalls
392 *
393 * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
394 * but it doesn't work on all toolchains, so we just do it by hand
395 */
396#ifndef cond_syscall
397#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
398#endif
399
400#endif /* __KERNEL__ */
401#endif /* _ASM_X86_UNISTD_32_H */
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
deleted file mode 100644
index 0431f193c3f2..000000000000
--- a/arch/x86/include/asm/unistd_64.h
+++ /dev/null
@@ -1,732 +0,0 @@
1#ifndef _ASM_X86_UNISTD_64_H
2#define _ASM_X86_UNISTD_64_H
3
4#ifndef __SYSCALL
5#define __SYSCALL(a, b)
6#endif
7
8/*
9 * This file contains the system call numbers.
10 *
11 * Note: holes are not allowed.
12 */
13
14/* at least 8 syscalls per cacheline */
15#define __NR_read 0
16__SYSCALL(__NR_read, sys_read)
17#define __NR_write 1
18__SYSCALL(__NR_write, sys_write)
19#define __NR_open 2
20__SYSCALL(__NR_open, sys_open)
21#define __NR_close 3
22__SYSCALL(__NR_close, sys_close)
23#define __NR_stat 4
24__SYSCALL(__NR_stat, sys_newstat)
25#define __NR_fstat 5
26__SYSCALL(__NR_fstat, sys_newfstat)
27#define __NR_lstat 6
28__SYSCALL(__NR_lstat, sys_newlstat)
29#define __NR_poll 7
30__SYSCALL(__NR_poll, sys_poll)
31
32#define __NR_lseek 8
33__SYSCALL(__NR_lseek, sys_lseek)
34#define __NR_mmap 9
35__SYSCALL(__NR_mmap, sys_mmap)
36#define __NR_mprotect 10
37__SYSCALL(__NR_mprotect, sys_mprotect)
38#define __NR_munmap 11
39__SYSCALL(__NR_munmap, sys_munmap)
40#define __NR_brk 12
41__SYSCALL(__NR_brk, sys_brk)
42#define __NR_rt_sigaction 13
43__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction)
44#define __NR_rt_sigprocmask 14
45__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask)
46#define __NR_rt_sigreturn 15
47__SYSCALL(__NR_rt_sigreturn, stub_rt_sigreturn)
48
49#define __NR_ioctl 16
50__SYSCALL(__NR_ioctl, sys_ioctl)
51#define __NR_pread64 17
52__SYSCALL(__NR_pread64, sys_pread64)
53#define __NR_pwrite64 18
54__SYSCALL(__NR_pwrite64, sys_pwrite64)
55#define __NR_readv 19
56__SYSCALL(__NR_readv, sys_readv)
57#define __NR_writev 20
58__SYSCALL(__NR_writev, sys_writev)
59#define __NR_access 21
60__SYSCALL(__NR_access, sys_access)
61#define __NR_pipe 22
62__SYSCALL(__NR_pipe, sys_pipe)
63#define __NR_select 23
64__SYSCALL(__NR_select, sys_select)
65
66#define __NR_sched_yield 24
67__SYSCALL(__NR_sched_yield, sys_sched_yield)
68#define __NR_mremap 25
69__SYSCALL(__NR_mremap, sys_mremap)
70#define __NR_msync 26
71__SYSCALL(__NR_msync, sys_msync)
72#define __NR_mincore 27
73__SYSCALL(__NR_mincore, sys_mincore)
74#define __NR_madvise 28
75__SYSCALL(__NR_madvise, sys_madvise)
76#define __NR_shmget 29
77__SYSCALL(__NR_shmget, sys_shmget)
78#define __NR_shmat 30
79__SYSCALL(__NR_shmat, sys_shmat)
80#define __NR_shmctl 31
81__SYSCALL(__NR_shmctl, sys_shmctl)
82
83#define __NR_dup 32
84__SYSCALL(__NR_dup, sys_dup)
85#define __NR_dup2 33
86__SYSCALL(__NR_dup2, sys_dup2)
87#define __NR_pause 34
88__SYSCALL(__NR_pause, sys_pause)
89#define __NR_nanosleep 35
90__SYSCALL(__NR_nanosleep, sys_nanosleep)
91#define __NR_getitimer 36
92__SYSCALL(__NR_getitimer, sys_getitimer)
93#define __NR_alarm 37
94__SYSCALL(__NR_alarm, sys_alarm)
95#define __NR_setitimer 38
96__SYSCALL(__NR_setitimer, sys_setitimer)
97#define __NR_getpid 39
98__SYSCALL(__NR_getpid, sys_getpid)
99
100#define __NR_sendfile 40
101__SYSCALL(__NR_sendfile, sys_sendfile64)
102#define __NR_socket 41
103__SYSCALL(__NR_socket, sys_socket)
104#define __NR_connect 42
105__SYSCALL(__NR_connect, sys_connect)
106#define __NR_accept 43
107__SYSCALL(__NR_accept, sys_accept)
108#define __NR_sendto 44
109__SYSCALL(__NR_sendto, sys_sendto)
110#define __NR_recvfrom 45
111__SYSCALL(__NR_recvfrom, sys_recvfrom)
112#define __NR_sendmsg 46
113__SYSCALL(__NR_sendmsg, sys_sendmsg)
114#define __NR_recvmsg 47
115__SYSCALL(__NR_recvmsg, sys_recvmsg)
116
117#define __NR_shutdown 48
118__SYSCALL(__NR_shutdown, sys_shutdown)
119#define __NR_bind 49
120__SYSCALL(__NR_bind, sys_bind)
121#define __NR_listen 50
122__SYSCALL(__NR_listen, sys_listen)
123#define __NR_getsockname 51
124__SYSCALL(__NR_getsockname, sys_getsockname)
125#define __NR_getpeername 52
126__SYSCALL(__NR_getpeername, sys_getpeername)
127#define __NR_socketpair 53
128__SYSCALL(__NR_socketpair, sys_socketpair)
129#define __NR_setsockopt 54
130__SYSCALL(__NR_setsockopt, sys_setsockopt)
131#define __NR_getsockopt 55
132__SYSCALL(__NR_getsockopt, sys_getsockopt)
133
134#define __NR_clone 56
135__SYSCALL(__NR_clone, stub_clone)
136#define __NR_fork 57
137__SYSCALL(__NR_fork, stub_fork)
138#define __NR_vfork 58
139__SYSCALL(__NR_vfork, stub_vfork)
140#define __NR_execve 59
141__SYSCALL(__NR_execve, stub_execve)
142#define __NR_exit 60
143__SYSCALL(__NR_exit, sys_exit)
144#define __NR_wait4 61
145__SYSCALL(__NR_wait4, sys_wait4)
146#define __NR_kill 62
147__SYSCALL(__NR_kill, sys_kill)
148#define __NR_uname 63
149__SYSCALL(__NR_uname, sys_newuname)
150
151#define __NR_semget 64
152__SYSCALL(__NR_semget, sys_semget)
153#define __NR_semop 65
154__SYSCALL(__NR_semop, sys_semop)
155#define __NR_semctl 66
156__SYSCALL(__NR_semctl, sys_semctl)
157#define __NR_shmdt 67
158__SYSCALL(__NR_shmdt, sys_shmdt)
159#define __NR_msgget 68
160__SYSCALL(__NR_msgget, sys_msgget)
161#define __NR_msgsnd 69
162__SYSCALL(__NR_msgsnd, sys_msgsnd)
163#define __NR_msgrcv 70
164__SYSCALL(__NR_msgrcv, sys_msgrcv)
165#define __NR_msgctl 71
166__SYSCALL(__NR_msgctl, sys_msgctl)
167
168#define __NR_fcntl 72
169__SYSCALL(__NR_fcntl, sys_fcntl)
170#define __NR_flock 73
171__SYSCALL(__NR_flock, sys_flock)
172#define __NR_fsync 74
173__SYSCALL(__NR_fsync, sys_fsync)
174#define __NR_fdatasync 75
175__SYSCALL(__NR_fdatasync, sys_fdatasync)
176#define __NR_truncate 76
177__SYSCALL(__NR_truncate, sys_truncate)
178#define __NR_ftruncate 77
179__SYSCALL(__NR_ftruncate, sys_ftruncate)
180#define __NR_getdents 78
181__SYSCALL(__NR_getdents, sys_getdents)
182#define __NR_getcwd 79
183__SYSCALL(__NR_getcwd, sys_getcwd)
184
185#define __NR_chdir 80
186__SYSCALL(__NR_chdir, sys_chdir)
187#define __NR_fchdir 81
188__SYSCALL(__NR_fchdir, sys_fchdir)
189#define __NR_rename 82
190__SYSCALL(__NR_rename, sys_rename)
191#define __NR_mkdir 83
192__SYSCALL(__NR_mkdir, sys_mkdir)
193#define __NR_rmdir 84
194__SYSCALL(__NR_rmdir, sys_rmdir)
195#define __NR_creat 85
196__SYSCALL(__NR_creat, sys_creat)
197#define __NR_link 86
198__SYSCALL(__NR_link, sys_link)
199#define __NR_unlink 87
200__SYSCALL(__NR_unlink, sys_unlink)
201
202#define __NR_symlink 88
203__SYSCALL(__NR_symlink, sys_symlink)
204#define __NR_readlink 89
205__SYSCALL(__NR_readlink, sys_readlink)
206#define __NR_chmod 90
207__SYSCALL(__NR_chmod, sys_chmod)
208#define __NR_fchmod 91
209__SYSCALL(__NR_fchmod, sys_fchmod)
210#define __NR_chown 92
211__SYSCALL(__NR_chown, sys_chown)
212#define __NR_fchown 93
213__SYSCALL(__NR_fchown, sys_fchown)
214#define __NR_lchown 94
215__SYSCALL(__NR_lchown, sys_lchown)
216#define __NR_umask 95
217__SYSCALL(__NR_umask, sys_umask)
218
219#define __NR_gettimeofday 96
220__SYSCALL(__NR_gettimeofday, sys_gettimeofday)
221#define __NR_getrlimit 97
222__SYSCALL(__NR_getrlimit, sys_getrlimit)
223#define __NR_getrusage 98
224__SYSCALL(__NR_getrusage, sys_getrusage)
225#define __NR_sysinfo 99
226__SYSCALL(__NR_sysinfo, sys_sysinfo)
227#define __NR_times 100
228__SYSCALL(__NR_times, sys_times)
229#define __NR_ptrace 101
230__SYSCALL(__NR_ptrace, sys_ptrace)
231#define __NR_getuid 102
232__SYSCALL(__NR_getuid, sys_getuid)
233#define __NR_syslog 103
234__SYSCALL(__NR_syslog, sys_syslog)
235
236/* at the very end the stuff that never runs during the benchmarks */
237#define __NR_getgid 104
238__SYSCALL(__NR_getgid, sys_getgid)
239#define __NR_setuid 105
240__SYSCALL(__NR_setuid, sys_setuid)
241#define __NR_setgid 106
242__SYSCALL(__NR_setgid, sys_setgid)
243#define __NR_geteuid 107
244__SYSCALL(__NR_geteuid, sys_geteuid)
245#define __NR_getegid 108
246__SYSCALL(__NR_getegid, sys_getegid)
247#define __NR_setpgid 109
248__SYSCALL(__NR_setpgid, sys_setpgid)
249#define __NR_getppid 110
250__SYSCALL(__NR_getppid, sys_getppid)
251#define __NR_getpgrp 111
252__SYSCALL(__NR_getpgrp, sys_getpgrp)
253
254#define __NR_setsid 112
255__SYSCALL(__NR_setsid, sys_setsid)
256#define __NR_setreuid 113
257__SYSCALL(__NR_setreuid, sys_setreuid)
258#define __NR_setregid 114
259__SYSCALL(__NR_setregid, sys_setregid)
260#define __NR_getgroups 115
261__SYSCALL(__NR_getgroups, sys_getgroups)
262#define __NR_setgroups 116
263__SYSCALL(__NR_setgroups, sys_setgroups)
264#define __NR_setresuid 117
265__SYSCALL(__NR_setresuid, sys_setresuid)
266#define __NR_getresuid 118
267__SYSCALL(__NR_getresuid, sys_getresuid)
268#define __NR_setresgid 119
269__SYSCALL(__NR_setresgid, sys_setresgid)
270
271#define __NR_getresgid 120
272__SYSCALL(__NR_getresgid, sys_getresgid)
273#define __NR_getpgid 121
274__SYSCALL(__NR_getpgid, sys_getpgid)
275#define __NR_setfsuid 122
276__SYSCALL(__NR_setfsuid, sys_setfsuid)
277#define __NR_setfsgid 123
278__SYSCALL(__NR_setfsgid, sys_setfsgid)
279#define __NR_getsid 124
280__SYSCALL(__NR_getsid, sys_getsid)
281#define __NR_capget 125
282__SYSCALL(__NR_capget, sys_capget)
283#define __NR_capset 126
284__SYSCALL(__NR_capset, sys_capset)
285
286#define __NR_rt_sigpending 127
287__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending)
288#define __NR_rt_sigtimedwait 128
289__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait)
290#define __NR_rt_sigqueueinfo 129
291__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo)
292#define __NR_rt_sigsuspend 130
293__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend)
294#define __NR_sigaltstack 131
295__SYSCALL(__NR_sigaltstack, stub_sigaltstack)
296#define __NR_utime 132
297__SYSCALL(__NR_utime, sys_utime)
298#define __NR_mknod 133
299__SYSCALL(__NR_mknod, sys_mknod)
300
301/* Only needed for a.out */
302#define __NR_uselib 134
303__SYSCALL(__NR_uselib, sys_ni_syscall)
304#define __NR_personality 135
305__SYSCALL(__NR_personality, sys_personality)
306
307#define __NR_ustat 136
308__SYSCALL(__NR_ustat, sys_ustat)
309#define __NR_statfs 137
310__SYSCALL(__NR_statfs, sys_statfs)
311#define __NR_fstatfs 138
312__SYSCALL(__NR_fstatfs, sys_fstatfs)
313#define __NR_sysfs 139
314__SYSCALL(__NR_sysfs, sys_sysfs)
315
316#define __NR_getpriority 140
317__SYSCALL(__NR_getpriority, sys_getpriority)
318#define __NR_setpriority 141
319__SYSCALL(__NR_setpriority, sys_setpriority)
320#define __NR_sched_setparam 142
321__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
322#define __NR_sched_getparam 143
323__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
324#define __NR_sched_setscheduler 144
325__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
326#define __NR_sched_getscheduler 145
327__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
328#define __NR_sched_get_priority_max 146
329__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
330#define __NR_sched_get_priority_min 147
331__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
332#define __NR_sched_rr_get_interval 148
333__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval)
334
335#define __NR_mlock 149
336__SYSCALL(__NR_mlock, sys_mlock)
337#define __NR_munlock 150
338__SYSCALL(__NR_munlock, sys_munlock)
339#define __NR_mlockall 151
340__SYSCALL(__NR_mlockall, sys_mlockall)
341#define __NR_munlockall 152
342__SYSCALL(__NR_munlockall, sys_munlockall)
343
344#define __NR_vhangup 153
345__SYSCALL(__NR_vhangup, sys_vhangup)
346
347#define __NR_modify_ldt 154
348__SYSCALL(__NR_modify_ldt, sys_modify_ldt)
349
350#define __NR_pivot_root 155
351__SYSCALL(__NR_pivot_root, sys_pivot_root)
352
353#define __NR__sysctl 156
354__SYSCALL(__NR__sysctl, sys_sysctl)
355
356#define __NR_prctl 157
357__SYSCALL(__NR_prctl, sys_prctl)
358#define __NR_arch_prctl 158
359__SYSCALL(__NR_arch_prctl, sys_arch_prctl)
360
361#define __NR_adjtimex 159
362__SYSCALL(__NR_adjtimex, sys_adjtimex)
363
364#define __NR_setrlimit 160
365__SYSCALL(__NR_setrlimit, sys_setrlimit)
366
367#define __NR_chroot 161
368__SYSCALL(__NR_chroot, sys_chroot)
369
370#define __NR_sync 162
371__SYSCALL(__NR_sync, sys_sync)
372
373#define __NR_acct 163
374__SYSCALL(__NR_acct, sys_acct)
375
376#define __NR_settimeofday 164
377__SYSCALL(__NR_settimeofday, sys_settimeofday)
378
379#define __NR_mount 165
380__SYSCALL(__NR_mount, sys_mount)
381#define __NR_umount2 166
382__SYSCALL(__NR_umount2, sys_umount)
383
384#define __NR_swapon 167
385__SYSCALL(__NR_swapon, sys_swapon)
386#define __NR_swapoff 168
387__SYSCALL(__NR_swapoff, sys_swapoff)
388
389#define __NR_reboot 169
390__SYSCALL(__NR_reboot, sys_reboot)
391
392#define __NR_sethostname 170
393__SYSCALL(__NR_sethostname, sys_sethostname)
394#define __NR_setdomainname 171
395__SYSCALL(__NR_setdomainname, sys_setdomainname)
396
397#define __NR_iopl 172
398__SYSCALL(__NR_iopl, stub_iopl)
399#define __NR_ioperm 173
400__SYSCALL(__NR_ioperm, sys_ioperm)
401
402#define __NR_create_module 174
403__SYSCALL(__NR_create_module, sys_ni_syscall)
404#define __NR_init_module 175
405__SYSCALL(__NR_init_module, sys_init_module)
406#define __NR_delete_module 176
407__SYSCALL(__NR_delete_module, sys_delete_module)
408#define __NR_get_kernel_syms 177
409__SYSCALL(__NR_get_kernel_syms, sys_ni_syscall)
410#define __NR_query_module 178
411__SYSCALL(__NR_query_module, sys_ni_syscall)
412
413#define __NR_quotactl 179
414__SYSCALL(__NR_quotactl, sys_quotactl)
415
416#define __NR_nfsservctl 180
417__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
418
419/* reserved for LiS/STREAMS */
420#define __NR_getpmsg 181
421__SYSCALL(__NR_getpmsg, sys_ni_syscall)
422#define __NR_putpmsg 182
423__SYSCALL(__NR_putpmsg, sys_ni_syscall)
424
425/* reserved for AFS */
426#define __NR_afs_syscall 183
427__SYSCALL(__NR_afs_syscall, sys_ni_syscall)
428
429/* reserved for tux */
430#define __NR_tuxcall 184
431__SYSCALL(__NR_tuxcall, sys_ni_syscall)
432
433#define __NR_security 185
434__SYSCALL(__NR_security, sys_ni_syscall)
435
436#define __NR_gettid 186
437__SYSCALL(__NR_gettid, sys_gettid)
438
439#define __NR_readahead 187
440__SYSCALL(__NR_readahead, sys_readahead)
441#define __NR_setxattr 188
442__SYSCALL(__NR_setxattr, sys_setxattr)
443#define __NR_lsetxattr 189
444__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
445#define __NR_fsetxattr 190
446__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
447#define __NR_getxattr 191
448__SYSCALL(__NR_getxattr, sys_getxattr)
449#define __NR_lgetxattr 192
450__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
451#define __NR_fgetxattr 193
452__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
453#define __NR_listxattr 194
454__SYSCALL(__NR_listxattr, sys_listxattr)
455#define __NR_llistxattr 195
456__SYSCALL(__NR_llistxattr, sys_llistxattr)
457#define __NR_flistxattr 196
458__SYSCALL(__NR_flistxattr, sys_flistxattr)
459#define __NR_removexattr 197
460__SYSCALL(__NR_removexattr, sys_removexattr)
461#define __NR_lremovexattr 198
462__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
463#define __NR_fremovexattr 199
464__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
465#define __NR_tkill 200
466__SYSCALL(__NR_tkill, sys_tkill)
467#define __NR_time 201
468__SYSCALL(__NR_time, sys_time)
469#define __NR_futex 202
470__SYSCALL(__NR_futex, sys_futex)
471#define __NR_sched_setaffinity 203
472__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity)
473#define __NR_sched_getaffinity 204
474__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity)
475#define __NR_set_thread_area 205
476__SYSCALL(__NR_set_thread_area, sys_ni_syscall) /* use arch_prctl */
477#define __NR_io_setup 206
478__SYSCALL(__NR_io_setup, sys_io_setup)
479#define __NR_io_destroy 207
480__SYSCALL(__NR_io_destroy, sys_io_destroy)
481#define __NR_io_getevents 208
482__SYSCALL(__NR_io_getevents, sys_io_getevents)
483#define __NR_io_submit 209
484__SYSCALL(__NR_io_submit, sys_io_submit)
485#define __NR_io_cancel 210
486__SYSCALL(__NR_io_cancel, sys_io_cancel)
487#define __NR_get_thread_area 211
488__SYSCALL(__NR_get_thread_area, sys_ni_syscall) /* use arch_prctl */
489#define __NR_lookup_dcookie 212
490__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie)
491#define __NR_epoll_create 213
492__SYSCALL(__NR_epoll_create, sys_epoll_create)
493#define __NR_epoll_ctl_old 214
494__SYSCALL(__NR_epoll_ctl_old, sys_ni_syscall)
495#define __NR_epoll_wait_old 215
496__SYSCALL(__NR_epoll_wait_old, sys_ni_syscall)
497#define __NR_remap_file_pages 216
498__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
499#define __NR_getdents64 217
500__SYSCALL(__NR_getdents64, sys_getdents64)
501#define __NR_set_tid_address 218
502__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
503#define __NR_restart_syscall 219
504__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
505#define __NR_semtimedop 220
506__SYSCALL(__NR_semtimedop, sys_semtimedop)
507#define __NR_fadvise64 221
508__SYSCALL(__NR_fadvise64, sys_fadvise64)
509#define __NR_timer_create 222
510__SYSCALL(__NR_timer_create, sys_timer_create)
511#define __NR_timer_settime 223
512__SYSCALL(__NR_timer_settime, sys_timer_settime)
513#define __NR_timer_gettime 224
514__SYSCALL(__NR_timer_gettime, sys_timer_gettime)
515#define __NR_timer_getoverrun 225
516__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
517#define __NR_timer_delete 226
518__SYSCALL(__NR_timer_delete, sys_timer_delete)
519#define __NR_clock_settime 227
520__SYSCALL(__NR_clock_settime, sys_clock_settime)
521#define __NR_clock_gettime 228
522__SYSCALL(__NR_clock_gettime, sys_clock_gettime)
523#define __NR_clock_getres 229
524__SYSCALL(__NR_clock_getres, sys_clock_getres)
525#define __NR_clock_nanosleep 230
526__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep)
527#define __NR_exit_group 231
528__SYSCALL(__NR_exit_group, sys_exit_group)
529#define __NR_epoll_wait 232
530__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
531#define __NR_epoll_ctl 233
532__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
533#define __NR_tgkill 234
534__SYSCALL(__NR_tgkill, sys_tgkill)
535#define __NR_utimes 235
536__SYSCALL(__NR_utimes, sys_utimes)
537#define __NR_vserver 236
538__SYSCALL(__NR_vserver, sys_ni_syscall)
539#define __NR_mbind 237
540__SYSCALL(__NR_mbind, sys_mbind)
541#define __NR_set_mempolicy 238
542__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
543#define __NR_get_mempolicy 239
544__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
545#define __NR_mq_open 240
546__SYSCALL(__NR_mq_open, sys_mq_open)
547#define __NR_mq_unlink 241
548__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
549#define __NR_mq_timedsend 242
550__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend)
551#define __NR_mq_timedreceive 243
552__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive)
553#define __NR_mq_notify 244
554__SYSCALL(__NR_mq_notify, sys_mq_notify)
555#define __NR_mq_getsetattr 245
556__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
557#define __NR_kexec_load 246
558__SYSCALL(__NR_kexec_load, sys_kexec_load)
559#define __NR_waitid 247
560__SYSCALL(__NR_waitid, sys_waitid)
561#define __NR_add_key 248
562__SYSCALL(__NR_add_key, sys_add_key)
563#define __NR_request_key 249
564__SYSCALL(__NR_request_key, sys_request_key)
565#define __NR_keyctl 250
566__SYSCALL(__NR_keyctl, sys_keyctl)
567#define __NR_ioprio_set 251
568__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
569#define __NR_ioprio_get 252
570__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
571#define __NR_inotify_init 253
572__SYSCALL(__NR_inotify_init, sys_inotify_init)
573#define __NR_inotify_add_watch 254
574__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
575#define __NR_inotify_rm_watch 255
576__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
577#define __NR_migrate_pages 256
578__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
579#define __NR_openat 257
580__SYSCALL(__NR_openat, sys_openat)
581#define __NR_mkdirat 258
582__SYSCALL(__NR_mkdirat, sys_mkdirat)
583#define __NR_mknodat 259
584__SYSCALL(__NR_mknodat, sys_mknodat)
585#define __NR_fchownat 260
586__SYSCALL(__NR_fchownat, sys_fchownat)
587#define __NR_futimesat 261
588__SYSCALL(__NR_futimesat, sys_futimesat)
589#define __NR_newfstatat 262
590__SYSCALL(__NR_newfstatat, sys_newfstatat)
591#define __NR_unlinkat 263
592__SYSCALL(__NR_unlinkat, sys_unlinkat)
593#define __NR_renameat 264
594__SYSCALL(__NR_renameat, sys_renameat)
595#define __NR_linkat 265
596__SYSCALL(__NR_linkat, sys_linkat)
597#define __NR_symlinkat 266
598__SYSCALL(__NR_symlinkat, sys_symlinkat)
599#define __NR_readlinkat 267
600__SYSCALL(__NR_readlinkat, sys_readlinkat)
601#define __NR_fchmodat 268
602__SYSCALL(__NR_fchmodat, sys_fchmodat)
603#define __NR_faccessat 269
604__SYSCALL(__NR_faccessat, sys_faccessat)
605#define __NR_pselect6 270
606__SYSCALL(__NR_pselect6, sys_pselect6)
607#define __NR_ppoll 271
608__SYSCALL(__NR_ppoll, sys_ppoll)
609#define __NR_unshare 272
610__SYSCALL(__NR_unshare, sys_unshare)
611#define __NR_set_robust_list 273
612__SYSCALL(__NR_set_robust_list, sys_set_robust_list)
613#define __NR_get_robust_list 274
614__SYSCALL(__NR_get_robust_list, sys_get_robust_list)
615#define __NR_splice 275
616__SYSCALL(__NR_splice, sys_splice)
617#define __NR_tee 276
618__SYSCALL(__NR_tee, sys_tee)
619#define __NR_sync_file_range 277
620__SYSCALL(__NR_sync_file_range, sys_sync_file_range)
621#define __NR_vmsplice 278
622__SYSCALL(__NR_vmsplice, sys_vmsplice)
623#define __NR_move_pages 279
624__SYSCALL(__NR_move_pages, sys_move_pages)
625#define __NR_utimensat 280
626__SYSCALL(__NR_utimensat, sys_utimensat)
627#define __NR_epoll_pwait 281
628__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait)
629#define __NR_signalfd 282
630__SYSCALL(__NR_signalfd, sys_signalfd)
631#define __NR_timerfd_create 283
632__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
633#define __NR_eventfd 284
634__SYSCALL(__NR_eventfd, sys_eventfd)
635#define __NR_fallocate 285
636__SYSCALL(__NR_fallocate, sys_fallocate)
637#define __NR_timerfd_settime 286
638__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
639#define __NR_timerfd_gettime 287
640__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
641#define __NR_accept4 288
642__SYSCALL(__NR_accept4, sys_accept4)
643#define __NR_signalfd4 289
644__SYSCALL(__NR_signalfd4, sys_signalfd4)
645#define __NR_eventfd2 290
646__SYSCALL(__NR_eventfd2, sys_eventfd2)
647#define __NR_epoll_create1 291
648__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
649#define __NR_dup3 292
650__SYSCALL(__NR_dup3, sys_dup3)
651#define __NR_pipe2 293
652__SYSCALL(__NR_pipe2, sys_pipe2)
653#define __NR_inotify_init1 294
654__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
655#define __NR_preadv 295
656__SYSCALL(__NR_preadv, sys_preadv)
657#define __NR_pwritev 296
658__SYSCALL(__NR_pwritev, sys_pwritev)
659#define __NR_rt_tgsigqueueinfo 297
660__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
661#define __NR_perf_event_open 298
662__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
663#define __NR_recvmmsg 299
664__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
665#define __NR_fanotify_init 300
666__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
667#define __NR_fanotify_mark 301
668__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
669#define __NR_prlimit64 302
670__SYSCALL(__NR_prlimit64, sys_prlimit64)
671#define __NR_name_to_handle_at 303
672__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
673#define __NR_open_by_handle_at 304
674__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
675#define __NR_clock_adjtime 305
676__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
677#define __NR_syncfs 306
678__SYSCALL(__NR_syncfs, sys_syncfs)
679#define __NR_sendmmsg 307
680__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
681#define __NR_setns 308
682__SYSCALL(__NR_setns, sys_setns)
683#define __NR_getcpu 309
684__SYSCALL(__NR_getcpu, sys_getcpu)
685#define __NR_process_vm_readv 310
686__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
687#define __NR_process_vm_writev 311
688__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
689
690#ifndef __NO_STUBS
691#define __ARCH_WANT_OLD_READDIR
692#define __ARCH_WANT_OLD_STAT
693#define __ARCH_WANT_SYS_ALARM
694#define __ARCH_WANT_SYS_GETHOSTNAME
695#define __ARCH_WANT_SYS_PAUSE
696#define __ARCH_WANT_SYS_SGETMASK
697#define __ARCH_WANT_SYS_SIGNAL
698#define __ARCH_WANT_SYS_UTIME
699#define __ARCH_WANT_SYS_WAITPID
700#define __ARCH_WANT_SYS_SOCKETCALL
701#define __ARCH_WANT_SYS_FADVISE64
702#define __ARCH_WANT_SYS_GETPGRP
703#define __ARCH_WANT_SYS_LLSEEK
704#define __ARCH_WANT_SYS_NICE
705#define __ARCH_WANT_SYS_OLD_GETRLIMIT
706#define __ARCH_WANT_SYS_OLD_UNAME
707#define __ARCH_WANT_SYS_OLDUMOUNT
708#define __ARCH_WANT_SYS_SIGPENDING
709#define __ARCH_WANT_SYS_SIGPROCMASK
710#define __ARCH_WANT_SYS_RT_SIGACTION
711#define __ARCH_WANT_SYS_RT_SIGSUSPEND
712#define __ARCH_WANT_SYS_TIME
713#define __ARCH_WANT_COMPAT_SYS_TIME
714#endif /* __NO_STUBS */
715
716#ifdef __KERNEL__
717
718#ifndef COMPILE_OFFSETS
719#include <asm/asm-offsets.h>
720#define NR_syscalls (__NR_syscall_max + 1)
721#endif
722
723/*
724 * "Conditional" syscalls
725 *
726 * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
727 * but it doesn't work on all toolchains, so we just do it by hand
728 */
729#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
730#endif /* __KERNEL__ */
731
732#endif /* _ASM_X86_UNISTD_64_H */
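
The cond_syscall() macro above hand-rolls a weak alias in assembler because the __attribute__((weak,alias(...))) form was unreliable on the toolchains of the day. A minimal user-space sketch of the same mechanism, for GCC on x86 (sys_ni_syscall's -ENOSYS return value and the sys_demo name are illustrative, not from this header):

#include <stdio.h>

long sys_ni_syscall(void)
{
	return -38;			/* -ENOSYS on Linux */
}

/* same shape as: cond_syscall(sys_demo) */
asm(".weak\tsys_demo\n\t.set\tsys_demo,sys_ni_syscall");

extern long sys_demo(void);

int main(void)
{
	/* with no strong definition of sys_demo anywhere, the weak
	 * alias resolves to sys_ni_syscall and this prints -38 */
	printf("%ld\n", sys_demo());
	return 0;
}

If some other object file later provides a strong sys_demo, the linker prefers it and the alias silently drops away, which is exactly the behaviour wanted for optional syscalls.
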
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 8e862aaf0d90..becf47b81735 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -65,7 +65,7 @@
65 * UV2: Bit 19 selects between 65 * UV2: Bit 19 selects between
66 * (0): 10 microsecond timebase and 66 * (0): 10 microsecond timebase and
67 * (1): 80 microseconds 67 * (1): 80 microseconds
68 * we're using 655us, similar to UV1: 65 units of 10us 68 * we're using 560us, similar to UV1: 65 units of 10us
69 */ 69 */
70#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL) 70#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL)
71#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL) 71#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL)
@@ -167,6 +167,7 @@
167#define FLUSH_RETRY_TIMEOUT 2 167#define FLUSH_RETRY_TIMEOUT 2
168#define FLUSH_GIVEUP 3 168#define FLUSH_GIVEUP 3
169#define FLUSH_COMPLETE 4 169#define FLUSH_COMPLETE 4
170#define FLUSH_RETRY_BUSYBUG 5
170 171
171/* 172/*
172 * tuning the action when the numalink network is extremely delayed 173 * tuning the action when the numalink network is extremely delayed
@@ -235,10 +236,10 @@ struct bau_msg_payload {
235 236
236 237
237/* 238/*
238 * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) 239 * UV1 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
239 * see table 4.2.3.0.1 in broadcast_assist spec. 240 * see table 4.2.3.0.1 in broadcast_assist spec.
240 */ 241 */
241struct bau_msg_header { 242struct uv1_bau_msg_header {
242 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 243 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
243 /* bits 5:0 */ 244 /* bits 5:0 */
244 unsigned int base_dest_nasid:15; /* nasid of the first bit */ 245 unsigned int base_dest_nasid:15; /* nasid of the first bit */
@@ -318,19 +319,87 @@ struct bau_msg_header {
318}; 319};
319 320
320/* 321/*
322 * UV2 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
323 * see figure 9-2 of harp_sys.pdf
324 */
325struct uv2_bau_msg_header {
326 unsigned int base_dest_nasid:15; /* nasid of the first bit */
327 /* bits 14:0 */ /* in uvhub map */
328 unsigned int dest_subnodeid:5; /* must be 0x10, for the LB */
329 /* bits 19:15 */
330 unsigned int rsvd_1:1; /* must be zero */
331 /* bit 20 */
332 /* Address bits 59:21 */
333 /* bits 25:2 of address (44:21) are payload */
334 /* these next 24 bits become bytes 12-14 of msg */
335 /* bits 28:21 land in byte 12 */
336 unsigned int replied_to:1; /* sent as 0 by the source to
337 byte 12 */
338 /* bit 21 */
339 unsigned int msg_type:3; /* software type of the
340 message */
341 /* bits 24:22 */
342 unsigned int canceled:1; /* message canceled, resource
343 is to be freed*/
344 /* bit 25 */
345 unsigned int payload_1:3; /* not currently used */
346 /* bits 28:26 */
347
348 /* bits 36:29 land in byte 13 */
349 unsigned int payload_2a:3; /* not currently used */
350 unsigned int payload_2b:5; /* not currently used */
351 /* bits 36:29 */
352
353 /* bits 44:37 land in byte 14 */
354 unsigned int payload_3:8; /* not currently used */
355 /* bits 44:37 */
356
357 unsigned int rsvd_2:7; /* reserved */
358 /* bits 51:45 */
359 unsigned int swack_flag:1; /* software acknowledge flag */
360 /* bit 52 */
361 unsigned int rsvd_3a:3; /* must be zero */
362 unsigned int rsvd_3b:8; /* must be zero */
363 unsigned int rsvd_3c:8; /* must be zero */
364 unsigned int rsvd_3d:3; /* must be zero */
365 /* bits 74:53 */
366 unsigned int fairness:3; /* usually zero */
367 /* bits 77:75 */
368
369 unsigned int sequence:16; /* message sequence number */
370 /* bits 93:78 Suppl_A */
371 unsigned int chaining:1; /* next descriptor is part of
372 this activation*/
373 /* bit 94 */
374 unsigned int multilevel:1; /* multi-level multicast
375 format */
376 /* bit 95 */
377 unsigned int rsvd_4:24; /* ordered / source node /
378 source subnode / aging
379 must be zero */
380 /* bits 119:96 */
381 unsigned int command:8; /* message type */
382 /* bits 127:120 */
383};
384
385/*
321 * The activation descriptor: 386 * The activation descriptor:
322 * The format of the message to send, plus all accompanying control 387 * The format of the message to send, plus all accompanying control
323 * Should be 64 bytes 388 * Should be 64 bytes
324 */ 389 */
325struct bau_desc { 390struct bau_desc {
326 struct pnmask distribution; 391 struct pnmask distribution;
327 /* 392 /*
328 * message template, consisting of header and payload: 393 * message template, consisting of header and payload:
329 */ 394 */
330 struct bau_msg_header header; 395 union bau_msg_header {
331 struct bau_msg_payload payload; 396 struct uv1_bau_msg_header uv1_hdr;
397 struct uv2_bau_msg_header uv2_hdr;
398 } header;
399
400 struct bau_msg_payload payload;
332}; 401};
333/* 402/* UV1:
334 * -payload-- ---------header------ 403 * -payload-- ---------header------
335 * bytes 0-11 bits 41-56 bits 58-81 404 * bytes 0-11 bits 41-56 bits 58-81
336 * A B (2) C (3) 405 * A B (2) C (3)
@@ -340,6 +409,16 @@ struct bau_desc {
340 * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) 409 * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
341 * ------------payload queue----------- 410 * ------------payload queue-----------
342 */ 411 */
412/* UV2:
413 * -payload-- ---------header------
414 * bytes 0-11 bits 70-78 bits 21-44
415 * A B (2) C (3)
416 *
417 * A/B/C are moved to:
418 * A C B
419 * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
420 * ------------payload queue-----------
421 */
343 422
344/* 423/*
345 * The payload queue on the destination side is an array of these. 424 * The payload queue on the destination side is an array of these.
@@ -385,7 +464,6 @@ struct bau_pq_entry {
385struct msg_desc { 464struct msg_desc {
386 struct bau_pq_entry *msg; 465 struct bau_pq_entry *msg;
387 int msg_slot; 466 int msg_slot;
388 int swack_slot;
389 struct bau_pq_entry *queue_first; 467 struct bau_pq_entry *queue_first;
390 struct bau_pq_entry *queue_last; 468 struct bau_pq_entry *queue_last;
391}; 469};
@@ -405,6 +483,7 @@ struct ptc_stats {
405 requests */ 483 requests */
406 unsigned long s_stimeout; /* source side timeouts */ 484 unsigned long s_stimeout; /* source side timeouts */
407 unsigned long s_dtimeout; /* destination side timeouts */ 485 unsigned long s_dtimeout; /* destination side timeouts */
486 unsigned long s_strongnacks; /* number of strong nack's */
408 unsigned long s_time; /* time spent in sending side */ 487 unsigned long s_time; /* time spent in sending side */
409 unsigned long s_retriesok; /* successful retries */ 488 unsigned long s_retriesok; /* successful retries */
410 unsigned long s_ntargcpu; /* total number of cpu's 489 unsigned long s_ntargcpu; /* total number of cpu's
@@ -439,6 +518,9 @@ struct ptc_stats {
439 unsigned long s_retry_messages; /* retry broadcasts */ 518 unsigned long s_retry_messages; /* retry broadcasts */
440 unsigned long s_bau_reenabled; /* for bau enable/disable */ 519 unsigned long s_bau_reenabled; /* for bau enable/disable */
441 unsigned long s_bau_disabled; /* for bau enable/disable */ 520 unsigned long s_bau_disabled; /* for bau enable/disable */
521 unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
522 unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
523 unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
442 /* destination statistics */ 524 /* destination statistics */
443 unsigned long d_alltlb; /* times all tlb's on this 525 unsigned long d_alltlb; /* times all tlb's on this
444 cpu were flushed */ 526 cpu were flushed */
@@ -511,9 +593,12 @@ struct bau_control {
511 short osnode; 593 short osnode;
512 short uvhub_cpu; 594 short uvhub_cpu;
513 short uvhub; 595 short uvhub;
596 short uvhub_version;
514 short cpus_in_socket; 597 short cpus_in_socket;
515 short cpus_in_uvhub; 598 short cpus_in_uvhub;
516 short partition_base_pnode; 599 short partition_base_pnode;
600 short using_desc; /* an index, like uvhub_cpu */
601 unsigned int inuse_map;
517 unsigned short message_number; 602 unsigned short message_number;
518 unsigned short uvhub_quiesce; 603 unsigned short uvhub_quiesce;
519 short socket_acknowledge_count[DEST_Q_SIZE]; 604 short socket_acknowledge_count[DEST_Q_SIZE];
@@ -531,6 +616,7 @@ struct bau_control {
531 int cong_response_us; 616 int cong_response_us;
532 int cong_reps; 617 int cong_reps;
533 int cong_period; 618 int cong_period;
619 unsigned long clocks_per_100_usec;
534 cycles_t period_time; 620 cycles_t period_time;
535 long period_requests; 621 long period_requests;
536 struct hub_and_pnode *thp; 622 struct hub_and_pnode *thp;
@@ -591,6 +677,11 @@ static inline void write_mmr_sw_ack(unsigned long mr)
591 uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); 677 uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
592} 678}
593 679
680static inline void write_gmmr_sw_ack(int pnode, unsigned long mr)
681{
682 write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
683}
684
594static inline unsigned long read_mmr_sw_ack(void) 685static inline unsigned long read_mmr_sw_ack(void)
595{ 686{
596 return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); 687 return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
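
With bau_msg_header now a union, send-side code must pick the member that matches the hub generation; the uvhub_version field added to struct bau_control is presumably what drives that choice. A hedged sketch using only fields visible in this hunk (sketch_fill_header is an illustrative name, not the kernel's):

static void sketch_fill_header(struct bau_desc *bd, int uvhub_version)
{
	if (uvhub_version == 1)
		bd->header.uv1_hdr.dest_subnodeid = 0x10;	/* LB, UV1 bit layout */
	else
		bd->header.uv2_hdr.dest_subnodeid = 0x10;	/* LB, UV2 bit layout */
}
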
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 54a13aaebc40..21f7385badb8 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -318,13 +318,13 @@ uv_gpa_in_mmr_space(unsigned long gpa)
318/* UV global physical address --> socket phys RAM */ 318/* UV global physical address --> socket phys RAM */
319static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa) 319static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa)
320{ 320{
321 unsigned long paddr = gpa & uv_hub_info->gpa_mask; 321 unsigned long paddr;
322 unsigned long remap_base = uv_hub_info->lowmem_remap_base; 322 unsigned long remap_base = uv_hub_info->lowmem_remap_base;
323 unsigned long remap_top = uv_hub_info->lowmem_remap_top; 323 unsigned long remap_top = uv_hub_info->lowmem_remap_top;
324 324
325 gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | 325 gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
326 ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); 326 ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
327 gpa = gpa & uv_hub_info->gpa_mask; 327 paddr = gpa & uv_hub_info->gpa_mask;
328 if (paddr >= remap_base && paddr < remap_base + remap_top) 328 if (paddr >= remap_base && paddr < remap_base + remap_top)
329 paddr -= remap_base; 329 paddr -= remap_base;
330 return paddr; 330 return paddr;
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 815285bcaceb..8b38be2de9e1 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -5,13 +5,8 @@
5#include <linux/clocksource.h> 5#include <linux/clocksource.h>
6 6
7struct vsyscall_gtod_data { 7struct vsyscall_gtod_data {
8 seqlock_t lock; 8 seqcount_t seq;
9 9
10 /* open coded 'struct timespec' */
11 time_t wall_time_sec;
12 u32 wall_time_nsec;
13
14 struct timezone sys_tz;
15 struct { /* extract of a clocksource struct */ 10 struct { /* extract of a clocksource struct */
16 int vclock_mode; 11 int vclock_mode;
17 cycle_t cycle_last; 12 cycle_t cycle_last;
@@ -19,8 +14,16 @@ struct vsyscall_gtod_data {
19 u32 mult; 14 u32 mult;
20 u32 shift; 15 u32 shift;
21 } clock; 16 } clock;
22 struct timespec wall_to_monotonic; 17
18 /* open coded 'struct timespec' */
19 time_t wall_time_sec;
20 u32 wall_time_nsec;
21 u32 monotonic_time_nsec;
22 time_t monotonic_time_sec;
23
24 struct timezone sys_tz;
23 struct timespec wall_time_coarse; 25 struct timespec wall_time_coarse;
26 struct timespec monotonic_time_coarse;
24}; 27};
25extern struct vsyscall_gtod_data vsyscall_gtod_data; 28extern struct vsyscall_gtod_data vsyscall_gtod_data;
26 29
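
Replacing the seqlock_t with a bare seqcount_t fits the vDSO model: readers never block, they simply retry if a timekeeping update raced with them. A hedged sketch of the reader side against the new layout (sketch_read_wall_ns is an illustrative name; the real readers live in the vclock_gettime code):

static inline u64 sketch_read_wall_ns(const struct vsyscall_gtod_data *gtod)
{
	unsigned seq;
	u64 sec, ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		sec = gtod->wall_time_sec;
		ns  = gtod->wall_time_nsec;
	} while (read_seqcount_retry(&gtod->seq, seq));

	return sec * NSEC_PER_SEC + ns;
}
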
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index e0f9aa16358b..5da71c27cc59 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -16,7 +16,6 @@
16#define _ASM_X86_VIRTEX_H 16#define _ASM_X86_VIRTEX_H
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/system.h>
20 19
21#include <asm/vmx.h> 20#include <asm/vmx.h>
22#include <asm/svm.h> 21#include <asm/svm.h>
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..6fe6767b7124
--- /dev/null
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -0,0 +1,46 @@
1#ifndef _ASM_WORD_AT_A_TIME_H
2#define _ASM_WORD_AT_A_TIME_H
3
4/*
5 * This is largely generic for little-endian machines, but the
6 * optimal byte mask counting is probably going to be something
7 * that is architecture-specific. If you have a reliably fast
8 * bit count instruction, that might be better than the multiply
9 * and shift, for example.
10 */
11
12#ifdef CONFIG_64BIT
13
14/*
15 * Jan Achrenius on G+: microoptimized version of
16 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
17 * that works for the bytemasks without having to
18 * mask them first.
19 */
20static inline long count_masked_bytes(unsigned long mask)
21{
22 return mask*0x0001020304050608ul >> 56;
23}
24
25#else /* 32-bit case */
26
27/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
28static inline long count_masked_bytes(long mask)
29{
30 /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
31 long a = (0x0ff0001+mask) >> 23;
32 /* Fix the 1 for 00 case */
33 return a & mask;
34}
35
36#endif
37
38#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
39
40/* Return the high bit set in the first byte that is a zero */
41static inline unsigned long has_zero(unsigned long a)
42{
43 return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
44}
45
46#endif /* _ASM_WORD_AT_A_TIME_H */
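
A user-space illustration of how these helpers combine to find the first NUL byte in a word, assuming a little-endian 64-bit machine; the (mask - 1) & ~mask preparation mirrors what the new word-at-a-time users in fs/namei.c do before calling count_masked_bytes():

#include <stdio.h>
#include <string.h>

#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))

static inline long count_masked_bytes(unsigned long mask)
{
	return mask * 0x0001020304050608ul >> 56;
}

static inline unsigned long has_zero(unsigned long a)
{
	return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
}

int main(void)
{
	unsigned long a, mask;

	memcpy(&a, "abc\0zzzz", sizeof(a));	/* NUL at byte index 3 */
	mask = has_zero(a);			/* 0x80 in each zero byte */
	mask = (mask - 1) & ~mask;		/* bits below the first 0x80 */
	mask >>= 7;				/* 0xff per byte before the NUL */
	printf("%ld\n", count_masked_bytes(mask));	/* prints 3 */
	return 0;
}
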
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
index 6bf5b8e478c0..92e54abf89e0 100644
--- a/arch/x86/include/asm/x2apic.h
+++ b/arch/x86/include/asm/x2apic.h
@@ -18,6 +18,11 @@ static const struct cpumask *x2apic_target_cpus(void)
18 return cpu_online_mask; 18 return cpu_online_mask;
19} 19}
20 20
21static int x2apic_apic_id_valid(int apicid)
22{
23 return 1;
24}
25
21static int x2apic_apic_id_registered(void) 26static int x2apic_apic_id_registered(void)
22{ 27{
23 return 1; 28 return 1;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 517d4767ffdd..baaca8defec8 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -145,9 +145,11 @@ struct x86_init_ops {
145/** 145/**
146 * struct x86_cpuinit_ops - platform specific cpu hotplug setups 146 * struct x86_cpuinit_ops - platform specific cpu hotplug setups
147 * @setup_percpu_clockev: set up the per cpu clock event device 147 * @setup_percpu_clockev: set up the per cpu clock event device
148 * @early_percpu_clock_init: early init of the per cpu clock event device
148 */ 149 */
149struct x86_cpuinit_ops { 150struct x86_cpuinit_ops {
150 void (*setup_percpu_clockev)(void); 151 void (*setup_percpu_clockev)(void);
152 void (*early_percpu_clock_init)(void);
151 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); 153 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
152}; 154};
153 155
@@ -160,6 +162,8 @@ struct x86_cpuinit_ops {
160 * @is_untracked_pat_range exclude from PAT logic 162 * @is_untracked_pat_range exclude from PAT logic
161 * @nmi_init enable NMI on cpus 163 * @nmi_init enable NMI on cpus
162 * @i8042_detect pre-detect if i8042 controller exists 164 * @i8042_detect pre-detect if i8042 controller exists
165 * @save_sched_clock_state: save state for sched_clock() on suspend
166 * @restore_sched_clock_state: restore state for sched_clock() on resume
163 */ 167 */
164struct x86_platform_ops { 168struct x86_platform_ops {
165 unsigned long (*calibrate_tsc)(void); 169 unsigned long (*calibrate_tsc)(void);
@@ -171,6 +175,8 @@ struct x86_platform_ops {
171 void (*nmi_init)(void); 175 void (*nmi_init)(void);
172 unsigned char (*get_nmi_reason)(void); 176 unsigned char (*get_nmi_reason)(void);
173 int (*i8042_detect)(void); 177 int (*i8042_detect)(void);
178 void (*save_sched_clock_state)(void);
179 void (*restore_sched_clock_state)(void);
174}; 180};
175 181
176struct pci_dev; 182struct pci_dev;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8baca3c4871c..532d2e090e6f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,7 +25,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
25obj-y += probe_roms.o 25obj-y += probe_roms.o
26obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 26obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
27obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 27obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
28obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 28obj-y += syscall_$(BITS).o
29obj-$(CONFIG_X86_64) += vsyscall_64.o
29obj-$(CONFIG_X86_64) += vsyscall_emu_64.o 30obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
30obj-y += bootflag.o e820.o 31obj-y += bootflag.o e820.o
31obj-y += pci-dma.o quirks.o topology.o kdebugfs.o 32obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
@@ -68,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
68obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
69obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
70obj-$(CONFIG_KPROBES) += kprobes.o 71obj-$(CONFIG_KPROBES) += kprobes.o
72obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
71obj-$(CONFIG_MODULES) += module.o 73obj-$(CONFIG_MODULES) += module.o
72obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o 74obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
73obj-$(CONFIG_KGDB) += kgdb.o 75obj-$(CONFIG_KGDB) += kgdb.o
@@ -80,6 +82,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
80obj-$(CONFIG_AMD_NB) += amd_nb.o 82obj-$(CONFIG_AMD_NB) += amd_nb.o
81obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o 83obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
82obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o 84obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
85obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
83 86
84obj-$(CONFIG_KVM_GUEST) += kvm.o 87obj-$(CONFIG_KVM_GUEST) += kvm.o
85obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 88obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ce664f33ea8e..a415b1f44365 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -239,7 +239,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
239 * to not preallocating memory for all NR_CPUS 239 * to not preallocating memory for all NR_CPUS
240 * when we use CPU hotplug. 240 * when we use CPU hotplug.
241 */ 241 */
242 if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled) 242 if (!apic->apic_id_valid(apic_id) && enabled)
243 printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); 243 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
244 else 244 else
245 acpi_register_lapic(apic_id, enabled); 245 acpi_register_lapic(apic_id, enabled);
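
This replaces the open-coded cpu_has_x2apic / 0xff test with a per-driver callback: the x2apic drivers accept any 32-bit id (see x2apic_apic_id_valid() in the x2apic.h hunk above, which just returns 1), while the default used by the xAPIC drivers presumably keeps the 8-bit limit, along these lines:

static inline int default_apic_id_valid(int apicid)
{
	return apicid < 255;	/* xAPIC ids must fit in 8 bits */
}
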
@@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void)
593#ifdef CONFIG_ACPI_HOTPLUG_CPU 593#ifdef CONFIG_ACPI_HOTPLUG_CPU
594#include <acpi/processor.h> 594#include <acpi/processor.h>
595 595
596static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) 596static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
597{ 597{
598#ifdef CONFIG_ACPI_NUMA 598#ifdef CONFIG_ACPI_NUMA
599 int nid; 599 int nid;
@@ -642,6 +642,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
642 kfree(buffer.pointer); 642 kfree(buffer.pointer);
643 buffer.length = ACPI_ALLOCATE_BUFFER; 643 buffer.length = ACPI_ALLOCATE_BUFFER;
644 buffer.pointer = NULL; 644 buffer.pointer = NULL;
645 lapic = NULL;
645 646
646 if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) 647 if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
647 goto out; 648 goto out;
@@ -650,7 +651,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
650 goto free_tmp_map; 651 goto free_tmp_map;
651 652
652 cpumask_copy(tmp_map, cpu_present_mask); 653 cpumask_copy(tmp_map, cpu_present_mask);
653 acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); 654 acpi_register_lapic(physid, ACPI_MADT_ENABLED);
654 655
655 /* 656 /*
656 * If mp_register_lapic successfully generates a new logical cpu 657 * If mp_register_lapic successfully generates a new logical cpu
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index f50e7fb2a201..d2b7f27781bc 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -14,6 +14,7 @@
14#include <acpi/processor.h> 14#include <acpi/processor.h>
15#include <asm/acpi.h> 15#include <asm/acpi.h>
16#include <asm/mwait.h> 16#include <asm/mwait.h>
17#include <asm/special_insns.h>
17 18
18/* 19/*
19 * Initialize bm_flags based on the CPU cache properties 20 * Initialize bm_flags based on the CPU cache properties
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b1e7c7f7a0af..e66311200cbd 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -477,7 +477,7 @@ error:
477/* allocate and map a coherent mapping */ 477/* allocate and map a coherent mapping */
478static void * 478static void *
479gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, 479gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
480 gfp_t flag) 480 gfp_t flag, struct dma_attrs *attrs)
481{ 481{
482 dma_addr_t paddr; 482 dma_addr_t paddr;
483 unsigned long align_mask; 483 unsigned long align_mask;
@@ -500,7 +500,8 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
500 } 500 }
501 __free_pages(page, get_order(size)); 501 __free_pages(page, get_order(size));
502 } else 502 } else
503 return dma_generic_alloc_coherent(dev, size, dma_addr, flag); 503 return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
504 attrs);
504 505
505 return NULL; 506 return NULL;
506} 507}
@@ -508,7 +509,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
508/* free a coherent mapping */ 509/* free a coherent mapping */
509static void 510static void
510gart_free_coherent(struct device *dev, size_t size, void *vaddr, 511gart_free_coherent(struct device *dev, size_t size, void *vaddr,
511 dma_addr_t dma_addr) 512 dma_addr_t dma_addr, struct dma_attrs *attrs)
512{ 513{
513 gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); 514 gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
514 free_pages((unsigned long)vaddr, get_order(size)); 515 free_pages((unsigned long)vaddr, get_order(size));
@@ -700,8 +701,8 @@ static struct dma_map_ops gart_dma_ops = {
700 .unmap_sg = gart_unmap_sg, 701 .unmap_sg = gart_unmap_sg,
701 .map_page = gart_map_page, 702 .map_page = gart_map_page,
702 .unmap_page = gart_unmap_page, 703 .unmap_page = gart_unmap_page,
703 .alloc_coherent = gart_alloc_coherent, 704 .alloc = gart_alloc_coherent,
704 .free_coherent = gart_free_coherent, 705 .free = gart_free_coherent,
705 .mapping_error = gart_mapping_error, 706 .mapping_error = gart_mapping_error,
706}; 707};
707 708
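
The alloc_coherent/free_coherent pair becomes alloc/free with a struct dma_attrs argument; existing callers presumably keep working because dma_alloc_coherent() can forward NULL attributes to the new hook. A hedged sketch of that wrapper shape (the sketch_ prefix marks it as illustrative, not the header's actual definition):

static inline void *sketch_dma_alloc_coherent(struct device *dev, size_t size,
					      dma_addr_t *dma_handle, gfp_t gfp)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	/* NULL attrs preserves the old coherent-allocation semantics */
	return ops->alloc(dev, size, dma_handle, gfp, NULL);
}
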
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2eec05b6d1b8..11544d8f1e97 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -383,20 +383,25 @@ static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
383 383
384static unsigned int reserve_eilvt_offset(int offset, unsigned int new) 384static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
385{ 385{
386 unsigned int rsvd; /* 0: uninitialized */ 386 unsigned int rsvd, vector;
387 387
388 if (offset >= APIC_EILVT_NR_MAX) 388 if (offset >= APIC_EILVT_NR_MAX)
389 return ~0; 389 return ~0;
390 390
391 rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED; 391 rsvd = atomic_read(&eilvt_offsets[offset]);
392 do { 392 do {
393 if (rsvd && 393 vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */
394 !eilvt_entry_is_changeable(rsvd, new)) 394 if (vector && !eilvt_entry_is_changeable(vector, new))
395 /* may not change if vectors are different */ 395 /* may not change if vectors are different */
396 return rsvd; 396 return rsvd;
397 rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); 397 rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
398 } while (rsvd != new); 398 } while (rsvd != new);
399 399
400 rsvd &= ~APIC_EILVT_MASKED;
401 if (rsvd && rsvd != vector)
402 pr_info("LVT offset %d assigned for vector 0x%02x\n",
403 offset, rsvd);
404
400 return new; 405 return new;
401} 406}
402 407
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 8c3cdded6f2b..359b6899a36c 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -180,6 +180,7 @@ static struct apic apic_flat = {
180 .name = "flat", 180 .name = "flat",
181 .probe = flat_probe, 181 .probe = flat_probe,
182 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 182 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
183 .apic_id_valid = default_apic_id_valid,
183 .apic_id_registered = flat_apic_id_registered, 184 .apic_id_registered = flat_apic_id_registered,
184 185
185 .irq_delivery_mode = dest_LowestPrio, 186 .irq_delivery_mode = dest_LowestPrio,
@@ -337,6 +338,7 @@ static struct apic apic_physflat = {
337 .name = "physical flat", 338 .name = "physical flat",
338 .probe = physflat_probe, 339 .probe = physflat_probe,
339 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 340 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
341 .apic_id_valid = default_apic_id_valid,
340 .apic_id_registered = flat_apic_id_registered, 342 .apic_id_registered = flat_apic_id_registered,
341 343
342 .irq_delivery_mode = dest_Fixed, 344 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 775b82bc655c..634ae6cdd5c9 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -124,6 +124,7 @@ struct apic apic_noop = {
124 .probe = noop_probe, 124 .probe = noop_probe,
125 .acpi_madt_oem_check = NULL, 125 .acpi_madt_oem_check = NULL,
126 126
127 .apic_id_valid = default_apic_id_valid,
127 .apic_id_registered = noop_apic_id_registered, 128 .apic_id_registered = noop_apic_id_registered,
128 129
129 .irq_delivery_mode = dest_LowestPrio, 130 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 09d3d8c1cd99..899803e03214 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -56,6 +56,12 @@ static unsigned int read_xapic_id(void)
56 return get_apic_id(apic_read(APIC_ID)); 56 return get_apic_id(apic_read(APIC_ID));
57} 57}
58 58
59static int numachip_apic_id_valid(int apicid)
60{
61 /* Trust what bootloader passes in MADT */
62 return 1;
63}
64
59static int numachip_apic_id_registered(void) 65static int numachip_apic_id_registered(void)
60{ 66{
61 return physid_isset(read_xapic_id(), phys_cpu_present_map); 67 return physid_isset(read_xapic_id(), phys_cpu_present_map);
@@ -238,6 +244,7 @@ static struct apic apic_numachip __refconst = {
238 .name = "NumaConnect system", 244 .name = "NumaConnect system",
239 .probe = numachip_probe, 245 .probe = numachip_probe,
240 .acpi_madt_oem_check = numachip_acpi_madt_oem_check, 246 .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
247 .apic_id_valid = numachip_apic_id_valid,
241 .apic_id_registered = numachip_apic_id_registered, 248 .apic_id_registered = numachip_apic_id_registered,
242 249
243 .irq_delivery_mode = dest_Fixed, 250 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 521bead01137..0cdec7065aff 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -198,6 +198,7 @@ static struct apic apic_bigsmp = {
198 .name = "bigsmp", 198 .name = "bigsmp",
199 .probe = probe_bigsmp, 199 .probe = probe_bigsmp,
200 .acpi_madt_oem_check = NULL, 200 .acpi_madt_oem_check = NULL,
201 .apic_id_valid = default_apic_id_valid,
201 .apic_id_registered = bigsmp_apic_id_registered, 202 .apic_id_registered = bigsmp_apic_id_registered,
202 203
203 .irq_delivery_mode = dest_Fixed, 204 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 5d513bc47b6b..e42d1d3b9134 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -625,6 +625,7 @@ static struct apic __refdata apic_es7000_cluster = {
625 .name = "es7000", 625 .name = "es7000",
626 .probe = probe_es7000, 626 .probe = probe_es7000,
627 .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, 627 .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
628 .apic_id_valid = default_apic_id_valid,
628 .apic_id_registered = es7000_apic_id_registered, 629 .apic_id_registered = es7000_apic_id_registered,
629 630
630 .irq_delivery_mode = dest_LowestPrio, 631 .irq_delivery_mode = dest_LowestPrio,
@@ -690,6 +691,7 @@ static struct apic __refdata apic_es7000 = {
690 .name = "es7000", 691 .name = "es7000",
691 .probe = probe_es7000, 692 .probe = probe_es7000,
692 .acpi_madt_oem_check = es7000_acpi_madt_oem_check, 693 .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
694 .apic_id_valid = default_apic_id_valid,
693 .apic_id_registered = es7000_apic_id_registered, 695 .apic_id_registered = es7000_apic_id_registered,
694 696
695 .irq_delivery_mode = dest_Fixed, 697 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fb072754bc1d..e88300d8e80a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -64,9 +64,28 @@
64#include <asm/apic.h> 64#include <asm/apic.h>
65 65
66#define __apicdebuginit(type) static type __init 66#define __apicdebuginit(type) static type __init
67
67#define for_each_irq_pin(entry, head) \ 68#define for_each_irq_pin(entry, head) \
68 for (entry = head; entry; entry = entry->next) 69 for (entry = head; entry; entry = entry->next)
69 70
71static void __init __ioapic_init_mappings(void);
72
73static unsigned int __io_apic_read (unsigned int apic, unsigned int reg);
74static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val);
75static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
76
77static struct io_apic_ops io_apic_ops = {
78 .init = __ioapic_init_mappings,
79 .read = __io_apic_read,
80 .write = __io_apic_write,
81 .modify = __io_apic_modify,
82};
83
84void __init set_io_apic_ops(const struct io_apic_ops *ops)
85{
86 io_apic_ops = *ops;
87}
88
70/* 89/*
71 * Is the SiS APIC rmw bug present ? 90 * Is the SiS APIC rmw bug present ?
72 * -1 = don't know, 0 = no, 1 = yes 91 * -1 = don't know, 0 = no, 1 = yes
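
The new io_apic_ops indirection exists so a paravirtualized platform can interpose on IO-APIC register access before the first read happens. A hedged sketch of an override (the xen_* names are placeholders for whatever backend registers itself; the default handlers stay static to io_apic.c):

static void __init xen_ioapic_init(void) { }

static unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
{
	return 0x00170011;	/* e.g. ask the hypervisor, not MMIO */
}

static void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val) { }
static void xen_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val) { }

static struct io_apic_ops xen_io_apic_ops = {
	.init	= xen_ioapic_init,
	.read	= xen_io_apic_read,
	.write	= xen_io_apic_write,
	.modify	= xen_io_apic_modify,
};

static void __init xen_setup_ioapic(void)
{
	/* must run before ioapic_and_gsi_init() invokes io_apic_ops.init() */
	set_io_apic_ops(&xen_io_apic_ops);
}
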
@@ -294,6 +313,22 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
294 irq_free_desc(at); 313 irq_free_desc(at);
295} 314}
296 315
316static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
317{
318 return io_apic_ops.read(apic, reg);
319}
320
321static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
322{
323 io_apic_ops.write(apic, reg, value);
324}
325
326static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
327{
328 io_apic_ops.modify(apic, reg, value);
329}
330
331
297struct io_apic { 332struct io_apic {
298 unsigned int index; 333 unsigned int index;
299 unsigned int unused[3]; 334 unsigned int unused[3];
@@ -314,16 +349,17 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
314 writel(vector, &io_apic->eoi); 349 writel(vector, &io_apic->eoi);
315} 350}
316 351
317static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 352static unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
318{ 353{
319 struct io_apic __iomem *io_apic = io_apic_base(apic); 354 struct io_apic __iomem *io_apic = io_apic_base(apic);
320 writel(reg, &io_apic->index); 355 writel(reg, &io_apic->index);
321 return readl(&io_apic->data); 356 return readl(&io_apic->data);
322} 357}
323 358
324static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) 359static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
325{ 360{
326 struct io_apic __iomem *io_apic = io_apic_base(apic); 361 struct io_apic __iomem *io_apic = io_apic_base(apic);
362
327 writel(reg, &io_apic->index); 363 writel(reg, &io_apic->index);
328 writel(value, &io_apic->data); 364 writel(value, &io_apic->data);
329} 365}
@@ -334,7 +370,7 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
334 * 370 *
335 * Older SiS APIC requires we rewrite the index register 371 * Older SiS APIC requires we rewrite the index register
336 */ 372 */
337static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) 373static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
338{ 374{
339 struct io_apic __iomem *io_apic = io_apic_base(apic); 375 struct io_apic __iomem *io_apic = io_apic_base(apic);
340 376
@@ -377,6 +413,7 @@ static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
377 413
378 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); 414 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
379 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); 415 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
416
380 return eu.entry; 417 return eu.entry;
381} 418}
382 419
@@ -384,9 +421,11 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
384{ 421{
385 union entry_union eu; 422 union entry_union eu;
386 unsigned long flags; 423 unsigned long flags;
424
387 raw_spin_lock_irqsave(&ioapic_lock, flags); 425 raw_spin_lock_irqsave(&ioapic_lock, flags);
388 eu.entry = __ioapic_read_entry(apic, pin); 426 eu.entry = __ioapic_read_entry(apic, pin);
389 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 427 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
428
390 return eu.entry; 429 return eu.entry;
391} 430}
392 431
@@ -396,8 +435,7 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
396 * the interrupt, and we need to make sure the entry is fully populated 435 * the interrupt, and we need to make sure the entry is fully populated
397 * before that happens. 436 * before that happens.
398 */ 437 */
399static void 438static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
400__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
401{ 439{
402 union entry_union eu = {{0, 0}}; 440 union entry_union eu = {{0, 0}};
403 441
@@ -409,6 +447,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
409static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 447static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
410{ 448{
411 unsigned long flags; 449 unsigned long flags;
450
412 raw_spin_lock_irqsave(&ioapic_lock, flags); 451 raw_spin_lock_irqsave(&ioapic_lock, flags);
413 __ioapic_write_entry(apic, pin, e); 452 __ioapic_write_entry(apic, pin, e);
414 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 453 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -435,8 +474,7 @@ static void ioapic_mask_entry(int apic, int pin)
435 * shared ISA-space IRQs, so we have to support them. We are super 474 * shared ISA-space IRQs, so we have to support them. We are super
436 * fast in the common case, and fast for shared ISA-space IRQs. 475 * fast in the common case, and fast for shared ISA-space IRQs.
437 */ 476 */
438static int 477static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
439__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
440{ 478{
441 struct irq_pin_list **last, *entry; 479 struct irq_pin_list **last, *entry;
442 480
@@ -521,6 +559,7 @@ static void io_apic_sync(struct irq_pin_list *entry)
521 * a dummy read from the IO-APIC 559 * a dummy read from the IO-APIC
522 */ 560 */
523 struct io_apic __iomem *io_apic; 561 struct io_apic __iomem *io_apic;
562
524 io_apic = io_apic_base(entry->apic); 563 io_apic = io_apic_base(entry->apic);
525 readl(&io_apic->data); 564 readl(&io_apic->data);
526} 565}
@@ -2512,21 +2551,73 @@ static void ack_apic_edge(struct irq_data *data)
2512 2551
2513atomic_t irq_mis_count; 2552atomic_t irq_mis_count;
2514 2553
2515static void ack_apic_level(struct irq_data *data)
2516{
2517 struct irq_cfg *cfg = data->chip_data;
2518 int i, do_unmask_irq = 0, irq = data->irq;
2519 unsigned long v;
2520
2521 irq_complete_move(cfg);
2522#ifdef CONFIG_GENERIC_PENDING_IRQ 2554#ifdef CONFIG_GENERIC_PENDING_IRQ
2555static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
2556{
2523 /* If we are moving the irq we need to mask it */ 2557 /* If we are moving the irq we need to mask it */
2524 if (unlikely(irqd_is_setaffinity_pending(data))) { 2558 if (unlikely(irqd_is_setaffinity_pending(data))) {
2525 do_unmask_irq = 1;
2526 mask_ioapic(cfg); 2559 mask_ioapic(cfg);
2560 return true;
2527 } 2561 }
2562 return false;
2563}
2564
2565static inline void ioapic_irqd_unmask(struct irq_data *data,
2566 struct irq_cfg *cfg, bool masked)
2567{
2568 if (unlikely(masked)) {
2569 /* Only migrate the irq if the ack has been received.
2570 *
2571 * On rare occasions the broadcast level triggered ack gets
2572 * delayed going to ioapics, and if we reprogram the
2573 * vector while Remote IRR is still set the irq will never
2574 * fire again.
2575 *
2576 * To prevent this scenario we read the Remote IRR bit
2577 * of the ioapic. This has two effects.
2578 * - On any sane system the read of the ioapic will
2579 * flush writes (and acks) going to the ioapic from
2580 * this cpu.
2581 * - We get to see if the ACK has actually been delivered.
2582 *
2583 * Based on failed experiments of reprogramming the
2584 * ioapic entry from outside of irq context starting
2585 * with masking the ioapic entry and then polling until
2586 * Remote IRR was clear before reprogramming the
2587 * ioapic I don't trust the Remote IRR bit to be
2588 * completely accurate.
2589 *
2590 * However there appears to be no other way to plug
2591 * this race, so if the Remote IRR bit is not
2592 * accurate and is causing problems then it is a hardware bug
2593 * and you can go talk to the chipset vendor about it.
2594 */
2595 if (!io_apic_level_ack_pending(cfg))
2596 irq_move_masked_irq(data);
2597 unmask_ioapic(cfg);
2598 }
2599}
2600#else
2601static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
2602{
2603 return false;
2604}
2605static inline void ioapic_irqd_unmask(struct irq_data *data,
2606 struct irq_cfg *cfg, bool masked)
2607{
2608}
2528#endif 2609#endif
2529 2610
2611static void ack_apic_level(struct irq_data *data)
2612{
2613 struct irq_cfg *cfg = data->chip_data;
2614 int i, irq = data->irq;
2615 unsigned long v;
2616 bool masked;
2617
2618 irq_complete_move(cfg);
2619 masked = ioapic_irqd_mask(data, cfg);
2620
2530 /* 2621 /*
2531 * It appears there is an erratum which affects at least version 0x11 2622 * It appears there is an erratum which affects at least version 0x11
2532 * of I/O APIC (that's the 82093AA and cores integrated into various 2623 * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -2581,38 +2672,7 @@ static void ack_apic_level(struct irq_data *data)
2581 eoi_ioapic_irq(irq, cfg); 2672 eoi_ioapic_irq(irq, cfg);
2582 } 2673 }
2583 2674
2584 /* Now we can move and renable the irq */ 2675 ioapic_irqd_unmask(data, cfg, masked);
2585 if (unlikely(do_unmask_irq)) {
2586 /* Only migrate the irq if the ack has been received.
2587 *
2588 * On rare occasions the broadcast level triggered ack gets
2589 * delayed going to ioapics, and if we reprogram the
2590 * vector while Remote IRR is still set the irq will never
2591 * fire again.
2592 *
2593 * To prevent this scenario we read the Remote IRR bit
2594 * of the ioapic. This has two effects.
2595 * - On any sane system the read of the ioapic will
2596 * flush writes (and acks) going to the ioapic from
2597 * this cpu.
2598 * - We get to see if the ACK has actually been delivered.
2599 *
2600 * Based on failed experiments of reprogramming the
2601 * ioapic entry from outside of irq context starting
2602 * with masking the ioapic entry and then polling until
2603 * Remote IRR was clear before reprogramming the
2604 * ioapic I don't trust the Remote IRR bit to be
2605 * completey accurate.
2606 *
2607 * However there appears to be no other way to plug
2608 * this race, so if the Remote IRR bit is not
2609 * accurate and is causing problems then it is a hardware bug
2610 * and you can go talk to the chipset vendor about it.
2611 */
2612 if (!io_apic_level_ack_pending(cfg))
2613 irq_move_masked_irq(data);
2614 unmask_ioapic(cfg);
2615 }
2616} 2676}
2617 2677
2618#ifdef CONFIG_IRQ_REMAP 2678#ifdef CONFIG_IRQ_REMAP
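
The refactor above is the standard way to keep #ifdefs out of a function body: the real helpers are built under CONFIG_GENERIC_PENDING_IRQ and empty static inline stubs take their place otherwise, so ack_apic_level() reads straight through and the compiler drops the dead calls. The generic shape, with illustrative names:

#ifdef CONFIG_FOO
static bool foo_prepare(struct ctx *c) { /* real work */ return true; }
static void foo_finish(struct ctx *c, bool armed) { /* real work */ }
#else
static inline bool foo_prepare(struct ctx *c) { return false; }
static inline void foo_finish(struct ctx *c, bool armed) { }
#endif
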
@@ -3873,6 +3933,11 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3873 3933
3874void __init ioapic_and_gsi_init(void) 3934void __init ioapic_and_gsi_init(void)
3875{ 3935{
3936 io_apic_ops.init();
3937}
3938
3939static void __init __ioapic_init_mappings(void)
3940{
3876 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 3941 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3877 struct resource *ioapic_res; 3942 struct resource *ioapic_res;
3878 int i; 3943 int i;
@@ -3967,18 +4032,36 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
3967static __init int bad_ioapic(unsigned long address) 4032static __init int bad_ioapic(unsigned long address)
3968{ 4033{
3969 if (nr_ioapics >= MAX_IO_APICS) { 4034 if (nr_ioapics >= MAX_IO_APICS) {
3970 printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded " 4035 pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
3971 "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); 4036 MAX_IO_APICS, nr_ioapics);
3972 return 1; 4037 return 1;
3973 } 4038 }
3974 if (!address) { 4039 if (!address) {
3975 printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" 4040 pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
3976 " found in table, skipping!\n");
3977 return 1; 4041 return 1;
3978 } 4042 }
3979 return 0; 4043 return 0;
3980} 4044}
3981 4045
4046static __init int bad_ioapic_register(int idx)
4047{
4048 union IO_APIC_reg_00 reg_00;
4049 union IO_APIC_reg_01 reg_01;
4050 union IO_APIC_reg_02 reg_02;
4051
4052 reg_00.raw = io_apic_read(idx, 0);
4053 reg_01.raw = io_apic_read(idx, 1);
4054 reg_02.raw = io_apic_read(idx, 2);
4055
4056 if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
4057 pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
4058 mpc_ioapic_addr(idx));
4059 return 1;
4060 }
4061
4062 return 0;
4063}
4064
3982void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) 4065void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3983{ 4066{
3984 int idx = 0; 4067 int idx = 0;
@@ -3995,6 +4078,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3995 ioapics[idx].mp_config.apicaddr = address; 4078 ioapics[idx].mp_config.apicaddr = address;
3996 4079
3997 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 4080 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
4081
4082 if (bad_ioapic_register(idx)) {
4083 clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
4084 return;
4085 }
4086
3998 ioapics[idx].mp_config.apicid = io_apic_unique_id(id); 4087 ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
3999 ioapics[idx].mp_config.apicver = io_apic_get_version(idx); 4088 ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
4000 4089
@@ -4015,10 +4104,10 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4015 if (gsi_cfg->gsi_end >= gsi_top) 4104 if (gsi_cfg->gsi_end >= gsi_top)
4016 gsi_top = gsi_cfg->gsi_end + 1; 4105 gsi_top = gsi_cfg->gsi_end + 1;
4017 4106
4018 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " 4107 pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
4019 "GSI %d-%d\n", idx, mpc_ioapic_id(idx), 4108 idx, mpc_ioapic_id(idx),
4020 mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), 4109 mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
4021 gsi_cfg->gsi_base, gsi_cfg->gsi_end); 4110 gsi_cfg->gsi_base, gsi_cfg->gsi_end);
4022 4111
4023 nr_ioapics++; 4112 nr_ioapics++;
4024} 4113}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index c4a61ca1349a..00d2422ca7c9 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -478,6 +478,7 @@ static struct apic __refdata apic_numaq = {
478 .name = "NUMAQ", 478 .name = "NUMAQ",
479 .probe = probe_numaq, 479 .probe = probe_numaq,
480 .acpi_madt_oem_check = NULL, 480 .acpi_madt_oem_check = NULL,
481 .apic_id_valid = default_apic_id_valid,
481 .apic_id_registered = numaq_apic_id_registered, 482 .apic_id_registered = numaq_apic_id_registered,
482 483
483 .irq_delivery_mode = dest_LowestPrio, 484 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0787bb3412f4..ff2c1b9aac4d 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -92,6 +92,7 @@ static struct apic apic_default = {
92 .name = "default", 92 .name = "default",
93 .probe = probe_default, 93 .probe = probe_default,
94 .acpi_madt_oem_check = NULL, 94 .acpi_madt_oem_check = NULL,
95 .apic_id_valid = default_apic_id_valid,
95 .apic_id_registered = default_apic_id_registered, 96 .apic_id_registered = default_apic_id_registered,
96 97
97 .irq_delivery_mode = dest_LowestPrio, 98 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 19114423c58c..fea000b27f07 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -496,6 +496,7 @@ static struct apic apic_summit = {
496 .name = "summit", 496 .name = "summit",
497 .probe = probe_summit, 497 .probe = probe_summit,
498 .acpi_madt_oem_check = summit_acpi_madt_oem_check, 498 .acpi_madt_oem_check = summit_acpi_madt_oem_check,
499 .apic_id_valid = default_apic_id_valid,
499 .apic_id_registered = summit_apic_id_registered, 500 .apic_id_registered = summit_apic_id_registered,
500 501
501 .irq_delivery_mode = dest_LowestPrio, 502 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 500795875827..48f3103b3c93 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -213,6 +213,7 @@ static struct apic apic_x2apic_cluster = {
213 .name = "cluster x2apic", 213 .name = "cluster x2apic",
214 .probe = x2apic_cluster_probe, 214 .probe = x2apic_cluster_probe,
215 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 215 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
216 .apic_id_valid = x2apic_apic_id_valid,
216 .apic_id_registered = x2apic_apic_id_registered, 217 .apic_id_registered = x2apic_apic_id_registered,
217 218
218 .irq_delivery_mode = dest_LowestPrio, 219 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f5373dfde21e..8a778db45e3a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -119,6 +119,7 @@ static struct apic apic_x2apic_phys = {
119 .name = "physical x2apic", 119 .name = "physical x2apic",
120 .probe = x2apic_phys_probe, 120 .probe = x2apic_phys_probe,
121 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 121 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
122 .apic_id_valid = x2apic_apic_id_valid,
122 .apic_id_registered = x2apic_apic_id_registered, 123 .apic_id_registered = x2apic_apic_id_registered,
123 124
124 .irq_delivery_mode = dest_Fixed, 125 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 9d59bbacd4e3..87bfa69e216e 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -266,6 +266,11 @@ static void uv_send_IPI_all(int vector)
266 uv_send_IPI_mask(cpu_online_mask, vector); 266 uv_send_IPI_mask(cpu_online_mask, vector);
267} 267}
268 268
269static int uv_apic_id_valid(int apicid)
270{
271 return 1;
272}
273
269static int uv_apic_id_registered(void) 274static int uv_apic_id_registered(void)
270{ 275{
271 return 1; 276 return 1;
@@ -351,6 +356,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
351 .name = "UV large system", 356 .name = "UV large system",
352 .probe = uv_probe, 357 .probe = uv_probe,
353 .acpi_madt_oem_check = uv_acpi_madt_oem_check, 358 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
359 .apic_id_valid = uv_apic_id_valid,
354 .apic_id_registered = uv_apic_id_registered, 360 .apic_id_registered = uv_apic_id_registered,
355 361
356 .irq_delivery_mode = dest_Fixed, 362 .irq_delivery_mode = dest_Fixed,
@@ -769,7 +775,12 @@ void __init uv_system_init(void)
769 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) 775 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
770 uv_possible_blades += 776 uv_possible_blades +=
771 hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); 777 hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
772 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); 778
779 /* uv_num_possible_blades() is really the hub count */
780 printk(KERN_INFO "UV: Found %d blades, %d hubs\n",
781 is_uv1_hub() ? uv_num_possible_blades() :
782 (uv_num_possible_blades() + 1) / 2,
783 uv_num_possible_blades());
773 784
774 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 785 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
775 uv_blade_info = kzalloc(bytes, GFP_KERNEL); 786 uv_blade_info = kzalloc(bytes, GFP_KERNEL);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index a46bd383953c..459e78cbf61e 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -231,7 +231,6 @@
231#include <linux/syscore_ops.h> 231#include <linux/syscore_ops.h>
232#include <linux/i8253.h> 232#include <linux/i8253.h>
233 233
234#include <asm/system.h>
235#include <asm/uaccess.h> 234#include <asm/uaccess.h>
236#include <asm/desc.h> 235#include <asm/desc.h>
237#include <asm/olpc.h> 236#include <asm/olpc.h>
@@ -383,21 +382,21 @@ static int ignore_sys_suspend;
383static int ignore_normal_resume; 382static int ignore_normal_resume;
384static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; 383static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
385 384
386static int debug __read_mostly; 385static bool debug __read_mostly;
387static int smp __read_mostly; 386static bool smp __read_mostly;
388static int apm_disabled = -1; 387static int apm_disabled = -1;
389#ifdef CONFIG_SMP 388#ifdef CONFIG_SMP
390static int power_off; 389static bool power_off;
391#else 390#else
392static int power_off = 1; 391static bool power_off = 1;
393#endif 392#endif
394static int realmode_power_off; 393static bool realmode_power_off;
395#ifdef CONFIG_APM_ALLOW_INTS 394#ifdef CONFIG_APM_ALLOW_INTS
396static int allow_ints = 1; 395static bool allow_ints = 1;
397#else 396#else
398static int allow_ints; 397static bool allow_ints;
399#endif 398#endif
400static int broken_psr; 399static bool broken_psr;
401 400
402static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); 401static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
403static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); 402static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
@@ -1234,8 +1233,7 @@ static int suspend(int vetoable)
1234 struct apm_user *as; 1233 struct apm_user *as;
1235 1234
1236 dpm_suspend_start(PMSG_SUSPEND); 1235 dpm_suspend_start(PMSG_SUSPEND);
1237 1236 dpm_suspend_end(PMSG_SUSPEND);
1238 dpm_suspend_noirq(PMSG_SUSPEND);
1239 1237
1240 local_irq_disable(); 1238 local_irq_disable();
1241 syscore_suspend(); 1239 syscore_suspend();
@@ -1259,9 +1257,9 @@ static int suspend(int vetoable)
1259 syscore_resume(); 1257 syscore_resume();
1260 local_irq_enable(); 1258 local_irq_enable();
1261 1259
1262 dpm_resume_noirq(PMSG_RESUME); 1260 dpm_resume_start(PMSG_RESUME);
1263
1264 dpm_resume_end(PMSG_RESUME); 1261 dpm_resume_end(PMSG_RESUME);
1262
1265 queue_event(APM_NORMAL_RESUME, NULL); 1263 queue_event(APM_NORMAL_RESUME, NULL);
1266 spin_lock(&user_list_lock); 1264 spin_lock(&user_list_lock);
1267 for (as = user_list; as != NULL; as = as->next) { 1265 for (as = user_list; as != NULL; as = as->next) {
@@ -1277,7 +1275,7 @@ static void standby(void)
1277{ 1275{
1278 int err; 1276 int err;
1279 1277
1280 dpm_suspend_noirq(PMSG_SUSPEND); 1278 dpm_suspend_end(PMSG_SUSPEND);
1281 1279
1282 local_irq_disable(); 1280 local_irq_disable();
1283 syscore_suspend(); 1281 syscore_suspend();
@@ -1291,7 +1289,7 @@ static void standby(void)
1291 syscore_resume(); 1289 syscore_resume();
1292 local_irq_enable(); 1290 local_irq_enable();
1293 1291
1294 dpm_resume_noirq(PMSG_RESUME); 1292 dpm_resume_start(PMSG_RESUME);
1295} 1293}
1296 1294
1297static apm_event_t get_event(void) 1295static apm_event_t get_event(void)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 4f13fafc5264..68de2dc962ec 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -67,4 +67,6 @@ void common(void) {
67 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); 67 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
68 OFFSET(BP_version, boot_params, hdr.version); 68 OFFSET(BP_version, boot_params, hdr.version);
69 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); 69 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
70 OFFSET(BP_pref_address, boot_params, hdr.pref_address);
71 OFFSET(BP_code32_start, boot_params, hdr.code32_start);
70} 72}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 395a10e68067..85d98ab15cdc 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -3,6 +3,11 @@
3#include <linux/lguest.h> 3#include <linux/lguest.h>
4#include "../../../drivers/lguest/lg.h" 4#include "../../../drivers/lguest/lg.h"
5 5
6#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
7static char syscalls[] = {
8#include <asm/syscalls_32.h>
9};
10
6/* workaround for a warning with -Wmissing-prototypes */ 11/* workaround for a warning with -Wmissing-prototypes */
7void foo(void); 12void foo(void);
8 13
@@ -76,4 +81,7 @@ void foo(void)
76 OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); 81 OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
77 OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); 82 OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
78#endif 83#endif
84 BLANK();
85 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
86 DEFINE(NR_syscalls, sizeof(syscalls));
79} 87}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index e72a1194af22..1b4754f82ba7 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,11 +1,18 @@
1#include <asm/ia32.h> 1#include <asm/ia32.h>
2 2
3#define __NO_STUBS 1 3#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
4#undef __SYSCALL 4#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
5#undef _ASM_X86_UNISTD_64_H 5#ifdef CONFIG_X86_X32_ABI
6#define __SYSCALL(nr, sym) [nr] = 1, 6# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
7static char syscalls[] = { 7#else
8#include <asm/unistd.h> 8# define __SYSCALL_X32(nr, sym, compat) /* nothing */
9#endif
10static char syscalls_64[] = {
11#include <asm/syscalls_64.h>
12};
13#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
14static char syscalls_ia32[] = {
15#include <asm/syscalls_32.h>
9}; 16};
10 17
11int main(void) 18int main(void)
@@ -72,7 +79,11 @@ int main(void)
72 OFFSET(TSS_ist, tss_struct, x86_tss.ist); 79 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
73 BLANK(); 80 BLANK();
74 81
75 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); 82 DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
83 DEFINE(NR_syscalls, sizeof(syscalls_64));
84
85 DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1);
86 DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
76 87
77 return 0; 88 return 0;
78} 89}
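The asm-offsets changes above derive syscall counts at compile time with a designated-initializer trick: every __SYSCALL_*(nr, ...) entry expands to [nr] = 1, so the char array spans indices 0..max and its sizeof is the highest syscall number plus one. A standalone sketch of the trick with toy numbers (not the real tables):

	/* each entry expands to a designated initializer */
	#define SYSCALL(nr) [nr] = 1,

	static char syscalls[] = {
		SYSCALL(0)
		SYSCALL(1)
		SYSCALL(5)	/* gaps are fine; the array still spans 0..5 */
	};

	/* sizeof(syscalls) == 6 == NR_syscalls; __NR_syscall_max == 5 */

This is why the diff defines NR_syscalls as sizeof(syscalls) and __NR_syscall_max as sizeof(syscalls) - 1.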
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 25f24dccdcfa..6ab6aa2fdfdd 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
16obj-y += proc.o capflags.o powerflags.o common.o 16obj-y += proc.o capflags.o powerflags.o common.o
17obj-y += vmware.o hypervisor.o sched.o mshyperv.o 17obj-y += vmware.o hypervisor.o sched.o mshyperv.o
18obj-y += rdrand.o 18obj-y += rdrand.o
19obj-y += match.o
19 20
20obj-$(CONFIG_X86_32) += bugs.o 21obj-$(CONFIG_X86_32) += bugs.o
21obj-$(CONFIG_X86_64) += bugs_64.o 22obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4aae35..0a44b90602b0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
5#include <linux/mm.h> 5#include <linux/mm.h>
6 6
7#include <linux/io.h> 7#include <linux/io.h>
8#include <linux/sched.h>
8#include <asm/processor.h> 9#include <asm/processor.h>
9#include <asm/apic.h> 10#include <asm/apic.h>
10#include <asm/cpu.h> 11#include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
456 if (c->x86_power & (1 << 8)) { 457 if (c->x86_power & (1 << 8)) {
457 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 458 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
458 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 459 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
460 if (!check_tsc_unstable())
461 sched_clock_stable = 1;
459 } 462 }
460 463
461#ifdef CONFIG_X86_64 464#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 850f2963a420..67e258362a3d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -18,6 +18,7 @@
18#include <asm/archrandom.h> 18#include <asm/archrandom.h>
19#include <asm/hypervisor.h> 19#include <asm/hypervisor.h>
20#include <asm/processor.h> 20#include <asm/processor.h>
21#include <asm/debugreg.h>
21#include <asm/sections.h> 22#include <asm/sections.h>
22#include <linux/topology.h> 23#include <linux/topology.h>
23#include <linux/cpumask.h> 24#include <linux/cpumask.h>
@@ -28,6 +29,7 @@
28#include <asm/apic.h> 29#include <asm/apic.h>
29#include <asm/desc.h> 30#include <asm/desc.h>
30#include <asm/i387.h> 31#include <asm/i387.h>
32#include <asm/fpu-internal.h>
31#include <asm/mtrr.h> 33#include <asm/mtrr.h>
32#include <linux/numa.h> 34#include <linux/numa.h>
33#include <asm/asm.h> 35#include <asm/asm.h>
@@ -933,7 +935,7 @@ static const struct msr_range msr_range_array[] __cpuinitconst = {
933 { 0xc0011000, 0xc001103b}, 935 { 0xc0011000, 0xc001103b},
934}; 936};
935 937
936static void __cpuinit print_cpu_msr(void) 938static void __cpuinit __print_cpu_msr(void)
937{ 939{
938 unsigned index_min, index_max; 940 unsigned index_min, index_max;
939 unsigned index; 941 unsigned index;
@@ -997,13 +999,13 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
997 else 999 else
998 printk(KERN_CONT "\n"); 1000 printk(KERN_CONT "\n");
999 1001
1000#ifdef CONFIG_SMP 1002 print_cpu_msr(c);
1003}
1004
1005void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c)
1006{
1001 if (c->cpu_index < show_msr) 1007 if (c->cpu_index < show_msr)
1002 print_cpu_msr(); 1008 __print_cpu_msr();
1003#else
1004 if (show_msr)
1005 print_cpu_msr();
1006#endif
1007} 1009}
1008 1010
1009static __init int setup_disablecpuid(char *arg) 1011static __init int setup_disablecpuid(char *arg)
@@ -1021,6 +1023,8 @@ __setup("clearcpuid=", setup_disablecpuid);
1021 1023
1022#ifdef CONFIG_X86_64 1024#ifdef CONFIG_X86_64
1023struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; 1025struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
1026struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
1027 (unsigned long) nmi_idt_table };
1024 1028
1025DEFINE_PER_CPU_FIRST(union irq_stack_union, 1029DEFINE_PER_CPU_FIRST(union irq_stack_union,
1026 irq_stack_union) __aligned(PAGE_SIZE); 1030 irq_stack_union) __aligned(PAGE_SIZE);
@@ -1042,6 +1046,8 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
1042 1046
1043DEFINE_PER_CPU(unsigned int, irq_count) = -1; 1047DEFINE_PER_CPU(unsigned int, irq_count) = -1;
1044 1048
1049DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1050
1045/* 1051/*
1046 * Special IST stacks which the CPU switches to when it calls 1052 * Special IST stacks which the CPU switches to when it calls
1047 * an IST-marked descriptor entry. Up to 7 stacks (hardware 1053 * an IST-marked descriptor entry. Up to 7 stacks (hardware
@@ -1085,10 +1091,31 @@ unsigned long kernel_eflags;
1085 */ 1091 */
1086DEFINE_PER_CPU(struct orig_ist, orig_ist); 1092DEFINE_PER_CPU(struct orig_ist, orig_ist);
1087 1093
1094static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
1095DEFINE_PER_CPU(int, debug_stack_usage);
1096
1097int is_debug_stack(unsigned long addr)
1098{
1099 return __get_cpu_var(debug_stack_usage) ||
1100 (addr <= __get_cpu_var(debug_stack_addr) &&
1101 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
1102}
1103
1104void debug_stack_set_zero(void)
1105{
1106 load_idt((const struct desc_ptr *)&nmi_idt_descr);
1107}
1108
1109void debug_stack_reset(void)
1110{
1111 load_idt((const struct desc_ptr *)&idt_descr);
1112}
1113
1088#else /* CONFIG_X86_64 */ 1114#else /* CONFIG_X86_64 */
1089 1115
1090DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 1116DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
1091EXPORT_PER_CPU_SYMBOL(current_task); 1117EXPORT_PER_CPU_SYMBOL(current_task);
1118DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1092 1119
1093#ifdef CONFIG_CC_STACKPROTECTOR 1120#ifdef CONFIG_CC_STACKPROTECTOR
1094DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); 1121DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
@@ -1212,6 +1239,8 @@ void __cpuinit cpu_init(void)
1212 estacks += exception_stack_sizes[v]; 1239 estacks += exception_stack_sizes[v];
1213 oist->ist[v] = t->x86_tss.ist[v] = 1240 oist->ist[v] = t->x86_tss.ist[v] =
1214 (unsigned long)estacks; 1241 (unsigned long)estacks;
1242 if (v == DEBUG_STACK-1)
1243 per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
1215 } 1244 }
1216 } 1245 }
1217 1246
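is_debug_stack() above treats debug_stack_addr as the top of the per-CPU debug stack, so the address test is membership in the half-open range (top - DEBUG_STKSZ, top]. The same interval check in isolation, with the stack size assumed rather than taken from the real headers:

	#include <stdbool.h>

	#define DEBUG_STKSZ 8192UL	/* assumed; the kernel computes it from DEBUG_STACK_ORDER */

	/* stacks grow down, so valid addresses sit in (top - size, top] */
	static bool in_debug_stack(unsigned long addr, unsigned long top)
	{
		return addr <= top && addr > top - DEBUG_STKSZ;
	}

The extra debug_stack_usage test in the kernel version short-circuits this while the stack is known to be in use.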
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b45e5e7a901..73d08ed98a64 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -326,8 +326,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
326 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; 326 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
327} 327}
328 328
329static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, 329static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
330 int index)
331{ 330{
332 int node; 331 int node;
333 332
@@ -725,14 +724,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
725#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) 724#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
726 725
727#ifdef CONFIG_SMP 726#ifdef CONFIG_SMP
728static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 727
728static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
729{ 729{
730 struct _cpuid4_info *this_leaf, *sibling_leaf; 730 struct _cpuid4_info *this_leaf;
731 unsigned long num_threads_sharing; 731 int ret, i, sibling;
732 int index_msb, i, sibling;
733 struct cpuinfo_x86 *c = &cpu_data(cpu); 732 struct cpuinfo_x86 *c = &cpu_data(cpu);
734 733
735 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { 734 ret = 0;
735 if (index == 3) {
736 ret = 1;
736 for_each_cpu(i, cpu_llc_shared_mask(cpu)) { 737 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
737 if (!per_cpu(ici_cpuid4_info, i)) 738 if (!per_cpu(ici_cpuid4_info, i))
738 continue; 739 continue;
@@ -743,8 +744,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
743 set_bit(sibling, this_leaf->shared_cpu_map); 744 set_bit(sibling, this_leaf->shared_cpu_map);
744 } 745 }
745 } 746 }
746 return; 747 } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
748 ret = 1;
749 for_each_cpu(i, cpu_sibling_mask(cpu)) {
750 if (!per_cpu(ici_cpuid4_info, i))
751 continue;
752 this_leaf = CPUID4_INFO_IDX(i, index);
753 for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
754 if (!cpu_online(sibling))
755 continue;
756 set_bit(sibling, this_leaf->shared_cpu_map);
757 }
758 }
747 } 759 }
760
761 return ret;
762}
763
764static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
765{
766 struct _cpuid4_info *this_leaf, *sibling_leaf;
767 unsigned long num_threads_sharing;
768 int index_msb, i;
769 struct cpuinfo_x86 *c = &cpu_data(cpu);
770
771 if (c->x86_vendor == X86_VENDOR_AMD) {
772 if (cache_shared_amd_cpu_map_setup(cpu, index))
773 return;
774 }
775
748 this_leaf = CPUID4_INFO_IDX(cpu, index); 776 this_leaf = CPUID4_INFO_IDX(cpu, index);
749 num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing; 777 num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
750 778
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
new file mode 100644
index 000000000000..5502b289341b
--- /dev/null
+++ b/arch/x86/kernel/cpu/match.c
@@ -0,0 +1,91 @@
1#include <asm/cpu_device_id.h>
2#include <asm/processor.h>
3#include <linux/cpu.h>
4#include <linux/module.h>
5#include <linux/slab.h>
6
7/**
7 * x86_match_cpu - match current CPU against an array of x86_cpu_ids
8 * @match: Pointer to array of x86_cpu_ids. Table terminated with an
9 * empty {} entry.
11 *
12 * Return the entry if the current CPU matches the entries in the
13 * passed x86_cpu_id match table. Otherwise NULL. The match table
14 * contains vendor (X86_VENDOR_*), family, model and feature bits or
15 * respective wildcard entries.
16 *
17 * A typical table entry would be to match a specific CPU
18 * { X86_VENDOR_INTEL, 6, 0x12 }
19 * or to match a specific CPU feature
20 * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
21 *
22 * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
23 * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
24 *
25 * Arrays used to match for this should also be declared using
26 * MODULE_DEVICE_TABLE(x86_cpu, ...)
27 *
28 * This always matches against the boot cpu, assuming models and features are
29 * consistent over all CPUs.
30 */
31const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
32{
33 const struct x86_cpu_id *m;
34 struct cpuinfo_x86 *c = &boot_cpu_data;
35
36 for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
37 if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
38 continue;
39 if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
40 continue;
41 if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
42 continue;
43 if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
44 continue;
45 return m;
46 }
47 return NULL;
48}
49EXPORT_SYMBOL(x86_match_cpu);
50
51ssize_t arch_print_cpu_modalias(struct device *dev,
52 struct device_attribute *attr,
53 char *bufptr)
54{
55 int size = PAGE_SIZE;
56 int i, n;
57 char *buf = bufptr;
58
59 n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:"
60 "model:%04X:feature:",
61 boot_cpu_data.x86_vendor,
62 boot_cpu_data.x86,
63 boot_cpu_data.x86_model);
64 size -= n;
65 buf += n;
66 size -= 1;
67 for (i = 0; i < NCAPINTS*32; i++) {
68 if (boot_cpu_has(i)) {
69 n = snprintf(buf, size, ",%04X", i);
70 if (n >= size) {
71 WARN(1, "x86 features overflow page\n");
72 break;
73 }
74 size -= n;
75 buf += n;
76 }
77 }
78 *buf++ = '\n';
79 return buf - bufptr;
80}
81
82int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
83{
84 char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
85 if (buf) {
86 arch_print_cpu_modalias(NULL, NULL, buf);
87 add_uevent_var(env, "MODALIAS=%s", buf);
88 kfree(buf);
89 }
90 return 0;
91}
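A sketch of how a driver would consume the new x86_match_cpu() helper, following the table conventions described in the comment above; the IDs and names here are made up for illustration:

	#include <linux/module.h>
	#include <asm/cpu_device_id.h>

	/* hypothetical table: any Intel family-6 CPU, or any CPU with XSAVE */
	static const struct x86_cpu_id demo_cpu_ids[] = {
		{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_ANY },
		{ X86_FEATURE_MATCH(X86_FEATURE_XSAVE) },
		{}	/* terminating empty entry */
	};
	MODULE_DEVICE_TABLE(x86cpu, demo_cpu_ids);

	static int __init demo_init(void)
	{
		if (!x86_match_cpu(demo_cpu_ids))
			return -ENODEV;	/* CPU not in the table: refuse to load */
		return 0;
	}
	module_init(demo_init);
	MODULE_LICENSE("GPL");

The MODULE_DEVICE_TABLE() entry is what lets udev autoload the module from the x86cpu: modalias emitted by arch_cpu_uevent() above.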
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 7395d5f4272d..0c82091b1652 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -54,7 +54,14 @@ static struct severity {
54#define MASK(x, y) .mask = x, .result = y 54#define MASK(x, y) .mask = x, .result = y
55#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) 55#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
56#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) 56#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
57#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
57#define MCACOD 0xffff 58#define MCACOD 0xffff
59/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
60#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
61#define MCACOD_SCRUBMSK 0xfff0
62#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
63#define MCACOD_DATA 0x0134 /* Data Load */
64#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
58 65
59 MCESEV( 66 MCESEV(
60 NO, "Invalid", 67 NO, "Invalid",
@@ -102,11 +109,24 @@ static struct severity {
102 SER, BITCLR(MCI_STATUS_S) 109 SER, BITCLR(MCI_STATUS_S)
103 ), 110 ),
104 111
105 /* AR add known MCACODs here */
106 MCESEV( 112 MCESEV(
107 PANIC, "Action required with lost events", 113 PANIC, "Action required with lost events",
108 SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR) 114 SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
109 ), 115 ),
116
117 /* known AR MCACODs: */
118#ifdef CONFIG_MEMORY_FAILURE
119 MCESEV(
120 KEEP, "HT thread notices Action required: data load error",
121 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
122 MCGMASK(MCG_STATUS_EIPV, 0)
123 ),
124 MCESEV(
125 AR, "Action required: data load error",
126 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
127 USER
128 ),
129#endif
110 MCESEV( 130 MCESEV(
111 PANIC, "Action required: unknown MCACOD", 131 PANIC, "Action required: unknown MCACOD",
112 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) 132 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
@@ -115,11 +135,11 @@ static struct severity {
115 /* known AO MCACODs: */ 135 /* known AO MCACODs: */
116 MCESEV( 136 MCESEV(
117 AO, "Action optional: memory scrubbing error", 137 AO, "Action optional: memory scrubbing error",
118 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0) 138 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
119 ), 139 ),
120 MCESEV( 140 MCESEV(
121 AO, "Action optional: last level cache writeback error", 141 AO, "Action optional: last level cache writeback error",
122 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a) 142 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
123 ), 143 ),
124 MCESEV( 144 MCESEV(
125 SOME, "Action optional: unknown MCACOD", 145 SOME, "Action optional: unknown MCACOD",
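The action-optional scrub rule above matches the whole architectural range 0xC0-0xCF by masking the MCACOD field with MCACOD_SCRUBMSK (0xfff0) before comparing against MCACOD_SCRUB (0x00C0). The arithmetic, checked in isolation:

	#include <assert.h>

	#define MCACOD_SCRUB	0x00C0
	#define MCACOD_SCRUBMSK	0xfff0

	int main(void)
	{
		/* every code from 0xC0 through 0xCF collapses to 0xC0 under the mask */
		for (unsigned int code = 0x00C0; code <= 0x00CF; code++)
			assert((code & MCACOD_SCRUBMSK) == MCACOD_SCRUB);
		assert((0x00D0 & MCACOD_SCRUBMSK) != MCACOD_SCRUB);
		return 0;
	}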
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f22a9f7f6390..d086a09c087d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void)
191{ 191{
192 unsigned int next, i, prev = 0; 192 unsigned int next, i, prev = 0;
193 193
194 next = rcu_dereference_check_mce(mcelog.next); 194 next = ACCESS_ONCE(mcelog.next);
195 195
196 do { 196 do {
197 struct mce *m; 197 struct mce *m;
@@ -540,6 +540,27 @@ static void mce_report_event(struct pt_regs *regs)
540 irq_work_queue(&__get_cpu_var(mce_irq_work)); 540 irq_work_queue(&__get_cpu_var(mce_irq_work));
541} 541}
542 542
543/*
544 * Read ADDR and MISC registers.
545 */
546static void mce_read_aux(struct mce *m, int i)
547{
548 if (m->status & MCI_STATUS_MISCV)
549 m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
550 if (m->status & MCI_STATUS_ADDRV) {
551 m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
552
553 /*
554 * Mask the reported address by the reported granularity.
555 */
556 if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
557 u8 shift = MCI_MISC_ADDR_LSB(m->misc);
558 m->addr >>= shift;
559 m->addr <<= shift;
560 }
561 }
562}
563
543DEFINE_PER_CPU(unsigned, mce_poll_count); 564DEFINE_PER_CPU(unsigned, mce_poll_count);
544 565
545/* 566/*
@@ -590,10 +611,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
590 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) 611 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
591 continue; 612 continue;
592 613
593 if (m.status & MCI_STATUS_MISCV) 614 mce_read_aux(&m, i);
594 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
595 if (m.status & MCI_STATUS_ADDRV)
596 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
597 615
598 if (!(flags & MCP_TIMESTAMP)) 616 if (!(flags & MCP_TIMESTAMP))
599 m.tsc = 0; 617 m.tsc = 0;
@@ -917,6 +935,49 @@ static void mce_clear_state(unsigned long *toclear)
917} 935}
918 936
919/* 937/*
938 * Need to save faulting physical address associated with a process
939 * in the machine check handler some place where we can grab it back
940 * later in mce_notify_process()
941 */
942#define MCE_INFO_MAX 16
943
944struct mce_info {
945 atomic_t inuse;
946 struct task_struct *t;
947 __u64 paddr;
948} mce_info[MCE_INFO_MAX];
949
950static void mce_save_info(__u64 addr)
951{
952 struct mce_info *mi;
953
954 for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
955 if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
956 mi->t = current;
957 mi->paddr = addr;
958 return;
959 }
960 }
961
962 mce_panic("Too many concurrent recoverable errors", NULL, NULL);
963}
964
965static struct mce_info *mce_find_info(void)
966{
967 struct mce_info *mi;
968
969 for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
970 if (atomic_read(&mi->inuse) && mi->t == current)
971 return mi;
972 return NULL;
973}
974
975static void mce_clear_info(struct mce_info *mi)
976{
977 atomic_set(&mi->inuse, 0);
978}
979
980/*
920 * The actual machine check handler. This only handles real 981 * The actual machine check handler. This only handles real
921 * exceptions when something got corrupted coming in through int 18. 982 * exceptions when something got corrupted coming in through int 18.
922 * 983 *
@@ -969,7 +1030,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
969 barrier(); 1030 barrier();
970 1031
971 /* 1032 /*
972 * When no restart IP must always kill or panic. 1033 * When there is no restart IP we might need to kill or panic.
1034 * Assume the worst for now, but if we find the
1035 * severity is MCE_AR_SEVERITY we have other options.
973 */ 1036 */
974 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 1037 if (!(m.mcgstatus & MCG_STATUS_RIPV))
975 kill_it = 1; 1038 kill_it = 1;
@@ -1023,16 +1086,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1023 continue; 1086 continue;
1024 } 1087 }
1025 1088
1026 /* 1089 mce_read_aux(&m, i);
1027 * Kill on action required.
1028 */
1029 if (severity == MCE_AR_SEVERITY)
1030 kill_it = 1;
1031
1032 if (m.status & MCI_STATUS_MISCV)
1033 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
1034 if (m.status & MCI_STATUS_ADDRV)
1035 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
1036 1090
1037 /* 1091 /*
1038 * Action optional error. Queue address for later processing. 1092 * Action optional error. Queue address for later processing.
@@ -1052,6 +1106,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1052 } 1106 }
1053 } 1107 }
1054 1108
1109 /* mce_clear_state will clear *final, save locally for use later */
1110 m = *final;
1111
1055 if (!no_way_out) 1112 if (!no_way_out)
1056 mce_clear_state(toclear); 1113 mce_clear_state(toclear);
1057 1114
@@ -1063,27 +1120,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1063 no_way_out = worst >= MCE_PANIC_SEVERITY; 1120 no_way_out = worst >= MCE_PANIC_SEVERITY;
1064 1121
1065 /* 1122 /*
1066 * If we have decided that we just CAN'T continue, and the user 1123 * At insane "tolerant" levels we take no action. Otherwise
1067 * has not set tolerant to an insane level, give up and die. 1124 * we only die if we have no other choice. For less serious
1068 * 1125 * issues we try to recover, or limit damage to the current
1069 * This is mainly used in the case when the system doesn't 1126 * process.
1070 * support MCE broadcasting or it has been disabled.
1071 */ 1127 */
1072 if (no_way_out && tolerant < 3) 1128 if (tolerant < 3) {
1073 mce_panic("Fatal machine check on current CPU", final, msg); 1129 if (no_way_out)
1074 1130 mce_panic("Fatal machine check on current CPU", &m, msg);
1075 /* 1131 if (worst == MCE_AR_SEVERITY) {
1076 * If the error seems to be unrecoverable, something should be 1132 /* schedule action before return to userland */
1077 * done. Try to kill as little as possible. If we can kill just 1133 mce_save_info(m.addr);
1078 * one task, do that. If the user has set the tolerance very 1134 set_thread_flag(TIF_MCE_NOTIFY);
1079 * high, don't try to do anything at all. 1135 } else if (kill_it) {
1080 */ 1136 force_sig(SIGBUS, current);
1081 1137 }
1082 if (kill_it && tolerant < 3) 1138 }
1083 force_sig(SIGBUS, current);
1084
1085 /* notify userspace ASAP */
1086 set_thread_flag(TIF_MCE_NOTIFY);
1087 1139
1088 if (worst > 0) 1140 if (worst > 0)
1089 mce_report_event(regs); 1141 mce_report_event(regs);
@@ -1094,34 +1146,57 @@ out:
1094} 1146}
1095EXPORT_SYMBOL_GPL(do_machine_check); 1147EXPORT_SYMBOL_GPL(do_machine_check);
1096 1148
1097/* dummy to break dependency. actual code is in mm/memory-failure.c */ 1149#ifndef CONFIG_MEMORY_FAILURE
1098void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) 1150int memory_failure(unsigned long pfn, int vector, int flags)
1099{ 1151{
1100 printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); 1152 /* mce_severity() should not hand us an ACTION_REQUIRED error */
1153 BUG_ON(flags & MF_ACTION_REQUIRED);
1154 printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
1155 "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
1156
1157 return 0;
1101} 1158}
1159#endif
1102 1160
1103/* 1161/*
1104 * Called after mce notification in process context. This code 1162 * Called in process context that was interrupted by MCE and marked with
1105 * is allowed to sleep. Call the high level VM handler to process 1163 * TIF_MCE_NOTIFY, just before returning to erroneous userland.
1106 * any corrupted pages. 1164 * This code is allowed to sleep.
1107 * Assume that the work queue code only calls this one at a time 1165 * Attempt possible recovery such as calling the high level VM handler to
1108 * per CPU. 1166 * process any corrupted pages, and kill/signal current process if required.
1109 * Note we don't disable preemption, so this code might run on the wrong 1167 * Action required errors are handled here.
1110 * CPU. In this case the event is picked up by the scheduled work queue.
1111 * This is merely a fast path to expedite processing in some common
1112 * cases.
1113 */ 1168 */
1114void mce_notify_process(void) 1169void mce_notify_process(void)
1115{ 1170{
1116 unsigned long pfn; 1171 unsigned long pfn;
1117 mce_notify_irq(); 1172 struct mce_info *mi = mce_find_info();
1118 while (mce_ring_get(&pfn)) 1173
1119 memory_failure(pfn, MCE_VECTOR); 1174 if (!mi)
1175 mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
1176 pfn = mi->paddr >> PAGE_SHIFT;
1177
1178 clear_thread_flag(TIF_MCE_NOTIFY);
1179
1180 pr_err("Uncorrected hardware memory error in user-access at %llx",
1181 mi->paddr);
1182 if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
1183 pr_err("Memory error not recovered");
1184 force_sig(SIGBUS, current);
1185 }
1186 mce_clear_info(mi);
1120} 1187}
1121 1188
1189/*
1190 * Action optional processing happens here (picking up
1191 * from the list of faulting pages that do_machine_check()
1192 * placed into the "ring").
1193 */
1122static void mce_process_work(struct work_struct *dummy) 1194static void mce_process_work(struct work_struct *dummy)
1123{ 1195{
1124 mce_notify_process(); 1196 unsigned long pfn;
1197
1198 while (mce_ring_get(&pfn))
1199 memory_failure(pfn, MCE_VECTOR, 0);
1125} 1200}
1126 1201
1127#ifdef CONFIG_X86_MCE_INTEL 1202#ifdef CONFIG_X86_MCE_INTEL
@@ -1211,8 +1286,6 @@ int mce_notify_irq(void)
1211 /* Not more than two messages every minute */ 1286 /* Not more than two messages every minute */
1212 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); 1287 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
1213 1288
1214 clear_thread_flag(TIF_MCE_NOTIFY);
1215
1216 if (test_and_clear_bit(0, &mce_need_notify)) { 1289 if (test_and_clear_bit(0, &mce_need_notify)) {
1217 /* wake processes polling /dev/mcelog */ 1290 /* wake processes polling /dev/mcelog */
1218 wake_up_interruptible(&mce_chrdev_wait); 1291 wake_up_interruptible(&mce_chrdev_wait);
@@ -1541,6 +1614,12 @@ static int __mce_read_apei(char __user **ubuf, size_t usize)
1541 /* Error or no more MCE record */ 1614 /* Error or no more MCE record */
1542 if (rc <= 0) { 1615 if (rc <= 0) {
1543 mce_apei_read_done = 1; 1616 mce_apei_read_done = 1;
1617 /*
1618 * When ERST is disabled, mce_chrdev_read() should return
1619 * "no record" instead of "no device."
1620 */
1621 if (rc == -ENODEV)
1622 return 0;
1544 return rc; 1623 return rc;
1545 } 1624 }
1546 rc = -EFAULT; 1625 rc = -EFAULT;
@@ -1859,7 +1938,7 @@ static struct bus_type mce_subsys = {
1859 .dev_name = "machinecheck", 1938 .dev_name = "machinecheck",
1860}; 1939};
1861 1940
1862DEFINE_PER_CPU(struct device, mce_device); 1941DEFINE_PER_CPU(struct device *, mce_device);
1863 1942
1864__cpuinitdata 1943__cpuinitdata
1865void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1944void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -2001,19 +2080,27 @@ static struct device_attribute *mce_device_attrs[] = {
2001 2080
2002static cpumask_var_t mce_device_initialized; 2081static cpumask_var_t mce_device_initialized;
2003 2082
2083static void mce_device_release(struct device *dev)
2084{
2085 kfree(dev);
2086}
2087
2004/* Per cpu device init. All of the cpus still share the same ctrl bank: */ 2088/* Per cpu device init. All of the cpus still share the same ctrl bank: */
2005static __cpuinit int mce_device_create(unsigned int cpu) 2089static __cpuinit int mce_device_create(unsigned int cpu)
2006{ 2090{
2007 struct device *dev = &per_cpu(mce_device, cpu); 2091 struct device *dev;
2008 int err; 2092 int err;
2009 int i, j; 2093 int i, j;
2010 2094
2011 if (!mce_available(&boot_cpu_data)) 2095 if (!mce_available(&boot_cpu_data))
2012 return -EIO; 2096 return -EIO;
2013 2097
2014 memset(&dev->kobj, 0, sizeof(struct kobject)); 2098 dev = kzalloc(sizeof *dev, GFP_KERNEL);
2099 if (!dev)
2100 return -ENOMEM;
2015 dev->id = cpu; 2101 dev->id = cpu;
2016 dev->bus = &mce_subsys; 2102 dev->bus = &mce_subsys;
2103 dev->release = &mce_device_release;
2017 2104
2018 err = device_register(dev); 2105 err = device_register(dev);
2019 if (err) 2106 if (err)
@@ -2030,6 +2117,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
2030 goto error2; 2117 goto error2;
2031 } 2118 }
2032 cpumask_set_cpu(cpu, mce_device_initialized); 2119 cpumask_set_cpu(cpu, mce_device_initialized);
2120 per_cpu(mce_device, cpu) = dev;
2033 2121
2034 return 0; 2122 return 0;
2035error2: 2123error2:
@@ -2046,7 +2134,7 @@ error:
2046 2134
2047static __cpuinit void mce_device_remove(unsigned int cpu) 2135static __cpuinit void mce_device_remove(unsigned int cpu)
2048{ 2136{
2049 struct device *dev = &per_cpu(mce_device, cpu); 2137 struct device *dev = per_cpu(mce_device, cpu);
2050 int i; 2138 int i;
2051 2139
2052 if (!cpumask_test_cpu(cpu, mce_device_initialized)) 2140 if (!cpumask_test_cpu(cpu, mce_device_initialized))
@@ -2060,6 +2148,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
2060 2148
2061 device_unregister(dev); 2149 device_unregister(dev);
2062 cpumask_clear_cpu(cpu, mce_device_initialized); 2150 cpumask_clear_cpu(cpu, mce_device_initialized);
2151 per_cpu(mce_device, cpu) = NULL;
2063} 2152}
2064 2153
2065/* Make sure there are no machine checks on offlined CPUs. */ 2154/* Make sure there are no machine checks on offlined CPUs. */
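The new mce_read_aux() above rounds the reported address down to the granularity the hardware advertises in the MISC register: shifting right and back left by MCI_MISC_ADDR_LSB bits clears the low bits. The same operation as a standalone helper (at runtime the lsb value comes from the MISC register):

	#include <stdint.h>

	/* round addr down to a 2^lsb boundary, as mce_read_aux() does */
	static uint64_t mask_to_granularity(uint64_t addr, uint8_t lsb)
	{
		addr >>= lsb;
		addr <<= lsb;	/* equivalent to addr &= ~((1ULL << lsb) - 1) */
		return addr;
	}

For a 4 KiB-granular report (lsb == 12) this turns any address inside the page into the page base, which is what the later pfn = paddr >> PAGE_SHIFT in mce_notify_process() relies on.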
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ba0b94a7e204..99b57179f912 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -523,10 +523,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
523{ 523{
524 int i, err = 0; 524 int i, err = 0;
525 struct threshold_bank *b = NULL; 525 struct threshold_bank *b = NULL;
526 struct device *dev = per_cpu(mce_device, cpu);
526 char name[32]; 527 char name[32];
527 528
528 sprintf(name, "threshold_bank%i", bank); 529 sprintf(name, "threshold_bank%i", bank);
529 530
531#ifdef CONFIG_SMP
530 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 532 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
531 i = cpumask_first(cpu_llc_shared_mask(cpu)); 533 i = cpumask_first(cpu_llc_shared_mask(cpu));
532 534
@@ -543,8 +545,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
543 if (!b) 545 if (!b)
544 goto out; 546 goto out;
545 547
546 err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj, 548 err = sysfs_create_link(&dev->kobj, b->kobj, name);
547 b->kobj, name);
548 if (err) 549 if (err)
549 goto out; 550 goto out;
550 551
@@ -553,6 +554,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
553 554
554 goto out; 555 goto out;
555 } 556 }
557#endif
556 558
557 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); 559 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
558 if (!b) { 560 if (!b) {
@@ -565,7 +567,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
565 goto out; 567 goto out;
566 } 568 }
567 569
568 b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj); 570 b->kobj = kobject_create_and_add(name, &dev->kobj);
569 if (!b->kobj) 571 if (!b->kobj)
570 goto out_free; 572 goto out_free;
571 573
@@ -585,8 +587,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
585 if (i == cpu) 587 if (i == cpu)
586 continue; 588 continue;
587 589
588 err = sysfs_create_link(&per_cpu(mce_device, i).kobj, 590 dev = per_cpu(mce_device, i);
589 b->kobj, name); 591 if (dev)
592 err = sysfs_create_link(&dev->kobj, b->kobj, name);
590 if (err) 593 if (err)
591 goto out; 594 goto out;
592 595
@@ -649,6 +652,7 @@ static void deallocate_threshold_block(unsigned int cpu,
649static void threshold_remove_bank(unsigned int cpu, int bank) 652static void threshold_remove_bank(unsigned int cpu, int bank)
650{ 653{
651 struct threshold_bank *b; 654 struct threshold_bank *b;
655 struct device *dev;
652 char name[32]; 656 char name[32];
653 int i = 0; 657 int i = 0;
654 658
@@ -663,7 +667,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
663#ifdef CONFIG_SMP 667#ifdef CONFIG_SMP
664 /* sibling symlink */ 668 /* sibling symlink */
665 if (shared_bank[bank] && b->blocks->cpu != cpu) { 669 if (shared_bank[bank] && b->blocks->cpu != cpu) {
666 sysfs_remove_link(&per_cpu(mce_device, cpu).kobj, name); 670 dev = per_cpu(mce_device, cpu);
671 sysfs_remove_link(&dev->kobj, name);
667 per_cpu(threshold_banks, cpu)[bank] = NULL; 672 per_cpu(threshold_banks, cpu)[bank] = NULL;
668 673
669 return; 674 return;
@@ -675,7 +680,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
675 if (i == cpu) 680 if (i == cpu)
676 continue; 681 continue;
677 682
678 sysfs_remove_link(&per_cpu(mce_device, i).kobj, name); 683 dev = per_cpu(mce_device, i);
684 if (dev)
685 sysfs_remove_link(&dev->kobj, name);
679 per_cpu(threshold_banks, i)[bank] = NULL; 686 per_cpu(threshold_banks, i)[bank] = NULL;
680 } 687 }
681 688
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 5c0e6533d9bc..2d5454cd2c4f 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -9,7 +9,6 @@
9#include <linux/smp.h> 9#include <linux/smp.h>
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/mce.h> 12#include <asm/mce.h>
14#include <asm/msr.h> 13#include <asm/msr.h>
15 14
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 67bb17a37a0a..47a1870279aa 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -25,7 +25,6 @@
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26 26
27#include <asm/processor.h> 27#include <asm/processor.h>
28#include <asm/system.h>
29#include <asm/apic.h> 28#include <asm/apic.h>
30#include <asm/idle.h> 29#include <asm/idle.h>
31#include <asm/mce.h> 30#include <asm/mce.h>
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 54060f565974..2d7998fb628c 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -8,7 +8,6 @@
8#include <linux/init.h> 8#include <linux/init.h>
9 9
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h>
12#include <asm/mce.h> 11#include <asm/mce.h>
13#include <asm/msr.h> 12#include <asm/msr.h>
14 13
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 97b26356e9ee..75772ae6c65f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -12,7 +12,6 @@
12#include <asm/processor-flags.h> 12#include <asm/processor-flags.h>
13#include <asm/cpufeature.h> 13#include <asm/cpufeature.h>
14#include <asm/tlbflush.h> 14#include <asm/tlbflush.h>
15#include <asm/system.h>
16#include <asm/mtrr.h> 15#include <asm/mtrr.h>
17#include <asm/msr.h> 16#include <asm/msr.h>
18#include <asm/pat.h> 17#include <asm/pat.h>
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 79289632cb27..a041e094b8b9 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -167,6 +167,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
167{ 167{
168 int err = 0; 168 int err = 0;
169 mtrr_type type; 169 mtrr_type type;
170 unsigned long base;
170 unsigned long size; 171 unsigned long size;
171 struct mtrr_sentry sentry; 172 struct mtrr_sentry sentry;
172 struct mtrr_gentry gentry; 173 struct mtrr_gentry gentry;
@@ -267,14 +268,14 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
267#endif 268#endif
268 if (gentry.regnum >= num_var_ranges) 269 if (gentry.regnum >= num_var_ranges)
269 return -EINVAL; 270 return -EINVAL;
270 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); 271 mtrr_if->get(gentry.regnum, &base, &size, &type);
271 272
272 /* Hide entries that go above 4GB */ 273 /* Hide entries that go above 4GB */
273 if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) 274 if (base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))
274 || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) 275 || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)))
275 gentry.base = gentry.size = gentry.type = 0; 276 gentry.base = gentry.size = gentry.type = 0;
276 else { 277 else {
277 gentry.base <<= PAGE_SHIFT; 278 gentry.base = base << PAGE_SHIFT;
278 gentry.size = size << PAGE_SHIFT; 279 gentry.size = size << PAGE_SHIFT;
279 gentry.type = type; 280 gentry.type = type;
280 } 281 }
@@ -321,11 +322,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
321#endif 322#endif
322 if (gentry.regnum >= num_var_ranges) 323 if (gentry.regnum >= num_var_ranges)
323 return -EINVAL; 324 return -EINVAL;
324 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); 325 mtrr_if->get(gentry.regnum, &base, &size, &type);
325 /* Hide entries that would overflow */ 326 /* Hide entries that would overflow */
326 if (size != (__typeof__(gentry.size))size) 327 if (size != (__typeof__(gentry.size))size)
327 gentry.base = gentry.size = gentry.type = 0; 328 gentry.base = gentry.size = gentry.type = 0;
328 else { 329 else {
330 gentry.base = base;
329 gentry.size = size; 331 gentry.size = size;
330 gentry.type = type; 332 gentry.type = type;
331 } 333 }
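The "hide entries that go above 4GB" guard above works in page units: with the 32-bit size field of the legacy structure and a 4 KiB PAGE_SHIFT, 1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT) is the page count at the 4 GiB boundary, so any range reaching it is reported as empty rather than silently truncated. Spelled out under those assumptions:

	#include <stdint.h>
	#include <stdbool.h>

	#define PAGE_SHIFT 12

	/* true if a (base, size) range in pages cannot be reported in a u32 */
	static bool must_hide_entry(unsigned long base, unsigned long size)
	{
		unsigned long limit = 1UL << (8 * sizeof(uint32_t) - PAGE_SHIFT);

		return base + size - 1 >= limit || size >= limit;
	}

The diff itself is narrower: mtrr_if->get() now fills a local base, keeping the raw page-unit value out of the user-visible gentry until the range checks pass.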
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5adce1040b11..bb8e03407e18 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,13 +24,14 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/bitops.h> 26#include <linux/bitops.h>
27#include <linux/device.h>
27 28
28#include <asm/apic.h> 29#include <asm/apic.h>
29#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
30#include <asm/nmi.h> 31#include <asm/nmi.h>
31#include <asm/compat.h>
32#include <asm/smp.h> 32#include <asm/smp.h>
33#include <asm/alternative.h> 33#include <asm/alternative.h>
34#include <asm/timer.h>
34 35
35#include "perf_event.h" 36#include "perf_event.h"
36 37
@@ -351,6 +352,36 @@ int x86_setup_perfctr(struct perf_event *event)
351 return 0; 352 return 0;
352} 353}
353 354
355/*
356 * check that branch_sample_type is compatible with
357 * settings needed for precise_ip > 1 which implies
358 * using the LBR to capture ALL taken branches at the
359 * priv levels of the measurement
360 */
361static inline int precise_br_compat(struct perf_event *event)
362{
363 u64 m = event->attr.branch_sample_type;
364 u64 b = 0;
365
366 /* must capture all branches */
367 if (!(m & PERF_SAMPLE_BRANCH_ANY))
368 return 0;
369
370 m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
371
372 if (!event->attr.exclude_user)
373 b |= PERF_SAMPLE_BRANCH_USER;
374
375 if (!event->attr.exclude_kernel)
376 b |= PERF_SAMPLE_BRANCH_KERNEL;
377
378 /*
379 * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
380 */
381
382 return m == b;
383}
384
354int x86_pmu_hw_config(struct perf_event *event) 385int x86_pmu_hw_config(struct perf_event *event)
355{ 386{
356 if (event->attr.precise_ip) { 387 if (event->attr.precise_ip) {
@@ -367,6 +398,36 @@ int x86_pmu_hw_config(struct perf_event *event)
367 398
368 if (event->attr.precise_ip > precise) 399 if (event->attr.precise_ip > precise)
369 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
401 /*
402 * check that PEBS LBR correction does not conflict with
403 * whatever the user is asking with attr->branch_sample_type
404 */
405 if (event->attr.precise_ip > 1) {
406 u64 *br_type = &event->attr.branch_sample_type;
407
408 if (has_branch_stack(event)) {
409 if (!precise_br_compat(event))
410 return -EOPNOTSUPP;
411
412 /* branch_sample_type is compatible */
413
414 } else {
415 /*
416 * user did not specify branch_sample_type
417 *
418 * For PEBS fixups, we capture all
419 * the branches at the priv level of the
420 * event.
421 */
422 *br_type = PERF_SAMPLE_BRANCH_ANY;
423
424 if (!event->attr.exclude_user)
425 *br_type |= PERF_SAMPLE_BRANCH_USER;
426
427 if (!event->attr.exclude_kernel)
428 *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
429 }
430 }
370 } 431 }
371 432
372 /* 433 /*
@@ -424,6 +485,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
424 /* mark unused */ 485 /* mark unused */
425 event->hw.extra_reg.idx = EXTRA_REG_NONE; 486 event->hw.extra_reg.idx = EXTRA_REG_NONE;
426 487
488 /* mark not used */
489 event->hw.extra_reg.idx = EXTRA_REG_NONE;
490 event->hw.branch_reg.idx = EXTRA_REG_NONE;
491
427 return x86_pmu.hw_config(event); 492 return x86_pmu.hw_config(event);
428} 493}
429 494
@@ -577,14 +642,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
577 /* Prefer fixed purpose counters */ 642 /* Prefer fixed purpose counters */
578 if (x86_pmu.num_counters_fixed) { 643 if (x86_pmu.num_counters_fixed) {
579 idx = X86_PMC_IDX_FIXED; 644 idx = X86_PMC_IDX_FIXED;
580 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { 645 for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
581 if (!__test_and_set_bit(idx, sched->state.used)) 646 if (!__test_and_set_bit(idx, sched->state.used))
582 goto done; 647 goto done;
583 } 648 }
584 } 649 }
585 /* Grab the first unused counter starting with idx */ 650 /* Grab the first unused counter starting with idx */
586 idx = sched->state.counter; 651 idx = sched->state.counter;
587 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { 652 for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
588 if (!__test_and_set_bit(idx, sched->state.used)) 653 if (!__test_and_set_bit(idx, sched->state.used))
589 goto done; 654 goto done;
590 } 655 }
@@ -1210,6 +1275,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1210 break; 1275 break;
1211 1276
1212 case CPU_STARTING: 1277 case CPU_STARTING:
1278 if (x86_pmu.attr_rdpmc)
1279 set_in_cr4(X86_CR4_PCE);
1213 if (x86_pmu.cpu_starting) 1280 if (x86_pmu.cpu_starting)
1214 x86_pmu.cpu_starting(cpu); 1281 x86_pmu.cpu_starting(cpu);
1215 break; 1282 break;
@@ -1246,6 +1313,11 @@ static void __init pmu_check_apic(void)
1246 pr_info("no hardware sampling interrupt available.\n"); 1313 pr_info("no hardware sampling interrupt available.\n");
1247} 1314}
1248 1315
1316static struct attribute_group x86_pmu_format_group = {
1317 .name = "format",
1318 .attrs = NULL,
1319};
1320
1249static int __init init_hw_perf_events(void) 1321static int __init init_hw_perf_events(void)
1250{ 1322{
1251 struct x86_pmu_quirk *quirk; 1323 struct x86_pmu_quirk *quirk;
@@ -1319,6 +1391,9 @@ static int __init init_hw_perf_events(void)
1319 } 1391 }
1320 } 1392 }
1321 1393
1394 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1395 x86_pmu_format_group.attrs = x86_pmu.format_attrs;
1396
1322 pr_info("... version: %d\n", x86_pmu.version); 1397 pr_info("... version: %d\n", x86_pmu.version);
1323 pr_info("... bit width: %d\n", x86_pmu.cntval_bits); 1398 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
1324 pr_info("... generic registers: %d\n", x86_pmu.num_counters); 1399 pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1542,23 +1617,115 @@ static int x86_pmu_event_init(struct perf_event *event)
1542 return err; 1617 return err;
1543} 1618}
1544 1619
1620static int x86_pmu_event_idx(struct perf_event *event)
1621{
1622 int idx = event->hw.idx;
1623
1624 if (!x86_pmu.attr_rdpmc)
1625 return 0;
1626
1627 if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
1628 idx -= X86_PMC_IDX_FIXED;
1629 idx |= 1 << 30;
1630 }
1631
1632 return idx + 1;
1633}
1634
1635static ssize_t get_attr_rdpmc(struct device *cdev,
1636 struct device_attribute *attr,
1637 char *buf)
1638{
1639 return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
1640}
1641
1642static void change_rdpmc(void *info)
1643{
1644 bool enable = !!(unsigned long)info;
1645
1646 if (enable)
1647 set_in_cr4(X86_CR4_PCE);
1648 else
1649 clear_in_cr4(X86_CR4_PCE);
1650}
1651
1652static ssize_t set_attr_rdpmc(struct device *cdev,
1653 struct device_attribute *attr,
1654 const char *buf, size_t count)
1655{
1656 unsigned long val = simple_strtoul(buf, NULL, 0);
1657
1658 if (!!val != !!x86_pmu.attr_rdpmc) {
1659 x86_pmu.attr_rdpmc = !!val;
1660 smp_call_function(change_rdpmc, (void *)val, 1);
1661 }
1662
1663 return count;
1664}
1665
1666static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
1667
1668static struct attribute *x86_pmu_attrs[] = {
1669 &dev_attr_rdpmc.attr,
1670 NULL,
1671};
1672
1673static struct attribute_group x86_pmu_attr_group = {
1674 .attrs = x86_pmu_attrs,
1675};
1676
1677static const struct attribute_group *x86_pmu_attr_groups[] = {
1678 &x86_pmu_attr_group,
1679 &x86_pmu_format_group,
1680 NULL,
1681};
1682
1683static void x86_pmu_flush_branch_stack(void)
1684{
1685 if (x86_pmu.flush_branch_stack)
1686 x86_pmu.flush_branch_stack();
1687}
1688
1545static struct pmu pmu = { 1689static struct pmu pmu = {
1546 .pmu_enable = x86_pmu_enable, 1690 .pmu_enable = x86_pmu_enable,
1547 .pmu_disable = x86_pmu_disable, 1691 .pmu_disable = x86_pmu_disable,
1692
1693 .attr_groups = x86_pmu_attr_groups,
1548 1694
1549 .event_init = x86_pmu_event_init, 1695 .event_init = x86_pmu_event_init,
1550 1696
1551 .add = x86_pmu_add, 1697 .add = x86_pmu_add,
1552 .del = x86_pmu_del, 1698 .del = x86_pmu_del,
1553 .start = x86_pmu_start, 1699 .start = x86_pmu_start,
1554 .stop = x86_pmu_stop, 1700 .stop = x86_pmu_stop,
1555 .read = x86_pmu_read, 1701 .read = x86_pmu_read,
1556 1702
1557 .start_txn = x86_pmu_start_txn, 1703 .start_txn = x86_pmu_start_txn,
1558 .cancel_txn = x86_pmu_cancel_txn, 1704 .cancel_txn = x86_pmu_cancel_txn,
1559 .commit_txn = x86_pmu_commit_txn, 1705 .commit_txn = x86_pmu_commit_txn,
1706
1707 .event_idx = x86_pmu_event_idx,
1708 .flush_branch_stack = x86_pmu_flush_branch_stack,
1560}; 1709};
1561 1710
1711void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
1712{
1713 userpg->cap_usr_time = 0;
1714 userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
1715 userpg->pmc_width = x86_pmu.cntval_bits;
1716
1717 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1718 return;
1719
1720 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1721 return;
1722
1723 userpg->cap_usr_time = 1;
1724 userpg->time_mult = this_cpu_read(cyc2ns);
1725 userpg->time_shift = CYC2NS_SCALE_FACTOR;
1726 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
1727}
1728
1562/* 1729/*
1563 * callchain support 1730 * callchain support
1564 */ 1731 */
@@ -1595,6 +1762,9 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1595} 1762}
1596 1763
1597#ifdef CONFIG_COMPAT 1764#ifdef CONFIG_COMPAT
1765
1766#include <asm/compat.h>
1767
1598static inline int 1768static inline int
1599perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) 1769perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1600{ 1770{
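x86_pmu_event_idx() above encodes the counter for userspace RDPMC: fixed counters get bit 30 OR-ed in, and everything is biased by one so that zero can mean "not available". Userspace undoes the bias before issuing the instruction; a sketch of the consuming side, assuming the index has been read from the event's mmap'd perf_event_mmap_page:

	#include <stdint.h>

	/* idx is userpg->index; callers must check it is nonzero first */
	static inline uint64_t read_pmc(uint32_t idx)
	{
		uint32_t lo, hi;

		asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (idx - 1));
		return ((uint64_t)hi << 32) | lo;
	}

Whether the instruction is usable at all is gated by the new rdpmc sysfs attribute, which flips CR4.PCE on every CPU via the smp_call_function() in set_attr_rdpmc().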
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8944062f46e2..6638aaf54493 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
33 33
34 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ 34 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
35 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ 35 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
36 EXTRA_REG_LBR = 2, /* lbr_select */
36 37
37 EXTRA_REG_MAX /* number of entries needed */ 38 EXTRA_REG_MAX /* number of entries needed */
38}; 39};
@@ -130,6 +131,8 @@ struct cpu_hw_events {
130 void *lbr_context; 131 void *lbr_context;
131 struct perf_branch_stack lbr_stack; 132 struct perf_branch_stack lbr_stack;
132 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 133 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
134 struct er_account *lbr_sel;
135 u64 br_sel;
133 136
134 /* 137 /*
135 * Intel host/guest exclude bits 138 * Intel host/guest exclude bits
@@ -147,7 +150,9 @@ struct cpu_hw_events {
147 /* 150 /*
148 * AMD specific bits 151 * AMD specific bits
149 */ 152 */
150 struct amd_nb *amd_nb; 153 struct amd_nb *amd_nb;
154 /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
155 u64 perf_ctr_virt_mask;
151 156
152 void *kfree_on_online; 157 void *kfree_on_online;
153}; 158};
@@ -266,6 +271,29 @@ struct x86_pmu_quirk {
266 void (*func)(void); 271 void (*func)(void);
267}; 272};
268 273
274union x86_pmu_config {
275 struct {
276 u64 event:8,
277 umask:8,
278 usr:1,
279 os:1,
280 edge:1,
281 pc:1,
282 interrupt:1,
283 __reserved1:1,
284 en:1,
285 inv:1,
286 cmask:8,
287 event2:4,
288 __reserved2:4,
289 go:1,
290 ho:1;
291 } bits;
292 u64 value;
293};
294
295#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
296
269/* 297/*
270 * struct x86_pmu - generic x86 pmu 298 * struct x86_pmu - generic x86 pmu
271 */ 299 */
@@ -307,10 +335,20 @@ struct x86_pmu {
307 struct x86_pmu_quirk *quirks; 335 struct x86_pmu_quirk *quirks;
308 int perfctr_second_write; 336 int perfctr_second_write;
309 337
338 /*
339 * sysfs attrs
340 */
341 int attr_rdpmc;
342 struct attribute **format_attrs;
343
344 /*
345 * CPU Hotplug hooks
346 */
310 int (*cpu_prepare)(int cpu); 347 int (*cpu_prepare)(int cpu);
311 void (*cpu_starting)(int cpu); 348 void (*cpu_starting)(int cpu);
312 void (*cpu_dying)(int cpu); 349 void (*cpu_dying)(int cpu);
313 void (*cpu_dead)(int cpu); 350 void (*cpu_dead)(int cpu);
351 void (*flush_branch_stack)(void);
314 352
315 /* 353 /*
316 * Intel Arch Perfmon v2+ 354 * Intel Arch Perfmon v2+
@@ -332,6 +370,8 @@ struct x86_pmu {
332 */ 370 */
333 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ 371 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
334 int lbr_nr; /* hardware stack size */ 372 int lbr_nr; /* hardware stack size */
373 u64 lbr_sel_mask; /* LBR_SELECT valid bits */
374 const int *lbr_sel_map; /* lbr_select mappings */
335 375
336 /* 376 /*
337 * Extra registers for events 377 * Extra registers for events
@@ -417,9 +457,11 @@ void x86_pmu_disable_all(void);
417static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, 457static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
418 u64 enable_mask) 458 u64 enable_mask)
419{ 459{
460 u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
461
420 if (hwc->extra_reg.reg) 462 if (hwc->extra_reg.reg)
421 wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); 463 wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
422 wrmsrl(hwc->config_base, hwc->config | enable_mask); 464 wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
423} 465}
424 466
425void x86_pmu_enable_all(int added); 467void x86_pmu_enable_all(int added);
@@ -443,6 +485,15 @@ extern struct event_constraint emptyconstraint;
443 485
444extern struct event_constraint unconstrained; 486extern struct event_constraint unconstrained;
445 487
488static inline bool kernel_ip(unsigned long ip)
489{
490#ifdef CONFIG_X86_32
491 return ip > PAGE_OFFSET;
492#else
493 return (long)ip < 0;
494#endif
495}
496
446#ifdef CONFIG_CPU_SUP_AMD 497#ifdef CONFIG_CPU_SUP_AMD
447 498
448int amd_pmu_init(void); 499int amd_pmu_init(void);
@@ -523,6 +574,10 @@ void intel_pmu_lbr_init_nhm(void);
523 574
524void intel_pmu_lbr_init_atom(void); 575void intel_pmu_lbr_init_atom(void);
525 576
577void intel_pmu_lbr_init_snb(void);
578
579int intel_pmu_setup_lbr_filter(struct perf_event *event);
580
526int p4_pmu_init(void); 581int p4_pmu_init(void);
527 582
528int p6_pmu_init(void); 583int p6_pmu_init(void);
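The new X86_CONFIG() macro above builds a raw PERFEVTSEL value from named bitfields through a compound literal, which reads much better than hand-assembled hex. An illustrative use, assuming the union and macro from the hunk above are in scope (the event/umask values are just an example):

	/* event 0x3c, umask 0, inverted cmask of 2, packed into one u64 */
	u64 cfg = X86_CONFIG(.event = 0x3c, .umask = 0x00, .inv = 1, .cmask = 2);

Any field left out of the initializer list defaults to zero, which is what makes the macro convenient.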
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 0397b23be8e9..95e7fe1c5f0b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,4 +1,5 @@
1#include <linux/perf_event.h> 1#include <linux/perf_event.h>
2#include <linux/export.h>
2#include <linux/types.h> 3#include <linux/types.h>
3#include <linux/init.h> 4#include <linux/init.h>
4#include <linux/slab.h> 5#include <linux/slab.h>
@@ -138,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
138 if (ret) 139 if (ret)
139 return ret; 140 return ret;
140 141
142 if (has_branch_stack(event))
143 return -EOPNOTSUPP;
144
141 if (event->attr.exclude_host && event->attr.exclude_guest) 145 if (event->attr.exclude_host && event->attr.exclude_guest)
142 /* 146 /*
143 * When HO == GO == 1 the hardware treats that as GO == HO == 0 147 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -357,7 +361,9 @@ static void amd_pmu_cpu_starting(int cpu)
357 struct amd_nb *nb; 361 struct amd_nb *nb;
358 int i, nb_id; 362 int i, nb_id;
359 363
360 if (boot_cpu_data.x86_max_cores < 2) 364 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
365
366 if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15)
361 return; 367 return;
362 368
363 nb_id = amd_get_nb_id(cpu); 369 nb_id = amd_get_nb_id(cpu);
@@ -398,6 +404,21 @@ static void amd_pmu_cpu_dead(int cpu)
398 } 404 }
399} 405}
400 406
407PMU_FORMAT_ATTR(event, "config:0-7,32-35");
408PMU_FORMAT_ATTR(umask, "config:8-15" );
409PMU_FORMAT_ATTR(edge, "config:18" );
410PMU_FORMAT_ATTR(inv, "config:23" );
411PMU_FORMAT_ATTR(cmask, "config:24-31" );
412
413static struct attribute *amd_format_attr[] = {
414 &format_attr_event.attr,
415 &format_attr_umask.attr,
416 &format_attr_edge.attr,
417 &format_attr_inv.attr,
418 &format_attr_cmask.attr,
419 NULL,
420};
421
401static __initconst const struct x86_pmu amd_pmu = { 422static __initconst const struct x86_pmu amd_pmu = {
402 .name = "AMD", 423 .name = "AMD",
403 .handle_irq = x86_pmu_handle_irq, 424 .handle_irq = x86_pmu_handle_irq,
@@ -420,6 +441,8 @@ static __initconst const struct x86_pmu amd_pmu = {
420 .get_event_constraints = amd_get_event_constraints, 441 .get_event_constraints = amd_get_event_constraints,
421 .put_event_constraints = amd_put_event_constraints, 442 .put_event_constraints = amd_put_event_constraints,
422 443
444 .format_attrs = amd_format_attr,
445
423 .cpu_prepare = amd_pmu_cpu_prepare, 446 .cpu_prepare = amd_pmu_cpu_prepare,
424 .cpu_starting = amd_pmu_cpu_starting, 447 .cpu_starting = amd_pmu_cpu_starting,
425 .cpu_dead = amd_pmu_cpu_dead, 448 .cpu_dead = amd_pmu_cpu_dead,
@@ -587,9 +610,10 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
587 .put_event_constraints = amd_put_event_constraints, 610 .put_event_constraints = amd_put_event_constraints,
588 611
589 .cpu_prepare = amd_pmu_cpu_prepare, 612 .cpu_prepare = amd_pmu_cpu_prepare,
590 .cpu_starting = amd_pmu_cpu_starting,
591 .cpu_dead = amd_pmu_cpu_dead, 613 .cpu_dead = amd_pmu_cpu_dead,
592#endif 614#endif
615 .cpu_starting = amd_pmu_cpu_starting,
616 .format_attrs = amd_format_attr,
593}; 617};
594 618
595__init int amd_pmu_init(void) 619__init int amd_pmu_init(void)
@@ -621,3 +645,33 @@ __init int amd_pmu_init(void)
621 645
622 return 0; 646 return 0;
623} 647}
648
649void amd_pmu_enable_virt(void)
650{
651 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
652
653 cpuc->perf_ctr_virt_mask = 0;
654
655 /* Reload all events */
656 x86_pmu_disable_all();
657 x86_pmu_enable_all(0);
658}
659EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
660
661void amd_pmu_disable_virt(void)
662{
663 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
664
665 /*
666 * We only mask out the Host-only bit so that host-only counting works
667 * when SVM is disabled. If someone sets up a guest-only counter when
 668 * SVM is disabled, the Guest-only bit still gets set and the counter
669 * will not count anything.
670 */
671 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
672
673 /* Reload all events */
674 x86_pmu_disable_all();
675 x86_pmu_enable_all(0);
676}
677EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
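The two exports above hand the host a switch for the Host-Only/Guest-Only counting mask. A minimal caller sketch, assuming (the call sites are not part of this hunk) that KVM's SVM hardware enable/disable path is the intended user:

    /* Hypothetical pairing of the exports above; the real call sites are
     * assumed to live in KVM's SVM code, which this diff does not show. */
    static void host_svm_counting_on(void)
    {
            amd_pmu_enable_virt();  /* clear the mask: HO/GO bits take effect */
    }

    static void host_svm_counting_off(void)
    {
            amd_pmu_disable_virt(); /* re-mask so host-only counting still works */
    }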
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3bd37bdf1b8e..26b3e2fef104 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -385,14 +385,15 @@ static __initconst const u64 westmere_hw_cache_event_ids
385#define NHM_LOCAL_DRAM (1 << 14) 385#define NHM_LOCAL_DRAM (1 << 14)
386#define NHM_NON_DRAM (1 << 15) 386#define NHM_NON_DRAM (1 << 15)
387 387
388#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) 388#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
389#define NHM_REMOTE (NHM_REMOTE_DRAM)
389 390
390#define NHM_DMND_READ (NHM_DMND_DATA_RD) 391#define NHM_DMND_READ (NHM_DMND_DATA_RD)
391#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) 392#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
392#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) 393#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
393 394
394#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) 395#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
395#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) 396#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
396#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) 397#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
397 398
398static __initconst const u64 nehalem_hw_cache_extra_regs 399static __initconst const u64 nehalem_hw_cache_extra_regs
@@ -416,16 +417,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
416 }, 417 },
417 [ C(NODE) ] = { 418 [ C(NODE) ] = {
418 [ C(OP_READ) ] = { 419 [ C(OP_READ) ] = {
419 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM, 420 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
420 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM, 421 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
421 }, 422 },
422 [ C(OP_WRITE) ] = { 423 [ C(OP_WRITE) ] = {
423 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM, 424 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
424 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM, 425 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
425 }, 426 },
426 [ C(OP_PREFETCH) ] = { 427 [ C(OP_PREFETCH) ] = {
427 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM, 428 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
428 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM, 429 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
429 }, 430 },
430 }, 431 },
431}; 432};
@@ -727,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
727 }, 728 },
728}; 729};
729 730
731static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
732{
733 /* user explicitly requested branch sampling */
734 if (has_branch_stack(event))
735 return true;
736
737 /* implicit branch sampling to correct PEBS skid */
738 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
739 return true;
740
741 return false;
742}
743
730static void intel_pmu_disable_all(void) 744static void intel_pmu_disable_all(void)
731{ 745{
732 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 746 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -881,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
881 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); 895 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
882 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); 896 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
883 897
898 /*
 899 * LBR must be disabled before the actual event
900 * because any event may be combined with LBR
901 */
902 if (intel_pmu_needs_lbr_smpl(event))
903 intel_pmu_lbr_disable(event);
904
884 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 905 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
885 intel_pmu_disable_fixed(hwc); 906 intel_pmu_disable_fixed(hwc);
886 return; 907 return;
@@ -935,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
935 intel_pmu_enable_bts(hwc->config); 956 intel_pmu_enable_bts(hwc->config);
936 return; 957 return;
937 } 958 }
959 /*
 960 * LBR must be enabled before the actual event
961 * because any event may be combined with LBR
962 */
963 if (intel_pmu_needs_lbr_smpl(event))
964 intel_pmu_lbr_enable(event);
938 965
939 if (event->attr.exclude_host) 966 if (event->attr.exclude_host)
940 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); 967 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1057,6 +1084,9 @@ again:
1057 1084
1058 data.period = event->hw.last_period; 1085 data.period = event->hw.last_period;
1059 1086
1087 if (has_branch_stack(event))
1088 data.br_stack = &cpuc->lbr_stack;
1089
1060 if (perf_event_overflow(event, &data, regs)) 1090 if (perf_event_overflow(event, &data, regs))
1061 x86_pmu_stop(event, 0); 1091 x86_pmu_stop(event, 0);
1062 } 1092 }
@@ -1123,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
1123 */ 1153 */
1124static struct event_constraint * 1154static struct event_constraint *
1125__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, 1155__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1126 struct perf_event *event) 1156 struct perf_event *event,
1157 struct hw_perf_event_extra *reg)
1127{ 1158{
1128 struct event_constraint *c = &emptyconstraint; 1159 struct event_constraint *c = &emptyconstraint;
1129 struct hw_perf_event_extra *reg = &event->hw.extra_reg;
1130 struct er_account *era; 1160 struct er_account *era;
1131 unsigned long flags; 1161 unsigned long flags;
1132 int orig_idx = reg->idx; 1162 int orig_idx = reg->idx;
1133 1163
1134 /* already allocated shared msr */ 1164 /* already allocated shared msr */
1135 if (reg->alloc) 1165 if (reg->alloc)
1136 return &unconstrained; 1166 return NULL; /* call x86_get_event_constraint() */
1137 1167
1138again: 1168again:
1139 era = &cpuc->shared_regs->regs[reg->idx]; 1169 era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1186,10 @@ again:
1156 reg->alloc = 1; 1186 reg->alloc = 1;
1157 1187
1158 /* 1188 /*
1159 * All events using extra_reg are unconstrained. 1189 * need to call x86_get_event_constraint()
1160 * Avoids calling x86_get_event_constraints() 1190 * to check if associated event has constraints
1161 *
1162 * Must revisit if extra_reg controlling events
1163 * ever have constraints. Worst case we go through
1164 * the regular event constraint table.
1165 */ 1191 */
1166 c = &unconstrained; 1192 c = NULL;
1167 } else if (intel_try_alt_er(event, orig_idx)) { 1193 } else if (intel_try_alt_er(event, orig_idx)) {
1168 raw_spin_unlock_irqrestore(&era->lock, flags); 1194 raw_spin_unlock_irqrestore(&era->lock, flags);
1169 goto again; 1195 goto again;
@@ -1200,11 +1226,23 @@ static struct event_constraint *
1200intel_shared_regs_constraints(struct cpu_hw_events *cpuc, 1226intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
1201 struct perf_event *event) 1227 struct perf_event *event)
1202{ 1228{
1203 struct event_constraint *c = NULL; 1229 struct event_constraint *c = NULL, *d;
1204 1230 struct hw_perf_event_extra *xreg, *breg;
1205 if (event->hw.extra_reg.idx != EXTRA_REG_NONE) 1231
1206 c = __intel_shared_reg_get_constraints(cpuc, event); 1232 xreg = &event->hw.extra_reg;
1207 1233 if (xreg->idx != EXTRA_REG_NONE) {
1234 c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
1235 if (c == &emptyconstraint)
1236 return c;
1237 }
1238 breg = &event->hw.branch_reg;
1239 if (breg->idx != EXTRA_REG_NONE) {
1240 d = __intel_shared_reg_get_constraints(cpuc, event, breg);
1241 if (d == &emptyconstraint) {
1242 __intel_shared_reg_put_constraints(cpuc, xreg);
1243 c = d;
1244 }
1245 }
1208 return c; 1246 return c;
1209} 1247}
1210 1248
@@ -1252,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1252 reg = &event->hw.extra_reg; 1290 reg = &event->hw.extra_reg;
1253 if (reg->idx != EXTRA_REG_NONE) 1291 if (reg->idx != EXTRA_REG_NONE)
1254 __intel_shared_reg_put_constraints(cpuc, reg); 1292 __intel_shared_reg_put_constraints(cpuc, reg);
1293
1294 reg = &event->hw.branch_reg;
1295 if (reg->idx != EXTRA_REG_NONE)
1296 __intel_shared_reg_put_constraints(cpuc, reg);
1255} 1297}
1256 1298
1257static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 1299static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1287,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
1287 * 1329 *
1288 * Thereby we gain a PEBS capable cycle counter. 1330 * Thereby we gain a PEBS capable cycle counter.
1289 */ 1331 */
1290 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ 1332 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
1333
1291 1334
1292 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 1335 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1293 event->hw.config = alt_config; 1336 event->hw.config = alt_config;
1294 } 1337 }
1295 1338
1339 if (intel_pmu_needs_lbr_smpl(event)) {
1340 ret = intel_pmu_setup_lbr_filter(event);
1341 if (ret)
1342 return ret;
1343 }
1344
1296 if (event->attr.type != PERF_TYPE_RAW) 1345 if (event->attr.type != PERF_TYPE_RAW)
1297 return 0; 1346 return 0;
1298 1347
@@ -1382,6 +1431,24 @@ static void core_pmu_enable_all(int added)
1382 } 1431 }
1383} 1432}
1384 1433
1434PMU_FORMAT_ATTR(event, "config:0-7" );
1435PMU_FORMAT_ATTR(umask, "config:8-15" );
1436PMU_FORMAT_ATTR(edge, "config:18" );
1437PMU_FORMAT_ATTR(pc, "config:19" );
1438PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
1439PMU_FORMAT_ATTR(inv, "config:23" );
1440PMU_FORMAT_ATTR(cmask, "config:24-31" );
1441
1442static struct attribute *intel_arch_formats_attr[] = {
1443 &format_attr_event.attr,
1444 &format_attr_umask.attr,
1445 &format_attr_edge.attr,
1446 &format_attr_pc.attr,
1447 &format_attr_inv.attr,
1448 &format_attr_cmask.attr,
1449 NULL,
1450};
1451
1385static __initconst const struct x86_pmu core_pmu = { 1452static __initconst const struct x86_pmu core_pmu = {
1386 .name = "core", 1453 .name = "core",
1387 .handle_irq = x86_pmu_handle_irq, 1454 .handle_irq = x86_pmu_handle_irq,
@@ -1406,6 +1473,7 @@ static __initconst const struct x86_pmu core_pmu = {
1406 .put_event_constraints = intel_put_event_constraints, 1473 .put_event_constraints = intel_put_event_constraints,
1407 .event_constraints = intel_core_event_constraints, 1474 .event_constraints = intel_core_event_constraints,
1408 .guest_get_msrs = core_guest_get_msrs, 1475 .guest_get_msrs = core_guest_get_msrs,
1476 .format_attrs = intel_arch_formats_attr,
1409}; 1477};
1410 1478
1411struct intel_shared_regs *allocate_shared_regs(int cpu) 1479struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1431,7 +1499,7 @@ static int intel_pmu_cpu_prepare(int cpu)
1431{ 1499{
1432 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 1500 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1433 1501
1434 if (!x86_pmu.extra_regs) 1502 if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
1435 return NOTIFY_OK; 1503 return NOTIFY_OK;
1436 1504
1437 cpuc->shared_regs = allocate_shared_regs(cpu); 1505 cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1453,22 +1521,28 @@ static void intel_pmu_cpu_starting(int cpu)
1453 */ 1521 */
1454 intel_pmu_lbr_reset(); 1522 intel_pmu_lbr_reset();
1455 1523
1456 if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) 1524 cpuc->lbr_sel = NULL;
1525
1526 if (!cpuc->shared_regs)
1457 return; 1527 return;
1458 1528
1459 for_each_cpu(i, topology_thread_cpumask(cpu)) { 1529 if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
1460 struct intel_shared_regs *pc; 1530 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1531 struct intel_shared_regs *pc;
1461 1532
1462 pc = per_cpu(cpu_hw_events, i).shared_regs; 1533 pc = per_cpu(cpu_hw_events, i).shared_regs;
1463 if (pc && pc->core_id == core_id) { 1534 if (pc && pc->core_id == core_id) {
1464 cpuc->kfree_on_online = cpuc->shared_regs; 1535 cpuc->kfree_on_online = cpuc->shared_regs;
1465 cpuc->shared_regs = pc; 1536 cpuc->shared_regs = pc;
1466 break; 1537 break;
1538 }
1467 } 1539 }
1540 cpuc->shared_regs->core_id = core_id;
1541 cpuc->shared_regs->refcnt++;
1468 } 1542 }
1469 1543
1470 cpuc->shared_regs->core_id = core_id; 1544 if (x86_pmu.lbr_sel_map)
1471 cpuc->shared_regs->refcnt++; 1545 cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
1472} 1546}
1473 1547
1474static void intel_pmu_cpu_dying(int cpu) 1548static void intel_pmu_cpu_dying(int cpu)
@@ -1486,6 +1560,33 @@ static void intel_pmu_cpu_dying(int cpu)
1486 fini_debug_store_on_cpu(cpu); 1560 fini_debug_store_on_cpu(cpu);
1487} 1561}
1488 1562
1563static void intel_pmu_flush_branch_stack(void)
1564{
1565 /*
1566 * Intel LBR does not tag entries with the
 1567 * PID of the current task, so we need to
 1568 * flush it on context switch.
 1569 * For now, we simply reset it.
1570 */
1571 if (x86_pmu.lbr_nr)
1572 intel_pmu_lbr_reset();
1573}
1574
1575PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
1576
1577static struct attribute *intel_arch3_formats_attr[] = {
1578 &format_attr_event.attr,
1579 &format_attr_umask.attr,
1580 &format_attr_edge.attr,
1581 &format_attr_pc.attr,
1582 &format_attr_any.attr,
1583 &format_attr_inv.attr,
1584 &format_attr_cmask.attr,
1585
1586 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
1587 NULL,
1588};
1589
1489static __initconst const struct x86_pmu intel_pmu = { 1590static __initconst const struct x86_pmu intel_pmu = {
1490 .name = "Intel", 1591 .name = "Intel",
1491 .handle_irq = intel_pmu_handle_irq, 1592 .handle_irq = intel_pmu_handle_irq,
@@ -1509,10 +1610,13 @@ static __initconst const struct x86_pmu intel_pmu = {
1509 .get_event_constraints = intel_get_event_constraints, 1610 .get_event_constraints = intel_get_event_constraints,
1510 .put_event_constraints = intel_put_event_constraints, 1611 .put_event_constraints = intel_put_event_constraints,
1511 1612
1613 .format_attrs = intel_arch3_formats_attr,
1614
1512 .cpu_prepare = intel_pmu_cpu_prepare, 1615 .cpu_prepare = intel_pmu_cpu_prepare,
1513 .cpu_starting = intel_pmu_cpu_starting, 1616 .cpu_starting = intel_pmu_cpu_starting,
1514 .cpu_dying = intel_pmu_cpu_dying, 1617 .cpu_dying = intel_pmu_cpu_dying,
1515 .guest_get_msrs = intel_guest_get_msrs, 1618 .guest_get_msrs = intel_guest_get_msrs,
1619 .flush_branch_stack = intel_pmu_flush_branch_stack,
1516}; 1620};
1517 1621
1518static __init void intel_clovertown_quirk(void) 1622static __init void intel_clovertown_quirk(void)
@@ -1689,9 +1793,11 @@ __init int intel_pmu_init(void)
1689 x86_pmu.extra_regs = intel_nehalem_extra_regs; 1793 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1690 1794
1691 /* UOPS_ISSUED.STALLED_CYCLES */ 1795 /* UOPS_ISSUED.STALLED_CYCLES */
1692 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1796 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
1797 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
1693 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1798 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1694 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1799 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
1800 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
1695 1801
1696 x86_add_quirk(intel_nehalem_quirk); 1802 x86_add_quirk(intel_nehalem_quirk);
1697 1803
@@ -1726,9 +1832,11 @@ __init int intel_pmu_init(void)
1726 x86_pmu.er_flags |= ERF_HAS_RSP_1; 1832 x86_pmu.er_flags |= ERF_HAS_RSP_1;
1727 1833
1728 /* UOPS_ISSUED.STALLED_CYCLES */ 1834 /* UOPS_ISSUED.STALLED_CYCLES */
1729 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1835 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
1836 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
1730 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1837 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1731 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1838 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
1839 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
1732 1840
1733 pr_cont("Westmere events, "); 1841 pr_cont("Westmere events, ");
1734 break; 1842 break;
@@ -1739,7 +1847,7 @@ __init int intel_pmu_init(void)
1739 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1847 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1740 sizeof(hw_cache_event_ids)); 1848 sizeof(hw_cache_event_ids));
1741 1849
1742 intel_pmu_lbr_init_nhm(); 1850 intel_pmu_lbr_init_snb();
1743 1851
1744 x86_pmu.event_constraints = intel_snb_event_constraints; 1852 x86_pmu.event_constraints = intel_snb_event_constraints;
1745 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 1853 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
@@ -1749,9 +1857,11 @@ __init int intel_pmu_init(void)
1749 x86_pmu.er_flags |= ERF_NO_HT_SHARING; 1857 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
1750 1858
1751 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 1859 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
1752 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1860 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
1861 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
1753 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ 1862 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
1754 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; 1863 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
1864 X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
1755 1865
1756 pr_cont("SandyBridge events, "); 1866 pr_cont("SandyBridge events, ");
1757 break; 1867 break;
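The X86_CONFIG() conversions above swap magic constants for named bitfields. As a sanity check, a standalone sketch (the helper mirrors the field layout from the format attributes added above; it is not the kernel macro itself) reproduces the old raw value for UOPS_ISSUED.STALLED_CYCLES:

    #include <stdio.h>
    #include <stdint.h>

    /* Assumed field layout, per the format attrs: event config:0-7,
     * umask config:8-15, inv config:23, cmask config:24-31. */
    static uint64_t x86_config(uint64_t event, uint64_t umask,
                               uint64_t inv, uint64_t cmask)
    {
            return (event & 0xff) | ((umask & 0xff) << 8) |
                   ((inv & 1) << 23) | ((cmask & 0xff) << 24);
    }

    int main(void)
    {
            /* prints 0x180010e, the constant this hunk replaces */
            printf("%#llx\n", (unsigned long long)x86_config(0x0e, 0x01, 1, 1));
            return 0;
    }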
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 73da6b64f5b7..7f64df19e7dd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@
3#include <linux/slab.h> 3#include <linux/slab.h>
4 4
5#include <asm/perf_event.h> 5#include <asm/perf_event.h>
6#include <asm/insn.h>
6 7
7#include "perf_event.h" 8#include "perf_event.h"
8 9
@@ -439,10 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
439 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; 440 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
440 441
441 cpuc->pebs_enabled |= 1ULL << hwc->idx; 442 cpuc->pebs_enabled |= 1ULL << hwc->idx;
442 WARN_ON_ONCE(cpuc->enabled);
443
444 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
445 intel_pmu_lbr_enable(event);
446} 443}
447 444
448void intel_pmu_pebs_disable(struct perf_event *event) 445void intel_pmu_pebs_disable(struct perf_event *event)
@@ -455,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
455 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 452 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
456 453
457 hwc->config |= ARCH_PERFMON_EVENTSEL_INT; 454 hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
458
459 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
460 intel_pmu_lbr_disable(event);
461} 455}
462 456
463void intel_pmu_pebs_enable_all(void) 457void intel_pmu_pebs_enable_all(void)
@@ -476,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
476 wrmsrl(MSR_IA32_PEBS_ENABLE, 0); 470 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
477} 471}
478 472
479#include <asm/insn.h>
480
481static inline bool kernel_ip(unsigned long ip)
482{
483#ifdef CONFIG_X86_32
484 return ip > PAGE_OFFSET;
485#else
486 return (long)ip < 0;
487#endif
488}
489
490static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) 473static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
491{ 474{
492 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 475 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -573,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
573 * both formats and we don't use the other fields in this 556 * both formats and we don't use the other fields in this
574 * routine. 557 * routine.
575 */ 558 */
559 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
576 struct pebs_record_core *pebs = __pebs; 560 struct pebs_record_core *pebs = __pebs;
577 struct perf_sample_data data; 561 struct perf_sample_data data;
578 struct pt_regs regs; 562 struct pt_regs regs;
@@ -603,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
603 else 587 else
604 regs.flags &= ~PERF_EFLAGS_EXACT; 588 regs.flags &= ~PERF_EFLAGS_EXACT;
605 589
590 if (has_branch_stack(event))
591 data.br_stack = &cpuc->lbr_stack;
592
606 if (perf_event_overflow(event, &data, &regs)) 593 if (perf_event_overflow(event, &data, &regs))
607 x86_pmu_stop(event, 0); 594 x86_pmu_stop(event, 0);
608} 595}
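Note that kernel_ip() and the <asm/insn.h> include are deleted here while the LBR code in the next file starts depending on both; presumably they moved into the shared perf_event.h header in the same commit (an assumption — that header's diff is not shown). For reference, the helper's logic as removed above:

    /* Sketch of the removed helper; assumed to now live in perf_event.h. */
    static inline bool kernel_ip(unsigned long ip)
    {
    #ifdef CONFIG_X86_32
            return ip > PAGE_OFFSET;        /* kernel addresses sit above PAGE_OFFSET */
    #else
            return (long)ip < 0;            /* the kernel half has the sign bit set */
    #endif
    }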
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 3fab3de3ce96..520b4265fcd2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@
3 3
4#include <asm/perf_event.h> 4#include <asm/perf_event.h>
5#include <asm/msr.h> 5#include <asm/msr.h>
6#include <asm/insn.h>
6 7
7#include "perf_event.h" 8#include "perf_event.h"
8 9
@@ -14,6 +15,100 @@ enum {
14}; 15};
15 16
16/* 17/*
18 * Intel LBR_SELECT bits
19 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
20 *
21 * Hardware branch filter (not available on all CPUs)
22 */
23#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
24#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
25#define LBR_JCC_BIT 2 /* do not capture conditional branches */
26#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
27#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
28#define LBR_RETURN_BIT 5 /* do not capture near returns */
29#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
30#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
31#define LBR_FAR_BIT 8 /* do not capture far branches */
32
33#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
34#define LBR_USER (1 << LBR_USER_BIT)
35#define LBR_JCC (1 << LBR_JCC_BIT)
36#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
37#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
38#define LBR_RETURN (1 << LBR_RETURN_BIT)
39#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
40#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
41#define LBR_FAR (1 << LBR_FAR_BIT)
42
43#define LBR_PLM (LBR_KERNEL | LBR_USER)
44
45#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
46#define LBR_NOT_SUPP -1 /* LBR filter not supported */
47#define LBR_IGN 0 /* ignored */
48
49#define LBR_ANY \
50 (LBR_JCC |\
51 LBR_REL_CALL |\
52 LBR_IND_CALL |\
53 LBR_RETURN |\
54 LBR_REL_JMP |\
55 LBR_IND_JMP |\
56 LBR_FAR)
57
58#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
59
60#define for_each_branch_sample_type(x) \
61 for ((x) = PERF_SAMPLE_BRANCH_USER; \
62 (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
63
64/*
 65 * x86 control flow change classification
 66 * x86 control flow changes include branches, interrupts, traps, and faults
67 */
68enum {
69 X86_BR_NONE = 0, /* unknown */
70
71 X86_BR_USER = 1 << 0, /* branch target is user */
72 X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
73
74 X86_BR_CALL = 1 << 2, /* call */
75 X86_BR_RET = 1 << 3, /* return */
76 X86_BR_SYSCALL = 1 << 4, /* syscall */
77 X86_BR_SYSRET = 1 << 5, /* syscall return */
78 X86_BR_INT = 1 << 6, /* sw interrupt */
79 X86_BR_IRET = 1 << 7, /* return from interrupt */
80 X86_BR_JCC = 1 << 8, /* conditional */
81 X86_BR_JMP = 1 << 9, /* jump */
82 X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
83 X86_BR_IND_CALL = 1 << 11,/* indirect calls */
84};
85
86#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
87
88#define X86_BR_ANY \
89 (X86_BR_CALL |\
90 X86_BR_RET |\
91 X86_BR_SYSCALL |\
92 X86_BR_SYSRET |\
93 X86_BR_INT |\
94 X86_BR_IRET |\
95 X86_BR_JCC |\
96 X86_BR_JMP |\
97 X86_BR_IRQ |\
98 X86_BR_IND_CALL)
99
100#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
101
102#define X86_BR_ANY_CALL \
103 (X86_BR_CALL |\
104 X86_BR_IND_CALL |\
105 X86_BR_SYSCALL |\
106 X86_BR_IRQ |\
107 X86_BR_INT)
108
109static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
110
111/*
17 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI 112 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
18 * otherwise it becomes near impossible to get a reliable stack. 113 * otherwise it becomes near impossible to get a reliable stack.
19 */ 114 */
@@ -21,6 +116,10 @@ enum {
21static void __intel_pmu_lbr_enable(void) 116static void __intel_pmu_lbr_enable(void)
22{ 117{
23 u64 debugctl; 118 u64 debugctl;
119 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
120
121 if (cpuc->lbr_sel)
122 wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
24 123
25 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); 124 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
26 debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); 125 debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -72,17 +171,15 @@ void intel_pmu_lbr_enable(struct perf_event *event)
72 if (!x86_pmu.lbr_nr) 171 if (!x86_pmu.lbr_nr)
73 return; 172 return;
74 173
75 WARN_ON_ONCE(cpuc->enabled);
76
77 /* 174 /*
78 * Reset the LBR stack if we changed task context to 175 * Reset the LBR stack if we changed task context to
79 * avoid data leaks. 176 * avoid data leaks.
80 */ 177 */
81
82 if (event->ctx->task && cpuc->lbr_context != event->ctx) { 178 if (event->ctx->task && cpuc->lbr_context != event->ctx) {
83 intel_pmu_lbr_reset(); 179 intel_pmu_lbr_reset();
84 cpuc->lbr_context = event->ctx; 180 cpuc->lbr_context = event->ctx;
85 } 181 }
182 cpuc->br_sel = event->hw.branch_reg.reg;
86 183
87 cpuc->lbr_users++; 184 cpuc->lbr_users++;
88} 185}
@@ -97,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
97 cpuc->lbr_users--; 194 cpuc->lbr_users--;
98 WARN_ON_ONCE(cpuc->lbr_users < 0); 195 WARN_ON_ONCE(cpuc->lbr_users < 0);
99 196
100 if (cpuc->enabled && !cpuc->lbr_users) 197 if (cpuc->enabled && !cpuc->lbr_users) {
101 __intel_pmu_lbr_disable(); 198 __intel_pmu_lbr_disable();
199 /* avoid stale pointer */
200 cpuc->lbr_context = NULL;
201 }
102} 202}
103 203
104void intel_pmu_lbr_enable_all(void) 204void intel_pmu_lbr_enable_all(void)
@@ -117,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
117 __intel_pmu_lbr_disable(); 217 __intel_pmu_lbr_disable();
118} 218}
119 219
220/*
221 * TOS = most recently recorded branch
222 */
120static inline u64 intel_pmu_lbr_tos(void) 223static inline u64 intel_pmu_lbr_tos(void)
121{ 224{
122 u64 tos; 225 u64 tos;
@@ -144,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
144 247
145 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); 248 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
146 249
147 cpuc->lbr_entries[i].from = msr_lastbranch.from; 250 cpuc->lbr_entries[i].from = msr_lastbranch.from;
148 cpuc->lbr_entries[i].to = msr_lastbranch.to; 251 cpuc->lbr_entries[i].to = msr_lastbranch.to;
149 cpuc->lbr_entries[i].flags = 0; 252 cpuc->lbr_entries[i].mispred = 0;
253 cpuc->lbr_entries[i].predicted = 0;
254 cpuc->lbr_entries[i].reserved = 0;
150 } 255 }
151 cpuc->lbr_stack.nr = i; 256 cpuc->lbr_stack.nr = i;
152} 257}
153 258
154#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
155
156/* 259/*
157 * Due to lack of segmentation in Linux the effective address (offset) 260 * Due to lack of segmentation in Linux the effective address (offset)
158 * is the same as the linear address, allowing us to merge the LIP and EIP 261 * is the same as the linear address, allowing us to merge the LIP and EIP
@@ -167,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
167 270
168 for (i = 0; i < x86_pmu.lbr_nr; i++) { 271 for (i = 0; i < x86_pmu.lbr_nr; i++) {
169 unsigned long lbr_idx = (tos - i) & mask; 272 unsigned long lbr_idx = (tos - i) & mask;
170 u64 from, to, flags = 0; 273 u64 from, to, mis = 0, pred = 0;
171 274
172 rdmsrl(x86_pmu.lbr_from + lbr_idx, from); 275 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
173 rdmsrl(x86_pmu.lbr_to + lbr_idx, to); 276 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
174 277
175 if (lbr_format == LBR_FORMAT_EIP_FLAGS) { 278 if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
176 flags = !!(from & LBR_FROM_FLAG_MISPRED); 279 mis = !!(from & LBR_FROM_FLAG_MISPRED);
280 pred = !mis;
177 from = (u64)((((s64)from) << 1) >> 1); 281 from = (u64)((((s64)from) << 1) >> 1);
178 } 282 }
179 283
180 cpuc->lbr_entries[i].from = from; 284 cpuc->lbr_entries[i].from = from;
181 cpuc->lbr_entries[i].to = to; 285 cpuc->lbr_entries[i].to = to;
182 cpuc->lbr_entries[i].flags = flags; 286 cpuc->lbr_entries[i].mispred = mis;
287 cpuc->lbr_entries[i].predicted = pred;
288 cpuc->lbr_entries[i].reserved = 0;
183 } 289 }
184 cpuc->lbr_stack.nr = i; 290 cpuc->lbr_stack.nr = i;
185} 291}
@@ -195,28 +301,404 @@ void intel_pmu_lbr_read(void)
195 intel_pmu_lbr_read_32(cpuc); 301 intel_pmu_lbr_read_32(cpuc);
196 else 302 else
197 intel_pmu_lbr_read_64(cpuc); 303 intel_pmu_lbr_read_64(cpuc);
304
305 intel_pmu_lbr_filter(cpuc);
306}
307
308/*
309 * SW filter is used:
310 * - in case there is no HW filter
311 * - in case the HW filter has errata or limitations
312 */
313static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
314{
315 u64 br_type = event->attr.branch_sample_type;
316 int mask = 0;
317
318 if (br_type & PERF_SAMPLE_BRANCH_USER)
319 mask |= X86_BR_USER;
320
321 if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
322 mask |= X86_BR_KERNEL;
323
324 /* we ignore BRANCH_HV here */
325
326 if (br_type & PERF_SAMPLE_BRANCH_ANY)
327 mask |= X86_BR_ANY;
328
329 if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
330 mask |= X86_BR_ANY_CALL;
331
332 if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
333 mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
334
335 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
336 mask |= X86_BR_IND_CALL;
337 /*
 338 * stash the actual user request into reg; it may
 339 * be used by fixup code on some CPUs
340 */
341 event->hw.branch_reg.reg = mask;
342}
343
344/*
345 * setup the HW LBR filter
 346 * Used only when available; it may not be enough to disambiguate
 347 * all branches and may need the help of the SW filter
348 */
349static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
350{
351 struct hw_perf_event_extra *reg;
352 u64 br_type = event->attr.branch_sample_type;
353 u64 mask = 0, m;
354 u64 v;
355
356 for_each_branch_sample_type(m) {
357 if (!(br_type & m))
358 continue;
359
360 v = x86_pmu.lbr_sel_map[m];
361 if (v == LBR_NOT_SUPP)
362 return -EOPNOTSUPP;
363
364 if (v != LBR_IGN)
365 mask |= v;
366 }
367 reg = &event->hw.branch_reg;
368 reg->idx = EXTRA_REG_LBR;
369
370 /* LBR_SELECT operates in suppress mode so invert mask */
371 reg->config = ~mask & x86_pmu.lbr_sel_mask;
372
373 return 0;
374}
375
376int intel_pmu_setup_lbr_filter(struct perf_event *event)
377{
378 int ret = 0;
379
380 /*
381 * no LBR on this PMU
382 */
383 if (!x86_pmu.lbr_nr)
384 return -EOPNOTSUPP;
385
386 /*
387 * setup SW LBR filter
388 */
389 intel_pmu_setup_sw_lbr_filter(event);
390
391 /*
392 * setup HW LBR filter, if any
393 */
394 if (x86_pmu.lbr_sel_map)
395 ret = intel_pmu_setup_hw_lbr_filter(event);
396
397 return ret;
398}
399
400/*
401 * return the type of control flow change at address "from"
 402 * the instruction is not necessarily a branch (e.g., in the case of an interrupt).
403 *
404 * The branch type returned also includes the priv level of the
405 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
406 *
407 * If a branch type is unknown OR the instruction cannot be
408 * decoded (e.g., text page not present), then X86_BR_NONE is
409 * returned.
410 */
411static int branch_type(unsigned long from, unsigned long to)
412{
413 struct insn insn;
414 void *addr;
415 int bytes, size = MAX_INSN_SIZE;
416 int ret = X86_BR_NONE;
417 int ext, to_plm, from_plm;
418 u8 buf[MAX_INSN_SIZE];
419 int is64 = 0;
420
421 to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
422 from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
423
424 /*
 425 * may be zero if the LBR did not fill up after a reset by the time
426 * we get a PMU interrupt
427 */
428 if (from == 0 || to == 0)
429 return X86_BR_NONE;
430
431 if (from_plm == X86_BR_USER) {
432 /*
433 * can happen if measuring at the user level only
434 * and we interrupt in a kernel thread, e.g., idle.
435 */
436 if (!current->mm)
437 return X86_BR_NONE;
438
439 /* may fail if text not present */
440 bytes = copy_from_user_nmi(buf, (void __user *)from, size);
441 if (bytes != size)
442 return X86_BR_NONE;
443
444 addr = buf;
445 } else
446 addr = (void *)from;
447
448 /*
449 * decoder needs to know the ABI especially
450 * on 64-bit systems running 32-bit apps
451 */
452#ifdef CONFIG_X86_64
453 is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
454#endif
455 insn_init(&insn, addr, is64);
456 insn_get_opcode(&insn);
457
458 switch (insn.opcode.bytes[0]) {
459 case 0xf:
460 switch (insn.opcode.bytes[1]) {
461 case 0x05: /* syscall */
462 case 0x34: /* sysenter */
463 ret = X86_BR_SYSCALL;
464 break;
465 case 0x07: /* sysret */
466 case 0x35: /* sysexit */
467 ret = X86_BR_SYSRET;
468 break;
469 case 0x80 ... 0x8f: /* conditional */
470 ret = X86_BR_JCC;
471 break;
472 default:
473 ret = X86_BR_NONE;
474 }
475 break;
476 case 0x70 ... 0x7f: /* conditional */
477 ret = X86_BR_JCC;
478 break;
479 case 0xc2: /* near ret */
480 case 0xc3: /* near ret */
481 case 0xca: /* far ret */
482 case 0xcb: /* far ret */
483 ret = X86_BR_RET;
484 break;
485 case 0xcf: /* iret */
486 ret = X86_BR_IRET;
487 break;
488 case 0xcc ... 0xce: /* int */
489 ret = X86_BR_INT;
490 break;
491 case 0xe8: /* call near rel */
492 case 0x9a: /* call far absolute */
493 ret = X86_BR_CALL;
494 break;
495 case 0xe0 ... 0xe3: /* loop jmp */
496 ret = X86_BR_JCC;
497 break;
498 case 0xe9 ... 0xeb: /* jmp */
499 ret = X86_BR_JMP;
500 break;
501 case 0xff: /* call near absolute, call far absolute ind */
502 insn_get_modrm(&insn);
503 ext = (insn.modrm.bytes[0] >> 3) & 0x7;
504 switch (ext) {
505 case 2: /* near ind call */
506 case 3: /* far ind call */
507 ret = X86_BR_IND_CALL;
508 break;
509 case 4:
510 case 5:
511 ret = X86_BR_JMP;
512 break;
513 }
514 break;
515 default:
516 ret = X86_BR_NONE;
517 }
518 /*
 519 * interrupts, traps, faults (and thus ring transitions) may
 520 * occur on any instruction. Thus, to classify them correctly,
 521 * we need to first look at the from and to priv levels. If they
 522 * are different and to is in the kernel, then it indicates
 523 * a ring transition. If the from instruction is not a ring
 524 * transition instr (syscall, sysenter, int), then it means
 525 * it was an irq, trap or fault.
526 *
527 * we have no way of detecting kernel to kernel faults.
528 */
529 if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
530 && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
531 ret = X86_BR_IRQ;
532
533 /*
534 * branch priv level determined by target as
535 * is done by HW when LBR_SELECT is implemented
536 */
537 if (ret != X86_BR_NONE)
538 ret |= to_plm;
539
540 return ret;
541}
542
543/*
544 * implement actual branch filter based on user demand.
545 * Hardware may not exactly satisfy that request, thus
546 * we need to inspect opcodes. Mismatched branches are
547 * discarded. Therefore, the number of branches returned
 548 * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
549 */
550static void
551intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
552{
553 u64 from, to;
554 int br_sel = cpuc->br_sel;
555 int i, j, type;
556 bool compress = false;
557
558 /* if sampling all branches, then nothing to filter */
559 if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
560 return;
561
562 for (i = 0; i < cpuc->lbr_stack.nr; i++) {
563
564 from = cpuc->lbr_entries[i].from;
565 to = cpuc->lbr_entries[i].to;
566
567 type = branch_type(from, to);
568
569 /* if type does not correspond, then discard */
570 if (type == X86_BR_NONE || (br_sel & type) != type) {
571 cpuc->lbr_entries[i].from = 0;
572 compress = true;
573 }
574 }
575
576 if (!compress)
577 return;
578
579 /* remove all entries with from=0 */
580 for (i = 0; i < cpuc->lbr_stack.nr; ) {
581 if (!cpuc->lbr_entries[i].from) {
582 j = i;
583 while (++j < cpuc->lbr_stack.nr)
584 cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
585 cpuc->lbr_stack.nr--;
586 if (!cpuc->lbr_entries[i].from)
587 continue;
588 }
589 i++;
590 }
198} 591}
199 592
593/*
594 * Map interface branch filters onto LBR filters
595 */
596static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
597 [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
598 [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
599 [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
600 [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
601 [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
602 | LBR_IND_JMP | LBR_FAR,
603 /*
604 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
605 */
606 [PERF_SAMPLE_BRANCH_ANY_CALL] =
607 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
608 /*
609 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
610 */
611 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
612};
613
614static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
615 [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
616 [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
617 [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
618 [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
619 [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
620 [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
621 | LBR_FAR,
622 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
623};
624
625/* core */
200void intel_pmu_lbr_init_core(void) 626void intel_pmu_lbr_init_core(void)
201{ 627{
202 x86_pmu.lbr_nr = 4; 628 x86_pmu.lbr_nr = 4;
203 x86_pmu.lbr_tos = 0x01c9; 629 x86_pmu.lbr_tos = MSR_LBR_TOS;
204 x86_pmu.lbr_from = 0x40; 630 x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
205 x86_pmu.lbr_to = 0x60; 631 x86_pmu.lbr_to = MSR_LBR_CORE_TO;
632
633 /*
634 * SW branch filter usage:
635 * - compensate for lack of HW filter
636 */
637 pr_cont("4-deep LBR, ");
206} 638}
207 639
640/* nehalem/westmere */
208void intel_pmu_lbr_init_nhm(void) 641void intel_pmu_lbr_init_nhm(void)
209{ 642{
210 x86_pmu.lbr_nr = 16; 643 x86_pmu.lbr_nr = 16;
211 x86_pmu.lbr_tos = 0x01c9; 644 x86_pmu.lbr_tos = MSR_LBR_TOS;
212 x86_pmu.lbr_from = 0x680; 645 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
213 x86_pmu.lbr_to = 0x6c0; 646 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
647
648 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
649 x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
650
651 /*
652 * SW branch filter usage:
 653 * - work around LBR_SEL errata (see above)
 654 * - support syscall, sysret capture.
 655 * That requires LBR_FAR, but that means far
 656 * jmps need to be filtered out
657 */
658 pr_cont("16-deep LBR, ");
659}
660
661/* sandy bridge */
662void intel_pmu_lbr_init_snb(void)
663{
664 x86_pmu.lbr_nr = 16;
665 x86_pmu.lbr_tos = MSR_LBR_TOS;
666 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
667 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
668
669 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
670 x86_pmu.lbr_sel_map = snb_lbr_sel_map;
671
672 /*
673 * SW branch filter usage:
674 * - support syscall, sysret capture.
 675 * That requires LBR_FAR, but that means far
 676 * jmps need to be filtered out
677 */
678 pr_cont("16-deep LBR, ");
214} 679}
215 680
681/* atom */
216void intel_pmu_lbr_init_atom(void) 682void intel_pmu_lbr_init_atom(void)
217{ 683{
684 /*
 685 * only models starting at stepping 10 seem
686 * to have an operational LBR which can freeze
687 * on PMU interrupt
688 */
689 if (boot_cpu_data.x86_mask < 10) {
690 pr_cont("LBR disabled due to erratum");
691 return;
692 }
693
218 x86_pmu.lbr_nr = 8; 694 x86_pmu.lbr_nr = 8;
219 x86_pmu.lbr_tos = 0x01c9; 695 x86_pmu.lbr_tos = MSR_LBR_TOS;
220 x86_pmu.lbr_from = 0x40; 696 x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
221 x86_pmu.lbr_to = 0x60; 697 x86_pmu.lbr_to = MSR_LBR_CORE_TO;
698
699 /*
700 * SW branch filter usage:
701 * - compensate for lack of HW filter
702 */
703 pr_cont("8-deep LBR, ");
222} 704}
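All of this plumbing is driven by the branch_sample_type field of struct perf_event_attr. A hedged userspace sketch of requesting LBR-backed call sampling (constants from the 3.4-era perf ABI; the availability of PERF_SAMPLE_BRANCH_STACK is taken as given by this series):

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <string.h>

    static int open_branch_sampler(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100000;
            attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
            /* filters that the sel_map tables above translate into LBR_SELECT
             * bits, or that fall back to the SW opcode filter */
            attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
                                      PERF_SAMPLE_BRANCH_ANY_CALL;

            /* current task, any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }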
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ef484d9d0a25..a2dfacfd7103 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1271,6 +1271,17 @@ done:
1271 return num ? -EINVAL : 0; 1271 return num ? -EINVAL : 0;
1272} 1272}
1273 1273
1274PMU_FORMAT_ATTR(cccr, "config:0-31" );
1275PMU_FORMAT_ATTR(escr, "config:32-62");
1276PMU_FORMAT_ATTR(ht, "config:63" );
1277
1278static struct attribute *intel_p4_formats_attr[] = {
1279 &format_attr_cccr.attr,
1280 &format_attr_escr.attr,
1281 &format_attr_ht.attr,
1282 NULL,
1283};
1284
1274static __initconst const struct x86_pmu p4_pmu = { 1285static __initconst const struct x86_pmu p4_pmu = {
1275 .name = "Netburst P4/Xeon", 1286 .name = "Netburst P4/Xeon",
1276 .handle_irq = p4_pmu_handle_irq, 1287 .handle_irq = p4_pmu_handle_irq,
@@ -1305,6 +1316,8 @@ static __initconst const struct x86_pmu p4_pmu = {
1305 * the former idea is taken from OProfile code 1316 * the former idea is taken from OProfile code
1306 */ 1317 */
1307 .perfctr_second_write = 1, 1318 .perfctr_second_write = 1,
1319
1320 .format_attrs = intel_p4_formats_attr,
1308}; 1321};
1309 1322
1310__init int p4_pmu_init(void) 1323__init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index c7181befecde..32bcfc7dd230 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -87,6 +87,23 @@ static void p6_pmu_enable_event(struct perf_event *event)
87 (void)checking_wrmsrl(hwc->config_base, val); 87 (void)checking_wrmsrl(hwc->config_base, val);
88} 88}
89 89
90PMU_FORMAT_ATTR(event, "config:0-7" );
91PMU_FORMAT_ATTR(umask, "config:8-15" );
92PMU_FORMAT_ATTR(edge, "config:18" );
93PMU_FORMAT_ATTR(pc, "config:19" );
94PMU_FORMAT_ATTR(inv, "config:23" );
95PMU_FORMAT_ATTR(cmask, "config:24-31" );
96
97static struct attribute *intel_p6_formats_attr[] = {
98 &format_attr_event.attr,
99 &format_attr_umask.attr,
100 &format_attr_edge.attr,
101 &format_attr_pc.attr,
102 &format_attr_inv.attr,
103 &format_attr_cmask.attr,
104 NULL,
105};
106
90static __initconst const struct x86_pmu p6_pmu = { 107static __initconst const struct x86_pmu p6_pmu = {
91 .name = "p6", 108 .name = "p6",
92 .handle_irq = x86_pmu_handle_irq, 109 .handle_irq = x86_pmu_handle_irq,
@@ -115,6 +132,8 @@ static __initconst const struct x86_pmu p6_pmu = {
115 .cntval_mask = (1ULL << 32) - 1, 132 .cntval_mask = (1ULL << 32) - 1,
116 .get_event_constraints = x86_get_event_constraints, 133 .get_event_constraints = x86_get_event_constraints,
117 .event_constraints = p6_event_constraints, 134 .event_constraints = p6_event_constraints,
135
136 .format_attrs = intel_p6_formats_attr,
118}; 137};
119 138
120__init int p6_pmu_init(void) 139__init int p6_pmu_init(void)
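Each PMU_FORMAT_ATTR() added in the AMD, Intel, P4 and P6 diffs above publishes one field-layout string through sysfs. A small sketch of reading one back (the path follows the event_source bus layout; treat it as an assumption):

    #include <stdio.h>

    int main(void)
    {
            char buf[64];
            FILE *f = fopen("/sys/bus/event_source/devices/cpu/format/event", "r");

            if (f && fgets(buf, sizeof(buf), f))
                    printf("event field layout: %s", buf);  /* e.g. "config:0-7" */
            if (f)
                    fclose(f);
            return 0;
    }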
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c7f64e6f537a..addf9e82a7f2 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
40 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, 40 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
41 { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, 41 { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
42 { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, 42 { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
43 { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
43 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, 44 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
44 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, 45 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
45 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, 46 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
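The new row says X86_FEATURE_HW_PSTATE is advertised in CPUID leaf 0x80000007, EDX bit 7. A hypothetical userspace cross-check using GCC's cpuid.h:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* leaf 0x80000007: Advanced Power Management information */
            if (__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
                    printf("hw_pstate: %s\n", (edx & (1u << 7)) ? "yes" : "no");
            return 0;
    }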
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a524353d93f2..39472dd2323f 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -43,7 +43,6 @@
43 43
44#include <asm/processor.h> 44#include <asm/processor.h>
45#include <asm/msr.h> 45#include <asm/msr.h>
46#include <asm/system.h>
47 46
48static struct class *cpuid_class; 47static struct class *cpuid_class;
49 48
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 642f75a68cd5..11891ca7b716 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -62,16 +62,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
62 62
63 if (!userbuf) { 63 if (!userbuf) {
64 memcpy(buf, (vaddr + offset), csize); 64 memcpy(buf, (vaddr + offset), csize);
65 kunmap_atomic(vaddr, KM_PTE0); 65 kunmap_atomic(vaddr);
66 } else { 66 } else {
67 if (!kdump_buf_page) { 67 if (!kdump_buf_page) {
68 printk(KERN_WARNING "Kdump: Kdump buffer page not" 68 printk(KERN_WARNING "Kdump: Kdump buffer page not"
69 " allocated\n"); 69 " allocated\n");
70 kunmap_atomic(vaddr, KM_PTE0); 70 kunmap_atomic(vaddr);
71 return -EFAULT; 71 return -EFAULT;
72 } 72 }
73 copy_page(kdump_buf_page, vaddr); 73 copy_page(kdump_buf_page, vaddr);
74 kunmap_atomic(vaddr, KM_PTE0); 74 kunmap_atomic(vaddr);
75 if (copy_to_user(buf, (kdump_buf_page + offset), csize)) 75 if (copy_to_user(buf, (kdump_buf_page + offset), csize))
76 return -EFAULT; 76 return -EFAULT;
77 } 77 }
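The hunk above tracks the highmem API change in which kunmap_atomic() lost its KM_* slot argument. A minimal sketch of the updated pairing, assuming the one-argument forms are all that remain:

    #include <linux/highmem.h>
    #include <linux/string.h>

    /* Hypothetical helper showing the new kmap_atomic()/kunmap_atomic() pair. */
    static void copy_from_page(struct page *page, void *dst, size_t len)
    {
            void *vaddr = kmap_atomic(page);  /* no KM_* slot argument anymore */

            memcpy(dst, vaddr, len);
            kunmap_atomic(vaddr);             /* unmap by address, not by slot */
    }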
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 52821799a702..3ae2ced4a874 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -4,6 +4,7 @@
4#include <linux/bootmem.h> 4#include <linux/bootmem.h>
5#include <linux/export.h> 5#include <linux/export.h>
6#include <linux/io.h> 6#include <linux/io.h>
7#include <linux/irqdomain.h>
7#include <linux/interrupt.h> 8#include <linux/interrupt.h>
8#include <linux/list.h> 9#include <linux/list.h>
9#include <linux/of.h> 10#include <linux/of.h>
@@ -17,64 +18,14 @@
17#include <linux/initrd.h> 18#include <linux/initrd.h>
18 19
19#include <asm/hpet.h> 20#include <asm/hpet.h>
20#include <asm/irq_controller.h>
21#include <asm/apic.h> 21#include <asm/apic.h>
22#include <asm/pci_x86.h> 22#include <asm/pci_x86.h>
23 23
24__initdata u64 initial_dtb; 24__initdata u64 initial_dtb;
25char __initdata cmd_line[COMMAND_LINE_SIZE]; 25char __initdata cmd_line[COMMAND_LINE_SIZE];
26static LIST_HEAD(irq_domains);
27static DEFINE_RAW_SPINLOCK(big_irq_lock);
28 26
29int __initdata of_ioapic; 27int __initdata of_ioapic;
30 28
31#ifdef CONFIG_X86_IO_APIC
32static void add_interrupt_host(struct irq_domain *ih)
33{
34 unsigned long flags;
35
36 raw_spin_lock_irqsave(&big_irq_lock, flags);
37 list_add(&ih->l, &irq_domains);
38 raw_spin_unlock_irqrestore(&big_irq_lock, flags);
39}
40#endif
41
42static struct irq_domain *get_ih_from_node(struct device_node *controller)
43{
44 struct irq_domain *ih, *found = NULL;
45 unsigned long flags;
46
47 raw_spin_lock_irqsave(&big_irq_lock, flags);
48 list_for_each_entry(ih, &irq_domains, l) {
49 if (ih->controller == controller) {
50 found = ih;
51 break;
52 }
53 }
54 raw_spin_unlock_irqrestore(&big_irq_lock, flags);
55 return found;
56}
57
58unsigned int irq_create_of_mapping(struct device_node *controller,
59 const u32 *intspec, unsigned int intsize)
60{
61 struct irq_domain *ih;
62 u32 virq, type;
63 int ret;
64
65 ih = get_ih_from_node(controller);
66 if (!ih)
67 return 0;
68 ret = ih->xlate(ih, intspec, intsize, &virq, &type);
69 if (ret)
70 return 0;
71 if (type == IRQ_TYPE_NONE)
72 return virq;
73 irq_set_irq_type(virq, type);
74 return virq;
75}
76EXPORT_SYMBOL_GPL(irq_create_of_mapping);
77
78unsigned long pci_address_to_pio(phys_addr_t address) 29unsigned long pci_address_to_pio(phys_addr_t address)
79{ 30{
80 /* 31 /*
@@ -354,36 +305,43 @@ static struct of_ioapic_type of_ioapic_type[] =
354 }, 305 },
355}; 306};
356 307
357static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, 308static int ioapic_xlate(struct irq_domain *domain,
358 u32 *out_hwirq, u32 *out_type) 309 struct device_node *controller,
310 const u32 *intspec, u32 intsize,
311 irq_hw_number_t *out_hwirq, u32 *out_type)
359{ 312{
360 struct mp_ioapic_gsi *gsi_cfg;
361 struct io_apic_irq_attr attr; 313 struct io_apic_irq_attr attr;
362 struct of_ioapic_type *it; 314 struct of_ioapic_type *it;
363 u32 line, idx, type; 315 u32 line, idx;
316 int rc;
364 317
365 if (intsize < 2) 318 if (WARN_ON(intsize < 2))
366 return -EINVAL; 319 return -EINVAL;
367 320
368 line = *intspec; 321 line = intspec[0];
369 idx = (u32) id->priv;
370 gsi_cfg = mp_ioapic_gsi_routing(idx);
371 *out_hwirq = line + gsi_cfg->gsi_base;
372
373 intspec++;
374 type = *intspec;
375 322
376 if (type >= ARRAY_SIZE(of_ioapic_type)) 323 if (intspec[1] >= ARRAY_SIZE(of_ioapic_type))
377 return -EINVAL; 324 return -EINVAL;
378 325
379 it = of_ioapic_type + type; 326 it = &of_ioapic_type[intspec[1]];
380 *out_type = it->out_type;
381 327
328 idx = (u32) domain->host_data;
382 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); 329 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
383 330
384 return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr); 331 rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line),
332 cpu_to_node(0), &attr);
333 if (rc)
334 return rc;
335
336 *out_hwirq = line;
337 *out_type = it->out_type;
338 return 0;
385} 339}
386 340
341const struct irq_domain_ops ioapic_irq_domain_ops = {
342 .xlate = ioapic_xlate,
343};
344
387static void __init ioapic_add_ofnode(struct device_node *np) 345static void __init ioapic_add_ofnode(struct device_node *np)
388{ 346{
389 struct resource r; 347 struct resource r;
@@ -399,13 +357,14 @@ static void __init ioapic_add_ofnode(struct device_node *np)
399 for (i = 0; i < nr_ioapics; i++) { 357 for (i = 0; i < nr_ioapics; i++) {
400 if (r.start == mpc_ioapic_addr(i)) { 358 if (r.start == mpc_ioapic_addr(i)) {
401 struct irq_domain *id; 359 struct irq_domain *id;
360 struct mp_ioapic_gsi *gsi_cfg;
361
362 gsi_cfg = mp_ioapic_gsi_routing(i);
402 363
403 id = kzalloc(sizeof(*id), GFP_KERNEL); 364 id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
365 &ioapic_irq_domain_ops,
366 (void*)i);
404 BUG_ON(!id); 367 BUG_ON(!id);
405 id->controller = np;
406 id->xlate = ioapic_xlate;
407 id->priv = (void *)i;
408 add_interrupt_host(id);
409 return; 368 return;
410 } 369 }
411 } 370 }
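With ioapic_add_ofnode() now registering a legacy irq_domain, the hand-rolled controller list and the irq_create_of_mapping() override removed above collapse into generic irqdomain calls. A sketch of the resulting hwirq-to-virq lookup (the domain pointer is assumed to be whatever irq_domain_add_legacy() returned):

    #include <linux/irqdomain.h>

    static unsigned int gsi_line_to_virq(struct irq_domain *domain,
                                         irq_hw_number_t line)
    {
            /* legacy domains pre-map every hwirq, so this never allocates */
            return irq_find_mapping(domain, line);
    }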
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 1aae78f775fc..1b81839b6c88 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -37,13 +37,16 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
37 const struct stacktrace_ops *ops, 37 const struct stacktrace_ops *ops,
38 struct thread_info *tinfo, int *graph) 38 struct thread_info *tinfo, int *graph)
39{ 39{
40 struct task_struct *task = tinfo->task; 40 struct task_struct *task;
41 unsigned long ret_addr; 41 unsigned long ret_addr;
42 int index = task->curr_ret_stack; 42 int index;
43 43
44 if (addr != (unsigned long)return_to_handler) 44 if (addr != (unsigned long)return_to_handler)
45 return; 45 return;
46 46
47 task = tinfo->task;
48 index = task->curr_ret_stack;
49
47 if (!task->ret_stack || index < *graph) 50 if (!task->ret_stack || index < *graph)
48 return; 51 return;
49 52
@@ -252,7 +255,8 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
252 unsigned short ss; 255 unsigned short ss;
253 unsigned long sp; 256 unsigned long sp;
254#endif 257#endif
255 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); 258 printk(KERN_DEFAULT
259 "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
256#ifdef CONFIG_PREEMPT 260#ifdef CONFIG_PREEMPT
257 printk("PREEMPT "); 261 printk("PREEMPT ");
258#endif 262#endif
@@ -264,7 +268,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
264#endif 268#endif
265 printk("\n"); 269 printk("\n");
266 if (notify_die(DIE_OOPS, str, regs, err, 270 if (notify_die(DIE_OOPS, str, regs, err,
267 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) 271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
268 return 1; 272 return 1;
269 273
270 show_registers(regs); 274 show_registers(regs);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index c99f9ed013d5..88ec9129271d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
87 int i; 87 int i;
88 88
89 print_modules(); 89 print_modules();
90 __show_regs(regs, 0); 90 __show_regs(regs, !user_mode_vm(regs));
91 91
92 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", 92 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
93 TASK_COMM_LEN, current->comm, task_pid_nr(current), 93 TASK_COMM_LEN, current->comm, task_pid_nr(current),
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6d728d9284bd..17107bd6e1f0 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -129,7 +129,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
129 if (!stack) { 129 if (!stack) {
130 if (regs) 130 if (regs)
131 stack = (unsigned long *)regs->sp; 131 stack = (unsigned long *)regs->sp;
132 else if (task && task != current) 132 else if (task != current)
133 stack = (unsigned long *)task->thread.sp; 133 stack = (unsigned long *)task->thread.sp;
134 else 134 else
135 stack = &dummy; 135 stack = &dummy;
@@ -269,11 +269,11 @@ void show_registers(struct pt_regs *regs)
269 unsigned char c; 269 unsigned char c;
270 u8 *ip; 270 u8 *ip;
271 271
272 printk(KERN_EMERG "Stack:\n"); 272 printk(KERN_DEFAULT "Stack:\n");
273 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 273 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
274 0, KERN_EMERG); 274 0, KERN_DEFAULT);
275 275
276 printk(KERN_EMERG "Code: "); 276 printk(KERN_DEFAULT "Code: ");
277 277
278 ip = (u8 *)regs->ip - code_prologue; 278 ip = (u8 *)regs->ip - code_prologue;
279 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { 279 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 8071e2f3d6eb..62d61e9976eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -19,6 +19,7 @@
19#include <linux/acpi.h> 19#include <linux/acpi.h>
20#include <linux/firmware-map.h> 20#include <linux/firmware-map.h>
21#include <linux/memblock.h> 21#include <linux/memblock.h>
22#include <linux/sort.h>
22 23
23#include <asm/e820.h> 24#include <asm/e820.h>
24#include <asm/proto.h> 25#include <asm/proto.h>
@@ -227,22 +228,38 @@ void __init e820_print_map(char *who)
227 * ____________________33__ 228 * ____________________33__
228 * ______________________4_ 229 * ______________________4_
229 */ 230 */
231struct change_member {
232 struct e820entry *pbios; /* pointer to original bios entry */
233 unsigned long long addr; /* address for this change point */
234};
235
236static int __init cpcompare(const void *a, const void *b)
237{
238 struct change_member * const *app = a, * const *bpp = b;
239 const struct change_member *ap = *app, *bp = *bpp;
240
241 /*
242 * Inputs are pointers to two elements of change_point[]. If their
243 * addresses are unequal, their difference dominates. If the addresses
244 * are equal, then consider one that represents the end of its region
245 * to be greater than one that does not.
246 */
247 if (ap->addr != bp->addr)
248 return ap->addr > bp->addr ? 1 : -1;
249
250 return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr);
251}
230 252
231int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, 253int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
232 u32 *pnr_map) 254 u32 *pnr_map)
233{ 255{
234 struct change_member {
235 struct e820entry *pbios; /* pointer to original bios entry */
236 unsigned long long addr; /* address for this change point */
237 };
238 static struct change_member change_point_list[2*E820_X_MAX] __initdata; 256 static struct change_member change_point_list[2*E820_X_MAX] __initdata;
239 static struct change_member *change_point[2*E820_X_MAX] __initdata; 257 static struct change_member *change_point[2*E820_X_MAX] __initdata;
240 static struct e820entry *overlap_list[E820_X_MAX] __initdata; 258 static struct e820entry *overlap_list[E820_X_MAX] __initdata;
241 static struct e820entry new_bios[E820_X_MAX] __initdata; 259 static struct e820entry new_bios[E820_X_MAX] __initdata;
242 struct change_member *change_tmp;
243 unsigned long current_type, last_type; 260 unsigned long current_type, last_type;
244 unsigned long long last_addr; 261 unsigned long long last_addr;
245 int chgidx, still_changing; 262 int chgidx;
246 int overlap_entries; 263 int overlap_entries;
247 int new_bios_entry; 264 int new_bios_entry;
248 int old_nr, new_nr, chg_nr; 265 int old_nr, new_nr, chg_nr;
@@ -279,35 +296,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
279 chg_nr = chgidx; 296 chg_nr = chgidx;
280 297
281 /* sort change-point list by memory addresses (low -> high) */ 298 /* sort change-point list by memory addresses (low -> high) */
282 still_changing = 1; 299 sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL);
283 while (still_changing) {
284 still_changing = 0;
285 for (i = 1; i < chg_nr; i++) {
286 unsigned long long curaddr, lastaddr;
287 unsigned long long curpbaddr, lastpbaddr;
288
289 curaddr = change_point[i]->addr;
290 lastaddr = change_point[i - 1]->addr;
291 curpbaddr = change_point[i]->pbios->addr;
292 lastpbaddr = change_point[i - 1]->pbios->addr;
293
294 /*
295 * swap entries, when:
296 *
297 * curaddr > lastaddr or
298 * curaddr == lastaddr and curaddr == curpbaddr and
299 * lastaddr != lastpbaddr
300 */
301 if (curaddr < lastaddr ||
302 (curaddr == lastaddr && curaddr == curpbaddr &&
303 lastaddr != lastpbaddr)) {
304 change_tmp = change_point[i];
305 change_point[i] = change_point[i-1];
306 change_point[i-1] = change_tmp;
307 still_changing = 1;
308 }
309 }
310 }
311 300
312 /* create a new bios memory map, removing overlaps */ 301 /* create a new bios memory map, removing overlaps */
313 overlap_entries = 0; /* number of entries in the overlap table */ 302 overlap_entries = 0; /* number of entries in the overlap table */
@@ -714,7 +703,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
714} 703}
715#endif 704#endif
716 705
717#ifdef CONFIG_HIBERNATION 706#ifdef CONFIG_ACPI
718/** 707/**
719 * Mark ACPI NVS memory region, so that we can save/restore it during 708 * Mark ACPI NVS memory region, so that we can save/restore it during
720 * hibernation and the subsequent resume. 709 * hibernation and the subsequent resume.
@@ -727,7 +716,7 @@ static int __init e820_mark_nvs_memory(void)
727 struct e820entry *ei = &e820.map[i]; 716 struct e820entry *ei = &e820.map[i];
728 717
729 if (ei->type == E820_NVS) 718 if (ei->type == E820_NVS)
730 suspend_nvs_register(ei->addr, ei->size); 719 acpi_nvs_register(ei->addr, ei->size);
731 } 720 }
732 721
733 return 0; 722 return 0;
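
The sanitize_e820_map() change above replaces a hand-rolled bubble sort of the change points with the kernel's generic sort() (lib/sort.c) plus the cpcompare() comparator. To see the tie-breaking rule in action (at equal addresses, a change point that ends its region sorts after one that starts a region), here is a minimal userspace sketch of the same comparator driven by qsort(3); the trimmed structs, sample data, and main() are invented for illustration.

    #include <stdio.h>
    #include <stdlib.h>

    struct e820entry { unsigned long long addr; };  /* trimmed to one field */

    struct change_member {
            struct e820entry *pbios;        /* original BIOS entry */
            unsigned long long addr;        /* address of this change point */
    };

    /* Same logic as cpcompare() above, usable with qsort(3). */
    static int cpcompare(const void *a, const void *b)
    {
            struct change_member * const *app = a, * const *bpp = b;
            const struct change_member *ap = *app, *bp = *bpp;

            if (ap->addr != bp->addr)
                    return ap->addr > bp->addr ? 1 : -1;
            /* A point ending its region sorts after one starting a region. */
            return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr);
    }

    int main(void)
    {
            struct e820entry e[2] = { { 0x1000 }, { 0x2000 } };
            struct change_member cp[4] = {  /* start/end points, shuffled */
                    { &e[1], 0x3000 }, { &e[0], 0x1000 },
                    { &e[0], 0x2000 }, { &e[1], 0x2000 },
            };
            struct change_member *p[4] = { &cp[0], &cp[1], &cp[2], &cp[3] };

            qsort(p, 4, sizeof(p[0]), cpcompare);
            for (int i = 0; i < 4; i++)
                    printf("%llx %s\n", p[i]->addr,
                           p[i]->addr == p[i]->pbios->addr ? "start" : "end");
            return 0;
    }
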
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 9d42a52d2331..9b9f18b49918 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
240 if (!strncmp(buf, "xen", 3)) 240 if (!strncmp(buf, "xen", 3))
241 early_console_register(&xenboot_console, keep); 241 early_console_register(&xenboot_console, keep);
242#endif 242#endif
243#ifdef CONFIG_EARLY_PRINTK_MRST 243#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
244 if (!strncmp(buf, "mrst", 4)) { 244 if (!strncmp(buf, "mrst", 4)) {
245 mrst_early_console_init(); 245 mrst_early_console_init();
246 early_console_register(&early_mrst_console, keep); 246 early_console_register(&early_mrst_console, keep);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 22d0e21b4dd7..7b784f4ef1e4 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44#include <linux/linkage.h> 44#include <linux/linkage.h>
45#include <linux/err.h>
45#include <asm/thread_info.h> 46#include <asm/thread_info.h>
46#include <asm/irqflags.h> 47#include <asm/irqflags.h>
47#include <asm/errno.h> 48#include <asm/errno.h>
@@ -81,8 +82,6 @@
81 * enough to patch inline, increasing performance. 82 * enough to patch inline, increasing performance.
82 */ 83 */
83 84
84#define nr_syscalls ((syscall_table_size)/4)
85
86#ifdef CONFIG_PREEMPT 85#ifdef CONFIG_PREEMPT
87#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF 86#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
88#else 87#else
@@ -99,12 +98,6 @@
99#endif 98#endif
100.endm 99.endm
101 100
102#ifdef CONFIG_VM86
103#define resume_userspace_sig check_userspace
104#else
105#define resume_userspace_sig resume_userspace
106#endif
107
108/* 101/*
109 * User gs save/restore 102 * User gs save/restore
110 * 103 *
@@ -328,10 +321,19 @@ ret_from_exception:
328 preempt_stop(CLBR_ANY) 321 preempt_stop(CLBR_ANY)
329ret_from_intr: 322ret_from_intr:
330 GET_THREAD_INFO(%ebp) 323 GET_THREAD_INFO(%ebp)
331check_userspace: 324resume_userspace_sig:
325#ifdef CONFIG_VM86
332 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS 326 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
333 movb PT_CS(%esp), %al 327 movb PT_CS(%esp), %al
334 andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax 328 andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
329#else
330 /*
 331 * We can be coming here from a syscall done in kernel space,
332 * e.g. a failed kernel_execve().
333 */
334 movl PT_CS(%esp), %eax
335 andl $SEGMENT_RPL_MASK, %eax
336#endif
335 cmpl $USER_RPL, %eax 337 cmpl $USER_RPL, %eax
336 jb resume_kernel # not returning to v8086 or userspace 338 jb resume_kernel # not returning to v8086 or userspace
337 339
@@ -423,7 +425,7 @@ sysenter_past_esp:
423 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) 425 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
424 jnz sysenter_audit 426 jnz sysenter_audit
425sysenter_do_call: 427sysenter_do_call:
426 cmpl $(nr_syscalls), %eax 428 cmpl $(NR_syscalls), %eax
427 jae syscall_badsys 429 jae syscall_badsys
428 call *sys_call_table(,%eax,4) 430 call *sys_call_table(,%eax,4)
429 movl %eax,PT_EAX(%esp) 431 movl %eax,PT_EAX(%esp)
@@ -455,7 +457,7 @@ sysenter_audit:
455 movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ 457 movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
456 movl %eax,%edx /* 2nd arg: syscall number */ 458 movl %eax,%edx /* 2nd arg: syscall number */
457 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ 459 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
458 call audit_syscall_entry 460 call __audit_syscall_entry
459 pushl_cfi %ebx 461 pushl_cfi %ebx
460 movl PT_EAX(%esp),%eax /* reload syscall number */ 462 movl PT_EAX(%esp),%eax /* reload syscall number */
461 jmp sysenter_do_call 463 jmp sysenter_do_call
@@ -466,11 +468,10 @@ sysexit_audit:
466 TRACE_IRQS_ON 468 TRACE_IRQS_ON
467 ENABLE_INTERRUPTS(CLBR_ANY) 469 ENABLE_INTERRUPTS(CLBR_ANY)
468 movl %eax,%edx /* second arg, syscall return value */ 470 movl %eax,%edx /* second arg, syscall return value */
469 cmpl $0,%eax /* is it < 0? */ 471 cmpl $-MAX_ERRNO,%eax /* is it an error ? */
470 setl %al /* 1 if so, 0 if not */ 472 setbe %al /* 1 if so, 0 if not */
471 movzbl %al,%eax /* zero-extend that */ 473 movzbl %al,%eax /* zero-extend that */
472 inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ 474 call __audit_syscall_exit
473 call audit_syscall_exit
474 DISABLE_INTERRUPTS(CLBR_ANY) 475 DISABLE_INTERRUPTS(CLBR_ANY)
475 TRACE_IRQS_OFF 476 TRACE_IRQS_OFF
476 movl TI_flags(%ebp), %ecx 477 movl TI_flags(%ebp), %ecx
@@ -504,7 +505,7 @@ ENTRY(system_call)
504 # system call tracing in operation / emulation 505 # system call tracing in operation / emulation
505 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) 506 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
506 jnz syscall_trace_entry 507 jnz syscall_trace_entry
507 cmpl $(nr_syscalls), %eax 508 cmpl $(NR_syscalls), %eax
508 jae syscall_badsys 509 jae syscall_badsys
509syscall_call: 510syscall_call:
510 call *sys_call_table(,%eax,4) 511 call *sys_call_table(,%eax,4)
@@ -654,7 +655,7 @@ syscall_trace_entry:
654 movl %esp, %eax 655 movl %esp, %eax
655 call syscall_trace_enter 656 call syscall_trace_enter
656 /* What it returned is what we'll actually use. */ 657 /* What it returned is what we'll actually use. */
657 cmpl $(nr_syscalls), %eax 658 cmpl $(NR_syscalls), %eax
658 jnae syscall_call 659 jnae syscall_call
659 jmp syscall_exit 660 jmp syscall_exit
660END(syscall_trace_entry) 661END(syscall_trace_entry)
@@ -694,29 +695,28 @@ END(syscall_badsys)
694 * System calls that need a pt_regs pointer. 695 * System calls that need a pt_regs pointer.
695 */ 696 */
696#define PTREGSCALL0(name) \ 697#define PTREGSCALL0(name) \
697 ALIGN; \ 698ENTRY(ptregs_##name) ; \
698ptregs_##name: \
699 leal 4(%esp),%eax; \ 699 leal 4(%esp),%eax; \
700 jmp sys_##name; 700 jmp sys_##name; \
701ENDPROC(ptregs_##name)
701 702
702#define PTREGSCALL1(name) \ 703#define PTREGSCALL1(name) \
703 ALIGN; \ 704ENTRY(ptregs_##name) ; \
704ptregs_##name: \
705 leal 4(%esp),%edx; \ 705 leal 4(%esp),%edx; \
706 movl (PT_EBX+4)(%esp),%eax; \ 706 movl (PT_EBX+4)(%esp),%eax; \
707 jmp sys_##name; 707 jmp sys_##name; \
708ENDPROC(ptregs_##name)
708 709
709#define PTREGSCALL2(name) \ 710#define PTREGSCALL2(name) \
710 ALIGN; \ 711ENTRY(ptregs_##name) ; \
711ptregs_##name: \
712 leal 4(%esp),%ecx; \ 712 leal 4(%esp),%ecx; \
713 movl (PT_ECX+4)(%esp),%edx; \ 713 movl (PT_ECX+4)(%esp),%edx; \
714 movl (PT_EBX+4)(%esp),%eax; \ 714 movl (PT_EBX+4)(%esp),%eax; \
715 jmp sys_##name; 715 jmp sys_##name; \
716ENDPROC(ptregs_##name)
716 717
717#define PTREGSCALL3(name) \ 718#define PTREGSCALL3(name) \
718 ALIGN; \ 719ENTRY(ptregs_##name) ; \
719ptregs_##name: \
720 CFI_STARTPROC; \ 720 CFI_STARTPROC; \
721 leal 4(%esp),%eax; \ 721 leal 4(%esp),%eax; \
722 pushl_cfi %eax; \ 722 pushl_cfi %eax; \
@@ -741,8 +741,7 @@ PTREGSCALL2(vm86)
741PTREGSCALL1(vm86old) 741PTREGSCALL1(vm86old)
742 742
743/* Clone is an oddball. The 4th arg is in %edi */ 743/* Clone is an oddball. The 4th arg is in %edi */
744 ALIGN; 744ENTRY(ptregs_clone)
745ptregs_clone:
746 CFI_STARTPROC 745 CFI_STARTPROC
747 leal 4(%esp),%eax 746 leal 4(%esp),%eax
748 pushl_cfi %eax 747 pushl_cfi %eax
@@ -1213,11 +1212,6 @@ return_to_handler:
1213 jmp *%ecx 1212 jmp *%ecx
1214#endif 1213#endif
1215 1214
1216.section .rodata,"a"
1217#include "syscall_table_32.S"
1218
1219syscall_table_size=(.-sys_call_table)
1220
1221/* 1215/*
1222 * Some functions should be protected against kprobes 1216 * Some functions should be protected against kprobes
1223 */ 1217 */
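
The two audit hunks above change the fast-path error test from "any negative value" (cmpl $0 / setl) to "inside the -errno window" (cmpl $-MAX_ERRNO / setbe), which is what __audit_syscall_exit() expects. A minimal C sketch of the same classification, with MAX_ERRNO inlined from <linux/err.h> (4095 in this era), assuming nothing else from the kernel:

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095  /* per include/linux/err.h */

    /* True iff ret lies in [-MAX_ERRNO, -1], i.e. encodes an -errno. */
    static bool syscall_failed(long ret)
    {
            /* Unsigned compare: the -errno window maps to the top 4095 values. */
            return (unsigned long)ret >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
            printf("%d\n", syscall_failed(0));       /* 0: success */
            printf("%d\n", syscall_failed(-22));     /* 1: -EINVAL */
            printf("%d\n", syscall_failed(-123456)); /* 0: large negative
                                                        result, not an errno */
            return 0;
    }

The old setl test misclassified large negative syscall results (such as an mmap() return with the high bit set) as failures; the new bound only catches genuine -errno values.
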
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a20e1cb9dc87..cdc79b5cfcd9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
55#include <asm/paravirt.h> 55#include <asm/paravirt.h>
56#include <asm/ftrace.h> 56#include <asm/ftrace.h>
57#include <asm/percpu.h> 57#include <asm/percpu.h>
58#include <linux/err.h>
58 59
59/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 60/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
60#include <linux/elf-em.h> 61#include <linux/elf-em.h>
@@ -319,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
319 movq %rsp, %rsi 320 movq %rsp, %rsi
320 321
321 leaq -RBP(%rsp),%rdi /* arg1 for handler */ 322 leaq -RBP(%rsp),%rdi /* arg1 for handler */
322 testl $3, CS(%rdi) 323 testl $3, CS-RBP(%rsi)
323 je 1f 324 je 1f
324 SWAPGS 325 SWAPGS
325 /* 326 /*
@@ -329,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
329 * moving irq_enter into assembly, which would be too much work) 330 * moving irq_enter into assembly, which would be too much work)
330 */ 331 */
3311: incl PER_CPU_VAR(irq_count) 3321: incl PER_CPU_VAR(irq_count)
332 jne 2f 333 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
333 mov PER_CPU_VAR(irq_stack_ptr),%rsp
334 CFI_DEF_CFA_REGISTER rsi 334 CFI_DEF_CFA_REGISTER rsi
335 335
3362: /* Store previous stack value */ 336 /* Store previous stack value */
337 pushq %rsi 337 pushq %rsi
338 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ 338 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
339 0x77 /* DW_OP_breg7 */, 0, \ 339 0x77 /* DW_OP_breg7 */, 0, \
@@ -481,7 +481,12 @@ GLOBAL(system_call_after_swapgs)
481 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 481 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
482 jnz tracesys 482 jnz tracesys
483system_call_fastpath: 483system_call_fastpath:
484#if __SYSCALL_MASK == ~0
484 cmpq $__NR_syscall_max,%rax 485 cmpq $__NR_syscall_max,%rax
486#else
487 andl $__SYSCALL_MASK,%eax
488 cmpl $__NR_syscall_max,%eax
489#endif
485 ja badsys 490 ja badsys
486 movq %r10,%rcx 491 movq %r10,%rcx
487 call *sys_call_table(,%rax,8) # XXX: rip relative 492 call *sys_call_table(,%rax,8) # XXX: rip relative
@@ -548,7 +553,7 @@ badsys:
548#ifdef CONFIG_AUDITSYSCALL 553#ifdef CONFIG_AUDITSYSCALL
549 /* 554 /*
550 * Fast path for syscall audit without full syscall trace. 555 * Fast path for syscall audit without full syscall trace.
551 * We just call audit_syscall_entry() directly, and then 556 * We just call __audit_syscall_entry() directly, and then
552 * jump back to the normal fast path. 557 * jump back to the normal fast path.
553 */ 558 */
554auditsys: 559auditsys:
@@ -558,22 +563,21 @@ auditsys:
558 movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ 563 movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
559 movq %rax,%rsi /* 2nd arg: syscall number */ 564 movq %rax,%rsi /* 2nd arg: syscall number */
560 movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ 565 movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
561 call audit_syscall_entry 566 call __audit_syscall_entry
562 LOAD_ARGS 0 /* reload call-clobbered registers */ 567 LOAD_ARGS 0 /* reload call-clobbered registers */
563 jmp system_call_fastpath 568 jmp system_call_fastpath
564 569
565 /* 570 /*
566 * Return fast path for syscall audit. Call audit_syscall_exit() 571 * Return fast path for syscall audit. Call __audit_syscall_exit()
567 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT 572 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
568 * masked off. 573 * masked off.
569 */ 574 */
570sysret_audit: 575sysret_audit:
571 movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ 576 movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
572 cmpq $0,%rsi /* is it < 0? */ 577 cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
573 setl %al /* 1 if so, 0 if not */ 578 setbe %al /* 1 if so, 0 if not */
574 movzbl %al,%edi /* zero-extend that into %edi */ 579 movzbl %al,%edi /* zero-extend that into %edi */
575 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ 580 call __audit_syscall_exit
576 call audit_syscall_exit
577 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 581 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
578 jmp sysret_check 582 jmp sysret_check
579#endif /* CONFIG_AUDITSYSCALL */ 583#endif /* CONFIG_AUDITSYSCALL */
@@ -596,7 +600,12 @@ tracesys:
596 */ 600 */
597 LOAD_ARGS ARGOFFSET, 1 601 LOAD_ARGS ARGOFFSET, 1
598 RESTORE_REST 602 RESTORE_REST
603#if __SYSCALL_MASK == ~0
599 cmpq $__NR_syscall_max,%rax 604 cmpq $__NR_syscall_max,%rax
605#else
606 andl $__SYSCALL_MASK,%eax
607 cmpl $__NR_syscall_max,%eax
608#endif
600 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ 609 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
601 movq %r10,%rcx /* fixup for C */ 610 movq %r10,%rcx /* fixup for C */
602 call *sys_call_table(,%rax,8) 611 call *sys_call_table(,%rax,8)
@@ -736,6 +745,40 @@ ENTRY(stub_rt_sigreturn)
736 CFI_ENDPROC 745 CFI_ENDPROC
737END(stub_rt_sigreturn) 746END(stub_rt_sigreturn)
738 747
748#ifdef CONFIG_X86_X32_ABI
749 PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx
750
751ENTRY(stub_x32_rt_sigreturn)
752 CFI_STARTPROC
753 addq $8, %rsp
754 PARTIAL_FRAME 0
755 SAVE_REST
756 movq %rsp,%rdi
757 FIXUP_TOP_OF_STACK %r11
758 call sys32_x32_rt_sigreturn
759 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
760 RESTORE_REST
761 jmp int_ret_from_sys_call
762 CFI_ENDPROC
763END(stub_x32_rt_sigreturn)
764
765ENTRY(stub_x32_execve)
766 CFI_STARTPROC
767 addq $8, %rsp
768 PARTIAL_FRAME 0
769 SAVE_REST
770 FIXUP_TOP_OF_STACK %r11
771 movq %rsp, %rcx
772 call sys32_execve
773 RESTORE_TOP_OF_STACK %r11
774 movq %rax,RAX(%rsp)
775 RESTORE_REST
776 jmp int_ret_from_sys_call
777 CFI_ENDPROC
778END(stub_x32_execve)
779
780#endif
781
739/* 782/*
740 * Build the entry stubs and pointer table with some assembler magic. 783 * Build the entry stubs and pointer table with some assembler magic.
741 * We pack 7 stubs into a single 32-byte chunk, which will fit in a 784 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
@@ -813,7 +856,7 @@ ret_from_intr:
813 856
814 /* Restore saved previous stack */ 857 /* Restore saved previous stack */
815 popq %rsi 858 popq %rsi
816 CFI_DEF_CFA_REGISTER rsi 859 CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
817 leaq ARGOFFSET-RBP(%rsi), %rsp 860 leaq ARGOFFSET-RBP(%rsi), %rsp
818 CFI_DEF_CFA_REGISTER rsp 861 CFI_DEF_CFA_REGISTER rsp
819 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET 862 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
@@ -1480,60 +1523,225 @@ ENTRY(error_exit)
1480 CFI_ENDPROC 1523 CFI_ENDPROC
1481END(error_exit) 1524END(error_exit)
1482 1525
1526/*
1527 * Test if a given stack is an NMI stack or not.
1528 */
1529 .macro test_in_nmi reg stack nmi_ret normal_ret
1530 cmpq %\reg, \stack
1531 ja \normal_ret
1532 subq $EXCEPTION_STKSZ, %\reg
1533 cmpq %\reg, \stack
1534 jb \normal_ret
1535 jmp \nmi_ret
1536 .endm
1483 1537
1484 /* runs on exception stack */ 1538 /* runs on exception stack */
1485ENTRY(nmi) 1539ENTRY(nmi)
1486 INTR_FRAME 1540 INTR_FRAME
1487 PARAVIRT_ADJUST_EXCEPTION_FRAME 1541 PARAVIRT_ADJUST_EXCEPTION_FRAME
1488 pushq_cfi $-1 1542 /*
1543 * We allow breakpoints in NMIs. If a breakpoint occurs, then
1544 * the iretq it performs will take us out of NMI context.
1545 * This means that we can have nested NMIs where the next
1546 * NMI is using the top of the stack of the previous NMI. We
1547 * can't let it execute because the nested NMI will corrupt the
1548 * stack of the previous NMI. NMI handlers are not re-entrant
1549 * anyway.
1550 *
1551 * To handle this case we do the following:
 1552 * Check a special location on the stack that contains
1553 * a variable that is set when NMIs are executing.
1554 * The interrupted task's stack is also checked to see if it
1555 * is an NMI stack.
1556 * If the variable is not set and the stack is not the NMI
1557 * stack then:
1558 * o Set the special variable on the stack
1559 * o Copy the interrupt frame into a "saved" location on the stack
1560 * o Copy the interrupt frame into a "copy" location on the stack
1561 * o Continue processing the NMI
1562 * If the variable is set or the previous stack is the NMI stack:
 1563 * o Modify the "copy" location to jump to repeat_nmi
 1564 * o Return to the first NMI
1565 *
 1566 * Now on exit of the first NMI, we first clear the stack variable.
1567 * The NMI stack will tell any nested NMIs at that point that it is
1568 * nested. Then we pop the stack normally with iret, and if there was
1569 * a nested NMI that updated the copy interrupt stack frame, a
1570 * jump will be made to the repeat_nmi code that will handle the second
1571 * NMI.
1572 */
1573
 1574 /* Use %rdx as our temp variable throughout */
1575 pushq_cfi %rdx
1576 CFI_REL_OFFSET rdx, 0
1577
1578 /*
1579 * If %cs was not the kernel segment, then the NMI triggered in user
1580 * space, which means it is definitely not nested.
1581 */
1582 cmpl $__KERNEL_CS, 16(%rsp)
1583 jne first_nmi
1584
1585 /*
1586 * Check the special variable on the stack to see if NMIs are
1587 * executing.
1588 */
1589 cmpl $1, -8(%rsp)
1590 je nested_nmi
1591
1592 /*
1593 * Now test if the previous stack was an NMI stack.
1594 * We need the double check. We check the NMI stack to satisfy the
1595 * race when the first NMI clears the variable before returning.
1596 * We check the variable because the first NMI could be in a
1597 * breakpoint routine using a breakpoint stack.
1598 */
1599 lea 6*8(%rsp), %rdx
1600 test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
1601 CFI_REMEMBER_STATE
1602
1603nested_nmi:
1604 /*
1605 * Do nothing if we interrupted the fixup in repeat_nmi.
1606 * It's about to repeat the NMI handler, so we are fine
1607 * with ignoring this one.
1608 */
1609 movq $repeat_nmi, %rdx
1610 cmpq 8(%rsp), %rdx
1611 ja 1f
1612 movq $end_repeat_nmi, %rdx
1613 cmpq 8(%rsp), %rdx
1614 ja nested_nmi_out
1615
16161:
 1617 /* Set up the interrupted NMI's stack to jump to repeat_nmi */
1618 leaq -6*8(%rsp), %rdx
1619 movq %rdx, %rsp
1620 CFI_ADJUST_CFA_OFFSET 6*8
1621 pushq_cfi $__KERNEL_DS
1622 pushq_cfi %rdx
1623 pushfq_cfi
1624 pushq_cfi $__KERNEL_CS
1625 pushq_cfi $repeat_nmi
1626
1627 /* Put stack back */
1628 addq $(11*8), %rsp
1629 CFI_ADJUST_CFA_OFFSET -11*8
1630
1631nested_nmi_out:
1632 popq_cfi %rdx
1633 CFI_RESTORE rdx
1634
1635 /* No need to check faults here */
1636 INTERRUPT_RETURN
1637
1638 CFI_RESTORE_STATE
1639first_nmi:
1640 /*
1641 * Because nested NMIs will use the pushed location that we
1642 * stored in rdx, we must keep that space available.
1643 * Here's what our stack frame will look like:
1644 * +-------------------------+
1645 * | original SS |
1646 * | original Return RSP |
1647 * | original RFLAGS |
1648 * | original CS |
1649 * | original RIP |
1650 * +-------------------------+
1651 * | temp storage for rdx |
1652 * +-------------------------+
1653 * | NMI executing variable |
1654 * +-------------------------+
1655 * | Saved SS |
1656 * | Saved Return RSP |
1657 * | Saved RFLAGS |
1658 * | Saved CS |
1659 * | Saved RIP |
1660 * +-------------------------+
1661 * | copied SS |
1662 * | copied Return RSP |
1663 * | copied RFLAGS |
1664 * | copied CS |
1665 * | copied RIP |
1666 * +-------------------------+
1667 * | pt_regs |
1668 * +-------------------------+
1669 *
1670 * The saved stack frame is used to fix up the copied stack frame
1671 * that a nested NMI may change to make the interrupted NMI iret jump
1672 * to the repeat_nmi. The original stack frame and the temp storage
 1673 * are also used by nested NMIs and cannot be trusted on exit.
1674 */
1675 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
1676 movq (%rsp), %rdx
1677 CFI_RESTORE rdx
1678
1679 /* Set the NMI executing variable on the stack. */
1680 pushq_cfi $1
1681
1682 /* Copy the stack frame to the Saved frame */
1683 .rept 5
1684 pushq_cfi 6*8(%rsp)
1685 .endr
1686 CFI_DEF_CFA_OFFSET SS+8-RIP
1687
1688 /* Everything up to here is safe from nested NMIs */
1689
1690 /*
1691 * If there was a nested NMI, the first NMI's iret will return
1692 * here. But NMIs are still enabled and we can take another
1693 * nested NMI. The nested NMI checks the interrupted RIP to see
1694 * if it is between repeat_nmi and end_repeat_nmi, and if so
1695 * it will just return, as we are about to repeat an NMI anyway.
1696 * This makes it safe to copy to the stack frame that a nested
1697 * NMI will update.
1698 */
1699repeat_nmi:
1700 /*
1701 * Update the stack variable to say we are still in NMI (the update
 1702 * is benign for the non-repeat case, where 1 was just pushed
 1703 * into this very stack slot).
1704 */
1705 movq $1, 5*8(%rsp)
1706
1707 /* Make another copy, this one may be modified by nested NMIs */
1708 .rept 5
1709 pushq_cfi 4*8(%rsp)
1710 .endr
1711 CFI_DEF_CFA_OFFSET SS+8-RIP
1712end_repeat_nmi:
1713
1714 /*
1715 * Everything below this point can be preempted by a nested
1716 * NMI if the first NMI took an exception and reset our iret stack
1717 * so that we repeat another NMI.
1718 */
1719 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1489 subq $ORIG_RAX-R15, %rsp 1720 subq $ORIG_RAX-R15, %rsp
1490 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1721 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1722 /*
1723 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
 1724 * as we should not be calling schedule in NMI context,
 1725 * even with normal interrupts enabled. An NMI should not be
1726 * setting NEED_RESCHED or anything that normal interrupts and
1727 * exceptions might do.
1728 */
1491 call save_paranoid 1729 call save_paranoid
1492 DEFAULT_FRAME 0 1730 DEFAULT_FRAME 0
1493 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1731 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1494 movq %rsp,%rdi 1732 movq %rsp,%rdi
1495 movq $-1,%rsi 1733 movq $-1,%rsi
1496 call do_nmi 1734 call do_nmi
1497#ifdef CONFIG_TRACE_IRQFLAGS
1498 /* paranoidexit; without TRACE_IRQS_OFF */
1499 /* ebx: no swapgs flag */
1500 DISABLE_INTERRUPTS(CLBR_NONE)
1501 testl %ebx,%ebx /* swapgs needed? */ 1735 testl %ebx,%ebx /* swapgs needed? */
1502 jnz nmi_restore 1736 jnz nmi_restore
1503 testl $3,CS(%rsp)
1504 jnz nmi_userspace
1505nmi_swapgs: 1737nmi_swapgs:
1506 SWAPGS_UNSAFE_STACK 1738 SWAPGS_UNSAFE_STACK
1507nmi_restore: 1739nmi_restore:
1508 RESTORE_ALL 8 1740 RESTORE_ALL 8
1741 /* Clear the NMI executing stack variable */
1742 movq $0, 10*8(%rsp)
1509 jmp irq_return 1743 jmp irq_return
1510nmi_userspace:
1511 GET_THREAD_INFO(%rcx)
1512 movl TI_flags(%rcx),%ebx
1513 andl $_TIF_WORK_MASK,%ebx
1514 jz nmi_swapgs
1515 movq %rsp,%rdi /* &pt_regs */
1516 call sync_regs
1517 movq %rax,%rsp /* switch stack for scheduling */
1518 testl $_TIF_NEED_RESCHED,%ebx
1519 jnz nmi_schedule
1520 movl %ebx,%edx /* arg3: thread flags */
1521 ENABLE_INTERRUPTS(CLBR_NONE)
1522 xorl %esi,%esi /* arg2: oldset */
1523 movq %rsp,%rdi /* arg1: &pt_regs */
1524 call do_notify_resume
1525 DISABLE_INTERRUPTS(CLBR_NONE)
1526 jmp nmi_userspace
1527nmi_schedule:
1528 ENABLE_INTERRUPTS(CLBR_ANY)
1529 call schedule
1530 DISABLE_INTERRUPTS(CLBR_ANY)
1531 jmp nmi_userspace
1532 CFI_ENDPROC 1744 CFI_ENDPROC
1533#else
1534 jmp paranoid_exit
1535 CFI_ENDPROC
1536#endif
1537END(nmi) 1745END(nmi)
1538 1746
1539ENTRY(ignore_sysret) 1747ENTRY(ignore_sysret)
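
The nested-NMI handling added above rests on one decision made at NMI entry: is this NMI interrupting another NMI? The asm answers it from the saved CS, the on-stack "NMI executing" variable, and the test_in_nmi range check. A compact restatement of that decision in standalone C, with EXCEPTION_STKSZ inlined as 4096 (the x86-64 value with 4K pages) and all inputs being what the asm reads off the iret frame:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EXCEPTION_STKSZ 4096UL

    /* test_in_nmi: was the interrupted %rsp inside the NMI stack? */
    static bool in_nmi_stack(uint64_t nmi_stack_top, uint64_t sp)
    {
            return sp <= nmi_stack_top &&
                   sp >= nmi_stack_top - EXCEPTION_STKSZ;
    }

    /*
     * The entry-time decision: an NMI from user space cannot be nested;
     * otherwise it is nested if the "NMI executing" variable is set, or
     * (to close the race where the first NMI has already cleared that
     * variable) if the interrupted stack is the NMI stack itself.
     */
    static bool nmi_is_nested(bool from_kernel, int nmi_executing,
                              uint64_t nmi_stack_top, uint64_t interrupted_sp)
    {
            if (!from_kernel)
                    return false;
            if (nmi_executing)
                    return true;
            return in_nmi_stack(nmi_stack_top, interrupted_sp);
    }

    int main(void)
    {
            uint64_t top = 0xffff880000006000ull;  /* made-up NMI stack top */

            printf("%d\n", nmi_is_nested(true, 0, top, top - 64));   /* 1 */
            printf("%d\n", nmi_is_nested(true, 0, top, top - 8192)); /* 0 */
            return 0;
    }
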
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index e11e39478a49..40f4eb3766d1 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -417,6 +417,10 @@ ENTRY(phys_base)
417ENTRY(idt_table) 417ENTRY(idt_table)
418 .skip IDT_ENTRIES * 16 418 .skip IDT_ENTRIES * 16
419 419
420 .align L1_CACHE_BYTES
421ENTRY(nmi_idt_table)
422 .skip IDT_ENTRIES * 16
423
420 __PAGE_ALIGNED_BSS 424 __PAGE_ALIGNED_BSS
421 .align PAGE_SIZE 425 .align PAGE_SIZE
422ENTRY(empty_zero_page) 426ENTRY(empty_zero_page)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 739d8598f789..7734bcbb5a3a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -16,6 +16,7 @@
16#include <asm/uaccess.h> 16#include <asm/uaccess.h>
17#include <asm/ptrace.h> 17#include <asm/ptrace.h>
18#include <asm/i387.h> 18#include <asm/i387.h>
19#include <asm/fpu-internal.h>
19#include <asm/user.h> 20#include <asm/user.h>
20 21
21#ifdef CONFIG_X86_64 22#ifdef CONFIG_X86_64
@@ -32,6 +33,86 @@
32# define user32_fxsr_struct user_fxsr_struct 33# define user32_fxsr_struct user_fxsr_struct
33#endif 34#endif
34 35
36/*
37 * Were we in an interrupt that interrupted kernel mode?
38 *
39 * We can do a kernel_fpu_begin/end() pair *ONLY* if that
40 * pair does nothing at all: the thread must not have fpu (so
41 * that we don't try to save the FPU state), and TS must
42 * be set (so that the clts/stts pair does nothing that is
43 * visible in the interrupted kernel thread).
44 */
45static inline bool interrupted_kernel_fpu_idle(void)
46{
47 return !__thread_has_fpu(current) &&
48 (read_cr0() & X86_CR0_TS);
49}
50
51/*
52 * Were we in user mode (or vm86 mode) when we were
53 * interrupted?
54 *
55 * Doing kernel_fpu_begin/end() is ok if we are running
56 * in an interrupt context from user mode - we'll just
57 * save the FPU state as required.
58 */
59static inline bool interrupted_user_mode(void)
60{
61 struct pt_regs *regs = get_irq_regs();
62 return regs && user_mode_vm(regs);
63}
64
65/*
66 * Can we use the FPU in kernel mode with the
67 * whole "kernel_fpu_begin/end()" sequence?
68 *
69 * It's always ok in process context (ie "not interrupt")
70 * but it is sometimes ok even from an irq.
71 */
72bool irq_fpu_usable(void)
73{
74 return !in_interrupt() ||
75 interrupted_user_mode() ||
76 interrupted_kernel_fpu_idle();
77}
78EXPORT_SYMBOL(irq_fpu_usable);
79
80void kernel_fpu_begin(void)
81{
82 struct task_struct *me = current;
83
84 WARN_ON_ONCE(!irq_fpu_usable());
85 preempt_disable();
86 if (__thread_has_fpu(me)) {
87 __save_init_fpu(me);
88 __thread_clear_has_fpu(me);
89 /* We do 'stts()' in kernel_fpu_end() */
90 } else {
91 percpu_write(fpu_owner_task, NULL);
92 clts();
93 }
94}
95EXPORT_SYMBOL(kernel_fpu_begin);
96
97void kernel_fpu_end(void)
98{
99 stts();
100 preempt_enable();
101}
102EXPORT_SYMBOL(kernel_fpu_end);
103
104void unlazy_fpu(struct task_struct *tsk)
105{
106 preempt_disable();
107 if (__thread_has_fpu(tsk)) {
108 __save_init_fpu(tsk);
109 __thread_fpu_end(tsk);
110 } else
111 tsk->fpu_counter = 0;
112 preempt_enable();
113}
114EXPORT_SYMBOL(unlazy_fpu);
115
35#ifdef CONFIG_MATH_EMULATION 116#ifdef CONFIG_MATH_EMULATION
36# define HAVE_HWFP (boot_cpu_data.hard_math) 117# define HAVE_HWFP (boot_cpu_data.hard_math)
37#else 118#else
@@ -44,7 +125,7 @@ EXPORT_SYMBOL_GPL(xstate_size);
44unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); 125unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
45static struct i387_fxsave_struct fx_scratch __cpuinitdata; 126static struct i387_fxsave_struct fx_scratch __cpuinitdata;
46 127
47void __cpuinit mxcsr_feature_mask_init(void) 128static void __cpuinit mxcsr_feature_mask_init(void)
48{ 129{
49 unsigned long mask = 0; 130 unsigned long mask = 0;
50 131
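
The block of FPU helpers moved into i387.c above defines the contract for kernel-mode FPU use: check irq_fpu_usable() first, then bracket the SSE/FPU region with kernel_fpu_begin()/kernel_fpu_end(). A sketch of the expected calling pattern, with the kernel API stubbed out so the example stands alone; checksum_sse() and checksum_generic() are hypothetical workers:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stubs standing in for the i387.c API above. */
    static bool irq_fpu_usable(void)   { return true; }
    static void kernel_fpu_begin(void) { /* save owner's state, clts */ }
    static void kernel_fpu_end(void)   { /* stts, re-enable preemption */ }

    /* Hypothetical workers: an SSE path and an integer fallback. */
    static uint32_t checksum_sse(const void *d, size_t n)     { (void)d; return (uint32_t)n; }
    static uint32_t checksum_generic(const void *d, size_t n) { (void)d; return (uint32_t)n; }

    static uint32_t checksum(const void *data, size_t len)
    {
            uint32_t sum;

            if (!irq_fpu_usable())  /* e.g. irq interrupted a kernel FPU user */
                    return checksum_generic(data, len);

            kernel_fpu_begin();     /* FPU is now ours, preemption is off */
            sum = checksum_sse(data, len);
            kernel_fpu_end();
            return sum;
    }

    int main(void)
    {
            printf("%u\n", checksum("abc", 3));
            return 0;
    }
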
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 610485223bdb..36d1853e91af 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -15,7 +15,6 @@
15#include <linux/delay.h> 15#include <linux/delay.h>
16 16
17#include <linux/atomic.h> 17#include <linux/atomic.h>
18#include <asm/system.h>
19#include <asm/timer.h> 18#include <asm/timer.h>
20#include <asm/hw_irq.h> 19#include <asm/hw_irq.h>
21#include <asm/pgtable.h> 20#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7943e0c21bde..3dafc6003b7c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -282,8 +282,13 @@ void fixup_irqs(void)
282 else if (!(warned++)) 282 else if (!(warned++))
283 set_affinity = 0; 283 set_affinity = 0;
284 284
285 /*
286 * We unmask if the irq was not marked masked by the
287 * core code. That respects the lazy irq disable
288 * behaviour.
289 */
285 if (!irqd_can_move_in_process_context(data) && 290 if (!irqd_can_move_in_process_context(data) &&
286 !irqd_irq_disabled(data) && chip->irq_unmask) 291 !irqd_irq_masked(data) && chip->irq_unmask)
287 chip->irq_unmask(data); 292 chip->irq_unmask(data);
288 293
289 raw_spin_unlock(&desc->lock); 294 raw_spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 72090705a656..58b7f27cb3e9 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs);
28EXPORT_PER_CPU_SYMBOL(irq_regs); 28EXPORT_PER_CPU_SYMBOL(irq_regs);
29 29
30#ifdef CONFIG_DEBUG_STACKOVERFLOW 30#ifdef CONFIG_DEBUG_STACKOVERFLOW
31
32int sysctl_panic_on_stackoverflow __read_mostly;
33
31/* Debugging check for stack overflow: is there less than 1KB free? */ 34/* Debugging check for stack overflow: is there less than 1KB free? */
32static int check_stack_overflow(void) 35static int check_stack_overflow(void)
33{ 36{
@@ -43,6 +46,8 @@ static void print_stack_overflow(void)
43{ 46{
44 printk(KERN_WARNING "low stack detected by irq handler\n"); 47 printk(KERN_WARNING "low stack detected by irq handler\n");
45 dump_stack(); 48 dump_stack();
49 if (sysctl_panic_on_stackoverflow)
50 panic("low stack detected by irq handler - check messages\n");
46} 51}
47 52
48#else 53#else
@@ -95,13 +100,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
95 irqctx->tinfo.task = curctx->tinfo.task; 100 irqctx->tinfo.task = curctx->tinfo.task;
96 irqctx->tinfo.previous_esp = current_stack_pointer; 101 irqctx->tinfo.previous_esp = current_stack_pointer;
97 102
98 /* 103 /* Copy the preempt_count so that the [soft]irq checks work. */
99 * Copy the softirq bits in preempt_count so that the 104 irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
100 * softirq checks work in the hardirq context.
101 */
102 irqctx->tinfo.preempt_count =
103 (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
104 (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
105 105
106 if (unlikely(overflow)) 106 if (unlikely(overflow))
107 call_on_stack(print_stack_overflow, isp); 107 call_on_stack(print_stack_overflow, isp);
@@ -191,7 +191,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
191 if (unlikely(!desc)) 191 if (unlikely(!desc))
192 return false; 192 return false;
193 193
194 if (!execute_on_irq_stack(overflow, desc, irq)) { 194 if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
195 if (unlikely(overflow)) 195 if (unlikely(overflow))
196 print_stack_overflow(); 196 print_stack_overflow();
197 desc->handle_irq(irq, desc); 197 desc->handle_irq(irq, desc);
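
The irq_32.c hunk above drops the SOFTIRQ_MASK bit-splice in favour of copying the whole preempt_count, so the hardirq bits are visible on the irq stack as well. For reference, the difference between the two assignments, with SOFTIRQ_MASK inlined from the 3.x <linux/hardirq.h> layout (8 softirq bits at shift 8) and a made-up count value:

    #include <stdio.h>

    #define SOFTIRQ_MASK 0x0000ff00u   /* 8 bits at SOFTIRQ_SHIFT == 8 */

    int main(void)
    {
            unsigned int irqctx_count = 0;          /* irq stack's thread_info */
            unsigned int curctx_count = 0x00010a00; /* made up: hardirq bit
                                                       plus softirq bits */

            /* Old code: only the softirq bits crossed over. */
            unsigned int spliced = (irqctx_count & ~SOFTIRQ_MASK) |
                                   (curctx_count & SOFTIRQ_MASK);

            /* New code: the full count crosses over. */
            unsigned int copied = curctx_count;

            printf("spliced=%08x copied=%08x\n", spliced, copied);
            return 0;
    }
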
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 69bca468c47a..d04d3ecded62 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
26DEFINE_PER_CPU(struct pt_regs *, irq_regs); 26DEFINE_PER_CPU(struct pt_regs *, irq_regs);
27EXPORT_PER_CPU_SYMBOL(irq_regs); 27EXPORT_PER_CPU_SYMBOL(irq_regs);
28 28
29int sysctl_panic_on_stackoverflow;
30
29/* 31/*
30 * Probabilistic stack overflow check: 32 * Probabilistic stack overflow check:
31 * 33 *
@@ -36,18 +38,39 @@ EXPORT_PER_CPU_SYMBOL(irq_regs);
36static inline void stack_overflow_check(struct pt_regs *regs) 38static inline void stack_overflow_check(struct pt_regs *regs)
37{ 39{
38#ifdef CONFIG_DEBUG_STACKOVERFLOW 40#ifdef CONFIG_DEBUG_STACKOVERFLOW
41#define STACK_TOP_MARGIN 128
42 struct orig_ist *oist;
43 u64 irq_stack_top, irq_stack_bottom;
44 u64 estack_top, estack_bottom;
39 u64 curbase = (u64)task_stack_page(current); 45 u64 curbase = (u64)task_stack_page(current);
40 46
41 if (user_mode_vm(regs)) 47 if (user_mode_vm(regs))
42 return; 48 return;
43 49
44 WARN_ONCE(regs->sp >= curbase && 50 if (regs->sp >= curbase + sizeof(struct thread_info) +
45 regs->sp <= curbase + THREAD_SIZE && 51 sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
46 regs->sp < curbase + sizeof(struct thread_info) + 52 regs->sp <= curbase + THREAD_SIZE)
47 sizeof(struct pt_regs) + 128, 53 return;
54
55 irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) +
56 STACK_TOP_MARGIN;
57 irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr);
58 if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
59 return;
60
61 oist = &__get_cpu_var(orig_ist);
62 estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
63 estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
64 if (regs->sp >= estack_top && regs->sp <= estack_bottom)
65 return;
66
 67 WARN_ONCE(1, "do_IRQ(): %s has overflowed the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
68 current->comm, curbase, regs->sp,
69 irq_stack_top, irq_stack_bottom,
70 estack_top, estack_bottom);
48 71
49 "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", 72 if (sysctl_panic_on_stackoverflow)
50 current->comm, curbase, regs->sp); 73 panic("low stack detected by irq handler - check messages\n");
51#endif 74#endif
52} 75}
53 76
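
The rewritten stack_overflow_check() above now treats three stacks as legitimate (the task stack, the per-cpu irq stack, and the exception stacks) and warns only when %rsp is on none of them, keeping a 128-byte margin below each top. The skeleton of that test, reduced to a range helper in standalone C; all addresses here are made up:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define STACK_TOP_MARGIN 128

    /* 'top' is the lowest usable address (stacks grow down),
     * 'bottom' the highest; require the margin above 'top'. */
    static bool within(uint64_t sp, uint64_t top, uint64_t bottom)
    {
            return sp >= top + STACK_TOP_MARGIN && sp <= bottom;
    }

    int main(void)
    {
            uint64_t task_top = 0x10000, task_bottom = 0x12000; /* made up */
            uint64_t irq_top  = 0x20000, irq_bottom  = 0x24000; /* made up */
            uint64_t sp = 0x20040;   /* inside the irq stack's margin */

            /* Warn only if sp is on none of the known stacks. */
            bool ok = within(sp, task_top, task_bottom) ||
                      within(sp, irq_top, irq_bottom);
            printf("%s\n", ok ? "ok" : "WARN: stack overflow");
            return 0;
    }
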
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 313fb5cddbce..252981afd6c4 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -16,7 +16,6 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17 17
18#include <linux/atomic.h> 18#include <linux/atomic.h>
19#include <asm/system.h>
20#include <asm/timer.h> 19#include <asm/timer.h>
21#include <asm/hw_irq.h> 20#include <asm/hw_irq.h>
22#include <asm/pgtable.h> 21#include <asm/pgtable.h>
@@ -61,7 +60,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
61 outb(0, 0xF0); 60 outb(0, 0xF0);
62 if (ignore_fpu_irq || !boot_cpu_data.hard_math) 61 if (ignore_fpu_irq || !boot_cpu_data.hard_math)
63 return IRQ_NONE; 62 return IRQ_NONE;
64 math_error(get_irq_regs(), 0, 16); 63 math_error(get_irq_regs(), 0, X86_TRAP_MF);
65 return IRQ_HANDLED; 64 return IRQ_HANDLED;
66} 65}
67 66
@@ -306,10 +305,10 @@ void __init native_init_IRQ(void)
306 * us. (some of these will be overridden and become 305 * us. (some of these will be overridden and become
307 * 'special' SMP interrupts) 306 * 'special' SMP interrupts)
308 */ 307 */
309 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { 308 i = FIRST_EXTERNAL_VECTOR;
309 for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
310 /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ 310 /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
311 if (!test_bit(i, used_vectors)) 311 set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
312 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
313 } 312 }
314 313
315 if (!acpi_ioapic && !of_ioapic) 314 if (!acpi_ioapic && !of_ioapic)
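
The irqinit.c hunk above swaps an explicit test_bit() check inside the loop for the for_each_clear_bit_from() iterator, which visits only vectors whose bit is still clear. A userspace sketch of what that iteration does, with a toy find_next_zero_bit() standing in for the kernel's optimized bitmap helper and toy constants in place of the real vector numbers:

    #include <stdio.h>

    #define NR_VECTORS 32            /* toy size; the kernel uses 256 */
    #define FIRST_EXTERNAL_VECTOR 4  /* toy value; really 0x20 */

    static unsigned long used_vectors;   /* bit i set => vector i taken */

    static int find_next_zero_bit(unsigned long map, int size, int from)
    {
            for (int i = from; i < size; i++)
                    if (!(map & (1UL << i)))
                            return i;
            return size;
    }

    int main(void)
    {
            used_vectors = 0x51;     /* pretend a few vectors are taken */

            /* for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { ... } */
            for (int i = find_next_zero_bit(used_vectors, NR_VECTORS,
                                            FIRST_EXTERNAL_VECTOR);
                 i < NR_VECTORS;
                 i = find_next_zero_bit(used_vectors, NR_VECTORS, i + 1))
                    printf("set_intr_gate(%d)\n", i);
            return 0;
    }
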
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 90fcf62854bb..1d5d31ea686b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -68,16 +68,9 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
68 return count; 68 return count;
69} 69}
70 70
71static int setup_data_open(struct inode *inode, struct file *file)
72{
73 file->private_data = inode->i_private;
74
75 return 0;
76}
77
78static const struct file_operations fops_setup_data = { 71static const struct file_operations fops_setup_data = {
79 .read = setup_data_read, 72 .read = setup_data_read,
80 .open = setup_data_open, 73 .open = simple_open,
81 .llseek = default_llseek, 74 .llseek = default_llseek,
82}; 75};
83 76
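
The kdebugfs.c hunk removes an open() handler whose only job was to stash inode->i_private into file->private_data; the generic simple_open() helper in fs/libfs.c was added for exactly this idiom so such one-off handlers could be deleted tree-wide. A standalone sketch of what the helper amounts to, with toy struct definitions in place of the kernel's (check fs/libfs.c for the authoritative version):

    #include <stdio.h>

    struct inode { void *i_private; };
    struct file  { void *private_data; };

    /* What fs/libfs.c's simple_open() amounts to (quoted from memory). */
    static int simple_open(struct inode *inode, struct file *file)
    {
            if (inode->i_private)
                    file->private_data = inode->i_private;
            return 0;
    }

    int main(void)
    {
            int payload = 42;
            struct inode i = { &payload };
            struct file f = { 0 };

            simple_open(&i, &f);
            printf("%d\n", *(int *)f.private_data);
            return 0;
    }
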
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index faba5771acad..8bfb6146f753 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,10 +43,11 @@
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/nmi.h> 44#include <linux/nmi.h>
45#include <linux/hw_breakpoint.h> 45#include <linux/hw_breakpoint.h>
46#include <linux/uaccess.h>
47#include <linux/memory.h>
46 48
47#include <asm/debugreg.h> 49#include <asm/debugreg.h>
48#include <asm/apicdef.h> 50#include <asm/apicdef.h>
49#include <asm/system.h>
50#include <asm/apic.h> 51#include <asm/apic.h>
51#include <asm/nmi.h> 52#include <asm/nmi.h>
52 53
@@ -67,8 +68,6 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
67 { "ss", 4, offsetof(struct pt_regs, ss) }, 68 { "ss", 4, offsetof(struct pt_regs, ss) },
68 { "ds", 4, offsetof(struct pt_regs, ds) }, 69 { "ds", 4, offsetof(struct pt_regs, ds) },
69 { "es", 4, offsetof(struct pt_regs, es) }, 70 { "es", 4, offsetof(struct pt_regs, es) },
70 { "fs", 4, -1 },
71 { "gs", 4, -1 },
72#else 71#else
73 { "ax", 8, offsetof(struct pt_regs, ax) }, 72 { "ax", 8, offsetof(struct pt_regs, ax) },
74 { "bx", 8, offsetof(struct pt_regs, bx) }, 73 { "bx", 8, offsetof(struct pt_regs, bx) },
@@ -90,7 +89,11 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
90 { "flags", 4, offsetof(struct pt_regs, flags) }, 89 { "flags", 4, offsetof(struct pt_regs, flags) },
91 { "cs", 4, offsetof(struct pt_regs, cs) }, 90 { "cs", 4, offsetof(struct pt_regs, cs) },
92 { "ss", 4, offsetof(struct pt_regs, ss) }, 91 { "ss", 4, offsetof(struct pt_regs, ss) },
92 { "ds", 4, -1 },
93 { "es", 4, -1 },
93#endif 94#endif
95 { "fs", 4, -1 },
96 { "gs", 4, -1 },
94}; 97};
95 98
96int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) 99int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
@@ -740,6 +743,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
740 regs->ip = ip; 743 regs->ip = ip;
741} 744}
742 745
746int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
747{
748 int err;
749 char opc[BREAK_INSTR_SIZE];
750
751 bpt->type = BP_BREAKPOINT;
752 err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
753 BREAK_INSTR_SIZE);
754 if (err)
755 return err;
756 err = probe_kernel_write((char *)bpt->bpt_addr,
757 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
758#ifdef CONFIG_DEBUG_RODATA
759 if (!err)
760 return err;
761 /*
762 * It is safe to call text_poke() because normal kernel execution
763 * is stopped on all cores, so long as the text_mutex is not locked.
764 */
765 if (mutex_is_locked(&text_mutex))
766 return -EBUSY;
767 text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
768 BREAK_INSTR_SIZE);
769 err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
770 if (err)
771 return err;
772 if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
773 return -EINVAL;
774 bpt->type = BP_POKE_BREAKPOINT;
775#endif /* CONFIG_DEBUG_RODATA */
776 return err;
777}
778
779int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
780{
781#ifdef CONFIG_DEBUG_RODATA
782 int err;
783 char opc[BREAK_INSTR_SIZE];
784
785 if (bpt->type != BP_POKE_BREAKPOINT)
786 goto knl_write;
787 /*
788 * It is safe to call text_poke() because normal kernel execution
789 * is stopped on all cores, so long as the text_mutex is not locked.
790 */
791 if (mutex_is_locked(&text_mutex))
792 goto knl_write;
793 text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
794 err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
795 if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
796 goto knl_write;
797 return err;
798knl_write:
799#endif /* CONFIG_DEBUG_RODATA */
800 return probe_kernel_write((char *)bpt->bpt_addr,
801 (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
802}
803
743struct kgdb_arch arch_kgdb_ops = { 804struct kgdb_arch arch_kgdb_ops = {
744 /* Breakpoint instruction: */ 805 /* Breakpoint instruction: */
745 .gdb_bpt_instr = { 0xcc }, 806 .gdb_bpt_instr = { 0xcc },
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
new file mode 100644
index 000000000000..3230b68ef29a
--- /dev/null
+++ b/arch/x86/kernel/kprobes-common.h
@@ -0,0 +1,102 @@
1#ifndef __X86_KERNEL_KPROBES_COMMON_H
2#define __X86_KERNEL_KPROBES_COMMON_H
3
4/* Kprobes and Optprobes common header */
5
6#ifdef CONFIG_X86_64
7#define SAVE_REGS_STRING \
8 /* Skip cs, ip, orig_ax. */ \
9 " subq $24, %rsp\n" \
10 " pushq %rdi\n" \
11 " pushq %rsi\n" \
12 " pushq %rdx\n" \
13 " pushq %rcx\n" \
14 " pushq %rax\n" \
15 " pushq %r8\n" \
16 " pushq %r9\n" \
17 " pushq %r10\n" \
18 " pushq %r11\n" \
19 " pushq %rbx\n" \
20 " pushq %rbp\n" \
21 " pushq %r12\n" \
22 " pushq %r13\n" \
23 " pushq %r14\n" \
24 " pushq %r15\n"
25#define RESTORE_REGS_STRING \
26 " popq %r15\n" \
27 " popq %r14\n" \
28 " popq %r13\n" \
29 " popq %r12\n" \
30 " popq %rbp\n" \
31 " popq %rbx\n" \
32 " popq %r11\n" \
33 " popq %r10\n" \
34 " popq %r9\n" \
35 " popq %r8\n" \
36 " popq %rax\n" \
37 " popq %rcx\n" \
38 " popq %rdx\n" \
39 " popq %rsi\n" \
40 " popq %rdi\n" \
41 /* Skip orig_ax, ip, cs */ \
42 " addq $24, %rsp\n"
43#else
44#define SAVE_REGS_STRING \
45 /* Skip cs, ip, orig_ax and gs. */ \
46 " subl $16, %esp\n" \
47 " pushl %fs\n" \
48 " pushl %es\n" \
49 " pushl %ds\n" \
50 " pushl %eax\n" \
51 " pushl %ebp\n" \
52 " pushl %edi\n" \
53 " pushl %esi\n" \
54 " pushl %edx\n" \
55 " pushl %ecx\n" \
56 " pushl %ebx\n"
57#define RESTORE_REGS_STRING \
58 " popl %ebx\n" \
59 " popl %ecx\n" \
60 " popl %edx\n" \
61 " popl %esi\n" \
62 " popl %edi\n" \
63 " popl %ebp\n" \
64 " popl %eax\n" \
65 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
66 " addl $24, %esp\n"
67#endif
68
69/* Check whether the instruction can be boosted */
70extern int can_boost(kprobe_opcode_t *instruction);
71/* Recover instruction if given address is probed */
72extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
73 unsigned long addr);
74/*
75 * Copy an instruction and adjust the displacement if the instruction
76 * uses the %rip-relative addressing mode.
77 */
78extern int __copy_instruction(u8 *dest, u8 *src);
79
80/* Generate a relative-jump/call instruction */
81extern void synthesize_reljump(void *from, void *to);
82extern void synthesize_relcall(void *from, void *to);
83
84#ifdef CONFIG_OPTPROBES
85extern int arch_init_optprobes(void);
86extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
87extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
88#else /* !CONFIG_OPTPROBES */
89static inline int arch_init_optprobes(void)
90{
91 return 0;
92}
93static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
94{
95 return 0;
96}
97static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
98{
99 return addr;
100}
101#endif
102#endif
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c
new file mode 100644
index 000000000000..c5e410eed403
--- /dev/null
+++ b/arch/x86/kernel/kprobes-opt.c
@@ -0,0 +1,512 @@
1/*
2 * Kernel Probes Jump Optimization (Optprobes)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004
19 * Copyright (C) Hitachi Ltd., 2012
20 */
21#include <linux/kprobes.h>
22#include <linux/ptrace.h>
23#include <linux/string.h>
24#include <linux/slab.h>
25#include <linux/hardirq.h>
26#include <linux/preempt.h>
27#include <linux/module.h>
28#include <linux/kdebug.h>
29#include <linux/kallsyms.h>
30#include <linux/ftrace.h>
31
32#include <asm/cacheflush.h>
33#include <asm/desc.h>
34#include <asm/pgtable.h>
35#include <asm/uaccess.h>
36#include <asm/alternative.h>
37#include <asm/insn.h>
38#include <asm/debugreg.h>
39
40#include "kprobes-common.h"
41
42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
43{
44 struct optimized_kprobe *op;
45 struct kprobe *kp;
46 long offs;
47 int i;
48
49 for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
50 kp = get_kprobe((void *)addr - i);
51 /* This function only handles jump-optimized kprobe */
52 if (kp && kprobe_optimized(kp)) {
53 op = container_of(kp, struct optimized_kprobe, kp);
 54 /* If op->list is not empty, op is being optimized */
55 if (list_empty(&op->list))
56 goto found;
57 }
58 }
59
60 return addr;
61found:
62 /*
 63 * If the kprobe is jump-optimized, the original bytes may have been
 64 * overwritten by the jump destination address. In that case, recover
 65 * the original bytes from the op->optinsn.copied_insn buffer.
66 */
67 memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
68 if (addr == (unsigned long)kp->addr) {
69 buf[0] = kp->opcode;
70 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
71 } else {
72 offs = addr - (unsigned long)kp->addr - 1;
73 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
74 }
75
76 return (unsigned long)buf;
77}
78
79/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
80static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
81{
82#ifdef CONFIG_X86_64
83 *addr++ = 0x48;
84 *addr++ = 0xbf;
85#else
86 *addr++ = 0xb8;
87#endif
88 *(unsigned long *)addr = val;
89}
90
91static void __used __kprobes kprobes_optinsn_template_holder(void)
92{
93 asm volatile (
94 ".global optprobe_template_entry\n"
95 "optprobe_template_entry:\n"
96#ifdef CONFIG_X86_64
97 /* We don't bother saving the ss register */
98 " pushq %rsp\n"
99 " pushfq\n"
100 SAVE_REGS_STRING
101 " movq %rsp, %rsi\n"
102 ".global optprobe_template_val\n"
103 "optprobe_template_val:\n"
104 ASM_NOP5
105 ASM_NOP5
106 ".global optprobe_template_call\n"
107 "optprobe_template_call:\n"
108 ASM_NOP5
109 /* Move flags to rsp */
110 " movq 144(%rsp), %rdx\n"
111 " movq %rdx, 152(%rsp)\n"
112 RESTORE_REGS_STRING
113 /* Skip flags entry */
114 " addq $8, %rsp\n"
115 " popfq\n"
116#else /* CONFIG_X86_32 */
117 " pushf\n"
118 SAVE_REGS_STRING
119 " movl %esp, %edx\n"
120 ".global optprobe_template_val\n"
121 "optprobe_template_val:\n"
122 ASM_NOP5
123 ".global optprobe_template_call\n"
124 "optprobe_template_call:\n"
125 ASM_NOP5
126 RESTORE_REGS_STRING
127 " addl $4, %esp\n" /* skip cs */
128 " popf\n"
129#endif
130 ".global optprobe_template_end\n"
131 "optprobe_template_end:\n");
132}
133
134#define TMPL_MOVE_IDX \
135 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
136#define TMPL_CALL_IDX \
137 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
138#define TMPL_END_IDX \
139 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
140
141#define INT3_SIZE sizeof(kprobe_opcode_t)
142
143/* Optimized kprobe call back function: called from optinsn */
144static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
145{
146 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
147 unsigned long flags;
148
 149 /* This is possible if op is under delayed unoptimization */
150 if (kprobe_disabled(&op->kp))
151 return;
152
153 local_irq_save(flags);
154 if (kprobe_running()) {
155 kprobes_inc_nmissed_count(&op->kp);
156 } else {
157 /* Save skipped registers */
158#ifdef CONFIG_X86_64
159 regs->cs = __KERNEL_CS;
160#else
161 regs->cs = __KERNEL_CS | get_kernel_rpl();
162 regs->gs = 0;
163#endif
164 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
165 regs->orig_ax = ~0UL;
166
167 __this_cpu_write(current_kprobe, &op->kp);
168 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
169 opt_pre_handler(&op->kp, regs);
170 __this_cpu_write(current_kprobe, NULL);
171 }
172 local_irq_restore(flags);
173}
174
175static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
176{
177 int len = 0, ret;
178
179 while (len < RELATIVEJUMP_SIZE) {
180 ret = __copy_instruction(dest + len, src + len);
181 if (!ret || !can_boost(dest + len))
182 return -EINVAL;
183 len += ret;
184 }
185 /* Check whether the address range is reserved */
186 if (ftrace_text_reserved(src, src + len - 1) ||
187 alternatives_text_reserved(src, src + len - 1) ||
188 jump_label_text_reserved(src, src + len - 1))
189 return -EBUSY;
190
191 return len;
192}
193
194/* Check whether insn is indirect jump */
195static int __kprobes insn_is_indirect_jump(struct insn *insn)
196{
197 return ((insn->opcode.bytes[0] == 0xff &&
198 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
199 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
200}
201
202/* Check whether insn jumps into specified address range */
203static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
204{
205 unsigned long target = 0;
206
207 switch (insn->opcode.bytes[0]) {
208 case 0xe0: /* loopne */
209 case 0xe1: /* loope */
210 case 0xe2: /* loop */
211 case 0xe3: /* jcxz */
212 case 0xe9: /* near relative jump */
213 case 0xeb: /* short relative jump */
214 break;
215 case 0x0f:
216 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
217 break;
218 return 0;
219 default:
220 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
221 break;
222 return 0;
223 }
224 target = (unsigned long)insn->next_byte + insn->immediate.value;
225
226 return (start <= target && target <= start + len);
227}
228
229/* Decode the whole function to ensure no instruction jumps into the target */
230static int __kprobes can_optimize(unsigned long paddr)
231{
232 unsigned long addr, size = 0, offset = 0;
233 struct insn insn;
234 kprobe_opcode_t buf[MAX_INSN_SIZE];
235
236 /* Lookup symbol including addr */
237 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
238 return 0;
239
240 /*
241 * Do not optimize in the entry code due to the unstable
242 * stack handling.
243 */
244 if ((paddr >= (unsigned long)__entry_text_start) &&
245 (paddr < (unsigned long)__entry_text_end))
246 return 0;
247
248 /* Check there is enough space for a relative jump. */
249 if (size - offset < RELATIVEJUMP_SIZE)
250 return 0;
251
252 /* Decode instructions */
253 addr = paddr - offset;
254 while (addr < paddr - offset + size) { /* Decode until function end */
255 if (search_exception_tables(addr))
256 /*
 257 * Since some fixup code may jump into this function,
 258 * we can't optimize a kprobe in this function.
259 */
260 return 0;
261 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
262 insn_get_length(&insn);
263 /* Another subsystem puts a breakpoint */
264 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
265 return 0;
266 /* Recover address */
267 insn.kaddr = (void *)addr;
268 insn.next_byte = (void *)(addr + insn.length);
 269 /* Check that no instruction jumps into the target */
270 if (insn_is_indirect_jump(&insn) ||
271 insn_jump_into_range(&insn, paddr + INT3_SIZE,
272 RELATIVE_ADDR_SIZE))
273 return 0;
274 addr += insn.length;
275 }
276
277 return 1;
278}
279
280/* Check optimized_kprobe can actually be optimized. */
281int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
282{
283 int i;
284 struct kprobe *p;
285
286 for (i = 1; i < op->optinsn.size; i++) {
287 p = get_kprobe(op->kp.addr + i);
288 if (p && !kprobe_disabled(p))
289 return -EEXIST;
290 }
291
292 return 0;
293}
294
295/* Check the addr is within the optimized instructions. */
296int __kprobes
297arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
298{
299 return ((unsigned long)op->kp.addr <= addr &&
300 (unsigned long)op->kp.addr + op->optinsn.size > addr);
301}
302
303/* Free optimized instruction slot */
304static __kprobes
305void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
306{
307 if (op->optinsn.insn) {
308 free_optinsn_slot(op->optinsn.insn, dirty);
309 op->optinsn.insn = NULL;
310 op->optinsn.size = 0;
311 }
312}
313
314void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
315{
316 __arch_remove_optimized_kprobe(op, 1);
317}
318
319/*
 320 * Copy the instructions that the relative jump will replace.
 321 * Target instructions MUST be relocatable (checked inside).
 322 * This is called when a new aggr(opt)probe is allocated or reused.
323 */
324int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
325{
326 u8 *buf;
327 int ret;
328 long rel;
329
330 if (!can_optimize((unsigned long)op->kp.addr))
331 return -EILSEQ;
332
333 op->optinsn.insn = get_optinsn_slot();
334 if (!op->optinsn.insn)
335 return -ENOMEM;
336
337 /*
 338 * Verify that the address gap is within the +/-2GB range
 339 * reachable by a relative jump.
340 */
341 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
342 if (abs(rel) > 0x7fffffff)
343 return -ERANGE;
344
345 buf = (u8 *)op->optinsn.insn;
346
347 /* Copy instructions into the out-of-line buffer */
348 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
349 if (ret < 0) {
350 __arch_remove_optimized_kprobe(op, 0);
351 return ret;
352 }
353 op->optinsn.size = ret;
354
355 /* Copy arch-dep-instance from template */
356 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
357
358 /* Set probe information */
359 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
360
361 /* Set probe function call */
362 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
363
364 /* Set returning jmp instruction at the tail of out-of-line buffer */
365 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
366 (u8 *)op->kp.addr + op->optinsn.size);
367
368 flush_icache_range((unsigned long) buf,
369 (unsigned long) buf + TMPL_END_IDX +
370 op->optinsn.size + RELATIVEJUMP_SIZE);
371 return 0;
372}
373
374#define MAX_OPTIMIZE_PROBES 256
375static struct text_poke_param *jump_poke_params;
376static struct jump_poke_buffer {
377 u8 buf[RELATIVEJUMP_SIZE];
378} *jump_poke_bufs;
379
380static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
381 u8 *insn_buf,
382 struct optimized_kprobe *op)
383{
384 s32 rel = (s32)((long)op->optinsn.insn -
385 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
386
387 /* Backup instructions which will be replaced by jump address */
388 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
389 RELATIVE_ADDR_SIZE);
390
391 insn_buf[0] = RELATIVEJUMP_OPCODE;
392 *(s32 *)(&insn_buf[1]) = rel;
393
394 tprm->addr = op->kp.addr;
395 tprm->opcode = insn_buf;
396 tprm->len = RELATIVEJUMP_SIZE;
397}
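
To make the displacement arithmetic above concrete, a worked example with
hypothetical addresses (illustration only, not part of the patch):

/*
 * With op->kp.addr      = 0xffffffff81000000 (probe point) and
 *      op->optinsn.insn = 0xffffffffa0002000 (detour buffer):
 *
 *   rel = 0xffffffffa0002000 - (0xffffffff81000000 + 5) = 0x1f001ffb
 *
 * so insn_buf becomes { 0xe9, 0xfb, 0x1f, 0x00, 0x1f }, i.e. a 5-byte
 * jmp rel32 from the probe point into the detour buffer.
 */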
398
399/*
400 * Replace breakpoints (int3) with relative jumps.
401 * The caller must hold kprobe_mutex and text_mutex.
402 */
403void __kprobes arch_optimize_kprobes(struct list_head *oplist)
404{
405 struct optimized_kprobe *op, *tmp;
406 int c = 0;
407
408 list_for_each_entry_safe(op, tmp, oplist, list) {
409 WARN_ON(kprobe_disabled(&op->kp));
410 /* Setup param */
411 setup_optimize_kprobe(&jump_poke_params[c],
412 jump_poke_bufs[c].buf, op);
413 list_del_init(&op->list);
414 if (++c >= MAX_OPTIMIZE_PROBES)
415 break;
416 }
417
418 /*
419	 * text_poke_smp doesn't support modifying code that runs in
420	 * NMI/MCE context. However, since kprobes itself doesn't support
421	 * probing NMI/MCE code either, this is not a problem.
422 */
423 text_poke_smp_batch(jump_poke_params, c);
424}
425
426static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
427 u8 *insn_buf,
428 struct optimized_kprobe *op)
429{
430	/* Put int3 back in the first byte for kprobes */
431 insn_buf[0] = BREAKPOINT_INSTRUCTION;
432 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
433
434 tprm->addr = op->kp.addr;
435 tprm->opcode = insn_buf;
436 tprm->len = RELATIVEJUMP_SIZE;
437}
438
439/*
440 * Recover original instructions and breakpoints from relative jumps.
441 * The caller must hold kprobe_mutex.
442 */
443void arch_unoptimize_kprobes(struct list_head *oplist,
444 struct list_head *done_list)
445{
446 struct optimized_kprobe *op, *tmp;
447 int c = 0;
448
449 list_for_each_entry_safe(op, tmp, oplist, list) {
450 /* Setup param */
451 setup_unoptimize_kprobe(&jump_poke_params[c],
452 jump_poke_bufs[c].buf, op);
453 list_move(&op->list, done_list);
454 if (++c >= MAX_OPTIMIZE_PROBES)
455 break;
456 }
457
458 /*
459	 * text_poke_smp doesn't support modifying code that runs in
460	 * NMI/MCE context. However, since kprobes itself doesn't support
461	 * probing NMI/MCE code either, this is not a problem.
462 */
463 text_poke_smp_batch(jump_poke_params, c);
464}
465
466/* Replace a relative jump with a breakpoint (int3). */
467void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
468{
469 u8 buf[RELATIVEJUMP_SIZE];
470
471	/* Put int3 back in the first byte for kprobes */
472 buf[0] = BREAKPOINT_INSTRUCTION;
473 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
474 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
475}
476
477int __kprobes
478setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
479{
480 struct optimized_kprobe *op;
481
482 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
483 /* This kprobe is really able to run optimized path. */
484 op = container_of(p, struct optimized_kprobe, kp);
485 /* Detour through copied instructions */
486 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
487 if (!reenter)
488 reset_current_kprobe();
489 preempt_enable_no_resched();
490 return 1;
491 }
492 return 0;
493}
494
495int __kprobes arch_init_optprobes(void)
496{
497 /* Allocate code buffer and parameter array */
498 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
499 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
500 if (!jump_poke_bufs)
501 return -ENOMEM;
502
503 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
504 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
505 if (!jump_poke_params) {
506 kfree(jump_poke_bufs);
507 jump_poke_bufs = NULL;
508 return -ENOMEM;
509 }
510
511 return 0;
512}
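
As context for the API implemented above, a minimal sketch of a module that
registers a kprobe the optimizer may transparently convert into an optprobe;
the target symbol and messages are illustrative, not part of the patch:

#include <linux/kprobes.h>
#include <linux/module.h>

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("pre-handler: probe hit at %p\n", p->addr);
	return 0;
}

static struct kprobe kp = {
	.symbol_name	= "do_fork",	/* hypothetical probe target */
	.pre_handler	= handler_pre,
};

static int __init optprobe_demo_init(void)
{
	/* If the target passes can_optimize(), the kprobes core will
	 * later replace the int3 with a relative jump to the detour. */
	return register_kprobe(&kp);
}

static void __exit optprobe_demo_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(optprobe_demo_init);
module_exit(optprobe_demo_exit);
MODULE_LICENSE("GPL");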
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d8b64c..e213fc8408d2 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -30,16 +30,15 @@
30 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi 30 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
31 * <prasanna@in.ibm.com> added function-return probes. 31 * <prasanna@in.ibm.com> added function-return probes.
32 * 2005-May Rusty Lynch <rusty.lynch@intel.com> 32 * 2005-May Rusty Lynch <rusty.lynch@intel.com>
33 * Added function return probes functionality 33 * Added function return probes functionality
34 * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added 34 * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
35 * kprobe-booster and kretprobe-booster for i386. 35 * kprobe-booster and kretprobe-booster for i386.
36 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster 36 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
37 * and kretprobe-booster for x86-64 37 * and kretprobe-booster for x86-64
38 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven 38 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
39 * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> 39 * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
40 * unified x86 kprobes code. 40 * unified x86 kprobes code.
41 */ 41 */
42
43#include <linux/kprobes.h> 42#include <linux/kprobes.h>
44#include <linux/ptrace.h> 43#include <linux/ptrace.h>
45#include <linux/string.h> 44#include <linux/string.h>
@@ -59,6 +58,8 @@
59#include <asm/insn.h> 58#include <asm/insn.h>
60#include <asm/debugreg.h> 59#include <asm/debugreg.h>
61 60
61#include "kprobes-common.h"
62
62void jprobe_return_end(void); 63void jprobe_return_end(void);
63 64
64DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 65DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
108 doesn't switch kernel stack.*/ 109 doesn't switch kernel stack.*/
109 {NULL, NULL} /* Terminator */ 110 {NULL, NULL} /* Terminator */
110}; 111};
112
111const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 113const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
112 114
113static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) 115static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
123} 125}
124 126
125/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 127/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
126static void __kprobes synthesize_reljump(void *from, void *to) 128void __kprobes synthesize_reljump(void *from, void *to)
127{ 129{
128 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); 130 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
129} 131}
130 132
133/* Insert a call instruction at address 'from', which calls address 'to'.*/
134void __kprobes synthesize_relcall(void *from, void *to)
135{
136 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
137}
138
131/* 139/*
132 * Skip the prefixes of the instruction. 140 * Skip the prefixes of the instruction.
133 */ 141 */
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
151 * Returns non-zero if opcode is boostable. 159 * Returns non-zero if opcode is boostable.
152 * RIP relative instructions are adjusted at copying time in 64 bits mode 160 * RIP relative instructions are adjusted at copying time in 64 bits mode
153 */ 161 */
154static int __kprobes can_boost(kprobe_opcode_t *opcodes) 162int __kprobes can_boost(kprobe_opcode_t *opcodes)
155{ 163{
156 kprobe_opcode_t opcode; 164 kprobe_opcode_t opcode;
157 kprobe_opcode_t *orig_opcodes = opcodes; 165 kprobe_opcode_t *orig_opcodes = opcodes;
@@ -207,13 +215,15 @@ retry:
207 } 215 }
208} 216}
209 217
210/* Recover the probed instruction at addr for further analysis. */ 218static unsigned long
211static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) 219__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
212{ 220{
213 struct kprobe *kp; 221 struct kprobe *kp;
222
214 kp = get_kprobe((void *)addr); 223 kp = get_kprobe((void *)addr);
224	/* There is no probe; return the original address */
215 if (!kp) 225 if (!kp)
216 return -EINVAL; 226 return addr;
217 227
218 /* 228 /*
219 * Basically, kp->ainsn.insn has an original instruction. 229 * Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 */ 240 */
231 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 241 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
232 buf[0] = kp->opcode; 242 buf[0] = kp->opcode;
233 return 0; 243 return (unsigned long)buf;
244}
245
246/*
247 * Recover the probed instruction at addr for further analysis.
248 * Caller must lock kprobes by kprobe_mutex, or disable preemption
249 * for preventing to release referencing kprobes.
250 */
251unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
252{
253 unsigned long __addr;
254
255 __addr = __recover_optprobed_insn(buf, addr);
256 if (__addr != addr)
257 return __addr;
258
259 return __recover_probed_insn(buf, addr);
234} 260}
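
A sketch of what the recovery above produces (hypothetical bytes, for
illustration only): the decoder is handed a buffer in which the saved
original first byte replaces the int3.

/*
 *   memory at kp->addr:  cc 89 e5 ...   (int3 patched over the first byte)
 *   recovered buf:       55 89 e5 ...   (buf[0] = kp->opcode, e.g. push %rbp)
 */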
235 261
236/* Check if paddr is at an instruction boundary */ 262/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 263static int __kprobes can_probe(unsigned long paddr)
238{ 264{
239 int ret; 265 unsigned long addr, __addr, offset = 0;
240 unsigned long addr, offset = 0;
241 struct insn insn; 266 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 267 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 268
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr)
247 /* Decode instructions */ 272 /* Decode instructions */
248 addr = paddr - offset; 273 addr = paddr - offset;
249 while (addr < paddr) { 274 while (addr < paddr) {
250 kernel_insn_init(&insn, (void *)addr);
251 insn_get_opcode(&insn);
252
253 /* 275 /*
254 * Check if the instruction has been modified by another 276 * Check if the instruction has been modified by another
255 * kprobe, in which case we replace the breakpoint by the 277 * kprobe, in which case we replace the breakpoint by the
256 * original instruction in our buffer. 278 * original instruction in our buffer.
279 * Also, jump optimization will change the breakpoint to a
280 * relative jump. Since a relative jump is itself an ordinary
281 * instruction, we just pass it through if there is no kprobe.
257 */ 282 */
258 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { 283 __addr = recover_probed_instruction(buf, addr);
259 ret = recover_probed_instruction(buf, addr); 284 kernel_insn_init(&insn, (void *)__addr);
260 if (ret)
261 /*
262 * Another debugging subsystem might insert
263 * this breakpoint. In that case, we can't
264 * recover it.
265 */
266 return 0;
267 kernel_insn_init(&insn, buf);
268 }
269 insn_get_length(&insn); 285 insn_get_length(&insn);
286
287 /*
288 * Another debugging subsystem might insert this breakpoint.
289 * In that case, we can't recover it.
290 */
291 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
292 return 0;
270 addr += insn.length; 293 addr += insn.length;
271 } 294 }
272 295
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
299 * If not, return null. 322 * If not, return null.
300 * Only applicable to 64-bit x86. 323 * Only applicable to 64-bit x86.
301 */ 324 */
302static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) 325int __kprobes __copy_instruction(u8 *dest, u8 *src)
303{ 326{
304 struct insn insn; 327 struct insn insn;
305 int ret;
306 kprobe_opcode_t buf[MAX_INSN_SIZE]; 328 kprobe_opcode_t buf[MAX_INSN_SIZE];
307 329
308 kernel_insn_init(&insn, src); 330 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
309 if (recover) {
310 insn_get_opcode(&insn);
311 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
312 ret = recover_probed_instruction(buf,
313 (unsigned long)src);
314 if (ret)
315 return 0;
316 kernel_insn_init(&insn, buf);
317 }
318 }
319 insn_get_length(&insn); 331 insn_get_length(&insn);
332	/* Another subsystem has put a breakpoint; we failed to recover it */
333 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
334 return 0;
320 memcpy(dest, insn.kaddr, insn.length); 335 memcpy(dest, insn.kaddr, insn.length);
321 336
322#ifdef CONFIG_X86_64 337#ifdef CONFIG_X86_64
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
337 * extension of the original signed 32-bit displacement would 352 * extension of the original signed 32-bit displacement would
338 * have given. 353 * have given.
339 */ 354 */
340 newdisp = (u8 *) src + (s64) insn.displacement.value - 355 newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
341 (u8 *) dest;
342 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ 356 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
343 disp = (u8 *) dest + insn_offset_displacement(&insn); 357 disp = (u8 *) dest + insn_offset_displacement(&insn);
344 *(s32 *) disp = (s32) newdisp; 358 *(s32 *) disp = (s32) newdisp;
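
A worked example of the RIP-relative fixup above, with hypothetical
addresses (illustration only): a 7-byte "lea 0x10(%rip), %rax" copied from
src = 0xffffffff81000000 to dest = 0xffffffffa0002000 must keep addressing
src + 7 + 0x10, so

	newdisp = src + 0x10 - dest = -0x1f001ff0

is written back as the copied instruction's rel32 displacement.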
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
349 363
350static void __kprobes arch_copy_kprobe(struct kprobe *p) 364static void __kprobes arch_copy_kprobe(struct kprobe *p)
351{ 365{
366	/* Copy the instruction, recovering it if another optprobe has modified it. */
367 __copy_instruction(p->ainsn.insn, p->addr);
368
352 /* 369 /*
353 * Copy an instruction without recovering int3, because it will be 370 * __copy_instruction can modify the displacement of the instruction,
354 * put by another subsystem. 371 * but it doesn't affect the boostability check.
355 */ 372 */
356 __copy_instruction(p->ainsn.insn, p->addr, 0); 373 if (can_boost(p->ainsn.insn))
357
358 if (can_boost(p->addr))
359 p->ainsn.boostable = 0; 374 p->ainsn.boostable = 0;
360 else 375 else
361 p->ainsn.boostable = -1; 376 p->ainsn.boostable = -1;
362 377
363 p->opcode = *p->addr; 378 /* Also, displacement change doesn't affect the first byte */
379 p->opcode = p->ainsn.insn[0];
364} 380}
365 381
366int __kprobes arch_prepare_kprobe(struct kprobe *p) 382int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void)
442 } 458 }
443} 459}
444 460
445void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 461void __kprobes
446 struct pt_regs *regs) 462arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
447{ 463{
448 unsigned long *sara = stack_addr(regs); 464 unsigned long *sara = stack_addr(regs);
449 465
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
453 *sara = (unsigned long) &kretprobe_trampoline; 469 *sara = (unsigned long) &kretprobe_trampoline;
454} 470}
455 471
456#ifdef CONFIG_OPTPROBES 472static void __kprobes
457static int __kprobes setup_detour_execution(struct kprobe *p, 473setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
458 struct pt_regs *regs,
459 int reenter);
460#else
461#define setup_detour_execution(p, regs, reenter) (0)
462#endif
463
464static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
465 struct kprobe_ctlblk *kcb, int reenter)
466{ 474{
467 if (setup_detour_execution(p, regs, reenter)) 475 if (setup_detour_execution(p, regs, reenter))
468 return; 476 return;
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
504 * within the handler. We save the original kprobes variables and just single 512 * within the handler. We save the original kprobes variables and just single
505 * step on the instruction of the new probe without calling any user handlers. 513 * step on the instruction of the new probe without calling any user handlers.
506 */ 514 */
507static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, 515static int __kprobes
508 struct kprobe_ctlblk *kcb) 516reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
509{ 517{
510 switch (kcb->kprobe_status) { 518 switch (kcb->kprobe_status) {
511 case KPROBE_HIT_SSDONE: 519 case KPROBE_HIT_SSDONE:
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
600 return 0; 608 return 0;
601} 609}
602 610
603#ifdef CONFIG_X86_64
604#define SAVE_REGS_STRING \
605 /* Skip cs, ip, orig_ax. */ \
606 " subq $24, %rsp\n" \
607 " pushq %rdi\n" \
608 " pushq %rsi\n" \
609 " pushq %rdx\n" \
610 " pushq %rcx\n" \
611 " pushq %rax\n" \
612 " pushq %r8\n" \
613 " pushq %r9\n" \
614 " pushq %r10\n" \
615 " pushq %r11\n" \
616 " pushq %rbx\n" \
617 " pushq %rbp\n" \
618 " pushq %r12\n" \
619 " pushq %r13\n" \
620 " pushq %r14\n" \
621 " pushq %r15\n"
622#define RESTORE_REGS_STRING \
623 " popq %r15\n" \
624 " popq %r14\n" \
625 " popq %r13\n" \
626 " popq %r12\n" \
627 " popq %rbp\n" \
628 " popq %rbx\n" \
629 " popq %r11\n" \
630 " popq %r10\n" \
631 " popq %r9\n" \
632 " popq %r8\n" \
633 " popq %rax\n" \
634 " popq %rcx\n" \
635 " popq %rdx\n" \
636 " popq %rsi\n" \
637 " popq %rdi\n" \
638 /* Skip orig_ax, ip, cs */ \
639 " addq $24, %rsp\n"
640#else
641#define SAVE_REGS_STRING \
642 /* Skip cs, ip, orig_ax and gs. */ \
643 " subl $16, %esp\n" \
644 " pushl %fs\n" \
645 " pushl %es\n" \
646 " pushl %ds\n" \
647 " pushl %eax\n" \
648 " pushl %ebp\n" \
649 " pushl %edi\n" \
650 " pushl %esi\n" \
651 " pushl %edx\n" \
652 " pushl %ecx\n" \
653 " pushl %ebx\n"
654#define RESTORE_REGS_STRING \
655 " popl %ebx\n" \
656 " popl %ecx\n" \
657 " popl %edx\n" \
658 " popl %esi\n" \
659 " popl %edi\n" \
660 " popl %ebp\n" \
661 " popl %eax\n" \
662 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
663 " addl $24, %esp\n"
664#endif
665
666/* 611/*
667 * When a retprobed function returns, this code saves registers and 612 * When a retprobed function returns, this code saves registers and
668 * calls trampoline_handler() runs, which calls the kretprobe's handler. 613 * calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
816 * jump instruction after the copied instruction, that jumps to the next 761 * jump instruction after the copied instruction, that jumps to the next
817 * instruction after the probepoint. 762 * instruction after the probepoint.
818 */ 763 */
819static void __kprobes resume_execution(struct kprobe *p, 764static void __kprobes
820 struct pt_regs *regs, struct kprobe_ctlblk *kcb) 765resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
821{ 766{
822 unsigned long *tos = stack_addr(regs); 767 unsigned long *tos = stack_addr(regs);
823 unsigned long copy_ip = (unsigned long)p->ainsn.insn; 768 unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
996/* 941/*
997 * Wrapper routine for handling exceptions. 942 * Wrapper routine for handling exceptions.
998 */ 943 */
999int __kprobes kprobe_exceptions_notify(struct notifier_block *self, 944int __kprobes
1000 unsigned long val, void *data) 945kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
1001{ 946{
1002 struct die_args *args = data; 947 struct die_args *args = data;
1003 int ret = NOTIFY_DONE; 948 int ret = NOTIFY_DONE;
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1107 return 0; 1052 return 0;
1108} 1053}
1109 1054
1110
1111#ifdef CONFIG_OPTPROBES
1112
1113/* Insert a call instruction at address 'from', which calls address 'to'.*/
1114static void __kprobes synthesize_relcall(void *from, void *to)
1115{
1116 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
1117}
1118
1119/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
1120static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1121 unsigned long val)
1122{
1123#ifdef CONFIG_X86_64
1124 *addr++ = 0x48;
1125 *addr++ = 0xbf;
1126#else
1127 *addr++ = 0xb8;
1128#endif
1129 *(unsigned long *)addr = val;
1130}
1131
1132static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{
1134 asm volatile (
1135 ".global optprobe_template_entry\n"
1136 "optprobe_template_entry: \n"
1137#ifdef CONFIG_X86_64
1138 /* We don't bother saving the ss register */
1139 " pushq %rsp\n"
1140 " pushfq\n"
1141 SAVE_REGS_STRING
1142 " movq %rsp, %rsi\n"
1143 ".global optprobe_template_val\n"
1144 "optprobe_template_val: \n"
1145 ASM_NOP5
1146 ASM_NOP5
1147 ".global optprobe_template_call\n"
1148 "optprobe_template_call: \n"
1149 ASM_NOP5
1150 /* Move flags to rsp */
1151 " movq 144(%rsp), %rdx\n"
1152 " movq %rdx, 152(%rsp)\n"
1153 RESTORE_REGS_STRING
1154 /* Skip flags entry */
1155 " addq $8, %rsp\n"
1156 " popfq\n"
1157#else /* CONFIG_X86_32 */
1158 " pushf\n"
1159 SAVE_REGS_STRING
1160 " movl %esp, %edx\n"
1161 ".global optprobe_template_val\n"
1162 "optprobe_template_val: \n"
1163 ASM_NOP5
1164 ".global optprobe_template_call\n"
1165 "optprobe_template_call: \n"
1166 ASM_NOP5
1167 RESTORE_REGS_STRING
1168 " addl $4, %esp\n" /* skip cs */
1169 " popf\n"
1170#endif
1171 ".global optprobe_template_end\n"
1172 "optprobe_template_end: \n");
1173}
1174
1175#define TMPL_MOVE_IDX \
1176 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
1177#define TMPL_CALL_IDX \
1178 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
1179#define TMPL_END_IDX \
1180 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
1181
1182#define INT3_SIZE sizeof(kprobe_opcode_t)
1183
1184/* Optimized kprobe call back function: called from optinsn */
1185static void __kprobes optimized_callback(struct optimized_kprobe *op,
1186 struct pt_regs *regs)
1187{
1188 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1189 unsigned long flags;
1190
1191 /* This is possible if op is under delayed unoptimizing */
1192 if (kprobe_disabled(&op->kp))
1193 return;
1194
1195 local_irq_save(flags);
1196 if (kprobe_running()) {
1197 kprobes_inc_nmissed_count(&op->kp);
1198 } else {
1199 /* Save skipped registers */
1200#ifdef CONFIG_X86_64
1201 regs->cs = __KERNEL_CS;
1202#else
1203 regs->cs = __KERNEL_CS | get_kernel_rpl();
1204 regs->gs = 0;
1205#endif
1206 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1207 regs->orig_ax = ~0UL;
1208
1209 __this_cpu_write(current_kprobe, &op->kp);
1210 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1211 opt_pre_handler(&op->kp, regs);
1212 __this_cpu_write(current_kprobe, NULL);
1213 }
1214 local_irq_restore(flags);
1215}
1216
1217static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1218{
1219 int len = 0, ret;
1220
1221 while (len < RELATIVEJUMP_SIZE) {
1222 ret = __copy_instruction(dest + len, src + len, 1);
1223 if (!ret || !can_boost(dest + len))
1224 return -EINVAL;
1225 len += ret;
1226 }
1227 /* Check whether the address range is reserved */
1228 if (ftrace_text_reserved(src, src + len - 1) ||
1229 alternatives_text_reserved(src, src + len - 1) ||
1230 jump_label_text_reserved(src, src + len - 1))
1231 return -EBUSY;
1232
1233 return len;
1234}
1235
1236/* Check whether insn is indirect jump */
1237static int __kprobes insn_is_indirect_jump(struct insn *insn)
1238{
1239 return ((insn->opcode.bytes[0] == 0xff &&
1240 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
1241 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
1242}
1243
1244/* Check whether insn jumps into specified address range */
1245static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
1246{
1247 unsigned long target = 0;
1248
1249 switch (insn->opcode.bytes[0]) {
1250 case 0xe0: /* loopne */
1251 case 0xe1: /* loope */
1252 case 0xe2: /* loop */
1253 case 0xe3: /* jcxz */
1254 case 0xe9: /* near relative jump */
1255 case 0xeb: /* short relative jump */
1256 break;
1257 case 0x0f:
1258 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
1259 break;
1260 return 0;
1261 default:
1262 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
1263 break;
1264 return 0;
1265 }
1266 target = (unsigned long)insn->next_byte + insn->immediate.value;
1267
1268 return (start <= target && target <= start + len);
1269}
1270
1271/* Decode whole function to ensure any instructions don't jump into target */
1272static int __kprobes can_optimize(unsigned long paddr)
1273{
1274 int ret;
1275 unsigned long addr, size = 0, offset = 0;
1276 struct insn insn;
1277 kprobe_opcode_t buf[MAX_INSN_SIZE];
1278
1279 /* Lookup symbol including addr */
1280 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1281 return 0;
1282
1283 /*
1284 * Do not optimize in the entry code due to the unstable
1285 * stack handling.
1286 */
1287 if ((paddr >= (unsigned long )__entry_text_start) &&
1288 (paddr < (unsigned long )__entry_text_end))
1289 return 0;
1290
1291 /* Check there is enough space for a relative jump. */
1292 if (size - offset < RELATIVEJUMP_SIZE)
1293 return 0;
1294
1295 /* Decode instructions */
1296 addr = paddr - offset;
1297 while (addr < paddr - offset + size) { /* Decode until function end */
1298 if (search_exception_tables(addr))
1299 /*
1300 * Since some fixup code will jumps into this function,
1301 * we can't optimize kprobe in this function.
1302 */
1303 return 0;
1304 kernel_insn_init(&insn, (void *)addr);
1305 insn_get_opcode(&insn);
1306 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
1307 ret = recover_probed_instruction(buf, addr);
1308 if (ret)
1309 return 0;
1310 kernel_insn_init(&insn, buf);
1311 }
1312 insn_get_length(&insn);
1313 /* Recover address */
1314 insn.kaddr = (void *)addr;
1315 insn.next_byte = (void *)(addr + insn.length);
1316 /* Check any instructions don't jump into target */
1317 if (insn_is_indirect_jump(&insn) ||
1318 insn_jump_into_range(&insn, paddr + INT3_SIZE,
1319 RELATIVE_ADDR_SIZE))
1320 return 0;
1321 addr += insn.length;
1322 }
1323
1324 return 1;
1325}
1326
1327/* Check optimized_kprobe can actually be optimized. */
1328int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
1329{
1330 int i;
1331 struct kprobe *p;
1332
1333 for (i = 1; i < op->optinsn.size; i++) {
1334 p = get_kprobe(op->kp.addr + i);
1335 if (p && !kprobe_disabled(p))
1336 return -EEXIST;
1337 }
1338
1339 return 0;
1340}
1341
1342/* Check the addr is within the optimized instructions. */
1343int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
1344 unsigned long addr)
1345{
1346 return ((unsigned long)op->kp.addr <= addr &&
1347 (unsigned long)op->kp.addr + op->optinsn.size > addr);
1348}
1349
1350/* Free optimized instruction slot */
1351static __kprobes
1352void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
1353{
1354 if (op->optinsn.insn) {
1355 free_optinsn_slot(op->optinsn.insn, dirty);
1356 op->optinsn.insn = NULL;
1357 op->optinsn.size = 0;
1358 }
1359}
1360
1361void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
1362{
1363 __arch_remove_optimized_kprobe(op, 1);
1364}
1365
1366/*
1367 * Copy replacing target instructions
1368 * Target instructions MUST be relocatable (checked inside)
1369 */
1370int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1371{
1372 u8 *buf;
1373 int ret;
1374 long rel;
1375
1376 if (!can_optimize((unsigned long)op->kp.addr))
1377 return -EILSEQ;
1378
1379 op->optinsn.insn = get_optinsn_slot();
1380 if (!op->optinsn.insn)
1381 return -ENOMEM;
1382
1383 /*
1384 * Verify if the address gap is in 2GB range, because this uses
1385 * a relative jump.
1386 */
1387 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
1388 if (abs(rel) > 0x7fffffff)
1389 return -ERANGE;
1390
1391 buf = (u8 *)op->optinsn.insn;
1392
1393 /* Copy instructions into the out-of-line buffer */
1394 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
1395 if (ret < 0) {
1396 __arch_remove_optimized_kprobe(op, 0);
1397 return ret;
1398 }
1399 op->optinsn.size = ret;
1400
1401 /* Copy arch-dep-instance from template */
1402 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
1403
1404 /* Set probe information */
1405 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
1406
1407 /* Set probe function call */
1408 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
1409
1410 /* Set returning jmp instruction at the tail of out-of-line buffer */
1411 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
1412 (u8 *)op->kp.addr + op->optinsn.size);
1413
1414 flush_icache_range((unsigned long) buf,
1415 (unsigned long) buf + TMPL_END_IDX +
1416 op->optinsn.size + RELATIVEJUMP_SIZE);
1417 return 0;
1418}
1419
1420#define MAX_OPTIMIZE_PROBES 256
1421static struct text_poke_param *jump_poke_params;
1422static struct jump_poke_buffer {
1423 u8 buf[RELATIVEJUMP_SIZE];
1424} *jump_poke_bufs;
1425
1426static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1427 u8 *insn_buf,
1428 struct optimized_kprobe *op)
1429{
1430 s32 rel = (s32)((long)op->optinsn.insn -
1431 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1432
1433 /* Backup instructions which will be replaced by jump address */
1434 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1435 RELATIVE_ADDR_SIZE);
1436
1437 insn_buf[0] = RELATIVEJUMP_OPCODE;
1438 *(s32 *)(&insn_buf[1]) = rel;
1439
1440 tprm->addr = op->kp.addr;
1441 tprm->opcode = insn_buf;
1442 tprm->len = RELATIVEJUMP_SIZE;
1443}
1444
1445/*
1446 * Replace breakpoints (int3) with relative jumps.
1447 * Caller must call with locking kprobe_mutex and text_mutex.
1448 */
1449void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1450{
1451 struct optimized_kprobe *op, *tmp;
1452 int c = 0;
1453
1454 list_for_each_entry_safe(op, tmp, oplist, list) {
1455 WARN_ON(kprobe_disabled(&op->kp));
1456 /* Setup param */
1457 setup_optimize_kprobe(&jump_poke_params[c],
1458 jump_poke_bufs[c].buf, op);
1459 list_del_init(&op->list);
1460 if (++c >= MAX_OPTIMIZE_PROBES)
1461 break;
1462 }
1463
1464 /*
1465 * text_poke_smp doesn't support NMI/MCE code modifying.
1466 * However, since kprobes itself also doesn't support NMI/MCE
1467 * code probing, it's not a problem.
1468 */
1469 text_poke_smp_batch(jump_poke_params, c);
1470}
1471
1472static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1473 u8 *insn_buf,
1474 struct optimized_kprobe *op)
1475{
1476 /* Set int3 to first byte for kprobes */
1477 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1478 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1479
1480 tprm->addr = op->kp.addr;
1481 tprm->opcode = insn_buf;
1482 tprm->len = RELATIVEJUMP_SIZE;
1483}
1484
1485/*
1486 * Recover original instructions and breakpoints from relative jumps.
1487 * Caller must call with locking kprobe_mutex.
1488 */
1489extern void arch_unoptimize_kprobes(struct list_head *oplist,
1490 struct list_head *done_list)
1491{
1492 struct optimized_kprobe *op, *tmp;
1493 int c = 0;
1494
1495 list_for_each_entry_safe(op, tmp, oplist, list) {
1496 /* Setup param */
1497 setup_unoptimize_kprobe(&jump_poke_params[c],
1498 jump_poke_bufs[c].buf, op);
1499 list_move(&op->list, done_list);
1500 if (++c >= MAX_OPTIMIZE_PROBES)
1501 break;
1502 }
1503
1504 /*
1505 * text_poke_smp doesn't support NMI/MCE code modifying.
1506 * However, since kprobes itself also doesn't support NMI/MCE
1507 * code probing, it's not a problem.
1508 */
1509 text_poke_smp_batch(jump_poke_params, c);
1510}
1511
1512/* Replace a relative jump with a breakpoint (int3). */
1513void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
1514{
1515 u8 buf[RELATIVEJUMP_SIZE];
1516
1517 /* Set int3 to first byte for kprobes */
1518 buf[0] = BREAKPOINT_INSTRUCTION;
1519 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1520 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
1521}
1522
1523static int __kprobes setup_detour_execution(struct kprobe *p,
1524 struct pt_regs *regs,
1525 int reenter)
1526{
1527 struct optimized_kprobe *op;
1528
1529 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
1530 /* This kprobe is really able to run optimized path. */
1531 op = container_of(p, struct optimized_kprobe, kp);
1532 /* Detour through copied instructions */
1533 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
1534 if (!reenter)
1535 reset_current_kprobe();
1536 preempt_enable_no_resched();
1537 return 1;
1538 }
1539 return 0;
1540}
1541
1542static int __kprobes init_poke_params(void)
1543{
1544 /* Allocate code buffer and parameter array */
1545 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1546 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1547 if (!jump_poke_bufs)
1548 return -ENOMEM;
1549
1550 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1551 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1552 if (!jump_poke_params) {
1553 kfree(jump_poke_bufs);
1554 jump_poke_bufs = NULL;
1555 return -ENOMEM;
1556 }
1557
1558 return 0;
1559}
1560#else /* !CONFIG_OPTPROBES */
1561static int __kprobes init_poke_params(void)
1562{
1563 return 0;
1564}
1565#endif
1566
1567int __init arch_init_kprobes(void) 1055int __init arch_init_kprobes(void)
1568{ 1056{
1569 return init_poke_params(); 1057 return arch_init_optprobes();
1570} 1058}
1571 1059
1572int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1060int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f0c6fd6f176b..b8ba6e4a27e4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -38,6 +38,7 @@
38#include <asm/traps.h> 38#include <asm/traps.h>
39#include <asm/desc.h> 39#include <asm/desc.h>
40#include <asm/tlbflush.h> 40#include <asm/tlbflush.h>
41#include <asm/idle.h>
41 42
42static int kvmapf = 1; 43static int kvmapf = 1;
43 44
@@ -253,7 +254,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
253 kvm_async_pf_task_wait((u32)read_cr2()); 254 kvm_async_pf_task_wait((u32)read_cr2());
254 break; 255 break;
255 case KVM_PV_REASON_PAGE_READY: 256 case KVM_PV_REASON_PAGE_READY:
257 rcu_irq_enter();
258 exit_idle();
256 kvm_async_pf_task_wake((u32)read_cr2()); 259 kvm_async_pf_task_wake((u32)read_cr2());
260 rcu_irq_exit();
257 break; 261 break;
258 } 262 }
259} 263}
@@ -438,9 +442,9 @@ void __init kvm_guest_init(void)
438static __init int activate_jump_labels(void) 442static __init int activate_jump_labels(void)
439{ 443{
440 if (has_steal_clock) { 444 if (has_steal_clock) {
441 jump_label_inc(&paravirt_steal_enabled); 445 static_key_slow_inc(&paravirt_steal_enabled);
442 if (steal_acc) 446 if (steal_acc)
443 jump_label_inc(&paravirt_steal_rq_enabled); 447 static_key_slow_inc(&paravirt_steal_rq_enabled);
444 } 448 }
445 449
446 return 0; 450 return 0;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 44842d756b29..f8492da65bfc 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -136,6 +136,15 @@ int kvm_register_clock(char *txt)
136 return ret; 136 return ret;
137} 137}
138 138
139static void kvm_save_sched_clock_state(void)
140{
141}
142
143static void kvm_restore_sched_clock_state(void)
144{
145 kvm_register_clock("primary cpu clock, resume");
146}
147
139#ifdef CONFIG_X86_LOCAL_APIC 148#ifdef CONFIG_X86_LOCAL_APIC
140static void __cpuinit kvm_setup_secondary_clock(void) 149static void __cpuinit kvm_setup_secondary_clock(void)
141{ 150{
@@ -144,8 +153,6 @@ static void __cpuinit kvm_setup_secondary_clock(void)
144 * we shouldn't fail. 153 * we shouldn't fail.
145 */ 154 */
146 WARN_ON(kvm_register_clock("secondary cpu clock")); 155 WARN_ON(kvm_register_clock("secondary cpu clock"));
147 /* ok, done with our trickery, call native */
148 setup_secondary_APIC_clock();
149} 156}
150#endif 157#endif
151 158
@@ -194,9 +201,11 @@ void __init kvmclock_init(void)
194 x86_platform.get_wallclock = kvm_get_wallclock; 201 x86_platform.get_wallclock = kvm_get_wallclock;
195 x86_platform.set_wallclock = kvm_set_wallclock; 202 x86_platform.set_wallclock = kvm_set_wallclock;
196#ifdef CONFIG_X86_LOCAL_APIC 203#ifdef CONFIG_X86_LOCAL_APIC
197 x86_cpuinit.setup_percpu_clockev = 204 x86_cpuinit.early_percpu_clock_init =
198 kvm_setup_secondary_clock; 205 kvm_setup_secondary_clock;
199#endif 206#endif
207 x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
208 x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
200 machine_ops.shutdown = kvm_shutdown; 209 machine_ops.shutdown = kvm_shutdown;
201#ifdef CONFIG_KEXEC 210#ifdef CONFIG_KEXEC
202 machine_ops.crash_shutdown = kvm_crash_shutdown; 211 machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ea697263b373..ebc987398923 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -15,7 +15,6 @@
15#include <linux/vmalloc.h> 15#include <linux/vmalloc.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17 17
18#include <asm/system.h>
19#include <asm/ldt.h> 18#include <asm/ldt.h>
20#include <asm/desc.h> 19#include <asm/desc.h>
21#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index a3fa43ba5d3b..5b19e4d78b00 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -23,7 +23,6 @@
23#include <asm/apic.h> 23#include <asm/apic.h>
24#include <asm/cpufeature.h> 24#include <asm/cpufeature.h>
25#include <asm/desc.h> 25#include <asm/desc.h>
26#include <asm/system.h>
27#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
28#include <asm/debugreg.h> 27#include <asm/debugreg.h>
29 28
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 177183cbb6ae..7eb1e2b97827 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -43,7 +43,6 @@
43#include <linux/mca.h> 43#include <linux/mca.h>
44#include <linux/kprobes.h> 44#include <linux/kprobes.h>
45#include <linux/slab.h> 45#include <linux/slab.h>
46#include <asm/system.h>
47#include <asm/io.h> 46#include <asm/io.h>
48#include <linux/proc_fs.h> 47#include <linux/proc_fs.h>
49#include <linux/mman.h> 48#include <linux/mman.h>
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index fe86493f3ed1..73465aab28f8 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -311,13 +311,33 @@ out:
311 return state; 311 return state;
312} 312}
313 313
314/*
315 * AMD microcode firmware naming convention: up to family 15h, the
316 * microcode ships in the legacy file:
317 *
318 * amd-ucode/microcode_amd.bin
319 *
320 * This legacy file is always smaller than 2K in size.
321 *
322 * Starting with family 15h, it ships in family-specific firmware files:
323 *
324 * amd-ucode/microcode_amd_fam15h.bin
325 * amd-ucode/microcode_amd_fam16h.bin
326 * ...
327 *
328 * These might be larger than 2K.
329 */
314static enum ucode_state request_microcode_amd(int cpu, struct device *device) 330static enum ucode_state request_microcode_amd(int cpu, struct device *device)
315{ 331{
316 const char *fw_name = "amd-ucode/microcode_amd.bin"; 332 char fw_name[36] = "amd-ucode/microcode_amd.bin";
317 const struct firmware *fw; 333 const struct firmware *fw;
318 enum ucode_state ret = UCODE_NFOUND; 334 enum ucode_state ret = UCODE_NFOUND;
335 struct cpuinfo_x86 *c = &cpu_data(cpu);
336
337 if (c->x86 >= 0x15)
338 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
319 339
320 if (request_firmware(&fw, fw_name, device)) { 340 if (request_firmware(&fw, (const char *)fw_name, device)) {
321 pr_err("failed to load file %s\n", fw_name); 341 pr_err("failed to load file %s\n", fw_name);
322 goto out; 342 goto out;
323 } 343 }
@@ -340,7 +360,6 @@ out:
340static enum ucode_state 360static enum ucode_state
341request_microcode_user(int cpu, const void __user *buf, size_t size) 361request_microcode_user(int cpu, const void __user *buf, size_t size)
342{ 362{
343 pr_info("AMD microcode update via /dev/cpu/microcode not supported\n");
344 return UCODE_ERROR; 363 return UCODE_ERROR;
345} 364}
346 365
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fda91c307104..87a0f8688301 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -86,6 +86,7 @@
86 86
87#include <asm/microcode.h> 87#include <asm/microcode.h>
88#include <asm/processor.h> 88#include <asm/processor.h>
89#include <asm/cpu_device_id.h>
89 90
90MODULE_DESCRIPTION("Microcode Update Driver"); 91MODULE_DESCRIPTION("Microcode Update Driver");
91MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); 92MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -504,6 +505,20 @@ static struct notifier_block __refdata mc_cpu_notifier = {
504 .notifier_call = mc_cpu_callback, 505 .notifier_call = mc_cpu_callback,
505}; 506};
506 507
508#ifdef MODULE
509/* Autoload on Intel and AMD systems */
510static const struct x86_cpu_id microcode_id[] = {
511#ifdef CONFIG_MICROCODE_INTEL
512 { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
513#endif
514#ifdef CONFIG_MICROCODE_AMD
515 { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
516#endif
517 {}
518};
519MODULE_DEVICE_TABLE(x86cpu, microcode_id);
520#endif
521
507static int __init microcode_init(void) 522static int __init microcode_init(void)
508{ 523{
509 struct cpuinfo_x86 *c = &cpu_data(0); 524 struct cpuinfo_x86 *c = &cpu_data(0);
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 925179f871de..f21fd94ac897 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -26,7 +26,6 @@
26#include <linux/gfp.h> 26#include <linux/gfp.h>
27#include <linux/jump_label.h> 27#include <linux/jump_label.h>
28 28
29#include <asm/system.h>
30#include <asm/page.h> 29#include <asm/page.h>
31#include <asm/pgtable.h> 30#include <asm/pgtable.h>
32 31
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 96356762a51d..eb113693f043 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,6 @@
40 40
41#include <asm/processor.h> 41#include <asm/processor.h>
42#include <asm/msr.h> 42#include <asm/msr.h>
43#include <asm/system.h>
44 43
45static struct class *msr_class; 44static struct class *msr_class;
46 45
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e88f37b58ddd..47acaf319165 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
405 unknown_nmi_error(reason, regs); 405 unknown_nmi_error(reason, regs);
406} 406}
407 407
408/*
409 * An NMI can hit a breakpoint, which causes the CPU to lose its
410 * NMI context when the breakpoint handler does an iret.
411 */
412#ifdef CONFIG_X86_32
413/*
414 * For i386, NMIs use the same stack as the kernel, and we can
415 * add a workaround to the iret problem in C. Simply have 3 states
416 * the NMI can be in.
417 *
418 * 1) not running
419 * 2) executing
420 * 3) latched
421 *
422 * When no NMI is in progress, it is in the "not running" state.
423 * When an NMI comes in, it goes into the "executing" state.
424 * Normally, if another NMI is triggered, it does not interrupt
425 * the running NMI and the HW will simply latch it so that when
426 * the first NMI finishes, it will restart the second NMI.
427 * (Note: the latch is binary, so multiple NMIs triggering while
428 * one is running are collapsed into one; only one NMI is restarted.)
429 *
430 * If an NMI hits a breakpoint that executes an iret, another
431 * NMI can preempt it. We do not want to allow this new NMI
432 * to run, but we want to execute it when the first one finishes.
433 * We set the state to "latched", and the first NMI will perform
434 * a cmpxchg on the state, and if it doesn't successfully
435 * reset the state to "not running" it will restart the next
436 * NMI.
437 */
438enum nmi_states {
439 NMI_NOT_RUNNING,
440 NMI_EXECUTING,
441 NMI_LATCHED,
442};
443static DEFINE_PER_CPU(enum nmi_states, nmi_state);
444
445#define nmi_nesting_preprocess(regs) \
446 do { \
447 if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \
448 __get_cpu_var(nmi_state) = NMI_LATCHED; \
449 return; \
450 } \
451 nmi_restart: \
452 __get_cpu_var(nmi_state) = NMI_EXECUTING; \
453 } while (0)
454
455#define nmi_nesting_postprocess() \
456 do { \
457 if (cmpxchg(&__get_cpu_var(nmi_state), \
458 NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
459 goto nmi_restart; \
460 } while (0)
461#else /* x86_64 */
462/*
463 * In x86_64 things are a bit more difficult. This has the same problem
464 * where an NMI hitting a breakpoint that calls iret will remove the
465 * NMI context, allowing a nested NMI to enter. What makes this more
466 * difficult is that both NMIs and breakpoints have their own stack.
467 * When a new NMI or breakpoint is executed, the stack is set to a fixed
468 * point. If an NMI is nested, it will have its stack set at that same
469 * fixed address that the first NMI had, and will start corrupting the
470 * stack. This is handled in entry_64.S, but the same problem exists with
471 * the breakpoint stack.
472 *
473 * If a breakpoint is being processed on the debug stack and an NMI
474 * comes in that also hits a breakpoint, the stack pointer will be
475 * set to the same fixed address as that of the interrupted
476 * breakpoint, corrupting the debug stack. To handle this case,
477 * check if the stack that was interrupted is the debug stack, and if
478 * so, change the IDT so that new breakpoints will use the current stack
479 * and not switch to the fixed address. On return of the NMI, switch back
480 * to the original IDT.
481 */
482static DEFINE_PER_CPU(int, update_debug_stack);
483
484static inline void nmi_nesting_preprocess(struct pt_regs *regs)
485{
486 /*
487 * If we interrupted a breakpoint, it is possible that
488 * the nmi handler will have breakpoints too. We need to
489 * change the IDT such that breakpoints that happen here
490 * continue to use the NMI stack.
491 */
492 if (unlikely(is_debug_stack(regs->sp))) {
493 debug_stack_set_zero();
494 __get_cpu_var(update_debug_stack) = 1;
495 }
496}
497
498static inline void nmi_nesting_postprocess(void)
499{
500 if (unlikely(__get_cpu_var(update_debug_stack)))
501 debug_stack_reset();
502}
503#endif
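
To illustrate the i386 three-state latch above, a sketch of the intended
sequence when an NMI nests (editorial, not part of the patch):

/*
 *   NMI#1 arrives             state: NOT_RUNNING -> EXECUTING
 *   NMI#1 hits a breakpoint   the breakpoint's iret re-enables NMIs
 *   NMI#2 arrives             state: EXECUTING -> LATCHED, returns at once
 *   NMI#1 completes           cmpxchg(EXECUTING -> NOT_RUNNING) fails, so
 *                             it jumps to nmi_restart and handles the
 *                             latched NMI itself
 */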
504
408dotraplinkage notrace __kprobes void 505dotraplinkage notrace __kprobes void
409do_nmi(struct pt_regs *regs, long error_code) 506do_nmi(struct pt_regs *regs, long error_code)
410{ 507{
508 nmi_nesting_preprocess(regs);
509
411 nmi_enter(); 510 nmi_enter();
412 511
413 inc_irq_stat(__nmi_count); 512 inc_irq_stat(__nmi_count);
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
416 default_do_nmi(regs); 515 default_do_nmi(regs);
417 516
418 nmi_exit(); 517 nmi_exit();
518
519 /* On i386, may loop back to preprocess */
520 nmi_nesting_postprocess();
419} 521}
420 522
421void stop_nmi(void) 523void stop_nmi(void)
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
new file mode 100644
index 000000000000..2c39dcd510fa
--- /dev/null
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -0,0 +1,181 @@
1/*
2 * arch/x86/kernel/nmi_selftest.c
3 *
4 * Testsuite for NMI: IPIs
5 *
6 * Started by Don Zickus:
7 * (using lib/locking-selftest.c as a guide)
8 *
9 * Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com>
10 */
11
12#include <linux/smp.h>
13#include <linux/cpumask.h>
14#include <linux/delay.h>
15#include <linux/init.h>
16
17#include <asm/apic.h>
18#include <asm/nmi.h>
19
20#define SUCCESS 0
21#define FAILURE 1
22#define TIMEOUT 2
23
24static int __initdata nmi_fail;
25
26/* check to see if NMI IPIs work on this machine */
27static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __initdata;
28
29static int __initdata testcase_total;
30static int __initdata testcase_successes;
31static int __initdata expected_testcase_failures;
32static int __initdata unexpected_testcase_failures;
33static int __initdata unexpected_testcase_unknowns;
34
35static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
36{
37 unexpected_testcase_unknowns++;
38 return NMI_HANDLED;
39}
40
41static void __init init_nmi_testsuite(void)
42{
43 /* trap all the unknown NMIs we may generate */
44 register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
45}
46
47static void __init cleanup_nmi_testsuite(void)
48{
49 unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk");
50}
51
52static int __init test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
53{
54 int cpu = raw_smp_processor_id();
55
56 if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask)))
57 return NMI_HANDLED;
58
59 return NMI_DONE;
60}
61
62static void __init test_nmi_ipi(struct cpumask *mask)
63{
64 unsigned long timeout;
65
66 if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
67 NMI_FLAG_FIRST, "nmi_selftest")) {
68 nmi_fail = FAILURE;
69 return;
70 }
71
72 /* sync above data before sending NMI */
73 wmb();
74
75 apic->send_IPI_mask(mask, NMI_VECTOR);
76
77 /* Don't wait longer than a second */
78 timeout = USEC_PER_SEC;
79 while (!cpumask_empty(mask) && timeout--)
80 udelay(1);
81
82	/* What happens if we time out, do we still unregister?? */
83 unregister_nmi_handler(NMI_LOCAL, "nmi_selftest");
84
85 if (!timeout)
86 nmi_fail = TIMEOUT;
87 return;
88}
89
90static void __init remote_ipi(void)
91{
92 cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask);
93 cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
94 if (!cpumask_empty(to_cpumask(nmi_ipi_mask)))
95 test_nmi_ipi(to_cpumask(nmi_ipi_mask));
96}
97
98static void __init local_ipi(void)
99{
100 cpumask_clear(to_cpumask(nmi_ipi_mask));
101 cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
102 test_nmi_ipi(to_cpumask(nmi_ipi_mask));
103}
104
105static void __init reset_nmi(void)
106{
107 nmi_fail = 0;
108}
109
110static void __init dotest(void (*testcase_fn)(void), int expected)
111{
112 testcase_fn();
113 /*
114 * Filter out expected failures:
115 */
116 if (nmi_fail != expected) {
117 unexpected_testcase_failures++;
118
119 if (nmi_fail == FAILURE)
120 printk("FAILED |");
121 else if (nmi_fail == TIMEOUT)
122 printk("TIMEOUT|");
123 else
124 printk("ERROR |");
125 dump_stack();
126 } else {
127 testcase_successes++;
128 printk(" ok |");
129 }
130 testcase_total++;
131
132 reset_nmi();
133}
134
135static inline void __init print_testname(const char *testname)
136{
137 printk("%12s:", testname);
138}
139
140void __init nmi_selftest(void)
141{
142 init_nmi_testsuite();
143
144 /*
145 * Run the testsuite:
146 */
147 printk("----------------\n");
148 printk("| NMI testsuite:\n");
149 printk("--------------------\n");
150
151 print_testname("remote IPI");
152 dotest(remote_ipi, SUCCESS);
153 printk("\n");
154 print_testname("local IPI");
155 dotest(local_ipi, SUCCESS);
156 printk("\n");
157
158 cleanup_nmi_testsuite();
159
160 if (unexpected_testcase_failures) {
161 printk("--------------------\n");
162 printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
163 unexpected_testcase_failures, testcase_total);
164 printk("-----------------------------------------------------------------\n");
165 } else if (expected_testcase_failures && testcase_successes) {
166 printk("--------------------\n");
167 printk("%3d out of %3d testcases failed, as expected. |\n",
168 expected_testcase_failures, testcase_total);
169 printk("----------------------------------------------------\n");
170 } else if (expected_testcase_failures && !testcase_successes) {
171 printk("--------------------\n");
172 printk("All %3d testcases failed, as expected. |\n",
173 expected_testcase_failures);
174 printk("----------------------------------------\n");
175 } else {
176 printk("--------------------\n");
177 printk("Good, all %3d testcases passed! |\n",
178 testcase_successes);
179 printk("---------------------------------\n");
180 }
181}
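
The selftest above uses the NMI handler registration API directly; a minimal
sketch of the same call pattern for a custom handler (handler name and
message are illustrative, not part of the patch):

#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/nmi.h>

static int my_nmi_handler(unsigned int val, struct pt_regs *regs)
{
	pr_info("NMI seen (type %u)\n", val);
	return NMI_DONE;	/* not ours; let other handlers run */
}

static int __init my_nmi_init(void)
{
	/* Same pattern as register_nmi_handler() in the selftest above */
	return register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_nmi");
}
late_initcall(my_nmi_init);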
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d90272e6bc40..ab137605e694 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -26,6 +26,7 @@
26 26
27#include <asm/bug.h> 27#include <asm/bug.h>
28#include <asm/paravirt.h> 28#include <asm/paravirt.h>
29#include <asm/debugreg.h>
29#include <asm/desc.h> 30#include <asm/desc.h>
30#include <asm/setup.h> 31#include <asm/setup.h>
31#include <asm/pgtable.h> 32#include <asm/pgtable.h>
@@ -37,6 +38,7 @@
37#include <asm/apic.h> 38#include <asm/apic.h>
38#include <asm/tlbflush.h> 39#include <asm/tlbflush.h>
39#include <asm/timer.h> 40#include <asm/timer.h>
41#include <asm/special_insns.h>
40 42
41/* nop stub */ 43/* nop stub */
42void _paravirt_nop(void) 44void _paravirt_nop(void)
@@ -202,8 +204,8 @@ static void native_flush_tlb_single(unsigned long addr)
202 __native_flush_tlb_single(addr); 204 __native_flush_tlb_single(addr);
203} 205}
204 206
205struct jump_label_key paravirt_steal_enabled; 207struct static_key paravirt_steal_enabled;
206struct jump_label_key paravirt_steal_rq_enabled; 208struct static_key paravirt_steal_rq_enabled;
207 209
208static u64 native_steal_clock(int cpu) 210static u64 native_steal_clock(int cpu)
209{ 211{
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 726494b58345..d0b2fb9ccbb1 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -42,7 +42,6 @@
42#include <asm/calgary.h> 42#include <asm/calgary.h>
43#include <asm/tce.h> 43#include <asm/tce.h>
44#include <asm/pci-direct.h> 44#include <asm/pci-direct.h>
45#include <asm/system.h>
46#include <asm/dma.h> 45#include <asm/dma.h>
47#include <asm/rio.h> 46#include <asm/rio.h>
48#include <asm/bios_ebda.h> 47#include <asm/bios_ebda.h>
@@ -431,7 +430,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
431} 430}
432 431
433static void* calgary_alloc_coherent(struct device *dev, size_t size, 432static void* calgary_alloc_coherent(struct device *dev, size_t size,
434 dma_addr_t *dma_handle, gfp_t flag) 433 dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
435{ 434{
436 void *ret = NULL; 435 void *ret = NULL;
437 dma_addr_t mapping; 436 dma_addr_t mapping;
@@ -464,7 +463,8 @@ error:
464} 463}
465 464
466static void calgary_free_coherent(struct device *dev, size_t size, 465static void calgary_free_coherent(struct device *dev, size_t size,
467 void *vaddr, dma_addr_t dma_handle) 466 void *vaddr, dma_addr_t dma_handle,
467 struct dma_attrs *attrs)
468{ 468{
469 unsigned int npages; 469 unsigned int npages;
470 struct iommu_table *tbl = find_iommu_table(dev); 470 struct iommu_table *tbl = find_iommu_table(dev);
@@ -477,8 +477,8 @@ static void calgary_free_coherent(struct device *dev, size_t size,
477} 477}
478 478
479static struct dma_map_ops calgary_dma_ops = { 479static struct dma_map_ops calgary_dma_ops = {
480 .alloc_coherent = calgary_alloc_coherent, 480 .alloc = calgary_alloc_coherent,
481 .free_coherent = calgary_free_coherent, 481 .free = calgary_free_coherent,
482 .map_sg = calgary_map_sg, 482 .map_sg = calgary_map_sg,
483 .unmap_sg = calgary_unmap_sg, 483 .unmap_sg = calgary_unmap_sg,
484 .map_page = calgary_map_page, 484 .map_page = calgary_map_page,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1c4d769e21ea..3003250ac51d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -96,7 +96,8 @@ void __init pci_iommu_alloc(void)
96 } 96 }
97} 97}
98void *dma_generic_alloc_coherent(struct device *dev, size_t size, 98void *dma_generic_alloc_coherent(struct device *dev, size_t size,
99 dma_addr_t *dma_addr, gfp_t flag) 99 dma_addr_t *dma_addr, gfp_t flag,
100 struct dma_attrs *attrs)
100{ 101{
101 unsigned long dma_mask; 102 unsigned long dma_mask;
102 struct page *page; 103 struct page *page;
@@ -262,10 +263,11 @@ rootfs_initcall(pci_iommu_init);
262 263
263static __devinit void via_no_dac(struct pci_dev *dev) 264static __devinit void via_no_dac(struct pci_dev *dev)
264{ 265{
265 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { 266 if (forbid_dac == 0) {
266 dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); 267 dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
267 forbid_dac = 1; 268 forbid_dac = 1;
268 } 269 }
269} 270}
270DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); 271DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
272 PCI_CLASS_BRIDGE_PCI, 8, via_no_dac);
271#endif 273#endif
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 3af4af810c07..f96050685b46 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -75,7 +75,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
 }
 
 static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
-				dma_addr_t dma_addr)
+				dma_addr_t dma_addr, struct dma_attrs *attrs)
 {
 	free_pages((unsigned long)vaddr, get_order(size));
 }
@@ -96,8 +96,8 @@ static void nommu_sync_sg_for_device(struct device *dev,
 }
 
 struct dma_map_ops nommu_dma_ops = {
-	.alloc_coherent = dma_generic_alloc_coherent,
-	.free_coherent = nommu_free_coherent,
+	.alloc = dma_generic_alloc_coherent,
+	.free = nommu_free_coherent,
 	.map_sg = nommu_map_sg,
 	.map_page = nommu_map_page,
 	.sync_single_for_device = nommu_sync_single_for_device,
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 8f972cbddef0..6c483ba98b9c 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -15,21 +15,30 @@
 int swiotlb __read_mostly;
 
 static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags)
+					dma_addr_t *dma_handle, gfp_t flags,
+					struct dma_attrs *attrs)
 {
 	void *vaddr;
 
-	vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags);
+	vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
+					   attrs);
 	if (vaddr)
 		return vaddr;
 
 	return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
 }
 
+static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+				      void *vaddr, dma_addr_t dma_addr,
+				      struct dma_attrs *attrs)
+{
+	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+}
+
 static struct dma_map_ops swiotlb_dma_ops = {
 	.mapping_error = swiotlb_dma_mapping_error,
-	.alloc_coherent = x86_swiotlb_alloc_coherent,
-	.free_coherent = swiotlb_free_coherent,
+	.alloc = x86_swiotlb_alloc_coherent,
+	.free = x86_swiotlb_free_coherent,
 	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
 	.sync_single_for_device = swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 34e06e84ce31..0bc72e2069e3 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -12,6 +12,7 @@
 #include <linux/pci.h>
 #include <linux/export.h>
 
+#include <asm/probe_roms.h>
 #include <asm/pci-direct.h>
 #include <asm/e820.h>
 #include <asm/mmzone.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 15763af7bfe3..1d92a5ab6e8b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -12,16 +12,37 @@
 #include <linux/user-return-notifier.h>
 #include <linux/dmi.h>
 #include <linux/utsname.h>
+#include <linux/stackprotector.h>
+#include <linux/tick.h>
+#include <linux/cpuidle.h>
 #include <trace/events/power.h>
 #include <linux/hw_breakpoint.h>
 #include <asm/cpu.h>
-#include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
+#include <asm/nmi.h>
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU(unsigned char, is_idle);
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+	atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+#endif
 
 struct kmem_cache *task_xstate_cachep;
 EXPORT_SYMBOL_GPL(task_xstate_cachep);
@@ -341,44 +362,113 @@ void (*pm_idle)(void);
 EXPORT_SYMBOL(pm_idle);
 #endif
 
-#ifdef CONFIG_X86_32
-/*
- * This halt magic was a workaround for ancient floppy DMA
- * wreckage. It should be safe to remove.
- */
-static int hlt_counter;
-void disable_hlt(void)
+static inline int hlt_use_halt(void)
 {
-	hlt_counter++;
+	return 1;
 }
-EXPORT_SYMBOL(disable_hlt);
 
-void enable_hlt(void)
+#ifndef CONFIG_SMP
+static inline void play_dead(void)
 {
-	hlt_counter--;
+	BUG();
 }
-EXPORT_SYMBOL(enable_hlt);
+#endif
 
-static inline int hlt_use_halt(void)
+#ifdef CONFIG_X86_64
+void enter_idle(void)
 {
-	return (!hlt_counter && boot_cpu_data.hlt_works_ok);
+	percpu_write(is_idle, 1);
+	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
 }
-#else
-static inline int hlt_use_halt(void)
+
+static void __exit_idle(void)
 {
-	return 1;
+	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
+		return;
+	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+}
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+	/* idle loop has pid 0 */
+	if (current->pid)
+		return;
+	__exit_idle();
 }
 #endif
 
 /*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle(void)
+{
+	/*
+	 * If we're the non-boot CPU, nothing set the stack canary up
+	 * for us. CPU0 already has it initialized but no harm in
+	 * doing it again. This is a good place for updating it, as
+	 * we wont ever return from this function (so the invalid
+	 * canaries already on the stack wont ever trigger).
+	 */
+	boot_init_stack_canary();
+	current_thread_info()->status |= TS_POLLING;
+
+	while (1) {
+		tick_nohz_idle_enter();
+
+		while (!need_resched()) {
+			rmb();
+
+			if (cpu_is_offline(smp_processor_id()))
+				play_dead();
+
+			/*
+			 * Idle routines should keep interrupts disabled
+			 * from here on, until they go to idle.
+			 * Otherwise, idle callbacks can misfire.
+			 */
+			local_touch_nmi();
+			local_irq_disable();
+
+			enter_idle();
+
+			/* Don't trace irqs off for idle */
+			stop_critical_timings();
+
+			/* enter_idle() needs rcu for notifiers */
+			rcu_idle_enter();
+
+			if (cpuidle_idle_call())
+				pm_idle();
+
+			rcu_idle_exit();
+			start_critical_timings();
+
+			/* In many cases the interrupt that ended idle
+			   has already called exit_idle. But some idle
+			   loops can be woken up without interrupt. */
+			__exit_idle();
+		}
+
+		tick_nohz_idle_exit();
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
+	}
+}
+
+/*
  * We use this if we don't have any better
  * idle routine..
  */
 void default_idle(void)
 {
 	if (hlt_use_halt()) {
-		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
-		trace_cpu_idle(1, smp_processor_id());
+		trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+		trace_cpu_idle_rcuidle(1, smp_processor_id());
 		current_thread_info()->status &= ~TS_POLLING;
 		/*
 		 * TS_POLLING-cleared state must be visible before we
@@ -391,8 +481,8 @@ void default_idle(void)
 		else
 			local_irq_enable();
 		current_thread_info()->status |= TS_POLLING;
-		trace_power_end(smp_processor_id());
-		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+		trace_power_end_rcuidle(smp_processor_id());
+		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	} else {
 		local_irq_enable();
 		/* loop is done by the caller */
@@ -450,8 +540,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 static void mwait_idle(void)
 {
 	if (!need_resched()) {
-		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
-		trace_cpu_idle(1, smp_processor_id());
+		trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+		trace_cpu_idle_rcuidle(1, smp_processor_id());
 		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
@@ -461,8 +551,8 @@ static void mwait_idle(void)
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
-		trace_power_end(smp_processor_id());
-		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+		trace_power_end_rcuidle(smp_processor_id());
+		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	} else
 		local_irq_enable();
 }
@@ -474,13 +564,13 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-	trace_power_start(POWER_CSTATE, 0, smp_processor_id());
-	trace_cpu_idle(0, smp_processor_id());
+	trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
+	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
-	trace_power_end(smp_processor_id());
-	trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+	trace_power_end_rcuidle(smp_processor_id());
+	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 /*
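
With the x86-64 idle notifier machinery now living in the shared process.c, consumers are unchanged. A sketch of the subscriber side, assuming a built-in x86-64 caller (my_idle_nb, my_idle_event and my_idle_init are hypothetical):

	#include <linux/init.h>
	#include <linux/notifier.h>
	#include <asm/idle.h>

	static int my_idle_event(struct notifier_block *nb, unsigned long action,
				 void *data)
	{
		if (action == IDLE_START)
			;	/* this CPU is entering the idle loop */
		else if (action == IDLE_END)
			;	/* this CPU is leaving idle */
		return NOTIFY_OK;
	}

	static struct notifier_block my_idle_nb = {
		.notifier_call = my_idle_event,
	};

	static int __init my_idle_init(void)
	{
		idle_notifier_register(&my_idle_nb);
		return 0;
	}
	device_initcall(my_idle_init);
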
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 485204f58cda..ae6847303e26 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -9,7 +9,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
@@ -31,20 +30,18 @@
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
 #include <linux/personality.h>
-#include <linux/tick.h>
 #include <linux/percpu.h>
 #include <linux/prctl.h>
 #include <linux/ftrace.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
-#include <linux/cpuidle.h>
 
 #include <asm/pgtable.h>
-#include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
+#include <asm/fpu-internal.h>
 #include <asm/desc.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
@@ -57,7 +54,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
-#include <asm/nmi.h>
+#include <asm/switch_to.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -69,62 +66,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return ((unsigned long *)tsk->thread.sp)[3];
 }
 
-#ifndef CONFIG_SMP
-static inline void play_dead(void)
-{
-	BUG();
-}
-#endif
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
-{
-	int cpu = smp_processor_id();
-
-	/*
-	 * If we're the non-boot CPU, nothing set the stack canary up
-	 * for us. CPU0 already has it initialized but no harm in
-	 * doing it again. This is a good place for updating it, as
-	 * we wont ever return from this function (so the invalid
-	 * canaries already on the stack wont ever trigger).
-	 */
-	boot_init_stack_canary();
-
-	current_thread_info()->status |= TS_POLLING;
-
-	/* endless idle loop with no priority at all */
-	while (1) {
-		tick_nohz_idle_enter();
-		rcu_idle_enter();
-		while (!need_resched()) {
-
-			check_pgt_cache();
-			rmb();
-
-			if (cpu_is_offline(cpu))
-				play_dead();
-
-			local_touch_nmi();
-			local_irq_disable();
-			/* Don't trace irqs off for idle */
-			stop_critical_timings();
-			if (cpuidle_idle_call())
-				pm_idle();
-			start_critical_timings();
-		}
-		rcu_idle_exit();
-		tick_nohz_idle_exit();
-		preempt_enable_no_resched();
-		schedule();
-		preempt_disable();
-	}
-}
-
 void __show_regs(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -214,6 +155,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	task_user_gs(p) = get_user_gs(regs);
 
+	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
 	err = -ENOMEM;
@@ -299,22 +241,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
			     *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
-	bool preload_fpu;
+	fpu_switch_t fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	/*
-	 * If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
-	__unlazy_fpu(prev_p);
-
-	/* we're going to use this soon, after a few expensive things */
-	if (preload_fpu)
-		prefetch(next->fpu.state);
+	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
 	/*
 	 * Reload esp0.
@@ -354,11 +285,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	/* If we're going to preload the fpu context, make sure clts
-	   is run while we're batching the cpu state updates. */
-	if (preload_fpu)
-		clts();
-
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -368,15 +294,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);
 
-	if (preload_fpu)
-		__math_state_restore();
-
 	/*
 	 * Restore %gs if needed (which is common)
 	 */
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);
 
+	switch_fpu_finish(next_p, fpu);
+
 	percpu_write(current_task, next_p);
 
 	return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9b9fe4a85c87..733ca39f367e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -14,7 +14,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
@@ -32,17 +31,15 @@
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
-#include <linux/tick.h>
 #include <linux/prctl.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
-#include <linux/cpuidle.h>
 
 #include <asm/pgtable.h>
-#include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
+#include <asm/fpu-internal.h>
 #include <asm/mmu_context.h>
 #include <asm/prctl.h>
 #include <asm/desc.h>
@@ -51,116 +48,11 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
-#include <asm/nmi.h>
+#include <asm/switch_to.h>
 
 asmlinkage extern void ret_from_fork(void);
 
 DEFINE_PER_CPU(unsigned long, old_rsp);
-static DEFINE_PER_CPU(unsigned char, is_idle);
-
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
-	atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
-	atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
-
-void enter_idle(void)
-{
-	percpu_write(is_idle, 1);
-	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
-}
-
-static void __exit_idle(void)
-{
-	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
-		return;
-	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
-}
-
-/* Called from interrupts to signify idle end */
-void exit_idle(void)
-{
-	/* idle loop has pid 0 */
-	if (current->pid)
-		return;
-	__exit_idle();
-}
-
-#ifndef CONFIG_SMP
-static inline void play_dead(void)
-{
-	BUG();
-}
-#endif
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
-{
-	current_thread_info()->status |= TS_POLLING;
-
-	/*
-	 * If we're the non-boot CPU, nothing set the stack canary up
-	 * for us. CPU0 already has it initialized but no harm in
-	 * doing it again. This is a good place for updating it, as
-	 * we wont ever return from this function (so the invalid
-	 * canaries already on the stack wont ever trigger).
-	 */
-	boot_init_stack_canary();
-
-	/* endless idle loop with no priority at all */
-	while (1) {
-		tick_nohz_idle_enter();
-		while (!need_resched()) {
-
-			rmb();
-
-			if (cpu_is_offline(smp_processor_id()))
-				play_dead();
-			/*
-			 * Idle routines should keep interrupts disabled
-			 * from here on, until they go to idle.
-			 * Otherwise, idle callbacks can misfire.
-			 */
-			local_touch_nmi();
-			local_irq_disable();
-			enter_idle();
-			/* Don't trace irqs off for idle */
-			stop_critical_timings();
-
-			/* enter_idle() needs rcu for notifiers */
-			rcu_idle_enter();
-
-			if (cpuidle_idle_call())
-				pm_idle();
-
-			rcu_idle_exit();
-			start_critical_timings();
-
-			/* In many cases the interrupt that ended idle
-			   has already called exit_idle. But some idle
-			   loops can be woken up without interrupt. */
-			__exit_idle();
-		}
-
-		tick_nohz_idle_exit();
-		preempt_enable_no_resched();
-		schedule();
-		preempt_disable();
-	}
-}
 
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, int all)
@@ -286,6 +178,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	set_tsk_thread_flag(p, TIF_FORK);
 
+	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
@@ -341,6 +234,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	loadsegment(es, _ds);
 	loadsegment(ds, _ds);
 	load_gs_index(0);
+	current->thread.usersp = new_sp;
 	regs->ip = new_ip;
 	regs->sp = new_sp;
 	percpu_write(old_rsp, new_sp);
@@ -364,7 +258,9 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
 {
 	start_thread_common(regs, new_ip, new_sp,
-			    __USER32_CS, __USER32_DS, __USER32_DS);
+			    test_thread_flag(TIF_X32)
+			    ? __USER_CS : __USER32_CS,
+			    __USER_DS, __USER_DS);
 }
 #endif
 
@@ -386,18 +282,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
-	bool preload_fpu;
+	fpu_switch_t fpu;
 
-	/*
-	 * If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
-	/* we're going to use this soon, after a few expensive things */
-	if (preload_fpu)
-		prefetch(next->fpu.state);
+	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
@@ -427,13 +314,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	load_TLS(next, cpu);
 
-	/* Must be after DS reload */
-	__unlazy_fpu(prev_p);
-
-	/* Make sure cpu is ready for new context */
-	if (preload_fpu)
-		clts();
-
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -474,6 +354,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
+	switch_fpu_finish(next_p, fpu);
+
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
@@ -492,13 +374,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	/*
-	 * Preload the FPU context, now that we've determined that the
-	 * task is likely to be using it.
-	 */
-	if (preload_fpu)
-		__math_state_restore();
-
 	return prev_p;
 }
 
@@ -508,6 +383,8 @@ void set_personality_64bit(void)
 
 	/* Make sure to be in 64bit mode */
 	clear_thread_flag(TIF_IA32);
+	clear_thread_flag(TIF_ADDR32);
+	clear_thread_flag(TIF_X32);
 
 	/* Ensure the corresponding mm is not marked. */
 	if (current->mm)
@@ -520,20 +397,31 @@ void set_personality_64bit(void)
 	current->personality &= ~READ_IMPLIES_EXEC;
 }
 
-void set_personality_ia32(void)
+void set_personality_ia32(bool x32)
 {
 	/* inherit personality from parent */
 
 	/* Make sure to be in 32bit mode */
-	set_thread_flag(TIF_IA32);
-	current->personality |= force_personality32;
+	set_thread_flag(TIF_ADDR32);
 
 	/* Mark the associated mm as containing 32-bit tasks. */
 	if (current->mm)
 		current->mm->context.ia32_compat = 1;
 
-	/* Prepare the first "return" to user space */
-	current_thread_info()->status |= TS_COMPAT;
+	if (x32) {
+		clear_thread_flag(TIF_IA32);
+		set_thread_flag(TIF_X32);
+		current->personality &= ~READ_IMPLIES_EXEC;
+		/* is_compat_task() uses the presence of the x32
+		   syscall bit flag to determine compat status */
+		current_thread_info()->status &= ~TS_COMPAT;
+	} else {
+		set_thread_flag(TIF_IA32);
+		clear_thread_flag(TIF_X32);
+		current->personality |= force_personality32;
+		/* Prepare the first "return" to user space */
+		current_thread_info()->status |= TS_COMPAT;
+	}
}
 
 unsigned long get_wchan(struct task_struct *p)
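
Both __switch_to() variants above now funnel their lazy-FPU handling through the same pair of helpers from the new <asm/fpu-internal.h>. A reduced sketch of the contract, with everything except the FPU steps elided (example_switch is hypothetical):

	#include <asm/fpu-internal.h>

	static struct task_struct *
	example_switch(struct task_struct *prev_p, struct task_struct *next_p, int cpu)
	{
		fpu_switch_t fpu;

		/* saves prev's live FPU state and decides, from
		 * next_p->fpu_counter, whether next's state should be
		 * preloaded eagerly */
		fpu = switch_fpu_prepare(prev_p, next_p, cpu);

		/* ... esp0, TLS and segment reloads happen here ... */

		/* completes the preload chosen above; replaces the old
		 * open-coded preload_fpu/clts()/__math_state_restore() */
		switch_fpu_finish(next_p, fpu);

		return prev_p;
	}
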
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 89a04c7b5bb6..685845cf16e0 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -24,15 +24,16 @@
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
-#include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
+#include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/prctl.h>
 #include <asm/proto.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/traps.h>
 
 #include "tls.h"
 
@@ -1130,6 +1131,100 @@ static int genregs32_set(struct task_struct *target,
 	return ret;
 }
 
+#ifdef CONFIG_X86_X32_ABI
+static long x32_arch_ptrace(struct task_struct *child,
+			    compat_long_t request, compat_ulong_t caddr,
+			    compat_ulong_t cdata)
+{
+	unsigned long addr = caddr;
+	unsigned long data = cdata;
+	void __user *datap = compat_ptr(data);
+	int ret;
+
+	switch (request) {
+	/* Read 32bits at location addr in the USER area. Only allow
+	   to return the lower 32bits of segment and debug registers. */
+	case PTRACE_PEEKUSR: {
+		u32 tmp;
+
+		ret = -EIO;
+		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
+		    addr < offsetof(struct user_regs_struct, cs))
+			break;
+
+		tmp = 0;  /* Default return condition */
+		if (addr < sizeof(struct user_regs_struct))
+			tmp = getreg(child, addr);
+		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
+			 addr <= offsetof(struct user, u_debugreg[7])) {
+			addr -= offsetof(struct user, u_debugreg[0]);
+			tmp = ptrace_get_debugreg(child, addr / sizeof(data));
+		}
+		ret = put_user(tmp, (__u32 __user *)datap);
+		break;
+	}
+
+	/* Write the word at location addr in the USER area. Only allow
+	   to update segment and debug registers with the upper 32bits
+	   zero-extended. */
+	case PTRACE_POKEUSR:
+		ret = -EIO;
+		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
+		    addr < offsetof(struct user_regs_struct, cs))
+			break;
+
+		if (addr < sizeof(struct user_regs_struct))
+			ret = putreg(child, addr, data);
+		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
+			 addr <= offsetof(struct user, u_debugreg[7])) {
+			addr -= offsetof(struct user, u_debugreg[0]);
+			ret = ptrace_set_debugreg(child,
+						  addr / sizeof(data), data);
+		}
+		break;
+
+	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
+		return copy_regset_to_user(child,
+					   task_user_regset_view(current),
+					   REGSET_GENERAL,
+					   0, sizeof(struct user_regs_struct),
+					   datap);
+
+	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
+		return copy_regset_from_user(child,
+					     task_user_regset_view(current),
+					     REGSET_GENERAL,
+					     0, sizeof(struct user_regs_struct),
+					     datap);
+
+	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
+		return copy_regset_to_user(child,
+					   task_user_regset_view(current),
+					   REGSET_FP,
+					   0, sizeof(struct user_i387_struct),
+					   datap);
+
+	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
+		return copy_regset_from_user(child,
+					     task_user_regset_view(current),
+					     REGSET_FP,
+					     0, sizeof(struct user_i387_struct),
+					     datap);
+
+	/* normal 64bit interface to access TLS data.
+	   Works just like arch_prctl, except that the arguments
+	   are reversed. */
+	case PTRACE_ARCH_PRCTL:
+		return do_arch_prctl(child, data, addr);
+
+	default:
+		return compat_ptrace_request(child, request, addr, data);
+	}
+
+	return ret;
+}
+#endif
+
 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
@@ -1139,6 +1234,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 	int ret;
 	__u32 val;
 
+#ifdef CONFIG_X86_X32_ABI
+	if (!is_ia32_task())
+		return x32_arch_ptrace(child, request, caddr, cdata);
+#endif
+
 	switch (request) {
 	case PTRACE_PEEKUSR:
 		ret = getreg32(child, addr, &val);
@@ -1326,7 +1426,7 @@ static void fill_sigtrap_info(struct task_struct *tsk,
				int error_code, int si_code,
				struct siginfo *info)
 {
-	tsk->thread.trap_no = 1;
+	tsk->thread.trap_nr = X86_TRAP_DB;
 	tsk->thread.error_code = error_code;
 
 	memset(info, 0, sizeof(*info));
@@ -1392,20 +1492,18 @@ long syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->orig_ax);
 
-	if (unlikely(current->audit_context)) {
-		if (IS_IA32)
-			audit_syscall_entry(AUDIT_ARCH_I386,
-					    regs->orig_ax,
-					    regs->bx, regs->cx,
-					    regs->dx, regs->si);
+	if (IS_IA32)
+		audit_syscall_entry(AUDIT_ARCH_I386,
+				    regs->orig_ax,
+				    regs->bx, regs->cx,
+				    regs->dx, regs->si);
 #ifdef CONFIG_X86_64
-		else
-			audit_syscall_entry(AUDIT_ARCH_X86_64,
-					    regs->orig_ax,
-					    regs->di, regs->si,
-					    regs->dx, regs->r10);
+	else
+		audit_syscall_entry(AUDIT_ARCH_X86_64,
+				    regs->orig_ax,
+				    regs->di, regs->si,
+				    regs->dx, regs->r10);
 #endif
-	}
 
 	return ret ?: regs->orig_ax;
 }
@@ -1414,8 +1512,7 @@ void syscall_trace_leave(struct pt_regs *regs)
 {
 	bool step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->ax);
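
The syscall_trace_enter()/syscall_trace_leave() hunks can drop the current->audit_context guard because, after the audit rework merged alongside this, audit_syscall_entry() and audit_syscall_exit() are inline wrappers that perform an equivalent cheap check themselves before calling into auditsc. Roughly (a sketch of the wrapper shape, not a verbatim copy of include/linux/audit.h):

	static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
					       unsigned long a1, unsigned long a2,
					       unsigned long a3)
	{
		if (unlikely(!audit_dummy_context()))	/* no-op when auditing is off */
			__audit_syscall_entry(arch, major, a0, a1, a2, a3);
	}
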
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 37a458b521a6..d840e69a853c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -39,6 +39,14 @@ static int reboot_mode;
 enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
 
+/* This variable is used privately to keep track of whether or not
+ * reboot_type is still set to its default value (i.e., reboot= hasn't
+ * been set on the command line). This is needed so that we can
+ * suppress DMI scanning for reboot quirks. Without it, it's
+ * impossible to override a faulty reboot quirk without recompiling.
+ */
+static int reboot_default = 1;
+
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
 static int reboot_cpu = -1;
 #endif
@@ -67,6 +75,12 @@ bool port_cf9_safe = false;
 static int __init reboot_setup(char *str)
 {
 	for (;;) {
+		/* Having anything passed on the command line via
+		 * reboot= will cause us to disable DMI checking
+		 * below.
+		 */
+		reboot_default = 0;
+
 		switch (*str) {
 		case 'w':
 			reboot_mode = 0x1234;
@@ -295,14 +309,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
-	{	/* Handle problems with rebooting on VersaLogic Menlow boards */
-		.callback = set_bios_reboot,
-		.ident = "VersaLogic Menlow based board",
-		.matches = {
-			DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
-			DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
-		},
-	},
 	{	/* Handle reboot issue on Acer Aspire one */
 		.callback = set_kbd_reboot,
 		.ident = "Acer Aspire One A110",
@@ -316,7 +322,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 
 static int __init reboot_init(void)
 {
-	dmi_check_system(reboot_dmi_table);
+	/* Only do the DMI check if reboot_type hasn't been overridden
+	 * on the command line
+	 */
+	if (reboot_default) {
+		dmi_check_system(reboot_dmi_table);
+	}
 	return 0;
 }
 core_initcall(reboot_init);
@@ -465,7 +476,12 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
 
 static int __init pci_reboot_init(void)
 {
-	dmi_check_system(pci_reboot_dmi_table);
+	/* Only do the DMI check if reboot_type hasn't been overridden
+	 * on the command line
+	 */
+	if (reboot_default) {
+		dmi_check_system(pci_reboot_dmi_table);
+	}
 	return 0;
 }
 core_initcall(pci_reboot_init);
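
The reboot_default guard generalizes: any handled __setup() parameter can record that the user made an explicit choice and use that to suppress quirk tables later. A hedged sketch of the pattern in isolation (all "myquirk"/"myopt" names are hypothetical):

	#include <linux/init.h>
	#include <linux/dmi.h>

	static int myquirk_default = 1;	/* no "myopt=" seen on the command line */

	static int __init myopt_setup(char *str)
	{
		myquirk_default = 0;	/* explicit option: trust the user */
		/* ... parse str ... */
		return 1;
	}
	__setup("myopt=", myopt_setup);

	static int __init myquirk_init(void)
	{
		if (myquirk_default)	/* only scan DMI when nothing was forced */
			;		/* dmi_check_system(my_dmi_table); */
		return 0;
	}
	core_initcall(myquirk_init);
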
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d05444ac2aea..1a2901562059 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -90,7 +90,6 @@
 #include <asm/processor.h>
 #include <asm/bugs.h>
 
-#include <asm/system.h>
 #include <asm/vsyscall.h>
 #include <asm/cpu.h>
 #include <asm/desc.h>
@@ -509,15 +508,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 
 #ifdef CONFIG_KEXEC
 
-static inline unsigned long long get_total_mem(void)
-{
-	unsigned long long total;
-
-	total = max_pfn - min_low_pfn;
-
-	return total << PAGE_SHIFT;
-}
-
 /*
  * Keep the crash kernel below this limit. On 32 bits earlier kernels
  * would limit the kernel to the low 512 MiB due to mapping restrictions.
@@ -536,7 +526,7 @@ static void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	total_mem = get_total_mem();
+	total_mem = memblock_phys_mem_size();
 
 	ret = parse_crashkernel(boot_command_line, total_mem,
 				&crash_size, &crash_base);
@@ -749,15 +739,16 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-#ifdef CONFIG_X86_32
-		     "EL32",
-#else
-		     "EL64",
-#endif
-		     4)) {
+		     "EL32", 4)) {
+		efi_enabled = 1;
+		efi_64bit = false;
+	} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+		     "EL64", 4)) {
 		efi_enabled = 1;
-		efi_memblock_x86_reserve_range();
+		efi_64bit = true;
 	}
+	if (efi_enabled && efi_memblock_x86_reserve_range())
+		efi_enabled = 0;
 #endif
 
 	x86_init.oem.arch_setup();
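
The rewritten signature check above replaces a compile-time #ifdef with a runtime decision: the kernel can now record that it was booted by the other word-size EFI loader (efi_64bit) instead of silently ignoring it, and it backs out cleanly if reserving the EFI memmap fails. The decision table as a standalone sketch (classify_efi is hypothetical; efi_enabled/efi_64bit are the real globals being set):

	#include <linux/string.h>

	static void classify_efi(const char sig[4], int *enabled, int *is64)
	{
		*enabled = 1;
		if (!strncmp(sig, "EL32", 4))
			*is64 = 0;	/* booted by a 32-bit EFI loader */
		else if (!strncmp(sig, "EL64", 4))
			*is64 = 1;	/* booted by a 64-bit EFI loader */
		else
			*enabled = 0;	/* no EFI loader signature at all */
	}
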
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 46a01bdc27e2..115eac431483 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -10,10 +10,8 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/kernel.h>
-#include <linux/signal.h>
 #include <linux/errno.h>
 #include <linux/wait.h>
-#include <linux/ptrace.h>
 #include <linux/tracehook.h>
 #include <linux/unistd.h>
 #include <linux/stddef.h>
@@ -24,12 +22,15 @@
 #include <asm/processor.h>
 #include <asm/ucontext.h>
 #include <asm/i387.h>
+#include <asm/fpu-internal.h>
 #include <asm/vdso.h>
 #include <asm/mce.h>
+#include <asm/sighandling.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
+#include <asm/sys_ia32.h>
 #endif /* CONFIG_X86_64 */
 
 #include <asm/syscall.h>
@@ -37,13 +38,6 @@
 
 #include <asm/sigframe.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-#define __FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
-			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
-			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
-			 X86_EFLAGS_CF)
-
 #ifdef CONFIG_X86_32
 # define FIX_EFLAGS	(__FIX_EFLAGS | X86_EFLAGS_RF)
 #else
@@ -68,9 +62,8 @@
 		regs->seg = GET_SEG(seg) | 3;	\
 } while (0)
 
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-		   unsigned long *pax)
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
+		       unsigned long *pax)
 {
 	void __user *buf;
 	unsigned int tmpflags;
@@ -125,9 +118,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	return err;
 }
 
-static int
-setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
-		 struct pt_regs *regs, unsigned long mask)
+int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
+		     struct pt_regs *regs, unsigned long mask)
 {
 	int err = 0;
 
@@ -159,7 +151,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		put_user_ex(regs->r15, &sc->r15);
 #endif /* CONFIG_X86_64 */
 
-		put_user_ex(current->thread.trap_no, &sc->trapno);
+		put_user_ex(current->thread.trap_nr, &sc->trapno);
 		put_user_ex(current->thread.error_code, &sc->err);
 		put_user_ex(regs->ip, &sc->ip);
 #ifdef CONFIG_X86_32
@@ -642,6 +634,16 @@ static int signr_convert(int sig)
 #define is_ia32	0
 #endif /* CONFIG_IA32_EMULATION */
 
+#ifdef CONFIG_X86_X32_ABI
+#define is_x32	test_thread_flag(TIF_X32)
+
+static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
+			      siginfo_t *info, compat_sigset_t *set,
+			      struct pt_regs *regs);
+#else /* !CONFIG_X86_X32_ABI */
+#define is_x32	0
+#endif /* CONFIG_X86_X32_ABI */
+
 int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			sigset_t *set, struct pt_regs *regs);
 int ia32_setup_frame(int sig, struct k_sigaction *ka,
@@ -666,8 +668,14 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
 		else
 			ret = ia32_setup_frame(usig, ka, set, regs);
-	} else
+#ifdef CONFIG_X86_X32_ABI
+	} else if (is_x32) {
+		ret = x32_setup_rt_frame(usig, ka, info,
+					 (compat_sigset_t *)set, regs);
+#endif
+	} else {
 		ret = __setup_rt_frame(sig, ka, info, set, regs);
+	}
 
 	if (ret) {
 		force_sigsegv(sig, current);
@@ -850,3 +858,102 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 
 	force_sig(SIGSEGV, me);
 }
+
+#ifdef CONFIG_X86_X32_ABI
+static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
+			      siginfo_t *info, compat_sigset_t *set,
+			      struct pt_regs *regs)
+{
+	struct rt_sigframe_x32 __user *frame;
+	void __user *restorer;
+	int err = 0;
+	void __user *fpstate = NULL;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		if (copy_siginfo_to_user32(&frame->info, info))
+			return -EFAULT;
+	}
+
+	put_user_try {
+		/* Create the ucontext. */
+		if (cpu_has_xsave)
+			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+		else
+			put_user_ex(0, &frame->uc.uc_flags);
+		put_user_ex(0, &frame->uc.uc_link);
+		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+		put_user_ex(sas_ss_flags(regs->sp),
+			    &frame->uc.uc_stack.ss_flags);
+		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		put_user_ex(0, &frame->uc.uc__pad0);
+		err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+					regs, set->sig[0]);
+		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+		if (ka->sa.sa_flags & SA_RESTORER) {
+			restorer = ka->sa.sa_restorer;
+		} else {
+			/* could use a vstub here */
+			restorer = NULL;
+			err |= -EFAULT;
+		}
+		put_user_ex(restorer, &frame->pretcode);
+	} put_user_catch(err);
+
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->sp = (unsigned long) frame;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
+
+	/* We use the x32 calling convention here... */
+	regs->di = sig;
+	regs->si = (unsigned long) &frame->info;
+	regs->dx = (unsigned long) &frame->uc;
+
+	loadsegment(ds, __USER_DS);
+	loadsegment(es, __USER_DS);
+
+	regs->cs = __USER_CS;
+	regs->ss = __USER_DS;
+
+	return 0;
+}
+
+asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe_x32 __user *frame;
+	sigset_t set;
+	unsigned long ax;
+	struct pt_regs tregs;
+
+	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	set_current_blocked(&set);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+		goto badframe;
+
+	tregs = *regs;
+	if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+		goto badframe;
+
+	return ax;
+
+badframe:
+	signal_fault(regs, frame, "x32 rt_sigreturn");
+	return 0;
+}
+#endif
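
Note the hard requirement in x32_setup_rt_frame() above: without SA_RESTORER the kernel refuses to build an x32 frame (err |= -EFAULT), since there is no vsyscall stub to fall back on. Ordinary userspace never notices because libc's sigaction() installs its own restorer. A hedged userspace sketch, assuming an x32-targeted toolchain:

	#include <signal.h>
	#include <string.h>
	#include <unistd.h>

	static void handler(int sig, siginfo_t *info, void *ucontext)
	{
		/* delivered on the frame built by x32_setup_rt_frame() */
	}

	int main(void)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_sigaction = handler;
		sa.sa_flags = SA_SIGINFO;	/* libc adds SA_RESTORER for us */
		sigemptyset(&sa.sa_mask);
		sigaction(SIGUSR1, &sa, NULL);
		kill(getpid(), SIGUSR1);
		return 0;
	}
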
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 16204dc15484..66c74f481cab 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -29,6 +29,7 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
 *
@@ -148,6 +149,60 @@ void native_send_call_func_ipi(const struct cpumask *mask)
 	free_cpumask_var(allbutself);
 }
 
+static atomic_t stopping_cpu = ATOMIC_INIT(-1);
+
+static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
+{
+	/* We are registered on stopping cpu too, avoid spurious NMI */
+	if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
+		return NMI_HANDLED;
+
+	stop_this_cpu(NULL);
+
+	return NMI_HANDLED;
+}
+
+static void native_nmi_stop_other_cpus(int wait)
+{
+	unsigned long flags;
+	unsigned long timeout;
+
+	if (reboot_force)
+		return;
+
+	/*
+	 * Use an own vector here because smp_call_function
+	 * does lots of things not suitable in a panic situation.
+	 */
+	if (num_online_cpus() > 1) {
+		/* did someone beat us here? */
+		if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
+			return;
+
+		if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
+					 NMI_FLAG_FIRST, "smp_stop"))
+			/* Note: we ignore failures here */
+			return;
+
+		/* sync above data before sending NMI */
+		wmb();
+
+		apic->send_IPI_allbutself(NMI_VECTOR);
+
+		/*
+		 * Don't wait longer than a second if the caller
+		 * didn't ask us to wait.
+		 */
+		timeout = USEC_PER_SEC;
+		while (num_online_cpus() > 1 && (wait || timeout--))
+			udelay(1);
+	}
+
+	local_irq_save(flags);
+	disable_local_APIC();
+	local_irq_restore(flags);
+}
+
 /*
 * this function calls the 'stop' function on all other CPUs in the system.
 */
@@ -160,7 +215,7 @@ asmlinkage void smp_reboot_interrupt(void)
 	irq_exit();
 }
 
-static void native_stop_other_cpus(int wait)
+static void native_irq_stop_other_cpus(int wait)
 {
 	unsigned long flags;
 	unsigned long timeout;
@@ -194,6 +249,11 @@ static void native_stop_other_cpus(int wait)
 	local_irq_restore(flags);
 }
 
+static void native_smp_disable_nmi_ipi(void)
+{
+	smp_ops.stop_other_cpus = native_irq_stop_other_cpus;
+}
+
 /*
 * Reschedule call back.
 */
@@ -225,12 +285,20 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
+static int __init nonmi_ipi_setup(char *str)
+{
+	native_smp_disable_nmi_ipi();
+	return 1;
+}
+
+__setup("nonmi_ipi", nonmi_ipi_setup);
+
 struct smp_ops smp_ops = {
 	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
 	.smp_prepare_cpus	= native_smp_prepare_cpus,
 	.smp_cpus_done		= native_smp_cpus_done,
 
-	.stop_other_cpus	= native_stop_other_cpus,
+	.stop_other_cpus	= native_nmi_stop_other_cpus,
 	.smp_send_reschedule	= native_smp_send_reschedule,
 
 	.cpu_up			= native_cpu_up,
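
Stopping CPUs via NMI depends on the NMI handler registration API used above; booting with nonmi_ipi on the command line restores the old REBOOT_VECTOR IRQ path for machines where the NMI route misbehaves. A hedged sketch of the registration interface itself (my_nmi_handler and my_nmi_init are hypothetical):

	#include <linux/init.h>
	#include <asm/nmi.h>

	static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
	{
		/* NMI_HANDLED claims the NMI; NMI_DONE passes it along */
		return NMI_DONE;
	}

	static int __init my_nmi_init(void)
	{
		/* NMI_FLAG_FIRST asks to run ahead of other NMI_LOCAL handlers */
		return register_nmi_handler(NMI_LOCAL, my_nmi_handler,
					    NMI_FLAG_FIRST, "my_nmi");
	}
	device_initcall(my_nmi_init);
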
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e38e21754eea..6e1e406038c2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -50,6 +50,7 @@
 #include <linux/tboot.h>
 #include <linux/stackprotector.h>
 #include <linux/gfp.h>
+#include <linux/cpuidle.h>
 
 #include <asm/acpi.h>
 #include <asm/desc.h>
@@ -207,21 +208,22 @@ static void __cpuinit smp_callin(void)
 	 * Need to setup vector mappings before we enable interrupts.
 	 */
 	setup_vector_irq(smp_processor_id());
+
 	/*
-	 * Get our bogomips.
-	 *
-	 * Need to enable IRQs because it can take longer and then
-	 * the NMI watchdog might kill us.
+	 * Save our processor parameters. Note: this information
+	 * is needed for clock calibration.
 	 */
-	local_irq_enable();
-	calibrate_delay();
-	local_irq_disable();
-	pr_debug("Stack at about %p\n", &cpuid);
+	smp_store_cpu_info(cpuid);
 
 	/*
-	 * Save our processor parameters
+	 * Get our bogomips.
+	 * Update loops_per_jiffy in cpu_data. Previous call to
+	 * smp_store_cpu_info() stored a value that is close but not as
+	 * accurate as the value just calculated.
 	 */
-	smp_store_cpu_info(cpuid);
+	calibrate_delay();
+	cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
+	pr_debug("Stack at about %p\n", &cpuid);
 
 	/*
 	 * This must be done before setting cpu_online_mask
@@ -249,6 +251,7 @@ notrace static void __cpuinit start_secondary(void *unused)
 	 * most necessary things.
 	 */
 	cpu_init();
+	x86_cpuinit.early_percpu_clock_init();
 	preempt_disable();
 	smp_callin();
 
@@ -285,19 +288,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 	x86_platform.nmi_init();
 
-	/*
-	 * Wait until the cpu which brought this one up marked it
-	 * online before enabling interrupts. If we don't do that then
-	 * we can end up waking up the softirq thread before this cpu
-	 * reached the active state, which makes the scheduler unhappy
-	 * and schedule the softirq thread on the wrong cpu. This is
-	 * only observable with forced threaded interrupts, but in
-	 * theory it could also happen w/o them. It's just way harder
-	 * to achieve.
-	 */
-	while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
-		cpu_relax();
-
 	/* enable local interrupts */
 	local_irq_enable();
 
@@ -734,8 +724,6 @@ do_rest:
 	 * the targeted processor.
 	 */
 
-	printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip);
-
 	atomic_set(&init_deasserted, 0);
 
 	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
@@ -785,9 +773,10 @@ do_rest:
 		schedule();
 	}
 
-	if (cpumask_test_cpu(cpu, cpu_callin_mask))
+	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
+		print_cpu_msr(&cpu_data(cpu));
 		pr_debug("CPU%d: has booted.\n", cpu);
-	else {
+	} else {
 		boot_error = 1;
 		if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
 		    == 0xA5A5A5A5)
@@ -841,7 +830,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
 	    !physid_isset(apicid, phys_cpu_present_map) ||
-	    (!x2apic_mode && apicid >= 255)) {
+	    !apic->apic_id_valid(apicid)) {
 		printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
 		return -EINVAL;
 	}
@@ -1143,6 +1132,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 {
 	pr_debug("Boot done.\n");
 
+	nmi_selftest();
 	impress_friends();
 #ifdef CONFIG_X86_IO_APIC
 	setup_ioapic_dest();
@@ -1415,7 +1405,8 @@ void native_play_dead(void)
 	tboot_shutdown(TB_SHUTDOWN_WFS);
 
 	mwait_play_dead();	/* Only returns on failure */
-	hlt_play_dead();
+	if (cpuidle_play_dead())
+		hlt_play_dead();
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 051489082d59..b4d3c3927dd8 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -98,7 +98,7 @@ out:
98static void find_start_end(unsigned long flags, unsigned long *begin, 98static void find_start_end(unsigned long flags, unsigned long *begin,
99 unsigned long *end) 99 unsigned long *end)
100{ 100{
101 if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { 101 if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
102 unsigned long new_begin; 102 unsigned long new_begin;
103 /* This is usually needed to map code in the small 103 /* This is usually needed to map code in the small
104 model, so it needs to be in the first 31 bits. Limit 104 model, so it needs to be in the first 31 bits. Limit
@@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
144 (!vma || addr + len <= vma->vm_start)) 144 (!vma || addr + len <= vma->vm_start))
145 return addr; 145 return addr;
146 } 146 }
147 if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) 147 if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32))
148 && len <= mm->cached_hole_size) { 148 && len <= mm->cached_hole_size) {
149 mm->cached_hole_size = 0; 149 mm->cached_hole_size = 0;
150 mm->free_area_cache = begin; 150 mm->free_area_cache = begin;
@@ -195,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
195{ 195{
196 struct vm_area_struct *vma; 196 struct vm_area_struct *vma;
197 struct mm_struct *mm = current->mm; 197 struct mm_struct *mm = current->mm;
198 unsigned long addr = addr0; 198 unsigned long addr = addr0, start_addr;
199 199
200 /* requested length too big for entire address space */ 200 /* requested length too big for entire address space */
201 if (len > TASK_SIZE) 201 if (len > TASK_SIZE)
@@ -205,7 +205,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
205 return addr; 205 return addr;
206 206
207 /* for MAP_32BIT mappings we force the legacy mmap base */ 207 /* for MAP_32BIT mappings we force the legacy mmap base */
208 if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) 208 if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
209 goto bottomup; 209 goto bottomup;
210 210
211 /* requesting a specific address */ 211 /* requesting a specific address */
@@ -223,25 +223,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
223 mm->free_area_cache = mm->mmap_base; 223 mm->free_area_cache = mm->mmap_base;
224 } 224 }
225 225
226try_again:
226 /* either no address requested or can't fit in requested address hole */ 227 /* either no address requested or can't fit in requested address hole */
227 addr = mm->free_area_cache; 228 start_addr = addr = mm->free_area_cache;
228
229 /* make sure it can fit in the remaining address space */
230 if (addr > len) {
231 unsigned long tmp_addr = align_addr(addr - len, filp,
232 ALIGN_TOPDOWN);
233
234 vma = find_vma(mm, tmp_addr);
235 if (!vma || tmp_addr + len <= vma->vm_start)
236 /* remember the address as a hint for next time */
237 return mm->free_area_cache = tmp_addr;
238 }
239
240 if (mm->mmap_base < len)
241 goto bottomup;
242 229
243 addr = mm->mmap_base-len; 230 if (addr < len)
231 goto fail;
244 232
233 addr -= len;
245 do { 234 do {
246 addr = align_addr(addr, filp, ALIGN_TOPDOWN); 235 addr = align_addr(addr, filp, ALIGN_TOPDOWN);
247 236
@@ -263,6 +252,17 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
263 addr = vma->vm_start-len; 252 addr = vma->vm_start-len;
264 } while (len < vma->vm_start); 253 } while (len < vma->vm_start);
265 254
255fail:
256 /*
257 * if hint left us with no space for the requested
258 * mapping then try again:
259 */
260 if (start_addr != mm->mmap_base) {
261 mm->free_area_cache = mm->mmap_base;
262 mm->cached_hole_size = 0;
263 goto try_again;
264 }
265
266bottomup: 266bottomup:
267 /* 267 /*
268 * A failed mmap() very likely causes application failure, 268 * A failed mmap() very likely causes application failure,
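
The arch_get_unmapped_area_topdown() rework above replaces the open-coded "try below the hint, else below mmap_base" logic with a single downward walk plus a one-shot retry: search from the cached free_area_cache first, and only if that fails and the cache was not already at mmap_base, reset the cache and try again from the top. A toy model of that control flow over a page bitmap (find_gap_topdown() and friends are made-up names for illustration):

#include <stdbool.h>
#include <stdio.h>

#define NPAGES 16
static bool used[NPAGES];               /* toy address space */
static unsigned cache = NPAGES;         /* analogue of mm->free_area_cache */

static int find_gap_topdown(unsigned start, unsigned len)
{
        if (!len || start > NPAGES)
                return -1;
        for (unsigned hi = start; hi >= len; hi--) {
                bool ok = true;

                for (unsigned i = hi - len; i < hi; i++)
                        if (used[i])
                                ok = false;
                if (ok)
                        return (int)(hi - len);
        }
        return -1;
}

static int get_unmapped_topdown(unsigned len)
{
        unsigned start = cache;
        int addr = find_gap_topdown(start, len);

        /* the "fail: ... goto try_again" path: retry once from the top */
        if (addr < 0 && start != NPAGES)
                addr = find_gap_topdown(NPAGES, len);
        if (addr >= 0)
                cache = (unsigned)addr; /* remember the hint for next time */
        return addr;
}

int main(void)
{
        used[13] = used[14] = true;     /* pretend something is mapped */
        printf("3 pages at %d\n", get_unmapped_topdown(3)); /* 10 */
        printf("2 pages at %d\n", get_unmapped_topdown(2)); /* 8, below the hint */
        return 0;
}
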
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c
new file mode 100644
index 000000000000..147fcd4941c4
--- /dev/null
+++ b/arch/x86/kernel/syscall_32.c
@@ -0,0 +1,25 @@
1/* System call table for i386. */
2
3#include <linux/linkage.h>
4#include <linux/sys.h>
5#include <linux/cache.h>
6#include <asm/asm-offsets.h>
7
8#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
9#include <asm/syscalls_32.h>
10#undef __SYSCALL_I386
11
12#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
13
14typedef asmlinkage void (*sys_call_ptr_t)(void);
15
16extern asmlinkage void sys_ni_syscall(void);
17
18const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
19 /*
20 * Smells like a compiler bug -- it doesn't work
21 * when the & below is removed.
22 */
23 [0 ... __NR_syscall_max] = &sys_ni_syscall,
24#include <asm/syscalls_32.h>
25};
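
The new syscall_32.c builds its table with GCC's range designated initializers: [0 ... __NR_syscall_max] = &sys_ni_syscall fills every slot with the not-implemented stub, then the [nr] = sym entries expanded from asm/syscalls_32.h override individual slots, since later designators win. A standalone sketch of the same trick (a GNU C extension, so GCC or Clang is assumed):

#include <stdio.h>

typedef void (*handler_t)(void);

static void ni_syscall(void) { puts("ni_syscall"); }
static void do_read(void)    { puts("read"); }
static void do_write(void)   { puts("write"); }

#define NR_MAX 7

static const handler_t table[NR_MAX + 1] = {
        [0 ... NR_MAX] = &ni_syscall,   /* GNU range designator: default all */
        [3] = do_read,                  /* later designators override */
        [4] = do_write,
};

int main(void)
{
        table[3]();                     /* read */
        table[5]();                     /* ni_syscall */
        return 0;
}
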
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index de87d6008295..5c7f8c20da74 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -5,15 +5,19 @@
5#include <linux/cache.h> 5#include <linux/cache.h>
6#include <asm/asm-offsets.h> 6#include <asm/asm-offsets.h>
7 7
8#define __NO_STUBS 8#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
9 9
10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 10#ifdef CONFIG_X86_X32_ABI
11#undef _ASM_X86_UNISTD_64_H 11# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
12#include <asm/unistd_64.h> 12#else
13# define __SYSCALL_X32(nr, sym, compat) /* nothing */
14#endif
13 15
14#undef __SYSCALL 16#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ;
15#define __SYSCALL(nr, sym) [nr] = sym, 17#include <asm/syscalls_64.h>
16#undef _ASM_X86_UNISTD_64_H 18#undef __SYSCALL_64
19
20#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
17 21
18typedef void (*sys_call_ptr_t)(void); 22typedef void (*sys_call_ptr_t)(void);
19 23
@@ -21,9 +25,9 @@ extern void sys_ni_syscall(void);
21 25
22const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { 26const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
23 /* 27 /*
24 *Smells like a like a compiler bug -- it doesn't work 28 * Smells like a compiler bug -- it doesn't work
25 *when the & below is removed. 29 * when the & below is removed.
26 */ 30 */
27 [0 ... __NR_syscall_max] = &sys_ni_syscall, 31 [0 ... __NR_syscall_max] = &sys_ni_syscall,
28#include <asm/unistd_64.h> 32#include <asm/syscalls_64.h>
29}; 33};
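
The rewritten syscall_64.c is the classic X-macro pattern: asm/syscalls_64.h is included twice, first with __SYSCALL_64 expanding to extern declarations and then to table entries, so one generated list feeds both uses. The same shape in a single file, with a local list macro standing in for the generated header:

#include <stdio.h>

/* one list, expanded twice -- the generated asm header plays this role */
#define SYSCALL_LIST(X) \
        X(0, sys_read)  \
        X(1, sys_write)

#define DECLARE(nr, sym) static void sym(void) { puts(#sym); }
SYSCALL_LIST(DECLARE)
#undef DECLARE

typedef void (*sys_call_ptr_t)(void);

#define ENTRY(nr, sym) [nr] = sym,
static const sys_call_ptr_t sys_call_table[] = {
        SYSCALL_LIST(ENTRY)
};
#undef ENTRY

int main(void)
{
        sys_call_table[1]();            /* prints "sys_write" */
        return 0;
}
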
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
deleted file mode 100644
index 9a0e31293920..000000000000
--- a/arch/x86/kernel/syscall_table_32.S
+++ /dev/null
@@ -1,350 +0,0 @@
1ENTRY(sys_call_table)
2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
3 .long sys_exit
4 .long ptregs_fork
5 .long sys_read
6 .long sys_write
7 .long sys_open /* 5 */
8 .long sys_close
9 .long sys_waitpid
10 .long sys_creat
11 .long sys_link
12 .long sys_unlink /* 10 */
13 .long ptregs_execve
14 .long sys_chdir
15 .long sys_time
16 .long sys_mknod
17 .long sys_chmod /* 15 */
18 .long sys_lchown16
19 .long sys_ni_syscall /* old break syscall holder */
20 .long sys_stat
21 .long sys_lseek
22 .long sys_getpid /* 20 */
23 .long sys_mount
24 .long sys_oldumount
25 .long sys_setuid16
26 .long sys_getuid16
27 .long sys_stime /* 25 */
28 .long sys_ptrace
29 .long sys_alarm
30 .long sys_fstat
31 .long sys_pause
32 .long sys_utime /* 30 */
33 .long sys_ni_syscall /* old stty syscall holder */
34 .long sys_ni_syscall /* old gtty syscall holder */
35 .long sys_access
36 .long sys_nice
37 .long sys_ni_syscall /* 35 - old ftime syscall holder */
38 .long sys_sync
39 .long sys_kill
40 .long sys_rename
41 .long sys_mkdir
42 .long sys_rmdir /* 40 */
43 .long sys_dup
44 .long sys_pipe
45 .long sys_times
46 .long sys_ni_syscall /* old prof syscall holder */
47 .long sys_brk /* 45 */
48 .long sys_setgid16
49 .long sys_getgid16
50 .long sys_signal
51 .long sys_geteuid16
52 .long sys_getegid16 /* 50 */
53 .long sys_acct
54 .long sys_umount /* recycled never used phys() */
55 .long sys_ni_syscall /* old lock syscall holder */
56 .long sys_ioctl
57 .long sys_fcntl /* 55 */
58 .long sys_ni_syscall /* old mpx syscall holder */
59 .long sys_setpgid
60 .long sys_ni_syscall /* old ulimit syscall holder */
61 .long sys_olduname
62 .long sys_umask /* 60 */
63 .long sys_chroot
64 .long sys_ustat
65 .long sys_dup2
66 .long sys_getppid
67 .long sys_getpgrp /* 65 */
68 .long sys_setsid
69 .long sys_sigaction
70 .long sys_sgetmask
71 .long sys_ssetmask
72 .long sys_setreuid16 /* 70 */
73 .long sys_setregid16
74 .long sys_sigsuspend
75 .long sys_sigpending
76 .long sys_sethostname
77 .long sys_setrlimit /* 75 */
78 .long sys_old_getrlimit
79 .long sys_getrusage
80 .long sys_gettimeofday
81 .long sys_settimeofday
82 .long sys_getgroups16 /* 80 */
83 .long sys_setgroups16
84 .long sys_old_select
85 .long sys_symlink
86 .long sys_lstat
87 .long sys_readlink /* 85 */
88 .long sys_uselib
89 .long sys_swapon
90 .long sys_reboot
91 .long sys_old_readdir
92 .long sys_old_mmap /* 90 */
93 .long sys_munmap
94 .long sys_truncate
95 .long sys_ftruncate
96 .long sys_fchmod
97 .long sys_fchown16 /* 95 */
98 .long sys_getpriority
99 .long sys_setpriority
100 .long sys_ni_syscall /* old profil syscall holder */
101 .long sys_statfs
102 .long sys_fstatfs /* 100 */
103 .long sys_ioperm
104 .long sys_socketcall
105 .long sys_syslog
106 .long sys_setitimer
107 .long sys_getitimer /* 105 */
108 .long sys_newstat
109 .long sys_newlstat
110 .long sys_newfstat
111 .long sys_uname
112 .long ptregs_iopl /* 110 */
113 .long sys_vhangup
114 .long sys_ni_syscall /* old "idle" system call */
115 .long ptregs_vm86old
116 .long sys_wait4
117 .long sys_swapoff /* 115 */
118 .long sys_sysinfo
119 .long sys_ipc
120 .long sys_fsync
121 .long ptregs_sigreturn
122 .long ptregs_clone /* 120 */
123 .long sys_setdomainname
124 .long sys_newuname
125 .long sys_modify_ldt
126 .long sys_adjtimex
127 .long sys_mprotect /* 125 */
128 .long sys_sigprocmask
129 .long sys_ni_syscall /* old "create_module" */
130 .long sys_init_module
131 .long sys_delete_module
132 .long sys_ni_syscall /* 130: old "get_kernel_syms" */
133 .long sys_quotactl
134 .long sys_getpgid
135 .long sys_fchdir
136 .long sys_bdflush
137 .long sys_sysfs /* 135 */
138 .long sys_personality
139 .long sys_ni_syscall /* reserved for afs_syscall */
140 .long sys_setfsuid16
141 .long sys_setfsgid16
142 .long sys_llseek /* 140 */
143 .long sys_getdents
144 .long sys_select
145 .long sys_flock
146 .long sys_msync
147 .long sys_readv /* 145 */
148 .long sys_writev
149 .long sys_getsid
150 .long sys_fdatasync
151 .long sys_sysctl
152 .long sys_mlock /* 150 */
153 .long sys_munlock
154 .long sys_mlockall
155 .long sys_munlockall
156 .long sys_sched_setparam
157 .long sys_sched_getparam /* 155 */
158 .long sys_sched_setscheduler
159 .long sys_sched_getscheduler
160 .long sys_sched_yield
161 .long sys_sched_get_priority_max
162 .long sys_sched_get_priority_min /* 160 */
163 .long sys_sched_rr_get_interval
164 .long sys_nanosleep
165 .long sys_mremap
166 .long sys_setresuid16
167 .long sys_getresuid16 /* 165 */
168 .long ptregs_vm86
169 .long sys_ni_syscall /* Old sys_query_module */
170 .long sys_poll
171 .long sys_ni_syscall /* Old nfsservctl */
172 .long sys_setresgid16 /* 170 */
173 .long sys_getresgid16
174 .long sys_prctl
175 .long ptregs_rt_sigreturn
176 .long sys_rt_sigaction
177 .long sys_rt_sigprocmask /* 175 */
178 .long sys_rt_sigpending
179 .long sys_rt_sigtimedwait
180 .long sys_rt_sigqueueinfo
181 .long sys_rt_sigsuspend
182 .long sys_pread64 /* 180 */
183 .long sys_pwrite64
184 .long sys_chown16
185 .long sys_getcwd
186 .long sys_capget
187 .long sys_capset /* 185 */
188 .long ptregs_sigaltstack
189 .long sys_sendfile
190 .long sys_ni_syscall /* reserved for streams1 */
191 .long sys_ni_syscall /* reserved for streams2 */
192 .long ptregs_vfork /* 190 */
193 .long sys_getrlimit
194 .long sys_mmap_pgoff
195 .long sys_truncate64
196 .long sys_ftruncate64
197 .long sys_stat64 /* 195 */
198 .long sys_lstat64
199 .long sys_fstat64
200 .long sys_lchown
201 .long sys_getuid
202 .long sys_getgid /* 200 */
203 .long sys_geteuid
204 .long sys_getegid
205 .long sys_setreuid
206 .long sys_setregid
207 .long sys_getgroups /* 205 */
208 .long sys_setgroups
209 .long sys_fchown
210 .long sys_setresuid
211 .long sys_getresuid
212 .long sys_setresgid /* 210 */
213 .long sys_getresgid
214 .long sys_chown
215 .long sys_setuid
216 .long sys_setgid
217 .long sys_setfsuid /* 215 */
218 .long sys_setfsgid
219 .long sys_pivot_root
220 .long sys_mincore
221 .long sys_madvise
222 .long sys_getdents64 /* 220 */
223 .long sys_fcntl64
224 .long sys_ni_syscall /* reserved for TUX */
225 .long sys_ni_syscall
226 .long sys_gettid
227 .long sys_readahead /* 225 */
228 .long sys_setxattr
229 .long sys_lsetxattr
230 .long sys_fsetxattr
231 .long sys_getxattr
232 .long sys_lgetxattr /* 230 */
233 .long sys_fgetxattr
234 .long sys_listxattr
235 .long sys_llistxattr
236 .long sys_flistxattr
237 .long sys_removexattr /* 235 */
238 .long sys_lremovexattr
239 .long sys_fremovexattr
240 .long sys_tkill
241 .long sys_sendfile64
242 .long sys_futex /* 240 */
243 .long sys_sched_setaffinity
244 .long sys_sched_getaffinity
245 .long sys_set_thread_area
246 .long sys_get_thread_area
247 .long sys_io_setup /* 245 */
248 .long sys_io_destroy
249 .long sys_io_getevents
250 .long sys_io_submit
251 .long sys_io_cancel
252 .long sys_fadvise64 /* 250 */
253 .long sys_ni_syscall
254 .long sys_exit_group
255 .long sys_lookup_dcookie
256 .long sys_epoll_create
257 .long sys_epoll_ctl /* 255 */
258 .long sys_epoll_wait
259 .long sys_remap_file_pages
260 .long sys_set_tid_address
261 .long sys_timer_create
262 .long sys_timer_settime /* 260 */
263 .long sys_timer_gettime
264 .long sys_timer_getoverrun
265 .long sys_timer_delete
266 .long sys_clock_settime
267 .long sys_clock_gettime /* 265 */
268 .long sys_clock_getres
269 .long sys_clock_nanosleep
270 .long sys_statfs64
271 .long sys_fstatfs64
272 .long sys_tgkill /* 270 */
273 .long sys_utimes
274 .long sys_fadvise64_64
275 .long sys_ni_syscall /* sys_vserver */
276 .long sys_mbind
277 .long sys_get_mempolicy
278 .long sys_set_mempolicy
279 .long sys_mq_open
280 .long sys_mq_unlink
281 .long sys_mq_timedsend
282 .long sys_mq_timedreceive /* 280 */
283 .long sys_mq_notify
284 .long sys_mq_getsetattr
285 .long sys_kexec_load
286 .long sys_waitid
287 .long sys_ni_syscall /* 285 */ /* available */
288 .long sys_add_key
289 .long sys_request_key
290 .long sys_keyctl
291 .long sys_ioprio_set
292 .long sys_ioprio_get /* 290 */
293 .long sys_inotify_init
294 .long sys_inotify_add_watch
295 .long sys_inotify_rm_watch
296 .long sys_migrate_pages
297 .long sys_openat /* 295 */
298 .long sys_mkdirat
299 .long sys_mknodat
300 .long sys_fchownat
301 .long sys_futimesat
302 .long sys_fstatat64 /* 300 */
303 .long sys_unlinkat
304 .long sys_renameat
305 .long sys_linkat
306 .long sys_symlinkat
307 .long sys_readlinkat /* 305 */
308 .long sys_fchmodat
309 .long sys_faccessat
310 .long sys_pselect6
311 .long sys_ppoll
312 .long sys_unshare /* 310 */
313 .long sys_set_robust_list
314 .long sys_get_robust_list
315 .long sys_splice
316 .long sys_sync_file_range
317 .long sys_tee /* 315 */
318 .long sys_vmsplice
319 .long sys_move_pages
320 .long sys_getcpu
321 .long sys_epoll_pwait
322 .long sys_utimensat /* 320 */
323 .long sys_signalfd
324 .long sys_timerfd_create
325 .long sys_eventfd
326 .long sys_fallocate
327 .long sys_timerfd_settime /* 325 */
328 .long sys_timerfd_gettime
329 .long sys_signalfd4
330 .long sys_eventfd2
331 .long sys_epoll_create1
332 .long sys_dup3 /* 330 */
333 .long sys_pipe2
334 .long sys_inotify_init1
335 .long sys_preadv
336 .long sys_pwritev
337 .long sys_rt_tgsigqueueinfo /* 335 */
338 .long sys_perf_event_open
339 .long sys_recvmmsg
340 .long sys_fanotify_init
341 .long sys_fanotify_mark
342 .long sys_prlimit64 /* 340 */
343 .long sys_name_to_handle_at
344 .long sys_open_by_handle_at
345 .long sys_clock_adjtime
346 .long sys_syncfs
347 .long sys_sendmmsg /* 345 */
348 .long sys_setns
349 .long sys_process_vm_readv
350 .long sys_process_vm_writev
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e2410e27f97e..6410744ac5cb 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -272,7 +272,7 @@ static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
272 offsetof(struct acpi_table_facs, firmware_waking_vector); 272 offsetof(struct acpi_table_facs, firmware_waking_vector);
273} 273}
274 274
275void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) 275static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
276{ 276{
277 static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = { 277 static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
278 /* S0,1,2: */ -1, -1, -1, 278 /* S0,1,2: */ -1, -1, -1,
@@ -281,7 +281,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
281 /* S5: */ TB_SHUTDOWN_S5 }; 281 /* S5: */ TB_SHUTDOWN_S5 };
282 282
283 if (!tboot_enabled()) 283 if (!tboot_enabled())
284 return; 284 return 0;
285 285
286 tboot_copy_fadt(&acpi_gbl_FADT); 286 tboot_copy_fadt(&acpi_gbl_FADT);
287 tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control; 287 tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
@@ -292,10 +292,11 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
292 if (sleep_state >= ACPI_S_STATE_COUNT || 292 if (sleep_state >= ACPI_S_STATE_COUNT ||
293 acpi_shutdown_map[sleep_state] == -1) { 293 acpi_shutdown_map[sleep_state] == -1) {
294 pr_warning("unsupported sleep state 0x%x\n", sleep_state); 294 pr_warning("unsupported sleep state 0x%x\n", sleep_state);
295 return; 295 return -1;
296 } 296 }
297 297
298 tboot_shutdown(acpi_shutdown_map[sleep_state]); 298 tboot_shutdown(acpi_shutdown_map[sleep_state]);
299 return 0;
299} 300}
300 301
301static atomic_t ap_wfs_count; 302static atomic_t ap_wfs_count;
@@ -345,6 +346,8 @@ static __init int tboot_late_init(void)
345 346
346 atomic_set(&ap_wfs_count, 0); 347 atomic_set(&ap_wfs_count, 0);
347 register_hotcpu_notifier(&tboot_cpu_notifier); 348 register_hotcpu_notifier(&tboot_cpu_notifier);
349
350 acpi_os_set_prepare_sleep(&tboot_sleep);
348 return 0; 351 return 0;
349} 352}
350 353
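
tboot_sleep() becomes static here and is handed to the ACPI core through acpi_os_set_prepare_sleep(), returning nonzero when the sleep transition is rejected instead of silently falling through. A sketch of that single-callback hook pattern (set_prepare_sleep() and the rest are invented names for illustration, not the real ACPI interface):

#include <stdio.h>

typedef int (*prepare_sleep_cb)(unsigned char sleep_state);

static prepare_sleep_cb prepare_sleep_hook;     /* NULL until registered */

static void set_prepare_sleep(prepare_sleep_cb cb)
{
        prepare_sleep_hook = cb;
}

static int enter_sleep(unsigned char state)
{
        /* call the hook only if someone registered; nonzero means "reject" */
        if (prepare_sleep_hook && prepare_sleep_hook(state))
                return -1;
        printf("entering S%u\n", state);
        return 0;
}

/* analogue of the now-static tboot_sleep(): veto unsupported states */
static int my_prepare_sleep(unsigned char state)
{
        return state > 5 ? -1 : 0;
}

int main(void)
{
        set_prepare_sleep(my_prepare_sleep);
        enter_sleep(3);                 /* accepted */
        return enter_sleep(7) ? 1 : 0;  /* vetoed by the hook */
}
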
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index 9e540fee7009..ab40954e113e 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -34,6 +34,7 @@
34#include <asm/tce.h> 34#include <asm/tce.h>
35#include <asm/calgary.h> 35#include <asm/calgary.h>
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm/cacheflush.h>
37 38
38/* flush a tce at 'tceaddr' to main memory */ 39/* flush a tce at 'tceaddr' to main memory */
39static inline void flush_tce(void* tceaddr) 40static inline void flush_tce(void* tceaddr)
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dd5fbf4101fc..c6eba2b42673 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -57,9 +57,6 @@ EXPORT_SYMBOL(profile_pc);
57 */ 57 */
58static irqreturn_t timer_interrupt(int irq, void *dev_id) 58static irqreturn_t timer_interrupt(int irq, void *dev_id)
59{ 59{
60 /* Keep nmi watchdog up to date */
61 inc_irq_stat(irq0_irqs);
62
63 global_clock_event->event_handler(global_clock_event); 60 global_clock_event->event_handler(global_clock_event);
64 61
65 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 62 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 6bb7b8579e70..9d9d2f9e77a5 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -6,7 +6,6 @@
6 6
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8#include <asm/desc.h> 8#include <asm/desc.h>
9#include <asm/system.h>
10#include <asm/ldt.h> 9#include <asm/ldt.h>
11#include <asm/processor.h> 10#include <asm/processor.h>
12#include <asm/proto.h> 11#include <asm/proto.h>
@@ -163,7 +162,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
163{ 162{
164 const struct desc_struct *tls; 163 const struct desc_struct *tls;
165 164
166 if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || 165 if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
167 (pos % sizeof(struct user_desc)) != 0 || 166 (pos % sizeof(struct user_desc)) != 0 ||
168 (count % sizeof(struct user_desc)) != 0) 167 (count % sizeof(struct user_desc)) != 0)
169 return -EINVAL; 168 return -EINVAL;
@@ -198,7 +197,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
198 struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; 197 struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
199 const struct user_desc *info; 198 const struct user_desc *info;
200 199
201 if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || 200 if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
202 (pos % sizeof(struct user_desc)) != 0 || 201 (pos % sizeof(struct user_desc)) != 0 ||
203 (count % sizeof(struct user_desc)) != 0) 202 (count % sizeof(struct user_desc)) != 0)
204 return -EINVAL; 203 return -EINVAL;
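
The ">" to ">=" change above closes an off-by-one: pos equal to the full table size passed the old check, and the subsequent tls_array[pos / sizeof(struct user_desc)] access indexed one entry past the end. A tiny demonstration of why the boundary value must be rejected (the struct and sizes below are stand-ins for the real user_desc layout):

#include <assert.h>
#include <stddef.h>

struct desc { char bytes[16]; };        /* stand-in for struct user_desc */
#define NENTRIES 3                      /* stand-in for GDT_ENTRY_TLS_ENTRIES */

static int check(size_t pos, size_t count)
{
        if (pos >= NENTRIES * sizeof(struct desc) ||    /* ">" let pos==48 through */
            pos % sizeof(struct desc) != 0 ||
            count % sizeof(struct desc) != 0)
                return -1;              /* -EINVAL in the kernel */
        return 0;
}

int main(void)
{
        assert(check(32, 16) == 0);     /* last valid entry */
        assert(check(48, 16) == -1);    /* one past the end: must be rejected,
                                           or desc[48/16] == desc[3] overruns */
        return 0;
}
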
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fa1191fb679d..ff9281f16029 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,10 +50,10 @@
50#include <asm/processor.h> 50#include <asm/processor.h>
51#include <asm/debugreg.h> 51#include <asm/debugreg.h>
52#include <linux/atomic.h> 52#include <linux/atomic.h>
53#include <asm/system.h>
54#include <asm/traps.h> 53#include <asm/traps.h>
55#include <asm/desc.h> 54#include <asm/desc.h>
56#include <asm/i387.h> 55#include <asm/i387.h>
56#include <asm/fpu-internal.h>
57#include <asm/mce.h> 57#include <asm/mce.h>
58 58
59#include <asm/mach_traps.h> 59#include <asm/mach_traps.h>
@@ -119,7 +119,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
119 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. 119 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
120 * On nmi (interrupt 2), do_trap should not be called. 120 * On nmi (interrupt 2), do_trap should not be called.
121 */ 121 */
122 if (trapnr < 6) 122 if (trapnr < X86_TRAP_UD)
123 goto vm86_trap; 123 goto vm86_trap;
124 goto trap_signal; 124 goto trap_signal;
125 } 125 }
@@ -132,7 +132,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
132trap_signal: 132trap_signal:
133#endif 133#endif
134 /* 134 /*
135 * We want error_code and trap_no set for userspace faults and 135 * We want error_code and trap_nr set for userspace faults and
136 * kernelspace faults which result in die(), but not 136 * kernelspace faults which result in die(), but not
137 * kernelspace faults which are fixed up. die() gives the 137 * kernelspace faults which are fixed up. die() gives the
138 * process no chance to handle the signal and notice the 138 * process no chance to handle the signal and notice the
@@ -141,7 +141,7 @@ trap_signal:
141 * delivered, faults. See also do_general_protection below. 141 * delivered, faults. See also do_general_protection below.
142 */ 142 */
143 tsk->thread.error_code = error_code; 143 tsk->thread.error_code = error_code;
144 tsk->thread.trap_no = trapnr; 144 tsk->thread.trap_nr = trapnr;
145 145
146#ifdef CONFIG_X86_64 146#ifdef CONFIG_X86_64
147 if (show_unhandled_signals && unhandled_signal(tsk, signr) && 147 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
@@ -164,7 +164,7 @@ trap_signal:
164kernel_trap: 164kernel_trap:
165 if (!fixup_exception(regs)) { 165 if (!fixup_exception(regs)) {
166 tsk->thread.error_code = error_code; 166 tsk->thread.error_code = error_code;
167 tsk->thread.trap_no = trapnr; 167 tsk->thread.trap_nr = trapnr;
168 die(str, regs, error_code); 168 die(str, regs, error_code);
169 } 169 }
170 return; 170 return;
@@ -203,27 +203,31 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
203 do_trap(trapnr, signr, str, regs, error_code, &info); \ 203 do_trap(trapnr, signr, str, regs, error_code, &info); \
204} 204}
205 205
206DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) 206DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
207DO_ERROR(4, SIGSEGV, "overflow", overflow) 207 regs->ip)
208DO_ERROR(5, SIGSEGV, "bounds", bounds) 208DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
209DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) 209DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds)
210DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 210DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN,
211DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 211 regs->ip)
212DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 212DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",
213 coprocessor_segment_overrun)
214DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
215DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
213#ifdef CONFIG_X86_32 216#ifdef CONFIG_X86_32
214DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 217DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
215#endif 218#endif
216DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) 219DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
220 BUS_ADRALN, 0)
217 221
218#ifdef CONFIG_X86_64 222#ifdef CONFIG_X86_64
219/* Runs on IST stack */ 223/* Runs on IST stack */
220dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) 224dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
221{ 225{
222 if (notify_die(DIE_TRAP, "stack segment", regs, error_code, 226 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
223 12, SIGBUS) == NOTIFY_STOP) 227 X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
224 return; 228 return;
225 preempt_conditional_sti(regs); 229 preempt_conditional_sti(regs);
226 do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); 230 do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
227 preempt_conditional_cli(regs); 231 preempt_conditional_cli(regs);
228} 232}
229 233
@@ -233,10 +237,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
233 struct task_struct *tsk = current; 237 struct task_struct *tsk = current;
234 238
235 /* Return not checked because a double fault cannot be ignored */ 239 /* Return not checked because a double fault cannot be ignored */
236 notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); 240 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
237 241
238 tsk->thread.error_code = error_code; 242 tsk->thread.error_code = error_code;
239 tsk->thread.trap_no = 8; 243 tsk->thread.trap_nr = X86_TRAP_DF;
240 244
241 /* 245 /*
242 * This is always a kernel trap and never fixable (and thus must 246 * This is always a kernel trap and never fixable (and thus must
@@ -264,7 +268,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
264 goto gp_in_kernel; 268 goto gp_in_kernel;
265 269
266 tsk->thread.error_code = error_code; 270 tsk->thread.error_code = error_code;
267 tsk->thread.trap_no = 13; 271 tsk->thread.trap_nr = X86_TRAP_GP;
268 272
269 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && 273 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
270 printk_ratelimit()) { 274 printk_ratelimit()) {
@@ -291,9 +295,9 @@ gp_in_kernel:
291 return; 295 return;
292 296
293 tsk->thread.error_code = error_code; 297 tsk->thread.error_code = error_code;
294 tsk->thread.trap_no = 13; 298 tsk->thread.trap_nr = X86_TRAP_GP;
295 if (notify_die(DIE_GPF, "general protection fault", regs, 299 if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
296 error_code, 13, SIGSEGV) == NOTIFY_STOP) 300 X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
297 return; 301 return;
298 die("general protection fault", regs, error_code); 302 die("general protection fault", regs, error_code);
299} 303}
@@ -302,18 +306,24 @@ gp_in_kernel:
302dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) 306dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
303{ 307{
304#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 308#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
305 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 309 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
306 == NOTIFY_STOP) 310 SIGTRAP) == NOTIFY_STOP)
307 return; 311 return;
308#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ 312#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
309 313
310 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 314 if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
311 == NOTIFY_STOP) 315 SIGTRAP) == NOTIFY_STOP)
312 return; 316 return;
313 317
318 /*
319 * Let others (NMI) know that the debug stack is in use
320 * as we may switch to the interrupt stack.
321 */
322 debug_stack_usage_inc();
314 preempt_conditional_sti(regs); 323 preempt_conditional_sti(regs);
315 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); 324 do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
316 preempt_conditional_cli(regs); 325 preempt_conditional_cli(regs);
326 debug_stack_usage_dec();
317} 327}
318 328
319#ifdef CONFIG_X86_64 329#ifdef CONFIG_X86_64
@@ -406,13 +416,20 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
406 SIGTRAP) == NOTIFY_STOP) 416 SIGTRAP) == NOTIFY_STOP)
407 return; 417 return;
408 418
419 /*
420 * Let others (NMI) know that the debug stack is in use
421 * as we may switch to the interrupt stack.
422 */
423 debug_stack_usage_inc();
424
409 /* It's safe to allow irqs after DR6 has been saved */ 425 /* It's safe to allow irqs after DR6 has been saved */
410 preempt_conditional_sti(regs); 426 preempt_conditional_sti(regs);
411 427
412 if (regs->flags & X86_VM_MASK) { 428 if (regs->flags & X86_VM_MASK) {
413 handle_vm86_trap((struct kernel_vm86_regs *) regs, 429 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
414 error_code, 1); 430 X86_TRAP_DB);
415 preempt_conditional_cli(regs); 431 preempt_conditional_cli(regs);
432 debug_stack_usage_dec();
416 return; 433 return;
417 } 434 }
418 435
@@ -432,6 +449,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
432 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) 449 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
433 send_sigtrap(tsk, regs, error_code, si_code); 450 send_sigtrap(tsk, regs, error_code, si_code);
434 preempt_conditional_cli(regs); 451 preempt_conditional_cli(regs);
452 debug_stack_usage_dec();
435 453
436 return; 454 return;
437} 455}
@@ -446,7 +464,8 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
446 struct task_struct *task = current; 464 struct task_struct *task = current;
447 siginfo_t info; 465 siginfo_t info;
448 unsigned short err; 466 unsigned short err;
449 char *str = (trapnr == 16) ? "fpu exception" : "simd exception"; 467 char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
468 "simd exception";
450 469
451 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) 470 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
452 return; 471 return;
@@ -456,7 +475,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
456 { 475 {
457 if (!fixup_exception(regs)) { 476 if (!fixup_exception(regs)) {
458 task->thread.error_code = error_code; 477 task->thread.error_code = error_code;
459 task->thread.trap_no = trapnr; 478 task->thread.trap_nr = trapnr;
460 die(str, regs, error_code); 479 die(str, regs, error_code);
461 } 480 }
462 return; 481 return;
@@ -466,12 +485,12 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
466 * Save the info for the exception handler and clear the error. 485 * Save the info for the exception handler and clear the error.
467 */ 486 */
468 save_init_fpu(task); 487 save_init_fpu(task);
469 task->thread.trap_no = trapnr; 488 task->thread.trap_nr = trapnr;
470 task->thread.error_code = error_code; 489 task->thread.error_code = error_code;
471 info.si_signo = SIGFPE; 490 info.si_signo = SIGFPE;
472 info.si_errno = 0; 491 info.si_errno = 0;
473 info.si_addr = (void __user *)regs->ip; 492 info.si_addr = (void __user *)regs->ip;
474 if (trapnr == 16) { 493 if (trapnr == X86_TRAP_MF) {
475 unsigned short cwd, swd; 494 unsigned short cwd, swd;
476 /* 495 /*
477 * (~cwd & swd) will mask out exceptions that are not set to unmasked 496 * (~cwd & swd) will mask out exceptions that are not set to unmasked
@@ -515,10 +534,11 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
515 info.si_code = FPE_FLTRES; 534 info.si_code = FPE_FLTRES;
516 } else { 535 } else {
517 /* 536 /*
518 * If we're using IRQ 13, or supposedly even some trap 16 537 * If we're using IRQ 13, or supposedly even some trap
519 * implementations, it's possible we get a spurious trap... 538 * X86_TRAP_MF implementations, it's possible
539 * we get a spurious trap, which is not an error.
520 */ 540 */
521 return; /* Spurious trap, no error */ 541 return;
522 } 542 }
523 force_sig_info(SIGFPE, &info, task); 543 force_sig_info(SIGFPE, &info, task);
524} 544}
@@ -529,13 +549,13 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
529 ignore_fpu_irq = 1; 549 ignore_fpu_irq = 1;
530#endif 550#endif
531 551
532 math_error(regs, error_code, 16); 552 math_error(regs, error_code, X86_TRAP_MF);
533} 553}
534 554
535dotraplinkage void 555dotraplinkage void
536do_simd_coprocessor_error(struct pt_regs *regs, long error_code) 556do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
537{ 557{
538 math_error(regs, error_code, 19); 558 math_error(regs, error_code, X86_TRAP_XF);
539} 559}
540 560
541dotraplinkage void 561dotraplinkage void
@@ -557,41 +577,18 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
557} 577}
558 578
559/* 579/*
560 * __math_state_restore assumes that cr0.TS is already clear and the
561 * fpu state is all ready for use. Used during context switch.
562 */
563void __math_state_restore(void)
564{
565 struct thread_info *thread = current_thread_info();
566 struct task_struct *tsk = thread->task;
567
568 /*
569 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
570 */
571 if (unlikely(restore_fpu_checking(tsk))) {
572 stts();
573 force_sig(SIGSEGV, tsk);
574 return;
575 }
576
577 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
578 tsk->fpu_counter++;
579}
580
581/*
582 * 'math_state_restore()' saves the current math information in the 580 * 'math_state_restore()' saves the current math information in the
583 * old math state array, and gets the new ones from the current task 581 * old math state array, and gets the new ones from the current task
584 * 582 *
585 * Careful.. There are problems with IBM-designed IRQ13 behaviour. 583 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
586 * Don't touch unless you *really* know how it works. 584 * Don't touch unless you *really* know how it works.
587 * 585 *
588 * Must be called with kernel preemption disabled (in this case, 586 * Must be called with kernel preemption disabled (e.g. with
589 * local interrupts are disabled at the call-site in entry.S). 587 * local interrupts disabled, as in the case of do_device_not_available).
590 */ 588 */
591asmlinkage void math_state_restore(void) 589void math_state_restore(void)
592{ 590{
593 struct thread_info *thread = current_thread_info(); 591 struct task_struct *tsk = current;
594 struct task_struct *tsk = thread->task;
595 592
596 if (!tsk_used_math(tsk)) { 593 if (!tsk_used_math(tsk)) {
597 local_irq_enable(); 594 local_irq_enable();
@@ -608,9 +605,17 @@ asmlinkage void math_state_restore(void)
608 local_irq_disable(); 605 local_irq_disable();
609 } 606 }
610 607
611 clts(); /* Allow maths ops (or we recurse) */ 608 __thread_fpu_begin(tsk);
609 /*
610 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
611 */
612 if (unlikely(restore_fpu_checking(tsk))) {
613 __thread_fpu_end(tsk);
614 force_sig(SIGSEGV, tsk);
615 return;
616 }
612 617
613 __math_state_restore(); 618 tsk->fpu_counter++;
614} 619}
615EXPORT_SYMBOL_GPL(math_state_restore); 620EXPORT_SYMBOL_GPL(math_state_restore);
616 621
@@ -644,20 +649,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
644 info.si_errno = 0; 649 info.si_errno = 0;
645 info.si_code = ILL_BADSTK; 650 info.si_code = ILL_BADSTK;
646 info.si_addr = NULL; 651 info.si_addr = NULL;
647 if (notify_die(DIE_TRAP, "iret exception", 652 if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
648 regs, error_code, 32, SIGILL) == NOTIFY_STOP) 653 X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
649 return; 654 return;
650 do_trap(32, SIGILL, "iret exception", regs, error_code, &info); 655 do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
656 &info);
651} 657}
652#endif 658#endif
653 659
654/* Set of traps needed for early debugging. */ 660/* Set of traps needed for early debugging. */
655void __init early_trap_init(void) 661void __init early_trap_init(void)
656{ 662{
657 set_intr_gate_ist(1, &debug, DEBUG_STACK); 663 set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
658 /* int3 can be called from all */ 664 /* int3 can be called from all */
659 set_system_intr_gate_ist(3, &int3, DEBUG_STACK); 665 set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
660 set_intr_gate(14, &page_fault); 666 set_intr_gate(X86_TRAP_PF, &page_fault);
661 load_idt(&idt_descr); 667 load_idt(&idt_descr);
662} 668}
663 669
@@ -673,30 +679,30 @@ void __init trap_init(void)
673 early_iounmap(p, 4); 679 early_iounmap(p, 4);
674#endif 680#endif
675 681
676 set_intr_gate(0, &divide_error); 682 set_intr_gate(X86_TRAP_DE, &divide_error);
677 set_intr_gate_ist(2, &nmi, NMI_STACK); 683 set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
678 /* int4 can be called from all */ 684 /* int4 can be called from all */
679 set_system_intr_gate(4, &overflow); 685 set_system_intr_gate(X86_TRAP_OF, &overflow);
680 set_intr_gate(5, &bounds); 686 set_intr_gate(X86_TRAP_BR, &bounds);
681 set_intr_gate(6, &invalid_op); 687 set_intr_gate(X86_TRAP_UD, &invalid_op);
682 set_intr_gate(7, &device_not_available); 688 set_intr_gate(X86_TRAP_NM, &device_not_available);
683#ifdef CONFIG_X86_32 689#ifdef CONFIG_X86_32
684 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); 690 set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
685#else 691#else
686 set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); 692 set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
687#endif 693#endif
688 set_intr_gate(9, &coprocessor_segment_overrun); 694 set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun);
689 set_intr_gate(10, &invalid_TSS); 695 set_intr_gate(X86_TRAP_TS, &invalid_TSS);
690 set_intr_gate(11, &segment_not_present); 696 set_intr_gate(X86_TRAP_NP, &segment_not_present);
691 set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); 697 set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
692 set_intr_gate(13, &general_protection); 698 set_intr_gate(X86_TRAP_GP, &general_protection);
693 set_intr_gate(15, &spurious_interrupt_bug); 699 set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug);
694 set_intr_gate(16, &coprocessor_error); 700 set_intr_gate(X86_TRAP_MF, &coprocessor_error);
695 set_intr_gate(17, &alignment_check); 701 set_intr_gate(X86_TRAP_AC, &alignment_check);
696#ifdef CONFIG_X86_MCE 702#ifdef CONFIG_X86_MCE
697 set_intr_gate_ist(18, &machine_check, MCE_STACK); 703 set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
698#endif 704#endif
699 set_intr_gate(19, &simd_coprocessor_error); 705 set_intr_gate(X86_TRAP_XF, &simd_coprocessor_error);
700 706
701 /* Reserve all the builtin and the syscall vector: */ 707 /* Reserve all the builtin and the syscall vector: */
702 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 708 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
@@ -718,4 +724,10 @@ void __init trap_init(void)
718 cpu_init(); 724 cpu_init();
719 725
720 x86_init.irqs.trap_init(); 726 x86_init.irqs.trap_init();
727
728#ifdef CONFIG_X86_64
729 memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
730 set_nmi_gate(X86_TRAP_DB, &debug);
731 set_nmi_gate(X86_TRAP_BP, &int3);
732#endif
721} 733}
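
Most of the traps.c churn above replaces magic vector numbers with the X86_TRAP_* names, so comparisons like "trapnr < X86_TRAP_UD" document themselves where "trapnr < 6" did not. A stripped-down version of the idea (the enumerator values below match the x86 exception vectors they name):

#include <stdio.h>

enum x86_trap {
        TRAP_DE = 0,                    /* divide error */
        TRAP_DB = 1,                    /* debug */
        TRAP_BP = 3,                    /* int3 / breakpoint */
        TRAP_UD = 6,                    /* invalid opcode */
        TRAP_DF = 8,                    /* double fault */
        TRAP_GP = 13,                   /* general protection */
        TRAP_PF = 14,                   /* page fault */
        TRAP_MF = 16,                   /* x87 FPU error */
        TRAP_XF = 19,                   /* SIMD FP exception */
};

static const char *classify(int trapnr)
{
        /* reads as "traps below invalid-opcode", not "below 6" */
        return trapnr < TRAP_UD ? "forwardable to vm86" : "not forwarded";
}

int main(void)
{
        printf("trap %d: %s\n", TRAP_BP, classify(TRAP_BP));
        printf("trap %d: %s\n", TRAP_GP, classify(TRAP_GP));
        return 0;
}
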
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 2c9cf0fd78f5..fc0a147e3727 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -290,14 +290,15 @@ static inline int pit_verify_msb(unsigned char val)
290static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 290static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
291{ 291{
292 int count; 292 int count;
293 u64 tsc = 0; 293 u64 tsc = 0, prev_tsc = 0;
294 294
295 for (count = 0; count < 50000; count++) { 295 for (count = 0; count < 50000; count++) {
296 if (!pit_verify_msb(val)) 296 if (!pit_verify_msb(val))
297 break; 297 break;
298 prev_tsc = tsc;
298 tsc = get_cycles(); 299 tsc = get_cycles();
299 } 300 }
300 *deltap = get_cycles() - tsc; 301 *deltap = get_cycles() - prev_tsc;
301 *tscp = tsc; 302 *tscp = tsc;
302 303
303 /* 304 /*
@@ -311,9 +312,9 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
311 * How many MSB values do we want to see? We aim for 312 * How many MSB values do we want to see? We aim for
312 * a maximum error rate of 500ppm (in practice the 313 * a maximum error rate of 500ppm (in practice the
313 * real error is much smaller), but refuse to spend 314 * real error is much smaller), but refuse to spend
314 * more than 25ms on it. 315 * more than 50ms on it.
315 */ 316 */
316#define MAX_QUICK_PIT_MS 25 317#define MAX_QUICK_PIT_MS 50
317#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) 318#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
318 319
319static unsigned long quick_pit_calibrate(void) 320static unsigned long quick_pit_calibrate(void)
@@ -383,15 +384,12 @@ success:
383 * 384 *
384 * As a result, we can depend on there not being 385 * As a result, we can depend on there not being
385 * any odd delays anywhere, and the TSC reads are 386 * any odd delays anywhere, and the TSC reads are
386 * reliable (within the error). We also adjust the 387 * reliable (within the error).
387 * delta to the middle of the error bars, just
388 * because it looks nicer.
389 * 388 *
390 * kHz = ticks / time-in-seconds / 1000; 389 * kHz = ticks / time-in-seconds / 1000;
391 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 390 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
392 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) 391 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
393 */ 392 */
394 delta += (long)(d2 - d1)/2;
395 delta *= PIT_TICK_RATE; 393 delta *= PIT_TICK_RATE;
396 do_div(delta, i*256*1000); 394 do_div(delta, i*256*1000);
397 printk("Fast TSC calibration using PIT\n"); 395 printk("Fast TSC calibration using PIT\n");
@@ -622,7 +620,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
622 620
623 if (cpu_khz) { 621 if (cpu_khz) {
624 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; 622 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
625 *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR); 623 *offset = ns_now - mult_frac(tsc_now, *scale,
624 (1UL << CYC2NS_SCALE_FACTOR));
626 } 625 }
627 626
628 sched_clock_idle_wakeup_event(0); 627 sched_clock_idle_wakeup_event(0);
@@ -631,7 +630,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
631 630
632static unsigned long long cyc2ns_suspend; 631static unsigned long long cyc2ns_suspend;
633 632
634void save_sched_clock_state(void) 633void tsc_save_sched_clock_state(void)
635{ 634{
636 if (!sched_clock_stable) 635 if (!sched_clock_stable)
637 return; 636 return;
@@ -647,7 +646,7 @@ void save_sched_clock_state(void)
647 * that sched_clock() continues from the point where it was left off during 646 * that sched_clock() continues from the point where it was left off during
648 * suspend. 647 * suspend.
649 */ 648 */
650void restore_sched_clock_state(void) 649void tsc_restore_sched_clock_state(void)
651{ 650{
652 unsigned long long offset; 651 unsigned long long offset;
653 unsigned long flags; 652 unsigned long flags;
@@ -934,6 +933,16 @@ static int __init init_tsc_clocksource(void)
934 clocksource_tsc.rating = 0; 933 clocksource_tsc.rating = 0;
935 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 934 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
936 } 935 }
936
937 /*
938 * Trust the results of the earlier calibration on systems
939 * exporting a reliable TSC.
940 */
941 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
942 clocksource_register_khz(&clocksource_tsc, tsc_khz);
943 return 0;
944 }
945
937 schedule_delayed_work(&tsc_irqwork, 0); 946 schedule_delayed_work(&tsc_irqwork, 0);
938 return 0; 947 return 0;
939} 948}
@@ -995,3 +1004,23 @@ void __init tsc_init(void)
995 check_system_tsc_reliable(); 1004 check_system_tsc_reliable();
996} 1005}
997 1006
1007#ifdef CONFIG_SMP
1008/*
1009 * If we have a constant TSC and are using the TSC for the delay loop,
1010 * we can skip clock calibration if another cpu in the same socket has already
1011 * been calibrated. This assumes that CONSTANT_TSC applies to all
1012 * cpus in the socket - this should be a safe assumption.
1013 */
1014unsigned long __cpuinit calibrate_delay_is_known(void)
1015{
1016 int i, cpu = smp_processor_id();
1017
1018 if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
1019 return 0;
1020
1021 for_each_online_cpu(i)
1022 if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
1023 return cpu_data(i).loops_per_jiffy;
1024 return 0;
1025}
1026#endif
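
The set_cyc2ns_scale() hunk above swaps "(tsc_now * *scale) >> CYC2NS_SCALE_FACTOR" for mult_frac(), which splits the multiplicand into quotient and remainder so the intermediate product cannot overflow 64 bits the way the raw tsc * scale product can. A sketch of that computation (assuming quot * numer itself still fits in 64 bits):

#include <stdint.h>
#include <stdio.h>

/* x * numer / denom without forming the full x * numer product */
static uint64_t mult_frac64(uint64_t x, uint64_t numer, uint64_t denom)
{
        uint64_t quot = x / denom;
        uint64_t rem  = x % denom;

        return quot * numer + rem * numer / denom;
}

int main(void)
{
        uint64_t tsc   = 0x00ffffffffffffffULL;         /* large cycle count */
        uint64_t scale = 1000;
        uint64_t denom = 1ULL << 10;            /* 1 << CYC2NS_SCALE_FACTOR */

        /* (tsc * scale) >> 10 would overflow 64 bits; this does not */
        printf("%llu\n", (unsigned long long)mult_frac64(tsc, scale, denom));
        return 0;
}
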
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9eba29b46cb7..fc25e60a5884 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -42,7 +42,7 @@ static __cpuinitdata int nr_warps;
42/* 42/*
43 * TSC-warp measurement loop running on both CPUs: 43 * TSC-warp measurement loop running on both CPUs:
44 */ 44 */
45static __cpuinit void check_tsc_warp(void) 45static __cpuinit void check_tsc_warp(unsigned int timeout)
46{ 46{
47 cycles_t start, now, prev, end; 47 cycles_t start, now, prev, end;
48 int i; 48 int i;
@@ -51,9 +51,9 @@ static __cpuinit void check_tsc_warp(void)
51 start = get_cycles(); 51 start = get_cycles();
52 rdtsc_barrier(); 52 rdtsc_barrier();
53 /* 53 /*
54 * The measurement runs for 20 msecs: 54 * The measurement runs for 'timeout' msecs:
55 */ 55 */
56 end = start + tsc_khz * 20ULL; 56 end = start + (cycles_t) tsc_khz * timeout;
57 now = start; 57 now = start;
58 58
59 for (i = 0; ; i++) { 59 for (i = 0; ; i++) {
@@ -99,6 +99,25 @@ static __cpuinit void check_tsc_warp(void)
99} 99}
100 100
101/* 101/*
102 * If the target CPU coming online doesn't have any of its core-siblings
103 * online, a timeout of 20msec will be used for the TSC-warp measurement
104 * loop. Otherwise a smaller timeout of 2msec will be used, as we have some
105 * information about this socket already (and this information grows as we
106 * have more and more logical-siblings in that socket).
107 *
108 * Ideally we should be able to skip the TSC sync check on the other
109 * core-siblings, if the first logical CPU in a socket passed the sync test.
110 * But as the TSC is per-logical CPU and can potentially be modified wrongly
111 * by the BIOS, a TSC sync test of smaller duration should be able
112 * to catch such errors. Also this will catch the condition where all the
113 * cores in the socket don't get reset at the same time.
114 */
115static inline unsigned int loop_timeout(int cpu)
116{
117 return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
118}
119
120/*
102 * Source CPU calls into this - it waits for the freshly booted 121 * Source CPU calls into this - it waits for the freshly booted
103 * target CPU to arrive and then starts the measurement: 122 * target CPU to arrive and then starts the measurement:
104 */ 123 */
@@ -135,7 +154,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
135 */ 154 */
136 atomic_inc(&start_count); 155 atomic_inc(&start_count);
137 156
138 check_tsc_warp(); 157 check_tsc_warp(loop_timeout(cpu));
139 158
140 while (atomic_read(&stop_count) != cpus-1) 159 while (atomic_read(&stop_count) != cpus-1)
141 cpu_relax(); 160 cpu_relax();
@@ -183,7 +202,7 @@ void __cpuinit check_tsc_sync_target(void)
183 while (atomic_read(&start_count) != cpus) 202 while (atomic_read(&start_count) != cpus)
184 cpu_relax(); 203 cpu_relax();
185 204
186 check_tsc_warp(); 205 check_tsc_warp(loop_timeout(smp_processor_id()));
187 206
188 /* 207 /*
189 * Ok, we are done: 208 * Ok, we are done:
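
check_tsc_warp() has both CPUs take turns timestamping under a lock and flagging a "warp" whenever a fresh read lands behind the last value the other side published, now for loop_timeout() msecs instead of a fixed 20. A userspace analogy of that monotonicity check using CLOCK_MONOTONIC and two threads (a model of the protocol, not the kernel loop):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static long long last_ts;               /* last timestamp either thread saw */
static int warps;

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static void *warp_check(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++) {
                pthread_mutex_lock(&lock);
                long long now = now_ns();

                if (now < last_ts)      /* time went backwards: a warp */
                        warps++;
                last_ts = now;
                pthread_mutex_unlock(&lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, warp_check, NULL);
        warp_check(NULL);
        pthread_join(t, NULL);
        printf("warps observed: %d\n", warps);  /* expect 0 on a sane clock */
        return 0;
}
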
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f8753ab0a..255f58ae71e8 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
172 spinlock_t *ptl; 172 spinlock_t *ptl;
173 int i; 173 int i;
174 174
175 down_write(&mm->mmap_sem);
175 pgd = pgd_offset(mm, 0xA0000); 176 pgd = pgd_offset(mm, 0xA0000);
176 if (pgd_none_or_clear_bad(pgd)) 177 if (pgd_none_or_clear_bad(pgd))
177 goto out; 178 goto out;
@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
190 } 191 }
191 pte_unmap_unlock(pte, ptl); 192 pte_unmap_unlock(pte, ptl);
192out: 193out:
194 up_write(&mm->mmap_sem);
193 flush_tlb(); 195 flush_tlb();
194} 196}
195 197
@@ -335,9 +337,11 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
335 if (info->flags & VM86_SCREEN_BITMAP) 337 if (info->flags & VM86_SCREEN_BITMAP)
336 mark_screen_rdonly(tsk->mm); 338 mark_screen_rdonly(tsk->mm);
337 339
338 /* call audit_syscall_exit since we do not exit via the normal paths */ 340 /* call __audit_syscall_exit since we do not exit via the normal paths */
341#ifdef CONFIG_AUDITSYSCALL
339 if (unlikely(current->audit_context)) 342 if (unlikely(current->audit_context))
340 audit_syscall_exit(AUDITSC_RESULT(0), 0); 343 __audit_syscall_exit(1, 0);
344#endif
341 345
342 __asm__ __volatile__( 346 __asm__ __volatile__(
343 "movl %0,%%esp\n\t" 347 "movl %0,%%esp\n\t"
@@ -565,7 +569,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
565 } 569 }
566 if (trapno != 1) 570 if (trapno != 1)
567 return 1; /* we let this be handled by the calling routine */ 571 return 1; /* we let this be handled by the calling routine */
568 current->thread.trap_no = trapno; 572 current->thread.trap_nr = trapno;
569 current->thread.error_code = error_code; 573 current->thread.error_code = error_code;
570 force_sig(SIGTRAP, current); 574 force_sig(SIGTRAP, current);
571 return 0; 575 return 0;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index b07ba9393564..7515cf0e1805 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -52,10 +52,7 @@
52#include "vsyscall_trace.h" 52#include "vsyscall_trace.h"
53 53
54DEFINE_VVAR(int, vgetcpu_mode); 54DEFINE_VVAR(int, vgetcpu_mode);
55DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = 55DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
56{
57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
58};
59 56
60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; 57static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
61 58
@@ -80,20 +77,15 @@ early_param("vsyscall", vsyscall_setup);
80 77
81void update_vsyscall_tz(void) 78void update_vsyscall_tz(void)
82{ 79{
83 unsigned long flags;
84
85 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
86 /* sys_tz has changed */
87 vsyscall_gtod_data.sys_tz = sys_tz; 80 vsyscall_gtod_data.sys_tz = sys_tz;
88 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
89} 81}
90 82
91void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, 83void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
92 struct clocksource *clock, u32 mult) 84 struct clocksource *clock, u32 mult)
93{ 85{
94 unsigned long flags; 86 struct timespec monotonic;
95 87
96 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 88 write_seqcount_begin(&vsyscall_gtod_data.seq);
97 89
98 /* copy vsyscall data */ 90 /* copy vsyscall data */
99 vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; 91 vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
@@ -101,12 +93,19 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
101 vsyscall_gtod_data.clock.mask = clock->mask; 93 vsyscall_gtod_data.clock.mask = clock->mask;
102 vsyscall_gtod_data.clock.mult = mult; 94 vsyscall_gtod_data.clock.mult = mult;
103 vsyscall_gtod_data.clock.shift = clock->shift; 95 vsyscall_gtod_data.clock.shift = clock->shift;
96
104 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; 97 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
105 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; 98 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
106 vsyscall_gtod_data.wall_to_monotonic = *wtm; 99
100 monotonic = timespec_add(*wall_time, *wtm);
101 vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec;
102 vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec;
103
107 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); 104 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
105 vsyscall_gtod_data.monotonic_time_coarse =
106 timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
108 107
109 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 108 write_seqcount_end(&vsyscall_gtod_data.seq);
110} 109}
111 110
112static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, 111static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
@@ -153,7 +152,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
153 152
154 thread->error_code = 6; /* user fault, no page, write */ 153 thread->error_code = 6; /* user fault, no page, write */
155 thread->cr2 = ptr; 154 thread->cr2 = ptr;
156 thread->trap_no = 14; 155 thread->trap_nr = X86_TRAP_PF;
157 156
158 memset(&info, 0, sizeof(info)); 157 memset(&info, 0, sizeof(info));
159 info.si_signo = SIGSEGV; 158 info.si_signo = SIGSEGV;
@@ -217,9 +216,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
217 current_thread_info()->sig_on_uaccess_error = 1; 216 current_thread_info()->sig_on_uaccess_error = 1;
218 217
219 /* 218 /*
220 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and 219 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
221 * 64-bit, so we don't need to special-case it here. For all the 220 * 64-bit, so we don't need to special-case it here. For all the
222 * vsyscalls, 0 means "don't write anything" not "write it at 221 * vsyscalls, NULL means "don't write anything" not "write it at
223 * address 0". 222 * address 0".
224 */ 223 */
225 ret = -EFAULT; 224 ret = -EFAULT;
@@ -248,7 +247,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
248 247
249 ret = sys_getcpu((unsigned __user *)regs->di, 248 ret = sys_getcpu((unsigned __user *)regs->di,
250 (unsigned __user *)regs->si, 249 (unsigned __user *)regs->si,
251 0); 250 NULL);
252 break; 251 break;
253 } 252 }
254 253
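
The vsyscall hunk above drops the embedded seqlock and moves to a bare seqcount: the writer brackets updates with write_seqcount_begin/end, and readers retry whenever the count was odd or changed across their reads. A didactic C11 model of that protocol (a real seqlock also needs the payload accesses fenced or atomic, which is glossed over here):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;
static long wall_sec, wall_nsec;        /* the protected payload */

static void writer_update(long sec, long nsec)
{
        atomic_fetch_add(&seq, 1);      /* odd: update in progress */
        wall_sec  = sec;
        wall_nsec = nsec;
        atomic_fetch_add(&seq, 1);      /* even again: update published */
}

static void reader_snapshot(long *sec, long *nsec)
{
        unsigned s1, s2;

        do {
                s1 = atomic_load(&seq);
                *sec  = wall_sec;
                *nsec = wall_nsec;
                s2 = atomic_load(&seq);
        } while ((s1 & 1) || s1 != s2); /* retry on in-progress or changed */
}

int main(void)
{
        long s, ns;

        writer_update(1234, 500);
        reader_snapshot(&s, &ns);
        printf("%ld.%09ld\n", s, ns);
        return 0;
}
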
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 947a06ccc673..e9f265fd79ae 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,6 +91,7 @@ struct x86_init_ops x86_init __initdata = {
91}; 91};
92 92
93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
94 .early_percpu_clock_init = x86_init_noop,
94 .setup_percpu_clockev = setup_secondary_APIC_clock, 95 .setup_percpu_clockev = setup_secondary_APIC_clock,
95 .fixup_cpu_id = x86_default_fixup_cpu_id, 96 .fixup_cpu_id = x86_default_fixup_cpu_id,
96}; 97};
@@ -107,7 +108,9 @@ struct x86_platform_ops x86_platform = {
107 .is_untracked_pat_range = is_ISA_range, 108 .is_untracked_pat_range = is_ISA_range,
108 .nmi_init = default_nmi_init, 109 .nmi_init = default_nmi_init,
109 .get_nmi_reason = default_get_nmi_reason, 110 .get_nmi_reason = default_get_nmi_reason,
110 .i8042_detect = default_i8042_detect 111 .i8042_detect = default_i8042_detect,
112 .save_sched_clock_state = tsc_save_sched_clock_state,
113 .restore_sched_clock_state = tsc_restore_sched_clock_state,
111}; 114};
112 115
113EXPORT_SYMBOL_GPL(x86_platform); 116EXPORT_SYMBOL_GPL(x86_platform);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a3911343976b..e62728e30b01 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -6,6 +6,7 @@
6#include <linux/bootmem.h> 6#include <linux/bootmem.h>
7#include <linux/compat.h> 7#include <linux/compat.h>
8#include <asm/i387.h> 8#include <asm/i387.h>
9#include <asm/fpu-internal.h>
9#ifdef CONFIG_IA32_EMULATION 10#ifdef CONFIG_IA32_EMULATION
10#include <asm/sigcontext32.h> 11#include <asm/sigcontext32.h>
11#endif 12#endif
@@ -47,7 +48,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
47 if (!fx) 48 if (!fx)
48 return; 49 return;
49 50
50 BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU); 51 BUG_ON(__thread_has_fpu(tsk));
51 52
52 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; 53 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
53 54
@@ -168,7 +169,7 @@ int save_i387_xstate(void __user *buf)
168 if (!used_math()) 169 if (!used_math())
169 return 0; 170 return 0;
170 171
171 if (task_thread_info(tsk)->status & TS_USEDFPU) { 172 if (user_has_fpu()) {
172 if (use_xsave()) 173 if (use_xsave())
173 err = xsave_user(buf); 174 err = xsave_user(buf);
174 else 175 else
@@ -176,8 +177,7 @@ int save_i387_xstate(void __user *buf)
176 177
177 if (err) 178 if (err)
178 return err; 179 return err;
179 task_thread_info(tsk)->status &= ~TS_USEDFPU; 180 user_fpu_end();
180 stts();
181 } else { 181 } else {
182 sanitize_i387_state(tsk); 182 sanitize_i387_state(tsk);
183 if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, 183 if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
@@ -292,10 +292,7 @@ int restore_i387_xstate(void __user *buf)
292 return err; 292 return err;
293 } 293 }
294 294
295 if (!(task_thread_info(current)->status & TS_USEDFPU)) { 295 user_fpu_begin();
296 clts();
297 task_thread_info(current)->status |= TS_USEDFPU;
298 }
299 if (use_xsave()) 296 if (use_xsave())
300 err = restore_user_xstate(buf); 297 err = restore_user_xstate(buf);
301 else 298 else
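
For reference (editor's sketch): user_fpu_begin()/user_fpu_end() replace the
open-coded TS_USEDFPU/clts()/stts() sequences deleted above. Roughly what
they encapsulate, reconstructed only from the removed lines - the real
definitions live in asm/fpu-internal.h and track FPU ownership differently:

    /* Approximate expansions; kernel code, not standalone. */
    static inline void user_fpu_begin(void)
    {
        if (!(current_thread_info()->status & TS_USEDFPU)) {
            clts();                       /* clear CR0.TS: FPU usable */
            current_thread_info()->status |= TS_USEDFPU;
        }
    }

    static inline void user_fpu_end(void)
    {
        current_thread_info()->status &= ~TS_USEDFPU;
        stts();                           /* set CR0.TS again */
    }
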
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 89b02bfaaca5..9fed5bedaad6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -236,7 +236,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
236 const u32 kvm_supported_word6_x86_features = 236 const u32 kvm_supported_word6_x86_features =
237 F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | 237 F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
238 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | 238 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
239 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 239 F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
240 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); 240 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
241 241
242 /* cpuid 0xC0000001.edx */ 242 /* cpuid 0xC0000001.edx */
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 5b97e1797a6d..26d1fb437eb5 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -43,4 +43,12 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
43 return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); 43 return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
44} 44}
45 45
46static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
47{
48 struct kvm_cpuid_entry2 *best;
49
50 best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
51 return best && (best->ecx & bit(X86_FEATURE_OSVW));
52}
53
46#endif 54#endif
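
For reference (editor's note): X86_FEATURE_OSVW is CPUID leaf 0x80000001,
register ECX, bit 9 (per the AMD APM), and bit() just builds a mask from the
low bits of the feature number. A runnable re-derivation with a fabricated
CPUID value:

    #include <stdio.h>

    #define OSVW_BIT 9                       /* ECX[9] in leaf 0x80000001 */

    static unsigned bit(unsigned feature) { return 1u << (feature & 31); }

    int main(void)
    {
        unsigned ecx = 1u << OSVW_BIT;       /* pretend CPUID output */
        printf("osvw %s\n", (ecx & bit(OSVW_BIT)) ? "present" : "absent");
        return 0;
    }
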
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 05a562b85025..83756223f8aa 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -57,6 +57,7 @@
57#define OpDS 23ull /* DS */ 57#define OpDS 23ull /* DS */
58#define OpFS 24ull /* FS */ 58#define OpFS 24ull /* FS */
59#define OpGS 25ull /* GS */ 59#define OpGS 25ull /* GS */
60#define OpMem8 26ull /* 8-bit zero extended memory operand */
60 61
61#define OpBits 5 /* Width of operand field */ 62#define OpBits 5 /* Width of operand field */
62#define OpMask ((1ull << OpBits) - 1) 63#define OpMask ((1ull << OpBits) - 1)
@@ -101,6 +102,7 @@
101#define SrcAcc (OpAcc << SrcShift) 102#define SrcAcc (OpAcc << SrcShift)
102#define SrcImmU16 (OpImmU16 << SrcShift) 103#define SrcImmU16 (OpImmU16 << SrcShift)
103#define SrcDX (OpDX << SrcShift) 104#define SrcDX (OpDX << SrcShift)
105#define SrcMem8 (OpMem8 << SrcShift)
104#define SrcMask (OpMask << SrcShift) 106#define SrcMask (OpMask << SrcShift)
105#define BitOp (1<<11) 107#define BitOp (1<<11)
106#define MemAbs (1<<12) /* Memory operand is absolute displacement */ 108#define MemAbs (1<<12) /* Memory operand is absolute displacement */
@@ -858,8 +860,7 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
858} 860}
859 861
860static void decode_register_operand(struct x86_emulate_ctxt *ctxt, 862static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
861 struct operand *op, 863 struct operand *op)
862 int inhibit_bytereg)
863{ 864{
864 unsigned reg = ctxt->modrm_reg; 865 unsigned reg = ctxt->modrm_reg;
865 int highbyte_regs = ctxt->rex_prefix == 0; 866 int highbyte_regs = ctxt->rex_prefix == 0;
@@ -876,7 +877,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
876 } 877 }
877 878
878 op->type = OP_REG; 879 op->type = OP_REG;
879 if ((ctxt->d & ByteOp) && !inhibit_bytereg) { 880 if (ctxt->d & ByteOp) {
880 op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); 881 op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
881 op->bytes = 1; 882 op->bytes = 1;
882 } else { 883 } else {
@@ -1151,6 +1152,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1151 return 1; 1152 return 1;
1152} 1153}
1153 1154
1155static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1156 u16 index, struct desc_struct *desc)
1157{
1158 struct desc_ptr dt;
1159 ulong addr;
1160
1161 ctxt->ops->get_idt(ctxt, &dt);
1162
1163 if (dt.size < index * 8 + 7)
1164 return emulate_gp(ctxt, index << 3 | 0x2);
1165
1166 addr = dt.address + index * 8;
1167 return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
1168 &ctxt->exception);
1169}
1170
1154static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, 1171static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1155 u16 selector, struct desc_ptr *dt) 1172 u16 selector, struct desc_ptr *dt)
1156{ 1173{
@@ -1227,6 +1244,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1227 seg_desc.type = 3; 1244 seg_desc.type = 3;
1228 seg_desc.p = 1; 1245 seg_desc.p = 1;
1229 seg_desc.s = 1; 1246 seg_desc.s = 1;
1247 if (ctxt->mode == X86EMUL_MODE_VM86)
1248 seg_desc.dpl = 3;
1230 goto load; 1249 goto load;
1231 } 1250 }
1232 1251
@@ -1891,6 +1910,62 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1891 ss->p = 1; 1910 ss->p = 1;
1892} 1911}
1893 1912
1913static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
1914{
1915 u32 eax, ebx, ecx, edx;
1916
1917 eax = ecx = 0;
1918 return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)
1919 && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
1920 && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
1921 && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
1922}
1923
1924static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
1925{
1926 struct x86_emulate_ops *ops = ctxt->ops;
1927 u32 eax, ebx, ecx, edx;
1928
1929 /*
 1930	 * syscall should always be enabled in longmode - so we only become
 1931	 * vendor specific (cpuid) when other modes are active...
1932 */
1933 if (ctxt->mode == X86EMUL_MODE_PROT64)
1934 return true;
1935
1936 eax = 0x00000000;
1937 ecx = 0x00000000;
1938 if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) {
1939 /*
1940 * Intel ("GenuineIntel")
1941 * remark: Intel CPUs only support "syscall" in 64bit
 1942		 * longmode. Also a 64bit guest with a
 1943		 * 32bit compat-app running will #UD !! While this
 1944		 * behaviour can be fixed (by emulating) to match the
 1945		 * AMD response - AMD CPUs can't behave like Intel ones.
1946 */
1947 if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
1948 ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
1949 edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
1950 return false;
1951
1952 /* AMD ("AuthenticAMD") */
1953 if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
1954 ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
1955 edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
1956 return true;
1957
1958 /* AMD ("AMDisbetter!") */
1959 if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
1960 ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
1961 edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
1962 return true;
1963 }
1964
1965 /* default: (not Intel, not AMD), apply Intel's stricter rules... */
1966 return false;
1967}
1968
1894static int em_syscall(struct x86_emulate_ctxt *ctxt) 1969static int em_syscall(struct x86_emulate_ctxt *ctxt)
1895{ 1970{
1896 struct x86_emulate_ops *ops = ctxt->ops; 1971 struct x86_emulate_ops *ops = ctxt->ops;
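
For reference (editor's sketch, not part of the patch): the
X86EMUL_CPUID_VENDOR_* constants compared by vendor_intel() and
em_syscall_is_enabled() are the 12-byte CPUID vendor string packed
little-endian into EBX, EDX, ECX. A runnable demonstration:

    #include <stdint.h>
    #include <stdio.h>

    /* Pack 4 ASCII bytes into a little-endian register image. */
    static uint32_t pack4(const char *s)
    {
        return (uint32_t)s[0] | (uint32_t)s[1] << 8 |
               (uint32_t)s[2] << 16 | (uint32_t)s[3] << 24;
    }

    int main(void)
    {
        const char *v = "GenuineIntel";      /* EBX:EDX:ECX order */

        printf("ebx=%#010x edx=%#010x ecx=%#010x\n",
               pack4(v), pack4(v + 4), pack4(v + 8));
        /* prints ebx=0x756e6547 edx=0x49656e69 ecx=0x6c65746e */
        return 0;
    }

"AuthenticAMD" and "AMDisbetter!" pack into their constants the same way.
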
@@ -1904,9 +1979,15 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
1904 ctxt->mode == X86EMUL_MODE_VM86) 1979 ctxt->mode == X86EMUL_MODE_VM86)
1905 return emulate_ud(ctxt); 1980 return emulate_ud(ctxt);
1906 1981
1982 if (!(em_syscall_is_enabled(ctxt)))
1983 return emulate_ud(ctxt);
1984
1907 ops->get_msr(ctxt, MSR_EFER, &efer); 1985 ops->get_msr(ctxt, MSR_EFER, &efer);
1908 setup_syscalls_segments(ctxt, &cs, &ss); 1986 setup_syscalls_segments(ctxt, &cs, &ss);
1909 1987
1988 if (!(efer & EFER_SCE))
1989 return emulate_ud(ctxt);
1990
1910 ops->get_msr(ctxt, MSR_STAR, &msr_data); 1991 ops->get_msr(ctxt, MSR_STAR, &msr_data);
1911 msr_data >>= 32; 1992 msr_data >>= 32;
1912 cs_sel = (u16)(msr_data & 0xfffc); 1993 cs_sel = (u16)(msr_data & 0xfffc);
@@ -1956,6 +2037,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
1956 if (ctxt->mode == X86EMUL_MODE_REAL) 2037 if (ctxt->mode == X86EMUL_MODE_REAL)
1957 return emulate_gp(ctxt, 0); 2038 return emulate_gp(ctxt, 0);
1958 2039
2040 /*
2041 * Not recognized on AMD in compat mode (but is recognized in legacy
2042 * mode).
2043 */
2044 if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA)
2045 && !vendor_intel(ctxt))
2046 return emulate_ud(ctxt);
2047
1959 /* XXX sysenter/sysexit have not been tested in 64bit mode. 2048 /* XXX sysenter/sysexit have not been tested in 64bit mode.
1960 * Therefore, we inject an #UD. 2049 * Therefore, we inject an #UD.
1961 */ 2050 */
@@ -2255,6 +2344,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2255 return emulate_gp(ctxt, 0); 2344 return emulate_gp(ctxt, 0);
2256 ctxt->_eip = tss->eip; 2345 ctxt->_eip = tss->eip;
2257 ctxt->eflags = tss->eflags | 2; 2346 ctxt->eflags = tss->eflags | 2;
2347
2348 /* General purpose registers */
2258 ctxt->regs[VCPU_REGS_RAX] = tss->eax; 2349 ctxt->regs[VCPU_REGS_RAX] = tss->eax;
2259 ctxt->regs[VCPU_REGS_RCX] = tss->ecx; 2350 ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
2260 ctxt->regs[VCPU_REGS_RDX] = tss->edx; 2351 ctxt->regs[VCPU_REGS_RDX] = tss->edx;
@@ -2277,6 +2368,24 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2277 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); 2368 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2278 2369
2279 /* 2370 /*
2371 * If we're switching between Protected Mode and VM86, we need to make
2372 * sure to update the mode before loading the segment descriptors so
2373 * that the selectors are interpreted correctly.
2374 *
 2375	 * We need to get rflags into the vcpu struct immediately because it
 2376	 * influences the CPL, which is checked at least when loading the segment
 2377	 * descriptors and when pushing an error code onto the new kernel stack.
2378 *
2379 * TODO Introduce a separate ctxt->ops->set_cpl callback
2380 */
2381 if (ctxt->eflags & X86_EFLAGS_VM)
2382 ctxt->mode = X86EMUL_MODE_VM86;
2383 else
2384 ctxt->mode = X86EMUL_MODE_PROT32;
2385
2386 ctxt->ops->set_rflags(ctxt, ctxt->eflags);
2387
2388 /*
 2280	 * Now load segment descriptors. If a fault happens at this stage 2389	 * Now load segment descriptors. If a fault happens at this stage
 2281	 * it is handled in the context of the new task 2390	 * it is handled in the context of the new task
2282 */ 2391 */
@@ -2350,7 +2459,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2350} 2459}
2351 2460
2352static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, 2461static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2353 u16 tss_selector, int reason, 2462 u16 tss_selector, int idt_index, int reason,
2354 bool has_error_code, u32 error_code) 2463 bool has_error_code, u32 error_code)
2355{ 2464{
2356 struct x86_emulate_ops *ops = ctxt->ops; 2465 struct x86_emulate_ops *ops = ctxt->ops;
@@ -2372,12 +2481,35 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2372 2481
2373 /* FIXME: check that next_tss_desc is tss */ 2482 /* FIXME: check that next_tss_desc is tss */
2374 2483
2375 if (reason != TASK_SWITCH_IRET) { 2484 /*
2376 if ((tss_selector & 3) > next_tss_desc.dpl || 2485 * Check privileges. The three cases are task switch caused by...
2377 ops->cpl(ctxt) > next_tss_desc.dpl) 2486 *
2378 return emulate_gp(ctxt, 0); 2487 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2488 * 2. Exception/IRQ/iret: No check is performed
 2489	 * 3. jmp/call to TSS: Check against DPL of the TSS
2490 */
2491 if (reason == TASK_SWITCH_GATE) {
2492 if (idt_index != -1) {
2493 /* Software interrupts */
2494 struct desc_struct task_gate_desc;
2495 int dpl;
2496
2497 ret = read_interrupt_descriptor(ctxt, idt_index,
2498 &task_gate_desc);
2499 if (ret != X86EMUL_CONTINUE)
2500 return ret;
2501
2502 dpl = task_gate_desc.dpl;
2503 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2504 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2505 }
2506 } else if (reason != TASK_SWITCH_IRET) {
2507 int dpl = next_tss_desc.dpl;
2508 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2509 return emulate_gp(ctxt, tss_selector);
2379 } 2510 }
2380 2511
2512
2381 desc_limit = desc_limit_scaled(&next_tss_desc); 2513 desc_limit = desc_limit_scaled(&next_tss_desc);
2382 if (!next_tss_desc.p || 2514 if (!next_tss_desc.p ||
2383 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || 2515 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2430,7 +2562,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2430} 2562}
2431 2563
2432int emulator_task_switch(struct x86_emulate_ctxt *ctxt, 2564int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2433 u16 tss_selector, int reason, 2565 u16 tss_selector, int idt_index, int reason,
2434 bool has_error_code, u32 error_code) 2566 bool has_error_code, u32 error_code)
2435{ 2567{
2436 int rc; 2568 int rc;
@@ -2438,7 +2570,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2438 ctxt->_eip = ctxt->eip; 2570 ctxt->_eip = ctxt->eip;
2439 ctxt->dst.type = OP_NONE; 2571 ctxt->dst.type = OP_NONE;
2440 2572
2441 rc = emulator_do_task_switch(ctxt, tss_selector, reason, 2573 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
2442 has_error_code, error_code); 2574 has_error_code, error_code);
2443 2575
2444 if (rc == X86EMUL_CONTINUE) 2576 if (rc == X86EMUL_CONTINUE)
@@ -3463,13 +3595,13 @@ static struct opcode twobyte_table[256] = {
3463 I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), 3595 I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
3464 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), 3596 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
3465 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), 3597 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
3466 D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 3598 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3467 /* 0xB8 - 0xBF */ 3599 /* 0xB8 - 0xBF */
3468 N, N, 3600 N, N,
3469 G(BitOp, group8), 3601 G(BitOp, group8),
3470 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), 3602 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
3471 I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), 3603 I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr),
3472 D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 3604 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3473 /* 0xC0 - 0xCF */ 3605 /* 0xC0 - 0xCF */
3474 D2bv(DstMem | SrcReg | ModRM | Lock), 3606 D2bv(DstMem | SrcReg | ModRM | Lock),
3475 N, D(DstMem | SrcReg | ModRM | Mov), 3607 N, D(DstMem | SrcReg | ModRM | Mov),
@@ -3551,9 +3683,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
3551 3683
3552 switch (d) { 3684 switch (d) {
3553 case OpReg: 3685 case OpReg:
3554 decode_register_operand(ctxt, op, 3686 decode_register_operand(ctxt, op);
3555 op == &ctxt->dst &&
3556 ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7));
3557 break; 3687 break;
3558 case OpImmUByte: 3688 case OpImmUByte:
3559 rc = decode_imm(ctxt, op, 1, false); 3689 rc = decode_imm(ctxt, op, 1, false);
@@ -3605,6 +3735,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
3605 case OpImm: 3735 case OpImm:
3606 rc = decode_imm(ctxt, op, imm_size(ctxt), true); 3736 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
3607 break; 3737 break;
3738 case OpMem8:
3739 ctxt->memop.bytes = 1;
3740 goto mem_common;
3608 case OpMem16: 3741 case OpMem16:
3609 ctxt->memop.bytes = 2; 3742 ctxt->memop.bytes = 2;
3610 goto mem_common; 3743 goto mem_common;
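
Editor's note: OpMem8/SrcMem8 exist because movzx/movsx (0f b6 / 0f be) read
one byte of memory yet write a full-width register, so operand size must be a
per-operand property rather than a per-instruction ByteOp flag (the old
inhibit_bytereg special case). A toy stand-in of per-operand sizing (all
types and names invented for illustration):

    #include <stdio.h>

    enum op { OP_MEM8, OP_MEM16, OP_REG32 };

    struct insn { const char *mnem; enum op src; enum op dst; };

    static unsigned op_bytes(enum op o)
    {
        return o == OP_MEM8 ? 1 : o == OP_MEM16 ? 2 : 4;
    }

    int main(void)
    {
        struct insn movzx = { "movzx r32, m8", OP_MEM8, OP_REG32 };

        printf("%s: src %u byte(s), dst %u bytes\n",
               movzx.mnem, op_bytes(movzx.src), op_bytes(movzx.dst));
        return 0;
    }
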
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index b6a73537e1ef..81cf4fa4a2be 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -307,6 +307,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
307 if (val & 0x10) { 307 if (val & 0x10) {
308 s->init4 = val & 1; 308 s->init4 = val & 1;
309 s->last_irr = 0; 309 s->last_irr = 0;
310 s->irr &= s->elcr;
310 s->imr = 0; 311 s->imr = 0;
311 s->priority_add = 0; 312 s->priority_add = 0;
312 s->special_mask = 0; 313 s->special_mask = 0;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cfdc6e0ef002..858432287ab6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -433,7 +433,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
433 break; 433 break;
434 434
435 case APIC_DM_INIT: 435 case APIC_DM_INIT:
436 if (level) { 436 if (!trig_mode || level) {
437 result = 1; 437 result = 1;
438 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 438 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
439 kvm_make_request(KVM_REQ_EVENT, vcpu); 439 kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -731,7 +731,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
731 u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; 731 u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
732 u64 ns = 0; 732 u64 ns = 0;
733 struct kvm_vcpu *vcpu = apic->vcpu; 733 struct kvm_vcpu *vcpu = apic->vcpu;
734 unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); 734 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
735 unsigned long flags; 735 unsigned long flags;
736 736
737 if (unlikely(!tscdeadline || !this_tsc_khz)) 737 if (unlikely(!tscdeadline || !this_tsc_khz))
@@ -1283,9 +1283,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
1283 if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) 1283 if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
1284 return; 1284 return;
1285 1285
1286 vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0); 1286 vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
1287 data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)); 1287 data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
1288 kunmap_atomic(vapic, KM_USER0); 1288 kunmap_atomic(vapic);
1289 1289
1290 apic_set_tpr(vcpu->arch.apic, data & 0xff); 1290 apic_set_tpr(vcpu->arch.apic, data & 0xff);
1291} 1291}
@@ -1310,9 +1310,9 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
1310 max_isr = 0; 1310 max_isr = 0;
1311 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 1311 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
1312 1312
1313 vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0); 1313 vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
1314 *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data; 1314 *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
1315 kunmap_atomic(vapic, KM_USER0); 1315 kunmap_atomic(vapic);
1316} 1316}
1317 1317
1318void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 1318void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
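
Editor's note: the lapic.c hunks above are part of the tree-wide move to the
slotless kmap_atomic() API - the KM_USER0 slot argument disappears and the
mapping slot is managed implicitly per CPU. A sketch of the new calling
convention (kernel-style code, not standalone; the helper name is invented):

    static u32 read_vapic_word(struct page *page, unsigned long offset)
    {
        void *va = kmap_atomic(page);   /* was: kmap_atomic(page, KM_USER0) */
        u32 val = *(u32 *)(va + offset);

        kunmap_atomic(va);              /* was: kunmap_atomic(va, KM_USER0) */
        return val;
    }
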
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2a2a9b40db19..4cb164268846 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -74,7 +74,7 @@ enum {
74#endif 74#endif
75 75
76#ifdef MMU_DEBUG 76#ifdef MMU_DEBUG
77static int dbg = 0; 77static bool dbg = 0;
78module_param(dbg, bool, 0644); 78module_param(dbg, bool, 0644);
79#endif 79#endif
80 80
@@ -688,9 +688,8 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
688{ 688{
689 unsigned long idx; 689 unsigned long idx;
690 690
691 idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - 691 idx = gfn_to_index(gfn, slot->base_gfn, level);
692 (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 692 return &slot->arch.lpage_info[level - 2][idx];
693 return &slot->lpage_info[level - 2][idx];
694} 693}
695 694
696static void account_shadowed(struct kvm *kvm, gfn_t gfn) 695static void account_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -946,7 +945,7 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
946 } 945 }
947} 946}
948 947
949static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level, 948static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
950 struct kvm_memory_slot *slot) 949 struct kvm_memory_slot *slot)
951{ 950{
952 struct kvm_lpage_info *linfo; 951 struct kvm_lpage_info *linfo;
@@ -966,7 +965,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
966 struct kvm_memory_slot *slot; 965 struct kvm_memory_slot *slot;
967 966
968 slot = gfn_to_memslot(kvm, gfn); 967 slot = gfn_to_memslot(kvm, gfn);
969 return __gfn_to_rmap(kvm, gfn, level, slot); 968 return __gfn_to_rmap(gfn, level, slot);
970} 969}
971 970
972static bool rmap_can_add(struct kvm_vcpu *vcpu) 971static bool rmap_can_add(struct kvm_vcpu *vcpu)
@@ -988,7 +987,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
988 return pte_list_add(vcpu, spte, rmapp); 987 return pte_list_add(vcpu, spte, rmapp);
989} 988}
990 989
991static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) 990static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
992{ 991{
993 return pte_list_next(rmapp, spte); 992 return pte_list_next(rmapp, spte);
994} 993}
@@ -1018,8 +1017,8 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
1018 u64 *spte; 1017 u64 *spte;
1019 int i, write_protected = 0; 1018 int i, write_protected = 0;
1020 1019
1021 rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); 1020 rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
1022 spte = rmap_next(kvm, rmapp, NULL); 1021 spte = rmap_next(rmapp, NULL);
1023 while (spte) { 1022 while (spte) {
1024 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1023 BUG_ON(!(*spte & PT_PRESENT_MASK));
1025 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 1024 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
@@ -1027,14 +1026,14 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
1027 mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); 1026 mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
1028 write_protected = 1; 1027 write_protected = 1;
1029 } 1028 }
1030 spte = rmap_next(kvm, rmapp, spte); 1029 spte = rmap_next(rmapp, spte);
1031 } 1030 }
1032 1031
1033 /* check for huge page mappings */ 1032 /* check for huge page mappings */
1034 for (i = PT_DIRECTORY_LEVEL; 1033 for (i = PT_DIRECTORY_LEVEL;
1035 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 1034 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
1036 rmapp = __gfn_to_rmap(kvm, gfn, i, slot); 1035 rmapp = __gfn_to_rmap(gfn, i, slot);
1037 spte = rmap_next(kvm, rmapp, NULL); 1036 spte = rmap_next(rmapp, NULL);
1038 while (spte) { 1037 while (spte) {
1039 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1038 BUG_ON(!(*spte & PT_PRESENT_MASK));
1040 BUG_ON(!is_large_pte(*spte)); 1039 BUG_ON(!is_large_pte(*spte));
@@ -1045,7 +1044,7 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
1045 spte = NULL; 1044 spte = NULL;
1046 write_protected = 1; 1045 write_protected = 1;
1047 } 1046 }
1048 spte = rmap_next(kvm, rmapp, spte); 1047 spte = rmap_next(rmapp, spte);
1049 } 1048 }
1050 } 1049 }
1051 1050
@@ -1066,7 +1065,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
1066 u64 *spte; 1065 u64 *spte;
1067 int need_tlb_flush = 0; 1066 int need_tlb_flush = 0;
1068 1067
1069 while ((spte = rmap_next(kvm, rmapp, NULL))) { 1068 while ((spte = rmap_next(rmapp, NULL))) {
1070 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1069 BUG_ON(!(*spte & PT_PRESENT_MASK));
1071 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); 1070 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
1072 drop_spte(kvm, spte); 1071 drop_spte(kvm, spte);
@@ -1085,14 +1084,14 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
1085 1084
1086 WARN_ON(pte_huge(*ptep)); 1085 WARN_ON(pte_huge(*ptep));
1087 new_pfn = pte_pfn(*ptep); 1086 new_pfn = pte_pfn(*ptep);
1088 spte = rmap_next(kvm, rmapp, NULL); 1087 spte = rmap_next(rmapp, NULL);
1089 while (spte) { 1088 while (spte) {
1090 BUG_ON(!is_shadow_present_pte(*spte)); 1089 BUG_ON(!is_shadow_present_pte(*spte));
1091 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); 1090 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
1092 need_flush = 1; 1091 need_flush = 1;
1093 if (pte_write(*ptep)) { 1092 if (pte_write(*ptep)) {
1094 drop_spte(kvm, spte); 1093 drop_spte(kvm, spte);
1095 spte = rmap_next(kvm, rmapp, NULL); 1094 spte = rmap_next(rmapp, NULL);
1096 } else { 1095 } else {
1097 new_spte = *spte &~ (PT64_BASE_ADDR_MASK); 1096 new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
1098 new_spte |= (u64)new_pfn << PAGE_SHIFT; 1097 new_spte |= (u64)new_pfn << PAGE_SHIFT;
@@ -1102,7 +1101,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
1102 new_spte &= ~shadow_accessed_mask; 1101 new_spte &= ~shadow_accessed_mask;
1103 mmu_spte_clear_track_bits(spte); 1102 mmu_spte_clear_track_bits(spte);
1104 mmu_spte_set(spte, new_spte); 1103 mmu_spte_set(spte, new_spte);
1105 spte = rmap_next(kvm, rmapp, spte); 1104 spte = rmap_next(rmapp, spte);
1106 } 1105 }
1107 } 1106 }
1108 if (need_flush) 1107 if (need_flush)
@@ -1176,7 +1175,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1176 if (!shadow_accessed_mask) 1175 if (!shadow_accessed_mask)
1177 return kvm_unmap_rmapp(kvm, rmapp, data); 1176 return kvm_unmap_rmapp(kvm, rmapp, data);
1178 1177
1179 spte = rmap_next(kvm, rmapp, NULL); 1178 spte = rmap_next(rmapp, NULL);
1180 while (spte) { 1179 while (spte) {
1181 int _young; 1180 int _young;
1182 u64 _spte = *spte; 1181 u64 _spte = *spte;
@@ -1186,7 +1185,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1186 young = 1; 1185 young = 1;
1187 clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); 1186 clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
1188 } 1187 }
1189 spte = rmap_next(kvm, rmapp, spte); 1188 spte = rmap_next(rmapp, spte);
1190 } 1189 }
1191 return young; 1190 return young;
1192} 1191}
@@ -1205,7 +1204,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1205 if (!shadow_accessed_mask) 1204 if (!shadow_accessed_mask)
1206 goto out; 1205 goto out;
1207 1206
1208 spte = rmap_next(kvm, rmapp, NULL); 1207 spte = rmap_next(rmapp, NULL);
1209 while (spte) { 1208 while (spte) {
1210 u64 _spte = *spte; 1209 u64 _spte = *spte;
1211 BUG_ON(!(_spte & PT_PRESENT_MASK)); 1210 BUG_ON(!(_spte & PT_PRESENT_MASK));
@@ -1214,7 +1213,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1214 young = 1; 1213 young = 1;
1215 break; 1214 break;
1216 } 1215 }
1217 spte = rmap_next(kvm, rmapp, spte); 1216 spte = rmap_next(rmapp, spte);
1218 } 1217 }
1219out: 1218out:
1220 return young; 1219 return young;
@@ -1391,11 +1390,6 @@ struct kvm_mmu_pages {
1391 unsigned int nr; 1390 unsigned int nr;
1392}; 1391};
1393 1392
1394#define for_each_unsync_children(bitmap, idx) \
1395 for (idx = find_first_bit(bitmap, 512); \
1396 idx < 512; \
1397 idx = find_next_bit(bitmap, 512, idx+1))
1398
1399static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, 1393static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
1400 int idx) 1394 int idx)
1401{ 1395{
@@ -1417,7 +1411,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
1417{ 1411{
1418 int i, ret, nr_unsync_leaf = 0; 1412 int i, ret, nr_unsync_leaf = 0;
1419 1413
1420 for_each_unsync_children(sp->unsync_child_bitmap, i) { 1414 for_each_set_bit(i, sp->unsync_child_bitmap, 512) {
1421 struct kvm_mmu_page *child; 1415 struct kvm_mmu_page *child;
1422 u64 ent = sp->spt[i]; 1416 u64 ent = sp->spt[i];
1423 1417
@@ -1803,6 +1797,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
1803{ 1797{
1804 if (is_large_pte(*sptep)) { 1798 if (is_large_pte(*sptep)) {
1805 drop_spte(vcpu->kvm, sptep); 1799 drop_spte(vcpu->kvm, sptep);
1800 --vcpu->kvm->stat.lpages;
1806 kvm_flush_remote_tlbs(vcpu->kvm); 1801 kvm_flush_remote_tlbs(vcpu->kvm);
1807 } 1802 }
1808} 1803}
@@ -3190,15 +3185,14 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
3190#undef PTTYPE 3185#undef PTTYPE
3191 3186
3192static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, 3187static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3193 struct kvm_mmu *context, 3188 struct kvm_mmu *context)
3194 int level)
3195{ 3189{
3196 int maxphyaddr = cpuid_maxphyaddr(vcpu); 3190 int maxphyaddr = cpuid_maxphyaddr(vcpu);
3197 u64 exb_bit_rsvd = 0; 3191 u64 exb_bit_rsvd = 0;
3198 3192
3199 if (!context->nx) 3193 if (!context->nx)
3200 exb_bit_rsvd = rsvd_bits(63, 63); 3194 exb_bit_rsvd = rsvd_bits(63, 63);
3201 switch (level) { 3195 switch (context->root_level) {
3202 case PT32_ROOT_LEVEL: 3196 case PT32_ROOT_LEVEL:
3203 /* no rsvd bits for 2 level 4K page table entries */ 3197 /* no rsvd bits for 2 level 4K page table entries */
3204 context->rsvd_bits_mask[0][1] = 0; 3198 context->rsvd_bits_mask[0][1] = 0;
@@ -3256,8 +3250,9 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
3256 int level) 3250 int level)
3257{ 3251{
3258 context->nx = is_nx(vcpu); 3252 context->nx = is_nx(vcpu);
3253 context->root_level = level;
3259 3254
3260 reset_rsvds_bits_mask(vcpu, context, level); 3255 reset_rsvds_bits_mask(vcpu, context);
3261 3256
3262 ASSERT(is_pae(vcpu)); 3257 ASSERT(is_pae(vcpu));
3263 context->new_cr3 = paging_new_cr3; 3258 context->new_cr3 = paging_new_cr3;
@@ -3267,7 +3262,6 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
3267 context->invlpg = paging64_invlpg; 3262 context->invlpg = paging64_invlpg;
3268 context->update_pte = paging64_update_pte; 3263 context->update_pte = paging64_update_pte;
3269 context->free = paging_free; 3264 context->free = paging_free;
3270 context->root_level = level;
3271 context->shadow_root_level = level; 3265 context->shadow_root_level = level;
3272 context->root_hpa = INVALID_PAGE; 3266 context->root_hpa = INVALID_PAGE;
3273 context->direct_map = false; 3267 context->direct_map = false;
@@ -3284,8 +3278,9 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
3284 struct kvm_mmu *context) 3278 struct kvm_mmu *context)
3285{ 3279{
3286 context->nx = false; 3280 context->nx = false;
3281 context->root_level = PT32_ROOT_LEVEL;
3287 3282
3288 reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); 3283 reset_rsvds_bits_mask(vcpu, context);
3289 3284
3290 context->new_cr3 = paging_new_cr3; 3285 context->new_cr3 = paging_new_cr3;
3291 context->page_fault = paging32_page_fault; 3286 context->page_fault = paging32_page_fault;
@@ -3294,7 +3289,6 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
3294 context->sync_page = paging32_sync_page; 3289 context->sync_page = paging32_sync_page;
3295 context->invlpg = paging32_invlpg; 3290 context->invlpg = paging32_invlpg;
3296 context->update_pte = paging32_update_pte; 3291 context->update_pte = paging32_update_pte;
3297 context->root_level = PT32_ROOT_LEVEL;
3298 context->shadow_root_level = PT32E_ROOT_LEVEL; 3292 context->shadow_root_level = PT32E_ROOT_LEVEL;
3299 context->root_hpa = INVALID_PAGE; 3293 context->root_hpa = INVALID_PAGE;
3300 context->direct_map = false; 3294 context->direct_map = false;
@@ -3325,7 +3319,6 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3325 context->get_cr3 = get_cr3; 3319 context->get_cr3 = get_cr3;
3326 context->get_pdptr = kvm_pdptr_read; 3320 context->get_pdptr = kvm_pdptr_read;
3327 context->inject_page_fault = kvm_inject_page_fault; 3321 context->inject_page_fault = kvm_inject_page_fault;
3328 context->nx = is_nx(vcpu);
3329 3322
3330 if (!is_paging(vcpu)) { 3323 if (!is_paging(vcpu)) {
3331 context->nx = false; 3324 context->nx = false;
@@ -3333,19 +3326,19 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3333 context->root_level = 0; 3326 context->root_level = 0;
3334 } else if (is_long_mode(vcpu)) { 3327 } else if (is_long_mode(vcpu)) {
3335 context->nx = is_nx(vcpu); 3328 context->nx = is_nx(vcpu);
3336 reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL);
3337 context->gva_to_gpa = paging64_gva_to_gpa;
3338 context->root_level = PT64_ROOT_LEVEL; 3329 context->root_level = PT64_ROOT_LEVEL;
3330 reset_rsvds_bits_mask(vcpu, context);
3331 context->gva_to_gpa = paging64_gva_to_gpa;
3339 } else if (is_pae(vcpu)) { 3332 } else if (is_pae(vcpu)) {
3340 context->nx = is_nx(vcpu); 3333 context->nx = is_nx(vcpu);
3341 reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL);
3342 context->gva_to_gpa = paging64_gva_to_gpa;
3343 context->root_level = PT32E_ROOT_LEVEL; 3334 context->root_level = PT32E_ROOT_LEVEL;
3335 reset_rsvds_bits_mask(vcpu, context);
3336 context->gva_to_gpa = paging64_gva_to_gpa;
3344 } else { 3337 } else {
3345 context->nx = false; 3338 context->nx = false;
3346 reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
3347 context->gva_to_gpa = paging32_gva_to_gpa;
3348 context->root_level = PT32_ROOT_LEVEL; 3339 context->root_level = PT32_ROOT_LEVEL;
3340 reset_rsvds_bits_mask(vcpu, context);
3341 context->gva_to_gpa = paging32_gva_to_gpa;
3349 } 3342 }
3350 3343
3351 return 0; 3344 return 0;
@@ -3408,18 +3401,18 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
3408 g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; 3401 g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
3409 } else if (is_long_mode(vcpu)) { 3402 } else if (is_long_mode(vcpu)) {
3410 g_context->nx = is_nx(vcpu); 3403 g_context->nx = is_nx(vcpu);
3411 reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL);
3412 g_context->root_level = PT64_ROOT_LEVEL; 3404 g_context->root_level = PT64_ROOT_LEVEL;
3405 reset_rsvds_bits_mask(vcpu, g_context);
3413 g_context->gva_to_gpa = paging64_gva_to_gpa_nested; 3406 g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
3414 } else if (is_pae(vcpu)) { 3407 } else if (is_pae(vcpu)) {
3415 g_context->nx = is_nx(vcpu); 3408 g_context->nx = is_nx(vcpu);
3416 reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL);
3417 g_context->root_level = PT32E_ROOT_LEVEL; 3409 g_context->root_level = PT32E_ROOT_LEVEL;
3410 reset_rsvds_bits_mask(vcpu, g_context);
3418 g_context->gva_to_gpa = paging64_gva_to_gpa_nested; 3411 g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
3419 } else { 3412 } else {
3420 g_context->nx = false; 3413 g_context->nx = false;
3421 reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL);
3422 g_context->root_level = PT32_ROOT_LEVEL; 3414 g_context->root_level = PT32_ROOT_LEVEL;
3415 reset_rsvds_bits_mask(vcpu, g_context);
3423 g_context->gva_to_gpa = paging32_gva_to_gpa_nested; 3416 g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
3424 } 3417 }
3425 3418
@@ -3555,7 +3548,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
3555 * If we're seeing too many writes to a page, it may no longer be a page table, 3548 * If we're seeing too many writes to a page, it may no longer be a page table,
3556 * or we may be forking, in which case it is better to unmap the page. 3549 * or we may be forking, in which case it is better to unmap the page.
3557 */ 3550 */
3558static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte) 3551static bool detect_write_flooding(struct kvm_mmu_page *sp)
3559{ 3552{
3560 /* 3553 /*
 3561	 * Skip write-flooding detection for the sp whose level is 1, because 3554	 * Skip write-flooding detection for the sp whose level is 1, because
@@ -3664,10 +3657,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
3664 3657
3665 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; 3658 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
3666 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { 3659 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
3667 spte = get_written_sptes(sp, gpa, &npte);
3668
3669 if (detect_write_misaligned(sp, gpa, bytes) || 3660 if (detect_write_misaligned(sp, gpa, bytes) ||
3670 detect_write_flooding(sp, spte)) { 3661 detect_write_flooding(sp)) {
3671 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, 3662 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
3672 &invalid_list); 3663 &invalid_list);
3673 ++vcpu->kvm->stat.mmu_flooded; 3664 ++vcpu->kvm->stat.mmu_flooded;
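
Editor's note: dropping the private for_each_unsync_children() for the
generic for_each_set_bit() removes a duplicated idiom. A single-word
userspace analogue of the iteration (the kernel helper walks arbitrary-length
bitmaps via find_first_bit()/find_next_bit()):

    #include <stdio.h>

    /* Visit each set bit of a 32-bit word, lowest first. */
    #define for_each_set_bit32(bit, word) \
        for ((bit) = 0; (bit) < 32; (bit)++) \
            if (!((word) >> (bit) & 1)) continue; else

    int main(void)
    {
        unsigned bitmap = 0x8221;       /* bits 0, 5, 9, 15 set */
        unsigned i;

        for_each_set_bit32(i, bitmap)
            printf("child %u is unsync\n", i);
        return 0;
    }
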
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index fe15dcc07a6b..715da5a19a5b 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -200,13 +200,13 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
200 slot = gfn_to_memslot(kvm, sp->gfn); 200 slot = gfn_to_memslot(kvm, sp->gfn);
201 rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; 201 rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
202 202
203 spte = rmap_next(kvm, rmapp, NULL); 203 spte = rmap_next(rmapp, NULL);
204 while (spte) { 204 while (spte) {
205 if (is_writable_pte(*spte)) 205 if (is_writable_pte(*spte))
206 audit_printk(kvm, "shadow page has writable " 206 audit_printk(kvm, "shadow page has writable "
207 "mappings: gfn %llx role %x\n", 207 "mappings: gfn %llx role %x\n",
208 sp->gfn, sp->role.word); 208 sp->gfn, sp->role.word);
209 spte = rmap_next(kvm, rmapp, spte); 209 spte = rmap_next(rmapp, spte);
210 } 210 }
211} 211}
212 212
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
234} 234}
235 235
236static bool mmu_audit; 236static bool mmu_audit;
237static struct jump_label_key mmu_audit_key; 237static struct static_key mmu_audit_key;
238 238
239static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) 239static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
240{ 240{
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
250 250
251static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) 251static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
252{ 252{
253 if (static_branch((&mmu_audit_key))) 253 if (static_key_false((&mmu_audit_key)))
254 __kvm_mmu_audit(vcpu, point); 254 __kvm_mmu_audit(vcpu, point);
255} 255}
256 256
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
259 if (mmu_audit) 259 if (mmu_audit)
260 return; 260 return;
261 261
262 jump_label_inc(&mmu_audit_key); 262 static_key_slow_inc(&mmu_audit_key);
263 mmu_audit = true; 263 mmu_audit = true;
264} 264}
265 265
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
268 if (!mmu_audit) 268 if (!mmu_audit)
269 return; 269 return;
270 270
271 jump_label_dec(&mmu_audit_key); 271 static_key_slow_dec(&mmu_audit_key);
272 mmu_audit = false; 272 mmu_audit = false;
273} 273}
274 274
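
Editor's note: the mmu_audit.c hunk is the mechanical rename from the old
jump-label API (jump_label_inc/dec, static_branch) to static keys
(static_key_slow_inc/dec, static_key_false). A plain-C stand-in for the
pattern - in the kernel the branch is patched at runtime instead of testing a
variable:

    #include <stdbool.h>
    #include <stdio.h>

    static bool audit_key;                    /* stands in for a static_key */

    static void audit_enable(void)  { audit_key = true;  }  /* slow_inc */
    static void audit_disable(void) { audit_key = false; }  /* slow_dec */

    static void maybe_audit(int point)
    {
        if (audit_key)                  /* static_key_false() in the kernel */
            printf("audit point %d\n", point);
    }

    int main(void)
    {
        maybe_audit(1);                 /* key off: prints nothing */
        audit_enable();
        maybe_audit(2);                 /* key on: prints */
        audit_disable();
        return 0;
    }
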
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 15610285ebb6..df5a70311be8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -92,9 +92,9 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
92 if (unlikely(npages != 1)) 92 if (unlikely(npages != 1))
93 return -EFAULT; 93 return -EFAULT;
94 94
95 table = kmap_atomic(page, KM_USER0); 95 table = kmap_atomic(page);
96 ret = CMPXCHG(&table[index], orig_pte, new_pte); 96 ret = CMPXCHG(&table[index], orig_pte, new_pte);
97 kunmap_atomic(table, KM_USER0); 97 kunmap_atomic(table);
98 98
99 kvm_release_page_dirty(page); 99 kvm_release_page_dirty(page);
100 100
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7aad5446f393..173df38dbda5 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -33,10 +33,11 @@ static struct kvm_arch_event_perf_mapping {
33 [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, 33 [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
34 [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 34 [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
35 [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, 35 [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
36 [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
36}; 37};
37 38
38/* mapping between fixed pmc index and arch_events array */ 39/* mapping between fixed pmc index and arch_events array */
39int fixed_pmc_events[] = {1, 0, 2}; 40int fixed_pmc_events[] = {1, 0, 7};
40 41
41static bool pmc_is_gp(struct kvm_pmc *pmc) 42static bool pmc_is_gp(struct kvm_pmc *pmc)
42{ 43{
@@ -210,6 +211,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
210 unsigned config, type = PERF_TYPE_RAW; 211 unsigned config, type = PERF_TYPE_RAW;
211 u8 event_select, unit_mask; 212 u8 event_select, unit_mask;
212 213
214 if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
215 printk_once("kvm pmu: pin control bit is ignored\n");
216
213 pmc->eventsel = eventsel; 217 pmc->eventsel = eventsel;
214 218
215 stop_counter(pmc); 219 stop_counter(pmc);
@@ -220,7 +224,7 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
220 event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; 224 event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
221 unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; 225 unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
222 226
223 if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE | 227 if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
224 ARCH_PERFMON_EVENTSEL_INV | 228 ARCH_PERFMON_EVENTSEL_INV |
225 ARCH_PERFMON_EVENTSEL_CMASK))) { 229 ARCH_PERFMON_EVENTSEL_CMASK))) {
226 config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, 230 config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
@@ -365,7 +369,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
365 case MSR_CORE_PERF_FIXED_CTR_CTRL: 369 case MSR_CORE_PERF_FIXED_CTR_CTRL:
366 if (pmu->fixed_ctr_ctrl == data) 370 if (pmu->fixed_ctr_ctrl == data)
367 return 0; 371 return 0;
368 if (!(data & 0xfffffffffffff444)) { 372 if (!(data & 0xfffffffffffff444ull)) {
369 reprogram_fixed_counters(pmu, data); 373 reprogram_fixed_counters(pmu, data);
370 return 0; 374 return 0;
371 } 375 }
@@ -413,7 +417,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
413 struct kvm_pmc *counters; 417 struct kvm_pmc *counters;
414 u64 ctr; 418 u64 ctr;
415 419
416 pmc &= (3u << 30) - 1; 420 pmc &= ~(3u << 30);
417 if (!fixed && pmc >= pmu->nr_arch_gp_counters) 421 if (!fixed && pmc >= pmu->nr_arch_gp_counters)
418 return 1; 422 return 1;
419 if (fixed && pmc >= pmu->nr_arch_fixed_counters) 423 if (fixed && pmc >= pmu->nr_arch_fixed_counters)
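
Editor's note: the kvm_pmu_read_pmc() fix is pure bit arithmetic.
(3u << 30) - 1 is 0xbfffffff, which clears only bit 30 and lets bit 31 leak
into the counter index; the intended ~(3u << 30) is 0x3fffffff and strips
both RDPMC type bits (31:30). A runnable check:

    #include <stdio.h>

    int main(void)
    {
        unsigned wrong = (3u << 30) - 1;     /* 0xbfffffff */
        unsigned right = ~(3u << 30);        /* 0x3fffffff */
        unsigned pmc = (1u << 31) | 1;       /* selector with bit 31 set */

        printf("wrong: index %#x\n", pmc & wrong);   /* 0x80000001 */
        printf("right: index %#x\n", pmc & right);   /* 0x1 */
        return 0;
    }
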
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5fa553babe56..e334389e1c75 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -29,6 +29,7 @@
29#include <linux/ftrace_event.h> 29#include <linux/ftrace_event.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31 31
32#include <asm/perf_event.h>
32#include <asm/tlbflush.h> 33#include <asm/tlbflush.h>
33#include <asm/desc.h> 34#include <asm/desc.h>
34#include <asm/kvm_para.h> 35#include <asm/kvm_para.h>
@@ -110,6 +111,12 @@ struct nested_state {
110#define MSRPM_OFFSETS 16 111#define MSRPM_OFFSETS 16
111static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; 112static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
112 113
114/*
 115 * Set osvw_len to a higher value when updated Revision Guides
116 * are published and we know what the new status bits are
117 */
118static uint64_t osvw_len = 4, osvw_status;
119
113struct vcpu_svm { 120struct vcpu_svm {
114 struct kvm_vcpu vcpu; 121 struct kvm_vcpu vcpu;
115 struct vmcb *vmcb; 122 struct vmcb *vmcb;
@@ -176,11 +183,13 @@ static bool npt_enabled = true;
176#else 183#else
177static bool npt_enabled; 184static bool npt_enabled;
178#endif 185#endif
179static int npt = 1;
180 186
187/* allow nested paging (virtualized MMU) for all guests */
188static int npt = true;
181module_param(npt, int, S_IRUGO); 189module_param(npt, int, S_IRUGO);
182 190
183static int nested = 1; 191/* allow nested virtualization in KVM/SVM */
192static int nested = true;
184module_param(nested, int, S_IRUGO); 193module_param(nested, int, S_IRUGO);
185 194
186static void svm_flush_tlb(struct kvm_vcpu *vcpu); 195static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -556,6 +565,27 @@ static void svm_init_erratum_383(void)
556 erratum_383_found = true; 565 erratum_383_found = true;
557} 566}
558 567
568static void svm_init_osvw(struct kvm_vcpu *vcpu)
569{
570 /*
571 * Guests should see errata 400 and 415 as fixed (assuming that
572 * HLT and IO instructions are intercepted).
573 */
574 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
575 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
576
577 /*
 578	 * By increasing the VCPU's osvw.length to 3 we are telling the guest
 579	 * that all osvw.status bits inside that length, including bit 0 (which
 580	 * is reserved for erratum 298), are valid. However, if the host processor's
581 * osvw_len is 0 then osvw_status[0] carries no information. We need to
582 * be conservative here and therefore we tell the guest that erratum 298
583 * is present (because we really don't know).
584 */
585 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
586 vcpu->arch.osvw.status |= 1;
587}
588
559static int has_svm(void) 589static int has_svm(void)
560{ 590{
561 const char *msg; 591 const char *msg;
@@ -575,6 +605,8 @@ static void svm_hardware_disable(void *garbage)
575 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); 605 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
576 606
577 cpu_svm_disable(); 607 cpu_svm_disable();
608
609 amd_pmu_disable_virt();
578} 610}
579 611
580static int svm_hardware_enable(void *garbage) 612static int svm_hardware_enable(void *garbage)
@@ -620,8 +652,40 @@ static int svm_hardware_enable(void *garbage)
620 __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; 652 __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
621 } 653 }
622 654
655
656 /*
657 * Get OSVW bits.
658 *
659 * Note that it is possible to have a system with mixed processor
 660	 * revisions and therefore different OSVW bits. If the bits are not the
 661	 * same on different processors then choose the worst case (i.e. if an
 662	 * erratum is present on one processor and not on another then assume
 663	 * that the erratum is present everywhere).
664 */
665 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
666 uint64_t len, status = 0;
667 int err;
668
669 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
670 if (!err)
671 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
672 &err);
673
674 if (err)
675 osvw_status = osvw_len = 0;
676 else {
677 if (len < osvw_len)
678 osvw_len = len;
679 osvw_status |= status;
680 osvw_status &= (1ULL << osvw_len) - 1;
681 }
682 } else
683 osvw_status = osvw_len = 0;
684
623 svm_init_erratum_383(); 685 svm_init_erratum_383();
624 686
687 amd_pmu_enable_virt();
688
625 return 0; 689 return 0;
626} 690}
627 691
@@ -905,20 +969,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
905 return _tsc; 969 return _tsc;
906} 970}
907 971
908static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) 972static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
909{ 973{
910 struct vcpu_svm *svm = to_svm(vcpu); 974 struct vcpu_svm *svm = to_svm(vcpu);
911 u64 ratio; 975 u64 ratio;
912 u64 khz; 976 u64 khz;
913 977
914 /* TSC scaling supported? */ 978 /* Guest TSC same frequency as host TSC? */
915 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) 979 if (!scale) {
980 svm->tsc_ratio = TSC_RATIO_DEFAULT;
916 return; 981 return;
982 }
917 983
918 /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ 984 /* TSC scaling supported? */
919 if (user_tsc_khz == 0) { 985 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
920 vcpu->arch.virtual_tsc_khz = 0; 986 if (user_tsc_khz > tsc_khz) {
921 svm->tsc_ratio = TSC_RATIO_DEFAULT; 987 vcpu->arch.tsc_catchup = 1;
988 vcpu->arch.tsc_always_catchup = 1;
989 } else
990 WARN(1, "user requested TSC rate below hardware speed\n");
922 return; 991 return;
923 } 992 }
924 993
@@ -933,7 +1002,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
933 user_tsc_khz); 1002 user_tsc_khz);
934 return; 1003 return;
935 } 1004 }
936 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
937 svm->tsc_ratio = ratio; 1005 svm->tsc_ratio = ratio;
938} 1006}
939 1007
@@ -953,10 +1021,14 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
953 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 1021 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
954} 1022}
955 1023
956static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) 1024static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
957{ 1025{
958 struct vcpu_svm *svm = to_svm(vcpu); 1026 struct vcpu_svm *svm = to_svm(vcpu);
959 1027
1028 WARN_ON(adjustment < 0);
1029 if (host)
1030 adjustment = svm_scale_tsc(vcpu, adjustment);
1031
960 svm->vmcb->control.tsc_offset += adjustment; 1032 svm->vmcb->control.tsc_offset += adjustment;
961 if (is_guest_mode(vcpu)) 1033 if (is_guest_mode(vcpu))
962 svm->nested.hsave->control.tsc_offset += adjustment; 1034 svm->nested.hsave->control.tsc_offset += adjustment;
@@ -1186,6 +1258,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1186 if (kvm_vcpu_is_bsp(&svm->vcpu)) 1258 if (kvm_vcpu_is_bsp(&svm->vcpu))
1187 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 1259 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1188 1260
1261 svm_init_osvw(&svm->vcpu);
1262
1189 return &svm->vcpu; 1263 return &svm->vcpu;
1190 1264
1191free_page4: 1265free_page4:
@@ -1263,6 +1337,21 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1263 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); 1337 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1264} 1338}
1265 1339
1340static void svm_update_cpl(struct kvm_vcpu *vcpu)
1341{
1342 struct vcpu_svm *svm = to_svm(vcpu);
1343 int cpl;
1344
1345 if (!is_protmode(vcpu))
1346 cpl = 0;
1347 else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
1348 cpl = 3;
1349 else
1350 cpl = svm->vmcb->save.cs.selector & 0x3;
1351
1352 svm->vmcb->save.cpl = cpl;
1353}
1354
1266static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) 1355static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1267{ 1356{
1268 return to_svm(vcpu)->vmcb->save.rflags; 1357 return to_svm(vcpu)->vmcb->save.rflags;
@@ -1270,7 +1359,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1270 1359
1271static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 1360static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1272{ 1361{
1362 unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
1363
1273 to_svm(vcpu)->vmcb->save.rflags = rflags; 1364 to_svm(vcpu)->vmcb->save.rflags = rflags;
1365 if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
1366 svm_update_cpl(vcpu);
1274} 1367}
1275 1368
1276static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) 1369static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
@@ -1538,9 +1631,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
1538 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; 1631 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1539 } 1632 }
1540 if (seg == VCPU_SREG_CS) 1633 if (seg == VCPU_SREG_CS)
1541 svm->vmcb->save.cpl 1634 svm_update_cpl(vcpu);
1542 = (svm->vmcb->save.cs.attrib
1543 >> SVM_SELECTOR_DPL_SHIFT) & 3;
1544 1635
1545 mark_dirty(svm->vmcb, VMCB_SEG); 1636 mark_dirty(svm->vmcb, VMCB_SEG);
1546} 1637}
@@ -2730,7 +2821,10 @@ static int task_switch_interception(struct vcpu_svm *svm)
2730 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) 2821 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2731 skip_emulated_instruction(&svm->vcpu); 2822 skip_emulated_instruction(&svm->vcpu);
2732 2823
2733 if (kvm_task_switch(&svm->vcpu, tss_selector, reason, 2824 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
2825 int_vec = -1;
2826
2827 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
2734 has_error_code, error_code) == EMULATE_FAIL) { 2828 has_error_code, error_code) == EMULATE_FAIL) {
2735 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 2829 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2736 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 2830 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 906a7e84200f..ad85adfef843 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,29 +51,26 @@
51MODULE_AUTHOR("Qumranet"); 51MODULE_AUTHOR("Qumranet");
52MODULE_LICENSE("GPL"); 52MODULE_LICENSE("GPL");
53 53
54static int __read_mostly enable_vpid = 1; 54static bool __read_mostly enable_vpid = 1;
55module_param_named(vpid, enable_vpid, bool, 0444); 55module_param_named(vpid, enable_vpid, bool, 0444);
56 56
57static int __read_mostly flexpriority_enabled = 1; 57static bool __read_mostly flexpriority_enabled = 1;
58module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); 58module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
59 59
60static int __read_mostly enable_ept = 1; 60static bool __read_mostly enable_ept = 1;
61module_param_named(ept, enable_ept, bool, S_IRUGO); 61module_param_named(ept, enable_ept, bool, S_IRUGO);
62 62
63static int __read_mostly enable_unrestricted_guest = 1; 63static bool __read_mostly enable_unrestricted_guest = 1;
64module_param_named(unrestricted_guest, 64module_param_named(unrestricted_guest,
65 enable_unrestricted_guest, bool, S_IRUGO); 65 enable_unrestricted_guest, bool, S_IRUGO);
66 66
67static int __read_mostly emulate_invalid_guest_state = 0; 67static bool __read_mostly emulate_invalid_guest_state = 0;
68module_param(emulate_invalid_guest_state, bool, S_IRUGO); 68module_param(emulate_invalid_guest_state, bool, S_IRUGO);
69 69
70static int __read_mostly vmm_exclusive = 1; 70static bool __read_mostly vmm_exclusive = 1;
71module_param(vmm_exclusive, bool, S_IRUGO); 71module_param(vmm_exclusive, bool, S_IRUGO);
72 72
73static int __read_mostly yield_on_hlt = 1; 73static bool __read_mostly fasteoi = 1;
74module_param(yield_on_hlt, bool, S_IRUGO);
75
76static int __read_mostly fasteoi = 1;
77module_param(fasteoi, bool, S_IRUGO); 74module_param(fasteoi, bool, S_IRUGO);
78 75
79/* 76/*
@@ -81,7 +78,7 @@ module_param(fasteoi, bool, S_IRUGO);
81 * VMX and be a hypervisor for its own guests. If nested=0, guests may not 78 * VMX and be a hypervisor for its own guests. If nested=0, guests may not
82 * use VMX instructions. 79 * use VMX instructions.
83 */ 80 */
84static int __read_mostly nested = 0; 81static bool __read_mostly nested = 0;
85module_param(nested, bool, S_IRUGO); 82module_param(nested, bool, S_IRUGO);
86 83
87#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ 84#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
@@ -1457,7 +1454,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
1457#ifdef CONFIG_X86_64 1454#ifdef CONFIG_X86_64
1458 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 1455 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
1459#endif 1456#endif
1460 if (current_thread_info()->status & TS_USEDFPU) 1457 if (user_has_fpu())
1461 clts(); 1458 clts();
1462 load_gdt(&__get_cpu_var(host_gdt)); 1459 load_gdt(&__get_cpu_var(host_gdt));
1463} 1460}
@@ -1655,17 +1652,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
1655 vmx_set_interrupt_shadow(vcpu, 0); 1652 vmx_set_interrupt_shadow(vcpu, 0);
1656} 1653}
1657 1654
1658static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
1659{
1660 /* Ensure that we clear the HLT state in the VMCS. We don't need to
1661 * explicitly skip the instruction because if the HLT state is set, then
1662 * the instruction is already executing and RIP has already been
1663 * advanced. */
1664 if (!yield_on_hlt &&
1665 vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
1666 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
1667}
1668
1669/* 1655/*
1670 * KVM wants to inject page-faults which it got to the guest. This function 1656 * KVM wants to inject page-faults which it got to the guest. This function
1671 * checks whether in a nested guest, we need to inject them to L1 or L2. 1657 * checks whether in a nested guest, we need to inject them to L1 or L2.
@@ -1678,7 +1664,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu)
1678 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 1664 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1679 1665
1680 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 1666 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
1681 if (!(vmcs12->exception_bitmap & PF_VECTOR)) 1667 if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
1682 return 0; 1668 return 0;
1683 1669
1684 nested_vmx_vmexit(vcpu); 1670 nested_vmx_vmexit(vcpu);
@@ -1718,7 +1704,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
1718 intr_info |= INTR_TYPE_HARD_EXCEPTION; 1704 intr_info |= INTR_TYPE_HARD_EXCEPTION;
1719 1705
1720 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 1706 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
1721 vmx_clear_hlt(vcpu);
1722} 1707}
1723 1708
1724static bool vmx_rdtscp_supported(void) 1709static bool vmx_rdtscp_supported(void)
@@ -1817,13 +1802,19 @@ u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
1817} 1802}
1818 1803
1819/* 1804/*
 1820 * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ 1805 * Engage any workarounds for mismatched TSC rates. Currently limited to
1821 * ioctl. In this case the call-back should update internal vmx state to make 1806 * software catchup for faster rates on slower CPUs.
1822 * the changes effective.
1823 */ 1807 */
1824static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) 1808static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1825{ 1809{
1826 /* Nothing to do here */ 1810 if (!scale)
1811 return;
1812
1813 if (user_tsc_khz > tsc_khz) {
1814 vcpu->arch.tsc_catchup = 1;
1815 vcpu->arch.tsc_always_catchup = 1;
1816 } else
1817 WARN(1, "user requested TSC rate below hardware speed\n");
1827} 1818}
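A short worked scenario for the catchup decision above (the rates are hypothetical; on this VMX path there is no hardware TSC scaling, so only faster-than-host guest rates can be emulated):

/*
 * Sketch (hypothetical numbers): host tsc_khz = 2000000 (2 GHz).
 *   user_tsc_khz = 2200000 -> the caller passes scale = true; the
 *     requested rate is above the host rate, so tsc_catchup and
 *     tsc_always_catchup are set and the guest TSC is advanced in
 *     software on each entry.
 *   user_tsc_khz = 1800000 -> scale = true, but the hardware cannot
 *     run the TSC slower than its native rate, hence the WARN above.
 */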
1828 1819
1829/* 1820/*
@@ -1850,7 +1841,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1850 } 1841 }
1851} 1842}
1852 1843
1853static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) 1844static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
1854{ 1845{
1855 u64 offset = vmcs_read64(TSC_OFFSET); 1846 u64 offset = vmcs_read64(TSC_OFFSET);
1856 vmcs_write64(TSC_OFFSET, offset + adjustment); 1847 vmcs_write64(TSC_OFFSET, offset + adjustment);
@@ -2219,6 +2210,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
2219 msr = find_msr_entry(vmx, msr_index); 2210 msr = find_msr_entry(vmx, msr_index);
2220 if (msr) { 2211 if (msr) {
2221 msr->data = data; 2212 msr->data = data;
2213 if (msr - vmx->guest_msrs < vmx->save_nmsrs)
2214 kvm_set_shared_msr(msr->index, msr->data,
2215 msr->mask);
2222 break; 2216 break;
2223 } 2217 }
2224 ret = kvm_set_msr_common(vcpu, msr_index, data); 2218 ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -2399,7 +2393,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2399 &_pin_based_exec_control) < 0) 2393 &_pin_based_exec_control) < 0)
2400 return -EIO; 2394 return -EIO;
2401 2395
2402 min = 2396 min = CPU_BASED_HLT_EXITING |
2403#ifdef CONFIG_X86_64 2397#ifdef CONFIG_X86_64
2404 CPU_BASED_CR8_LOAD_EXITING | 2398 CPU_BASED_CR8_LOAD_EXITING |
2405 CPU_BASED_CR8_STORE_EXITING | 2399 CPU_BASED_CR8_STORE_EXITING |
@@ -2414,9 +2408,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2414 CPU_BASED_INVLPG_EXITING | 2408 CPU_BASED_INVLPG_EXITING |
2415 CPU_BASED_RDPMC_EXITING; 2409 CPU_BASED_RDPMC_EXITING;
2416 2410
2417 if (yield_on_hlt)
2418 min |= CPU_BASED_HLT_EXITING;
2419
2420 opt = CPU_BASED_TPR_SHADOW | 2411 opt = CPU_BASED_TPR_SHADOW |
2421 CPU_BASED_USE_MSR_BITMAPS | 2412 CPU_BASED_USE_MSR_BITMAPS |
2422 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 2413 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -3915,7 +3906,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
3915 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 3906 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
3916 3907
3917 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 3908 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
3909 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3918 vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ 3910 vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
3911 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3919 vmx_set_cr4(&vmx->vcpu, 0); 3912 vmx_set_cr4(&vmx->vcpu, 0);
3920 vmx_set_efer(&vmx->vcpu, 0); 3913 vmx_set_efer(&vmx->vcpu, 0);
3921 vmx_fpu_activate(&vmx->vcpu); 3914 vmx_fpu_activate(&vmx->vcpu);
@@ -4003,7 +3996,6 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
4003 } else 3996 } else
4004 intr |= INTR_TYPE_EXT_INTR; 3997 intr |= INTR_TYPE_EXT_INTR;
4005 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); 3998 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
4006 vmx_clear_hlt(vcpu);
4007} 3999}
4008 4000
4009static void vmx_inject_nmi(struct kvm_vcpu *vcpu) 4001static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -4035,7 +4027,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
4035 } 4027 }
4036 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 4028 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
4037 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 4029 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
4038 vmx_clear_hlt(vcpu);
4039} 4030}
4040 4031
4041static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 4032static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -4672,9 +4663,10 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
4672 bool has_error_code = false; 4663 bool has_error_code = false;
4673 u32 error_code = 0; 4664 u32 error_code = 0;
4674 u16 tss_selector; 4665 u16 tss_selector;
4675 int reason, type, idt_v; 4666 int reason, type, idt_v, idt_index;
4676 4667
4677 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); 4668 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
4669 idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
4678 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); 4670 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
4679 4671
4680 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 4672 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -4712,8 +4704,9 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
4712 type != INTR_TYPE_NMI_INTR)) 4704 type != INTR_TYPE_NMI_INTR))
4713 skip_emulated_instruction(vcpu); 4705 skip_emulated_instruction(vcpu);
4714 4706
4715 if (kvm_task_switch(vcpu, tss_selector, reason, 4707 if (kvm_task_switch(vcpu, tss_selector,
4716 has_error_code, error_code) == EMULATE_FAIL) { 4708 type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
4709 has_error_code, error_code) == EMULATE_FAIL) {
4717 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 4710 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4718 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 4711 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4719 vcpu->run->internal.ndata = 0; 4712 vcpu->run->internal.ndata = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1171def5f96b..4044ce0bf7c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -57,6 +57,7 @@
57#include <asm/mtrr.h> 57#include <asm/mtrr.h>
58#include <asm/mce.h> 58#include <asm/mce.h>
59#include <asm/i387.h> 59#include <asm/i387.h>
60#include <asm/fpu-internal.h> /* Ugh! */
60#include <asm/xcr.h> 61#include <asm/xcr.h>
61#include <asm/pvclock.h> 62#include <asm/pvclock.h>
62#include <asm/div64.h> 63#include <asm/div64.h>
@@ -88,14 +89,18 @@ static void process_nmi(struct kvm_vcpu *vcpu);
88struct kvm_x86_ops *kvm_x86_ops; 89struct kvm_x86_ops *kvm_x86_ops;
89EXPORT_SYMBOL_GPL(kvm_x86_ops); 90EXPORT_SYMBOL_GPL(kvm_x86_ops);
90 91
91int ignore_msrs = 0; 92static bool ignore_msrs = 0;
92module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); 93module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
93 94
94bool kvm_has_tsc_control; 95bool kvm_has_tsc_control;
95EXPORT_SYMBOL_GPL(kvm_has_tsc_control); 96EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
96u32 kvm_max_guest_tsc_khz; 97u32 kvm_max_guest_tsc_khz;
97EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); 98EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
98 99
100/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
101static u32 tsc_tolerance_ppm = 250;
102module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
103
99#define KVM_NR_SHARED_MSRS 16 104#define KVM_NR_SHARED_MSRS 16
100 105
101struct kvm_shared_msrs_global { 106struct kvm_shared_msrs_global {
@@ -968,50 +973,51 @@ static inline u64 get_kernel_ns(void)
968static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); 973static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
969unsigned long max_tsc_khz; 974unsigned long max_tsc_khz;
970 975
971static inline int kvm_tsc_changes_freq(void) 976static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
972{ 977{
973 int cpu = get_cpu(); 978 return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
974 int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && 979 vcpu->arch.virtual_tsc_shift);
975 cpufreq_quick_get(cpu) != 0;
976 put_cpu();
977 return ret;
978} 980}
979 981
980u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) 982static u32 adjust_tsc_khz(u32 khz, s32 ppm)
981{ 983{
982 if (vcpu->arch.virtual_tsc_khz) 984 u64 v = (u64)khz * (1000000 + ppm);
983 return vcpu->arch.virtual_tsc_khz; 985 do_div(v, 1000000);
984 else 986 return v;
985 return __this_cpu_read(cpu_tsc_khz);
986} 987}
987 988
988static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) 989static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
989{ 990{
990 u64 ret; 991 u32 thresh_lo, thresh_hi;
991 992 int use_scaling = 0;
992 WARN_ON(preemptible());
993 if (kvm_tsc_changes_freq())
994 printk_once(KERN_WARNING
995 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
996 ret = nsec * vcpu_tsc_khz(vcpu);
997 do_div(ret, USEC_PER_SEC);
998 return ret;
999}
1000 993
1001static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
1002{
 1003 /* Compute a scale to convert nanoseconds into TSC cycles */ 994 /* Compute a scale to convert nanoseconds into TSC cycles */
1004 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, 995 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
1005 &vcpu->arch.tsc_catchup_shift, 996 &vcpu->arch.virtual_tsc_shift,
1006 &vcpu->arch.tsc_catchup_mult); 997 &vcpu->arch.virtual_tsc_mult);
998 vcpu->arch.virtual_tsc_khz = this_tsc_khz;
999
1000 /*
 1001 * Compute the variation in TSC rate that is acceptable
 1002 * within the range of tolerance and decide whether the
 1003 * rate being applied is within those bounds of the hardware
 1004 * rate. If so, no scaling or compensation need be done.
1005 */
1006 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1007 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1008 if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
1009 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
1010 use_scaling = 1;
1011 }
1012 kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
1007} 1013}
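To make the tolerance check concrete, here is the arithmetic adjust_tsc_khz() performs with the default 250 ppm tolerance and a hypothetical 2 GHz host:

/*
 * thresh_lo = 2000000 * (1000000 - 250) / 1000000 = 1999500 kHz
 * thresh_hi = 2000000 * (1000000 + 250) / 1000000 = 2000500 kHz
 *
 * A guest rate outside [1999500, 2000500] kHz, i.e. more than 0.025%
 * away from the host rate, sets use_scaling = 1 and is handed to
 * kvm_x86_ops->set_tsc_khz() for software catchup.
 */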
1008 1014
1009static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) 1015static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1010{ 1016{
1011 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, 1017 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1012 vcpu->arch.tsc_catchup_mult, 1018 vcpu->arch.virtual_tsc_mult,
1013 vcpu->arch.tsc_catchup_shift); 1019 vcpu->arch.virtual_tsc_shift);
1014 tsc += vcpu->arch.last_tsc_write; 1020 tsc += vcpu->arch.this_tsc_write;
1015 return tsc; 1021 return tsc;
1016} 1022}
1017 1023
@@ -1020,48 +1026,88 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1020 struct kvm *kvm = vcpu->kvm; 1026 struct kvm *kvm = vcpu->kvm;
1021 u64 offset, ns, elapsed; 1027 u64 offset, ns, elapsed;
1022 unsigned long flags; 1028 unsigned long flags;
1023 s64 sdiff; 1029 s64 usdiff;
1024 1030
1025 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1031 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1026 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); 1032 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1027 ns = get_kernel_ns(); 1033 ns = get_kernel_ns();
1028 elapsed = ns - kvm->arch.last_tsc_nsec; 1034 elapsed = ns - kvm->arch.last_tsc_nsec;
1029 sdiff = data - kvm->arch.last_tsc_write; 1035
1030 if (sdiff < 0) 1036 /* n.b - signed multiplication and division required */
1031 sdiff = -sdiff; 1037 usdiff = data - kvm->arch.last_tsc_write;
1038#ifdef CONFIG_X86_64
1039 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
1040#else
1041 /* do_div() only does unsigned */
1042 asm("idivl %2; xor %%edx, %%edx"
1043 : "=A"(usdiff)
1044 : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
1045#endif
1046 do_div(elapsed, 1000);
1047 usdiff -= elapsed;
1048 if (usdiff < 0)
1049 usdiff = -usdiff;
1032 1050
1033 /* 1051 /*
1034 * Special case: close write to TSC within 5 seconds of 1052 * Special case: TSC write with a small delta (1 second) of virtual
1035 * another CPU is interpreted as an attempt to synchronize 1053 * cycle time against real time is interpreted as an attempt to
1036 * The 5 seconds is to accommodate host load / swapping as 1054 * synchronize the CPU.
1037 * well as any reset of TSC during the boot process. 1055 *
1038 * 1056 * For a reliable TSC, we can match TSC offsets, and for an unstable
1039 * In that case, for a reliable TSC, we can match TSC offsets, 1057 * TSC, we add elapsed time in this computation. We could let the
1040 * or make a best guest using elapsed value. 1058 * compensation code attempt to catch up if we fall behind, but
1041 */ 1059 * it's better to try to match offsets from the beginning.
1042 if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && 1060 */
1043 elapsed < 5ULL * NSEC_PER_SEC) { 1061 if (usdiff < USEC_PER_SEC &&
1062 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1044 if (!check_tsc_unstable()) { 1063 if (!check_tsc_unstable()) {
1045 offset = kvm->arch.last_tsc_offset; 1064 offset = kvm->arch.cur_tsc_offset;
1046 pr_debug("kvm: matched tsc offset for %llu\n", data); 1065 pr_debug("kvm: matched tsc offset for %llu\n", data);
1047 } else { 1066 } else {
1048 u64 delta = nsec_to_cycles(vcpu, elapsed); 1067 u64 delta = nsec_to_cycles(vcpu, elapsed);
1049 offset += delta; 1068 data += delta;
1069 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1050 pr_debug("kvm: adjusted tsc offset by %llu\n", delta); 1070 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1051 } 1071 }
1052 ns = kvm->arch.last_tsc_nsec; 1072 } else {
1073 /*
1074 * We split periods of matched TSC writes into generations.
1075 * For each generation, we track the original measured
1076 * nanosecond time, offset, and write, so if TSCs are in
1077 * sync, we can match exact offset, and if not, we can match
 1078 * exact software computation in compute_guest_tsc().
1079 *
1080 * These values are tracked in kvm->arch.cur_xxx variables.
1081 */
1082 kvm->arch.cur_tsc_generation++;
1083 kvm->arch.cur_tsc_nsec = ns;
1084 kvm->arch.cur_tsc_write = data;
1085 kvm->arch.cur_tsc_offset = offset;
1086 pr_debug("kvm: new tsc generation %u, clock %llu\n",
1087 kvm->arch.cur_tsc_generation, data);
1053 } 1088 }
1089
1090 /*
 1091 * We also track the most recent recorded KHZ, write and time to
1092 * allow the matching interval to be extended at each write.
1093 */
1054 kvm->arch.last_tsc_nsec = ns; 1094 kvm->arch.last_tsc_nsec = ns;
1055 kvm->arch.last_tsc_write = data; 1095 kvm->arch.last_tsc_write = data;
1056 kvm->arch.last_tsc_offset = offset; 1096 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1057 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1058 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1059 1097
1060 /* Reset of TSC must disable overshoot protection below */ 1098 /* Reset of TSC must disable overshoot protection below */
1061 vcpu->arch.hv_clock.tsc_timestamp = 0; 1099 vcpu->arch.hv_clock.tsc_timestamp = 0;
1062 vcpu->arch.last_tsc_write = data; 1100 vcpu->arch.last_guest_tsc = data;
1063 vcpu->arch.last_tsc_nsec = ns; 1101
1102 /* Keep track of which generation this VCPU has synchronized to */
1103 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
1104 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
1105 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
1106
1107 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1108 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1064} 1109}
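The usdiff computation above converts a TSC-cycle delta into microseconds before comparing it with real elapsed time; a worked example with hypothetical numbers:

/*
 * With virtual_tsc_khz = 2000000 (i.e. cycles per millisecond):
 *   usdiff = delta_cycles * 1000 / virtual_tsc_khz
 * e.g. a delta of 2000000000 cycles -> 2000000000 * 1000 / 2000000
 *      = 1000000 us, exactly one second of virtual cycle time.
 * elapsed is likewise reduced from nanoseconds to microseconds by
 * do_div(elapsed, 1000), so the |usdiff| < USEC_PER_SEC test compares
 * virtual time against real time in the same unit.
 */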
1110
1065EXPORT_SYMBOL_GPL(kvm_write_tsc); 1111EXPORT_SYMBOL_GPL(kvm_write_tsc);
1066 1112
1067static int kvm_guest_time_update(struct kvm_vcpu *v) 1113static int kvm_guest_time_update(struct kvm_vcpu *v)
@@ -1077,7 +1123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1077 local_irq_save(flags); 1123 local_irq_save(flags);
1078 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); 1124 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
1079 kernel_ns = get_kernel_ns(); 1125 kernel_ns = get_kernel_ns();
1080 this_tsc_khz = vcpu_tsc_khz(v); 1126 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1081 if (unlikely(this_tsc_khz == 0)) { 1127 if (unlikely(this_tsc_khz == 0)) {
1082 local_irq_restore(flags); 1128 local_irq_restore(flags);
1083 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); 1129 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -1097,7 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1097 if (vcpu->tsc_catchup) { 1143 if (vcpu->tsc_catchup) {
1098 u64 tsc = compute_guest_tsc(v, kernel_ns); 1144 u64 tsc = compute_guest_tsc(v, kernel_ns);
1099 if (tsc > tsc_timestamp) { 1145 if (tsc > tsc_timestamp) {
1100 kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp); 1146 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
1101 tsc_timestamp = tsc; 1147 tsc_timestamp = tsc;
1102 } 1148 }
1103 } 1149 }
@@ -1129,7 +1175,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1129 * observed by the guest and ensure the new system time is greater. 1175 * observed by the guest and ensure the new system time is greater.
1130 */ 1176 */
1131 max_kernel_ns = 0; 1177 max_kernel_ns = 0;
1132 if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) { 1178 if (vcpu->hv_clock.tsc_timestamp) {
1133 max_kernel_ns = vcpu->last_guest_tsc - 1179 max_kernel_ns = vcpu->last_guest_tsc -
1134 vcpu->hv_clock.tsc_timestamp; 1180 vcpu->hv_clock.tsc_timestamp;
1135 max_kernel_ns = pvclock_scale_delta(max_kernel_ns, 1181 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
@@ -1162,12 +1208,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1162 */ 1208 */
1163 vcpu->hv_clock.version += 2; 1209 vcpu->hv_clock.version += 2;
1164 1210
1165 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); 1211 shared_kaddr = kmap_atomic(vcpu->time_page);
1166 1212
1167 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, 1213 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
1168 sizeof(vcpu->hv_clock)); 1214 sizeof(vcpu->hv_clock));
1169 1215
1170 kunmap_atomic(shared_kaddr, KM_USER0); 1216 kunmap_atomic(shared_kaddr);
1171 1217
1172 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 1218 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
1173 return 0; 1219 return 0;
@@ -1495,12 +1541,15 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
1495 1541
1496int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1542int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1497{ 1543{
1544 bool pr = false;
1545
1498 switch (msr) { 1546 switch (msr) {
1499 case MSR_EFER: 1547 case MSR_EFER:
1500 return set_efer(vcpu, data); 1548 return set_efer(vcpu, data);
1501 case MSR_K7_HWCR: 1549 case MSR_K7_HWCR:
1502 data &= ~(u64)0x40; /* ignore flush filter disable */ 1550 data &= ~(u64)0x40; /* ignore flush filter disable */
1503 data &= ~(u64)0x100; /* ignore ignne emulation enable */ 1551 data &= ~(u64)0x100; /* ignore ignne emulation enable */
1552 data &= ~(u64)0x8; /* ignore TLB cache disable */
1504 if (data != 0) { 1553 if (data != 0) {
1505 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 1554 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1506 data); 1555 data);
@@ -1635,6 +1684,18 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1635 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1684 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1636 "0x%x data 0x%llx\n", msr, data); 1685 "0x%x data 0x%llx\n", msr, data);
1637 break; 1686 break;
1687 case MSR_P6_PERFCTR0:
1688 case MSR_P6_PERFCTR1:
1689 pr = true;
1690 case MSR_P6_EVNTSEL0:
1691 case MSR_P6_EVNTSEL1:
1692 if (kvm_pmu_msr(vcpu, msr))
1693 return kvm_pmu_set_msr(vcpu, msr, data);
1694
1695 if (pr || data != 0)
1696 pr_unimpl(vcpu, "disabled perfctr wrmsr: "
1697 "0x%x data 0x%llx\n", msr, data);
1698 break;
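The missing break after MSR_P6_PERFCTR1 above is an intentional fall-through; spelled out:

/*
 * Intentional fall-through: writes to MSR_P6_PERFCTR0/1 set pr = true
 * and then share the MSR_P6_EVNTSEL0/1 path. If the in-kernel PMU
 * claims the MSR, it handles the write; otherwise any write to a
 * counter (pr), or a non-zero write to an event-select, warns that
 * the performance counters are disabled.
 */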
1638 case MSR_K7_CLK_CTL: 1699 case MSR_K7_CLK_CTL:
1639 /* 1700 /*
1640 * Ignore all writes to this no longer documented MSR. 1701 * Ignore all writes to this no longer documented MSR.
@@ -1661,6 +1722,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1661 */ 1722 */
1662 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); 1723 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
1663 break; 1724 break;
1725 case MSR_AMD64_OSVW_ID_LENGTH:
1726 if (!guest_cpuid_has_osvw(vcpu))
1727 return 1;
1728 vcpu->arch.osvw.length = data;
1729 break;
1730 case MSR_AMD64_OSVW_STATUS:
1731 if (!guest_cpuid_has_osvw(vcpu))
1732 return 1;
1733 vcpu->arch.osvw.status = data;
1734 break;
1664 default: 1735 default:
1665 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 1736 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1666 return xen_hvm_config(vcpu, data); 1737 return xen_hvm_config(vcpu, data);
@@ -1835,6 +1906,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1835 case MSR_FAM10H_MMIO_CONF_BASE: 1906 case MSR_FAM10H_MMIO_CONF_BASE:
1836 data = 0; 1907 data = 0;
1837 break; 1908 break;
1909 case MSR_P6_PERFCTR0:
1910 case MSR_P6_PERFCTR1:
1911 case MSR_P6_EVNTSEL0:
1912 case MSR_P6_EVNTSEL1:
1913 if (kvm_pmu_msr(vcpu, msr))
1914 return kvm_pmu_get_msr(vcpu, msr, pdata);
1915 data = 0;
1916 break;
1838 case MSR_IA32_UCODE_REV: 1917 case MSR_IA32_UCODE_REV:
1839 data = 0x100000000ULL; 1918 data = 0x100000000ULL;
1840 break; 1919 break;
@@ -1937,6 +2016,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1937 */ 2016 */
1938 data = 0xbe702111; 2017 data = 0xbe702111;
1939 break; 2018 break;
2019 case MSR_AMD64_OSVW_ID_LENGTH:
2020 if (!guest_cpuid_has_osvw(vcpu))
2021 return 1;
2022 data = vcpu->arch.osvw.length;
2023 break;
2024 case MSR_AMD64_OSVW_STATUS:
2025 if (!guest_cpuid_has_osvw(vcpu))
2026 return 1;
2027 data = vcpu->arch.osvw.status;
2028 break;
1940 default: 2029 default:
1941 if (kvm_pmu_msr(vcpu, msr)) 2030 if (kvm_pmu_msr(vcpu, msr))
1942 return kvm_pmu_get_msr(vcpu, msr, pdata); 2031 return kvm_pmu_get_msr(vcpu, msr, pdata);
@@ -2057,6 +2146,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2057 case KVM_CAP_XSAVE: 2146 case KVM_CAP_XSAVE:
2058 case KVM_CAP_ASYNC_PF: 2147 case KVM_CAP_ASYNC_PF:
2059 case KVM_CAP_GET_TSC_KHZ: 2148 case KVM_CAP_GET_TSC_KHZ:
2149 case KVM_CAP_PCI_2_3:
2060 r = 1; 2150 r = 1;
2061 break; 2151 break;
2062 case KVM_CAP_COALESCED_MMIO: 2152 case KVM_CAP_COALESCED_MMIO:
@@ -2191,19 +2281,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2191 } 2281 }
2192 2282
2193 kvm_x86_ops->vcpu_load(vcpu, cpu); 2283 kvm_x86_ops->vcpu_load(vcpu, cpu);
2194 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2195 /* Make sure TSC doesn't go backwards */
2196 s64 tsc_delta;
2197 u64 tsc;
2198 2284
2199 tsc = kvm_x86_ops->read_l1_tsc(vcpu); 2285 /* Apply any externally detected TSC adjustments (due to suspend) */
2200 tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : 2286 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2201 tsc - vcpu->arch.last_guest_tsc; 2287 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2288 vcpu->arch.tsc_offset_adjustment = 0;
2289 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
2290 }
2202 2291
2292 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2293 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2294 native_read_tsc() - vcpu->arch.last_host_tsc;
2203 if (tsc_delta < 0) 2295 if (tsc_delta < 0)
2204 mark_tsc_unstable("KVM discovered backwards TSC"); 2296 mark_tsc_unstable("KVM discovered backwards TSC");
2205 if (check_tsc_unstable()) { 2297 if (check_tsc_unstable()) {
2206 kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); 2298 u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
2299 vcpu->arch.last_guest_tsc);
2300 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2207 vcpu->arch.tsc_catchup = 1; 2301 vcpu->arch.tsc_catchup = 1;
2208 } 2302 }
2209 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 2303 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2220,7 +2314,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2220{ 2314{
2221 kvm_x86_ops->vcpu_put(vcpu); 2315 kvm_x86_ops->vcpu_put(vcpu);
2222 kvm_put_guest_fpu(vcpu); 2316 kvm_put_guest_fpu(vcpu);
2223 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); 2317 vcpu->arch.last_host_tsc = native_read_tsc();
2224} 2318}
2225 2319
2226static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2320static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -2762,26 +2856,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2762 u32 user_tsc_khz; 2856 u32 user_tsc_khz;
2763 2857
2764 r = -EINVAL; 2858 r = -EINVAL;
2765 if (!kvm_has_tsc_control)
2766 break;
2767
2768 user_tsc_khz = (u32)arg; 2859 user_tsc_khz = (u32)arg;
2769 2860
2770 if (user_tsc_khz >= kvm_max_guest_tsc_khz) 2861 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
2771 goto out; 2862 goto out;
2772 2863
2773 kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); 2864 if (user_tsc_khz == 0)
2865 user_tsc_khz = tsc_khz;
2866
2867 kvm_set_tsc_khz(vcpu, user_tsc_khz);
2774 2868
2775 r = 0; 2869 r = 0;
2776 goto out; 2870 goto out;
2777 } 2871 }
2778 case KVM_GET_TSC_KHZ: { 2872 case KVM_GET_TSC_KHZ: {
2779 r = -EIO; 2873 r = vcpu->arch.virtual_tsc_khz;
2780 if (check_tsc_unstable())
2781 goto out;
2782
2783 r = vcpu_tsc_khz(vcpu);
2784
2785 goto out; 2874 goto out;
2786 } 2875 }
2787 default: 2876 default:
@@ -2792,6 +2881,11 @@ out:
2792 return r; 2881 return r;
2793} 2882}
2794 2883
2884int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2885{
2886 return VM_FAULT_SIGBUS;
2887}
2888
2795static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) 2889static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
2796{ 2890{
2797 int ret; 2891 int ret;
@@ -2975,6 +3069,8 @@ static void write_protect_slot(struct kvm *kvm,
2975 unsigned long *dirty_bitmap, 3069 unsigned long *dirty_bitmap,
2976 unsigned long nr_dirty_pages) 3070 unsigned long nr_dirty_pages)
2977{ 3071{
3072 spin_lock(&kvm->mmu_lock);
3073
2978 /* Not many dirty pages compared to # of shadow pages. */ 3074 /* Not many dirty pages compared to # of shadow pages. */
2979 if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { 3075 if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
2980 unsigned long gfn_offset; 3076 unsigned long gfn_offset;
@@ -2982,16 +3078,13 @@ static void write_protect_slot(struct kvm *kvm,
2982 for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { 3078 for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
2983 unsigned long gfn = memslot->base_gfn + gfn_offset; 3079 unsigned long gfn = memslot->base_gfn + gfn_offset;
2984 3080
2985 spin_lock(&kvm->mmu_lock);
2986 kvm_mmu_rmap_write_protect(kvm, gfn, memslot); 3081 kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
2987 spin_unlock(&kvm->mmu_lock);
2988 } 3082 }
2989 kvm_flush_remote_tlbs(kvm); 3083 kvm_flush_remote_tlbs(kvm);
2990 } else { 3084 } else
2991 spin_lock(&kvm->mmu_lock);
2992 kvm_mmu_slot_remove_write_access(kvm, memslot->id); 3085 kvm_mmu_slot_remove_write_access(kvm, memslot->id);
2993 spin_unlock(&kvm->mmu_lock); 3086
2994 } 3087 spin_unlock(&kvm->mmu_lock);
2995} 3088}
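The change above hoists mmu_lock out of the per-gfn loop; the resulting pattern, in sketch form:

/*
 * Before: lock/unlock around every kvm_mmu_rmap_write_protect() call,
 * once per dirty gfn. After: one critical section per slot:
 *
 *   spin_lock(&kvm->mmu_lock);
 *   for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages)
 *           kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
 *   spin_unlock(&kvm->mmu_lock);
 *
 * Fewer atomic lock operations, at the cost of a longer hold time.
 */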
2996 3089
2997/* 3090/*
@@ -3110,6 +3203,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
3110 r = -EEXIST; 3203 r = -EEXIST;
3111 if (kvm->arch.vpic) 3204 if (kvm->arch.vpic)
3112 goto create_irqchip_unlock; 3205 goto create_irqchip_unlock;
3206 r = -EINVAL;
3207 if (atomic_read(&kvm->online_vcpus))
3208 goto create_irqchip_unlock;
3113 r = -ENOMEM; 3209 r = -ENOMEM;
3114 vpic = kvm_create_pic(kvm); 3210 vpic = kvm_create_pic(kvm);
3115 if (vpic) { 3211 if (vpic) {
@@ -3826,7 +3922,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
3826 goto emul_write; 3922 goto emul_write;
3827 } 3923 }
3828 3924
3829 kaddr = kmap_atomic(page, KM_USER0); 3925 kaddr = kmap_atomic(page);
3830 kaddr += offset_in_page(gpa); 3926 kaddr += offset_in_page(gpa);
3831 switch (bytes) { 3927 switch (bytes) {
3832 case 1: 3928 case 1:
@@ -3844,7 +3940,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
3844 default: 3940 default:
3845 BUG(); 3941 BUG();
3846 } 3942 }
3847 kunmap_atomic(kaddr, KM_USER0); 3943 kunmap_atomic(kaddr);
3848 kvm_release_page_dirty(page); 3944 kvm_release_page_dirty(page);
3849 3945
3850 if (!exchanged) 3946 if (!exchanged)
@@ -4040,6 +4136,11 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4040 return res; 4136 return res;
4041} 4137}
4042 4138
4139static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
4140{
4141 kvm_set_rflags(emul_to_vcpu(ctxt), val);
4142}
4143
4043static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) 4144static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4044{ 4145{
4045 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); 4146 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
@@ -4180,6 +4281,28 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4180 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); 4281 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4181} 4282}
4182 4283
4284static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4285 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4286{
4287 struct kvm_cpuid_entry2 *cpuid = NULL;
4288
4289 if (eax && ecx)
4290 cpuid = kvm_find_cpuid_entry(emul_to_vcpu(ctxt),
4291 *eax, *ecx);
4292
4293 if (cpuid) {
4294 *eax = cpuid->eax;
4295 *ecx = cpuid->ecx;
4296 if (ebx)
4297 *ebx = cpuid->ebx;
4298 if (edx)
4299 *edx = cpuid->edx;
4300 return true;
4301 }
4302
4303 return false;
4304}
4305
4183static struct x86_emulate_ops emulate_ops = { 4306static struct x86_emulate_ops emulate_ops = {
4184 .read_std = kvm_read_guest_virt_system, 4307 .read_std = kvm_read_guest_virt_system,
4185 .write_std = kvm_write_guest_virt_system, 4308 .write_std = kvm_write_guest_virt_system,
@@ -4199,6 +4322,7 @@ static struct x86_emulate_ops emulate_ops = {
4199 .set_idt = emulator_set_idt, 4322 .set_idt = emulator_set_idt,
4200 .get_cr = emulator_get_cr, 4323 .get_cr = emulator_get_cr,
4201 .set_cr = emulator_set_cr, 4324 .set_cr = emulator_set_cr,
4325 .set_rflags = emulator_set_rflags,
4202 .cpl = emulator_get_cpl, 4326 .cpl = emulator_get_cpl,
4203 .get_dr = emulator_get_dr, 4327 .get_dr = emulator_get_dr,
4204 .set_dr = emulator_set_dr, 4328 .set_dr = emulator_set_dr,
@@ -4211,6 +4335,7 @@ static struct x86_emulate_ops emulate_ops = {
4211 .get_fpu = emulator_get_fpu, 4335 .get_fpu = emulator_get_fpu,
4212 .put_fpu = emulator_put_fpu, 4336 .put_fpu = emulator_put_fpu,
4213 .intercept = emulator_intercept, 4337 .intercept = emulator_intercept,
4338 .get_cpuid = emulator_get_cpuid,
4214}; 4339};
4215 4340
4216static void cache_all_regs(struct kvm_vcpu *vcpu) 4341static void cache_all_regs(struct kvm_vcpu *vcpu)
@@ -5242,6 +5367,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5242 profile_hit(KVM_PROFILING, (void *)rip); 5367 profile_hit(KVM_PROFILING, (void *)rip);
5243 } 5368 }
5244 5369
5370 if (unlikely(vcpu->arch.tsc_always_catchup))
5371 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5245 5372
5246 kvm_lapic_sync_from_vapic(vcpu); 5373 kvm_lapic_sync_from_vapic(vcpu);
5247 5374
@@ -5541,15 +5668,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
5541 return 0; 5668 return 0;
5542} 5669}
5543 5670
5544int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 5671int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
5545 bool has_error_code, u32 error_code) 5672 int reason, bool has_error_code, u32 error_code)
5546{ 5673{
5547 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 5674 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5548 int ret; 5675 int ret;
5549 5676
5550 init_emulate_ctxt(vcpu); 5677 init_emulate_ctxt(vcpu);
5551 5678
5552 ret = emulator_task_switch(ctxt, tss_selector, reason, 5679 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
5553 has_error_code, error_code); 5680 has_error_code, error_code);
5554 5681
5555 if (ret) 5682 if (ret)
@@ -5882,13 +6009,88 @@ int kvm_arch_hardware_enable(void *garbage)
5882 struct kvm *kvm; 6009 struct kvm *kvm;
5883 struct kvm_vcpu *vcpu; 6010 struct kvm_vcpu *vcpu;
5884 int i; 6011 int i;
6012 int ret;
6013 u64 local_tsc;
6014 u64 max_tsc = 0;
6015 bool stable, backwards_tsc = false;
5885 6016
5886 kvm_shared_msr_cpu_online(); 6017 kvm_shared_msr_cpu_online();
5887 list_for_each_entry(kvm, &vm_list, vm_list) 6018 ret = kvm_x86_ops->hardware_enable(garbage);
5888 kvm_for_each_vcpu(i, vcpu, kvm) 6019 if (ret != 0)
5889 if (vcpu->cpu == smp_processor_id()) 6020 return ret;
5890 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 6021
5891 return kvm_x86_ops->hardware_enable(garbage); 6022 local_tsc = native_read_tsc();
6023 stable = !check_tsc_unstable();
6024 list_for_each_entry(kvm, &vm_list, vm_list) {
6025 kvm_for_each_vcpu(i, vcpu, kvm) {
6026 if (!stable && vcpu->cpu == smp_processor_id())
6027 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
6028 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6029 backwards_tsc = true;
6030 if (vcpu->arch.last_host_tsc > max_tsc)
6031 max_tsc = vcpu->arch.last_host_tsc;
6032 }
6033 }
6034 }
6035
6036 /*
6037 * Sometimes, even reliable TSCs go backwards. This happens on
6038 * platforms that reset TSC during suspend or hibernate actions, but
6039 * maintain synchronization. We must compensate. Fortunately, we can
6040 * detect that condition here, which happens early in CPU bringup,
6041 * before any KVM threads can be running. Unfortunately, we can't
6042 * bring the TSCs fully up to date with real time, as we aren't yet far
6043 * enough into CPU bringup that we know how much real time has actually
6044 * elapsed; our helper function, get_kernel_ns() will be using boot
6045 * variables that haven't been updated yet.
6046 *
6047 * So we simply find the maximum observed TSC above, then record the
6048 * adjustment to TSC in each VCPU. When the VCPU later gets loaded,
6049 * the adjustment will be applied. Note that we accumulate
6050 * adjustments, in case multiple suspend cycles happen before some VCPU
6051 * gets a chance to run again. In the event that no KVM threads get a
6052 * chance to run, we will miss the entire elapsed period, as we'll have
6053 * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
6054 * loose cycle time. This isn't too big a deal, since the loss will be
6055 * uniform across all VCPUs (not to mention the scenario is extremely
6056 * unlikely). It is possible that a second hibernate recovery happens
6057 * much faster than a first, causing the observed TSC here to be
6058 * smaller; this would require additional padding adjustment, which is
6059 * why we set last_host_tsc to the local tsc observed here.
6060 *
6061 * N.B. - this code below runs only on platforms with reliable TSC,
6062 * as that is the only way backwards_tsc is set above. Also note
6063 * that this runs for ALL vcpus, which is not a bug; all VCPUs should
6064 * have the same delta_cyc adjustment applied if backwards_tsc
6065 * is detected. Note further, this adjustment is only done once,
6066 * as we reset last_host_tsc on all VCPUs to stop this from being
6067 * called multiple times (one for each physical CPU bringup).
6068 *
 6069 * Platforms with unreliable TSCs don't have to deal with this, they
6070 * will be compensated by the logic in vcpu_load, which sets the TSC to
6071 * catchup mode. This will catchup all VCPUs to real time, but cannot
6072 * guarantee that they stay in perfect synchronization.
6073 */
6074 if (backwards_tsc) {
6075 u64 delta_cyc = max_tsc - local_tsc;
6076 list_for_each_entry(kvm, &vm_list, vm_list) {
6077 kvm_for_each_vcpu(i, vcpu, kvm) {
6078 vcpu->arch.tsc_offset_adjustment += delta_cyc;
6079 vcpu->arch.last_host_tsc = local_tsc;
6080 }
6081
6082 /*
 6083 * We have to disable TSC offset matching; if you were
 6084 * booting a VM while issuing an S4 host suspend,
 6085 * you may have a problem. Solving this issue is
6086 * left as an exercise to the reader.
6087 */
6088 kvm->arch.last_tsc_nsec = 0;
6089 kvm->arch.last_tsc_write = 0;
6090 }
6091
6092 }
6093 return 0;
5892} 6094}
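A worked scenario for the compensation above (all values hypothetical):

/*
 * Suppose the host suspends with vcpu->arch.last_host_tsc values up
 * to max_tsc = 10^12, then resumes with the (reset) TSC reading
 * local_tsc = 10^9. delta_cyc = 10^12 - 10^9 is added to every
 * VCPU's tsc_offset_adjustment, so when vcpu_load later applies it
 * via adjust_tsc_offset_host(), no guest observes its TSC jumping
 * backwards. Repeated suspend cycles accumulate further deltas.
 */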
5893 6095
5894void kvm_arch_hardware_disable(void *garbage) 6096void kvm_arch_hardware_disable(void *garbage)
@@ -5912,6 +6114,11 @@ void kvm_arch_check_processor_compat(void *rtn)
5912 kvm_x86_ops->check_processor_compatibility(rtn); 6114 kvm_x86_ops->check_processor_compatibility(rtn);
5913} 6115}
5914 6116
6117bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6118{
6119 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6120}
6121
5915int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 6122int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5916{ 6123{
5917 struct page *page; 6124 struct page *page;
@@ -5934,7 +6141,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5934 } 6141 }
5935 vcpu->arch.pio_data = page_address(page); 6142 vcpu->arch.pio_data = page_address(page);
5936 6143
5937 kvm_init_tsc_catchup(vcpu, max_tsc_khz); 6144 kvm_set_tsc_khz(vcpu, max_tsc_khz);
5938 6145
5939 r = kvm_mmu_create(vcpu); 6146 r = kvm_mmu_create(vcpu);
5940 if (r < 0) 6147 if (r < 0)
@@ -5986,8 +6193,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
5986 free_page((unsigned long)vcpu->arch.pio_data); 6193 free_page((unsigned long)vcpu->arch.pio_data);
5987} 6194}
5988 6195
5989int kvm_arch_init_vm(struct kvm *kvm) 6196int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
5990{ 6197{
6198 if (type)
6199 return -EINVAL;
6200
5991 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 6201 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
5992 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 6202 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
5993 6203
@@ -6047,6 +6257,65 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
6047 put_page(kvm->arch.ept_identity_pagetable); 6257 put_page(kvm->arch.ept_identity_pagetable);
6048} 6258}
6049 6259
6260void kvm_arch_free_memslot(struct kvm_memory_slot *free,
6261 struct kvm_memory_slot *dont)
6262{
6263 int i;
6264
6265 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6266 if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
6267 vfree(free->arch.lpage_info[i]);
6268 free->arch.lpage_info[i] = NULL;
6269 }
6270 }
6271}
6272
6273int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
6274{
6275 int i;
6276
6277 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6278 unsigned long ugfn;
6279 int lpages;
6280 int level = i + 2;
6281
6282 lpages = gfn_to_index(slot->base_gfn + npages - 1,
6283 slot->base_gfn, level) + 1;
6284
6285 slot->arch.lpage_info[i] =
6286 vzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
6287 if (!slot->arch.lpage_info[i])
6288 goto out_free;
6289
6290 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
6291 slot->arch.lpage_info[i][0].write_count = 1;
6292 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
6293 slot->arch.lpage_info[i][lpages - 1].write_count = 1;
6294 ugfn = slot->userspace_addr >> PAGE_SHIFT;
6295 /*
6296 * If the gfn and userspace address are not aligned wrt each
6297 * other, or if explicitly asked to, disable large page
6298 * support for this slot
6299 */
6300 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
6301 !kvm_largepages_enabled()) {
6302 unsigned long j;
6303
6304 for (j = 0; j < lpages; ++j)
6305 slot->arch.lpage_info[i][j].write_count = 1;
6306 }
6307 }
6308
6309 return 0;
6310
6311out_free:
6312 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6313 vfree(slot->arch.lpage_info[i]);
6314 slot->arch.lpage_info[i] = NULL;
6315 }
6316 return -ENOMEM;
6317}
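A concrete case for the alignment logic above (assuming 4 KiB base pages, so KVM_PAGES_PER_HPAGE(2) is 512):

/*
 * Example: a slot with base_gfn = 0x100 is not 512-aligned, so
 * base_gfn & 511 != 0 and lpage_info[0][0].write_count is set to 1,
 * preventing the first (partial) 2 MiB page from ever being mapped
 * huge. The same is done for a partial large page at the end of the
 * slot, and for every entry when the gfn and the userspace address
 * disagree modulo 512 or large pages are disabled outright.
 */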
6318
6050int kvm_arch_prepare_memory_region(struct kvm *kvm, 6319int kvm_arch_prepare_memory_region(struct kvm *kvm,
6051 struct kvm_memory_slot *memslot, 6320 struct kvm_memory_slot *memslot,
6052 struct kvm_memory_slot old, 6321 struct kvm_memory_slot old,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index cf4603ba866f..642d8805bc1b 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -856,18 +856,23 @@ static void __init lguest_init_IRQ(void)
856} 856}
857 857
858/* 858/*
859 * With CONFIG_SPARSE_IRQ, interrupt descriptors are allocated as-needed, so 859 * Interrupt descriptors are allocated as-needed, but low-numbered ones are
860 * rather than set them in lguest_init_IRQ we are called here every time an 860 * reserved by the generic x86 code. So we ignore irq_alloc_desc_at if it
 861 lguest device needs an interrupt. 861 tells us the irq is already used: other errors (i.e. ENOMEM) we take
862 * 862 * seriously.
863 * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should
864 * pass that up!
865 */ 863 */
866void lguest_setup_irq(unsigned int irq) 864int lguest_setup_irq(unsigned int irq)
867{ 865{
868 irq_alloc_desc_at(irq, 0); 866 int err;
867
868 /* Returns -ve error or vector number. */
869 err = irq_alloc_desc_at(irq, 0);
870 if (err < 0 && err != -EEXIST)
871 return err;
872
869 irq_set_chip_and_handler_name(irq, &lguest_irq_controller, 873 irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
870 handle_level_irq, "level"); 874 handle_level_irq, "level");
875 return 0;
871} 876}
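Callers now get a conventional error contract; a minimal, hypothetical usage sketch:

/* Hypothetical caller: */
int err = lguest_setup_irq(irq);
if (err)
        return err;     /* genuine failure, e.g. -ENOMEM */
/* -EEXIST was swallowed above: the irq was merely pre-reserved. */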
872 877
873/* 878/*
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 042f6826bf57..a0b4a350daa7 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -1,59 +1,4 @@
1#include <linux/compiler.h> 1#define ATOMIC64_EXPORT EXPORT_SYMBOL
2#include <linux/module.h>
3#include <linux/types.h>
4 2
5#include <asm/processor.h> 3#include <linux/export.h>
6#include <asm/cmpxchg.h>
7#include <linux/atomic.h> 4#include <linux/atomic.h>
8
9long long atomic64_read_cx8(long long, const atomic64_t *v);
10EXPORT_SYMBOL(atomic64_read_cx8);
11long long atomic64_set_cx8(long long, const atomic64_t *v);
12EXPORT_SYMBOL(atomic64_set_cx8);
13long long atomic64_xchg_cx8(long long, unsigned high);
14EXPORT_SYMBOL(atomic64_xchg_cx8);
15long long atomic64_add_return_cx8(long long a, atomic64_t *v);
16EXPORT_SYMBOL(atomic64_add_return_cx8);
17long long atomic64_sub_return_cx8(long long a, atomic64_t *v);
18EXPORT_SYMBOL(atomic64_sub_return_cx8);
19long long atomic64_inc_return_cx8(long long a, atomic64_t *v);
20EXPORT_SYMBOL(atomic64_inc_return_cx8);
21long long atomic64_dec_return_cx8(long long a, atomic64_t *v);
22EXPORT_SYMBOL(atomic64_dec_return_cx8);
23long long atomic64_dec_if_positive_cx8(atomic64_t *v);
24EXPORT_SYMBOL(atomic64_dec_if_positive_cx8);
25int atomic64_inc_not_zero_cx8(atomic64_t *v);
26EXPORT_SYMBOL(atomic64_inc_not_zero_cx8);
27int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u);
28EXPORT_SYMBOL(atomic64_add_unless_cx8);
29
30#ifndef CONFIG_X86_CMPXCHG64
31long long atomic64_read_386(long long, const atomic64_t *v);
32EXPORT_SYMBOL(atomic64_read_386);
33long long atomic64_set_386(long long, const atomic64_t *v);
34EXPORT_SYMBOL(atomic64_set_386);
35long long atomic64_xchg_386(long long, unsigned high);
36EXPORT_SYMBOL(atomic64_xchg_386);
37long long atomic64_add_return_386(long long a, atomic64_t *v);
38EXPORT_SYMBOL(atomic64_add_return_386);
39long long atomic64_sub_return_386(long long a, atomic64_t *v);
40EXPORT_SYMBOL(atomic64_sub_return_386);
41long long atomic64_inc_return_386(long long a, atomic64_t *v);
42EXPORT_SYMBOL(atomic64_inc_return_386);
43long long atomic64_dec_return_386(long long a, atomic64_t *v);
44EXPORT_SYMBOL(atomic64_dec_return_386);
45long long atomic64_add_386(long long a, atomic64_t *v);
46EXPORT_SYMBOL(atomic64_add_386);
47long long atomic64_sub_386(long long a, atomic64_t *v);
48EXPORT_SYMBOL(atomic64_sub_386);
49long long atomic64_inc_386(long long a, atomic64_t *v);
50EXPORT_SYMBOL(atomic64_inc_386);
51long long atomic64_dec_386(long long a, atomic64_t *v);
52EXPORT_SYMBOL(atomic64_dec_386);
53long long atomic64_dec_if_positive_386(atomic64_t *v);
54EXPORT_SYMBOL(atomic64_dec_if_positive_386);
55int atomic64_inc_not_zero_386(atomic64_t *v);
56EXPORT_SYMBOL(atomic64_inc_not_zero_386);
57int atomic64_add_unless_386(atomic64_t *v, long long a, long long u);
58EXPORT_SYMBOL(atomic64_add_unless_386);
59#endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e8e7e0d06f42..00933d5e992f 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -137,13 +137,13 @@ BEGIN(dec_return)
137RET_ENDP 137RET_ENDP
138#undef v 138#undef v
139 139
140#define v %ecx 140#define v %esi
141BEGIN(add_unless) 141BEGIN(add_unless)
142 addl %eax, %esi 142 addl %eax, %ecx
143 adcl %edx, %edi 143 adcl %edx, %edi
144 addl (v), %eax 144 addl (v), %eax
145 adcl 4(v), %edx 145 adcl 4(v), %edx
146 cmpl %eax, %esi 146 cmpl %eax, %ecx
147 je 3f 147 je 3f
1481: 1481:
149 movl %eax, (v) 149 movl %eax, (v)
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 391a083674b4..f5cc9eb1d51b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
55ENTRY(atomic64_xchg_cx8) 55ENTRY(atomic64_xchg_cx8)
56 CFI_STARTPROC 56 CFI_STARTPROC
57 57
58 movl %ebx, %eax
59 movl %ecx, %edx
601: 581:
61 LOCK_PREFIX 59 LOCK_PREFIX
62 cmpxchg8b (%esi) 60 cmpxchg8b (%esi)
@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
78 movl %edx, %edi 76 movl %edx, %edi
79 movl %ecx, %ebp 77 movl %ecx, %ebp
80 78
81 read64 %ebp 79 read64 %ecx
821: 801:
83 movl %eax, %ebx 81 movl %eax, %ebx
84 movl %edx, %ecx 82 movl %edx, %ecx
@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
159 SAVE ebx 157 SAVE ebx
160/* these just push these two parameters on the stack */ 158/* these just push these two parameters on the stack */
161 SAVE edi 159 SAVE edi
162 SAVE esi 160 SAVE ecx
163 161
164 movl %ecx, %ebp 162 movl %eax, %ebp
165 movl %eax, %esi
166 movl %edx, %edi 163 movl %edx, %edi
167 164
168 read64 %ebp 165 read64 %esi
1691: 1661:
170 cmpl %eax, 0(%esp) 167 cmpl %eax, 0(%esp)
171 je 4f 168 je 4f
1722: 1692:
173 movl %eax, %ebx 170 movl %eax, %ebx
174 movl %edx, %ecx 171 movl %edx, %ecx
175 addl %esi, %ebx 172 addl %ebp, %ebx
176 adcl %edi, %ecx 173 adcl %edi, %ecx
177 LOCK_PREFIX 174 LOCK_PREFIX
178 cmpxchg8b (%ebp) 175 cmpxchg8b (%esi)
179 jne 1b 176 jne 1b
180 177
181 movl $1, %eax 178 movl $1, %eax
@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
199 196
200 read64 %esi 197 read64 %esi
2011: 1981:
202 testl %eax, %eax 199 movl %eax, %ecx
203 je 4f 200 orl %edx, %ecx
2042: 201 jz 3f
205 movl %eax, %ebx 202 movl %eax, %ebx
206 movl %edx, %ecx 203 xorl %ecx, %ecx
207 addl $1, %ebx 204 addl $1, %ebx
208 adcl $0, %ecx 205 adcl %edx, %ecx
209 LOCK_PREFIX 206 LOCK_PREFIX
210 cmpxchg8b (%esi) 207 cmpxchg8b (%esi)
211 jne 1b 208 jne 1b
@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
2143: 2113:
215 RESTORE ebx 212 RESTORE ebx
216 ret 213 ret
2174:
218 testl %edx, %edx
219 jne 2b
220 jmp 3b
221 CFI_ENDPROC 214 CFI_ENDPROC
222ENDPROC(atomic64_inc_not_zero_cx8) 215ENDPROC(atomic64_inc_not_zero_cx8)
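The rewritten loop tests the full 64-bit value with a single or/jz and folds the carry into the high half; one iteration, rendered as C-style pseudocode (lo/hi mirror %eax/%edx):

/*
 * if ((lo | hi) == 0)             // one branch for the 64-bit test
 *         return 0;
 * new_lo = lo + 1;                // addl $1, %ebx
 * new_hi = 0 + hi + carry;        // xorl %ecx,%ecx; adcl %edx,%ecx
 * // LOCK cmpxchg8b retries from the re-read value until the
 * // (lo,hi) pair is swapped in atomically, then returns 1.
 */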
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 01c805ba5359..6b34d04d096a 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
20 20
21ENTRY(copy_page) 21ENTRY(copy_page)
22 CFI_STARTPROC 22 CFI_STARTPROC
23 subq $3*8,%rsp 23 subq $2*8,%rsp
24 CFI_ADJUST_CFA_OFFSET 3*8 24 CFI_ADJUST_CFA_OFFSET 2*8
25 movq %rbx,(%rsp) 25 movq %rbx,(%rsp)
26 CFI_REL_OFFSET rbx, 0 26 CFI_REL_OFFSET rbx, 0
27 movq %r12,1*8(%rsp) 27 movq %r12,1*8(%rsp)
28 CFI_REL_OFFSET r12, 1*8 28 CFI_REL_OFFSET r12, 1*8
29 movq %r13,2*8(%rsp)
30 CFI_REL_OFFSET r13, 2*8
31 29
32 movl $(4096/64)-5,%ecx 30 movl $(4096/64)-5,%ecx
33 .p2align 4 31 .p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
91 CFI_RESTORE rbx 89 CFI_RESTORE rbx
92 movq 1*8(%rsp),%r12 90 movq 1*8(%rsp),%r12
93 CFI_RESTORE r12 91 CFI_RESTORE r12
94 movq 2*8(%rsp),%r13 92 addq $2*8,%rsp
95 CFI_RESTORE r13 93 CFI_ADJUST_CFA_OFFSET -2*8
96 addq $3*8,%rsp
97 CFI_ADJUST_CFA_OFFSET -3*8
98 ret 94 ret
99.Lcopy_page_end: 95.Lcopy_page_end:
100 CFI_ENDPROC 96 CFI_ENDPROC
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fc45ba887d05..e395693abdb1 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops)
48} 48}
49 49
50/* TSC based delay: */ 50/* TSC based delay: */
51static void delay_tsc(unsigned long loops) 51static void delay_tsc(unsigned long __loops)
52{ 52{
53 unsigned long bclock, now; 53 u32 bclock, now, loops = __loops;
54 int cpu; 54 int cpu;
55 55
56 preempt_disable(); 56 preempt_disable();
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 88ad5fbda6e1..c1f01a8e9f65 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
29 return inat_primary_table[opcode]; 29 return inat_primary_table[opcode];
30} 30}
31 31
32insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, 32int inat_get_last_prefix_id(insn_byte_t last_pfx)
33{
34 insn_attr_t lpfx_attr;
35
36 lpfx_attr = inat_get_opcode_attribute(last_pfx);
37 return inat_last_prefix_id(lpfx_attr);
38}
39
40insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
33 insn_attr_t esc_attr) 41 insn_attr_t esc_attr)
34{ 42{
35 const insn_attr_t *table; 43 const insn_attr_t *table;
36 insn_attr_t lpfx_attr; 44 int n;
37 int n, m = 0;
38 45
39 n = inat_escape_id(esc_attr); 46 n = inat_escape_id(esc_attr);
40 if (last_pfx) { 47
41 lpfx_attr = inat_get_opcode_attribute(last_pfx);
42 m = inat_last_prefix_id(lpfx_attr);
43 }
44 table = inat_escape_tables[n][0]; 48 table = inat_escape_tables[n][0];
45 if (!table) 49 if (!table)
46 return 0; 50 return 0;
47 if (inat_has_variant(table[opcode]) && m) { 51 if (inat_has_variant(table[opcode]) && lpfx_id) {
48 table = inat_escape_tables[n][m]; 52 table = inat_escape_tables[n][lpfx_id];
49 if (!table) 53 if (!table)
50 return 0; 54 return 0;
51 } 55 }
52 return table[opcode]; 56 return table[opcode];
53} 57}
54 58
55insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, 59insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
56 insn_attr_t grp_attr) 60 insn_attr_t grp_attr)
57{ 61{
58 const insn_attr_t *table; 62 const insn_attr_t *table;
59 insn_attr_t lpfx_attr; 63 int n;
60 int n, m = 0;
61 64
62 n = inat_group_id(grp_attr); 65 n = inat_group_id(grp_attr);
63 if (last_pfx) { 66
64 lpfx_attr = inat_get_opcode_attribute(last_pfx);
65 m = inat_last_prefix_id(lpfx_attr);
66 }
67 table = inat_group_tables[n][0]; 67 table = inat_group_tables[n][0];
68 if (!table) 68 if (!table)
69 return inat_group_common_attribute(grp_attr); 69 return inat_group_common_attribute(grp_attr);
70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { 70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
71 table = inat_group_tables[n][m]; 71 table = inat_group_tables[n][lpfx_id];
72 if (!table) 72 if (!table)
73 return inat_group_common_attribute(grp_attr); 73 return inat_group_common_attribute(grp_attr);
74 } 74 }
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 5a1f9f3e3fbb..25feb1ae71c5 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -185,7 +185,8 @@ err_out:
185void insn_get_opcode(struct insn *insn) 185void insn_get_opcode(struct insn *insn)
186{ 186{
187 struct insn_field *opcode = &insn->opcode; 187 struct insn_field *opcode = &insn->opcode;
188 insn_byte_t op, pfx; 188 insn_byte_t op;
189 int pfx_id;
189 if (opcode->got) 190 if (opcode->got)
190 return; 191 return;
191 if (!insn->prefixes.got) 192 if (!insn->prefixes.got)
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn)
212 /* Get escaped opcode */ 213 /* Get escaped opcode */
213 op = get_next(insn_byte_t, insn); 214 op = get_next(insn_byte_t, insn);
214 opcode->bytes[opcode->nbytes++] = op; 215 opcode->bytes[opcode->nbytes++] = op;
215 pfx = insn_last_prefix(insn); 216 pfx_id = insn_last_prefix_id(insn);
216 insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); 217 insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
217 } 218 }
218 if (inat_must_vex(insn->attr)) 219 if (inat_must_vex(insn->attr))
219 insn->attr = 0; /* This instruction is bad */ 220 insn->attr = 0; /* This instruction is bad */
@@ -235,7 +236,7 @@ err_out:
235void insn_get_modrm(struct insn *insn) 236void insn_get_modrm(struct insn *insn)
236{ 237{
237 struct insn_field *modrm = &insn->modrm; 238 struct insn_field *modrm = &insn->modrm;
238 insn_byte_t pfx, mod; 239 insn_byte_t pfx_id, mod;
239 if (modrm->got) 240 if (modrm->got)
240 return; 241 return;
241 if (!insn->opcode.got) 242 if (!insn->opcode.got)
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn)
246 modrm->value = mod; 247 modrm->value = mod;
247 modrm->nbytes = 1; 248 modrm->nbytes = 1;
248 if (inat_is_group(insn->attr)) { 249 if (inat_is_group(insn->attr)) {
249 pfx = insn_last_prefix(insn); 250 pfx_id = insn_last_prefix_id(insn);
250 insn->attr = inat_get_group_attribute(mod, pfx, 251 insn->attr = inat_get_group_attribute(mod, pfx_id,
251 insn->attr); 252 insn->attr);
252 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) 253 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
253 insn->attr = 0; /* This is bad */ 254 insn->attr = 0; /* This is bad */
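After the refactor, the last prefix byte is resolved to a table index once and the index is reused, instead of re-running inat_get_opcode_attribute() inside every inat_get_*_attribute(); the call sequence in the two hunks above now reads:

/*
 *   pfx_id = insn_last_prefix_id(insn);
 *   attr   = inat_get_escape_attribute(op, pfx_id, attr);
 *   attr   = inat_get_group_attribute(mod, pfx_id, attr);
 */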
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0ecdea..1c273be7c97e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
27 .section .altinstr_replacement, "ax", @progbits 27 .section .altinstr_replacement, "ax", @progbits
28.Lmemcpy_c: 28.Lmemcpy_c:
29 movq %rdi, %rax 29 movq %rdi, %rax
30 30 movq %rdx, %rcx
31 movl %edx, %ecx 31 shrq $3, %rcx
32 shrl $3, %ecx
33 andl $7, %edx 32 andl $7, %edx
34 rep movsq 33 rep movsq
35 movl %edx, %ecx 34 movl %edx, %ecx
@@ -48,8 +47,7 @@
48 .section .altinstr_replacement, "ax", @progbits 47 .section .altinstr_replacement, "ax", @progbits
49.Lmemcpy_c_e: 48.Lmemcpy_c_e:
50 movq %rdi, %rax 49 movq %rdi, %rax
51 50 movq %rdx, %rcx
52 movl %edx, %ecx
53 rep movsb 51 rep movsb
54 ret 52 ret
55.Lmemcpy_e_e: 53.Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
60 CFI_STARTPROC 58 CFI_STARTPROC
61 movq %rdi, %rax 59 movq %rdi, %rax
62 60
63 /* 61 cmpq $0x20, %rdx
64 * Use 32bit CMP here to avoid long NOP padding.
65 */
66 cmp $0x20, %edx
67 jb .Lhandle_tail 62 jb .Lhandle_tail
68 63
69 /* 64 /*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
72 */ 67 */
73 cmp %dil, %sil 68 cmp %dil, %sil
74 jl .Lcopy_backward 69 jl .Lcopy_backward
75 subl $0x20, %edx 70 subq $0x20, %rdx
76.Lcopy_forward_loop: 71.Lcopy_forward_loop:
77 subq $0x20, %rdx 72 subq $0x20, %rdx
78 73
@@ -91,7 +86,7 @@ ENTRY(memcpy)
91 movq %r11, 3*8(%rdi) 86 movq %r11, 3*8(%rdi)
92 leaq 4*8(%rdi), %rdi 87 leaq 4*8(%rdi), %rdi
93 jae .Lcopy_forward_loop 88 jae .Lcopy_forward_loop
94 addq $0x20, %rdx 89 addl $0x20, %edx
95 jmp .Lhandle_tail 90 jmp .Lhandle_tail
96 91
97.Lcopy_backward: 92.Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
123 /* 118 /*
124 * Calculate copy position to head. 119 * Calculate copy position to head.
125 */ 120 */
126 addq $0x20, %rdx 121 addl $0x20, %edx
127 subq %rdx, %rsi 122 subq %rdx, %rsi
128 subq %rdx, %rdi 123 subq %rdx, %rdi
129.Lhandle_tail: 124.Lhandle_tail:
130 cmpq $16, %rdx 125 cmpl $16, %edx
131 jb .Lless_16bytes 126 jb .Lless_16bytes
132 127
133 /* 128 /*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
144 retq 139 retq
145 .p2align 4 140 .p2align 4
146.Lless_16bytes: 141.Lless_16bytes:
147 cmpq $8, %rdx 142 cmpl $8, %edx
148 jb .Lless_8bytes 143 jb .Lless_8bytes
149 /* 144 /*
150 * Move data from 8 bytes to 15 bytes. 145 * Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
156 retq 151 retq
157 .p2align 4 152 .p2align 4
158.Lless_8bytes: 153.Lless_8bytes:
159 cmpq $4, %rdx 154 cmpl $4, %edx
160 jb .Lless_3bytes 155 jb .Lless_3bytes
161 156
162 /* 157 /*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
169 retq 164 retq
170 .p2align 4 165 .p2align 4
171.Lless_3bytes: 166.Lless_3bytes:
172 cmpl $0, %edx 167 subl $1, %edx
173 je .Lend 168 jb .Lend
174 /* 169 /*
175 * Move data from 1 byte to 3 bytes. 170 * Move data from 1 byte to 3 bytes.
176 */ 171 */
177.Lloop_1: 172 movzbl (%rsi), %ecx
178 movb (%rsi), %r8b 173 jz .Lstore_1byte
179 movb %r8b, (%rdi) 174 movzbq 1(%rsi), %r8
180 incq %rdi 175 movzbq (%rsi, %rdx), %r9
181 incq %rsi 176 movb %r8b, 1(%rdi)
182 decl %edx 177 movb %r9b, (%rdi, %rdx)
183 jnz .Lloop_1 178.Lstore_1byte:
179 movb %cl, (%rdi)
184 180
185.Lend: 181.Lend:
186 retq 182 retq
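
A note on the rewritten .Lless_3bytes path above: instead of a byte-at-a-time loop, it loads every needed source byte before issuing any store, letting the two trailing stores overlap when the length is 2. A minimal C sketch of the same idea, with a made-up function name:

    #include <stddef.h>

    /* Illustrative sketch only: copy a 0..3 byte tail the way the new
     * assembly does -- read all source bytes first, then store.
     * dst[1] and dst[len - 1] alias when len == 2, which is harmless. */
    static void copy_tail_0_3(unsigned char *dst, const unsigned char *src,
                              size_t len)
    {
            unsigned char first, second, last;

            if (len == 0)                   /* subl $1,%edx; jb .Lend */
                    return;
            first = src[0];                 /* movzbl (%rsi),%ecx */
            if (len > 1) {                  /* jz .Lstore_1byte */
                    second = src[1];        /* movzbq 1(%rsi),%r8 */
                    last = src[len - 1];    /* movzbq (%rsi,%rdx),%r9 */
                    dst[1] = second;
                    dst[len - 1] = last;
            }
            dst[0] = first;                 /* movb %cl,(%rdi) */
    }
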
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454b78a3..2dcb3808cbda 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
19 .section .altinstr_replacement, "ax", @progbits 19 .section .altinstr_replacement, "ax", @progbits
20.Lmemset_c: 20.Lmemset_c:
21 movq %rdi,%r9 21 movq %rdi,%r9
22 movl %edx,%r8d 22 movq %rdx,%rcx
23 andl $7,%r8d 23 andl $7,%edx
24 movl %edx,%ecx 24 shrq $3,%rcx
25 shrl $3,%ecx
26 /* expand byte value */ 25 /* expand byte value */
27 movzbl %sil,%esi 26 movzbl %sil,%esi
28 movabs $0x0101010101010101,%rax 27 movabs $0x0101010101010101,%rax
29 mulq %rsi /* with rax, clobbers rdx */ 28 imulq %rsi,%rax
30 rep stosq 29 rep stosq
31 movl %r8d,%ecx 30 movl %edx,%ecx
32 rep stosb 31 rep stosb
33 movq %r9,%rax 32 movq %r9,%rax
34 ret 33 ret
@@ -50,7 +49,7 @@
50.Lmemset_c_e: 49.Lmemset_c_e:
51 movq %rdi,%r9 50 movq %rdi,%r9
52 movb %sil,%al 51 movb %sil,%al
53 movl %edx,%ecx 52 movq %rdx,%rcx
54 rep stosb 53 rep stosb
55 movq %r9,%rax 54 movq %r9,%rax
56 ret 55 ret
@@ -61,12 +60,11 @@ ENTRY(memset)
61ENTRY(__memset) 60ENTRY(__memset)
62 CFI_STARTPROC 61 CFI_STARTPROC
63 movq %rdi,%r10 62 movq %rdi,%r10
64 movq %rdx,%r11
65 63
66 /* expand byte value */ 64 /* expand byte value */
67 movzbl %sil,%ecx 65 movzbl %sil,%ecx
68 movabs $0x0101010101010101,%rax 66 movabs $0x0101010101010101,%rax
69 mul %rcx /* with rax, clobbers rdx */ 67 imulq %rcx,%rax
70 68
71 /* align dst */ 69 /* align dst */
72 movl %edi,%r9d 70 movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
75 CFI_REMEMBER_STATE 73 CFI_REMEMBER_STATE
76.Lafter_bad_alignment: 74.Lafter_bad_alignment:
77 75
78 movl %r11d,%ecx 76 movq %rdx,%rcx
79 shrl $6,%ecx 77 shrq $6,%rcx
80 jz .Lhandle_tail 78 jz .Lhandle_tail
81 79
82 .p2align 4 80 .p2align 4
83.Lloop_64: 81.Lloop_64:
84 decl %ecx 82 decq %rcx
85 movq %rax,(%rdi) 83 movq %rax,(%rdi)
86 movq %rax,8(%rdi) 84 movq %rax,8(%rdi)
87 movq %rax,16(%rdi) 85 movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
97 to predict jump tables. */ 95 to predict jump tables. */
98 .p2align 4 96 .p2align 4
99.Lhandle_tail: 97.Lhandle_tail:
100 movl %r11d,%ecx 98 movl %edx,%ecx
101 andl $63&(~7),%ecx 99 andl $63&(~7),%ecx
102 jz .Lhandle_7 100 jz .Lhandle_7
103 shrl $3,%ecx 101 shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
109 jnz .Lloop_8 107 jnz .Lloop_8
110 108
111.Lhandle_7: 109.Lhandle_7:
112 movl %r11d,%ecx 110 andl $7,%edx
113 andl $7,%ecx
114 jz .Lende 111 jz .Lende
115 .p2align 4 112 .p2align 4
116.Lloop_1: 113.Lloop_1:
117 decl %ecx 114 decl %edx
118 movb %al,(%rdi) 115 movb %al,(%rdi)
119 leaq 1(%rdi),%rdi 116 leaq 1(%rdi),%rdi
120 jnz .Lloop_1 117 jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
125 122
126 CFI_RESTORE_STATE 123 CFI_RESTORE_STATE
127.Lbad_alignment: 124.Lbad_alignment:
128 cmpq $7,%r11 125 cmpq $7,%rdx
129 jbe .Lhandle_7 126 jbe .Lhandle_7
130 movq %rax,(%rdi) /* unaligned store */ 127 movq %rax,(%rdi) /* unaligned store */
131 movq $8,%r8 128 movq $8,%r8
132 subq %r9,%r8 129 subq %r9,%r8
133 addq %r8,%rdi 130 addq %r8,%rdi
134 subq %r8,%r11 131 subq %r8,%rdx
135 jmp .Lafter_bad_alignment 132 jmp .Lafter_bad_alignment
136.Lfinal: 133.Lfinal:
137 CFI_ENDPROC 134 CFI_ENDPROC
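
Both memset variants above expand the fill byte into a 64-bit pattern by multiplying it with 0x0101010101010101; the switch from mul to imul matters because one-operand mul clobbers %rdx, which now carries the length. A small sketch of the expansion:

    #include <stdint.h>

    /* Illustrative sketch: replicate a byte into all eight lanes of a
     * 64-bit word, as the "movabs $0x0101010101010101,%rax; imulq"
     * pair above does. */
    static uint64_t expand_byte(unsigned char c)
    {
            return (uint64_t)c * 0x0101010101010101ULL;
    }

    /* expand_byte(0xab) == 0xababababababababULL */
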
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 97be9cb54483..57252c928f56 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
7#include <linux/highmem.h> 7#include <linux/highmem.h>
8#include <linux/module.h> 8#include <linux/module.h>
9 9
10#include <asm/word-at-a-time.h>
11
10/* 12/*
11 * best effort, GUP based copy_from_user() that is NMI-safe 13 * best effort, GUP based copy_from_user() that is NMI-safe
12 */ 14 */
@@ -41,3 +43,104 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
41 return len; 43 return len;
42} 44}
43EXPORT_SYMBOL_GPL(copy_from_user_nmi); 45EXPORT_SYMBOL_GPL(copy_from_user_nmi);
46
47static inline unsigned long count_bytes(unsigned long mask)
48{
49 mask = (mask - 1) & ~mask;
50 mask >>= 7;
51 return count_masked_bytes(mask);
52}
53
54/*
55 * Do a strncpy, return length of string without final '\0'.
56 * 'count' is the user-supplied count (return 'count' if we
57 * hit it), 'max' is the address space maximum (and we return
58 * -EFAULT if we hit it).
59 */
60static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, long max)
61{
62 long res = 0;
63
64 /*
65 * Truncate 'max' to the user-specified limit, so that
66 * we only have one limit we need to check in the loop
67 */
68 if (max > count)
69 max = count;
70
71 while (max >= sizeof(unsigned long)) {
72 unsigned long c;
73
74 /* Fall back to byte-at-a-time if we get a page fault */
75 if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
76 break;
77 /* This can write a few bytes past the NUL character, but that's ok */
78 *(unsigned long *)(dst+res) = c;
79 c = has_zero(c);
80 if (c)
81 return res + count_bytes(c);
82 res += sizeof(unsigned long);
83 max -= sizeof(unsigned long);
84 }
85
86 while (max) {
87 char c;
88
89 if (unlikely(__get_user(c,src+res)))
90 return -EFAULT;
91 dst[res] = c;
92 if (!c)
93 return res;
94 res++;
95 max--;
96 }
97
98 /*
99 * Uhhuh. We hit 'max'. But was that the user-specified maximum
100 * too? If so, that's ok - we got as much as the user asked for.
101 */
102 if (res >= count)
103 return count;
104
105 /*
106 * Nope: we hit the address space limit, and we still had more
107 * characters the caller would have wanted. That's an EFAULT.
108 */
109 return -EFAULT;
110}
111
112/**
113 * strncpy_from_user: - Copy a NUL terminated string from userspace.
114 * @dst: Destination address, in kernel space. This buffer must be at
115 * least @count bytes long.
116 * @src: Source address, in user space.
117 * @count: Maximum number of bytes to copy, including the trailing NUL.
118 *
119 * Copies a NUL-terminated string from userspace to kernel space.
120 *
121 * On success, returns the length of the string (not including the trailing
122 * NUL).
123 *
124 * If access to userspace fails, returns -EFAULT (some data may have been
125 * copied).
126 *
127 * If @count is smaller than the length of the string, copies @count bytes
128 * and returns @count.
129 */
130long
131strncpy_from_user(char *dst, const char __user *src, long count)
132{
133 unsigned long max_addr, src_addr;
134
135 if (unlikely(count <= 0))
136 return 0;
137
138 max_addr = current_thread_info()->addr_limit.seg;
139 src_addr = (unsigned long)src;
140 if (likely(src_addr < max_addr)) {
141 unsigned long max = max_addr - src_addr;
142 return do_strncpy_from_user(dst, src, count, max);
143 }
144 return -EFAULT;
145}
146EXPORT_SYMBOL(strncpy_from_user);
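
do_strncpy_from_user() above copies a word at a time and relies on has_zero()/count_bytes() from <asm/word-at-a-time.h> to locate the terminating NUL inside a word. A portable little-endian sketch of the trick follows, using a count-trailing-zeros step where the kernel uses a multiply-based count_masked_bytes():

    #include <stdint.h>

    /* Portable sketch of the word-at-a-time zero-byte test. */
    #define ONES    0x0101010101010101ULL
    #define HIGHS   0x8080808080808080ULL

    /* Nonzero iff 'w' contains a zero byte; the high bit of the first
     * zero byte's lane is guaranteed set, and no lower lane bit is. */
    static uint64_t has_zero_byte(uint64_t w)
    {
            return (w - ONES) & ~w & HIGHS;
    }

    /* Index (0..7) of the first zero byte, given has_zero_byte()'s
     * mask: the lowest set bit is bit 8*i + 7 of lane i. */
    static unsigned int first_zero_index(uint64_t zmask)
    {
            return __builtin_ctzll(zmask) / 8;
    }

Scanning the word holding "hello\0??" this way yields index 5, which is how the loop can store a whole word (a few bytes past the NUL) yet still report the exact string length.
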
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index e218d5df85ff..ef2a6a5d78e3 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -33,93 +33,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
33 __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) 33 __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n))
34 34
35/* 35/*
36 * Copy a null terminated string from userspace.
37 */
38
39#define __do_strncpy_from_user(dst, src, count, res) \
40do { \
41 int __d0, __d1, __d2; \
42 might_fault(); \
43 __asm__ __volatile__( \
44 " testl %1,%1\n" \
45 " jz 2f\n" \
46 "0: lodsb\n" \
47 " stosb\n" \
48 " testb %%al,%%al\n" \
49 " jz 1f\n" \
50 " decl %1\n" \
51 " jnz 0b\n" \
52 "1: subl %1,%0\n" \
53 "2:\n" \
54 ".section .fixup,\"ax\"\n" \
55 "3: movl %5,%0\n" \
56 " jmp 2b\n" \
57 ".previous\n" \
58 _ASM_EXTABLE(0b,3b) \
59 : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
60 "=&D" (__d2) \
61 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
62 : "memory"); \
63} while (0)
64
65/**
66 * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
67 * @dst: Destination address, in kernel space. This buffer must be at
68 * least @count bytes long.
69 * @src: Source address, in user space.
70 * @count: Maximum number of bytes to copy, including the trailing NUL.
71 *
72 * Copies a NUL-terminated string from userspace to kernel space.
73 * Caller must check the specified block with access_ok() before calling
74 * this function.
75 *
76 * On success, returns the length of the string (not including the trailing
77 * NUL).
78 *
79 * If access to userspace fails, returns -EFAULT (some data may have been
80 * copied).
81 *
82 * If @count is smaller than the length of the string, copies @count bytes
83 * and returns @count.
84 */
85long
86__strncpy_from_user(char *dst, const char __user *src, long count)
87{
88 long res;
89 __do_strncpy_from_user(dst, src, count, res);
90 return res;
91}
92EXPORT_SYMBOL(__strncpy_from_user);
93
94/**
95 * strncpy_from_user: - Copy a NUL terminated string from userspace.
96 * @dst: Destination address, in kernel space. This buffer must be at
97 * least @count bytes long.
98 * @src: Source address, in user space.
99 * @count: Maximum number of bytes to copy, including the trailing NUL.
100 *
101 * Copies a NUL-terminated string from userspace to kernel space.
102 *
103 * On success, returns the length of the string (not including the trailing
104 * NUL).
105 *
106 * If access to userspace fails, returns -EFAULT (some data may have been
107 * copied).
108 *
109 * If @count is smaller than the length of the string, copies @count bytes
110 * and returns @count.
111 */
112long
113strncpy_from_user(char *dst, const char __user *src, long count)
114{
115 long res = -EFAULT;
116 if (access_ok(VERIFY_READ, src, 1))
117 __do_strncpy_from_user(dst, src, count, res);
118 return res;
119}
120EXPORT_SYMBOL(strncpy_from_user);
121
122/*
123 * Zero Userspace 36 * Zero Userspace
124 */ 37 */
125 38
@@ -760,9 +673,9 @@ survive:
760 break; 673 break;
761 } 674 }
762 675
763 maddr = kmap_atomic(pg, KM_USER0); 676 maddr = kmap_atomic(pg);
764 memcpy(maddr + offset, from, len); 677 memcpy(maddr + offset, from, len);
765 kunmap_atomic(maddr, KM_USER0); 678 kunmap_atomic(maddr);
766 set_page_dirty_lock(pg); 679 set_page_dirty_lock(pg);
767 put_page(pg); 680 put_page(pg);
768 up_read(&current->mm->mmap_sem); 681 up_read(&current->mm->mmap_sem);
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index b7c2849ffb66..0d0326f388c0 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -9,55 +9,6 @@
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10 10
11/* 11/*
12 * Copy a null terminated string from userspace.
13 */
14
15#define __do_strncpy_from_user(dst,src,count,res) \
16do { \
17 long __d0, __d1, __d2; \
18 might_fault(); \
19 __asm__ __volatile__( \
20 " testq %1,%1\n" \
21 " jz 2f\n" \
22 "0: lodsb\n" \
23 " stosb\n" \
24 " testb %%al,%%al\n" \
25 " jz 1f\n" \
26 " decq %1\n" \
27 " jnz 0b\n" \
28 "1: subq %1,%0\n" \
29 "2:\n" \
30 ".section .fixup,\"ax\"\n" \
31 "3: movq %5,%0\n" \
32 " jmp 2b\n" \
33 ".previous\n" \
34 _ASM_EXTABLE(0b,3b) \
35 : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
36 "=&D" (__d2) \
37 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
38 : "memory"); \
39} while (0)
40
41long
42__strncpy_from_user(char *dst, const char __user *src, long count)
43{
44 long res;
45 __do_strncpy_from_user(dst, src, count, res);
46 return res;
47}
48EXPORT_SYMBOL(__strncpy_from_user);
49
50long
51strncpy_from_user(char *dst, const char __user *src, long count)
52{
53 long res = -EFAULT;
54 if (access_ok(VERIFY_READ, src, 1))
55 return __strncpy_from_user(dst, src, count);
56 return res;
57}
58EXPORT_SYMBOL(strncpy_from_user);
59
60/*
61 * Zero Userspace 12 * Zero Userspace
62 */ 13 */
63 14
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 5b83c51c12e0..819137904428 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -219,7 +219,9 @@ ab: STOS/W/D/Q Yv,rAX
219ac: LODS/B AL,Xb 219ac: LODS/B AL,Xb
220ad: LODS/W/D/Q rAX,Xv 220ad: LODS/W/D/Q rAX,Xv
221ae: SCAS/B AL,Yb 221ae: SCAS/B AL,Yb
222af: SCAS/W/D/Q rAX,Xv 222# Note: The May 2011 Intel manual shows Xv for the second parameter of the
223# next instruction but Yv is correct
224af: SCAS/W/D/Q rAX,Yv
223# 0xb0 - 0xbf 225# 0xb0 - 0xbf
224b0: MOV AL/R8L,Ib 226b0: MOV AL/R8L,Ib
225b1: MOV CL/R9L,Ib 227b1: MOV CL/R9L,Ib
@@ -729,8 +731,8 @@ de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
729df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) 731df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
730f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) 732f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
731f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) 733f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
732f3: ANDN Gy,By,Ey (v) 734f2: ANDN Gy,By,Ey (v)
733f4: Grp17 (1A) 735f3: Grp17 (1A)
734f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) 736f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
735f6: MULX By,Gy,rDX,Ey (F2),(v) 737f6: MULX By,Gy,rDX,Ey (F2),(v)
736f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) 738f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 7718541541d4..9b868124128d 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -28,6 +28,7 @@
28#include <linux/regset.h> 28#include <linux/regset.h>
29 29
30#include <asm/uaccess.h> 30#include <asm/uaccess.h>
31#include <asm/traps.h>
31#include <asm/desc.h> 32#include <asm/desc.h>
32#include <asm/user.h> 33#include <asm/user.h>
33#include <asm/i387.h> 34#include <asm/i387.h>
@@ -269,7 +270,7 @@ void math_emulate(struct math_emu_info *info)
269 FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ 270 FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */
270 271
271 RE_ENTRANT_CHECK_OFF; 272 RE_ENTRANT_CHECK_OFF;
272 current->thread.trap_no = 16; 273 current->thread.trap_nr = X86_TRAP_MF;
273 current->thread.error_code = 0; 274 current->thread.error_code = 0;
274 send_sig(SIGFPE, current, 1); 275 send_sig(SIGFPE, current, 1);
275 return; 276 return;
@@ -662,7 +663,7 @@ static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
662void math_abort(struct math_emu_info *info, unsigned int signal) 663void math_abort(struct math_emu_info *info, unsigned int signal)
663{ 664{
664 FPU_EIP = FPU_ORIG_EIP; 665 FPU_EIP = FPU_ORIG_EIP;
665 current->thread.trap_no = 16; 666 current->thread.trap_nr = X86_TRAP_MF;
666 current->thread.error_code = 0; 667 current->thread.error_code = 0;
667 send_sig(signal, current, 1); 668 send_sig(signal, current, 1);
668 RE_ENTRANT_CHECK_OFF; 669 RE_ENTRANT_CHECK_OFF;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9d74824a708d..3ecfd1aaf214 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -615,7 +615,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
615 dump_pagetable(address); 615 dump_pagetable(address);
616 616
617 tsk->thread.cr2 = address; 617 tsk->thread.cr2 = address;
618 tsk->thread.trap_no = 14; 618 tsk->thread.trap_nr = X86_TRAP_PF;
619 tsk->thread.error_code = error_code; 619 tsk->thread.error_code = error_code;
620 620
621 if (__die("Bad pagetable", regs, error_code)) 621 if (__die("Bad pagetable", regs, error_code))
@@ -636,7 +636,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
636 /* Are we prepared to handle this kernel fault? */ 636 /* Are we prepared to handle this kernel fault? */
637 if (fixup_exception(regs)) { 637 if (fixup_exception(regs)) {
638 if (current_thread_info()->sig_on_uaccess_error && signal) { 638 if (current_thread_info()->sig_on_uaccess_error && signal) {
639 tsk->thread.trap_no = 14; 639 tsk->thread.trap_nr = X86_TRAP_PF;
640 tsk->thread.error_code = error_code | PF_USER; 640 tsk->thread.error_code = error_code | PF_USER;
641 tsk->thread.cr2 = address; 641 tsk->thread.cr2 = address;
642 642
@@ -673,10 +673,10 @@ no_context(struct pt_regs *regs, unsigned long error_code,
673 673
674 stackend = end_of_stack(tsk); 674 stackend = end_of_stack(tsk);
675 if (tsk != &init_task && *stackend != STACK_END_MAGIC) 675 if (tsk != &init_task && *stackend != STACK_END_MAGIC)
676 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); 676 printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
677 677
678 tsk->thread.cr2 = address; 678 tsk->thread.cr2 = address;
679 tsk->thread.trap_no = 14; 679 tsk->thread.trap_nr = X86_TRAP_PF;
680 tsk->thread.error_code = error_code; 680 tsk->thread.error_code = error_code;
681 681
682 sig = SIGKILL; 682 sig = SIGKILL;
@@ -684,7 +684,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
684 sig = 0; 684 sig = 0;
685 685
686 /* Executive summary in case the body of the oops scrolled away */ 686 /* Executive summary in case the body of the oops scrolled away */
687 printk(KERN_EMERG "CR2: %016lx\n", address); 687 printk(KERN_DEFAULT "CR2: %016lx\n", address);
688 688
689 oops_end(flags, regs, sig); 689 oops_end(flags, regs, sig);
690} 690}
@@ -754,7 +754,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
754 /* Kernel addresses are always protection faults: */ 754 /* Kernel addresses are always protection faults: */
755 tsk->thread.cr2 = address; 755 tsk->thread.cr2 = address;
756 tsk->thread.error_code = error_code | (address >= TASK_SIZE); 756 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
757 tsk->thread.trap_no = 14; 757 tsk->thread.trap_nr = X86_TRAP_PF;
758 758
759 force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); 759 force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
760 760
@@ -838,7 +838,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
838 838
839 tsk->thread.cr2 = address; 839 tsk->thread.cr2 = address;
840 tsk->thread.error_code = error_code; 840 tsk->thread.error_code = error_code;
841 tsk->thread.trap_no = 14; 841 tsk->thread.trap_nr = X86_TRAP_PF;
842 842
843#ifdef CONFIG_MEMORY_FAILURE 843#ifdef CONFIG_MEMORY_FAILURE
844 if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { 844 if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index f4f29b19fac5..6f31ee56c008 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -51,11 +51,11 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
51} 51}
52EXPORT_SYMBOL(kmap_atomic_prot); 52EXPORT_SYMBOL(kmap_atomic_prot);
53 53
54void *__kmap_atomic(struct page *page) 54void *kmap_atomic(struct page *page)
55{ 55{
56 return kmap_atomic_prot(page, kmap_prot); 56 return kmap_atomic_prot(page, kmap_prot);
57} 57}
58EXPORT_SYMBOL(__kmap_atomic); 58EXPORT_SYMBOL(kmap_atomic);
59 59
60/* 60/*
61 * This is the same as kmap_atomic() but can map memory that doesn't 61 * This is the same as kmap_atomic() but can map memory that doesn't
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f581a18c0d4d..f6679a7fb8ca 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -308,10 +308,11 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
308{ 308{
309 struct hstate *h = hstate_file(file); 309 struct hstate *h = hstate_file(file);
310 struct mm_struct *mm = current->mm; 310 struct mm_struct *mm = current->mm;
311 struct vm_area_struct *vma, *prev_vma; 311 struct vm_area_struct *vma;
312 unsigned long base = mm->mmap_base, addr = addr0; 312 unsigned long base = mm->mmap_base;
313 unsigned long addr = addr0;
313 unsigned long largest_hole = mm->cached_hole_size; 314 unsigned long largest_hole = mm->cached_hole_size;
314 int first_time = 1; 315 unsigned long start_addr;
315 316
316 /* don't allow allocations above current base */ 317 /* don't allow allocations above current base */
317 if (mm->free_area_cache > base) 318 if (mm->free_area_cache > base)
@@ -322,6 +323,8 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
322 mm->free_area_cache = base; 323 mm->free_area_cache = base;
323 } 324 }
324try_again: 325try_again:
326 start_addr = mm->free_area_cache;
327
325 /* make sure it can fit in the remaining address space */ 328 /* make sure it can fit in the remaining address space */
326 if (mm->free_area_cache < len) 329 if (mm->free_area_cache < len)
327 goto fail; 330 goto fail;
@@ -333,24 +336,18 @@ try_again:
333 * Lookup failure means no vma is above this address, 336 * Lookup failure means no vma is above this address,
334 * i.e. return with success: 337 * i.e. return with success:
335 */ 338 */
336 if (!(vma = find_vma_prev(mm, addr, &prev_vma))) 339 vma = find_vma(mm, addr);
340 if (!vma)
337 return addr; 341 return addr;
338 342
339 /* 343 if (addr + len <= vma->vm_start) {
340 * new region fits between prev_vma->vm_end and
341 * vma->vm_start, use it:
342 */
343 if (addr + len <= vma->vm_start &&
344 (!prev_vma || (addr >= prev_vma->vm_end))) {
345 /* remember the address as a hint for next time */ 344 /* remember the address as a hint for next time */
346 mm->cached_hole_size = largest_hole; 345 mm->cached_hole_size = largest_hole;
347 return (mm->free_area_cache = addr); 346 return (mm->free_area_cache = addr);
348 } else { 347 } else if (mm->free_area_cache == vma->vm_end) {
349 /* pull free_area_cache down to the first hole */ 348 /* pull free_area_cache down to the first hole */
350 if (mm->free_area_cache == vma->vm_end) { 349 mm->free_area_cache = vma->vm_start;
351 mm->free_area_cache = vma->vm_start; 350 mm->cached_hole_size = largest_hole;
352 mm->cached_hole_size = largest_hole;
353 }
354 } 351 }
355 352
356 /* remember the largest hole we saw so far */ 353 /* remember the largest hole we saw so far */
@@ -366,10 +363,9 @@ fail:
366 * if hint left us with no space for the requested 363 * if hint left us with no space for the requested
367 * mapping then try again: 364 * mapping then try again:
368 */ 365 */
369 if (first_time) { 366 if (start_addr != base) {
370 mm->free_area_cache = base; 367 mm->free_area_cache = base;
371 largest_hole = 0; 368 largest_hole = 0;
372 first_time = 0;
373 goto try_again; 369 goto try_again;
374 } 370 }
375 /* 371 /*
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index a298914058f9..4f0cec7e4ffb 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -3,6 +3,7 @@
3#include <linux/ioport.h> 3#include <linux/ioport.h>
4#include <linux/swap.h> 4#include <linux/swap.h>
5#include <linux/memblock.h> 5#include <linux/memblock.h>
6#include <linux/bootmem.h> /* for max_low_pfn */
6 7
7#include <asm/cacheflush.h> 8#include <asm/cacheflush.h>
8#include <asm/e820.h> 9#include <asm/e820.h>
@@ -11,10 +12,10 @@
11#include <asm/page_types.h> 12#include <asm/page_types.h>
12#include <asm/sections.h> 13#include <asm/sections.h>
13#include <asm/setup.h> 14#include <asm/setup.h>
14#include <asm/system.h>
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/tlb.h> 16#include <asm/tlb.h>
17#include <asm/proto.h> 17#include <asm/proto.h>
18#include <asm/dma.h> /* for MAX_DMA_PFN */
18 19
19unsigned long __initdata pgt_buf_start; 20unsigned long __initdata pgt_buf_start;
20unsigned long __meminitdata pgt_buf_end; 21unsigned long __meminitdata pgt_buf_end;
@@ -392,3 +393,24 @@ void free_initrd_mem(unsigned long start, unsigned long end)
392 free_init_pages("initrd memory", start, PAGE_ALIGN(end)); 393 free_init_pages("initrd memory", start, PAGE_ALIGN(end));
393} 394}
394#endif 395#endif
396
397void __init zone_sizes_init(void)
398{
399 unsigned long max_zone_pfns[MAX_NR_ZONES];
400
401 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
402
403#ifdef CONFIG_ZONE_DMA
404 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
405#endif
406#ifdef CONFIG_ZONE_DMA32
407 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
408#endif
409 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
410#ifdef CONFIG_HIGHMEM
411 max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
412#endif
413
414 free_area_init_nodes(max_zone_pfns);
415}
416
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0c1da394a634..575d86f85ce4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -35,7 +35,6 @@
35#include <asm/asm.h> 35#include <asm/asm.h>
36#include <asm/bios_ebda.h> 36#include <asm/bios_ebda.h>
37#include <asm/processor.h> 37#include <asm/processor.h>
38#include <asm/system.h>
39#include <asm/uaccess.h> 38#include <asm/uaccess.h>
40#include <asm/pgtable.h> 39#include <asm/pgtable.h>
41#include <asm/dma.h> 40#include <asm/dma.h>
@@ -668,22 +667,6 @@ void __init initmem_init(void)
668} 667}
669#endif /* !CONFIG_NEED_MULTIPLE_NODES */ 668#endif /* !CONFIG_NEED_MULTIPLE_NODES */
670 669
671static void __init zone_sizes_init(void)
672{
673 unsigned long max_zone_pfns[MAX_NR_ZONES];
674 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
675#ifdef CONFIG_ZONE_DMA
676 max_zone_pfns[ZONE_DMA] =
677 virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
678#endif
679 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
680#ifdef CONFIG_HIGHMEM
681 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
682#endif
683
684 free_area_init_nodes(max_zone_pfns);
685}
686
687void __init setup_bootmem_allocator(void) 670void __init setup_bootmem_allocator(void)
688{ 671{
689 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 672 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
@@ -754,6 +737,17 @@ void __init mem_init(void)
754#ifdef CONFIG_FLATMEM 737#ifdef CONFIG_FLATMEM
755 BUG_ON(!mem_map); 738 BUG_ON(!mem_map);
756#endif 739#endif
740 /*
741 * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
742 * be done before free_all_bootmem(). Memblock uses free low memory for
743 * temporary data (see find_range_array()) and for this purpose can use
744 * pages that were already passed to the buddy allocator, hence marked as
745 * not accessible in the page tables when compiled with
746 * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
747 * important here.
748 */
749 set_highmem_pages_init();
750
757 /* this will put all low memory onto the freelists */ 751 /* this will put all low memory onto the freelists */
758 totalram_pages += free_all_bootmem(); 752 totalram_pages += free_all_bootmem();
759 753
@@ -765,8 +759,6 @@ void __init mem_init(void)
765 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 759 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
766 reservedpages++; 760 reservedpages++;
767 761
768 set_highmem_pages_init();
769
770 codesize = (unsigned long) &_etext - (unsigned long) &_text; 762 codesize = (unsigned long) &_etext - (unsigned long) &_text;
771 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 763 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
772 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 764 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a8a56ce3a962..fc18be0f6f29 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -35,7 +35,6 @@
35 35
36#include <asm/processor.h> 36#include <asm/processor.h>
37#include <asm/bios_ebda.h> 37#include <asm/bios_ebda.h>
38#include <asm/system.h>
39#include <asm/uaccess.h> 38#include <asm/uaccess.h>
40#include <asm/pgtable.h> 39#include <asm/pgtable.h>
41#include <asm/pgalloc.h> 40#include <asm/pgalloc.h>
@@ -614,15 +613,6 @@ void __init initmem_init(void)
614 613
615void __init paging_init(void) 614void __init paging_init(void)
616{ 615{
617 unsigned long max_zone_pfns[MAX_NR_ZONES];
618
619 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
620#ifdef CONFIG_ZONE_DMA
621 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
622#endif
623 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
624 max_zone_pfns[ZONE_NORMAL] = max_pfn;
625
626 sparse_memory_present_with_active_regions(MAX_NUMNODES); 616 sparse_memory_present_with_active_regions(MAX_NUMNODES);
627 sparse_init(); 617 sparse_init();
628 618
@@ -634,7 +624,7 @@ void __init paging_init(void)
634 */ 624 */
635 node_clear_state(0, N_NORMAL_MEMORY); 625 node_clear_state(0, N_NORMAL_MEMORY);
636 626
637 free_area_init_nodes(max_zone_pfns); 627 zone_sizes_init();
638} 628}
639 629
640/* 630/*
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
index 036efbea8b28..aef7140c0063 100644
--- a/arch/x86/mm/kmemcheck/selftest.c
+++ b/arch/x86/mm/kmemcheck/selftest.c
@@ -1,3 +1,4 @@
1#include <linux/bug.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2 3
3#include "opcode.h" 4#include "opcode.h"
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 4b5ba85eb5c9..845df6835f9f 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -75,9 +75,9 @@ static unsigned long mmap_rnd(void)
75 */ 75 */
76 if (current->flags & PF_RANDOMIZE) { 76 if (current->flags & PF_RANDOMIZE) {
77 if (mmap_is_ia32()) 77 if (mmap_is_ia32())
78 rnd = (long)get_random_int() % (1<<8); 78 rnd = get_random_int() % (1<<8);
79 else 79 else
80 rnd = (long)(get_random_int() % (1<<28)); 80 rnd = get_random_int() % (1<<28);
81 } 81 }
82 return rnd << PAGE_SHIFT; 82 return rnd << PAGE_SHIFT;
83} 83}
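
The casts dropped above were redundant; the entropy is unchanged: 8 bits of page-granular randomness for 32-bit tasks and 28 bits for 64-bit tasks. A userspace sketch of the computation, assuming PAGE_SHIFT is 12 and taking the random value as a parameter:

    /* Illustrative only: up to (2^8 - 1) pages (~1 MiB) of offset for
     * ia32 tasks and (2^28 - 1) pages (~1 TiB) for 64-bit tasks. */
    #define PAGE_SHIFT 12

    static unsigned long mmap_rnd_sketch(int is_ia32, unsigned int random)
    {
            unsigned long rnd = is_ia32 ? random % (1U << 8)
                                        : random % (1U << 28);
            return rnd << PAGE_SHIFT;
    }
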
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index de54b9b278a7..dc0b727742f4 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -75,8 +75,8 @@ static LIST_HEAD(trace_list); /* struct remap_trace */
75 75
76/* module parameters */ 76/* module parameters */
77static unsigned long filter_offset; 77static unsigned long filter_offset;
78static int nommiotrace; 78static bool nommiotrace;
79static int trace_pc; 79static bool trace_pc;
80 80
81module_param(filter_offset, ulong, 0); 81module_param(filter_offset, ulong, 0);
82module_param(nommiotrace, bool, 0); 82module_param(nommiotrace, bool, 0);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 496f494593bf..19d3fa08b119 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -110,7 +110,7 @@ void __cpuinit numa_clear_node(int cpu)
110 * Allocate node_to_cpumask_map based on number of available nodes 110 * Allocate node_to_cpumask_map based on number of available nodes
111 * Requires node_possible_map to be valid. 111 * Requires node_possible_map to be valid.
112 * 112 *
113 * Note: node_to_cpumask() is not valid until after this is done. 113 * Note: cpumask_of_node() is not valid until after this is done.
114 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) 114 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
115 */ 115 */
116void __init setup_node_to_cpumask_map(void) 116void __init setup_node_to_cpumask_map(void)
@@ -422,8 +422,9 @@ static int __init numa_alloc_distance(void)
422 * calls are ignored until the distance table is reset with 422 * calls are ignored until the distance table is reset with
423 * numa_reset_distance(). 423 * numa_reset_distance().
424 * 424 *
425 * If @from or @to is higher than the highest known node at the time of 425 * If @from or @to is higher than the highest known node or lower than zero
426 * table creation or @distance doesn't make sense, the call is ignored. 426 * at the time of table creation or @distance doesn't make sense, the call
427 * is ignored.
427 * This is to allow simplification of specific NUMA config implementations. 428 * This is to allow simplification of specific NUMA config implementations.
428 */ 429 */
429void __init numa_set_distance(int from, int to, int distance) 430void __init numa_set_distance(int from, int to, int distance)
@@ -431,8 +432,9 @@ void __init numa_set_distance(int from, int to, int distance)
431 if (!numa_distance && numa_alloc_distance() < 0) 432 if (!numa_distance && numa_alloc_distance() < 0)
432 return; 433 return;
433 434
434 if (from >= numa_distance_cnt || to >= numa_distance_cnt) { 435 if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
435 printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", 436 from < 0 || to < 0) {
437 pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
436 from, to, distance); 438 from, to, distance);
437 return; 439 return;
438 } 440 }
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 46db56845f18..53489ff6bf82 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,7 +28,7 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
28 return -ENOENT; 28 return -ENOENT;
29} 29}
30 30
31static u64 mem_hole_size(u64 start, u64 end) 31static u64 __init mem_hole_size(u64 start, u64 end)
32{ 32{
33 unsigned long start_pfn = PFN_UP(start); 33 unsigned long start_pfn = PFN_UP(start);
34 unsigned long end_pfn = PFN_DOWN(end); 34 unsigned long end_pfn = PFN_DOWN(end);
@@ -60,7 +60,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
60 eb->nid = nid; 60 eb->nid = nid;
61 61
62 if (emu_nid_to_phys[nid] == NUMA_NO_NODE) 62 if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
63 emu_nid_to_phys[nid] = pb->nid; 63 emu_nid_to_phys[nid] = nid;
64 64
65 pb->start += size; 65 pb->start += size;
66 if (pb->start >= pb->end) { 66 if (pb->start >= pb->end) {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index eda2acbb6e81..e1ebde315210 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1334,12 +1334,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1334 } 1334 }
1335 1335
1336 /* 1336 /*
1337 * If page allocator is not up yet then do not call c_p_a():
1338 */
1339 if (!debug_pagealloc_enabled)
1340 return;
1341
1342 /*
1343 * The return value is ignored as the calls cannot fail. 1337 * The return value is ignored as the calls cannot fail.
1344 * Large pages for identity mappings are not used at boot time 1338 * Large pages for identity mappings are not used at boot time
1345 * and hence no memory allocations during large page split. 1339 * and hence no memory allocations during large page split.
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cac718499256..a69bcb8c7621 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,7 +10,6 @@
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <linux/module.h> 11#include <linux/module.h>
12 12
13#include <asm/system.h>
14#include <asm/pgtable.h> 13#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 14#include <asm/pgalloc.h>
16#include <asm/fixmap.h> 15#include <asm/fixmap.h>
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index fd61b3fb7341..efb5b4b93711 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -70,7 +70,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
70 return; 70 return;
71 pxm = pa->proximity_domain; 71 pxm = pa->proximity_domain;
72 apic_id = pa->apic_id; 72 apic_id = pa->apic_id;
73 if (!cpu_has_x2apic && (apic_id >= 0xff)) { 73 if (!apic->apic_id_valid(apic_id)) {
74 printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n", 74 printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
75 pxm, apic_id); 75 pxm, apic_id);
76 return; 76 return;
@@ -109,6 +109,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
109 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) 109 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
110 return; 110 return;
111 pxm = pa->proximity_domain_lo; 111 pxm = pa->proximity_domain_lo;
112 if (acpi_srat_revision >= 2)
113 pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
112 node = setup_node(pxm); 114 node = setup_node(pxm);
113 if (node < 0) { 115 if (node < 0) {
114 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); 116 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -160,6 +162,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
160 start = ma->base_address; 162 start = ma->base_address;
161 end = start + ma->length; 163 end = start + ma->length;
162 pxm = ma->proximity_domain; 164 pxm = ma->proximity_domain;
165 if (acpi_srat_revision <= 1)
166 pxm &= 0xff;
163 node = setup_node(pxm); 167 node = setup_node(pxm);
164 if (node < 0) { 168 if (node < 0) {
165 printk(KERN_ERR "SRAT: Too many proximity domains.\n"); 169 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
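
The two SRAT hunks above widen proximity-domain parsing: a revision 1 table defines only the low byte, while revision 2+ supplies the full 32-bit value, with the CPU affinity structure keeping the upper 24 bits in a separate 3-byte field. A little-endian, byte-wise sketch of the composition (the patch itself uses an unaligned 4-byte cast); the struct layout is simplified here:

    #include <stdint.h>

    /* Simplified layout for illustration only. */
    struct srat_cpu_affinity_sketch {
            uint8_t proximity_domain_lo;
            uint8_t proximity_domain_hi[3];
    };

    static uint32_t srat_cpu_pxm(const struct srat_cpu_affinity_sketch *pa,
                                 int srat_revision)
    {
            uint32_t pxm = pa->proximity_domain_lo;

            if (srat_revision >= 2)         /* high bytes only valid here */
                    pxm |= (uint32_t)pa->proximity_domain_hi[0] << 8 |
                           (uint32_t)pa->proximity_domain_hi[1] << 16 |
                           (uint32_t)pa->proximity_domain_hi[2] << 24;
            return pxm;
    }

The memory-affinity structure already carries a 32-bit field, hence the inverse rule there: mask down to the low byte when the revision is 1 or older.
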
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 66870223f8c5..877b9a1b2152 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,17 @@
18 * r9d : hlen = skb->len - skb->data_len 18 * r9d : hlen = skb->len - skb->data_len
19 */ 19 */
20#define SKBDATA %r8 20#define SKBDATA %r8
21 21#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
22sk_load_word_ind:
23 .globl sk_load_word_ind
24
25 add %ebx,%esi /* offset += X */
26# test %esi,%esi /* if (offset < 0) goto bpf_error; */
27 js bpf_error
28 22
29sk_load_word: 23sk_load_word:
30 .globl sk_load_word 24 .globl sk_load_word
31 25
26 test %esi,%esi
27 js bpf_slow_path_word_neg
28
29sk_load_word_positive_offset:
30 .globl sk_load_word_positive_offset
31
32 mov %r9d,%eax # hlen 32 mov %r9d,%eax # hlen
33 sub %esi,%eax # hlen - offset 33 sub %esi,%eax # hlen - offset
34 cmp $3,%eax 34 cmp $3,%eax
@@ -37,16 +37,15 @@ sk_load_word:
37 bswap %eax /* ntohl() */ 37 bswap %eax /* ntohl() */
38 ret 38 ret
39 39
40
41sk_load_half_ind:
42 .globl sk_load_half_ind
43
44 add %ebx,%esi /* offset += X */
45 js bpf_error
46
47sk_load_half: 40sk_load_half:
48 .globl sk_load_half 41 .globl sk_load_half
49 42
43 test %esi,%esi
44 js bpf_slow_path_half_neg
45
46sk_load_half_positive_offset:
47 .globl sk_load_half_positive_offset
48
50 mov %r9d,%eax 49 mov %r9d,%eax
51 sub %esi,%eax # hlen - offset 50 sub %esi,%eax # hlen - offset
52 cmp $1,%eax 51 cmp $1,%eax
@@ -55,14 +54,15 @@ sk_load_half:
55 rol $8,%ax # ntohs() 54 rol $8,%ax # ntohs()
56 ret 55 ret
57 56
58sk_load_byte_ind:
59 .globl sk_load_byte_ind
60 add %ebx,%esi /* offset += X */
61 js bpf_error
62
63sk_load_byte: 57sk_load_byte:
64 .globl sk_load_byte 58 .globl sk_load_byte
65 59
60 test %esi,%esi
61 js bpf_slow_path_byte_neg
62
63sk_load_byte_positive_offset:
64 .globl sk_load_byte_positive_offset
65
66 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ 66 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
67 jle bpf_slow_path_byte 67 jle bpf_slow_path_byte
68 movzbl (SKBDATA,%rsi),%eax 68 movzbl (SKBDATA,%rsi),%eax
@@ -73,25 +73,21 @@ sk_load_byte:
73 * 73 *
74 * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) 74 * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
75 * Must preserve A accumulator (%eax) 75 * Must preserve A accumulator (%eax)
76 * Inputs : %esi is the offset value, already known positive 76 * Inputs : %esi is the offset value
77 */ 77 */
78ENTRY(sk_load_byte_msh) 78sk_load_byte_msh:
79 CFI_STARTPROC 79 .globl sk_load_byte_msh
80 test %esi,%esi
81 js bpf_slow_path_byte_msh_neg
82
83sk_load_byte_msh_positive_offset:
84 .globl sk_load_byte_msh_positive_offset
80 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ 85 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
81 jle bpf_slow_path_byte_msh 86 jle bpf_slow_path_byte_msh
82 movzbl (SKBDATA,%rsi),%ebx 87 movzbl (SKBDATA,%rsi),%ebx
83 and $15,%bl 88 and $15,%bl
84 shl $2,%bl 89 shl $2,%bl
85 ret 90 ret
86 CFI_ENDPROC
87ENDPROC(sk_load_byte_msh)
88
89bpf_error:
90# force a return 0 from jit handler
91 xor %eax,%eax
92 mov -8(%rbp),%rbx
93 leaveq
94 ret
95 91
96/* rsi contains offset and can be scratched */ 92/* rsi contains offset and can be scratched */
97#define bpf_slow_path_common(LEN) \ 93#define bpf_slow_path_common(LEN) \
@@ -138,3 +134,67 @@ bpf_slow_path_byte_msh:
138 shl $2,%al 134 shl $2,%al
139 xchg %eax,%ebx 135 xchg %eax,%ebx
140 ret 136 ret
137
138#define sk_negative_common(SIZE) \
139 push %rdi; /* save skb */ \
140 push %r9; \
141 push SKBDATA; \
142/* rsi already has offset */ \
143 mov $SIZE,%ecx; /* size */ \
144 call bpf_internal_load_pointer_neg_helper; \
145 test %rax,%rax; \
146 pop SKBDATA; \
147 pop %r9; \
148 pop %rdi; \
149 jz bpf_error
150
151
152bpf_slow_path_word_neg:
153 cmp SKF_MAX_NEG_OFF, %esi /* test range */
154 jl bpf_error /* offset lower -> error */
155sk_load_word_negative_offset:
156 .globl sk_load_word_negative_offset
157 sk_negative_common(4)
158 mov (%rax), %eax
159 bswap %eax
160 ret
161
162bpf_slow_path_half_neg:
163 cmp SKF_MAX_NEG_OFF, %esi
164 jl bpf_error
165sk_load_half_negative_offset:
166 .globl sk_load_half_negative_offset
167 sk_negative_common(2)
168 mov (%rax),%ax
169 rol $8,%ax
170 movzwl %ax,%eax
171 ret
172
173bpf_slow_path_byte_neg:
174 cmp SKF_MAX_NEG_OFF, %esi
175 jl bpf_error
176sk_load_byte_negative_offset:
177 .globl sk_load_byte_negative_offset
178 sk_negative_common(1)
179 movzbl (%rax), %eax
180 ret
181
182bpf_slow_path_byte_msh_neg:
183 cmp SKF_MAX_NEG_OFF, %esi
184 jl bpf_error
185sk_load_byte_msh_negative_offset:
186 .globl sk_load_byte_msh_negative_offset
187 xchg %eax,%ebx /* don't lose A, X is about to be scratched */
188 sk_negative_common(1)
189 movzbl (%rax),%eax
190 and $15,%al
191 shl $2,%al
192 xchg %eax,%ebx
193 ret
194
195bpf_error:
196# force a return 0 from jit handler
197 xor %eax,%eax
198 mov -8(%rbp),%rbx
199 leaveq
200 ret
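
sk_negative_common() above funnels every in-range negative offset through bpf_internal_load_pointer_neg_helper(), which resolves ancillary/link-layer offsets and returns NULL on failure, making the filter return 0 via bpf_error. A kernel-internal C sketch of the word-sized slow path (load_word_negative is a made-up name):

    #include <stdint.h>

    struct sk_buff;
    void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
                                               int k, unsigned int size);

    /* Illustrative sketch of the slow path above. */
    static int load_word_negative(const struct sk_buff *skb, int offset,
                                  uint32_t *val)
    {
            void *p;

            if (offset < -0x200000)         /* SKF_MAX_NEG_OFF range test */
                    return -1;              /* bpf_error: filter returns 0 */
            p = bpf_internal_load_pointer_neg_helper(skb, offset, 4);
            if (!p)
                    return -1;
            *val = __builtin_bswap32(*(uint32_t *)p);  /* bswap = ntohl (LE) */
            return 0;
    }
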
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7b65f752c5f8..0597f95b6da6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
30 * assembly code in arch/x86/net/bpf_jit.S 30 * assembly code in arch/x86/net/bpf_jit.S
31 */ 31 */
32extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; 32extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
33extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; 33extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
34extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
35extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
36extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
34 37
35static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) 38static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
36{ 39{
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
117 set_fs(old_fs); 120 set_fs(old_fs);
118} 121}
119 122
123#define CHOOSE_LOAD_FUNC(K, func) \
124 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
120 125
121void bpf_jit_compile(struct sk_filter *fp) 126void bpf_jit_compile(struct sk_filter *fp)
122{ 127{
@@ -151,17 +156,18 @@ void bpf_jit_compile(struct sk_filter *fp)
151 cleanup_addr = proglen; /* epilogue address */ 156 cleanup_addr = proglen; /* epilogue address */
152 157
153 for (pass = 0; pass < 10; pass++) { 158 for (pass = 0; pass < 10; pass++) {
159 u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
154 /* no prologue/epilogue for trivial filters (RET something) */ 160 /* no prologue/epilogue for trivial filters (RET something) */
155 proglen = 0; 161 proglen = 0;
156 prog = temp; 162 prog = temp;
157 163
158 if (seen) { 164 if (seen_or_pass0) {
159 EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ 165 EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
160 EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ 166 EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
161 /* note : must save %rbx in case bpf_error is hit */ 167 /* note : must save %rbx in case bpf_error is hit */
162 if (seen & (SEEN_XREG | SEEN_DATAREF)) 168 if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
163 EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ 169 EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
164 if (seen & SEEN_XREG) 170 if (seen_or_pass0 & SEEN_XREG)
165 CLEAR_X(); /* make sure we don't leak kernel memory */ 171 CLEAR_X(); /* make sure we don't leak kernel memory */
166 172
167 /* 173 /*
@@ -170,7 +176,7 @@ void bpf_jit_compile(struct sk_filter *fp)
170 * r9 = skb->len - skb->data_len 176 * r9 = skb->len - skb->data_len
171 * r8 = skb->data 177 * r8 = skb->data
172 */ 178 */
173 if (seen & SEEN_DATAREF) { 179 if (seen_or_pass0 & SEEN_DATAREF) {
174 if (offsetof(struct sk_buff, len) <= 127) 180 if (offsetof(struct sk_buff, len) <= 127)
175 /* mov off8(%rdi),%r9d */ 181 /* mov off8(%rdi),%r9d */
176 EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); 182 EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
@@ -260,9 +266,14 @@ void bpf_jit_compile(struct sk_filter *fp)
260 case BPF_S_ALU_DIV_X: /* A /= X; */ 266 case BPF_S_ALU_DIV_X: /* A /= X; */
261 seen |= SEEN_XREG; 267 seen |= SEEN_XREG;
262 EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ 268 EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
263 if (pc_ret0 != -1) 269 if (pc_ret0 > 0) {
264 EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); 270 /* addrs[pc_ret0 - 1] is start address of target
265 else { 271 * (addrs[i] - 4) is the address following this jmp
272 * ("xor %edx,%edx; div %ebx" being 4 bytes long)
273 */
274 EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
275 (addrs[i] - 4));
276 } else {
266 EMIT_COND_JMP(X86_JNE, 2 + 5); 277 EMIT_COND_JMP(X86_JNE, 2 + 5);
267 CLEAR_A(); 278 CLEAR_A();
268 EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ 279 EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
@@ -283,7 +294,7 @@ void bpf_jit_compile(struct sk_filter *fp)
283 EMIT2(0x24, K & 0xFF); /* and imm8,%al */ 294 EMIT2(0x24, K & 0xFF); /* and imm8,%al */
284 } else if (K >= 0xFFFF0000) { 295 } else if (K >= 0xFFFF0000) {
285 EMIT2(0x66, 0x25); /* and imm16,%ax */ 296 EMIT2(0x66, 0x25); /* and imm16,%ax */
286 EMIT2(K, 2); 297 EMIT(K, 2);
287 } else { 298 } else {
288 EMIT1_off32(0x25, K); /* and imm32,%eax */ 299 EMIT1_off32(0x25, K); /* and imm32,%eax */
289 } 300 }
@@ -335,12 +346,12 @@ void bpf_jit_compile(struct sk_filter *fp)
335 } 346 }
336 /* fall through */ 347 /* fall through */
337 case BPF_S_RET_A: 348 case BPF_S_RET_A:
338 if (seen) { 349 if (seen_or_pass0) {
339 if (i != flen - 1) { 350 if (i != flen - 1) {
340 EMIT_JMP(cleanup_addr - addrs[i]); 351 EMIT_JMP(cleanup_addr - addrs[i]);
341 break; 352 break;
342 } 353 }
343 if (seen & SEEN_XREG) 354 if (seen_or_pass0 & SEEN_XREG)
344 EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ 355 EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */
345 EMIT1(0xc9); /* leaveq */ 356 EMIT1(0xc9); /* leaveq */
346 } 357 }
@@ -467,47 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
467#endif 478#endif
468 break; 479 break;
469 case BPF_S_LD_W_ABS: 480 case BPF_S_LD_W_ABS:
470 func = sk_load_word; 481 func = CHOOSE_LOAD_FUNC(K, sk_load_word);
471common_load: seen |= SEEN_DATAREF; 482common_load: seen |= SEEN_DATAREF;
472 if ((int)K < 0)
473 goto out;
474 t_offset = func - (image + addrs[i]); 483 t_offset = func - (image + addrs[i]);
475 EMIT1_off32(0xbe, K); /* mov imm32,%esi */ 484 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
476 EMIT1_off32(0xe8, t_offset); /* call */ 485 EMIT1_off32(0xe8, t_offset); /* call */
477 break; 486 break;
478 case BPF_S_LD_H_ABS: 487 case BPF_S_LD_H_ABS:
479 func = sk_load_half; 488 func = CHOOSE_LOAD_FUNC(K, sk_load_half);
480 goto common_load; 489 goto common_load;
481 case BPF_S_LD_B_ABS: 490 case BPF_S_LD_B_ABS:
482 func = sk_load_byte; 491 func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
483 goto common_load; 492 goto common_load;
484 case BPF_S_LDX_B_MSH: 493 case BPF_S_LDX_B_MSH:
485 if ((int)K < 0) { 494 func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
486 if (pc_ret0 != -1) {
487 EMIT_JMP(addrs[pc_ret0] - addrs[i]);
488 break;
489 }
490 CLEAR_A();
491 EMIT_JMP(cleanup_addr - addrs[i]);
492 break;
493 }
494 seen |= SEEN_DATAREF | SEEN_XREG; 495 seen |= SEEN_DATAREF | SEEN_XREG;
495 t_offset = sk_load_byte_msh - (image + addrs[i]); 496 t_offset = func - (image + addrs[i]);
496 EMIT1_off32(0xbe, K); /* mov imm32,%esi */ 497 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
497 EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ 498 EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
498 break; 499 break;
499 case BPF_S_LD_W_IND: 500 case BPF_S_LD_W_IND:
500 func = sk_load_word_ind; 501 func = sk_load_word;
501common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; 502common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
502 t_offset = func - (image + addrs[i]); 503 t_offset = func - (image + addrs[i]);
503 EMIT1_off32(0xbe, K); /* mov imm32,%esi */ 504 if (K) {
505 if (is_imm8(K)) {
506 EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
507 } else {
508 EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
509 EMIT(K, 4);
510 }
511 } else {
512 EMIT2(0x89,0xde); /* mov %ebx,%esi */
513 }
504 EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ 514 EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
505 break; 515 break;
506 case BPF_S_LD_H_IND: 516 case BPF_S_LD_H_IND:
507 func = sk_load_half_ind; 517 func = sk_load_half;
508 goto common_load_ind; 518 goto common_load_ind;
509 case BPF_S_LD_B_IND: 519 case BPF_S_LD_B_IND:
510 func = sk_load_byte_ind; 520 func = sk_load_byte;
511 goto common_load_ind; 521 goto common_load_ind;
512 case BPF_S_JMP_JA: 522 case BPF_S_JMP_JA:
513 t_offset = addrs[i + K] - addrs[i]; 523 t_offset = addrs[i + K] - addrs[i];
@@ -599,13 +609,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
599 * use it to give the cleanup instruction(s) addr 609 * use it to give the cleanup instruction(s) addr
600 */ 610 */
601 cleanup_addr = proglen - 1; /* ret */ 611 cleanup_addr = proglen - 1; /* ret */
602 if (seen) 612 if (seen_or_pass0)
603 cleanup_addr -= 1; /* leaveq */ 613 cleanup_addr -= 1; /* leaveq */
604 if (seen & SEEN_XREG) 614 if (seen_or_pass0 & SEEN_XREG)
605 cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ 615 cleanup_addr -= 4; /* mov -8(%rbp),%rbx */
606 616
607 if (image) { 617 if (image) {
608 WARN_ON(proglen != oldproglen); 618 if (proglen != oldproglen)
619 pr_err("bpf_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen);
609 break; 620 break;
610 } 621 }
611 if (proglen == oldproglen) { 622 if (proglen == oldproglen) {
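
Since K is a compile-time constant in the JIT, CHOOSE_LOAD_FUNC() above can pick the helper variant statically: positive constants skip the sign test entirely, in-range negative constants go straight to the negative-offset path, and anything below SKF_LL_OFF falls back to the generic helper, whose runtime range check then takes the error path. The same selection written out as plain C (the enum and function names are illustrative):

    #define SKF_LL_OFF (-0x200000)

    /* Illustrative rendering of CHOOSE_LOAD_FUNC() above. */
    enum load_helper { LOAD_ANY, LOAD_POSITIVE, LOAD_NEGATIVE };

    static enum load_helper choose_load_helper(unsigned int k)
    {
            if ((int)k < 0) {
                    if ((int)k >= SKF_LL_OFF)
                            return LOAD_NEGATIVE;   /* constant, in range */
                    /* below SKF_LL_OFF: generic helper errors out at runtime */
                    return LOAD_ANY;
            }
            return LOAD_POSITIVE;                   /* constant, >= 0 */
    }
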
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index bff89dfe3619..d6aa6e8315d1 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -67,7 +67,7 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
67{ 67{
68 struct stack_frame_ia32 *head; 68 struct stack_frame_ia32 *head;
69 69
70 /* User process is 32-bit */ 70 /* User process is IA32 */
71 if (!current || !test_thread_flag(TIF_IA32)) 71 if (!current || !test_thread_flag(TIF_IA32))
72 return 0; 72 return 0;
73 73
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 6b8759f7634e..e76e18c94a3c 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -15,11 +15,12 @@ obj-$(CONFIG_X86_VISWS) += visws.o
15 15
16obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 16obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
17 17
18obj-$(CONFIG_X86_MRST) += mrst.o 18obj-$(CONFIG_X86_INTEL_MID) += mrst.o
19 19
20obj-y += common.o early.o 20obj-y += common.o early.o
21obj-y += amd_bus.o bus_numa.o 21obj-y += bus_numa.o
22 22
23obj-$(CONFIG_AMD_NB) += amd_bus.o
23obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o 24obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o
24 25
25ifeq ($(CONFIG_PCI_DEBUG),y) 26ifeq ($(CONFIG_PCI_DEBUG),y)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index a312e76063a7..ed2835e148b5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -60,6 +60,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
60 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), 60 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
61 }, 61 },
62 }, 62 },
63 /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
64 {
65 .callback = set_use_crs,
66 .ident = "MSI MS-7253",
67 .matches = {
68 DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
69 DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
70 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
71 },
72 },
63 73
64 /* Now for the blacklist.. */ 74 /* Now for the blacklist.. */
65 75
@@ -282,9 +292,6 @@ static void add_resources(struct pci_root_info *info)
282 int i; 292 int i;
283 struct resource *res, *root, *conflict; 293 struct resource *res, *root, *conflict;
284 294
285 if (!pci_use_crs)
286 return;
287
288 coalesce_windows(info, IORESOURCE_MEM); 295 coalesce_windows(info, IORESOURCE_MEM);
289 coalesce_windows(info, IORESOURCE_IO); 296 coalesce_windows(info, IORESOURCE_IO);
290 297
@@ -336,8 +343,13 @@ get_current_resources(struct acpi_device *device, int busnum,
336 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, 343 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
337 &info); 344 &info);
338 345
339 add_resources(&info); 346 if (pci_use_crs) {
340 return; 347 add_resources(&info);
348
349 return;
350 }
351
352 kfree(info.name);
341 353
342name_alloc_fail: 354name_alloc_fail:
343 kfree(info.res); 355 kfree(info.res);
@@ -404,7 +416,12 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
404 kfree(sd); 416 kfree(sd);
405 } else { 417 } else {
406 get_current_resources(device, busnum, domain, &resources); 418 get_current_resources(device, busnum, domain, &resources);
407 if (list_empty(&resources)) 419
420 /*
421 * _CRS with no apertures is normal, so only fall back to
422 * defaults or native bridge info if we're ignoring _CRS.
423 */
424 if (!pci_use_crs)
408 x86_pci_root_bus_resources(busnum, &resources); 425 x86_pci_root_bus_resources(busnum, &resources);
409 bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, 426 bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd,
410 &resources); 427 &resources);
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 6dd89555fbfa..d0e6e403b4f6 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -164,11 +164,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_
164 */ 164 */
165static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) 165static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev)
166{ 166{
167 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && 167 if ((dev->device & 0xff00) == 0x2400)
168 (dev->device & 0xff00) == 0x2400)
169 dev->transparent = 1; 168 dev->transparent = 1;
170} 169}
171DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge); 170DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
171 PCI_CLASS_BRIDGE_PCI, 8, pci_fixup_transparent_bridge);
172 172
173/* 173/*
174 * Fixup for C1 Halt Disconnect problem on nForce2 systems. 174 * Fixup for C1 Halt Disconnect problem on nForce2 systems.
@@ -322,9 +322,6 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
322 struct pci_bus *bus; 322 struct pci_bus *bus;
323 u16 config; 323 u16 config;
324 324
325 if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
326 return;
327
328 /* Is VGA routed to us? */ 325 /* Is VGA routed to us? */
329 bus = pdev->bus; 326 bus = pdev->bus;
330 while (bus) { 327 while (bus) {
@@ -353,7 +350,8 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
353 dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); 350 dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
354 } 351 }
355} 352}
356DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); 353DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
354 PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
357 355
358 356
359static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { 357static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 91821a1a0c3a..831971e731f7 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -39,6 +39,87 @@
39#include <asm/io_apic.h> 39#include <asm/io_apic.h>
40 40
41 41
42/*
43 * This list of dynamic mappings is for temporarily maintaining
44 * original BIOS BAR addresses for possible reinstatement.
45 */
46struct pcibios_fwaddrmap {
47 struct list_head list;
48 struct pci_dev *dev;
49 resource_size_t fw_addr[DEVICE_COUNT_RESOURCE];
50};
51
52static LIST_HEAD(pcibios_fwaddrmappings);
53static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock);
54
55/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */
56static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
57{
58 struct pcibios_fwaddrmap *map;
59
60 WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock));
61
62 list_for_each_entry(map, &pcibios_fwaddrmappings, list)
63 if (map->dev == dev)
64 return map;
65
66 return NULL;
67}
68
69static void
70pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr)
71{
72 unsigned long flags;
73 struct pcibios_fwaddrmap *map;
74
75 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
76 map = pcibios_fwaddrmap_lookup(dev);
77 if (!map) {
78 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
79 map = kzalloc(sizeof(*map), GFP_KERNEL);
80 if (!map)
81 return;
82
83 map->dev = pci_dev_get(dev);
84 map->fw_addr[idx] = fw_addr;
85 INIT_LIST_HEAD(&map->list);
86
87 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
88 list_add_tail(&map->list, &pcibios_fwaddrmappings);
89 } else
90 map->fw_addr[idx] = fw_addr;
91 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
92}
93
94resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
95{
96 unsigned long flags;
97 struct pcibios_fwaddrmap *map;
98 resource_size_t fw_addr = 0;
99
100 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
101 map = pcibios_fwaddrmap_lookup(dev);
102 if (map)
103 fw_addr = map->fw_addr[idx];
104 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
105
106 return fw_addr;
107}
108
109static void pcibios_fw_addr_list_del(void)
110{
111 unsigned long flags;
112 struct pcibios_fwaddrmap *entry, *next;
113
114 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
115 list_for_each_entry_safe(entry, next, &pcibios_fwaddrmappings, list) {
116 list_del(&entry->list);
117 pci_dev_put(entry->dev);
118 kfree(entry);
119 }
120 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
121}
122
42static int 123static int
43skip_isa_ioresource_align(struct pci_dev *dev) { 124skip_isa_ioresource_align(struct pci_dev *dev) {
44 125
@@ -182,7 +263,8 @@ static void __init pcibios_allocate_resources(int pass)
182 idx, r, disabled, pass); 263 idx, r, disabled, pass);
183 if (pci_claim_resource(dev, idx) < 0) { 264 if (pci_claim_resource(dev, idx) < 0) {
184 /* We'll assign a new address later */ 265 /* We'll assign a new address later */
185 dev->fw_addr[idx] = r->start; 266 pcibios_save_fw_addr(dev,
267 idx, r->start);
186 r->end -= r->start; 268 r->end -= r->start;
187 r->start = 0; 269 r->start = 0;
188 } 270 }
@@ -228,6 +310,7 @@ static int __init pcibios_assign_resources(void)
228 } 310 }
229 311
230 pci_assign_unassigned_resources(); 312 pci_assign_unassigned_resources();
313 pcibios_fw_addr_list_del();
231 314
232 return 0; 315 return 0;
233} 316}
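
The i386.c hunk moves saved firmware BAR addresses out of struct pci_dev (the old dev->fw_addr[] field) into a standalone, spinlock-protected list that is populated during pcibios_allocate_resources(), consulted via pcibios_retrieve_fw_addr(), and torn down by pcibios_fw_addr_list_del() once assignment finishes. Below is a user-space sketch of the same save/lookup/teardown pattern, using a plain singly linked list and an int key in place of list_head, the spinlock, and struct pci_dev.

#include <stdio.h>
#include <stdlib.h>

#define NRES 6	/* stand-in for DEVICE_COUNT_RESOURCE */

struct fwaddrmap {
	struct fwaddrmap *next;
	int dev;			/* stand-in for struct pci_dev * */
	unsigned long fw_addr[NRES];
};

static struct fwaddrmap *maps;

static struct fwaddrmap *lookup(int dev)
{
	struct fwaddrmap *m;

	for (m = maps; m; m = m->next)
		if (m->dev == dev)
			return m;
	return NULL;
}

static void save_fw_addr(int dev, int idx, unsigned long addr)
{
	struct fwaddrmap *m = lookup(dev);

	if (!m) {
		m = calloc(1, sizeof(*m));
		if (!m)
			return;		/* best effort, like the kernel */
		m->dev = dev;
		m->next = maps;
		maps = m;
	}
	m->fw_addr[idx] = addr;
}

static void list_del_all(void)
{
	while (maps) {
		struct fwaddrmap *m = maps;

		maps = m->next;
		free(m);
	}
}

int main(void)
{
	save_fw_addr(1, 0, 0xfebf0000UL);
	printf("dev 1 BAR0 fw addr: %#lx\n", lookup(1)->fw_addr[0]);
	list_del_all();
	return 0;
}
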
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index cb29191cee58..140942f66b31 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -43,6 +43,8 @@
43#define PCI_FIXED_BAR_4_SIZE 0x14 43#define PCI_FIXED_BAR_4_SIZE 0x14
44#define PCI_FIXED_BAR_5_SIZE 0x1c 44#define PCI_FIXED_BAR_5_SIZE 0x1c
45 45
46static int pci_soc_mode = 0;
47
46/** 48/**
47 * fixed_bar_cap - return the offset of the fixed BAR cap if found 49 * fixed_bar_cap - return the offset of the fixed BAR cap if found
48 * @bus: PCI bus 50 * @bus: PCI bus
@@ -148,7 +150,9 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
148 */ 150 */
149 if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) 151 if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
150 return 0; 152 return 0;
151 if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) 153 if (bus == 0 && (devfn == PCI_DEVFN(2, 0)
154 || devfn == PCI_DEVFN(0, 0)
155 || devfn == PCI_DEVFN(3, 0)))
152 return 1; 156 return 1;
153 return 0; /* langwell on others */ 157 return 0; /* langwell on others */
154} 158}
@@ -231,14 +235,43 @@ struct pci_ops pci_mrst_ops = {
231 */ 235 */
232int __init pci_mrst_init(void) 236int __init pci_mrst_init(void)
233{ 237{
234 printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); 238 printk(KERN_INFO "Intel MID platform detected, using MID PCI ops\n");
235 pci_mmcfg_late_init(); 239 pci_mmcfg_late_init();
236 pcibios_enable_irq = mrst_pci_irq_enable; 240 pcibios_enable_irq = mrst_pci_irq_enable;
237 pci_root_ops = pci_mrst_ops; 241 pci_root_ops = pci_mrst_ops;
242 pci_soc_mode = 1;
238 /* Continue with standard init */ 243 /* Continue with standard init */
239 return 1; 244 return 1;
240} 245}
241 246
 247/* Langwell devices are not true PCI devices; they are not subject to the 10 ms
 248 * D3-to-D0 delay required by the PCI spec.
249 */
250static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
251{
 252 /* PCI fixups are effectively decided at compile time. If we have a dual
 253 SoC/non-SoC kernel, we don't want to mangle d3 on non-SoC devices */
254 if (!pci_soc_mode)
255 return;
 256 /* True PCI devices in Lincroft should allow type 1 access; the rest
 257 * are Langwell fake PCI devices.
258 */
259 if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID))
260 return;
261 dev->d3_delay = 0;
262}
263DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
264
265static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev)
266{
267 pci_set_power_state(dev, PCI_D3cold);
268}
269DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev);
270DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev);
271DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev);
272DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev);
273DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev);
274
242/* 275/*
243 * Langwell devices reside at fixed offsets, don't try to move them. 276 * Langwell devices reside at fixed offsets, don't try to move them.
244 */ 277 */
@@ -248,6 +281,9 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
248 u32 size; 281 u32 size;
249 int i; 282 int i;
250 283
284 if (!pci_soc_mode)
285 return;
286
251 /* Must have extended configuration space */ 287 /* Must have extended configuration space */
252 if (dev->cfg_size < PCIE_CAP_OFFSET + 4) 288 if (dev->cfg_size < PCIE_CAP_OFFSET + 4)
253 return; 289 return;
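
The mrst.c quirks above are all gated on the new pci_soc_mode flag. PCI fixups are selected at compile time, so a kernel image built for both SoC and standard x86 would otherwise apply the Langwell workarounds (zero d3_delay, immovable fixed BARs, powering off unused devices) to ordinary PCs; the flag, set only by pci_mrst_init(), turns them into run-time no-ops everywhere else. The shape of that guard, in a standalone sketch:

#include <stdio.h>

static int pci_soc_mode;	/* set once when the MID PCI ops are installed */

static void d3delay_fixup(int *d3_delay)
{
	if (!pci_soc_mode)
		return;		/* ordinary PC: keep the spec-mandated 10 ms */
	*d3_delay = 0;		/* fake Langwell device: no D3hot->D0 delay */
}

int main(void)
{
	int delay = 10;

	d3delay_fixup(&delay);	/* not in SoC mode: stays 10 */
	pci_soc_mode = 1;
	d3delay_fixup(&delay);	/* SoC mode: dropped to 0 */
	printf("d3 delay: %d ms\n", delay);
	return 0;
}
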
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 249a5ae17d02..7415aa927913 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -400,7 +400,7 @@ int __init pci_xen_init(void)
400 400
401int __init pci_xen_hvm_init(void) 401int __init pci_xen_hvm_init(void)
402{ 402{
403 if (!xen_feature(XENFEAT_hvm_pirqs)) 403 if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
404 return 0; 404 return 0;
405 405
406#ifdef CONFIG_ACPI 406#ifdef CONFIG_ACPI
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index e70be38ce039..ce874f872cc6 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -208,16 +208,19 @@
208 interrupts = <14 1>; 208 interrupts = <14 1>;
209 }; 209 };
210 210
211 gpio@b,1 { 211 pcigpio: gpio@b,1 {
212 #gpio-cells = <2>;
213 #interrupt-cells = <2>;
212 compatible = "pci8086,2e67.2", 214 compatible = "pci8086,2e67.2",
213 "pci8086,2e67", 215 "pci8086,2e67",
214 "pciclassff0000", 216 "pciclassff0000",
215 "pciclassff00"; 217 "pciclassff00";
216 218
217 #gpio-cells = <2>;
218 reg = <0x15900 0x0 0x0 0x0 0x0>; 219 reg = <0x15900 0x0 0x0 0x0 0x0>;
219 interrupts = <15 1>; 220 interrupts = <15 1>;
221 interrupt-controller;
220 gpio-controller; 222 gpio-controller;
223 intel,muxctl = <0>;
221 }; 224 };
222 225
223 i2c-controller@b,2 { 226 i2c-controller@b,2 {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 4cf9bd0a1653..92660edaa1e7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -26,6 +26,8 @@
26 * Skip non-WB memory and ignore empty memory ranges. 26 * Skip non-WB memory and ignore empty memory ranges.
27 */ 27 */
28 28
29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30
29#include <linux/kernel.h> 31#include <linux/kernel.h>
30#include <linux/init.h> 32#include <linux/init.h>
31#include <linux/efi.h> 33#include <linux/efi.h>
@@ -47,7 +49,6 @@
47#include <asm/x86_init.h> 49#include <asm/x86_init.h>
48 50
49#define EFI_DEBUG 1 51#define EFI_DEBUG 1
50#define PFX "EFI: "
51 52
52int efi_enabled; 53int efi_enabled;
53EXPORT_SYMBOL(efi_enabled); 54EXPORT_SYMBOL(efi_enabled);
@@ -67,6 +68,9 @@ EXPORT_SYMBOL(efi);
67 68
68struct efi_memory_map memmap; 69struct efi_memory_map memmap;
69 70
71bool efi_64bit;
72static bool efi_native;
73
70static struct efi efi_phys __initdata; 74static struct efi efi_phys __initdata;
71static efi_system_table_t efi_systab __initdata; 75static efi_system_table_t efi_systab __initdata;
72 76
@@ -254,7 +258,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
254 258
255 status = efi.get_time(&eft, &cap); 259 status = efi.get_time(&eft, &cap);
256 if (status != EFI_SUCCESS) { 260 if (status != EFI_SUCCESS) {
257 printk(KERN_ERR "Oops: efitime: can't read time!\n"); 261 pr_err("Oops: efitime: can't read time!\n");
258 return -1; 262 return -1;
259 } 263 }
260 264
@@ -268,7 +272,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
268 272
269 status = efi.set_time(&eft); 273 status = efi.set_time(&eft);
270 if (status != EFI_SUCCESS) { 274 if (status != EFI_SUCCESS) {
271 printk(KERN_ERR "Oops: efitime: can't write time!\n"); 275 pr_err("Oops: efitime: can't write time!\n");
272 return -1; 276 return -1;
273 } 277 }
274 return 0; 278 return 0;
@@ -282,7 +286,7 @@ unsigned long efi_get_time(void)
282 286
283 status = efi.get_time(&eft, &cap); 287 status = efi.get_time(&eft, &cap);
284 if (status != EFI_SUCCESS) 288 if (status != EFI_SUCCESS)
285 printk(KERN_ERR "Oops: efitime: can't read time!\n"); 289 pr_err("Oops: efitime: can't read time!\n");
286 290
287 return mktime(eft.year, eft.month, eft.day, eft.hour, 291 return mktime(eft.year, eft.month, eft.day, eft.hour,
288 eft.minute, eft.second); 292 eft.minute, eft.second);
@@ -338,11 +342,16 @@ static void __init do_add_efi_memmap(void)
338 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 342 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
339} 343}
340 344
341void __init efi_memblock_x86_reserve_range(void) 345int __init efi_memblock_x86_reserve_range(void)
342{ 346{
343 unsigned long pmap; 347 unsigned long pmap;
344 348
345#ifdef CONFIG_X86_32 349#ifdef CONFIG_X86_32
350 /* Can't handle data above 4GB at this time */
351 if (boot_params.efi_info.efi_memmap_hi) {
352 pr_err("Memory map is above 4GB, disabling EFI.\n");
353 return -EINVAL;
354 }
346 pmap = boot_params.efi_info.efi_memmap; 355 pmap = boot_params.efi_info.efi_memmap;
347#else 356#else
348 pmap = (boot_params.efi_info.efi_memmap | 357 pmap = (boot_params.efi_info.efi_memmap |
@@ -354,6 +363,8 @@ void __init efi_memblock_x86_reserve_range(void)
354 memmap.desc_version = boot_params.efi_info.efi_memdesc_version; 363 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
355 memmap.desc_size = boot_params.efi_info.efi_memdesc_size; 364 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
356 memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); 365 memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
366
367 return 0;
357} 368}
358 369
359#if EFI_DEBUG 370#if EFI_DEBUG
@@ -367,7 +378,7 @@ static void __init print_efi_memmap(void)
367 p < memmap.map_end; 378 p < memmap.map_end;
368 p += memmap.desc_size, i++) { 379 p += memmap.desc_size, i++) {
369 md = p; 380 md = p;
370 printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, " 381 pr_info("mem%02u: type=%u, attr=0x%llx, "
371 "range=[0x%016llx-0x%016llx) (%lluMB)\n", 382 "range=[0x%016llx-0x%016llx) (%lluMB)\n",
372 i, md->type, md->attribute, md->phys_addr, 383 i, md->type, md->attribute, md->phys_addr,
373 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), 384 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
@@ -400,7 +411,7 @@ void __init efi_reserve_boot_services(void)
400 memblock_is_region_reserved(start, size)) { 411 memblock_is_region_reserved(start, size)) {
401 /* Could not reserve, skip it */ 412 /* Could not reserve, skip it */
402 md->num_pages = 0; 413 md->num_pages = 0;
403 memblock_dbg(PFX "Could not reserve boot range " 414 memblock_dbg("Could not reserve boot range "
404 "[0x%010llx-0x%010llx]\n", 415 "[0x%010llx-0x%010llx]\n",
405 start, start+size-1); 416 start, start+size-1);
406 } else 417 } else
@@ -429,103 +440,172 @@ static void __init efi_free_boot_services(void)
429 } 440 }
430} 441}
431 442
432void __init efi_init(void) 443static int __init efi_systab_init(void *phys)
433{ 444{
434 efi_config_table_t *config_tables; 445 if (efi_64bit) {
435 efi_runtime_services_t *runtime; 446 efi_system_table_64_t *systab64;
436 efi_char16_t *c16; 447 u64 tmp = 0;
437 char vendor[100] = "unknown"; 448
438 int i = 0; 449 systab64 = early_ioremap((unsigned long)phys,
439 void *tmp; 450 sizeof(*systab64));
451 if (systab64 == NULL) {
452 pr_err("Couldn't map the system table!\n");
453 return -ENOMEM;
454 }
440 455
456 efi_systab.hdr = systab64->hdr;
457 efi_systab.fw_vendor = systab64->fw_vendor;
458 tmp |= systab64->fw_vendor;
459 efi_systab.fw_revision = systab64->fw_revision;
460 efi_systab.con_in_handle = systab64->con_in_handle;
461 tmp |= systab64->con_in_handle;
462 efi_systab.con_in = systab64->con_in;
463 tmp |= systab64->con_in;
464 efi_systab.con_out_handle = systab64->con_out_handle;
465 tmp |= systab64->con_out_handle;
466 efi_systab.con_out = systab64->con_out;
467 tmp |= systab64->con_out;
468 efi_systab.stderr_handle = systab64->stderr_handle;
469 tmp |= systab64->stderr_handle;
470 efi_systab.stderr = systab64->stderr;
471 tmp |= systab64->stderr;
472 efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
473 tmp |= systab64->runtime;
474 efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
475 tmp |= systab64->boottime;
476 efi_systab.nr_tables = systab64->nr_tables;
477 efi_systab.tables = systab64->tables;
478 tmp |= systab64->tables;
479
480 early_iounmap(systab64, sizeof(*systab64));
441#ifdef CONFIG_X86_32 481#ifdef CONFIG_X86_32
442 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; 482 if (tmp >> 32) {
443#else 483 pr_err("EFI data located above 4GB, disabling EFI.\n");
444 efi_phys.systab = (efi_system_table_t *) 484 return -EINVAL;
445 (boot_params.efi_info.efi_systab | 485 }
446 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
447#endif 486#endif
487 } else {
488 efi_system_table_32_t *systab32;
489
490 systab32 = early_ioremap((unsigned long)phys,
491 sizeof(*systab32));
492 if (systab32 == NULL) {
493 pr_err("Couldn't map the system table!\n");
494 return -ENOMEM;
495 }
496
497 efi_systab.hdr = systab32->hdr;
498 efi_systab.fw_vendor = systab32->fw_vendor;
499 efi_systab.fw_revision = systab32->fw_revision;
500 efi_systab.con_in_handle = systab32->con_in_handle;
501 efi_systab.con_in = systab32->con_in;
502 efi_systab.con_out_handle = systab32->con_out_handle;
503 efi_systab.con_out = systab32->con_out;
504 efi_systab.stderr_handle = systab32->stderr_handle;
505 efi_systab.stderr = systab32->stderr;
506 efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
507 efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
508 efi_systab.nr_tables = systab32->nr_tables;
509 efi_systab.tables = systab32->tables;
510
511 early_iounmap(systab32, sizeof(*systab32));
512 }
448 513
449 efi.systab = early_ioremap((unsigned long)efi_phys.systab,
450 sizeof(efi_system_table_t));
451 if (efi.systab == NULL)
452 printk(KERN_ERR "Couldn't map the EFI system table!\n");
453 memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
454 early_iounmap(efi.systab, sizeof(efi_system_table_t));
455 efi.systab = &efi_systab; 514 efi.systab = &efi_systab;
456 515
457 /* 516 /*
458 * Verify the EFI Table 517 * Verify the EFI Table
459 */ 518 */
460 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) 519 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
461 printk(KERN_ERR "EFI system table signature incorrect!\n"); 520 pr_err("System table signature incorrect!\n");
521 return -EINVAL;
522 }
462 if ((efi.systab->hdr.revision >> 16) == 0) 523 if ((efi.systab->hdr.revision >> 16) == 0)
463 printk(KERN_ERR "Warning: EFI system table version " 524 pr_err("Warning: System table version "
464 "%d.%02d, expected 1.00 or greater!\n", 525 "%d.%02d, expected 1.00 or greater!\n",
465 efi.systab->hdr.revision >> 16, 526 efi.systab->hdr.revision >> 16,
466 efi.systab->hdr.revision & 0xffff); 527 efi.systab->hdr.revision & 0xffff);
467 528
468 /* 529 return 0;
469 * Show what we know for posterity 530}
470 */
471 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
472 if (c16) {
473 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
474 vendor[i] = *c16++;
475 vendor[i] = '\0';
476 } else
477 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
478 early_iounmap(tmp, 2);
479 531
480 printk(KERN_INFO "EFI v%u.%.02u by %s\n", 532static int __init efi_config_init(u64 tables, int nr_tables)
481 efi.systab->hdr.revision >> 16, 533{
482 efi.systab->hdr.revision & 0xffff, vendor); 534 void *config_tables, *tablep;
535 int i, sz;
536
537 if (efi_64bit)
538 sz = sizeof(efi_config_table_64_t);
539 else
540 sz = sizeof(efi_config_table_32_t);
483 541
484 /* 542 /*
485 * Let's see what config tables the firmware passed to us. 543 * Let's see what config tables the firmware passed to us.
486 */ 544 */
487 config_tables = early_ioremap( 545 config_tables = early_ioremap(tables, nr_tables * sz);
488 efi.systab->tables, 546 if (config_tables == NULL) {
489 efi.systab->nr_tables * sizeof(efi_config_table_t)); 547 pr_err("Could not map Configuration table!\n");
490 if (config_tables == NULL) 548 return -ENOMEM;
491 printk(KERN_ERR "Could not map EFI Configuration Table!\n"); 549 }
492 550
493 printk(KERN_INFO); 551 tablep = config_tables;
552 pr_info("");
494 for (i = 0; i < efi.systab->nr_tables; i++) { 553 for (i = 0; i < efi.systab->nr_tables; i++) {
495 if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) { 554 efi_guid_t guid;
496 efi.mps = config_tables[i].table; 555 unsigned long table;
497 printk(" MPS=0x%lx ", config_tables[i].table); 556
498 } else if (!efi_guidcmp(config_tables[i].guid, 557 if (efi_64bit) {
499 ACPI_20_TABLE_GUID)) { 558 u64 table64;
500 efi.acpi20 = config_tables[i].table; 559 guid = ((efi_config_table_64_t *)tablep)->guid;
501 printk(" ACPI 2.0=0x%lx ", config_tables[i].table); 560 table64 = ((efi_config_table_64_t *)tablep)->table;
502 } else if (!efi_guidcmp(config_tables[i].guid, 561 table = table64;
503 ACPI_TABLE_GUID)) { 562#ifdef CONFIG_X86_32
504 efi.acpi = config_tables[i].table; 563 if (table64 >> 32) {
505 printk(" ACPI=0x%lx ", config_tables[i].table); 564 pr_cont("\n");
506 } else if (!efi_guidcmp(config_tables[i].guid, 565 pr_err("Table located above 4GB, disabling EFI.\n");
507 SMBIOS_TABLE_GUID)) { 566 early_iounmap(config_tables,
508 efi.smbios = config_tables[i].table; 567 efi.systab->nr_tables * sz);
509 printk(" SMBIOS=0x%lx ", config_tables[i].table); 568 return -EINVAL;
569 }
570#endif
571 } else {
572 guid = ((efi_config_table_32_t *)tablep)->guid;
573 table = ((efi_config_table_32_t *)tablep)->table;
574 }
575 if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
576 efi.mps = table;
577 pr_cont(" MPS=0x%lx ", table);
578 } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
579 efi.acpi20 = table;
580 pr_cont(" ACPI 2.0=0x%lx ", table);
581 } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
582 efi.acpi = table;
583 pr_cont(" ACPI=0x%lx ", table);
584 } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
585 efi.smbios = table;
586 pr_cont(" SMBIOS=0x%lx ", table);
510#ifdef CONFIG_X86_UV 587#ifdef CONFIG_X86_UV
511 } else if (!efi_guidcmp(config_tables[i].guid, 588 } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
512 UV_SYSTEM_TABLE_GUID)) { 589 efi.uv_systab = table;
513 efi.uv_systab = config_tables[i].table; 590 pr_cont(" UVsystab=0x%lx ", table);
514 printk(" UVsystab=0x%lx ", config_tables[i].table);
515#endif 591#endif
516 } else if (!efi_guidcmp(config_tables[i].guid, 592 } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
517 HCDP_TABLE_GUID)) { 593 efi.hcdp = table;
518 efi.hcdp = config_tables[i].table; 594 pr_cont(" HCDP=0x%lx ", table);
519 printk(" HCDP=0x%lx ", config_tables[i].table); 595 } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
520 } else if (!efi_guidcmp(config_tables[i].guid, 596 efi.uga = table;
521 UGA_IO_PROTOCOL_GUID)) { 597 pr_cont(" UGA=0x%lx ", table);
522 efi.uga = config_tables[i].table;
523 printk(" UGA=0x%lx ", config_tables[i].table);
524 } 598 }
599 tablep += sz;
525 } 600 }
526 printk("\n"); 601 pr_cont("\n");
527 early_iounmap(config_tables, 602 early_iounmap(config_tables, efi.systab->nr_tables * sz);
528 efi.systab->nr_tables * sizeof(efi_config_table_t)); 603 return 0;
604}
605
606static int __init efi_runtime_init(void)
607{
608 efi_runtime_services_t *runtime;
529 609
530 /* 610 /*
531 * Check out the runtime services table. We need to map 611 * Check out the runtime services table. We need to map
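
The new efi_config_init() above walks the firmware configuration table with an explicit byte stride chosen from the firmware bitness (tablep += sz), casting each entry as it goes, instead of indexing an array of one fixed struct type. A simplified standalone model of that stride-based walk; the entry layouts here are cut down for brevity, and a real efi_guid_t is 16 bytes:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ent32 { uint32_t guid; uint32_t table; };
struct ent64 { uint32_t guid; uint64_t table; };

static void walk(const void *tables, int nr, int is64)
{
	size_t sz = is64 ? sizeof(struct ent64) : sizeof(struct ent32);
	const char *tablep = tables;
	int i;

	for (i = 0; i < nr; i++, tablep += sz) {
		uint64_t table;

		/* cast per entry, exactly once, at the current stride */
		if (is64)
			table = ((const struct ent64 *)tablep)->table;
		else
			table = ((const struct ent32 *)tablep)->table;
		printf("entry %d: table=%#llx\n", i, (unsigned long long)table);
	}
}

int main(void)
{
	struct ent32 t[2] = { { 1, 0x1000 }, { 2, 0x2000 } };

	walk(t, 2, 0);
	return 0;
}
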
@@ -535,43 +615,116 @@ void __init efi_init(void)
535 */ 615 */
536 runtime = early_ioremap((unsigned long)efi.systab->runtime, 616 runtime = early_ioremap((unsigned long)efi.systab->runtime,
537 sizeof(efi_runtime_services_t)); 617 sizeof(efi_runtime_services_t));
538 if (runtime != NULL) { 618 if (!runtime) {
539 /* 619 pr_err("Could not map the runtime service table!\n");
540 * We will only need *early* access to the following 620 return -ENOMEM;
541 * two EFI runtime services before set_virtual_address_map 621 }
542 * is invoked. 622 /*
543 */ 623 * We will only need *early* access to the following
544 efi_phys.get_time = (efi_get_time_t *)runtime->get_time; 624 * two EFI runtime services before set_virtual_address_map
545 efi_phys.set_virtual_address_map = 625 * is invoked.
546 (efi_set_virtual_address_map_t *) 626 */
547 runtime->set_virtual_address_map; 627 efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
548 /* 628 efi_phys.set_virtual_address_map =
 549 * Make sure efi_get_time can be called before entering 629 (efi_set_virtual_address_map_t *)
550 * virtual mode. 630 runtime->set_virtual_address_map;
551 */ 631 /*
552 efi.get_time = phys_efi_get_time; 632 * Make efi_get_time can be called before entering
553 } else 633 * virtual mode.
554 printk(KERN_ERR "Could not map the EFI runtime service " 634 */
555 "table!\n"); 635 efi.get_time = phys_efi_get_time;
556 early_iounmap(runtime, sizeof(efi_runtime_services_t)); 636 early_iounmap(runtime, sizeof(efi_runtime_services_t));
557 637
638 return 0;
639}
640
641static int __init efi_memmap_init(void)
642{
558 /* Map the EFI memory map */ 643 /* Map the EFI memory map */
559 memmap.map = early_ioremap((unsigned long)memmap.phys_map, 644 memmap.map = early_ioremap((unsigned long)memmap.phys_map,
560 memmap.nr_map * memmap.desc_size); 645 memmap.nr_map * memmap.desc_size);
561 if (memmap.map == NULL) 646 if (memmap.map == NULL) {
562 printk(KERN_ERR "Could not map the EFI memory map!\n"); 647 pr_err("Could not map the memory map!\n");
648 return -ENOMEM;
649 }
563 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); 650 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
564 651
565 if (memmap.desc_size != sizeof(efi_memory_desc_t))
566 printk(KERN_WARNING
567 "Kernel-defined memdesc doesn't match the one from EFI!\n");
568
569 if (add_efi_memmap) 652 if (add_efi_memmap)
570 do_add_efi_memmap(); 653 do_add_efi_memmap();
571 654
655 return 0;
656}
657
658void __init efi_init(void)
659{
660 efi_char16_t *c16;
661 char vendor[100] = "unknown";
662 int i = 0;
663 void *tmp;
664
665#ifdef CONFIG_X86_32
666 if (boot_params.efi_info.efi_systab_hi ||
667 boot_params.efi_info.efi_memmap_hi) {
668 pr_info("Table located above 4GB, disabling EFI.\n");
669 efi_enabled = 0;
670 return;
671 }
672 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
673 efi_native = !efi_64bit;
674#else
675 efi_phys.systab = (efi_system_table_t *)
676 (boot_params.efi_info.efi_systab |
677 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
678 efi_native = efi_64bit;
679#endif
680
681 if (efi_systab_init(efi_phys.systab)) {
682 efi_enabled = 0;
683 return;
684 }
685
686 /*
687 * Show what we know for posterity
688 */
689 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
690 if (c16) {
691 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
692 vendor[i] = *c16++;
693 vendor[i] = '\0';
694 } else
695 pr_err("Could not map the firmware vendor!\n");
696 early_iounmap(tmp, 2);
697
698 pr_info("EFI v%u.%.02u by %s\n",
699 efi.systab->hdr.revision >> 16,
700 efi.systab->hdr.revision & 0xffff, vendor);
701
702 if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) {
703 efi_enabled = 0;
704 return;
705 }
706
707 /*
708 * Note: We currently don't support runtime services on an EFI
709 * that doesn't match the kernel 32/64-bit mode.
710 */
711
712 if (!efi_native)
713 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
714 else if (efi_runtime_init()) {
715 efi_enabled = 0;
716 return;
717 }
718
719 if (efi_memmap_init()) {
720 efi_enabled = 0;
721 return;
722 }
572#ifdef CONFIG_X86_32 723#ifdef CONFIG_X86_32
573 x86_platform.get_wallclock = efi_get_time; 724 if (efi_native) {
574 x86_platform.set_wallclock = efi_set_rtc_mmss; 725 x86_platform.get_wallclock = efi_get_time;
726 x86_platform.set_wallclock = efi_set_rtc_mmss;
727 }
575#endif 728#endif
576 729
577#if EFI_DEBUG 730#if EFI_DEBUG
@@ -629,6 +782,14 @@ void __init efi_enter_virtual_mode(void)
629 782
630 efi.systab = NULL; 783 efi.systab = NULL;
631 784
785 /*
786 * We don't do virtual mode, since we don't do runtime services, on
787 * non-native EFI
788 */
789
790 if (!efi_native)
791 goto out;
792
632 /* Merge contiguous regions of the same type and attribute */ 793 /* Merge contiguous regions of the same type and attribute */
633 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 794 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
634 u64 prev_size; 795 u64 prev_size;
@@ -677,7 +838,7 @@ void __init efi_enter_virtual_mode(void)
677 md->virt_addr = (u64) (unsigned long) va; 838 md->virt_addr = (u64) (unsigned long) va;
678 839
679 if (!va) { 840 if (!va) {
680 printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n", 841 pr_err("ioremap of 0x%llX failed!\n",
681 (unsigned long long)md->phys_addr); 842 (unsigned long long)md->phys_addr);
682 continue; 843 continue;
683 } 844 }
@@ -711,8 +872,8 @@ void __init efi_enter_virtual_mode(void)
711 (efi_memory_desc_t *)__pa(new_memmap)); 872 (efi_memory_desc_t *)__pa(new_memmap));
712 873
713 if (status != EFI_SUCCESS) { 874 if (status != EFI_SUCCESS) {
714 printk(KERN_ALERT "Unable to switch EFI into virtual mode " 875 pr_alert("Unable to switch EFI into virtual mode "
715 "(status=%lx)!\n", status); 876 "(status=%lx)!\n", status);
716 panic("EFI call to SetVirtualAddressMap() failed!"); 877 panic("EFI call to SetVirtualAddressMap() failed!");
717 } 878 }
718 879
@@ -744,6 +905,8 @@ void __init efi_enter_virtual_mode(void)
744 efi.query_capsule_caps = virt_efi_query_capsule_caps; 905 efi.query_capsule_caps = virt_efi_query_capsule_caps;
745 if (__supported_pte_mask & _PAGE_NX) 906 if (__supported_pte_mask & _PAGE_NX)
746 runtime_code_page_mkexec(); 907 runtime_code_page_mkexec();
908
909out:
747 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); 910 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
748 memmap.map = NULL; 911 memmap.map = NULL;
749 kfree(new_memmap); 912 kfree(new_memmap);
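
One detail worth calling out in efi_systab_init() above: rather than range-checking each 64-bit handle and pointer from the firmware's system table individually, the code ORs them all into a single accumulator and tests the high 32 bits once, which is how a 32-bit kernel rejects any table referring to memory above 4GB. The same trick in isolation:

#include <stdint.h>
#include <stdio.h>

/* Returns 0 if every value fits in 32 bits, -1 otherwise. */
static int all_below_4g(const uint64_t *vals, int n)
{
	uint64_t tmp = 0;
	int i;

	for (i = 0; i < n; i++)
		tmp |= vals[i];		/* any high bit survives the OR */

	return (tmp >> 32) ? -1 : 0;
}

int main(void)
{
	uint64_t ok[]  = { 0x7f000000, 0x00001000 };
	uint64_t bad[] = { 0x7f000000, 0x100000000ULL };

	printf("%d %d\n", all_below_4g(ok, 2), all_below_4g(bad, 2));	/* 0 -1 */
	return 0;
}
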
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile
index 07c9cd05021a..5b51194f4c8d 100644
--- a/arch/x86/platform/geode/Makefile
+++ b/arch/x86/platform/geode/Makefile
@@ -1 +1,3 @@
1obj-$(CONFIG_ALIX) += alix.o 1obj-$(CONFIG_ALIX) += alix.o
2obj-$(CONFIG_NET5501) += net5501.o
3obj-$(CONFIG_GEOS) += geos.o
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index ca1973699d3d..90e23e7679a5 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -6,6 +6,7 @@
6 * 6 *
7 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> 7 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
8 * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> 8 * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
9 * and Philip Prindeville <philipp@redfish-solutions.com>
9 * 10 *
10 * TODO: There are large similarities with leds-net5501.c 11 * TODO: There are large similarities with leds-net5501.c
11 * by Alessandro Zummo <a.zummo@towertech.it> 12 * by Alessandro Zummo <a.zummo@towertech.it>
@@ -24,14 +25,47 @@
24#include <linux/leds.h> 25#include <linux/leds.h>
25#include <linux/platform_device.h> 26#include <linux/platform_device.h>
26#include <linux/gpio.h> 27#include <linux/gpio.h>
28#include <linux/input.h>
29#include <linux/gpio_keys.h>
30#include <linux/dmi.h>
27 31
28#include <asm/geode.h> 32#include <asm/geode.h>
29 33
30static int force = 0; 34#define BIOS_SIGNATURE_TINYBIOS 0xf0000
35#define BIOS_SIGNATURE_COREBOOT 0x500
36#define BIOS_REGION_SIZE 0x10000
37
38static bool force = 0;
31module_param(force, bool, 0444); 39module_param(force, bool, 0444);
32/* FIXME: Award bios is not automatically detected as Alix platform */ 40/* FIXME: Award bios is not automatically detected as Alix platform */
33MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); 41MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform");
34 42
43static struct gpio_keys_button alix_gpio_buttons[] = {
44 {
45 .code = KEY_RESTART,
46 .gpio = 24,
47 .active_low = 1,
48 .desc = "Reset button",
49 .type = EV_KEY,
50 .wakeup = 0,
51 .debounce_interval = 100,
52 .can_disable = 0,
53 }
54};
55static struct gpio_keys_platform_data alix_buttons_data = {
56 .buttons = alix_gpio_buttons,
57 .nbuttons = ARRAY_SIZE(alix_gpio_buttons),
58 .poll_interval = 20,
59};
60
61static struct platform_device alix_buttons_dev = {
62 .name = "gpio-keys-polled",
63 .id = 1,
64 .dev = {
65 .platform_data = &alix_buttons_data,
66 }
67};
68
35static struct gpio_led alix_leds[] = { 69static struct gpio_led alix_leds[] = {
36 { 70 {
37 .name = "alix:1", 71 .name = "alix:1",
@@ -64,17 +98,22 @@ static struct platform_device alix_leds_dev = {
64 .dev.platform_data = &alix_leds_data, 98 .dev.platform_data = &alix_leds_data,
65}; 99};
66 100
101static struct __initdata platform_device *alix_devs[] = {
102 &alix_buttons_dev,
103 &alix_leds_dev,
104};
105
67static void __init register_alix(void) 106static void __init register_alix(void)
68{ 107{
69 /* Setup LED control through leds-gpio driver */ 108 /* Setup LED control through leds-gpio driver */
70 platform_device_register(&alix_leds_dev); 109 platform_add_devices(alix_devs, ARRAY_SIZE(alix_devs));
71} 110}
72 111
73static int __init alix_present(unsigned long bios_phys, 112static bool __init alix_present(unsigned long bios_phys,
74 const char *alix_sig, 113 const char *alix_sig,
75 size_t alix_sig_len) 114 size_t alix_sig_len)
76{ 115{
77 const size_t bios_len = 0x00010000; 116 const size_t bios_len = BIOS_REGION_SIZE;
78 const char *bios_virt; 117 const char *bios_virt;
79 const char *scan_end; 118 const char *scan_end;
80 const char *p; 119 const char *p;
@@ -84,7 +123,7 @@ static int __init alix_present(unsigned long bios_phys,
84 printk(KERN_NOTICE "%s: forced to skip BIOS test, " 123 printk(KERN_NOTICE "%s: forced to skip BIOS test, "
85 "assume system is ALIX.2/ALIX.3\n", 124 "assume system is ALIX.2/ALIX.3\n",
86 KBUILD_MODNAME); 125 KBUILD_MODNAME);
87 return 1; 126 return true;
88 } 127 }
89 128
90 bios_virt = phys_to_virt(bios_phys); 129 bios_virt = phys_to_virt(bios_phys);
@@ -109,15 +148,33 @@ static int __init alix_present(unsigned long bios_phys,
109 *a = '\0'; 148 *a = '\0';
110 149
111 tail = p + alix_sig_len; 150 tail = p + alix_sig_len;
112 if ((tail[0] == '2' || tail[0] == '3')) { 151 if ((tail[0] == '2' || tail[0] == '3' || tail[0] == '6')) {
113 printk(KERN_INFO 152 printk(KERN_INFO
114 "%s: system is recognized as \"%s\"\n", 153 "%s: system is recognized as \"%s\"\n",
115 KBUILD_MODNAME, name); 154 KBUILD_MODNAME, name);
116 return 1; 155 return true;
117 } 156 }
118 } 157 }
119 158
120 return 0; 159 return false;
160}
161
162static bool __init alix_present_dmi(void)
163{
164 const char *vendor, *product;
165
166 vendor = dmi_get_system_info(DMI_SYS_VENDOR);
167 if (!vendor || strcmp(vendor, "PC Engines"))
168 return false;
169
170 product = dmi_get_system_info(DMI_PRODUCT_NAME);
171 if (!product || (strcmp(product, "ALIX.2D") && strcmp(product, "ALIX.6")))
172 return false;
173
174 printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n",
175 KBUILD_MODNAME, vendor, product);
176
177 return true;
121} 178}
122 179
123static int __init alix_init(void) 180static int __init alix_init(void)
@@ -128,8 +185,9 @@ static int __init alix_init(void)
128 if (!is_geode()) 185 if (!is_geode())
129 return 0; 186 return 0;
130 187
131 if (alix_present(0xf0000, tinybios_sig, sizeof(tinybios_sig) - 1) || 188 if (alix_present(BIOS_SIGNATURE_TINYBIOS, tinybios_sig, sizeof(tinybios_sig) - 1) ||
132 alix_present(0x500, coreboot_sig, sizeof(coreboot_sig) - 1)) 189 alix_present(BIOS_SIGNATURE_COREBOOT, coreboot_sig, sizeof(coreboot_sig) - 1) ||
190 alix_present_dmi())
133 register_alix(); 191 register_alix();
134 192
135 return 0; 193 return 0;
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
new file mode 100644
index 000000000000..c2e6d53558be
--- /dev/null
+++ b/arch/x86/platform/geode/geos.c
@@ -0,0 +1,128 @@
1/*
2 * System Specific setup for Traverse Technologies GEOS.
3 * At the moment this means setup of GPIO control of LEDs.
4 *
5 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
6 * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
7 * and Philip Prindeville <philipp@redfish-solutions.com>
8 *
9 * TODO: There are large similarities with leds-net5501.c
10 * by Alessandro Zummo <a.zummo@towertech.it>
11 * In the future leds-net5501.c should be migrated over to platform
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation.
16 */
17
18#include <linux/kernel.h>
19#include <linux/init.h>
20#include <linux/io.h>
21#include <linux/string.h>
22#include <linux/module.h>
23#include <linux/leds.h>
24#include <linux/platform_device.h>
25#include <linux/gpio.h>
26#include <linux/input.h>
27#include <linux/gpio_keys.h>
28#include <linux/dmi.h>
29
30#include <asm/geode.h>
31
32static struct gpio_keys_button geos_gpio_buttons[] = {
33 {
34 .code = KEY_RESTART,
35 .gpio = 3,
36 .active_low = 1,
37 .desc = "Reset button",
38 .type = EV_KEY,
39 .wakeup = 0,
40 .debounce_interval = 100,
41 .can_disable = 0,
42 }
43};
44static struct gpio_keys_platform_data geos_buttons_data = {
45 .buttons = geos_gpio_buttons,
46 .nbuttons = ARRAY_SIZE(geos_gpio_buttons),
47 .poll_interval = 20,
48};
49
50static struct platform_device geos_buttons_dev = {
51 .name = "gpio-keys-polled",
52 .id = 1,
53 .dev = {
54 .platform_data = &geos_buttons_data,
55 }
56};
57
58static struct gpio_led geos_leds[] = {
59 {
60 .name = "geos:1",
61 .gpio = 6,
62 .default_trigger = "default-on",
63 .active_low = 1,
64 },
65 {
66 .name = "geos:2",
67 .gpio = 25,
68 .default_trigger = "default-off",
69 .active_low = 1,
70 },
71 {
72 .name = "geos:3",
73 .gpio = 27,
74 .default_trigger = "default-off",
75 .active_low = 1,
76 },
77};
78
79static struct gpio_led_platform_data geos_leds_data = {
80 .num_leds = ARRAY_SIZE(geos_leds),
81 .leds = geos_leds,
82};
83
84static struct platform_device geos_leds_dev = {
85 .name = "leds-gpio",
86 .id = -1,
87 .dev.platform_data = &geos_leds_data,
88};
89
90static struct __initdata platform_device *geos_devs[] = {
91 &geos_buttons_dev,
92 &geos_leds_dev,
93};
94
95static void __init register_geos(void)
96{
97 /* Setup LED control through leds-gpio driver */
98 platform_add_devices(geos_devs, ARRAY_SIZE(geos_devs));
99}
100
101static int __init geos_init(void)
102{
103 const char *vendor, *product;
104
105 if (!is_geode())
106 return 0;
107
108 vendor = dmi_get_system_info(DMI_SYS_VENDOR);
109 if (!vendor || strcmp(vendor, "Traverse Technologies"))
110 return 0;
111
112 product = dmi_get_system_info(DMI_PRODUCT_NAME);
113 if (!product || strcmp(product, "Geos"))
114 return 0;
115
116 printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n",
117 KBUILD_MODNAME, vendor, product);
118
119 register_geos();
120
121 return 0;
122}
123
124module_init(geos_init);
125
126MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
127MODULE_DESCRIPTION("Traverse Technologies Geos System Setup");
128MODULE_LICENSE("GPL");
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
new file mode 100644
index 000000000000..66d377e334f7
--- /dev/null
+++ b/arch/x86/platform/geode/net5501.c
@@ -0,0 +1,154 @@
1/*
2 * System Specific setup for Soekris net5501
3 * At the moment this means setup of GPIO control of LEDs and buttons
4 * on net5501 boards.
5 *
6 *
7 * Copyright (C) 2008-2009 Tower Technologies
8 * Written by Alessandro Zummo <a.zummo@towertech.it>
9 *
10 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
11 * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
12 * and Philip Prindeville <philipp@redfish-solutions.com>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License version 2
16 * as published by the Free Software Foundation.
17 */
18
19#include <linux/kernel.h>
20#include <linux/init.h>
21#include <linux/io.h>
22#include <linux/string.h>
23#include <linux/module.h>
24#include <linux/leds.h>
25#include <linux/platform_device.h>
26#include <linux/gpio.h>
27#include <linux/input.h>
28#include <linux/gpio_keys.h>
29
30#include <asm/geode.h>
31
32#define BIOS_REGION_BASE 0xffff0000
33#define BIOS_REGION_SIZE 0x00010000
34
35static struct gpio_keys_button net5501_gpio_buttons[] = {
36 {
37 .code = KEY_RESTART,
38 .gpio = 24,
39 .active_low = 1,
40 .desc = "Reset button",
41 .type = EV_KEY,
42 .wakeup = 0,
43 .debounce_interval = 100,
44 .can_disable = 0,
45 }
46};
47static struct gpio_keys_platform_data net5501_buttons_data = {
48 .buttons = net5501_gpio_buttons,
49 .nbuttons = ARRAY_SIZE(net5501_gpio_buttons),
50 .poll_interval = 20,
51};
52
53static struct platform_device net5501_buttons_dev = {
54 .name = "gpio-keys-polled",
55 .id = 1,
56 .dev = {
57 .platform_data = &net5501_buttons_data,
58 }
59};
60
61static struct gpio_led net5501_leds[] = {
62 {
63 .name = "net5501:1",
64 .gpio = 6,
65 .default_trigger = "default-on",
66 .active_low = 1,
67 },
68};
69
70static struct gpio_led_platform_data net5501_leds_data = {
71 .num_leds = ARRAY_SIZE(net5501_leds),
72 .leds = net5501_leds,
73};
74
75static struct platform_device net5501_leds_dev = {
76 .name = "leds-gpio",
77 .id = -1,
78 .dev.platform_data = &net5501_leds_data,
79};
80
81static struct __initdata platform_device *net5501_devs[] = {
82 &net5501_buttons_dev,
83 &net5501_leds_dev,
84};
85
86static void __init register_net5501(void)
87{
88 /* Setup LED control through leds-gpio driver */
89 platform_add_devices(net5501_devs, ARRAY_SIZE(net5501_devs));
90}
91
92struct net5501_board {
93 u16 offset;
94 u16 len;
95 char *sig;
96};
97
98static struct net5501_board __initdata boards[] = {
99 { 0xb7b, 7, "net5501" }, /* net5501 v1.33/1.33c */
100 { 0xb1f, 7, "net5501" }, /* net5501 v1.32i */
101};
102
103static bool __init net5501_present(void)
104{
105 int i;
106 unsigned char *rombase, *bios;
107 bool found = false;
108
109 rombase = ioremap(BIOS_REGION_BASE, BIOS_REGION_SIZE - 1);
110 if (!rombase) {
111 printk(KERN_ERR "%s: failed to get rombase\n", KBUILD_MODNAME);
112 return found;
113 }
114
115 bios = rombase + 0x20; /* null terminated */
116
117 if (memcmp(bios, "comBIOS", 7))
118 goto unmap;
119
120 for (i = 0; i < ARRAY_SIZE(boards); i++) {
121 unsigned char *model = rombase + boards[i].offset;
122
123 if (!memcmp(model, boards[i].sig, boards[i].len)) {
124 printk(KERN_INFO "%s: system is recognized as \"%s\"\n",
125 KBUILD_MODNAME, model);
126
127 found = true;
128 break;
129 }
130 }
131
132unmap:
133 iounmap(rombase);
134 return found;
135}
136
137static int __init net5501_init(void)
138{
139 if (!is_geode())
140 return 0;
141
142 if (!net5501_present())
143 return 0;
144
145 register_net5501();
146
147 return 0;
148}
149
150module_init(net5501_init);
151
152MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
153MODULE_DESCRIPTION("Soekris net5501 System Setup");
154MODULE_LICENSE("GPL");
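
net5501_present() above identifies the board without DMI: it maps the 64 KB BIOS ROM at 0xffff0000, checks for the "comBIOS" marker at offset 0x20, then compares known per-BIOS-revision offsets against the "net5501" signature. A user-space model of that scan over an in-memory ROM image:

#include <stdio.h>
#include <string.h>

struct board { unsigned off; unsigned len; const char *sig; };

static const struct board boards[] = {
	{ 0xb7b, 7, "net5501" },	/* v1.33/1.33c */
	{ 0xb1f, 7, "net5501" },	/* v1.32i */
};

static int rom_is_net5501(const unsigned char *rom, size_t rom_len)
{
	size_t i;

	if (rom_len < 0x1000 || memcmp(rom + 0x20, "comBIOS", 7))
		return 0;
	for (i = 0; i < sizeof(boards) / sizeof(boards[0]); i++)
		if (!memcmp(rom + boards[i].off, boards[i].sig, boards[i].len))
			return 1;
	return 0;
}

int main(void)
{
	static unsigned char rom[0x10000];	/* fake 64 KB BIOS image */

	memcpy(rom + 0x20, "comBIOS", 7);
	memcpy(rom + 0xb7b, "net5501", 7);
	printf("detected: %d\n", rom_is_net5501(rom, sizeof(rom)));
	return 0;
}
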
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index 1ba7f5ed8c9b..5917eb56b313 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -42,7 +42,7 @@ MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
42MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); 42MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
43MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); 43MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
44 44
45static int force; 45static bool force;
46 46
47module_param(force, bool, 0); 47module_param(force, bool, 0);
48MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); 48MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
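
The iris.c one-liner is part of a tree-wide cleanup from this merge window: module_param() with a bool type now expects the backing variable to really be bool rather than int (the alix.c change above does the same). A minimal, hypothetical module skeleton showing the corrected declaration; the module name and description below are made up for illustration:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static bool force;			/* was "static int force;" */
module_param(force, bool, 0444);
MODULE_PARM_DESC(force, "Force the handler on unrecognized hardware");

static int __init demo_init(void)
{
	pr_info("demo: force=%d\n", force);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
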
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index 1ea38775a6d3..af1da7e623f9 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1,4 +1,3 @@
1obj-$(CONFIG_X86_MRST) += mrst.o 1obj-$(CONFIG_X86_INTEL_MID) += mrst.o
2obj-$(CONFIG_X86_MRST) += vrtc.o 2obj-$(CONFIG_X86_INTEL_MID) += vrtc.o
3obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o 3obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o
4obj-$(CONFIG_X86_MRST) += pmu.o
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index ad4ec1cb097e..e0a37233c0af 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -28,6 +28,8 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/notifier.h> 29#include <linux/notifier.h>
30#include <linux/mfd/intel_msic.h> 30#include <linux/mfd/intel_msic.h>
31#include <linux/gpio.h>
32#include <linux/i2c/tc35876x.h>
31 33
32#include <asm/setup.h> 34#include <asm/setup.h>
33#include <asm/mpspec_def.h> 35#include <asm/mpspec_def.h>
@@ -78,16 +80,11 @@ int sfi_mrtc_num;
78 80
79static void mrst_power_off(void) 81static void mrst_power_off(void)
80{ 82{
81 if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
82 intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 1);
83} 83}
84 84
85static void mrst_reboot(void) 85static void mrst_reboot(void)
86{ 86{
87 if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) 87 intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
88 intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
89 else
90 intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
91} 88}
92 89
93/* parse all the mtimer info to a static mtimer array */ 90/* parse all the mtimer info to a static mtimer array */
@@ -200,34 +197,28 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
200 197
201static unsigned long __init mrst_calibrate_tsc(void) 198static unsigned long __init mrst_calibrate_tsc(void)
202{ 199{
203 unsigned long flags, fast_calibrate; 200 unsigned long fast_calibrate;
204 if (__mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) { 201 u32 lo, hi, ratio, fsb;
205 u32 lo, hi, ratio, fsb; 202
206 203 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
207 rdmsr(MSR_IA32_PERF_STATUS, lo, hi); 204 pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
208 pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); 205 ratio = (hi >> 8) & 0x1f;
209 ratio = (hi >> 8) & 0x1f; 206 pr_debug("ratio is %d\n", ratio);
210 pr_debug("ratio is %d\n", ratio); 207 if (!ratio) {
211 if (!ratio) { 208 pr_err("read a zero ratio, should be incorrect!\n");
212 pr_err("read a zero ratio, should be incorrect!\n"); 209 pr_err("force tsc ratio to 16 ...\n");
213 pr_err("force tsc ratio to 16 ...\n"); 210 ratio = 16;
214 ratio = 16;
215 }
216 rdmsr(MSR_FSB_FREQ, lo, hi);
217 if ((lo & 0x7) == 0x7)
218 fsb = PENWELL_FSB_FREQ_83SKU;
219 else
220 fsb = PENWELL_FSB_FREQ_100SKU;
221 fast_calibrate = ratio * fsb;
222 pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
223 lapic_timer_frequency = fsb * 1000 / HZ;
224 /* mark tsc clocksource as reliable */
225 set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
226 } else {
227 local_irq_save(flags);
228 fast_calibrate = apbt_quick_calibrate();
229 local_irq_restore(flags);
230 } 211 }
212 rdmsr(MSR_FSB_FREQ, lo, hi);
213 if ((lo & 0x7) == 0x7)
214 fsb = PENWELL_FSB_FREQ_83SKU;
215 else
216 fsb = PENWELL_FSB_FREQ_100SKU;
217 fast_calibrate = ratio * fsb;
218 pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
219 lapic_timer_frequency = fsb * 1000 / HZ;
220 /* mark tsc clocksource as reliable */
221 set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
231 222
232 if (fast_calibrate) 223 if (fast_calibrate)
233 return fast_calibrate; 224 return fast_calibrate;
@@ -261,16 +252,11 @@ static void __cpuinit mrst_arch_setup(void)
261{ 252{
262 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) 253 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
263 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; 254 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
264 else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
265 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
266 else { 255 else {
267 pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n", 256 pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
268 boot_cpu_data.x86, boot_cpu_data.x86_model); 257 boot_cpu_data.x86, boot_cpu_data.x86_model);
269 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; 258 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
270 } 259 }
271 pr_debug("Moorestown CPU %s identified\n",
272 (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
273 "Lincroft" : "Penwell");
274} 260}
275 261
276/* MID systems don't have i8042 controller */ 262/* MID systems don't have i8042 controller */
@@ -686,6 +672,24 @@ static void *msic_ocd_platform_data(void *info)
686 return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD); 672 return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
687} 673}
688 674
675static void *msic_thermal_platform_data(void *info)
676{
677 return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
678}
679
680/* tc35876x DSI-LVDS bridge chip and panel platform data */
681static void *tc35876x_platform_data(void *data)
682{
683 static struct tc35876x_platform_data pdata;
684
685 /* gpio pins set to -1 will not be used by the driver */
686 pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
687 pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
688 pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
689
690 return &pdata;
691}
692
689static const struct devs_id __initconst device_ids[] = { 693static const struct devs_id __initconst device_ids[] = {
690 {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data}, 694 {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data},
691 {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, 695 {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
@@ -698,6 +702,7 @@ static const struct devs_id __initconst device_ids[] = {
698 {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, 702 {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
699 {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, 703 {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
700 {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data}, 704 {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data},
705 {"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data},
701 706
702 /* MSIC subdevices */ 707 /* MSIC subdevices */
703 {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data}, 708 {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
@@ -705,6 +710,7 @@ static const struct devs_id __initconst device_ids[] = {
705 {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data}, 710 {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data},
706 {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data}, 711 {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data},
707 {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data}, 712 {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data},
713 {"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data},
708 714
709 {}, 715 {},
710}; 716};
@@ -848,8 +854,7 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry)
848 if (mrst_has_msic()) 854 if (mrst_has_msic())
849 return; 855 return;
850 856
851 /* ID as IRQ is a hack that will go away */ 857 pdev = platform_device_alloc(entry->name, 0);
852 pdev = platform_device_alloc(entry->name, entry->irq);
853 if (pdev == NULL) { 858 if (pdev == NULL) {
854 pr_err("out of memory for SFI platform device '%s'.\n", 859 pr_err("out of memory for SFI platform device '%s'.\n",
855 entry->name); 860 entry->name);
@@ -1030,6 +1035,7 @@ static int __init pb_keys_init(void)
1030 num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); 1035 num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
1031 for (i = 0; i < num; i++) { 1036 for (i = 0; i < num; i++) {
1032 gb[i].gpio = get_gpio_by_name(gb[i].desc); 1037 gb[i].gpio = get_gpio_by_name(gb[i].desc);
1038 pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio);
1033 if (gb[i].gpio == -1) 1039 if (gb[i].gpio == -1)
1034 continue; 1040 continue;
1035 1041
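
With the Lincroft path gone, mrst_calibrate_tsc() above reduces to pure MSR arithmetic: the bus ratio comes from bits 12:8 of the high word of MSR_IA32_PERF_STATUS, the FSB frequency is picked per SKU from MSR_FSB_FREQ, and the TSC rate is simply ratio * fsb in kHz, with lapic_timer_frequency derived from the same fsb. A worked standalone version, with the PENWELL_FSB_FREQ_* values assumed here from asm/mrst.h:

#include <stdio.h>

#define PENWELL_FSB_FREQ_83SKU	 83200	/* kHz; value assumed from asm/mrst.h */
#define PENWELL_FSB_FREQ_100SKU	 99840	/* kHz; value assumed from asm/mrst.h */
#define HZ 1000

static unsigned long penwell_tsc_khz(unsigned int msr_hi, unsigned int fsb_lo,
				     unsigned int *lapic_hz)
{
	unsigned int ratio = (msr_hi >> 8) & 0x1f;	/* bits 12:8 of high word */
	unsigned int fsb = ((fsb_lo & 0x7) == 0x7) ? PENWELL_FSB_FREQ_83SKU
						   : PENWELL_FSB_FREQ_100SKU;

	if (!ratio)
		ratio = 16;			/* same fallback as the kernel */
	*lapic_hz = fsb * 1000 / HZ;		/* LAPIC ticks per jiffy */
	return (unsigned long)ratio * fsb;	/* TSC rate in kHz */
}

int main(void)
{
	unsigned int lapic;
	unsigned long tsc = penwell_tsc_khz(16 << 8, 0x7, &lapic);

	printf("tsc = %lu kHz, lapic_timer_frequency = %u\n", tsc, lapic);
	return 0;
}
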
diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c
deleted file mode 100644
index c0ac06da57ac..000000000000
--- a/arch/x86/platform/mrst/pmu.c
+++ /dev/null
@@ -1,817 +0,0 @@
1/*
2 * mrst/pmu.c - driver for MRST Power Management Unit
3 *
4 * Copyright (c) 2011, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#include <linux/cpuidle.h>
21#include <linux/debugfs.h>
22#include <linux/delay.h>
23#include <linux/interrupt.h>
24#include <linux/module.h>
25#include <linux/pci.h>
26#include <linux/seq_file.h>
27#include <linux/sfi.h>
28#include <asm/intel_scu_ipc.h>
29#include "pmu.h"
30
31#define IPCMSG_FW_REVISION 0xF4
32
33struct mrst_device {
34 u16 pci_dev_num; /* DEBUG only */
35 u16 lss;
36 u16 latest_request;
37 unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */
38};
39
40/*
41 * complete list of MRST PCI devices
42 */
43static struct mrst_device mrst_devs[] = {
44/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */
45/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */
46/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */
47/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */
48/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */
49/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */
50/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */
51/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */
52/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */
53/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */
54/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */
55/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */
56/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */
57/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */
58/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */
59/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */
60/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */
61/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */
62/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */
63/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */
64/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */
65/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */
66
67/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */
68
69/* 23 */ { 0x4102, 0 }, /* Lincroft */
70/* 24 */ { 0x4110, 0 }, /* Lincroft */
71};
72
73/* n.b. We ignore PCI-id 0x815 in LSS9 b/c Linux has no driver for it */
74static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
75static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
76 0x0804, 0x0805, 0x080f, 0};
77
78/* handle concurrent SMP invocations of pmu_pci_set_power_state() */
79static spinlock_t mrst_pmu_power_state_lock;
80
81static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */
82static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */
83
84static int graphics_is_off;
85static int lss_s0i3_enabled;
86static bool mrst_pmu_s0i3_enable;
87
88/* debug counters */
89static u32 pmu_wait_ready_calls;
90static u32 pmu_wait_ready_udelays;
91static u32 pmu_wait_ready_udelays_max;
92static u32 pmu_wait_done_calls;
93static u32 pmu_wait_done_udelays;
94static u32 pmu_wait_done_udelays_max;
95static u32 pmu_set_power_state_entry;
96static u32 pmu_set_power_state_send_cmd;
97
98static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
99{
100 int index = 0;
101
102 if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
103 index = pci_dev_num - 0x800;
104 else if (pci_dev_num == 0x084F)
105 index = 22;
106 else if (pci_dev_num == 0x4102)
107 index = 23;
108 else if (pci_dev_num == 0x4110)
109 index = 24;
110
111 if (pci_dev_num != mrst_devs[index].pci_dev_num) {
112 WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
113 return 0;
114 }
115
116 return &mrst_devs[index];
117}
118
119/**
120 * mrst_pmu_validate_cstates
121 * @dev: cpuidle_device
122 *
123 * Certain states are not appropriate for the governor to pick in some cases.
124 * This function will be called as the cpuidle_device's prepare callback and
125 * thus tells the governor to ignore such states when selecting the next state
126 * to enter.
127 */
128
129#define IDLE_STATE4_IS_C6 4
130#define IDLE_STATE5_IS_S0I3 5
131
132int mrst_pmu_invalid_cstates(void)
133{
134 int cpu = smp_processor_id();
135
136 /*
137 * Demote to C4 if the PMU is busy.
138 * Since LSS changes leave the busy bit clear...
139 * busy means either the PMU is waiting for an ACK-C6 that
140 * isn't coming due to an MWAIT that returned immediately;
141 * or we returned from S0i3 successfully, and the PMU
142 * is not done sending us interrupts.
143 */
144 if (pmu_read_busy_status())
145 return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
146
147 /*
148 * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
149 * or if device LSS is insufficient, or the GPU is active,
150 * or if it has been explicitly disabled.
151 */
152 if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
153 !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
154 return 1 << IDLE_STATE5_IS_S0I3;
155 else
156 return 0;
157}
158
159/*
160 * pmu_update_wake_counters(): read PM_WKS, update wake_counters[]
161 * DEBUG only.
162 */
163static void pmu_update_wake_counters(void)
164{
165 int lss;
166 u32 wake_status;
167
168 wake_status = pmu_read_wks();
169
170 for (lss = 0; lss < MRST_NUM_LSS; ++lss) {
171 if (wake_status & (1 << lss))
172 wake_counters[lss]++;
173 }
174}
175
176int mrst_pmu_s0i3_entry(void)
177{
178 int status;
179
180 /* Clear any possible error conditions */
181 pmu_write_ics(0x300);
182
183 /* set wake control to current D-states */
184 pmu_write_wssc(S0I3_SSS_TARGET);
185
186 status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
187 pmu_update_wake_counters();
188 return status;
189}
190
191/* poll for a maximum of 5 ms for the busy bit to clear */
192static int pmu_wait_ready(void)
193{
194 int udelays;
195
196 pmu_wait_ready_calls++;
197
198 for (udelays = 0; udelays < 500; ++udelays) {
199 if (udelays > pmu_wait_ready_udelays_max)
200 pmu_wait_ready_udelays_max = udelays;
201
202 if (pmu_read_busy_status() == 0)
203 return 0;
204
205 udelay(10);
206 pmu_wait_ready_udelays++;
207 }
208
209 /*
210 * if this fires, observe
211 * /sys/kernel/debug/mrst_pmu_wait_ready_calls
212 * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
213 */
214 WARN_ONCE(1, "SCU not ready for 5ms");
215 return -EBUSY;
216}
217/* poll for a maximum of 50ms for the busy bit to clear */
218static int pmu_wait_done(void)
219{
220 int udelays;
221
222 pmu_wait_done_calls++;
223
224 for (udelays = 0; udelays < 500; ++udelays) {
225 if (udelays > pmu_wait_done_udelays_max)
226 pmu_wait_done_udelays_max = udelays;
227
228 if (pmu_read_busy_status() == 0)
229 return 0;
230
231 udelay(100);
232 pmu_wait_done_udelays++;
233 }
234
235 /*
236 * if this fires, observe
237 * /sys/kernel/debug/mrst_pmu_wait_done_calls
238 * /sys/kernel/debug/mrst_pmu_wait_done_udelays
239 */
240 WARN_ONCE(1, "SCU not done for 50ms");
241 return -EBUSY;
242}
243
244u32 mrst_pmu_msi_is_disabled(void)
245{
246 return pmu_msi_is_disabled();
247}
248
249void mrst_pmu_enable_msi(void)
250{
251 pmu_msi_enable();
252}
253
254/**
255 * pmu_irq - pmu driver interrupt handler
256 * Context: interrupt context
257 */
258static irqreturn_t pmu_irq(int irq, void *dummy)
259{
260 union pmu_pm_ics pmu_ics;
261
262 pmu_ics.value = pmu_read_ics();
263
264 if (!pmu_ics.bits.pending)
265 return IRQ_NONE;
266
267 switch (pmu_ics.bits.cause) {
268 case INT_SPURIOUS:
269 case INT_CMD_DONE:
270 case INT_CMD_ERR:
271 case INT_WAKE_RX:
272 case INT_SS_ERROR:
273 case INT_S0IX_MISS:
274 case INT_NO_ACKC6:
275 pmu_irq_stats[pmu_ics.bits.cause]++;
276 break;
277 default:
278 pmu_irq_stats[INT_INVALID]++;
279 }
280
281 pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
282
283 return IRQ_HANDLED;
284}
285
286/*
287 * Translate PCI power management to MRST LSS D-states
288 */
289static int pci_2_mrst_state(int lss, pci_power_t pci_state)
290{
291 switch (pci_state) {
292 case PCI_D0:
293 if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
294 return D0i1;
295 else
296 return D0;
297 case PCI_D1:
298 return D0i1;
299 case PCI_D2:
300 return D0i2;
301 case PCI_D3hot:
302 case PCI_D3cold:
303 return D0i3;
304 default:
305 WARN(1, "pci_state %d\n", pci_state);
306 return 0;
307 }
308}
309
310static int pmu_issue_command(u32 pm_ssc)
311{
312 union pmu_pm_set_cfg_cmd_t command;
313
314 if (pmu_read_busy_status()) {
315 pr_debug("PMU is busy; operation not permitted\n");
316 return -1;
317 }
318
319 /*
320 * Enable interrupts in the PMU so that an interrupt is
321 * propagated when the ioc bit for a particular SET
322 * command is set.
323 */
324
325 pmu_irq_enable();
326
327 /* Configure the sub systems for pmu2 */
328
329 pmu_write_ssc(pm_ssc);
330
331 /*
332 * Send the SET_CFG command to the PMU; it is configured
333 * for CM_IMMEDIATE mode and hence needs no trigger.
334 */
335
336 command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
337 command.pmu2_params.d_param.cfg_delay = 0;
338 command.pmu2_params.d_param.rsvd = 0;
339
340 /* construct the command to send SET_CFG to particular PMU */
341 command.pmu2_params.d_param.cmd = SET_CFG_CMD;
342 command.pmu2_params.d_param.ioc = 0;
343 command.pmu2_params.d_param.mode_id = 0;
344 command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
345
346 /* write the value of PM_CMD into particular PMU */
347 pr_debug("pmu command being written %x\n",
348 command.pmu_pm_set_cfg_cmd_value);
349
350 pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value);
351
352 return 0;
353}
354
355static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
356{
357 u16 existing_request;
358 int i;
359
360 for (i = 0; ids[i]; ++i) {
361 struct mrst_device *mrst_dev;
362
363 mrst_dev = pci_id_2_mrst_dev(ids[i]);
364 if (unlikely(!mrst_dev))
365 continue;
366
367 existing_request = mrst_dev->latest_request;
368 if (existing_request < pci_state)
369 pci_state = existing_request;
370 }
371 return pci_state;
372}
373
374/**
375 * pmu_pci_set_power_state - callback used by all PCI devices for
376 * platform-specific device power on/shutdown.
377 */
378
379int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
380{
381 u32 old_sss, new_sss;
382 int status = 0;
383 struct mrst_device *mrst_dev;
384
385 pmu_set_power_state_entry++;
386
387 BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
388 BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
389
390 mrst_dev = pci_id_2_mrst_dev(pdev->device);
391 if (unlikely(!mrst_dev))
392 return -ENODEV;
393
394 mrst_dev->pci_state_counts[pci_state]++; /* count invocations */
395
396 /* PMU driver calls self as part of PCI initialization, ignore */
397 if (pdev->device == PCI_DEV_ID_MRST_PMU)
398 return 0;
399
400 BUG_ON(!pmu_reg); /* SW bug if called before initialized */
401
402 spin_lock(&mrst_pmu_power_state_lock);
403
404 if (pdev->d3_delay) {
405 dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
406 pdev->d3_delay);
407 pdev->d3_delay = 0;
408 }
409 /*
410 * If Lincroft graphics, simply remember state
411 */
412 if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY
413 && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) {
414 if (pci_state == PCI_D0)
415 graphics_is_off = 0;
416 else
417 graphics_is_off = 1;
418 goto ret;
419 }
420
421 if (!mrst_dev->lss)
422 goto ret; /* device with no LSS */
423
424 if (mrst_dev->latest_request == pci_state)
425 goto ret; /* no change */
426
427 mrst_dev->latest_request = pci_state; /* record latest request */
428
429 /*
430 * LSS9 and LSS10 contain multiple PCI devices.
431 * Use the lowest numbered (highest power) state in the LSS
432 */
433 if (mrst_dev->lss == 9)
434 pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state);
435 else if (mrst_dev->lss == 10)
436 pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
437
438 status = pmu_wait_ready();
439 if (status)
440 goto ret;
441
442 old_sss = pmu_read_sss();
443 new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
444 new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state),
445 mrst_dev->lss);
446
447 if (new_sss == old_sss)
448 goto ret; /* nothing to do */
449
450 pmu_set_power_state_send_cmd++;
451
452 status = pmu_issue_command(new_sss);
453
454 if (unlikely(status != 0)) {
455 dev_err(&pdev->dev, "Failed to Issue a PM command\n");
456 goto ret;
457 }
458
459 if (pmu_wait_done())
460 goto ret;
461
462 lss_s0i3_enabled =
463 ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
464ret:
465 spin_unlock(&mrst_pmu_power_state_lock);
466 return status;
467}
468
469#ifdef CONFIG_DEBUG_FS
470static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"};
471
472static inline const char *d0ix_name(int state)
473{
474 return d0ix_names[(int) state];
475}
476
477static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
478{
479 struct pci_dev *pdev = NULL;
480 u32 cur_pmsss;
481 int lss;
482
483 seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
484
485 cur_pmsss = pmu_read_sss();
486
487 seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
488
489 seq_printf(s, "0x%08X Current SSS ", cur_pmsss);
490 seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n");
491
492 if (cpumask_equal(cpu_online_mask, cpumask_of(0)))
493 seq_printf(s, "cpu0 is only cpu online\n");
494 else
495 seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n");
496
497 seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]");
498
499
500 for_each_pci_dev(pdev) {
501 int pos;
502 u16 pmcsr;
503 struct mrst_device *mrst_dev;
504 int i;
505
506 mrst_dev = pci_id_2_mrst_dev(pdev->device);
507
508 seq_printf(s, "%s %04x/%04X %-16.16s ",
509 dev_name(&pdev->dev),
510 pdev->vendor, pdev->device,
511 dev_driver_string(&pdev->dev));
512
513 if (unlikely(!mrst_dev)) {
514 seq_printf(s, " UNKNOWN\n");
515 continue;
516 }
517
518 if (mrst_dev->lss)
519 seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
520 d0ix_name(((cur_pmsss >>
521 (mrst_dev->lss * 2)) & 0x3)));
522 else
523 seq_printf(s, " ");
524
525 /* PCI PM config space setting */
526 pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
527 if (pos != 0) {
528 pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
529 seq_printf(s, "PCI-%-4s",
530 pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK));
531 } else {
532 seq_printf(s, " ");
533 }
534
535 seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request));
536 for (i = 0; i <= PCI_D3cold; ++i)
537 seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
538
539 if (mrst_dev->lss) {
540 unsigned int lssmask;
541
542 lssmask = SSMSK(D0i3, mrst_dev->lss);
543
544 if ((lssmask & S0I3_SSS_TARGET) &&
545 ((lssmask & cur_pmsss) !=
546 (lssmask & S0I3_SSS_TARGET)))
547 seq_printf(s, "[BLOCKS s0i3]");
548 }
549
550 seq_printf(s, "\n");
551 }
552 seq_printf(s, "Wake Counters:\n");
553 for (lss = 0; lss < MRST_NUM_LSS; ++lss)
554 seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]);
555
556 seq_printf(s, "Interrupt Counters:\n");
557 seq_printf(s,
558 "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n"
559 "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n"
560 "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n"
561 "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n",
562 pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE],
563 pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
564 pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
565 pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
566
567 seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n",
568 pmu_wait_ready_calls);
569 seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n",
570 pmu_wait_ready_udelays);
571 seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n",
572 pmu_wait_ready_udelays_max);
573 seq_printf(s, "mrst_pmu_wait_done_calls %8d\n",
574 pmu_wait_done_calls);
575 seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n",
576 pmu_wait_done_udelays);
577 seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n",
578 pmu_wait_done_udelays_max);
579 seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n",
580 pmu_set_power_state_entry);
581 seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n",
582 pmu_set_power_state_send_cmd);
583 seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
584
585 return 0;
586}
587
588static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
589{
590 return single_open(file, debug_mrst_pmu_show, NULL);
591}
592
593static const struct file_operations devices_state_operations = {
594 .open = debug_mrst_pmu_open,
595 .read = seq_read,
596 .llseek = seq_lseek,
597 .release = single_release,
598};
599#endif /* DEBUG_FS */
600
601/*
602 * Validate SCU PCI shim PCI vendor capability byte
603 * against LSS hard-coded in mrst_devs[] above.
604 * DEBUG only.
605 */
606static void pmu_scu_firmware_debug(void)
607{
608 struct pci_dev *pdev = NULL;
609
610 for_each_pci_dev(pdev) {
611 struct mrst_device *mrst_dev;
612 u8 pci_config_lss;
613 int pos;
614
615 mrst_dev = pci_id_2_mrst_dev(pdev->device);
616 if (unlikely(!mrst_dev)) {
617 printk(KERN_ERR FW_BUG "pmu: Unknown "
618 "PCI device 0x%04X\n", pdev->device);
619 continue;
620 }
621
622 if (mrst_dev->lss == 0)
623 continue; /* no LSS in our table */
624
625 pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
626 if (pos == 0) {
627 printk(KERN_ERR FW_BUG "pmu: 0x%04X "
628 "missing PCI Vendor Capability\n",
629 pdev->device);
630 continue;
631 }
632 pci_read_config_byte(pdev, pos + 4, &pci_config_lss);
633 if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) {
634 printk(KERN_ERR FW_BUG "pmu: 0x%04X "
635 "invalid PCI Vendor Capability 0x%x "
636 " expected LSS 0x%X\n",
637 pdev->device, pci_config_lss, mrst_dev->lss);
638 continue;
639 }
640 pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK;
641
642 if (mrst_dev->lss == pci_config_lss)
643 continue;
644
645 printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n",
646 pdev->device, pci_config_lss, mrst_dev->lss);
647 }
648}
649
650/**
651 * pmu_probe
652 */
653static int __devinit pmu_probe(struct pci_dev *pdev,
654 const struct pci_device_id *pci_id)
655{
656 int ret;
657 struct mrst_pmu_reg *pmu;
658
659 /* Init the device */
660 ret = pci_enable_device(pdev);
661 if (ret) {
662 dev_err(&pdev->dev, "Unable to Enable PCI device\n");
663 return ret;
664 }
665
666 ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME);
667 if (ret < 0) {
668 dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n");
669 goto out_err1;
670 }
671
672 /* Map the memory of PMU reg base */
673 pmu = pci_iomap(pdev, 0, 0);
674 if (!pmu) {
675 dev_err(&pdev->dev, "Unable to map the PMU address space\n");
676 ret = -ENOMEM;
677 goto out_err2;
678 }
679
680#ifdef CONFIG_DEBUG_FS
681 /* /sys/kernel/debug/mrst_pmu */
682 (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO,
683 NULL, NULL, &devices_state_operations);
684#endif
685 pmu_reg = pmu; /* success */
686
687 ret = request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL);
688 if (ret) {
689 dev_err(&pdev->dev, "Failed to register ISR\n");
690 goto out_err3;
691 }
692
693 pmu_scu_firmware_debug();
694
695 pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */
696
697 pmu_wait_ready();
698
699 pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */
700 pmu_write_cmd(0x201);
701
702 spin_lock_init(&mrst_pmu_power_state_lock);
703
704 /* Enable the hardware interrupt */
705 pmu_irq_enable();
706 return 0;
707
708out_err3:
709 pci_iounmap(pdev, pmu_reg);
710 pmu_reg = NULL;
712out_err2:
713 pci_release_regions(pdev);
714out_err1:
715 pci_disable_device(pdev);
716 return ret;
717}
718
719static void __devexit pmu_remove(struct pci_dev *pdev)
720{
721 dev_err(&pdev->dev, "Mid PM pmu_remove called\n");
722
723 /* Freeing up the irq */
724 free_irq(pdev->irq, NULL);
725
726 pci_iounmap(pdev, pmu_reg);
727 pmu_reg = NULL;
728
729 /* disable the current PCI device */
730 pci_release_regions(pdev);
731 pci_disable_device(pdev);
732}
733
734static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = {
735 { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 },
736 { }
737};
738
739MODULE_DEVICE_TABLE(pci, pmu_pci_ids);
740
741static struct pci_driver driver = {
742 .name = MRST_PMU_DRV_NAME,
743 .id_table = pmu_pci_ids,
744 .probe = pmu_probe,
745 .remove = __devexit_p(pmu_remove),
746};
747
748/**
749 * pmu_pci_register - register the PMU driver as PCI device
750 */
751static int __init pmu_pci_register(void)
752{
753 return pci_register_driver(&driver);
754}
755
756/* Register and probe via fs_initcall() to precede device_initcall() */
757fs_initcall(pmu_pci_register);
758
759static void __exit mid_pci_cleanup(void)
760{
761 pci_unregister_driver(&driver);
762}
763
764static int ia_major;
765static int ia_minor;
766
767static int pmu_sfi_parse_oem(struct sfi_table_header *table)
768{
769 struct sfi_table_simple *sb;
770
771 sb = (struct sfi_table_simple *)table;
772 ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
773 ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
774 printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
775 ia_major, ia_minor);
776
777 return 0;
778}
779
780static int __init scu_fw_check(void)
781{
782 int ret;
783 u32 fw_version;
784
785 if (!pmu_reg)
786 return 0; /* this driver did not probe successfully */
787
788 sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
789
790 if (ia_major < 0x6005 || ia_minor < 0x1525) {
791 WARN(1, "mrst_pmu: IA FW version too old\n");
792 return -1;
793 }
794
795 ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
796 &fw_version, 1);
797
798 if (ret) {
799 WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
800 } else {
801 int scu_major = (fw_version >> 8) & 0xFF;
802 int scu_minor = (fw_version >> 0) & 0xFF;
803
804 printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
805
806 if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
807 printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
808 mrst_pmu_s0i3_enable = true;
809 } else {
810 WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
811 scu_major, scu_minor);
812 }
813 }
814 return 0;
815}
816late_initcall(scu_fw_check);
817module_exit(mid_pci_cleanup);
diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h
deleted file mode 100644
index bfbfe64b167b..000000000000
--- a/arch/x86/platform/mrst/pmu.h
+++ /dev/null
@@ -1,234 +0,0 @@
1/*
2 * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c
3 *
4 * Copyright (c) 2011, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#ifndef _MRST_PMU_H_
21#define _MRST_PMU_H_
22
23#define PCI_DEV_ID_MRST_PMU 0x0810
24#define MRST_PMU_DRV_NAME "mrst_pmu"
25#define PCI_SUB_CLASS_MASK 0xFF00
26
27#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F
28#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80
29
30#define SUB_SYS_ALL_D0I1 0x01155555
31#define S0I3_WAKE_SOURCES 0x00001FFF
32
33#define PM_S0I3_COMMAND \
34 ((0 << 31) | /* Reserved */ \
35 (0 << 30) | /* Core must be idle */ \
36 (0xc2 << 22) | /* ACK C6 trigger */ \
37 (3 << 19) | /* Trigger on DMI message */ \
38 (3 << 16) | /* Enter S0i3 */ \
39 (0 << 13) | /* Numeric mode ID (sw) */ \
40 (3 << 9) | /* Trigger mode */ \
41 (0 << 8) | /* Do not interrupt */ \
42 (1 << 0)) /* Set configuration */
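/*
 * Editorial sketch: the fields above OR together to the single
 * constant 0x309B0601 (0xc2<<22 | 3<<19 | 3<<16 | 3<<9 | 1). A
 * build-time cross-check could look like this; BUILD_BUG_ON from
 * <linux/kernel.h> is assumed available to includers:
 */
static inline void pm_s0i3_command_check(void)
{
	BUILD_BUG_ON(PM_S0I3_COMMAND != 0x309B0601);
}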
43
44#define LSS_DMI 0
45#define LSS_SD_HC0 1
46#define LSS_SD_HC1 2
47#define LSS_NAND 3
48#define LSS_IMAGING 4
49#define LSS_SECURITY 5
50#define LSS_DISPLAY 6
51#define LSS_USB_HC 7
52#define LSS_USB_OTG 8
53#define LSS_AUDIO 9
54#define LSS_AUDIO_LPE 9
55#define LSS_AUDIO_SSP 9
56#define LSS_I2C0 10
57#define LSS_I2C1 10
58#define LSS_I2C2 10
59#define LSS_KBD 10
60#define LSS_SPI0 10
61#define LSS_SPI1 10
62#define LSS_SPI2 10
63#define LSS_GPIO 10
64#define LSS_SRAM 11 /* used by SCU, do not touch */
65#define LSS_SD_HC2 12
66/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
67#define MRST_NUM_LSS 13
68
69#define MIN(a, b) (((a) < (b)) ? (a) : (b))
70
71#define SSMSK(mask, lss) ((mask) << ((lss) * 2))
72#define D0 0
73#define D0i1 1
74#define D0i2 2
75#define D0i3 3
76
77#define S0I3_SSS_TARGET ( \
78 SSMSK(D0i1, LSS_DMI) | \
79 SSMSK(D0i3, LSS_SD_HC0) | \
80 SSMSK(D0i3, LSS_SD_HC1) | \
81 SSMSK(D0i3, LSS_NAND) | \
82 SSMSK(D0i3, LSS_SD_HC2) | \
83 SSMSK(D0i3, LSS_IMAGING) | \
84 SSMSK(D0i3, LSS_SECURITY) | \
85 SSMSK(D0i3, LSS_DISPLAY) | \
86 SSMSK(D0i3, LSS_USB_HC) | \
87 SSMSK(D0i3, LSS_USB_OTG) | \
88 SSMSK(D0i3, LSS_AUDIO) | \
89 SSMSK(D0i1, LSS_I2C0))
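/*
 * Example: SSMSK(D0i3, LSS_NAND) == 3 << (3 * 2) == 0xC0, i.e. both
 * SSS bits for LSS 3 set, requesting D0i3 for the NAND controller.
 */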
90
91/*
92 * D0i1 on Langwell is Autonomous Clock Gating (ACG).
93 * Enable ACG on every LSS except camera and audio
94 */
95#define D0I1_ACG_SSS_TARGET \
96 (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
97
98enum cm_mode {
99 CM_NOP, /* ignore the config mode value */
100 CM_IMMEDIATE,
101 CM_DELAY,
102 CM_TRIGGER,
103 CM_INVALID
104};
105
106enum sys_state {
107 SYS_STATE_S0I0,
108 SYS_STATE_S0I1,
109 SYS_STATE_S0I2,
110 SYS_STATE_S0I3,
111 SYS_STATE_S3,
112 SYS_STATE_S5
113};
114
115#define SET_CFG_CMD 1
116
117enum int_status {
118 INT_SPURIOUS = 0,
119 INT_CMD_DONE = 1,
120 INT_CMD_ERR = 2,
121 INT_WAKE_RX = 3,
122 INT_SS_ERROR = 4,
123 INT_S0IX_MISS = 5,
124 INT_NO_ACKC6 = 6,
125 INT_INVALID = 7,
126};
127
128/* PMU register interface */
129static struct mrst_pmu_reg {
130 u32 pm_sts; /* 0x00 */
131 u32 pm_cmd; /* 0x04 */
132 u32 pm_ics; /* 0x08 */
133 u32 _resv1; /* 0x0C */
134 u32 pm_wkc[2]; /* 0x10 */
135 u32 pm_wks[2]; /* 0x18 */
136 u32 pm_ssc[4]; /* 0x20 */
137 u32 pm_sss[4]; /* 0x30 */
138 u32 pm_wssc[4]; /* 0x40 */
139 u32 pm_c3c4; /* 0x50 */
140 u32 pm_c5c6; /* 0x54 */
141 u32 pm_msi_disable; /* 0x58 */
142} *pmu_reg;
143
144static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); }
145static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); }
146static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); }
147static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); }
148
149static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
150static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
151static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
152static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
153static inline void pmu_write_wssc(u32 arg)
154 { writel(arg, &pmu_reg->pm_wssc[0]); }
155
156static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
157static inline u32 pmu_msi_is_disabled(void)
158 { return readl(&pmu_reg->pm_msi_disable); }
159
160union pmu_pm_ics {
161 struct {
162 u32 cause:8;
163 u32 enable:1;
164 u32 pending:1;
165 u32 reserved:22;
166 } bits;
167 u32 value;
168};
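/*
 * Decoding sketch (editorial): an ICS value of 0x301 parses as
 * cause = 1 (INT_CMD_DONE), enable = 1, pending = 1. The helper
 * below is illustrative, not part of the original header:
 */
static inline int pmu_ics_cmd_done(u32 value)
{
	union pmu_pm_ics ics;

	ics.value = value;
	return ics.bits.pending && (ics.bits.cause == INT_CMD_DONE);
}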
169
170static inline void pmu_irq_enable(void)
171{
172 union pmu_pm_ics pmu_ics;
173
174 pmu_ics.value = pmu_read_ics();
175 pmu_ics.bits.enable = 1;
176 pmu_write_ics(pmu_ics.value);
177}
178
179union pmu_pm_status {
180 struct {
181 u32 pmu_rev:8;
182 u32 pmu_busy:1;
183 u32 mode_id:4;
184 u32 Reserved:19;
185 } pmu_status_parts;
186 u32 pmu_status_value;
187};
188
189static inline int pmu_read_busy_status(void)
190{
191 union pmu_pm_status result;
192
193 result.pmu_status_value = pmu_read_sts();
194
195 return result.pmu_status_parts.pmu_busy;
196}
197
198/* pmu set config parameters */
199struct cfg_delay_param_t {
200 u32 cmd:8;
201 u32 ioc:1;
202 u32 cfg_mode:4;
203 u32 mode_id:3;
204 u32 sys_state:3;
205 u32 cfg_delay:8;
206 u32 rsvd:5;
207};
208
209struct cfg_trig_param_t {
210 u32 cmd:8;
211 u32 ioc:1;
212 u32 cfg_mode:4;
213 u32 mode_id:3;
214 u32 sys_state:3;
215 u32 cfg_trig_type:3;
216 u32 cfg_trig_val:8;
217 u32 cmbi:1;
218 u32 rsvd1:1;
219};
220
221union pmu_pm_set_cfg_cmd_t {
222 union {
223 struct cfg_delay_param_t d_param;
224 struct cfg_trig_param_t t_param;
225 } pmu2_params;
226 u32 pmu_pm_set_cfg_cmd_value;
227};
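/*
 * Encoding sketch: filling the delay-parameter view the way
 * pmu_issue_command() in pmu.c does (cmd = SET_CFG_CMD, cfg_mode =
 * CM_IMMEDIATE, everything else zero) yields the raw value 0x201,
 * which is also the command pmu_probe() writes directly:
 */
static inline u32 example_set_cfg_immediate(void)
{
	union pmu_pm_set_cfg_cmd_t c;

	c.pmu_pm_set_cfg_cmd_value = 0;
	c.pmu2_params.d_param.cmd = SET_CFG_CMD;
	c.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
	c.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
	return c.pmu_pm_set_cfg_cmd_value;	/* == 0x201 */
}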
228
229#ifdef FUTURE_PATCH
230extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
231#else
232static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
233#endif
234#endif
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 2b235b77d9ab..23e5b9d7977b 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -23,7 +23,66 @@
23#define XO15_SCI_CLASS DRV_NAME
24#define XO15_SCI_DEVICE_NAME "OLPC XO-1.5 SCI"
25
26static unsigned long xo15_sci_gpe;
27static bool lid_wake_on_close;
28
29/*
30 * The normal ACPI LID wakeup behavior is wake-on-open, but not
31 * wake-on-close. This is implemented as standard by the XO-1.5 DSDT.
32 *
33 * We provide here a sysfs attribute that will additionally enable
34 * wake-on-close behavior. This is useful (e.g.) when we opportunistically
35 * suspend with the display running; if the lid is then closed, we want to
36 * wake up to turn the display off.
37 *
38 * This is controlled through a custom method in the XO-1.5 DSDT.
39 */
40static int set_lid_wake_behavior(bool wake_on_close)
41{
42 struct acpi_object_list arg_list;
43 union acpi_object arg;
44 acpi_status status;
45
46 arg_list.count = 1;
47 arg_list.pointer = &arg;
48 arg.type = ACPI_TYPE_INTEGER;
49 arg.integer.value = wake_on_close;
50
51 status = acpi_evaluate_object(NULL, "\\_SB.PCI0.LID.LIDW", &arg_list, NULL);
52 if (ACPI_FAILURE(status)) {
53 pr_warning(PFX "failed to set lid behavior\n");
54 return 1;
55 }
56
57 lid_wake_on_close = wake_on_close;
58
59 return 0;
60}
61
62static ssize_t
63lid_wake_on_close_show(struct kobject *s, struct kobj_attribute *attr, char *buf)
64{
65 return sprintf(buf, "%u\n", lid_wake_on_close);
66}
67
68static ssize_t lid_wake_on_close_store(struct kobject *s,
69 struct kobj_attribute *attr,
70 const char *buf, size_t n)
71{
72 unsigned int val;
73
74 if (sscanf(buf, "%u", &val) != 1)
75 return -EINVAL;
76
77 set_lid_wake_behavior(!!val);
78
79 return n;
80}
81
82static struct kobj_attribute lid_wake_on_close_attr =
83 __ATTR(lid_wake_on_close, 0644,
84 lid_wake_on_close_show,
85 lid_wake_on_close_store);
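/*
 * Usage sketch (the path is an assumption; it depends on where the
 * ACPI device's kobject lands in sysfs):
 *
 *	echo 1 > /sys/bus/acpi/devices/<sci-device>/lid_wake_on_close
 *
 * enables wake-on-close; reading the file returns 0 or 1.
 */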
27
28static void battery_status_changed(void)
29{
@@ -91,6 +150,7 @@ static int xo15_sci_add(struct acpi_device *device)
91{
92 unsigned long long tmp;
93 acpi_status status;
153 int r;
94
95 if (!device)
96 return -EINVAL;
@@ -112,6 +172,10 @@ static int xo15_sci_add(struct acpi_device *device)
112
113 dev_info(&device->dev, "Initialized, GPE = 0x%lx\n", xo15_sci_gpe);
114
175 r = sysfs_create_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
176 if (r)
177 goto err_sysfs;
178
115 /* Flush queue, and enable all SCI events */
116 process_sci_queue();
117 olpc_ec_mask_write(EC_SCI_SRC_ALL);
@@ -123,6 +187,11 @@ static int xo15_sci_add(struct acpi_device *device)
123 device_init_wakeup(&device->dev, true);
124
125 return 0;
190
191err_sysfs:
192 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
193 cancel_work_sync(&sci_work);
194 return r;
126}
127
128static int xo15_sci_remove(struct acpi_device *device, int type)
@@ -130,6 +199,7 @@ static int xo15_sci_remove(struct acpi_device *device, int type)
130 acpi_disable_gpe(NULL, xo15_sci_gpe);
131 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
132 cancel_work_sync(&sci_work);
202 sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
133 return 0;
134}
135
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 7cce722667b8..a4bee53c2e54 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -20,6 +20,8 @@
20#include <linux/platform_device.h>
21#include <linux/of.h>
22#include <linux/syscore_ops.h>
23#include <linux/debugfs.h>
24#include <linux/mutex.h>
23
24#include <asm/geode.h>
25#include <asm/setup.h>
@@ -31,6 +33,15 @@ EXPORT_SYMBOL_GPL(olpc_platform_info);
31
32static DEFINE_SPINLOCK(ec_lock);
33
36/* debugfs interface to EC commands */
37#define EC_MAX_CMD_ARGS (5 + 1) /* cmd byte + 5 args */
38#define EC_MAX_CMD_REPLY (8)
39
40static struct dentry *ec_debugfs_dir;
41static DEFINE_MUTEX(ec_debugfs_cmd_lock);
42static unsigned char ec_debugfs_resp[EC_MAX_CMD_REPLY];
43static unsigned int ec_debugfs_resp_bytes;
44
34/* EC event mask to be applied during suspend (defining wakeup sources). */
35static u16 ec_wakeup_mask;
36
@@ -269,6 +280,91 @@ int olpc_ec_sci_query(u16 *sci_value)
269}
270EXPORT_SYMBOL_GPL(olpc_ec_sci_query);
271
283static ssize_t ec_debugfs_cmd_write(struct file *file, const char __user *buf,
284 size_t size, loff_t *ppos)
285{
286 int i, m;
287 unsigned char ec_cmd[EC_MAX_CMD_ARGS];
288 unsigned int ec_cmd_int[EC_MAX_CMD_ARGS];
289 char cmdbuf[64];
290 int ec_cmd_bytes;
291
292 mutex_lock(&ec_debugfs_cmd_lock);
293
294 size = simple_write_to_buffer(cmdbuf, sizeof(cmdbuf), ppos, buf, size);
295
296 m = sscanf(cmdbuf, "%x:%u %x %x %x %x %x", &ec_cmd_int[0],
297 &ec_debugfs_resp_bytes,
298 &ec_cmd_int[1], &ec_cmd_int[2], &ec_cmd_int[3],
299 &ec_cmd_int[4], &ec_cmd_int[5]);
300 if (m < 2 || ec_debugfs_resp_bytes > EC_MAX_CMD_REPLY) {
301 /* reset to prevent overflow on read */
302 ec_debugfs_resp_bytes = 0;
303
304 printk(KERN_DEBUG "olpc-ec: bad ec cmd: "
305 "cmd:response-count [arg1 [arg2 ...]]\n");
306 size = -EINVAL;
307 goto out;
308 }
309
310 /* convert scanf'd ints to char */
311 ec_cmd_bytes = m - 2;
312 for (i = 0; i <= ec_cmd_bytes; i++)
313 ec_cmd[i] = ec_cmd_int[i];
314
315 printk(KERN_DEBUG "olpc-ec: debugfs cmd 0x%02x with %d args "
316 "%02x %02x %02x %02x %02x, want %d returns\n",
317 ec_cmd[0], ec_cmd_bytes, ec_cmd[1], ec_cmd[2], ec_cmd[3],
318 ec_cmd[4], ec_cmd[5], ec_debugfs_resp_bytes);
319
320 olpc_ec_cmd(ec_cmd[0], (ec_cmd_bytes == 0) ? NULL : &ec_cmd[1],
321 ec_cmd_bytes, ec_debugfs_resp, ec_debugfs_resp_bytes);
322
323 printk(KERN_DEBUG "olpc-ec: response "
324 "%02x %02x %02x %02x %02x %02x %02x %02x (%d bytes expected)\n",
325 ec_debugfs_resp[0], ec_debugfs_resp[1], ec_debugfs_resp[2],
326 ec_debugfs_resp[3], ec_debugfs_resp[4], ec_debugfs_resp[5],
327 ec_debugfs_resp[6], ec_debugfs_resp[7], ec_debugfs_resp_bytes);
328
329out:
330 mutex_unlock(&ec_debugfs_cmd_lock);
331 return size;
332}
333
334static ssize_t ec_debugfs_cmd_read(struct file *file, char __user *buf,
335 size_t size, loff_t *ppos)
336{
337 unsigned int i, r;
338 char *rp;
339 char respbuf[64];
340
341 mutex_lock(&ec_debugfs_cmd_lock);
342 rp = respbuf;
343 rp += sprintf(rp, "%02x", ec_debugfs_resp[0]);
344 for (i = 1; i < ec_debugfs_resp_bytes; i++)
345 rp += sprintf(rp, ", %02x", ec_debugfs_resp[i]);
346 mutex_unlock(&ec_debugfs_cmd_lock);
347 rp += sprintf(rp, "\n");
348
349 r = rp - respbuf;
350 return simple_read_from_buffer(buf, size, ppos, respbuf, r);
351}
352
353static const struct file_operations ec_debugfs_genops = {
354 .write = ec_debugfs_cmd_write,
355 .read = ec_debugfs_cmd_read,
356};
357
358static void setup_debugfs(void)
359{
360 ec_debugfs_dir = debugfs_create_dir("olpc-ec", 0);
361 if (ec_debugfs_dir == ERR_PTR(-ENODEV))
362 return;
363
364 debugfs_create_file("cmd", 0600, ec_debugfs_dir, NULL,
365 &ec_debugfs_genops);
366}
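/*
 * Usage sketch (hypothetical EC opcode 0x17): writing "17:1" to
 * /sys/kernel/debug/olpc-ec/cmd sends command 0x17 with no argument
 * bytes and asks for one response byte; a subsequent read of the same
 * file prints that byte in hex. Format: cmd:response-count [args...].
 */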
367
272static int olpc_ec_suspend(void)
273{
274 return olpc_ec_mask_write(ec_wakeup_mask);
@@ -372,6 +468,7 @@ static int __init olpc_init(void)
372 }
373
374 register_syscore_ops(&olpc_syscore_ops);
471 setup_debugfs();
375
376 return 0;
377}
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 7e004acbe526..7a9ad30d6c9f 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -17,8 +17,6 @@
17/* Verify that the configuration block really is there */
18#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
19
20#define NAME "scx200"
21
22MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
23MODULE_DESCRIPTION("NatSemi SCx200 Driver");
24MODULE_LICENSE("GPL");
@@ -29,10 +27,10 @@ unsigned long scx200_gpio_shadow[2];
29unsigned scx200_cb_base = 0;
30
31static struct pci_device_id scx200_tbl[] = {
32 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
33 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
34 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
35 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
30 { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
31 { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
32 { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
33 { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
36 { },
37};
38MODULE_DEVICE_TABLE(pci,scx200_tbl);
@@ -63,10 +61,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
63 if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
64 pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
65 base = pci_resource_start(pdev, 0);
66 printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
64 pr_info("GPIO base 0x%x\n", base);
67
68 if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) {
69 printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
66 if (!request_region(base, SCx200_GPIO_SIZE,
67 "NatSemi SCx200 GPIO")) {
68 pr_err("can't allocate I/O for GPIOs\n");
70 return -EBUSY;
71 }
72
@@ -82,11 +81,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
82 if (scx200_cb_probe(base)) {
83 scx200_cb_base = base;
84 } else {
85 printk(KERN_WARNING NAME ": Configuration Block not found\n");
84 pr_warn("Configuration Block not found\n");
86 return -ENODEV;
87 }
88 }
89 printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
88 pr_info("Configuration Block base 0x%x\n", scx200_cb_base);
90 }
91
92 return 0;
@@ -111,8 +110,7 @@ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
111
112static int __init scx200_init(void)
113{
114 printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
113 pr_info("NatSemi SCx200 Driver\n");
115
116 return pci_register_driver(&scx200_pci_driver);
117}
118
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 5b552198f774..3ae0e61abd23 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -157,13 +157,14 @@ static int __init uvhub_to_first_apicid(int uvhub)
157 * clear of the Timeout bit (as well) will free the resource. No reply will
158 * be sent (the hardware will only do one reply per message).
159 */
160static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp)
160static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
161 int do_acknowledge)
161{
162 unsigned long dw;
163 struct bau_pq_entry *msg;
164
165 msg = mdp->msg;
166 if (!msg->canceled) {
167 if (!msg->canceled && do_acknowledge) {
167 dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
168 write_mmr_sw_ack(dw);
169 }
@@ -212,8 +213,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
212 if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
213 unsigned long mr;
214 /*
215 * is the resource timed out?
216 * Is the resource timed out?
216 * make everyone ignore the cancelled message.
217 * Make everyone ignore the cancelled message.
217 */
218 msg2->canceled = 1;
219 stat->d_canceled++;
@@ -231,8 +232,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
231 * Do all the things a cpu should do for a TLB shootdown message.
232 * Other cpu's may come here at the same time for this message.
233 */
234static void bau_process_message(struct msg_desc *mdp,
235 struct bau_control *bcp)
235static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
236 int do_acknowledge)
236{
237 short socket_ack_count = 0;
238 short *sp;
@@ -284,8 +285,9 @@ static void bau_process_message(struct msg_desc *mdp,
284 if (msg_ack_count == bcp->cpus_in_uvhub) {
285 /*
286 * All cpus in uvhub saw it; reply
288 * (unless we are in the UV2 workaround)
287 */
288 reply_to_message(mdp, bcp);
290 reply_to_message(mdp, bcp, do_acknowledge);
289 }
290 }
291
@@ -491,27 +493,138 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
491/*
492 * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
493 */
494static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu)
496static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
495{
496 unsigned long descriptor_status;
497 unsigned long descriptor_status2;
498
499 descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
500 descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL;
502 descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
501 descriptor_status = (descriptor_status << 1) | descriptor_status2;
502 return descriptor_status;
503}
504
507/*
508 * Return whether the status of the descriptor that is normally used for this
509 * cpu (the one indexed by its hub-relative cpu number) is busy.
510 * The status of the original 32 descriptors is always reflected in the 64
511 * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
512 * The bit provided by the activation_status_2 register is irrelevant to
513 * the status if it is only being tested for busy or not busy.
514 */
515int normal_busy(struct bau_control *bcp)
516{
517 int cpu = bcp->uvhub_cpu;
518 int mmr_offset;
519 int right_shift;
520
521 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
522 right_shift = cpu * UV_ACT_STATUS_SIZE;
523 return (((((read_lmmr(mmr_offset) >> right_shift) &
524 UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
525}
526
527/*
528 * Entered when a bau descriptor has gone into a permanent busy wait because
529 * of a hardware bug.
530 * Workaround the bug.
531 */
532int handle_uv2_busy(struct bau_control *bcp)
533{
534 int busy_one = bcp->using_desc;
535 int normal = bcp->uvhub_cpu;
536 int selected = -1;
537 int i;
538 unsigned long descriptor_status;
539 unsigned long status;
540 int mmr_offset;
541 struct bau_desc *bau_desc_old;
542 struct bau_desc *bau_desc_new;
543 struct bau_control *hmaster = bcp->uvhub_master;
544 struct ptc_stats *stat = bcp->statp;
545 cycles_t ttm;
546
547 stat->s_uv2_wars++;
548 spin_lock(&hmaster->uvhub_lock);
549 /* try for the original first */
550 if (busy_one != normal) {
551 if (!normal_busy(bcp))
552 selected = normal;
553 }
554 if (selected < 0) {
555 /* can't use the normal, select an alternate */
556 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
557 descriptor_status = read_lmmr(mmr_offset);
558
559 /* scan available descriptors 32-63 */
560 for (i = 0; i < UV_CPUS_PER_AS; i++) {
561 if ((hmaster->inuse_map & (1 << i)) == 0) {
562 status = ((descriptor_status >>
563 (i * UV_ACT_STATUS_SIZE)) &
564 UV_ACT_STATUS_MASK) << 1;
565 if (status != UV2H_DESC_BUSY) {
566 selected = i + UV_CPUS_PER_AS;
567 break;
568 }
569 }
570 }
571 }
572
573 if (busy_one != normal)
574 /* mark the busy alternate as not in-use */
575 hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
576
577 if (selected >= 0) {
578 /* switch to the selected descriptor */
579 if (selected != normal) {
580 /* set the selected alternate as in-use */
581 hmaster->inuse_map |=
582 (1 << (selected - UV_CPUS_PER_AS));
583 if (selected > stat->s_uv2_wars_hw)
584 stat->s_uv2_wars_hw = selected;
585 }
586 bau_desc_old = bcp->descriptor_base;
587 bau_desc_old += (ITEMS_PER_DESC * busy_one);
588 bcp->using_desc = selected;
589 bau_desc_new = bcp->descriptor_base;
590 bau_desc_new += (ITEMS_PER_DESC * selected);
591 *bau_desc_new = *bau_desc_old;
592 } else {
593 /*
594 * All are busy. Wait for the normal one for this cpu to
595 * free up.
596 */
597 stat->s_uv2_war_waits++;
598 spin_unlock(&hmaster->uvhub_lock);
599 ttm = get_cycles();
600 do {
601 cpu_relax();
602 } while (normal_busy(bcp));
603 spin_lock(&hmaster->uvhub_lock);
604 /* switch to the original descriptor */
605 bcp->using_desc = normal;
606 bau_desc_old = bcp->descriptor_base;
607 bau_desc_old += (ITEMS_PER_DESC * busy_one);
609 bau_desc_new = bcp->descriptor_base;
610 bau_desc_new += (ITEMS_PER_DESC * normal);
611 *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
612 }
613 spin_unlock(&hmaster->uvhub_lock);
614 return FLUSH_RETRY_BUSYBUG;
615}
616
505static int uv2_wait_completion(struct bau_desc *bau_desc,
506 unsigned long mmr_offset, int right_shift,
507 struct bau_control *bcp, long try)
508{
509 unsigned long descriptor_stat;
510 cycles_t ttm;
511 int cpu = bcp->uvhub_cpu;
623 int desc = bcp->using_desc;
624 long busy_reps = 0;
512 struct ptc_stats *stat = bcp->statp;
513
514 descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
627 descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
515
516 /* spin on the status MMR, waiting for it to go idle */
517 while (descriptor_stat != UV2H_DESC_IDLE) {
@@ -522,32 +635,35 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
522 * our message and its state will stay IDLE.
523 */
524 if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
525 (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) ||
526 (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
527 stat->s_stimeout++;
528 return FLUSH_GIVEUP;
641 } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
642 stat->s_strongnacks++;
643 bcp->conseccompletes = 0;
644 return FLUSH_GIVEUP;
529 } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
530 stat->s_dtimeout++;
531 ttm = get_cycles();
532 /*
533 * Our retries may be blocked by all destination
534 * swack resources being consumed, and a timeout
535 * pending. In that case hardware returns the
536 * ERROR that looks like a destination timeout.
537 */
538 if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
539 bcp->conseccompletes = 0;
540 return FLUSH_RETRY_PLUGGED;
541 }
542 bcp->conseccompletes = 0;
543 return FLUSH_RETRY_TIMEOUT;
544 } else {
650 busy_reps++;
651 if (busy_reps > 1000000) {
652 /* not to hammer on the clock */
653 busy_reps = 0;
654 ttm = get_cycles();
655 if ((ttm - bcp->send_message) >
656 (bcp->clocks_per_100_usec)) {
657 return handle_uv2_busy(bcp);
658 }
659 }
545 /*
546 * descriptor_stat is still BUSY
547 */
548 cpu_relax();
549 }
550 descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
665 descriptor_stat = uv2_read_status(mmr_offset, right_shift,
666 desc);
551 }
552 bcp->conseccompletes++;
553 return FLUSH_COMPLETE;
@@ -563,17 +679,17 @@ static int wait_completion(struct bau_desc *bau_desc,
563{
564 int right_shift;
565 unsigned long mmr_offset;
566 int cpu = bcp->uvhub_cpu;
682 int desc = bcp->using_desc;
567
568 if (cpu < UV_CPUS_PER_AS) {
684 if (desc < UV_CPUS_PER_AS) {
569 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
570 right_shift = cpu * UV_ACT_STATUS_SIZE;
686 right_shift = desc * UV_ACT_STATUS_SIZE;
571 } else {
572 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
573 right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
689 right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
574 }
575
576 if (is_uv1_hub())
692 if (bcp->uvhub_version == 1)
577 return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
578 bcp, try);
579 else
@@ -752,19 +868,22 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
752 * Returns 1 if it gives up entirely and the original cpu mask is to be
753 * returned to the kernel.
754 */
755int uv_flush_send_and_wait(struct bau_desc *bau_desc,
756 struct cpumask *flush_mask, struct bau_control *bcp)
871int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
757{
758 int seq_number = 0;
759 int completion_stat = 0;
875 int uv1 = 0;
760 long try = 0;
761 unsigned long index;
762 cycles_t time1;
763 cycles_t time2;
764 struct ptc_stats *stat = bcp->statp;
765 struct bau_control *hmaster = bcp->uvhub_master;
882 struct uv1_bau_msg_header *uv1_hdr = NULL;
883 struct uv2_bau_msg_header *uv2_hdr = NULL;
884 struct bau_desc *bau_desc;
766
767 if (is_uv1_hub())
886 if (bcp->uvhub_version == 1)
768 uv1_throttle(hmaster, stat);
769
770 while (hmaster->uvhub_quiesce)
@@ -772,22 +891,39 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
772
773 time1 = get_cycles();
774 do {
775 if (try == 0) {
776 bau_desc->header.msg_type = MSG_REGULAR;
894 bau_desc = bcp->descriptor_base;
895 bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
896 if (bcp->uvhub_version == 1) {
897 uv1 = 1;
898 uv1_hdr = &bau_desc->header.uv1_hdr;
899 } else
900 uv2_hdr = &bau_desc->header.uv2_hdr;
901 if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
902 if (uv1)
903 uv1_hdr->msg_type = MSG_REGULAR;
904 else
905 uv2_hdr->msg_type = MSG_REGULAR;
777 seq_number = bcp->message_number++;
778 } else {
779 bau_desc->header.msg_type = MSG_RETRY;
908 if (uv1)
909 uv1_hdr->msg_type = MSG_RETRY;
910 else
911 uv2_hdr->msg_type = MSG_RETRY;
780 stat->s_retry_messages++;
781 }
782
783 bau_desc->header.sequence = seq_number;
784 index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
915 if (uv1)
916 uv1_hdr->sequence = seq_number;
917 else
918 uv2_hdr->sequence = seq_number;
919 index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
785 bcp->send_message = get_cycles();
786
787 write_mmr_activation(index);
788
789 try++;
790 completion_stat = wait_completion(bau_desc, bcp, try);
926 /* UV2: wait_completion() may change the bcp->using_desc */
791
792 handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
793
@@ -798,6 +934,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
798 }
799 cpu_relax();
800 } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
937 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
801 (completion_stat == FLUSH_RETRY_TIMEOUT));
802
803 time2 = get_cycles();
@@ -812,6 +949,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
812 record_send_stats(time1, time2, bcp, stat, completion_stat, try);
813
814 if (completion_stat == FLUSH_GIVEUP)
952 /* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
815 return 1;
816 return 0;
817}
@@ -967,7 +1105,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
967 stat->s_ntargself++;
968
969 bau_desc = bcp->descriptor_base;
970 bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu;
1108 bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
971 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
972 if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
973 return NULL;
@@ -980,13 +1118,86 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
980 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
981 * or 1 if it gave up and the original cpumask should be returned.
982 */
983 if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
1121 if (!uv_flush_send_and_wait(flush_mask, bcp))
984 return NULL;
985 else
986 return cpumask;
987}
988
989/*
1128 * Search the message queue for any 'other' message with the same software
1129 * acknowledge resource bit vector.
1130 */
1131struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
1132 struct bau_control *bcp, unsigned char swack_vec)
1133{
1134 struct bau_pq_entry *msg_next = msg + 1;
1135
1136 if (msg_next > bcp->queue_last)
1137 msg_next = bcp->queue_first;
1138 while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
1139 if (msg_next->swack_vec == swack_vec)
1140 return msg_next;
1141 msg_next++;
1142 if (msg_next > bcp->queue_last)
1143 msg_next = bcp->queue_first;
1144 }
1145 return NULL;
1146}
1147
1148/*
1149 * UV2 needs to work around a bug in which an arriving message has not
1150 * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
1151 * Such a message must be ignored.
1152 */
1153void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
1154{
1155 unsigned long mmr_image;
1156 unsigned char swack_vec;
1157 struct bau_pq_entry *msg = mdp->msg;
1158 struct bau_pq_entry *other_msg;
1159
1160 mmr_image = read_mmr_sw_ack();
1161 swack_vec = msg->swack_vec;
1162
1163 if ((swack_vec & mmr_image) == 0) {
1164 /*
1165 * This message was assigned a swack resource, but no
1166 * reserved acknowledgment is pending.
1167 * The bug has prevented this message from setting the MMR.
1168 * And no other message has used the same sw_ack resource.
1169 * Do the requested shootdown but do not reply to the msg.
1170 * (the 0 means make no acknowledge)
1171 */
1172 bau_process_message(mdp, bcp, 0);
1173 return;
1174 }
1175
1176 /*
1177 * Some message has set the MMR 'pending' bit; it might have been
1178 * another message. Look for that message.
1179 */
1180 other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
1181 if (other_msg) {
1182 /* There is another. Do not ack the current one. */
1183 bau_process_message(mdp, bcp, 0);
1184 /*
1185 * Let the natural processing of that message acknowledge
1186 * it. Don't get the processing of sw_ack's out of order.
1187 */
1188 return;
1189 }
1190
1191 /*
1192 * There is no other message using this sw_ack, so it is safe to
1193 * acknowledge it.
1194 */
1195 bau_process_message(mdp, bcp, 1);
1196
1197 return;
1198}
1199
1200/*
990 * The BAU message interrupt comes here. (registered by set_intr_gate)
991 * See entry_64.S
992 *
@@ -1009,6 +1220,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
1009 struct ptc_stats *stat;
1010 struct msg_desc msgdesc;
1011
1223 ack_APIC_irq();
1012 time_start = get_cycles();
1013
1014 bcp = &per_cpu(bau_control, smp_processor_id());
@@ -1022,9 +1234,11 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
1022 count++;
1023
1024 msgdesc.msg_slot = msg - msgdesc.queue_first;
1025 msgdesc.swack_slot = ffs(msg->swack_vec) - 1;
1026 msgdesc.msg = msg;
1027 bau_process_message(&msgdesc, bcp);
1238 if (bcp->uvhub_version == 2)
1239 process_uv2_message(&msgdesc, bcp);
1240 else
1241 bau_process_message(&msgdesc, bcp, 1);
1028
1029 msg++;
1030 if (msg > msgdesc.queue_last)
@@ -1036,8 +1250,6 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
1036 stat->d_nomsg++;
1037 else if (count > 1)
1038 stat->d_multmsg++;
1039
1040 ack_APIC_irq();
1041}
1042
1043/* 1255/*
@@ -1083,7 +1295,7 @@ static void __init enable_timeouts(void)
1083 */
1084 mmr_image |= (1L << SOFTACK_MSHIFT);
1085 if (is_uv2_hub()) {
1086 mmr_image |= (1L << UV2_LEG_SHFT);
1298 mmr_image &= ~(1L << UV2_LEG_SHFT);
1087 mmr_image |= (1L << UV2_EXT_SHFT);
1088 }
1089 write_mmr_misc_control(pnode, mmr_image);
@@ -1136,13 +1348,13 @@ static int ptc_seq_show(struct seq_file *file, void *data)
1136 seq_printf(file,
1137 "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
1138 seq_printf(file,
1139 "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok ");
1351 "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
1140 seq_printf(file,
1141 "resetp resett giveup sto bz throt swack recv rtime ");
1142 seq_printf(file,
1143 "all one mult none retry canc nocan reset rcan ");
1144 seq_printf(file,
1145 "disable enable\n");
1357 "disable enable wars warshw warwaits\n");
1146 }
1147 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
1148 stat = &per_cpu(ptcstats, cpu);
@@ -1154,10 +1366,10 @@ static int ptc_seq_show(struct seq_file *file, void *data)
1154 stat->s_ntargremotes, stat->s_ntargcpu,
1155 stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
1156 stat->s_ntarguvhub, stat->s_ntarguvhub16);
1157 seq_printf(file, "%ld %ld %ld %ld %ld ",
1369 seq_printf(file, "%ld %ld %ld %ld %ld %ld ",
1158 stat->s_ntarguvhub8, stat->s_ntarguvhub4,
1159 stat->s_ntarguvhub2, stat->s_ntarguvhub1,
1160 stat->s_dtimeout);
1372 stat->s_dtimeout, stat->s_strongnacks);
1161 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
1162 stat->s_retry_messages, stat->s_retriesok,
1163 stat->s_resets_plug, stat->s_resets_timeout,
@@ -1173,8 +1385,10 @@ static int ptc_seq_show(struct seq_file *file, void *data)
1173 stat->d_nomsg, stat->d_retries, stat->d_canceled, 1385 stat->d_nomsg, stat->d_retries, stat->d_canceled,
1174 stat->d_nocanceled, stat->d_resets, 1386 stat->d_nocanceled, stat->d_resets,
1175 stat->d_rcanceled); 1387 stat->d_rcanceled);
1176 seq_printf(file, "%ld %ld\n", 1388 seq_printf(file, "%ld %ld %ld %ld %ld\n",
1177 stat->s_bau_disabled, stat->s_bau_reenabled); 1389 stat->s_bau_disabled, stat->s_bau_reenabled,
1390 stat->s_uv2_wars, stat->s_uv2_wars_hw,
1391 stat->s_uv2_war_waits);
1178 } 1392 }
1179 return 0; 1393 return 0;
1180} 1394}
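Header and body stay in lockstep across these ptc_seq_show() hunks: the new column names (snacks, wars, warshw, warwaits) line up with the new counters (s_strongnacks plus the s_uv2_wars/s_uv2_wars_hw/s_uv2_war_waits trio), and each seq_printf format gains exactly one %ld per added field. The file behind this seq_show is /proc/sgi_uv/ptc_statistics (path from memory of tlb_uv.c's UV_PTC_BASENAME, not visible in this hunk), one row per online CPU, so `cat /proc/sgi_uv/ptc_statistics` is the quick way to confirm the new columns after boot.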
@@ -1432,12 +1646,15 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
1432{ 1646{
1433 int i; 1647 int i;
1434 int cpu; 1648 int cpu;
1649 int uv1 = 0;
1435 unsigned long gpa; 1650 unsigned long gpa;
1436 unsigned long m; 1651 unsigned long m;
1437 unsigned long n; 1652 unsigned long n;
1438 size_t dsize; 1653 size_t dsize;
1439 struct bau_desc *bau_desc; 1654 struct bau_desc *bau_desc;
1440 struct bau_desc *bd2; 1655 struct bau_desc *bd2;
1656 struct uv1_bau_msg_header *uv1_hdr;
1657 struct uv2_bau_msg_header *uv2_hdr;
1441 struct bau_control *bcp; 1658 struct bau_control *bcp;
1442 1659
1443 /* 1660 /*
@@ -1451,6 +1668,8 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
1451 gpa = uv_gpa(bau_desc); 1668 gpa = uv_gpa(bau_desc);
1452 n = uv_gpa_to_gnode(gpa); 1669 n = uv_gpa_to_gnode(gpa);
1453 m = uv_gpa_to_offset(gpa); 1670 m = uv_gpa_to_offset(gpa);
1671 if (is_uv1_hub())
1672 uv1 = 1;
1454 1673
1455 /* the 14-bit pnode */ 1674 /* the 14-bit pnode */
1456 write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); 1675 write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
@@ -1461,21 +1680,33 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
1461 */ 1680 */
1462 for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) { 1681 for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
1463 memset(bd2, 0, sizeof(struct bau_desc)); 1682 memset(bd2, 0, sizeof(struct bau_desc));
1464 bd2->header.swack_flag = 1; 1683 if (uv1) {
1465 /* 1684 uv1_hdr = &bd2->header.uv1_hdr;
1466 * The base_dest_nasid set in the message header is the nasid 1685 uv1_hdr->swack_flag = 1;
1467 * of the first uvhub in the partition. The bit map will 1686 /*
1468 * indicate destination pnode numbers relative to that base. 1687 * The base_dest_nasid set in the message header
1469 * They may not be consecutive if nasid striding is being used. 1688 * is the nasid of the first uvhub in the partition.
1470 */ 1689 * The bit map will indicate destination pnode numbers
1471 bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); 1690 * relative to that base. They may not be consecutive
1472 bd2->header.dest_subnodeid = UV_LB_SUBNODEID; 1691 * if nasid striding is being used.
1473 bd2->header.command = UV_NET_ENDPOINT_INTD; 1692 */
1474 bd2->header.int_both = 1; 1693 uv1_hdr->base_dest_nasid =
1475 /* 1694 UV_PNODE_TO_NASID(base_pnode);
1476 * all others need to be set to zero: 1695 uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID;
1477 * fairness chaining multilevel count replied_to 1696 uv1_hdr->command = UV_NET_ENDPOINT_INTD;
1478 */ 1697 uv1_hdr->int_both = 1;
1698 /*
1699 * all others need to be set to zero:
1700 * fairness chaining multilevel count replied_to
1701 */
1702 } else {
1703 uv2_hdr = &bd2->header.uv2_hdr;
1704 uv2_hdr->swack_flag = 1;
1705 uv2_hdr->base_dest_nasid =
1706 UV_PNODE_TO_NASID(base_pnode);
1707 uv2_hdr->dest_subnodeid = UV_LB_SUBNODEID;
1708 uv2_hdr->command = UV_NET_ENDPOINT_INTD;
1709 }
1479 } 1710 }
1480 for_each_present_cpu(cpu) { 1711 for_each_present_cpu(cpu) {
1481 if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) 1712 if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
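Stepping back from the activation_descriptor_init() hunk: the loop now reaches bd2->header.uv1_hdr and bd2->header.uv2_hdr, so the descriptor header must have become a union of the two layouts on the uv_bau.h side of this patch (not shown in this section). The UV2 branch also pointedly does not set int_both, so that bit evidently has no UV2 equivalent. Assumed shape, field lists abbreviated:

    /* assumption: the matching uv_bau.h change, not part of this diff */
    struct bau_desc {
            union {
                    struct uv1_bau_msg_header uv1_hdr;
                    struct uv2_bau_msg_header uv2_hdr;
            } header;
            /* payload, distribution map, ... */
    };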
@@ -1531,6 +1762,7 @@ static void pq_init(int node, int pnode)
1531 write_mmr_payload_first(pnode, pn_first); 1762 write_mmr_payload_first(pnode, pn_first);
1532 write_mmr_payload_tail(pnode, first); 1763 write_mmr_payload_tail(pnode, first);
1533 write_mmr_payload_last(pnode, last); 1764 write_mmr_payload_last(pnode, last);
1765 write_gmmr_sw_ack(pnode, 0xffffUL);
1534 1766
1535 /* in effect, all msg_type's are set to MSG_NOOP */ 1767 /* in effect, all msg_type's are set to MSG_NOOP */
1536 memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); 1768 memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
@@ -1584,14 +1816,14 @@ static int calculate_destination_timeout(void)
1584 ts_ns = base * mult1 * mult2; 1816 ts_ns = base * mult1 * mult2;
1585 ret = ts_ns / 1000; 1817 ret = ts_ns / 1000;
1586 } else { 1818 } else {
1587 /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ 1819 /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */
1588 mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); 1820 mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
1589 mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; 1821 mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
1590 if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) 1822 if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
1591 mult1 = 80; 1823 base = 80;
1592 else 1824 else
1593 mult1 = 10; 1825 base = 10;
1594 base = mmr_image & UV2_ACK_MASK; 1826 mult1 = mmr_image & UV2_ACK_MASK;
1595 ret = mult1 * base; 1827 ret = mult1 * base;
1596 } 1828 }
1597 return ret; 1829 return ret;
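The calculate_destination_timeout() fix above is a swapped-operands bug: the old UV2 code treated the 10/80 select as the multiplier and the 3-bit field as the base, while the corrected code reads the UV2_ACK_UNITS bit as picking a 10 us or 80 us base unit and the masked low bits as the multiplier. A standalone check of the corrected arithmetic (the bit position and mask here are stand-ins, not the real UV2_ACK_* constants):

    #include <stdio.h>

    #define ACK_UNITS_BIT (1UL << 3)  /* stand-in: 0 -> 10us, 1 -> 80us base */
    #define ACK_MULT_MASK 0x7UL       /* stand-in: 3-bit multiplier */

    static unsigned long dest_timeout_us(unsigned long field)
    {
            unsigned long base = (field & ACK_UNITS_BIT) ? 80 : 10;
            unsigned long mult = field & ACK_MULT_MASK;

            return base * mult;
    }

    int main(void)
    {
            printf("%lu\n", dest_timeout_us(ACK_UNITS_BIT | 3)); /* 240 */
            printf("%lu\n", dest_timeout_us(5));                 /* 50 */
            return 0;
    }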
@@ -1618,6 +1850,9 @@ static void __init init_per_cpu_tunables(void)
1618 bcp->cong_response_us = congested_respns_us; 1850 bcp->cong_response_us = congested_respns_us;
1619 bcp->cong_reps = congested_reps; 1851 bcp->cong_reps = congested_reps;
1620 bcp->cong_period = congested_period; 1852 bcp->cong_period = congested_period;
1853 bcp->clocks_per_100_usec = usec_2_cycles(100);
1854 spin_lock_init(&bcp->queue_lock);
1855 spin_lock_init(&bcp->uvhub_lock);
1621 } 1856 }
1622} 1857}
1623 1858
@@ -1728,8 +1963,17 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
1728 bcp->cpus_in_socket = sdp->num_cpus; 1963 bcp->cpus_in_socket = sdp->num_cpus;
1729 bcp->socket_master = *smasterp; 1964 bcp->socket_master = *smasterp;
1730 bcp->uvhub = bdp->uvhub; 1965 bcp->uvhub = bdp->uvhub;
1966 if (is_uv1_hub())
1967 bcp->uvhub_version = 1;
1968 else if (is_uv2_hub())
1969 bcp->uvhub_version = 2;
1970 else {
1971 printk(KERN_EMERG "uvhub version not 1 or 2\n");
1972 return 1;
1973 }
1731 bcp->uvhub_master = *hmasterp; 1974 bcp->uvhub_master = *hmasterp;
1732 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; 1975 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
1976 bcp->using_desc = bcp->uvhub_cpu;
1733 if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { 1977 if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
1734 printk(KERN_EMERG "%d cpus per uvhub invalid\n", 1978 printk(KERN_EMERG "%d cpus per uvhub invalid\n",
1735 bcp->uvhub_cpu); 1979 bcp->uvhub_cpu);
@@ -1845,6 +2089,8 @@ static int __init uv_bau_init(void)
1845 uv_base_pnode = uv_blade_to_pnode(uvhub); 2089 uv_base_pnode = uv_blade_to_pnode(uvhub);
1846 } 2090 }
1847 2091
2092 enable_timeouts();
2093
1848 if (init_per_cpu(nuvhubs, uv_base_pnode)) { 2094 if (init_per_cpu(nuvhubs, uv_base_pnode)) {
1849 nobau = 1; 2095 nobau = 1;
1850 return 0; 2096 return 0;
@@ -1855,7 +2101,6 @@ static int __init uv_bau_init(void)
1855 if (uv_blade_nr_possible_cpus(uvhub)) 2101 if (uv_blade_nr_possible_cpus(uvhub))
1856 init_uvhub(uvhub, vector, uv_base_pnode); 2102 init_uvhub(uvhub, vector, uv_base_pnode);
1857 2103
1858 enable_timeouts();
1859 alloc_intr_gate(vector, uv_bau_message_intr1); 2104 alloc_intr_gate(vector, uv_bau_message_intr1);
1860 2105
1861 for_each_possible_blade(uvhub) { 2106 for_each_possible_blade(uvhub) {
@@ -1867,7 +2112,8 @@ static int __init uv_bau_init(void)
1867 val = 1L << 63; 2112 val = 1L << 63;
1868 write_gmmr_activation(pnode, val); 2113 write_gmmr_activation(pnode, val);
1869 mmr = 1; /* should be 1 to broadcast to both sockets */ 2114 mmr = 1; /* should be 1 to broadcast to both sockets */
1870 write_mmr_data_broadcast(pnode, mmr); 2115 if (!is_uv1_hub())
2116 write_mmr_data_broadcast(pnode, mmr);
1871 } 2117 }
1872 } 2118 }
1873 2119
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 374a05d8ad22..f25c2765a5c9 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -25,7 +25,7 @@ struct uv_irq_2_mmr_pnode{
25 int irq; 25 int irq;
26}; 26};
27 27
28static spinlock_t uv_irq_lock; 28static DEFINE_SPINLOCK(uv_irq_lock);
29static struct rb_root uv_irq_root; 29static struct rb_root uv_irq_root;
30 30
31static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); 31static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
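The uv_irq.c one-liner is a correctness fix, not a cleanup: a file-scope spinlock_t is merely zero-initialized, which is not a validly initialized lock once options like CONFIG_DEBUG_SPINLOCK or lockdep are in play, and nothing in this file appears to call spin_lock_init(). DEFINE_SPINLOCK is the compile-time-initialized equivalent:

    /* before: declared, never initialized */
    static spinlock_t uv_irq_lock;

    /* after: statically initialized, as if spin_lock_init() ran at boot */
    static DEFINE_SPINLOCK(uv_irq_lock);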
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 9f29a01ee1b3..5032e0d19b86 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -37,7 +37,7 @@ static void uv_rtc_timer_setup(enum clock_event_mode,
37 37
38static struct clocksource clocksource_uv = { 38static struct clocksource clocksource_uv = {
39 .name = RTC_NAME, 39 .name = RTC_NAME,
40 .rating = 400, 40 .rating = 299,
41 .read = uv_read_rtc, 41 .read = uv_read_rtc,
42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, 42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
43 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 43 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -379,10 +379,6 @@ static __init int uv_rtc_setup_clock(void)
379 if (!is_uv_system()) 379 if (!is_uv_system())
380 return -ENODEV; 380 return -ENODEV;
381 381
382 /* If single blade, prefer tsc */
383 if (uv_num_possible_blades() == 1)
384 clocksource_uv.rating = 250;
385
386 rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second); 382 rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
387 if (rc) 383 if (rc)
388 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); 384 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
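Dropping the UV RTC rating from 400 to 299 flips the default clocksource choice system-wide rather than only on single-blade machines, which is why the single-blade special case below it can go. Context, quoted from memory of include/linux/clocksource.h rather than from this diff:

    /* clocksource rating bands: 1-99 unfit for real use, 100-199 usable,
     * 200-299 good, 300-399 desired, 400-499 perfect. The x86 TSC
     * registers at 300, so a UV RTC at 299 yields to any healthy TSC. */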
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index f10c0afa1cb4..218cdb16163c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -20,6 +20,7 @@
20#include <asm/xcr.h> 20#include <asm/xcr.h>
21#include <asm/suspend.h> 21#include <asm/suspend.h>
22#include <asm/debugreg.h> 22#include <asm/debugreg.h>
23#include <asm/fpu-internal.h> /* pcntxt_mask */
23 24
24#ifdef CONFIG_X86_32 25#ifdef CONFIG_X86_32
25static struct saved_context saved_context; 26static struct saved_context saved_context;
@@ -114,7 +115,7 @@ static void __save_processor_state(struct saved_context *ctxt)
114void save_processor_state(void) 115void save_processor_state(void)
115{ 116{
116 __save_processor_state(&saved_context); 117 __save_processor_state(&saved_context);
117 save_sched_clock_state(); 118 x86_platform.save_sched_clock_state();
118} 119}
119#ifdef CONFIG_X86_32 120#ifdef CONFIG_X86_32
120EXPORT_SYMBOL(save_processor_state); 121EXPORT_SYMBOL(save_processor_state);
@@ -224,6 +225,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
224 fix_processor_context(); 225 fix_processor_context();
225 226
226 do_fpu_end(); 227 do_fpu_end();
228 x86_platform.restore_sched_clock_state();
227 mtrr_bp_restore(); 229 mtrr_bp_restore();
228} 230}
229 231
@@ -231,7 +233,6 @@ static void __restore_processor_state(struct saved_context *ctxt)
231void restore_processor_state(void) 233void restore_processor_state(void)
232{ 234{
233 __restore_processor_state(&saved_context); 235 __restore_processor_state(&saved_context);
234 restore_sched_clock_state();
235} 236}
236#ifdef CONFIG_X86_32 237#ifdef CONFIG_X86_32
237EXPORT_SYMBOL(restore_processor_state); 238EXPORT_SYMBOL(restore_processor_state);
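Routing the sched-clock snapshot through x86_platform turns two hard-wired calls into overridable platform hooks, so a platform whose sched_clock is not TSC-backed (kvmclock is the obvious candidate) can substitute its own suspend/resume bookkeeping. The defaults are presumably wired up elsewhere in the same series along these lines (the tsc_* names are an assumption based on the TSC code, not shown in this diff):

    /* assumed default wiring in the x86 setup code */
    x86_platform.save_sched_clock_state = tsc_save_sched_clock_state;
    x86_platform.restore_sched_clock_state = tsc_restore_sched_clock_state;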
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 3769079874d8..74202c1910cd 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -10,7 +10,6 @@
10#include <linux/suspend.h> 10#include <linux/suspend.h>
11#include <linux/bootmem.h> 11#include <linux/bootmem.h>
12 12
13#include <asm/system.h>
14#include <asm/page.h> 13#include <asm/page.h>
15#include <asm/pgtable.h> 14#include <asm/pgtable.h>
16#include <asm/mmzone.h> 15#include <asm/mmzone.h>
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
new file mode 100644
index 000000000000..3236aebc828d
--- /dev/null
+++ b/arch/x86/syscalls/Makefile
@@ -0,0 +1,55 @@
1out := $(obj)/../include/generated/asm
2
3# Create output directory if not already present
4_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
5
6syscall32 := $(srctree)/$(src)/syscall_32.tbl
7syscall64 := $(srctree)/$(src)/syscall_64.tbl
8
9syshdr := $(srctree)/$(src)/syscallhdr.sh
10systbl := $(srctree)/$(src)/syscalltbl.sh
11
12quiet_cmd_syshdr = SYSHDR $@
13 cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
14 '$(syshdr_abi_$(basetarget))' \
15 '$(syshdr_pfx_$(basetarget))' \
16 '$(syshdr_offset_$(basetarget))'
17quiet_cmd_systbl = SYSTBL $@
18 cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
19
20syshdr_abi_unistd_32 := i386
21$(out)/unistd_32.h: $(syscall32) $(syshdr)
22 $(call if_changed,syshdr)
23
24syshdr_abi_unistd_32_ia32 := i386
25syshdr_pfx_unistd_32_ia32 := ia32_
26$(out)/unistd_32_ia32.h: $(syscall32) $(syshdr)
27 $(call if_changed,syshdr)
28
29syshdr_abi_unistd_x32 := common,x32
30syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
31$(out)/unistd_x32.h: $(syscall64) $(syshdr)
32 $(call if_changed,syshdr)
33
34syshdr_abi_unistd_64 := common,64
35$(out)/unistd_64.h: $(syscall64) $(syshdr)
36 $(call if_changed,syshdr)
37
38syshdr_abi_unistd_64_x32 := x32
39syshdr_pfx_unistd_64_x32 := x32_
40$(out)/unistd_64_x32.h: $(syscall64) $(syshdr)
41 $(call if_changed,syshdr)
42
43$(out)/syscalls_32.h: $(syscall32) $(systbl)
44 $(call if_changed,systbl)
45$(out)/syscalls_64.h: $(syscall64) $(systbl)
46 $(call if_changed,systbl)
47
48syshdr-y += unistd_32.h unistd_64.h unistd_x32.h
49syshdr-y += syscalls_32.h
50syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h
51syshdr-$(CONFIG_X86_64) += syscalls_64.h
52
53targets += $(syshdr-y)
54
55all: $(addprefix $(out)/,$(targets))
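The recipe above keys its arguments off $(basetarget): each generated header picks up its own ABI filter, name prefix, and number offset from the variables declared next to its rule, so one cmd_syshdr serves five different headers. For unistd_x32.h that means abi "common,x32" plus an offset of __X32_SYSCALL_BIT; combined with syscall_64.tbl and the syscallhdr.sh script later in this commit, the output should come out along these lines (illustrative, not from an actual build):

    #ifndef _ASM_X86_UNISTD_X32_H
    #define _ASM_X86_UNISTD_X32_H 1

    #define __NR_read (__X32_SYSCALL_BIT + 0)
    #define __NR_write (__X32_SYSCALL_BIT + 1)
    /* ... */
    #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)

    #endif /* _ASM_X86_UNISTD_X32_H */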
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
new file mode 100644
index 000000000000..29f9f0554f7d
--- /dev/null
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -0,0 +1,357 @@
1#
2# 32-bit system call numbers and entry vectors
3#
4# The format is:
5# <number> <abi> <name> <entry point> <compat entry point>
6#
7# The abi is always "i386" for this file.
8#
90 i386 restart_syscall sys_restart_syscall
101 i386 exit sys_exit
112 i386 fork ptregs_fork stub32_fork
123 i386 read sys_read
134 i386 write sys_write
145 i386 open sys_open compat_sys_open
156 i386 close sys_close
167 i386 waitpid sys_waitpid sys32_waitpid
178 i386 creat sys_creat
189 i386 link sys_link
1910 i386 unlink sys_unlink
2011 i386 execve ptregs_execve stub32_execve
2112 i386 chdir sys_chdir
2213 i386 time sys_time compat_sys_time
2314 i386 mknod sys_mknod
2415 i386 chmod sys_chmod
2516 i386 lchown sys_lchown16
2617 i386 break
2718 i386 oldstat sys_stat
2819 i386 lseek sys_lseek sys32_lseek
2920 i386 getpid sys_getpid
3021 i386 mount sys_mount compat_sys_mount
3122 i386 umount sys_oldumount
3223 i386 setuid sys_setuid16
3324 i386 getuid sys_getuid16
3425 i386 stime sys_stime compat_sys_stime
3526 i386 ptrace sys_ptrace compat_sys_ptrace
3627 i386 alarm sys_alarm
3728 i386 oldfstat sys_fstat
3829 i386 pause sys_pause
3930 i386 utime sys_utime compat_sys_utime
4031 i386 stty
4132 i386 gtty
4233 i386 access sys_access
4334 i386 nice sys_nice
4435 i386 ftime
4536 i386 sync sys_sync
4637 i386 kill sys_kill sys32_kill
4738 i386 rename sys_rename
4839 i386 mkdir sys_mkdir
4940 i386 rmdir sys_rmdir
5041 i386 dup sys_dup
5142 i386 pipe sys_pipe
5243 i386 times sys_times compat_sys_times
5344 i386 prof
5445 i386 brk sys_brk
5546 i386 setgid sys_setgid16
5647 i386 getgid sys_getgid16
5748 i386 signal sys_signal
5849 i386 geteuid sys_geteuid16
5950 i386 getegid sys_getegid16
6051 i386 acct sys_acct
6152 i386 umount2 sys_umount
6253 i386 lock
6354 i386 ioctl sys_ioctl compat_sys_ioctl
6455 i386 fcntl sys_fcntl compat_sys_fcntl64
6556 i386 mpx
6657 i386 setpgid sys_setpgid
6758 i386 ulimit
6859 i386 oldolduname sys_olduname
6960 i386 umask sys_umask
7061 i386 chroot sys_chroot
7162 i386 ustat sys_ustat compat_sys_ustat
7263 i386 dup2 sys_dup2
7364 i386 getppid sys_getppid
7465 i386 getpgrp sys_getpgrp
7566 i386 setsid sys_setsid
7667 i386 sigaction sys_sigaction sys32_sigaction
7768 i386 sgetmask sys_sgetmask
7869 i386 ssetmask sys_ssetmask
7970 i386 setreuid sys_setreuid16
8071 i386 setregid sys_setregid16
8172 i386 sigsuspend sys_sigsuspend sys32_sigsuspend
8273 i386 sigpending sys_sigpending compat_sys_sigpending
8374 i386 sethostname sys_sethostname
8475 i386 setrlimit sys_setrlimit compat_sys_setrlimit
8576 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
8677 i386 getrusage sys_getrusage compat_sys_getrusage
8778 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
8879 i386 settimeofday sys_settimeofday compat_sys_settimeofday
8980 i386 getgroups sys_getgroups16
9081 i386 setgroups sys_setgroups16
9182 i386 select sys_old_select compat_sys_old_select
9283 i386 symlink sys_symlink
9384 i386 oldlstat sys_lstat
9485 i386 readlink sys_readlink
9586 i386 uselib sys_uselib
9687 i386 swapon sys_swapon
9788 i386 reboot sys_reboot
9889 i386 readdir sys_old_readdir compat_sys_old_readdir
9990 i386 mmap sys_old_mmap sys32_mmap
10091 i386 munmap sys_munmap
10192 i386 truncate sys_truncate
10293 i386 ftruncate sys_ftruncate
10394 i386 fchmod sys_fchmod
10495 i386 fchown sys_fchown16
10596 i386 getpriority sys_getpriority
10697 i386 setpriority sys_setpriority
10798 i386 profil
10899 i386 statfs sys_statfs compat_sys_statfs
109100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
110101 i386 ioperm sys_ioperm
111102 i386 socketcall sys_socketcall compat_sys_socketcall
112103 i386 syslog sys_syslog
113104 i386 setitimer sys_setitimer compat_sys_setitimer
114105 i386 getitimer sys_getitimer compat_sys_getitimer
115106 i386 stat sys_newstat compat_sys_newstat
116107 i386 lstat sys_newlstat compat_sys_newlstat
117108 i386 fstat sys_newfstat compat_sys_newfstat
118109 i386 olduname sys_uname
119110 i386 iopl ptregs_iopl stub32_iopl
120111 i386 vhangup sys_vhangup
121112 i386 idle
122113 i386 vm86old ptregs_vm86old sys32_vm86_warning
123114 i386 wait4 sys_wait4 compat_sys_wait4
124115 i386 swapoff sys_swapoff
125116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
126117 i386 ipc sys_ipc sys32_ipc
127118 i386 fsync sys_fsync
128119 i386 sigreturn ptregs_sigreturn stub32_sigreturn
129120 i386 clone ptregs_clone stub32_clone
130121 i386 setdomainname sys_setdomainname
131122 i386 uname sys_newuname
132123 i386 modify_ldt sys_modify_ldt
133124 i386 adjtimex sys_adjtimex compat_sys_adjtimex
134125 i386 mprotect sys_mprotect sys32_mprotect
135126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
136127 i386 create_module
137128 i386 init_module sys_init_module
138129 i386 delete_module sys_delete_module
139130 i386 get_kernel_syms
140131 i386 quotactl sys_quotactl sys32_quotactl
141132 i386 getpgid sys_getpgid
142133 i386 fchdir sys_fchdir
143134 i386 bdflush sys_bdflush
144135 i386 sysfs sys_sysfs
145136 i386 personality sys_personality
146137 i386 afs_syscall
147138 i386 setfsuid sys_setfsuid16
148139 i386 setfsgid sys_setfsgid16
149140 i386 _llseek sys_llseek
150141 i386 getdents sys_getdents compat_sys_getdents
151142 i386 _newselect sys_select compat_sys_select
152143 i386 flock sys_flock
153144 i386 msync sys_msync
154145 i386 readv sys_readv compat_sys_readv
155146 i386 writev sys_writev compat_sys_writev
156147 i386 getsid sys_getsid
157148 i386 fdatasync sys_fdatasync
158149 i386 _sysctl sys_sysctl compat_sys_sysctl
159150 i386 mlock sys_mlock
160151 i386 munlock sys_munlock
161152 i386 mlockall sys_mlockall
162153 i386 munlockall sys_munlockall
163154 i386 sched_setparam sys_sched_setparam
164155 i386 sched_getparam sys_sched_getparam
165156 i386 sched_setscheduler sys_sched_setscheduler
166157 i386 sched_getscheduler sys_sched_getscheduler
167158 i386 sched_yield sys_sched_yield
168159 i386 sched_get_priority_max sys_sched_get_priority_max
169160 i386 sched_get_priority_min sys_sched_get_priority_min
170161 i386 sched_rr_get_interval sys_sched_rr_get_interval sys32_sched_rr_get_interval
171162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
172163 i386 mremap sys_mremap
173164 i386 setresuid sys_setresuid16
174165 i386 getresuid sys_getresuid16
175166 i386 vm86 ptregs_vm86 sys32_vm86_warning
176167 i386 query_module
177168 i386 poll sys_poll
178169 i386 nfsservctl
179170 i386 setresgid sys_setresgid16
180171 i386 getresgid sys_getresgid16
181172 i386 prctl sys_prctl
182173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn
183174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction
184175 i386 rt_sigprocmask sys_rt_sigprocmask
185176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending
186177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
187178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo
188179 i386 rt_sigsuspend sys_rt_sigsuspend
189180 i386 pread64 sys_pread64 sys32_pread
190181 i386 pwrite64 sys_pwrite64 sys32_pwrite
191182 i386 chown sys_chown16
192183 i386 getcwd sys_getcwd
193184 i386 capget sys_capget
194185 i386 capset sys_capset
195186 i386 sigaltstack ptregs_sigaltstack stub32_sigaltstack
196187 i386 sendfile sys_sendfile sys32_sendfile
197188 i386 getpmsg
198189 i386 putpmsg
199190 i386 vfork ptregs_vfork stub32_vfork
200191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
201192 i386 mmap2 sys_mmap_pgoff
202193 i386 truncate64 sys_truncate64 sys32_truncate64
203194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
204195 i386 stat64 sys_stat64 sys32_stat64
205196 i386 lstat64 sys_lstat64 sys32_lstat64
206197 i386 fstat64 sys_fstat64 sys32_fstat64
207198 i386 lchown32 sys_lchown
208199 i386 getuid32 sys_getuid
209200 i386 getgid32 sys_getgid
210201 i386 geteuid32 sys_geteuid
211202 i386 getegid32 sys_getegid
212203 i386 setreuid32 sys_setreuid
213204 i386 setregid32 sys_setregid
214205 i386 getgroups32 sys_getgroups
215206 i386 setgroups32 sys_setgroups
216207 i386 fchown32 sys_fchown
217208 i386 setresuid32 sys_setresuid
218209 i386 getresuid32 sys_getresuid
219210 i386 setresgid32 sys_setresgid
220211 i386 getresgid32 sys_getresgid
221212 i386 chown32 sys_chown
222213 i386 setuid32 sys_setuid
223214 i386 setgid32 sys_setgid
224215 i386 setfsuid32 sys_setfsuid
225216 i386 setfsgid32 sys_setfsgid
226217 i386 pivot_root sys_pivot_root
227218 i386 mincore sys_mincore
228219 i386 madvise sys_madvise
229220 i386 getdents64 sys_getdents64 compat_sys_getdents64
230221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
231# 222 is unused
232# 223 is unused
233224 i386 gettid sys_gettid
234225 i386 readahead sys_readahead sys32_readahead
235226 i386 setxattr sys_setxattr
236227 i386 lsetxattr sys_lsetxattr
237228 i386 fsetxattr sys_fsetxattr
238229 i386 getxattr sys_getxattr
239230 i386 lgetxattr sys_lgetxattr
240231 i386 fgetxattr sys_fgetxattr
241232 i386 listxattr sys_listxattr
242233 i386 llistxattr sys_llistxattr
243234 i386 flistxattr sys_flistxattr
244235 i386 removexattr sys_removexattr
245236 i386 lremovexattr sys_lremovexattr
246237 i386 fremovexattr sys_fremovexattr
247238 i386 tkill sys_tkill
248239 i386 sendfile64 sys_sendfile64
249240 i386 futex sys_futex compat_sys_futex
250241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
251242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
252243 i386 set_thread_area sys_set_thread_area
253244 i386 get_thread_area sys_get_thread_area
254245 i386 io_setup sys_io_setup compat_sys_io_setup
255246 i386 io_destroy sys_io_destroy
256247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
257248 i386 io_submit sys_io_submit compat_sys_io_submit
258249 i386 io_cancel sys_io_cancel
259250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
260# 251 is available for reuse (was briefly sys_set_zone_reclaim)
261252 i386 exit_group sys_exit_group
262253 i386 lookup_dcookie sys_lookup_dcookie sys32_lookup_dcookie
263254 i386 epoll_create sys_epoll_create
264255 i386 epoll_ctl sys_epoll_ctl
265256 i386 epoll_wait sys_epoll_wait
266257 i386 remap_file_pages sys_remap_file_pages
267258 i386 set_tid_address sys_set_tid_address
268259 i386 timer_create sys_timer_create compat_sys_timer_create
269260 i386 timer_settime sys_timer_settime compat_sys_timer_settime
270261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime
271262 i386 timer_getoverrun sys_timer_getoverrun
272263 i386 timer_delete sys_timer_delete
273264 i386 clock_settime sys_clock_settime compat_sys_clock_settime
274265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime
275266 i386 clock_getres sys_clock_getres compat_sys_clock_getres
276267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
277268 i386 statfs64 sys_statfs64 compat_sys_statfs64
278269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
279270 i386 tgkill sys_tgkill
280271 i386 utimes sys_utimes compat_sys_utimes
281272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
282273 i386 vserver
283274 i386 mbind sys_mbind
284275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
285276 i386 set_mempolicy sys_set_mempolicy
286277 i386 mq_open sys_mq_open compat_sys_mq_open
287278 i386 mq_unlink sys_mq_unlink
288279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
289280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
290281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
291282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
292283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
293284 i386 waitid sys_waitid compat_sys_waitid
294# 285 sys_setaltroot
295286 i386 add_key sys_add_key
296287 i386 request_key sys_request_key
297288 i386 keyctl sys_keyctl
298289 i386 ioprio_set sys_ioprio_set
299290 i386 ioprio_get sys_ioprio_get
300291 i386 inotify_init sys_inotify_init
301292 i386 inotify_add_watch sys_inotify_add_watch
302293 i386 inotify_rm_watch sys_inotify_rm_watch
303294 i386 migrate_pages sys_migrate_pages
304295 i386 openat sys_openat compat_sys_openat
305296 i386 mkdirat sys_mkdirat
306297 i386 mknodat sys_mknodat
307298 i386 fchownat sys_fchownat
308299 i386 futimesat sys_futimesat compat_sys_futimesat
309300 i386 fstatat64 sys_fstatat64 sys32_fstatat
310301 i386 unlinkat sys_unlinkat
311302 i386 renameat sys_renameat
312303 i386 linkat sys_linkat
313304 i386 symlinkat sys_symlinkat
314305 i386 readlinkat sys_readlinkat
315306 i386 fchmodat sys_fchmodat
316307 i386 faccessat sys_faccessat
317308 i386 pselect6 sys_pselect6 compat_sys_pselect6
318309 i386 ppoll sys_ppoll compat_sys_ppoll
319310 i386 unshare sys_unshare
320311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
321312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
322313 i386 splice sys_splice
323314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
324315 i386 tee sys_tee
325316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
326317 i386 move_pages sys_move_pages compat_sys_move_pages
327318 i386 getcpu sys_getcpu
328319 i386 epoll_pwait sys_epoll_pwait
329320 i386 utimensat sys_utimensat compat_sys_utimensat
330321 i386 signalfd sys_signalfd compat_sys_signalfd
331322 i386 timerfd_create sys_timerfd_create
332323 i386 eventfd sys_eventfd
333324 i386 fallocate sys_fallocate sys32_fallocate
334325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
335326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
336327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
337328 i386 eventfd2 sys_eventfd2
338329 i386 epoll_create1 sys_epoll_create1
339330 i386 dup3 sys_dup3
340331 i386 pipe2 sys_pipe2
341332 i386 inotify_init1 sys_inotify_init1
342333 i386 preadv sys_preadv compat_sys_preadv
343334 i386 pwritev sys_pwritev compat_sys_pwritev
344335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
345336 i386 perf_event_open sys_perf_event_open
346337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg
347338 i386 fanotify_init sys_fanotify_init
348339 i386 fanotify_mark sys_fanotify_mark sys32_fanotify_mark
349340 i386 prlimit64 sys_prlimit64
350341 i386 name_to_handle_at sys_name_to_handle_at
351342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
352343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
353344 i386 syncfs sys_syncfs
354345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
355346 i386 setns sys_setns
356347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
357348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
new file mode 100644
index 000000000000..dd29a9ea27c5
--- /dev/null
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -0,0 +1,353 @@
1#
2# 64-bit system call numbers and entry vectors
3#
4# The format is:
5# <number> <abi> <name> <entry point>
6#
7# The abi is "common", "64" or "x32" for this file.
8#
90 common read sys_read
101 common write sys_write
112 common open sys_open
123 common close sys_close
134 common stat sys_newstat
145 common fstat sys_newfstat
156 common lstat sys_newlstat
167 common poll sys_poll
178 common lseek sys_lseek
189 common mmap sys_mmap
1910 common mprotect sys_mprotect
2011 common munmap sys_munmap
2112 common brk sys_brk
2213 64 rt_sigaction sys_rt_sigaction
2314 common rt_sigprocmask sys_rt_sigprocmask
2415 64 rt_sigreturn stub_rt_sigreturn
2516 64 ioctl sys_ioctl
2617 common pread64 sys_pread64
2718 common pwrite64 sys_pwrite64
2819 64 readv sys_readv
2920 64 writev sys_writev
3021 common access sys_access
3122 common pipe sys_pipe
3223 common select sys_select
3324 common sched_yield sys_sched_yield
3425 common mremap sys_mremap
3526 common msync sys_msync
3627 common mincore sys_mincore
3728 common madvise sys_madvise
3829 common shmget sys_shmget
3930 common shmat sys_shmat
4031 common shmctl sys_shmctl
4132 common dup sys_dup
4233 common dup2 sys_dup2
4334 common pause sys_pause
4435 common nanosleep sys_nanosleep
4536 common getitimer sys_getitimer
4637 common alarm sys_alarm
4738 common setitimer sys_setitimer
4839 common getpid sys_getpid
4940 common sendfile sys_sendfile64
5041 common socket sys_socket
5142 common connect sys_connect
5243 common accept sys_accept
5344 common sendto sys_sendto
5445 64 recvfrom sys_recvfrom
5546 64 sendmsg sys_sendmsg
5647 64 recvmsg sys_recvmsg
5748 common shutdown sys_shutdown
5849 common bind sys_bind
5950 common listen sys_listen
6051 common getsockname sys_getsockname
6152 common getpeername sys_getpeername
6253 common socketpair sys_socketpair
6354 common setsockopt sys_setsockopt
6455 common getsockopt sys_getsockopt
6556 common clone stub_clone
6657 common fork stub_fork
6758 common vfork stub_vfork
6859 64 execve stub_execve
6960 common exit sys_exit
7061 common wait4 sys_wait4
7162 common kill sys_kill
7263 common uname sys_newuname
7364 common semget sys_semget
7465 common semop sys_semop
7566 common semctl sys_semctl
7667 common shmdt sys_shmdt
7768 common msgget sys_msgget
7869 common msgsnd sys_msgsnd
7970 common msgrcv sys_msgrcv
8071 common msgctl sys_msgctl
8172 common fcntl sys_fcntl
8273 common flock sys_flock
8374 common fsync sys_fsync
8475 common fdatasync sys_fdatasync
8576 common truncate sys_truncate
8677 common ftruncate sys_ftruncate
8778 common getdents sys_getdents
8879 common getcwd sys_getcwd
8980 common chdir sys_chdir
9081 common fchdir sys_fchdir
9182 common rename sys_rename
9283 common mkdir sys_mkdir
9384 common rmdir sys_rmdir
9485 common creat sys_creat
9586 common link sys_link
9687 common unlink sys_unlink
9788 common symlink sys_symlink
9889 common readlink sys_readlink
9990 common chmod sys_chmod
10091 common fchmod sys_fchmod
10192 common chown sys_chown
10293 common fchown sys_fchown
10394 common lchown sys_lchown
10495 common umask sys_umask
10596 common gettimeofday sys_gettimeofday
10697 common getrlimit sys_getrlimit
10798 common getrusage sys_getrusage
10899 common sysinfo sys_sysinfo
109100 common times sys_times
110101 64 ptrace sys_ptrace
111102 common getuid sys_getuid
112103 common syslog sys_syslog
113104 common getgid sys_getgid
114105 common setuid sys_setuid
115106 common setgid sys_setgid
116107 common geteuid sys_geteuid
117108 common getegid sys_getegid
118109 common setpgid sys_setpgid
119110 common getppid sys_getppid
120111 common getpgrp sys_getpgrp
121112 common setsid sys_setsid
122113 common setreuid sys_setreuid
123114 common setregid sys_setregid
124115 common getgroups sys_getgroups
125116 common setgroups sys_setgroups
126117 common setresuid sys_setresuid
127118 common getresuid sys_getresuid
128119 common setresgid sys_setresgid
129120 common getresgid sys_getresgid
130121 common getpgid sys_getpgid
131122 common setfsuid sys_setfsuid
132123 common setfsgid sys_setfsgid
133124 common getsid sys_getsid
134125 common capget sys_capget
135126 common capset sys_capset
136127 64 rt_sigpending sys_rt_sigpending
137128 64 rt_sigtimedwait sys_rt_sigtimedwait
138129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
139130 common rt_sigsuspend sys_rt_sigsuspend
140131 64 sigaltstack stub_sigaltstack
141132 common utime sys_utime
142133 common mknod sys_mknod
143134 64 uselib
144135 common personality sys_personality
145136 common ustat sys_ustat
146137 common statfs sys_statfs
147138 common fstatfs sys_fstatfs
148139 common sysfs sys_sysfs
149140 common getpriority sys_getpriority
150141 common setpriority sys_setpriority
151142 common sched_setparam sys_sched_setparam
152143 common sched_getparam sys_sched_getparam
153144 common sched_setscheduler sys_sched_setscheduler
154145 common sched_getscheduler sys_sched_getscheduler
155146 common sched_get_priority_max sys_sched_get_priority_max
156147 common sched_get_priority_min sys_sched_get_priority_min
157148 common sched_rr_get_interval sys_sched_rr_get_interval
158149 common mlock sys_mlock
159150 common munlock sys_munlock
160151 common mlockall sys_mlockall
161152 common munlockall sys_munlockall
162153 common vhangup sys_vhangup
163154 common modify_ldt sys_modify_ldt
164155 common pivot_root sys_pivot_root
165156 64 _sysctl sys_sysctl
166157 common prctl sys_prctl
167158 common arch_prctl sys_arch_prctl
168159 common adjtimex sys_adjtimex
169160 common setrlimit sys_setrlimit
170161 common chroot sys_chroot
171162 common sync sys_sync
172163 common acct sys_acct
173164 common settimeofday sys_settimeofday
174165 common mount sys_mount
175166 common umount2 sys_umount
176167 common swapon sys_swapon
177168 common swapoff sys_swapoff
178169 common reboot sys_reboot
179170 common sethostname sys_sethostname
180171 common setdomainname sys_setdomainname
181172 common iopl stub_iopl
182173 common ioperm sys_ioperm
183174 64 create_module
184175 common init_module sys_init_module
185176 common delete_module sys_delete_module
186177 64 get_kernel_syms
187178 64 query_module
188179 common quotactl sys_quotactl
189180 64 nfsservctl
190181 common getpmsg
191182 common putpmsg
192183 common afs_syscall
193184 common tuxcall
194185 common security
195186 common gettid sys_gettid
196187 common readahead sys_readahead
197188 common setxattr sys_setxattr
198189 common lsetxattr sys_lsetxattr
199190 common fsetxattr sys_fsetxattr
200191 common getxattr sys_getxattr
201192 common lgetxattr sys_lgetxattr
202193 common fgetxattr sys_fgetxattr
203194 common listxattr sys_listxattr
204195 common llistxattr sys_llistxattr
205196 common flistxattr sys_flistxattr
206197 common removexattr sys_removexattr
207198 common lremovexattr sys_lremovexattr
208199 common fremovexattr sys_fremovexattr
209200 common tkill sys_tkill
210201 common time sys_time
211202 common futex sys_futex
212203 common sched_setaffinity sys_sched_setaffinity
213204 common sched_getaffinity sys_sched_getaffinity
214205 64 set_thread_area
215206 common io_setup sys_io_setup
216207 common io_destroy sys_io_destroy
217208 common io_getevents sys_io_getevents
218209 common io_submit sys_io_submit
219210 common io_cancel sys_io_cancel
220211 64 get_thread_area
221212 common lookup_dcookie sys_lookup_dcookie
222213 common epoll_create sys_epoll_create
223214 64 epoll_ctl_old
224215 64 epoll_wait_old
225216 common remap_file_pages sys_remap_file_pages
226217 common getdents64 sys_getdents64
227218 common set_tid_address sys_set_tid_address
228219 common restart_syscall sys_restart_syscall
229220 common semtimedop sys_semtimedop
230221 common fadvise64 sys_fadvise64
231222 64 timer_create sys_timer_create
232223 common timer_settime sys_timer_settime
233224 common timer_gettime sys_timer_gettime
234225 common timer_getoverrun sys_timer_getoverrun
235226 common timer_delete sys_timer_delete
236227 common clock_settime sys_clock_settime
237228 common clock_gettime sys_clock_gettime
238229 common clock_getres sys_clock_getres
239230 common clock_nanosleep sys_clock_nanosleep
240231 common exit_group sys_exit_group
241232 common epoll_wait sys_epoll_wait
242233 common epoll_ctl sys_epoll_ctl
243234 common tgkill sys_tgkill
244235 common utimes sys_utimes
245236 64 vserver
246237 common mbind sys_mbind
247238 common set_mempolicy sys_set_mempolicy
248239 common get_mempolicy sys_get_mempolicy
249240 common mq_open sys_mq_open
250241 common mq_unlink sys_mq_unlink
251242 common mq_timedsend sys_mq_timedsend
252243 common mq_timedreceive sys_mq_timedreceive
253244 64 mq_notify sys_mq_notify
254245 common mq_getsetattr sys_mq_getsetattr
255246 64 kexec_load sys_kexec_load
256247 64 waitid sys_waitid
257248 common add_key sys_add_key
258249 common request_key sys_request_key
259250 common keyctl sys_keyctl
260251 common ioprio_set sys_ioprio_set
261252 common ioprio_get sys_ioprio_get
262253 common inotify_init sys_inotify_init
263254 common inotify_add_watch sys_inotify_add_watch
264255 common inotify_rm_watch sys_inotify_rm_watch
265256 common migrate_pages sys_migrate_pages
266257 common openat sys_openat
267258 common mkdirat sys_mkdirat
268259 common mknodat sys_mknodat
269260 common fchownat sys_fchownat
270261 common futimesat sys_futimesat
271262 common newfstatat sys_newfstatat
272263 common unlinkat sys_unlinkat
273264 common renameat sys_renameat
274265 common linkat sys_linkat
275266 common symlinkat sys_symlinkat
276267 common readlinkat sys_readlinkat
277268 common fchmodat sys_fchmodat
278269 common faccessat sys_faccessat
279270 common pselect6 sys_pselect6
280271 common ppoll sys_ppoll
281272 common unshare sys_unshare
282273 64 set_robust_list sys_set_robust_list
283274 64 get_robust_list sys_get_robust_list
284275 common splice sys_splice
285276 common tee sys_tee
286277 common sync_file_range sys_sync_file_range
287278 64 vmsplice sys_vmsplice
288279 64 move_pages sys_move_pages
289280 common utimensat sys_utimensat
290281 common epoll_pwait sys_epoll_pwait
291282 common signalfd sys_signalfd
292283 common timerfd_create sys_timerfd_create
293284 common eventfd sys_eventfd
294285 common fallocate sys_fallocate
295286 common timerfd_settime sys_timerfd_settime
296287 common timerfd_gettime sys_timerfd_gettime
297288 common accept4 sys_accept4
298289 common signalfd4 sys_signalfd4
299290 common eventfd2 sys_eventfd2
300291 common epoll_create1 sys_epoll_create1
301292 common dup3 sys_dup3
302293 common pipe2 sys_pipe2
303294 common inotify_init1 sys_inotify_init1
304295 64 preadv sys_preadv
305296 64 pwritev sys_pwritev
306297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
307298 common perf_event_open sys_perf_event_open
308299 64 recvmmsg sys_recvmmsg
309300 common fanotify_init sys_fanotify_init
310301 common fanotify_mark sys_fanotify_mark
311302 common prlimit64 sys_prlimit64
312303 common name_to_handle_at sys_name_to_handle_at
313304 common open_by_handle_at sys_open_by_handle_at
314305 common clock_adjtime sys_clock_adjtime
315306 common syncfs sys_syncfs
316307 64 sendmmsg sys_sendmmsg
317308 common setns sys_setns
318309 common getcpu sys_getcpu
319310 64 process_vm_readv sys_process_vm_readv
320311 64 process_vm_writev sys_process_vm_writev
321#
322# x32-specific system call numbers start at 512 to avoid cache impact
323# for native 64-bit operation.
324#
325512 x32 rt_sigaction sys32_rt_sigaction
326513 x32 rt_sigreturn stub_x32_rt_sigreturn
327514 x32 ioctl compat_sys_ioctl
328515 x32 readv compat_sys_readv
329516 x32 writev compat_sys_writev
330517 x32 recvfrom compat_sys_recvfrom
331518 x32 sendmsg compat_sys_sendmsg
332519 x32 recvmsg compat_sys_recvmsg
333520 x32 execve stub_x32_execve
334521 x32 ptrace compat_sys_ptrace
335522 x32 rt_sigpending sys32_rt_sigpending
336523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
337524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo
338525 x32 sigaltstack stub_x32_sigaltstack
339526 x32 timer_create compat_sys_timer_create
340527 x32 mq_notify compat_sys_mq_notify
341528 x32 kexec_load compat_sys_kexec_load
342529 x32 waitid compat_sys_waitid
343530 x32 set_robust_list compat_sys_set_robust_list
344531 x32 get_robust_list compat_sys_get_robust_list
345532 x32 vmsplice compat_sys_vmsplice
346533 x32 move_pages compat_sys_move_pages
347534 x32 preadv compat_sys_preadv64
348535 x32 pwritev compat_sys_pwritev64
349536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
350537 x32 recvmmsg compat_sys_recvmmsg
351538 x32 sendmmsg compat_sys_sendmmsg
352539 x32 process_vm_readv compat_sys_process_vm_readv
353540 x32 process_vm_writev compat_sys_process_vm_writev
diff --git a/arch/x86/syscalls/syscallhdr.sh b/arch/x86/syscalls/syscallhdr.sh
new file mode 100644
index 000000000000..31fd5f1f38f7
--- /dev/null
+++ b/arch/x86/syscalls/syscallhdr.sh
@@ -0,0 +1,27 @@
1#!/bin/sh
2
3in="$1"
4out="$2"
5my_abis=`echo "($3)" | tr ',' '|'`
6prefix="$4"
7offset="$5"
8
9fileguard=_ASM_X86_`basename "$out" | sed \
10 -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
11 -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
12grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
13 echo "#ifndef ${fileguard}"
14 echo "#define ${fileguard} 1"
15 echo ""
16
17 while read nr abi name entry ; do
18 if [ -z "$offset" ]; then
19 echo "#define __NR_${prefix}${name} $nr"
20 else
21 echo "#define __NR_${prefix}${name} ($offset + $nr)"
22 fi
23 done
24
25 echo ""
26 echo "#endif /* ${fileguard} */"
27) > "$out"
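Two details worth noticing in the script above: the while loop emits nothing but the number and the (optionally prefixed) name, so the entry-point and compat columns never reach the header, and the include guard is derived mechanically from the output filename. For unistd_32_ia32.h (prefix ia32_, no offset, i386 abi) the first branch of the loop applies, giving output of this form (illustrative):

    #ifndef _ASM_X86_UNISTD_32_IA32_H
    #define _ASM_X86_UNISTD_32_IA32_H 1

    #define __NR_ia32_restart_syscall 0
    #define __NR_ia32_exit 1
    #define __NR_ia32_fork 2
    /* ... continues through nr 348 */

    #endif /* _ASM_X86_UNISTD_32_IA32_H */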
diff --git a/arch/x86/syscalls/syscalltbl.sh b/arch/x86/syscalls/syscalltbl.sh
new file mode 100644
index 000000000000..0e7f8ec071e7
--- /dev/null
+++ b/arch/x86/syscalls/syscalltbl.sh
@@ -0,0 +1,15 @@
1#!/bin/sh
2
3in="$1"
4out="$2"
5
6grep '^[0-9]' "$in" | sort -n | (
7 while read nr abi name entry compat; do
8 abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
9 if [ -n "$compat" ]; then
10 echo "__SYSCALL_${abi}($nr, $entry, $compat)"
11 elif [ -n "$entry" ]; then
12 echo "__SYSCALL_${abi}($nr, $entry, $entry)"
13 fi
14 done
15) > "$out"
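Each table row becomes exactly one macro invocation: when the compat column is present it becomes the third argument, otherwise the entry point is doubled, and rows with no entry point at all (the historical placeholders such as break, stty, or ftime) are dropped entirely. Applied to the first rows of syscall_32.tbl, the generated fragment looks like:

    __SYSCALL_I386(0, sys_restart_syscall, sys_restart_syscall)
    __SYSCALL_I386(1, sys_exit, sys_exit)
    __SYSCALL_I386(2, ptregs_fork, stub32_fork)
    /* 3 and 4 follow the same no-compat pattern */
    __SYSCALL_I386(5, sys_open, compat_sys_open)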
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 1d97bd84b6fb..9926e11a772d 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -6,14 +6,6 @@ menu "UML-specific options"
6 6
7menu "Host processor type and features" 7menu "Host processor type and features"
8 8
9config CMPXCHG_LOCAL
10 bool
11 default n
12
13config CMPXCHG_DOUBLE
14 bool
15 default n
16
17source "arch/x86/Kconfig.cpu" 9source "arch/x86/Kconfig.cpu"
18 10
19endmenu 11endmenu
@@ -23,8 +15,8 @@ config UML_X86
23 select GENERIC_FIND_FIRST_BIT 15 select GENERIC_FIND_FIRST_BIT
24 16
25config 64BIT 17config 64BIT
26 bool 18 bool "64-bit kernel" if SUBARCH = "x86"
27 default SUBARCH = "x86_64" 19 default SUBARCH != "i386"
28 20
29config X86_32 21config X86_32
30 def_bool !64BIT 22 def_bool !64BIT
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 8fb58400e415..5d065b2222d3 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -37,7 +37,8 @@ subarch-$(CONFIG_MODULES) += ../kernel/module.o
37USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o 37USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o
38 38
39extra-y += user-offsets.s 39extra-y += user-offsets.s
40$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) 40$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) \
41 -Iarch/x86/include/generated
41 42
42UNPROFILE_OBJS := stub_segv.o 43UNPROFILE_OBJS := stub_segv.o
43CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) 44CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
new file mode 100644
index 000000000000..7d01b8c56c00
--- /dev/null
+++ b/arch/x86/um/asm/barrier.h
@@ -0,0 +1,75 @@
1#ifndef _ASM_UM_BARRIER_H_
2#define _ASM_UM_BARRIER_H_
3
4#include <asm/asm.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h>
9
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/*
14 * Force strict CPU ordering.
15 * And yes, this is required on UP too when we're talking
16 * to devices.
17 */
18#ifdef CONFIG_X86_32
19
20#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
21#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
22#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
23
24#else /* CONFIG_X86_32 */
25
26#define mb() asm volatile("mfence" : : : "memory")
27#define rmb() asm volatile("lfence" : : : "memory")
28#define wmb() asm volatile("sfence" : : : "memory")
29
30#endif /* CONFIG_X86_32 */
31
32#define read_barrier_depends() do { } while (0)
33
34#ifdef CONFIG_SMP
35
36#define smp_mb() mb()
37#ifdef CONFIG_X86_PPRO_FENCE
38#define smp_rmb() rmb()
39#else /* CONFIG_X86_PPRO_FENCE */
40#define smp_rmb() barrier()
41#endif /* CONFIG_X86_PPRO_FENCE */
42
43#ifdef CONFIG_X86_OOSTORE
44#define smp_wmb() wmb()
45#else /* CONFIG_X86_OOSTORE */
46#define smp_wmb() barrier()
47#endif /* CONFIG_X86_OOSTORE */
48
49#define smp_read_barrier_depends() read_barrier_depends()
50#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
51
52#else /* CONFIG_SMP */
53
54#define smp_mb() barrier()
55#define smp_rmb() barrier()
56#define smp_wmb() barrier()
57#define smp_read_barrier_depends() do { } while (0)
58#define set_mb(var, value) do { var = value; barrier(); } while (0)
59
60#endif /* CONFIG_SMP */
61
62/*
63 * Stop RDTSC speculation. This is needed when you need to use RDTSC
64 * (or get_cycles or vread that possibly accesses the TSC) in a defined
65 * code region.
66 *
67 * (Could use an alternative three way for this if there was one.)
68 */
69static inline void rdtsc_barrier(void)
70{
71 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
72 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
73}
74
75#endif
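These macros track the native x86 barrier.h of the era: on 64-bit the mandatory fences are plain mfence/lfence/sfence, on 32-bit alternative() falls back to a locked add unless the SSE fence instructions exist, and the smp_* variants collapse to compiler barriers wherever the hardware already orders the access in question. A userspace analogue of the wmb()/rmb() pairing, x86-only and illustrative (the hand-rolled fences and volatile spin stand in for proper atomics):

    #include <pthread.h>
    #include <stdio.h>

    static int payload;
    static volatile int ready;

    /* stand-ins for the kernel's 64-bit wmb()/rmb() above */
    static void wmb(void) { __asm__ __volatile__("sfence" ::: "memory"); }
    static void rmb(void) { __asm__ __volatile__("lfence" ::: "memory"); }

    static void *producer(void *unused)
    {
            payload = 42;
            wmb();          /* publish the payload before the ready flag */
            ready = 1;
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, producer, NULL);
            while (!ready)
                    ;       /* consumer spins on the flag */
            rmb();          /* order the flag load before the payload load */
            printf("payload = %d\n", payload);
            pthread_join(&t, NULL);
            return 0;
    }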
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index 2c32df6fe231..04f82e020f2b 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -17,6 +17,16 @@
17#define ARCH_IS_STACKGROW(address) \ 17#define ARCH_IS_STACKGROW(address) \
18 (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs)) 18 (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs))
19 19
20#include <asm/user.h>
21
22/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
23static inline void rep_nop(void)
24{
25 __asm__ __volatile__("rep;nop": : :"memory");
26}
27
28#define cpu_relax() rep_nop()
29
20#include <asm/processor-generic.h> 30#include <asm/processor-generic.h>
21 31
22#endif 32#endif
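rep;nop is the encoding of PAUSE, which hints to the CPU (and to a sibling hyperthread) that a busy-wait is in flight; hoisting rep_nop()/cpu_relax() into the shared processor.h is what lets the next two hunks delete the identical 32-bit and 64-bit copies. The idiom in isolation, userspace flavor (runnable; the capped loop stands in for a flag another thread would clear):

    #include <stdio.h>

    static inline void cpu_relax(void)      /* rep;nop == PAUSE */
    {
            __asm__ __volatile__("rep; nop" ::: "memory");
    }

    int main(void)
    {
            volatile int flag = 1;
            int spins = 0;

            while (flag && spins < 1000000) {
                    cpu_relax();
                    spins++;
            }
            printf("spun %d times\n", spins);
            return 0;
    }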
diff --git a/arch/x86/um/asm/processor_32.h b/arch/x86/um/asm/processor_32.h
index 018f732704dd..6c6689e574ce 100644
--- a/arch/x86/um/asm/processor_32.h
+++ b/arch/x86/um/asm/processor_32.h
@@ -45,16 +45,6 @@ static inline void arch_copy_thread(struct arch_thread *from,
45 memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array)); 45 memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array));
46} 46}
47 47
48#include <asm/user.h>
49
50/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
51static inline void rep_nop(void)
52{
53 __asm__ __volatile__("rep;nop": : :"memory");
54}
55
56#define cpu_relax() rep_nop()
57
58/* 48/*
59 * Default implementation of macro that returns current 49 * Default implementation of macro that returns current
60 * instruction pointer ("program counter"). Stolen 50 * instruction pointer ("program counter"). Stolen
diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h
index 61de92d916c3..4b02a8455bd1 100644
--- a/arch/x86/um/asm/processor_64.h
+++ b/arch/x86/um/asm/processor_64.h
@@ -14,14 +14,6 @@ struct arch_thread {
14 struct faultinfo faultinfo; 14 struct faultinfo faultinfo;
15}; 15};
16 16
17/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
18static inline void rep_nop(void)
19{
20 __asm__ __volatile__("rep;nop": : :"memory");
21}
22
23#define cpu_relax() rep_nop()
24
25#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \ 17#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \
26 .debugregs_seq = 0, \ 18 .debugregs_seq = 0, \
27 .fs = 0, \ 19 .fs = 0, \
@@ -37,8 +29,6 @@ static inline void arch_copy_thread(struct arch_thread *from,
37 to->fs = from->fs; 29 to->fs = from->fs;
38} 30}
39 31
40#include <asm/user.h>
41
42#define current_text_addr() \ 32#define current_text_addr() \
43 ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; }) 33 ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
44 34
diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c
index a1fba5fb9dbe..17d88cf2c6c4 100644
--- a/arch/x86/um/bugs_32.c
+++ b/arch/x86/um/bugs_32.c
@@ -13,8 +13,6 @@
13static int host_has_cmov = 1; 13static int host_has_cmov = 1;
14static jmp_buf cmov_test_return; 14static jmp_buf cmov_test_return;
15 15
16#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
17
18static void cmov_sigill_test_handler(int sig) 16static void cmov_sigill_test_handler(int sig)
19{ 17{
20 host_has_cmov = 0; 18 host_has_cmov = 0;
@@ -51,7 +49,7 @@ void arch_examine_signal(int sig, struct uml_pt_regs *regs)
51 * This is testing for a cmov (0x0f 0x4x) instruction causing a 49 * This is testing for a cmov (0x0f 0x4x) instruction causing a
52 * SIGILL in init. 50 * SIGILL in init.
53 */ 51 */
54 if ((sig != SIGILL) || (TASK_PID(get_current()) != 1)) 52 if ((sig != SIGILL) || (get_current_pid() != 1))
55 return; 53 return;
56 54
57 if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) { 55 if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) {
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
index 639900a6fde9..f40281e5d6a2 100644
--- a/arch/x86/um/mem_32.c
+++ b/arch/x86/um/mem_32.c
@@ -23,14 +23,6 @@ static int __init gate_vma_init(void)
23 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; 23 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
24 gate_vma.vm_page_prot = __P101; 24 gate_vma.vm_page_prot = __P101;
25 25
26 /*
27 * Make sure the vDSO gets into every core dump.
28 * Dumping its contents makes post-mortem fully interpretable later
29 * without matching up the same kernel and hardware config to see
30 * what PC values meant.
31 */
32 gate_vma.vm_flags |= VM_ALWAYSDUMP;
33
34 return 0; 26 return 0;
35} 27}
36__initcall(gate_vma_init); 28__initcall(gate_vma_init);
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 711b1621747f..2bbe1ec2d96a 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -1,5 +1,15 @@
1#ifndef __SYSDEP_X86_PTRACE_H
2#define __SYSDEP_X86_PTRACE_H
3
1#ifdef __i386__ 4#ifdef __i386__
2#include "ptrace_32.h" 5#include "ptrace_32.h"
3#else 6#else
4#include "ptrace_64.h" 7#include "ptrace_64.h"
5#endif 8#endif
9
10static inline long regs_return_value(struct uml_pt_regs *regs)
11{
12 return UPT_SYSCALL_RET(regs);
13}
14
15#endif /* __SYSDEP_X86_PTRACE_H */
diff --git a/arch/x86/um/sys_call_table_32.S b/arch/x86/um/sys_call_table_32.S
deleted file mode 100644
index a7ca80d2dceb..000000000000
--- a/arch/x86/um/sys_call_table_32.S
+++ /dev/null
@@ -1,26 +0,0 @@
1#include <linux/linkage.h>
2/* Steal i386 syscall table for our purposes, but with some slight changes.*/
3
4#define sys_iopl sys_ni_syscall
5#define sys_ioperm sys_ni_syscall
6
7#define sys_vm86old sys_ni_syscall
8#define sys_vm86 sys_ni_syscall
9
10#define old_mmap sys_old_mmap
11
12#define ptregs_fork sys_fork
13#define ptregs_execve sys_execve
14#define ptregs_iopl sys_iopl
15#define ptregs_vm86old sys_vm86old
16#define ptregs_clone sys_clone
17#define ptregs_vm86 sys_vm86
18#define ptregs_sigaltstack sys_sigaltstack
19#define ptregs_vfork sys_vfork
20
21.section .rodata,"a"
22
23#include "../kernel/syscall_table_32.S"
24
25ENTRY(syscall_table_size)
26.long .-sys_call_table
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
new file mode 100644
index 000000000000..416bd40c0eba
--- /dev/null
+++ b/arch/x86/um/sys_call_table_32.c
@@ -0,0 +1,55 @@
1/*
2 * System call table for UML/i386, copied from arch/x86/kernel/syscall_*.c
3 * with some changes for UML.
4 */
5
6#include <linux/linkage.h>
7#include <linux/sys.h>
8#include <linux/cache.h>
9#include <generated/user_constants.h>
10
11#define __NO_STUBS
12
13/*
14 * Below you can see, in terms of #define's, the differences between the x86-64
15 * and the UML syscall table.
16 */
17
18/* Not going to be implemented by UML, since we have no hardware. */
19#define sys_iopl sys_ni_syscall
20#define sys_ioperm sys_ni_syscall
21
22#define sys_vm86old sys_ni_syscall
23#define sys_vm86 sys_ni_syscall
24
25#define old_mmap sys_old_mmap
26
27#define ptregs_fork sys_fork
28#define ptregs_execve sys_execve
29#define ptregs_iopl sys_iopl
30#define ptregs_vm86old sys_vm86old
31#define ptregs_clone sys_clone
32#define ptregs_vm86 sys_vm86
33#define ptregs_sigaltstack sys_sigaltstack
34#define ptregs_vfork sys_vfork
35
36#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
37#include <asm/syscalls_32.h>
38
39#undef __SYSCALL_I386
40#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
41
42typedef void (*sys_call_ptr_t)(void);
43
44extern void sys_ni_syscall(void);
45
46const sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
47 /*
48 * Smells like a compiler bug -- it doesn't work
49 * when the & below is removed.
50 */
51 [0 ... __NR_syscall_max] = &sys_ni_syscall,
52#include <asm/syscalls_32.h>
53};
54
55int syscall_table_size = sizeof(sys_call_table);
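The table body leans on two GNU C features: a range designator pre-fills every slot with &sys_ni_syscall, then the positional designators expanded from __SYSCALL_I386 ([ nr ] = sym,) override individual slots; when designators repeat, the last one wins, so undefined numbers fall through to the ENOSYS stub. Distilled to a runnable miniature (gcc, since range designators are an extension):

    #include <stdio.h>

    typedef int (*call_t)(void);

    static int sys_ni(void) { return -38; }   /* -ENOSYS stand-in */
    static int sys_a(void)  { return 1; }
    static int sys_b(void)  { return 2; }

    static const call_t table[] = {
            [0 ... 7] = &sys_ni,    /* default every slot */
            [1] = sys_a,            /* later designators override */
            [5] = sys_b,
    };

    int main(void)
    {
            unsigned int i;

            for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                    printf("%u -> %d\n", i, table[i]());
            return 0;
    }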
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 99522f78b162..9924776f4265 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -1,11 +1,12 @@
1/* 1/*
2 * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c 2 * System call table for UML/x86-64, copied from arch/x86/kernel/syscall_*.c
3 * with some changes for UML. 3 * with some changes for UML.
4 */ 4 */
5 5
6#include <linux/linkage.h> 6#include <linux/linkage.h>
7#include <linux/sys.h> 7#include <linux/sys.h>
8#include <linux/cache.h> 8#include <linux/cache.h>
9#include <generated/user_constants.h>
9 10
10#define __NO_STUBS 11#define __NO_STUBS
11 12
@@ -34,31 +35,26 @@
34#define stub_sigaltstack sys_sigaltstack 35#define stub_sigaltstack sys_sigaltstack
35#define stub_rt_sigreturn sys_rt_sigreturn 36#define stub_rt_sigreturn sys_rt_sigreturn
36 37
37#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 38#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
38#undef _ASM_X86_UNISTD_64_H 39#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
39#include "../../x86/include/asm/unistd_64.h"
40 40
41#undef __SYSCALL 41#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ;
42#define __SYSCALL(nr, sym) [ nr ] = sym, 42#include <asm/syscalls_64.h>
43#undef _ASM_X86_UNISTD_64_H 43
44#undef __SYSCALL_64
45#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
44 46
45typedef void (*sys_call_ptr_t)(void); 47typedef void (*sys_call_ptr_t)(void);
46 48
47extern void sys_ni_syscall(void); 49extern void sys_ni_syscall(void);
48 50
49/* 51const sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
50 * We used to have a trick here which made sure that holes in the 52 /*
51 * x86_64 table were filled in with sys_ni_syscall, but a comment in 53 * Smells like a compiler bug -- it doesn't work
52 * unistd_64.h says that holes aren't allowed, so the trick was 54 * when the & below is removed.
53 * removed. 55 */
54 * The trick looked like this 56 [0 ... __NR_syscall_max] = &sys_ni_syscall,
55 * [0 ... UM_NR_syscall_max] = &sys_ni_syscall 57#include <asm/syscalls_64.h>
56 * before including unistd_64.h - the later initializations overwrote
57 * the sys_ni_syscall filler.
58 */
59
60sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
61#include <asm/unistd_64.h>
62}; 58};
63 59
64int syscall_table_size = sizeof(sys_call_table); 60int syscall_table_size = sizeof(sys_call_table);
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index ca49be8ddd0c..ce7e3607a870 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -8,6 +8,20 @@
8#include <asm/ptrace.h> 8#include <asm/ptrace.h>
9#include <asm/types.h> 9#include <asm/types.h>
10 10
11#ifdef __i386__
12#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
13static char syscalls[] = {
14#include <asm/syscalls_32.h>
15};
16#else
17#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
18#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
19#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
20static char syscalls[] = {
21#include <asm/syscalls_64.h>
22};
23#endif
24
11#define DEFINE(sym, val) \ 25#define DEFINE(sym, val) \
12 asm volatile("\n->" #sym " %0 " #val : : "i" (val)) 26 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
13 27
@@ -77,4 +91,7 @@ void foo(void)
77 DEFINE(UM_PROT_READ, PROT_READ); 91 DEFINE(UM_PROT_READ, PROT_READ);
78 DEFINE(UM_PROT_WRITE, PROT_WRITE); 92 DEFINE(UM_PROT_WRITE, PROT_WRITE);
79 DEFINE(UM_PROT_EXEC, PROT_EXEC); 93 DEFINE(UM_PROT_EXEC, PROT_EXEC);
94
95 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
96 DEFINE(NR_syscalls, sizeof(syscalls));
80} 97}
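user-offsets.c derives __NR_syscall_max without hard-coding it: every __SYSCALL_* macro expands to "[nr] = 1," inside a char array, so the compiler sizes the array to the highest syscall number plus one, and sizeof(syscalls) - 1 is exactly the maximum nr that <generated/user_constants.h> then exports to the UML build. A hedged stand-alone sketch (the three made-up entries stand in for <asm/syscalls_64.h>):

	#include <stdio.h>

	/* gaps are fine: the array is sized by the largest designated index */
	static char syscalls[] = {
		[0] = 1,
		[5] = 1,
		[17] = 1,	/* the highest nr decides sizeof */
	};

	int main(void)
	{
		printf("__NR_syscall_max = %zu\n", sizeof(syscalls) - 1); /* 17 */
		printf("NR_syscalls      = %zu\n", sizeof(syscalls));     /* 18 */
		return 0;
	}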
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 91f4ec9a0a56..af91901babb8 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -64,8 +64,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
64 64
65 err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, 65 err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
66 VM_READ|VM_EXEC| 66 VM_READ|VM_EXEC|
67 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 67 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
68 VM_ALWAYSDUMP,
69 vdsop); 68 vdsop);
70 69
71 up_write(&mm->mmap_sem); 70 up_write(&mm->mmap_sem);
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore
index 60274d5746e1..3282874bc61d 100644
--- a/arch/x86/vdso/.gitignore
+++ b/arch/x86/vdso/.gitignore
@@ -1,5 +1,7 @@
1vdso.lds 1vdso.lds
2vdso-syms.lds 2vdso-syms.lds
3vdsox32.lds
4vdsox32-syms.lds
3vdso32-syms.lds 5vdso32-syms.lds
4vdso32-syscall-syms.lds 6vdso32-syscall-syms.lds
5vdso32-sysenter-syms.lds 7vdso32-sysenter-syms.lds
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 5d179502a52c..fd14be1d1472 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -3,21 +3,29 @@
3# 3#
4 4
5VDSO64-$(CONFIG_X86_64) := y 5VDSO64-$(CONFIG_X86_64) := y
6VDSOX32-$(CONFIG_X86_X32_ABI) := y
6VDSO32-$(CONFIG_X86_32) := y 7VDSO32-$(CONFIG_X86_32) := y
7VDSO32-$(CONFIG_COMPAT) := y 8VDSO32-$(CONFIG_COMPAT) := y
8 9
9vdso-install-$(VDSO64-y) += vdso.so 10vdso-install-$(VDSO64-y) += vdso.so
11vdso-install-$(VDSOX32-y) += vdsox32.so
10vdso-install-$(VDSO32-y) += $(vdso32-images) 12vdso-install-$(VDSO32-y) += $(vdso32-images)
11 13
12 14
13# files to link into the vdso 15# files to link into the vdso
14vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o 16vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
15 17
18vobjs-$(VDSOX32-y) += $(vobjx32s-compat)
19
20# Filter out x32 objects.
21vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
22
16# files to link into kernel 23# files to link into kernel
17obj-$(VDSO64-y) += vma.o vdso.o 24obj-$(VDSO64-y) += vma.o vdso.o
25obj-$(VDSOX32-y) += vdsox32.o
18obj-$(VDSO32-y) += vdso32.o vdso32-setup.o 26obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
19 27
20vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) 28vobjs := $(foreach F,$(vobj64s),$(obj)/$F)
21 29
22$(obj)/vdso.o: $(obj)/vdso.so 30$(obj)/vdso.o: $(obj)/vdso.so
23 31
@@ -73,6 +81,42 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
73 $(call if_changed,vdsosym) 81 $(call if_changed,vdsosym)
74 82
75# 83#
84# X32 processes use the x32 vDSO to access 64-bit kernel data.
85#
86# Building the x32 vDSO image:
87# 1. Compile the x32 vDSO as 64-bit.
88# 2. Convert the object files to x32.
89# 3. Build the x32 vDSO image from the x32 objects; it contains 64-bit code
90# so that it can reach the 64-bit address space with 64-bit pointers.
91#
92
93targets += vdsox32-syms.lds
94obj-$(VDSOX32-y) += vdsox32-syms.lds
95
96CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
97VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
98 -Wl,-soname=linux-vdso.so.1 \
99 -Wl,-z,max-page-size=4096 \
100 -Wl,-z,common-page-size=4096
101
102vobjx32s-y := $(vobj64s:.o=-x32.o)
103vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
104
105# Convert 64bit object file to x32 for x32 vDSO.
106quiet_cmd_x32 = X32 $@
107 cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@
108
109$(obj)/%-x32.o: $(obj)/%.o FORCE
110 $(call if_changed,x32)
111
112targets += vdsox32.so vdsox32.so.dbg vdsox32.lds $(vobjx32s-y)
113
114$(obj)/vdsox32.o: $(src)/vdsox32.S $(obj)/vdsox32.so
115
116$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
117 $(call if_changed,vdso)
118
119#
76# Build multiple 32-bit vDSO images to choose from at boot time. 120# Build multiple 32-bit vDSO images to choose from at boot time.
77# 121#
78obj-$(VDSO32-y) += vdso32-syms.lds 122obj-$(VDSO32-y) += vdso32-syms.lds
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 6bc0e723b6e8..885eff49d6ab 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -70,100 +70,98 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
70 return ret; 70 return ret;
71} 71}
72 72
73notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
74{
75 long ret;
76
77 asm("syscall" : "=a" (ret) :
78 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
79 return ret;
80}
81
82
73notrace static inline long vgetns(void) 83notrace static inline long vgetns(void)
74{ 84{
75 long v; 85 long v;
76 cycles_t cycles; 86 cycles_t cycles;
77 if (gtod->clock.vclock_mode == VCLOCK_TSC) 87 if (gtod->clock.vclock_mode == VCLOCK_TSC)
78 cycles = vread_tsc(); 88 cycles = vread_tsc();
79 else 89 else if (gtod->clock.vclock_mode == VCLOCK_HPET)
80 cycles = vread_hpet(); 90 cycles = vread_hpet();
91 else
92 return 0;
81 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; 93 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
82 return (v * gtod->clock.mult) >> gtod->clock.shift; 94 return (v * gtod->clock.mult) >> gtod->clock.shift;
83} 95}
84 96
85notrace static noinline int do_realtime(struct timespec *ts) 97/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
98notrace static int __always_inline do_realtime(struct timespec *ts)
86{ 99{
87 unsigned long seq, ns; 100 unsigned long seq, ns;
101 int mode;
102
88 do { 103 do {
89 seq = read_seqbegin(&gtod->lock); 104 seq = read_seqcount_begin(&gtod->seq);
105 mode = gtod->clock.vclock_mode;
90 ts->tv_sec = gtod->wall_time_sec; 106 ts->tv_sec = gtod->wall_time_sec;
91 ts->tv_nsec = gtod->wall_time_nsec; 107 ts->tv_nsec = gtod->wall_time_nsec;
92 ns = vgetns(); 108 ns = vgetns();
93 } while (unlikely(read_seqretry(&gtod->lock, seq))); 109 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
110
94 timespec_add_ns(ts, ns); 111 timespec_add_ns(ts, ns);
95 return 0; 112 return mode;
96} 113}
97 114
98notrace static noinline int do_monotonic(struct timespec *ts) 115notrace static int do_monotonic(struct timespec *ts)
99{ 116{
100 unsigned long seq, ns, secs; 117 unsigned long seq, ns;
118 int mode;
119
101 do { 120 do {
102 seq = read_seqbegin(&gtod->lock); 121 seq = read_seqcount_begin(&gtod->seq);
103 secs = gtod->wall_time_sec; 122 mode = gtod->clock.vclock_mode;
104 ns = gtod->wall_time_nsec + vgetns(); 123 ts->tv_sec = gtod->monotonic_time_sec;
105 secs += gtod->wall_to_monotonic.tv_sec; 124 ts->tv_nsec = gtod->monotonic_time_nsec;
106 ns += gtod->wall_to_monotonic.tv_nsec; 125 ns = vgetns();
107 } while (unlikely(read_seqretry(&gtod->lock, seq))); 126 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
108 127 timespec_add_ns(ts, ns);
109 /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec
110 * are all guaranteed to be nonnegative.
111 */
112 while (ns >= NSEC_PER_SEC) {
113 ns -= NSEC_PER_SEC;
114 ++secs;
115 }
116 ts->tv_sec = secs;
117 ts->tv_nsec = ns;
118 128
119 return 0; 129 return mode;
120} 130}
121 131
122notrace static noinline int do_realtime_coarse(struct timespec *ts) 132notrace static int do_realtime_coarse(struct timespec *ts)
123{ 133{
124 unsigned long seq; 134 unsigned long seq;
125 do { 135 do {
126 seq = read_seqbegin(&gtod->lock); 136 seq = read_seqcount_begin(&gtod->seq);
127 ts->tv_sec = gtod->wall_time_coarse.tv_sec; 137 ts->tv_sec = gtod->wall_time_coarse.tv_sec;
128 ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; 138 ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
129 } while (unlikely(read_seqretry(&gtod->lock, seq))); 139 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
130 return 0; 140 return 0;
131} 141}
132 142
133notrace static noinline int do_monotonic_coarse(struct timespec *ts) 143notrace static int do_monotonic_coarse(struct timespec *ts)
134{ 144{
135 unsigned long seq, ns, secs; 145 unsigned long seq;
136 do { 146 do {
137 seq = read_seqbegin(&gtod->lock); 147 seq = read_seqcount_begin(&gtod->seq);
138 secs = gtod->wall_time_coarse.tv_sec; 148 ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
139 ns = gtod->wall_time_coarse.tv_nsec; 149 ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
140 secs += gtod->wall_to_monotonic.tv_sec; 150 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
141 ns += gtod->wall_to_monotonic.tv_nsec;
142 } while (unlikely(read_seqretry(&gtod->lock, seq)));
143
144 /* wall_time_nsec and wall_to_monotonic.tv_nsec are
145 * guaranteed to be between 0 and NSEC_PER_SEC.
146 */
147 if (ns >= NSEC_PER_SEC) {
148 ns -= NSEC_PER_SEC;
149 ++secs;
150 }
151 ts->tv_sec = secs;
152 ts->tv_nsec = ns;
153 151
154 return 0; 152 return 0;
155} 153}
156 154
157notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 155notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
158{ 156{
157 int ret = VCLOCK_NONE;
158
159 switch (clock) { 159 switch (clock) {
160 case CLOCK_REALTIME: 160 case CLOCK_REALTIME:
161 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) 161 ret = do_realtime(ts);
162 return do_realtime(ts);
163 break; 162 break;
164 case CLOCK_MONOTONIC: 163 case CLOCK_MONOTONIC:
165 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) 164 ret = do_monotonic(ts);
166 return do_monotonic(ts);
167 break; 165 break;
168 case CLOCK_REALTIME_COARSE: 166 case CLOCK_REALTIME_COARSE:
169 return do_realtime_coarse(ts); 167 return do_realtime_coarse(ts);
@@ -171,32 +169,33 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
171 return do_monotonic_coarse(ts); 169 return do_monotonic_coarse(ts);
172 } 170 }
173 171
174 return vdso_fallback_gettime(clock, ts); 172 if (ret == VCLOCK_NONE)
173 return vdso_fallback_gettime(clock, ts);
174 return 0;
175} 175}
176int clock_gettime(clockid_t, struct timespec *) 176int clock_gettime(clockid_t, struct timespec *)
177 __attribute__((weak, alias("__vdso_clock_gettime"))); 177 __attribute__((weak, alias("__vdso_clock_gettime")));
178 178
179notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 179notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
180{ 180{
181 long ret; 181 long ret = VCLOCK_NONE;
182 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) { 182
183 if (likely(tv != NULL)) { 183 if (likely(tv != NULL)) {
184 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != 184 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
185 offsetof(struct timespec, tv_nsec) || 185 offsetof(struct timespec, tv_nsec) ||
186 sizeof(*tv) != sizeof(struct timespec)); 186 sizeof(*tv) != sizeof(struct timespec));
187 do_realtime((struct timespec *)tv); 187 ret = do_realtime((struct timespec *)tv);
188 tv->tv_usec /= 1000; 188 tv->tv_usec /= 1000;
189 }
190 if (unlikely(tz != NULL)) {
191 /* Avoid memcpy. Some old compilers fail to inline it */
192 tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
193 tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
194 }
195 return 0;
196 } 189 }
197 asm("syscall" : "=a" (ret) : 190 if (unlikely(tz != NULL)) {
198 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); 191 /* Avoid memcpy. Some old compilers fail to inline it */
199 return ret; 192 tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
193 tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
194 }
195
196 if (ret == VCLOCK_NONE)
197 return vdso_fallback_gtod(tv, tz);
198 return 0;
200} 199}
201int gettimeofday(struct timeval *, struct timezone *) 200int gettimeofday(struct timeval *, struct timezone *)
202 __attribute__((weak, alias("__vdso_gettimeofday"))); 201 __attribute__((weak, alias("__vdso_gettimeofday")));
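The rewritten do_realtime()/do_monotonic() read the gtod data with the lockless seqcount protocol: snapshot the sequence counter, copy the fields, and retry the whole read if a concurrent writer bumped the counter in between; the vclock mode sampled inside the loop is what later selects the syscall fallback. A rough userspace model of the reader side using C11 atomics (struct clock_data, seq_begin and seq_retry are invented for this sketch, not kernel API):

	#include <stdatomic.h>
	#include <stdio.h>

	struct clock_data {
		atomic_uint seq;	/* even = stable, odd = writer active */
		long sec, nsec;
	};

	static unsigned seq_begin(struct clock_data *d)
	{
		unsigned s;

		/* spin while a writer holds an odd sequence number */
		while ((s = atomic_load_explicit(&d->seq,
						 memory_order_acquire)) & 1)
			;
		return s;
	}

	static int seq_retry(struct clock_data *d, unsigned s)
	{
		atomic_thread_fence(memory_order_acquire);
		return atomic_load_explicit(&d->seq,
					    memory_order_relaxed) != s;
	}

	int main(void)
	{
		struct clock_data d = { .sec = 1, .nsec = 500000000 };
		long sec, nsec;
		unsigned s;

		do {			/* same shape as do_realtime() above */
			s = seq_begin(&d);
			sec = d.sec;
			nsec = d.nsec;
		} while (seq_retry(&d, s));

		printf("%ld.%09ld\n", sec, nsec);
		return 0;
	}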
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 468d591dde31..66e6d9359826 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -250,13 +250,7 @@ static int __init gate_vma_init(void)
250 gate_vma.vm_end = FIXADDR_USER_END; 250 gate_vma.vm_end = FIXADDR_USER_END;
251 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; 251 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
252 gate_vma.vm_page_prot = __P101; 252 gate_vma.vm_page_prot = __P101;
253 /* 253
254 * Make sure the vDSO gets into every core dump.
255 * Dumping its contents makes post-mortem fully interpretable later
256 * without matching up the same kernel and hardware config to see
257 * what PC values meant.
258 */
259 gate_vma.vm_flags |= VM_ALWAYSDUMP;
260 return 0; 254 return 0;
261} 255}
262 256
@@ -317,6 +311,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
317 int ret = 0; 311 int ret = 0;
318 bool compat; 312 bool compat;
319 313
314#ifdef CONFIG_X86_X32_ABI
315 if (test_thread_flag(TIF_X32))
316 return x32_setup_additional_pages(bprm, uses_interp);
317#endif
318
320 if (vdso_enabled == VDSO_DISABLED) 319 if (vdso_enabled == VDSO_DISABLED)
321 return 0; 320 return 0;
322 321
@@ -343,17 +342,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
343 if (compat_uses_vma || !compat) { 342 if (compat_uses_vma || !compat) {
344 /* 343 /*
345 * MAYWRITE to allow gdb to COW and set breakpoints 344 * MAYWRITE to allow gdb to COW and set breakpoints
346 *
347 * Make sure the vDSO gets into every core dump.
348 * Dumping its contents makes post-mortem fully
349 * interpretable later without matching up the same
350 * kernel and hardware config to see what PC values
351 * meant.
352 */ 345 */
353 ret = install_special_mapping(mm, addr, PAGE_SIZE, 346 ret = install_special_mapping(mm, addr, PAGE_SIZE,
354 VM_READ|VM_EXEC| 347 VM_READ|VM_EXEC|
355 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 348 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
356 VM_ALWAYSDUMP,
357 vdso32_pages); 349 vdso32_pages);
358 350
359 if (ret) 351 if (ret)
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S
new file mode 100644
index 000000000000..d6b9a7f42a8a
--- /dev/null
+++ b/arch/x86/vdso/vdsox32.S
@@ -0,0 +1,22 @@
1#include <asm/page_types.h>
2#include <linux/linkage.h>
3#include <linux/init.h>
4
5__PAGE_ALIGNED_DATA
6
7 .globl vdsox32_start, vdsox32_end
8 .align PAGE_SIZE
9vdsox32_start:
10 .incbin "arch/x86/vdso/vdsox32.so"
11vdsox32_end:
12 .align PAGE_SIZE /* extra data here leaks to userspace. */
13
14.previous
15
16 .globl vdsox32_pages
17 .bss
18 .align 8
19 .type vdsox32_pages, @object
20vdsox32_pages:
21 .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
22 .size vdsox32_pages, .-vdsox32_pages
diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S
new file mode 100644
index 000000000000..62272aa2ae0a
--- /dev/null
+++ b/arch/x86/vdso/vdsox32.lds.S
@@ -0,0 +1,28 @@
1/*
2 * Linker script for x32 vDSO.
3 * We #include the file to define the layout details.
4 * Here we only choose the prelinked virtual address.
5 *
6 * This file defines the version script giving the user-exported symbols in
7 * the DSO. We can define local symbols here called VDSO* to make their
8 * values visible using the asm-x86/vdso.h macros from the kernel proper.
9 */
10
11#define VDSO_PRELINK 0
12#include "vdso-layout.lds.S"
13
14/*
15 * This controls what userland symbols we export from the vDSO.
16 */
17VERSION {
18 LINUX_2.6 {
19 global:
20 __vdso_clock_gettime;
21 __vdso_gettimeofday;
22 __vdso_getcpu;
23 __vdso_time;
24 local: *;
25 };
26}
27
28VDSOX32_PRELINK = VDSO_PRELINK;
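The version script is what makes __vdso_clock_gettime and friends visible to x32 userspace; combined with the weak clock_gettime/gettimeofday aliases in vclock_gettime.c, a libc can bind the ordinary library calls straight to the vDSO. Seen from the calling side (on x86-64, glibc normally routes this through the vDSO, so the fast path executes no syscall instruction):

	#include <stdio.h>
	#include <time.h>

	int main(void)
	{
		struct timespec ts;

		/* typically dispatched to __vdso_clock_gettime */
		if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
			printf("monotonic: %ld.%09ld\n",
			       (long)ts.tv_sec, ts.tv_nsec);
		return 0;
	}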
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 153407c35b75..00aaf047b39f 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -24,7 +24,44 @@ extern unsigned short vdso_sync_cpuid;
24extern struct page *vdso_pages[]; 24extern struct page *vdso_pages[];
25static unsigned vdso_size; 25static unsigned vdso_size;
26 26
27static void __init patch_vdso(void *vdso, size_t len) 27#ifdef CONFIG_X86_X32_ABI
28extern char vdsox32_start[], vdsox32_end[];
29extern struct page *vdsox32_pages[];
30static unsigned vdsox32_size;
31
32static void __init patch_vdsox32(void *vdso, size_t len)
33{
34 Elf32_Ehdr *hdr = vdso;
35 Elf32_Shdr *sechdrs, *alt_sec = 0;
36 char *secstrings;
37 void *alt_data;
38 int i;
39
40 BUG_ON(len < sizeof(Elf32_Ehdr));
41 BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0);
42
43 sechdrs = (void *)hdr + hdr->e_shoff;
44 secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
45
46 for (i = 1; i < hdr->e_shnum; i++) {
47 Elf32_Shdr *shdr = &sechdrs[i];
48 if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) {
49 alt_sec = shdr;
50 goto found;
51 }
52 }
53
54 /* If we get here, it's probably a bug. */
55 pr_warning("patch_vdsox32: .altinstructions not found\n");
56 return; /* nothing to patch */
57
58found:
59 alt_data = (void *)hdr + alt_sec->sh_offset;
60 apply_alternatives(alt_data, alt_data + alt_sec->sh_size);
61}
62#endif
63
64static void __init patch_vdso64(void *vdso, size_t len)
28{ 65{
29 Elf64_Ehdr *hdr = vdso; 66 Elf64_Ehdr *hdr = vdso;
30 Elf64_Shdr *sechdrs, *alt_sec = 0; 67 Elf64_Shdr *sechdrs, *alt_sec = 0;
@@ -47,7 +84,7 @@ static void __init patch_vdso(void *vdso, size_t len)
47 } 84 }
48 85
49 /* If we get here, it's probably a bug. */ 86 /* If we get here, it's probably a bug. */
50 pr_warning("patch_vdso: .altinstructions not found\n"); 87 pr_warning("patch_vdso64: .altinstructions not found\n");
51 return; /* nothing to patch */ 88 return; /* nothing to patch */
52 89
53found: 90found:
@@ -60,12 +97,20 @@ static int __init init_vdso(void)
60 int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; 97 int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
61 int i; 98 int i;
62 99
63 patch_vdso(vdso_start, vdso_end - vdso_start); 100 patch_vdso64(vdso_start, vdso_end - vdso_start);
64 101
65 vdso_size = npages << PAGE_SHIFT; 102 vdso_size = npages << PAGE_SHIFT;
66 for (i = 0; i < npages; i++) 103 for (i = 0; i < npages; i++)
67 vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); 104 vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
68 105
106#ifdef CONFIG_X86_X32_ABI
107 patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start);
108 npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE;
109 vdsox32_size = npages << PAGE_SHIFT;
110 for (i = 0; i < npages; i++)
111 vdsox32_pages[i] = virt_to_page(vdsox32_start + i*PAGE_SIZE);
112#endif
113
69 return 0; 114 return 0;
70} 115}
71subsys_initcall(init_vdso); 116subsys_initcall(init_vdso);
@@ -103,7 +148,10 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
103 148
104/* Setup a VMA at program startup for the vsyscall page. 149/* Setup a VMA at program startup for the vsyscall page.
105 Not called for compat tasks */ 150 Not called for compat tasks */
106int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 151static int setup_additional_pages(struct linux_binprm *bprm,
152 int uses_interp,
153 struct page **pages,
154 unsigned size)
107{ 155{
108 struct mm_struct *mm = current->mm; 156 struct mm_struct *mm = current->mm;
109 unsigned long addr; 157 unsigned long addr;
@@ -113,8 +161,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
113 return 0; 161 return 0;
114 162
115 down_write(&mm->mmap_sem); 163 down_write(&mm->mmap_sem);
116 addr = vdso_addr(mm->start_stack, vdso_size); 164 addr = vdso_addr(mm->start_stack, size);
117 addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0); 165 addr = get_unmapped_area(NULL, addr, size, 0, 0);
118 if (IS_ERR_VALUE(addr)) { 166 if (IS_ERR_VALUE(addr)) {
119 ret = addr; 167 ret = addr;
120 goto up_fail; 168 goto up_fail;
@@ -122,11 +170,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
122 170
123 current->mm->context.vdso = (void *)addr; 171 current->mm->context.vdso = (void *)addr;
124 172
125 ret = install_special_mapping(mm, addr, vdso_size, 173 ret = install_special_mapping(mm, addr, size,
126 VM_READ|VM_EXEC| 174 VM_READ|VM_EXEC|
127 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 175 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
128 VM_ALWAYSDUMP, 176 pages);
129 vdso_pages);
130 if (ret) { 177 if (ret) {
131 current->mm->context.vdso = NULL; 178 current->mm->context.vdso = NULL;
132 goto up_fail; 179 goto up_fail;
@@ -137,6 +184,20 @@ up_fail:
137 return ret; 184 return ret;
138} 185}
139 186
187int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
188{
189 return setup_additional_pages(bprm, uses_interp, vdso_pages,
190 vdso_size);
191}
192
193#ifdef CONFIG_X86_X32_ABI
194int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
195{
196 return setup_additional_pages(bprm, uses_interp, vdsox32_pages,
197 vdsox32_size);
198}
199#endif
200
140static __init int vdso_setup(char *s) 201static __init int vdso_setup(char *s)
141{ 202{
142 vdso_enabled = simple_strtoul(s, NULL, 0); 203 vdso_enabled = simple_strtoul(s, NULL, 0);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fe06bf4ef0e3..4f51bebac02c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -967,7 +967,7 @@ void xen_setup_shared_info(void)
967 xen_setup_mfn_list_list(); 967 xen_setup_mfn_list_list();
968} 968}
969 969
970/* This is called once we have the cpu_possible_map */ 970/* This is called once we have the cpu_possible_mask */
971void xen_setup_vcpu_info_placement(void) 971void xen_setup_vcpu_info_placement(void)
972{ 972{
973 int cpu; 973 int cpu;
@@ -1232,7 +1232,9 @@ asmlinkage void __init xen_start_kernel(void)
1232 1232
1233 /* Prevent unwanted bits from being set in PTEs. */ 1233 /* Prevent unwanted bits from being set in PTEs. */
1234 __supported_pte_mask &= ~_PAGE_GLOBAL; 1234 __supported_pte_mask &= ~_PAGE_GLOBAL;
1235#if 0
1235 if (!xen_initial_domain()) 1236 if (!xen_initial_domain())
1237#endif
1236 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); 1238 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1237 1239
1238 __supported_pte_mask |= _PAGE_IOMAP; 1240 __supported_pte_mask |= _PAGE_IOMAP;
@@ -1295,10 +1297,6 @@ asmlinkage void __init xen_start_kernel(void)
1295 1297
1296 pgd = (pgd_t *)xen_start_info->pt_base; 1298 pgd = (pgd_t *)xen_start_info->pt_base;
1297 1299
1298 if (!xen_initial_domain())
1299 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1300
1301 __supported_pte_mask |= _PAGE_IOMAP;
1302 /* Don't do the full vcpu_info placement stuff until we have a 1300 /* Don't do the full vcpu_info placement stuff until we have a
1303 possible map and a non-dummy shared_info. */ 1301 possible map and a non-dummy shared_info. */
1304 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1302 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 91dc2871e336..b8e279479a6b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -415,13 +415,13 @@ static pteval_t iomap_pte(pteval_t val)
415static pteval_t xen_pte_val(pte_t pte) 415static pteval_t xen_pte_val(pte_t pte)
416{ 416{
417 pteval_t pteval = pte.pte; 417 pteval_t pteval = pte.pte;
418 418#if 0
419 /* If this is a WC pte, convert back from Xen WC to Linux WC */ 419 /* If this is a WC pte, convert back from Xen WC to Linux WC */
420 if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { 420 if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
421 WARN_ON(!pat_enabled); 421 WARN_ON(!pat_enabled);
422 pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; 422 pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
423 } 423 }
424 424#endif
425 if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) 425 if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
426 return pteval; 426 return pteval;
427 427
@@ -463,7 +463,7 @@ void xen_set_pat(u64 pat)
463static pte_t xen_make_pte(pteval_t pte) 463static pte_t xen_make_pte(pteval_t pte)
464{ 464{
465 phys_addr_t addr = (pte & PTE_PFN_MASK); 465 phys_addr_t addr = (pte & PTE_PFN_MASK);
466 466#if 0
467 /* If Linux is trying to set a WC pte, then map to the Xen WC. 467 /* If Linux is trying to set a WC pte, then map to the Xen WC.
468 * If _PAGE_PAT is set, then it probably means it is really 468 * If _PAGE_PAT is set, then it probably means it is really
469 * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope 469 * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
@@ -476,7 +476,7 @@ static pte_t xen_make_pte(pteval_t pte)
476 if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) 476 if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
477 pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; 477 pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
478 } 478 }
479 479#endif
480 /* 480 /*
481 * Unprivileged domains are allowed to do IOMAPpings for 481 * Unprivileged domains are allowed to do IOMAPpings for
482 * PCI passthrough, but not map ISA space. The ISA 482 * PCI passthrough, but not map ISA space. The ISA
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index b480d4207a4c..967633ad98c4 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -12,8 +12,8 @@ int xen_swiotlb __read_mostly;
12 12
13static struct dma_map_ops xen_swiotlb_dma_ops = { 13static struct dma_map_ops xen_swiotlb_dma_ops = {
14 .mapping_error = xen_swiotlb_dma_mapping_error, 14 .mapping_error = xen_swiotlb_dma_mapping_error,
15 .alloc_coherent = xen_swiotlb_alloc_coherent, 15 .alloc = xen_swiotlb_alloc_coherent,
16 .free_coherent = xen_swiotlb_free_coherent, 16 .free = xen_swiotlb_free_coherent,
17 .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, 17 .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
18 .sync_single_for_device = xen_swiotlb_sync_single_for_device, 18 .sync_single_for_device = xen_swiotlb_sync_single_for_device,
19 .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, 19 .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e845555ff486..5fac6919b957 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -415,6 +415,13 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
415 play_dead_common(); 415 play_dead_common();
416 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); 416 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
417 cpu_bringup(); 417 cpu_bringup();
418	/*
419	 * Balance out the preempt calls - we are running in the cpu_idle
420	 * loop, which was entered at bootup from cpu_bringup_and_idle.
421	 * cpu_bringup_and_idle called cpu_bringup, which did a
422	 * preempt_disable(). So this preempt_enable() balances it out.
423	 */
424 preempt_enable();
418} 425}
419 426
420#else /* !CONFIG_HOTPLUG_CPU */ 427#else /* !CONFIG_HOTPLUG_CPU */
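The new preempt_enable() is about keeping the preempt counter balanced across the vCPU-unplug path: cpu_bringup() disabled preemption once at boot, and the matching enable only happens when the CPU comes back through xen_play_dead(). A toy model of that invariant (the counter and helpers are stand-ins, not the kernel's preempt API):

	#include <assert.h>
	#include <stdio.h>

	static int preempt_count;	/* the kernel tracks this per cpu */

	static void preempt_disable(void) { preempt_count++; }

	static void preempt_enable(void)
	{
		assert(preempt_count > 0);	/* catches an unbalanced enable */
		preempt_count--;
	}

	int main(void)
	{
		preempt_disable();	/* as in cpu_bringup() at bootup */
		/* ... idle loop runs, the vCPU goes down and comes back ... */
		preempt_enable();	/* the call added in xen_play_dead() */
		printf("balanced, count = %d\n", preempt_count);
		return 0;
	}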
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cc9b1e182fcf..d69cc6c3f808 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -116,9 +116,26 @@ static inline void spin_time_accum_blocked(u64 start)
116} 116}
117#endif /* CONFIG_XEN_DEBUG_FS */ 117#endif /* CONFIG_XEN_DEBUG_FS */
118 118
119/*
120 * Size struct xen_spinlock so it's the same as arch_spinlock_t.
121 */
122#if NR_CPUS < 256
123typedef u8 xen_spinners_t;
124# define inc_spinners(xl) \
125 asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory");
126# define dec_spinners(xl) \
127 asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory");
128#else
129typedef u16 xen_spinners_t;
130# define inc_spinners(xl) \
131 asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory");
132# define dec_spinners(xl) \
133 asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
134#endif
135
119struct xen_spinlock { 136struct xen_spinlock {
120 unsigned char lock; /* 0 -> free; 1 -> locked */ 137 unsigned char lock; /* 0 -> free; 1 -> locked */
121 unsigned short spinners; /* count of waiting cpus */ 138 xen_spinners_t spinners; /* count of waiting cpus */
122}; 139};
123 140
124static int xen_spin_is_locked(struct arch_spinlock *lock) 141static int xen_spin_is_locked(struct arch_spinlock *lock)
@@ -164,8 +181,7 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
164 181
165 wmb(); /* set lock of interest before count */ 182 wmb(); /* set lock of interest before count */
166 183
167 asm(LOCK_PREFIX " incw %0" 184 inc_spinners(xl);
168 : "+m" (xl->spinners) : : "memory");
169 185
170 return prev; 186 return prev;
171} 187}
@@ -176,8 +192,7 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
176 */ 192 */
177static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) 193static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
178{ 194{
179 asm(LOCK_PREFIX " decw %0" 195 dec_spinners(xl);
180 : "+m" (xl->spinners) : : "memory");
181 wmb(); /* decrement count before restoring lock */ 196 wmb(); /* decrement count before restoring lock */
182 __this_cpu_write(lock_spinners, prev); 197 __this_cpu_write(lock_spinners, prev);
183} 198}
@@ -373,6 +388,8 @@ void xen_uninit_lock_cpu(int cpu)
373 388
374void __init xen_init_spinlocks(void) 389void __init xen_init_spinlocks(void)
375{ 390{
391 BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
392
376 pv_lock_ops.spin_is_locked = xen_spin_is_locked; 393 pv_lock_ops.spin_is_locked = xen_spin_is_locked;
377 pv_lock_ops.spin_is_contended = xen_spin_is_contended; 394 pv_lock_ops.spin_is_contended = xen_spin_is_contended;
378 pv_lock_ops.spin_lock = xen_spin_lock; 395 pv_lock_ops.spin_lock = xen_spin_lock;
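The BUILD_BUG_ON() added to xen_init_spinlocks() pins down, at compile time, that struct xen_spinlock still fits inside an arch_spinlock_t after spinners shrank to u8/u16. Outside the kernel the same guarantee can be written with C11 _Static_assert; the arch_spinlock_t model below is purely illustrative:

	#include <stdint.h>

	typedef uint32_t arch_spinlock_t;	/* stand-in ticket lock */

	struct xen_spinlock {
		uint8_t  lock;		/* 0 -> free; 1 -> locked */
		uint16_t spinners;	/* u8 when NR_CPUS < 256 above */
	};

	_Static_assert(sizeof(struct xen_spinlock) <= sizeof(arch_spinlock_t),
		       "struct xen_spinlock must fit in arch_spinlock_t");

	int main(void) { return 0; }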